The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/pmap.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * the Systems Programming Group of the University of Utah Computer
   11  * Science Department and William Jolitz of UUNET Technologies Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the University of
   24  *      California, Berkeley and its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   42  */
   43 /*-
   44  * Copyright (c) 2003 Networks Associates Technology, Inc.
   45  * All rights reserved.
   46  *
   47  * This software was developed for the FreeBSD Project by Jake Burkholder,
   48  * Safeport Network Services, and Network Associates Laboratories, the
   49  * Security Research Division of Network Associates, Inc. under
   50  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   51  * CHATS research program.
   52  *
   53  * Redistribution and use in source and binary forms, with or without
   54  * modification, are permitted provided that the following conditions
   55  * are met:
   56  * 1. Redistributions of source code must retain the above copyright
   57  *    notice, this list of conditions and the following disclaimer.
   58  * 2. Redistributions in binary form must reproduce the above copyright
   59  *    notice, this list of conditions and the following disclaimer in the
   60  *    documentation and/or other materials provided with the distribution.
   61  *
   62  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   72  * SUCH DAMAGE.
   73  */
   74 
   75 #include <sys/cdefs.h>
   76 __FBSDID("$FreeBSD: releng/6.0/sys/i386/i386/pmap.c 151793 2005-10-28 06:49:49Z ade $");
   77 
   78 /*
   79  *      Manages physical address maps.
   80  *
   81  *      In addition to hardware address maps, this
   82  *      module is called upon to provide software-use-only
   83  *      maps which may or may not be stored in the same
   84  *      form as hardware maps.  These pseudo-maps are
   85  *      used to store intermediate results from copy
   86  *      operations to and from address spaces.
   87  *
   88  *      Since the information managed by this module is
   89  *      also stored by the logical address mapping module,
   90  *      this module may throw away valid virtual-to-physical
   91  *      mappings at almost any time.  However, invalidations
   92  *      of virtual-to-physical mappings must be done as
   93  *      requested.
   94  *
   95  *      In order to cope with hardware architectures which
   96  *      make virtual-to-physical map invalidates expensive,
   97  *      this module may delay invalidate or reduced protection
   98  *      operations until such time as they are actually
   99  *      necessary.  This module is given full information as
  100  *      to which processors are currently using which maps,
  101  *      and to when physical maps must be made correct.
  102  */
  103 
  104 #include "opt_cpu.h"
  105 #include "opt_pmap.h"
  106 #include "opt_msgbuf.h"
  107 #include "opt_kstack_pages.h"
  108 
  109 #include <sys/param.h>
  110 #include <sys/systm.h>
  111 #include <sys/kernel.h>
  112 #include <sys/lock.h>
  113 #include <sys/malloc.h>
  114 #include <sys/mman.h>
  115 #include <sys/msgbuf.h>
  116 #include <sys/mutex.h>
  117 #include <sys/proc.h>
  118 #include <sys/sx.h>
  119 #include <sys/vmmeter.h>
  120 #include <sys/sched.h>
  121 #include <sys/sysctl.h>
  122 #ifdef SMP
  123 #include <sys/smp.h>
  124 #endif
  125 
  126 #include <vm/vm.h>
  127 #include <vm/vm_param.h>
  128 #include <vm/vm_kern.h>
  129 #include <vm/vm_page.h>
  130 #include <vm/vm_map.h>
  131 #include <vm/vm_object.h>
  132 #include <vm/vm_extern.h>
  133 #include <vm/vm_pageout.h>
  134 #include <vm/vm_pager.h>
  135 #include <vm/uma.h>
  136 
  137 #include <machine/cpu.h>
  138 #include <machine/cputypes.h>
  139 #include <machine/md_var.h>
  140 #include <machine/pcb.h>
  141 #include <machine/specialreg.h>
  142 #ifdef SMP
  143 #include <machine/smp.h>
  144 #endif
  145 
  146 #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
  147 #define CPU_ENABLE_SSE
  148 #endif
  149 
  150 #ifndef PMAP_SHPGPERPROC
  151 #define PMAP_SHPGPERPROC 200
  152 #endif
  153 
  154 #if defined(DIAGNOSTIC)
  155 #define PMAP_DIAGNOSTIC
  156 #endif
  157 
  158 #define MINPV 2048
  159 
  160 #if !defined(PMAP_DIAGNOSTIC)
  161 #define PMAP_INLINE __inline
  162 #else
  163 #define PMAP_INLINE
  164 #endif
  165 
  166 /*
  167  * Get PDEs and PTEs for user/kernel address space
  168  */
  169 #define pmap_pde(m, v)  (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
  170 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
  171 
  172 #define pmap_pde_v(pte)         ((*(int *)pte & PG_V) != 0)
  173 #define pmap_pte_w(pte)         ((*(int *)pte & PG_W) != 0)
  174 #define pmap_pte_m(pte)         ((*(int *)pte & PG_M) != 0)
  175 #define pmap_pte_u(pte)         ((*(int *)pte & PG_A) != 0)
  176 #define pmap_pte_v(pte)         ((*(int *)pte & PG_V) != 0)
  177 
  178 #define pmap_pte_set_w(pte, v)  ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
  179     atomic_clear_int((u_int *)(pte), PG_W))
  180 #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
  181 
  182 struct pmap kernel_pmap_store;
  183 LIST_HEAD(pmaplist, pmap);
  184 static struct pmaplist allpmaps;
  185 static struct mtx allpmaps_lock;
  186 
  187 vm_paddr_t avail_end;   /* PA of last available physical page */
  188 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  189 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  190 int pgeflag = 0;                /* PG_G or-in */
  191 int pseflag = 0;                /* PG_PS or-in */
  192 
  193 static int nkpt;
  194 vm_offset_t kernel_vm_end;
  195 extern u_int32_t KERNend;
  196 
  197 #ifdef PAE
  198 static uma_zone_t pdptzone;
  199 #endif
  200 
  201 /*
  202  * Data for the pv entry allocation mechanism
  203  */
  204 static uma_zone_t pvzone;
  205 static struct vm_object pvzone_obj;
  206 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  207 int pmap_pagedaemon_waken;
  208 
  209 /*
  210  * All those kernel PT submaps that BSD is so fond of
       *
       * A struct sysmaps bundles two reserved kernel mappings with a mutex:
       * CMAP1/CMAP2 point at page table entries and CADDR1/CADDR2 are the
       * kernel virtual addresses paired with them.  pmap_bootstrap() fills
       * in one struct per CPU slot (sysmaps_pcpu[]), reserving one page of
       * VA for each of the two mappings and initializing "lock" as an
       * MTX_DEF mutex named "SYSMAPS".
  211  */
  212 struct sysmaps {
  213         struct  mtx lock;
  214         pt_entry_t *CMAP1;
  215         pt_entry_t *CMAP2;
  216         caddr_t CADDR1;
  217         caddr_t CADDR2;
  218 };
  219 static struct sysmaps sysmaps_pcpu[MAXCPU];
  220 pt_entry_t *CMAP1 = 0;
  221 static pt_entry_t *CMAP3;
  222 caddr_t CADDR1 = 0, ptvmmap = 0;
  223 static caddr_t CADDR3;
  224 struct msgbuf *msgbufp = 0;
  225 
  226 /*
  227  * Crashdump maps.
  228  */
  229 static caddr_t crashdumpmap;
  230 
  231 #ifdef SMP
  232 extern pt_entry_t *SMPpt;
  233 #endif
      /*
       * PMAP1/PADDR1 and PMAP2/PADDR2 are page table entry / virtual address
       * pairs reserved by pmap_bootstrap().  pmap_pte_quick() points PMAP1,
       * and pmap_pte() points PMAP2 (while holding PMAP2mutex), at the page
       * table page of a pmap that is not the current one, and then reads
       * that page through PADDR1 / PADDR2 respectively.
       */
  234 static pt_entry_t *PMAP1 = 0, *PMAP2;
  235 static pt_entry_t *PADDR1 = 0, *PADDR2;
  236 #ifdef SMP
      /* CPU id recorded by pmap_pte_quick() when it last invalidated PADDR1. */
  237 static int PMAP1cpu;
  238 static int PMAP1changedcpu;
  239 SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 
  240            &PMAP1changedcpu, 0,
  241            "Number of times pmap_pte_quick changed CPU with same PMAP1");
  242 #endif
      /* Read-only debug counters maintained by pmap_pte_quick(). */
  243 static int PMAP1changed;
  244 SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 
  245            &PMAP1changed, 0,
  246            "Number of times pmap_pte_quick changed PMAP1");
  247 static int PMAP1unchanged;
  248 SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 
  249            &PMAP1unchanged, 0,
  250            "Number of times pmap_pte_quick didn't change PMAP1");
      /* Held from pmap_pte() until pmap_pte_release() while PADDR2 is in use. */
  251 static struct mtx PMAP2mutex;
  252 
  253 static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
  254 static pv_entry_t get_pv_entry(void);
  255 static void     pmap_clear_ptes(vm_page_t m, int bit);
  256 
  257 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
  258 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
  259 static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
  260                                         vm_offset_t va);
  261 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
  262 
  263 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
  264 
  265 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
  266 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m);
  267 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
  268 static void pmap_pte_release(pt_entry_t *pte);
  269 static int pmap_unuse_pt(pmap_t, vm_offset_t);
  270 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  271 #ifdef PAE
  272 static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
  273 #endif
  274 
  275 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
  276 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
  277 
  278 /*
  279  * Move the kernel virtual free pointer to the next
  280  * 4MB.  This is used to help improve performance
  281  * by using a large (4MB) page for much of the kernel
  282  * (.text, .data, .bss)
  283  */
  284 static vm_offset_t
  285 pmap_kmem_choose(vm_offset_t addr)
  286 {
  287         vm_offset_t newaddr = addr;
  288 
  289 #ifndef DISABLE_PSE
  290         if (cpu_feature & CPUID_PSE)
  291                 newaddr = (addr + PDRMASK) & ~PDRMASK;
  292 #endif
  293         return newaddr;
  294 }
  295 
  296 /*
  297  *      Bootstrap the system enough to run with virtual memory.
  298  *
  299  *      On the i386 this is called after mapping has already been enabled
  300  *      and just syncs the pmap module with what has already been done.
  301  *      [We can't call it easily with mapping off since the kernel is not
  302  *      mapped with PA == VA, hence we would have to relocate every address
  303  *      from the linked base (virtual) address "KERNBASE" to the actual
  304  *      (physical) address starting relative to 0]
       *
       *      firstaddr is added to KERNBASE to form the initial virtual_avail.
       *      loadaddr is not referenced anywhere in this function.
       *
       *      Steps, in order: compute virtual_avail/virtual_end, initialize
       *      the statically allocated kernel pmap and put it on allpmaps,
       *      carve the reserved temporary mappings out of the start of the
       *      free kernel VA, clear the first NKPT entries of PTD, and finally
       *      call pmap_set_pg().
       *
       *      NOTE(review): this is an old-style (K&R) function definition;
       *      consider converting it to a prototype-style definition.
  305  */
  306 void
  307 pmap_bootstrap(firstaddr, loadaddr)
  308         vm_paddr_t firstaddr;
  309         vm_paddr_t loadaddr;
  310 {
  311         vm_offset_t va;
  312         pt_entry_t *pte, *unused;
  313         struct sysmaps *sysmaps;
  314         int i;
  315 
  316         /*
  317          * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
  318          * large. It should instead be correctly calculated in locore.s and
  319          * not based on 'first' (which is a physical address, not a virtual
  320          * address, for the start of unused physical memory). The kernel
  321          * page tables are NOT double mapped and thus should not be included
  322          * in this calculation.
  323          */
  324         virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
  325         virtual_avail = pmap_kmem_choose(virtual_avail);
  326 
  327         virtual_end = VM_MAX_KERNEL_ADDRESS;
  328 
  329         /*
  330          * Initialize the kernel pmap (which is statically allocated).
  331          */
  332         PMAP_LOCK_INIT(kernel_pmap);
  333         kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
  334 #ifdef PAE
  335         kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
  336 #endif
  337         kernel_pmap->pm_active = -1;    /* don't allow deactivation */
  338         TAILQ_INIT(&kernel_pmap->pm_pvlist);
  339         LIST_INIT(&allpmaps);
  340         mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
  341         mtx_lock_spin(&allpmaps_lock);
  342         LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
  343         mtx_unlock_spin(&allpmaps_lock);
  344         nkpt = NKPT;
  345 
  346         /*
  347          * Reserve some special page table entries/VA space for temporary
  348          * mapping of pages.
               *
               * SYSMAP(c, p, v, n) assigns the current va (cast to type c) to v
               * and the current pte pointer to p, then advances va by n pages
               * and pte by n entries.  It expands to several statements and
               * relies on the local variables va and pte.
  349          */
  350 #define SYSMAP(c, p, v, n)      \
  351         v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
  352 
  353         va = virtual_avail;
  354         pte = vtopte(va);
  355 
  356         /*
  357          * CMAP1/CMAP2 are used for zeroing and copying pages.
  358          * CMAP3 is used for the idle process page zeroing.
  359          */
  360         for (i = 0; i < MAXCPU; i++) {
  361                 sysmaps = &sysmaps_pcpu[i];
  362                 mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
  363                 SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
  364                 SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
  365         }
  366         SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  367         SYSMAP(caddr_t, CMAP3, CADDR3, 1)
  368         *CMAP3 = 0;
  369 
  370         /*
  371          * Crashdump maps.
  372          */
  373         SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
  374 
  375         /*
  376          * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
  377          */
  378         SYSMAP(caddr_t, unused, ptvmmap, 1)
  379 
  380         /*
  381          * msgbufp is used to map the system message buffer.
  382          */
  383         SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
  384 
  385         /*
  386          * ptemap is used for pmap_pte_quick
  387          */
  388         SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
  389         SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
  390 
  391         mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
  392 
              /* Free kernel VA now starts after everything reserved above. */
  393         virtual_avail = va;
  394 
  395         *CMAP1 = 0;
  396         for (i = 0; i < NKPT; i++)
  397                 PTD[i] = 0;
  398 
  399         /* Turn on PG_G on kernel page(s) */
  400         pmap_set_pg();
  401 }
  402 
  403 /*
  404  * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
       *
       * Does nothing when pgeflag is zero.  Otherwise pgeflag is ORed into
       * the mappings that cover the kernel image, up to KERNBASE + KERNend:
       *   - with pseflag set, into each page directory entry starting at
       *     index KPTDI + KERNLOAD/NBPDR, one per NBPDR of address space,
       *     storing the result in both kernel_pmap->pm_pdir and PTD;
       *   - otherwise into every non-zero page table entry from btext
       *     onward, one page at a time.
       * The TLB is flushed with invltlb() after every step.
  405  */
  406 void
  407 pmap_set_pg(void)
  408 {
  409         pd_entry_t pdir;
  410         pt_entry_t *pte;
  411         vm_offset_t va, endva;
  412         int i; 
  413 
  414         if (pgeflag == 0)
  415                 return;
  416 
              /* i is the page directory slot, relative to KPTDI, of KERNLOAD. */
  417         i = KERNLOAD/NBPDR;
  418         endva = KERNBASE + KERNend;
  419 
  420         if (pseflag) {
  421                 va = KERNBASE + KERNLOAD;
  422                 while (va  < endva) {
  423                         pdir = kernel_pmap->pm_pdir[KPTDI+i];
  424                         pdir |= pgeflag;
  425                         kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
  426                         invltlb();      /* Play it safe, invltlb() every time */
  427                         i++;
  428                         va += NBPDR;
  429                 }
  430         } else {
  431                 va = (vm_offset_t)btext;
  432                 while (va < endva) {
  433                         pte = vtopte(va);
                              /* Entries that are entirely zero are left untouched. */
  434                         if (*pte)
  435                                 *pte |= pgeflag;
  436                         invltlb();      /* Play it safe, invltlb() every time */
  437                         va += PAGE_SIZE;
  438                 }
  439         }
  440 }
  441 
  442 /*
  443  * Initialize a vm_page's machine-dependent fields.
  444  */
  445 void
  446 pmap_page_init(vm_page_t m)
  447 {
  448 
  449         TAILQ_INIT(&m->md.pv_list);
  450         m->md.pv_list_count = 0;
  451 }
  452 
  453 #ifdef PAE
  454 
  455 static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt");
  456 
      /*
       * Backing-store allocator installed on pdptzone by pmap_init().
       *
       * Marks the slab UMA_SLAB_PRIV through *flags and returns one
       * PAGE_SIZE chunk obtained from contigmalloc() with bounds 0 and
       * 0xffffffff.  The zone, bytes and wait arguments are not used.
       *
       * NOTE(review): the bounds presumably restrict the allocation to
       * physical memory below 4GB -- confirm against contigmalloc(9).
       * NOTE(review): contigmalloc() is passed 0 for its flags rather than
       * the caller's "wait" value -- confirm that this is intended.
       */
  457 static void *
  458 pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  459 {
  460         *flags = UMA_SLAB_PRIV;
  461         return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL,
  462             1, 0));
  463 }
  464 #endif
  465 
  466 /*
  467  *      Initialize the pmap module.
  468  *      Called by vm_init, to initialize any structures that the pmap
  469  *      system needs to map virtual memory.
       *
       *      Creates the "PV ENTRY" UMA zone (pvzone) and preallocates MINPV
       *      entries in it.  Under PAE it also creates the "PDPT" zone, whose
       *      item size is NPGPTD * sizeof(pdpt_entry_t), and installs
       *      pmap_pdpt_allocf() as that zone's allocation function.
  470  */
  471 void
  472 pmap_init(void)
  473 {
  474 
  475         /*
  476          * init the pv free list
  477          */
  478         pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, 
  479             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
  480         uma_prealloc(pvzone, MINPV);
  481 
  482 #ifdef PAE
              /*
               * The alignment argument is the item size minus one.
               * NOTE(review): presumably this aligns each PDPT to its own
               * size -- confirm against the uma_zcreate() contract.
               */
  483         pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
  484             NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
  485             UMA_ZONE_VM | UMA_ZONE_NOFREE);
  486         uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
  487 #endif
  488 }
  489 
  490 /*
  491  * Initialize the address space (zone) for the pv_entries.  Set a
  492  * high water mark so that the system can recover from excessive
  493  * numbers of pv entries.
  494  */
  495 void
  496 pmap_init2()
  497 {
  498         int shpgperproc = PMAP_SHPGPERPROC;
  499 
  500         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  501         pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
  502         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  503         pv_entry_high_water = 9 * (pv_entry_max / 10);
  504         uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
  505 }
  506 
  507 
  508 /***************************************************
  509  * Low level helper routines.....
  510  ***************************************************/
  511 
  512 #if defined(PMAP_DIAGNOSTIC)
  513 
  514 /*
  515  * This code checks for non-writeable/modified pages.
  516  * This should be an invalid condition.
  517  */
  518 static int
  519 pmap_nw_modified(pt_entry_t ptea)
  520 {
  521         int pte;
  522 
  523         pte = (int) ptea;
  524 
  525         if ((pte & (PG_M|PG_RW)) == PG_M)
  526                 return 1;
  527         else
  528                 return 0;
  529 }
  530 #endif
  531 
  532 
  533 /*
  534  * this routine defines the region(s) of memory that should
  535  * not be tested for the modified bit.
  536  */
  537 static PMAP_INLINE int
  538 pmap_track_modified(vm_offset_t va)
  539 {
  540         if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 
  541                 return 1;
  542         else
  543                 return 0;
  544 }
  545 
  546 #ifdef SMP
  547 /*
  548  * For SMP, these functions have to use the IPI mechanism for coherence.
       *
       * pmap_invalidate_page: invalidate the TLB entry for va in pmap.
       *
       * Once smp_started is set the caller must have interrupts enabled
       * (the function panics otherwise) and the work is done while holding
       * the smp_ipi_mtx spin lock; before that only a critical section is
       * entered.  For the kernel pmap, or a pmap active on all CPUs, the
       * local entry is flushed and smp_invlpg() is called.  For any other
       * pmap the local flush and smp_masked_invlpg() are each performed
       * only if pm_active includes this CPU / some other CPU respectively.
       * NOTE(review): smp_invlpg()/smp_masked_invlpg() presumably send the
       * invalidation to the other CPUs by IPI -- they are not defined here.
  549  */
  550 void
  551 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  552 {
  553         u_int cpumask;
  554         u_int other_cpus;
  555 
  556         if (smp_started) {
  557                 if (!(read_eflags() & PSL_I))
  558                         panic("%s: interrupts disabled", __func__);
  559                 mtx_lock_spin(&smp_ipi_mtx);
  560         } else
  561                 critical_enter();
  562         /*
  563          * We need to disable interrupt preemption but MUST NOT have
  564          * interrupts disabled here.
  565          * XXX we may need to hold schedlock to get a coherent pm_active
  566          * XXX critical sections disable interrupts again
  567          */
  568         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  569                 invlpg(va);
  570                 smp_invlpg(va);
  571         } else {
  572                 cpumask = PCPU_GET(cpumask);
  573                 other_cpus = PCPU_GET(other_cpus);
  574                 if (pmap->pm_active & cpumask)
  575                         invlpg(va);
  576                 if (pmap->pm_active & other_cpus)
  577                         smp_masked_invlpg(pmap->pm_active & other_cpus, va);
  578         }
              /* Undo whichever of the two entry paths was taken above. */
  579         if (smp_started)
  580                 mtx_unlock_spin(&smp_ipi_mtx);
  581         else
  582                 critical_exit();
  583 }
  584 
  585 void
  586 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  587 {
              /*
               * SMP version: invalidate the TLB entries for every page in
               * [sva, eva) of pmap.  Locally this is one invlpg() per
               * PAGE_SIZE step; the other CPUs are handled through
               * smp_invlpg_range() or smp_masked_invlpg_range().
               *
               * Once smp_started is set the caller must have interrupts
               * enabled (the function panics otherwise) and the work is
               * done under the smp_ipi_mtx spin lock; before that only a
               * critical section is entered.
               */
  588         u_int cpumask;
  589         u_int other_cpus;
  590         vm_offset_t addr;
  591 
  592         if (smp_started) {
  593                 if (!(read_eflags() & PSL_I))
  594                         panic("%s: interrupts disabled", __func__);
  595                 mtx_lock_spin(&smp_ipi_mtx);
  596         } else
  597                 critical_enter();
  598         /*
  599          * We need to disable interrupt preemption but MUST NOT have
  600          * interrupts disabled here.
  601          * XXX we may need to hold schedlock to get a coherent pm_active
  602          * XXX critical sections disable interrupts again
  603          */
  604         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  605                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  606                         invlpg(addr);
  607                 smp_invlpg_range(sva, eva);
  608         } else {
  609                 cpumask = PCPU_GET(cpumask);
  610                 other_cpus = PCPU_GET(other_cpus);
                      /* Flush locally only if the pmap is active on this CPU. */
  611                 if (pmap->pm_active & cpumask)
  612                         for (addr = sva; addr < eva; addr += PAGE_SIZE)
  613                                 invlpg(addr);
                      /* Pass along only the other CPUs on which it is active. */
  614                 if (pmap->pm_active & other_cpus)
  615                         smp_masked_invlpg_range(pmap->pm_active & other_cpus,
  616                             sva, eva);
  617         }
  618         if (smp_started)
  619                 mtx_unlock_spin(&smp_ipi_mtx);
  620         else
  621                 critical_exit();
  622 }
  623 
  624 void
  625 pmap_invalidate_all(pmap_t pmap)
  626 {
              /*
               * SMP version: flush the whole TLB on behalf of pmap.  Locally
               * this is invltlb(); the other CPUs are handled through
               * smp_invltlb() or smp_masked_invltlb().
               *
               * Once smp_started is set the caller must have interrupts
               * enabled (the function panics otherwise) and the work is
               * done under the smp_ipi_mtx spin lock; before that only a
               * critical section is entered.
               */
  627         u_int cpumask;
  628         u_int other_cpus;
  629 
  630         if (smp_started) {
  631                 if (!(read_eflags() & PSL_I))
  632                         panic("%s: interrupts disabled", __func__);
  633                 mtx_lock_spin(&smp_ipi_mtx);
  634         } else
  635                 critical_enter();
  636         /*
  637          * We need to disable interrupt preemption but MUST NOT have
  638          * interrupts disabled here.
  639          * XXX we may need to hold schedlock to get a coherent pm_active
  640          * XXX critical sections disable interrupts again
  641          */
  642         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  643                 invltlb();
  644                 smp_invltlb();
  645         } else {
  646                 cpumask = PCPU_GET(cpumask);
  647                 other_cpus = PCPU_GET(other_cpus);
                      /* Flush locally only if the pmap is active on this CPU. */
  648                 if (pmap->pm_active & cpumask)
  649                         invltlb();
                      /* Pass along only the other CPUs on which it is active. */
  650                 if (pmap->pm_active & other_cpus)
  651                         smp_masked_invltlb(pmap->pm_active & other_cpus);
  652         }
  653         if (smp_started)
  654                 mtx_unlock_spin(&smp_ipi_mtx);
  655         else
  656                 critical_exit();
  657 }
  658 #else /* !SMP */
  659 /*
  660  * Normal, non-SMP, 486+ invalidation functions.
  661  * We inline these within pmap.c for speed.
  662  */
  663 PMAP_INLINE void
  664 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  665 {
  666 
  667         if (pmap == kernel_pmap || pmap->pm_active)
  668                 invlpg(va);
  669 }
  670 
  671 PMAP_INLINE void
  672 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  673 {
  674         vm_offset_t addr;
  675 
  676         if (pmap == kernel_pmap || pmap->pm_active)
  677                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  678                         invlpg(addr);
  679 }
  680 
  681 PMAP_INLINE void
  682 pmap_invalidate_all(pmap_t pmap)
  683 {
  684 
  685         if (pmap == kernel_pmap || pmap->pm_active)
  686                 invltlb();
  687 }
  688 #endif /* !SMP */
  689 
  690 /*
  691  * Are we current address space or kernel?  N.B. We return FALSE when
  692  * a pmap's page table is in use because a kernel thread is borrowing
  693  * it.  The borrowed page table can change spontaneously, making any
  694  * dependence on its continued use subject to a race condition.
  695  */
  696 static __inline int
  697 pmap_is_current(pmap_t pmap)
  698 {
  699 
  700         return (pmap == kernel_pmap ||
  701                 (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
  702             (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
  703 }
  704 
  705 /*
  706  * If the given pmap is not the current or kernel pmap, the returned pte must
  707  * be released by passing it to pmap_pte_release().
       *
       * Returns a pointer to the page table entry for va in pmap:
       *   - the page directory entry itself when it has PG_PS set;
       *   - vtopte(va) when pmap is the current or the kernel pmap;
       *   - otherwise an entry inside the PADDR2 window, after PMAP2 has been
       *     pointed at the pmap's page table page.  On this path PMAP2mutex
       *     is acquired here and is still held on return;
       *   - 0 when the page directory entry is zero.
  708  */
  709 pt_entry_t *
  710 pmap_pte(pmap_t pmap, vm_offset_t va)
  711 {
  712         pd_entry_t newpf;
  713         pd_entry_t *pde;
  714 
  715         pde = pmap_pde(pmap, va);
  716         if (*pde & PG_PS)
  717                 return (pde);
  718         if (*pde != 0) {
  719                 /* are we current address space or kernel? */
  720                 if (pmap_is_current(pmap))
  721                         return (vtopte(va));
  722                 mtx_lock(&PMAP2mutex);
  723                 newpf = *pde & PG_FRAME;
                      /* Remap and invalidate only if PMAP2 names another frame. */
  724                 if ((*PMAP2 & PG_FRAME) != newpf) {
  725                         *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
  726                         pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
  727                 }
                      /* Index of va's entry within its page table page. */
  728                 return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
  729         }
  730         return (0);
  731 }
  732 
  733 /*
  734  * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
  735  * being NULL.
  736  */
  737 static __inline void
  738 pmap_pte_release(pt_entry_t *pte)
  739 {
  740 
  741         if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
  742                 mtx_unlock(&PMAP2mutex);
  743 }
  744 
  745 static __inline void
  746 invlcaddr(void *caddr)
  747 {
  748 
  749         invlpg((u_int)caddr);
  750 }
  751 
  752 /*
  753  * Super fast pmap_pte routine best used when scanning
  754  * the pv lists.  This eliminates many coarse-grained
  755  * invltlb calls.  Note that many of the pv list
  756  * scans are across different pmaps.  It is very wasteful
  757  * to do an entire invltlb for checking a single mapping.
  758  *
  759  * If the given pmap is not the current pmap, vm_page_queue_mtx
  760  * must be held and curthread pinned to a CPU.
       *
       * Returns the page directory entry itself when it has PG_PS set,
       * vtopte(va) for the current or kernel pmap, an entry inside the
       * PADDR1 window otherwise, and 0 when the page directory entry is
       * zero.  Only the local TLB entry for PADDR1 is invalidated
       * (invlcaddr); no other CPU is notified.
  761  */
  762 static pt_entry_t *
  763 pmap_pte_quick(pmap_t pmap, vm_offset_t va)
  764 {
  765         pd_entry_t newpf;
  766         pd_entry_t *pde;
  767 
  768         pde = pmap_pde(pmap, va);
  769         if (*pde & PG_PS)
  770                 return (pde);
  771         if (*pde != 0) {
  772                 /* are we current address space or kernel? */
  773                 if (pmap_is_current(pmap))
  774                         return (vtopte(va));
  775                 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
  776                 KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
  777                 newpf = *pde & PG_FRAME;
                      /*
                       * Three outcomes, each counted separately: PMAP1 had
                       * to be retargeted; PMAP1 was already right but was
                       * last set up on another CPU (SMP only), so this
                       * CPU's TLB entry is flushed; or nothing had to be
                       * done.  Under SMP the "else" below chains into the
                       * "if" inside the #ifdef.
                       */
  778                 if ((*PMAP1 & PG_FRAME) != newpf) {
  779                         *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
  780 #ifdef SMP
  781                         PMAP1cpu = PCPU_GET(cpuid);
  782 #endif
  783                         invlcaddr(PADDR1);
  784                         PMAP1changed++;
  785                 } else
  786 #ifdef SMP
  787                 if (PMAP1cpu != PCPU_GET(cpuid)) {
  788                         PMAP1cpu = PCPU_GET(cpuid);
  789                         invlcaddr(PADDR1);
  790                         PMAP1changedcpu++;
  791                 } else
  792 #endif
  793                         PMAP1unchanged++;
  794                 return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
  795         }
  796         return (0);
  797 }
  798 
  799 /*
  800  *      Routine:        pmap_extract
  801  *      Function:
  802  *              Extract the physical page address associated
  803  *              with the given map/virtual_address pair.
  804  */
  805 vm_paddr_t 
  806 pmap_extract(pmap_t pmap, vm_offset_t va)
  807 {
  808         vm_paddr_t rtval;
  809         pt_entry_t *pte;
  810         pd_entry_t pde;
  811 
  812         rtval = 0;
  813         PMAP_LOCK(pmap);
  814         pde = pmap->pm_pdir[va >> PDRSHIFT];
  815         if (pde != 0) {
  816                 if ((pde & PG_PS) != 0) {
  817                         rtval = (pde & ~PDRMASK) | (va & PDRMASK);
  818                         PMAP_UNLOCK(pmap);
  819                         return rtval;
  820                 }
  821                 pte = pmap_pte(pmap, va);
  822                 rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
  823                 pmap_pte_release(pte);
  824         }
  825         PMAP_UNLOCK(pmap);
  826         return (rtval);
  827 }
  828 
  829 /*
  830  *      Routine:        pmap_extract_and_hold
  831  *      Function:
  832  *              Atomically extract and hold the physical page
  833  *              with the given pmap and virtual address pair
  834  *              if that mapping permits the given protection.
  835  */
  836 vm_page_t
  837 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
  838 {
  839         pd_entry_t pde;
  840         pt_entry_t pte;
  841         vm_page_t m;
  842 
  843         m = NULL;
  844         vm_page_lock_queues();
  845         PMAP_LOCK(pmap);
  846         pde = *pmap_pde(pmap, va);
  847         if (pde != 0) {
  848                 if (pde & PG_PS) {
  849                         if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
  850                                 m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) |
  851                                     (va & PDRMASK));
  852                                 vm_page_hold(m);
  853                         }
  854                 } else {
  855                         sched_pin();
  856                         pte = *pmap_pte_quick(pmap, va);
  857                         if (pte != 0 &&
  858                             ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
  859                                 m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
  860                                 vm_page_hold(m);
  861                         }
  862                         sched_unpin();
  863                 }
  864         }
  865         vm_page_unlock_queues();
  866         PMAP_UNLOCK(pmap);
  867         return (m);
  868 }
  869 
  870 /***************************************************
  871  * Low level mapping routines.....
  872  ***************************************************/
  873 
  874 /*
  875  * Add a wired page to the kva.
  876  * Note: not SMP coherent.
  877  */
  878 PMAP_INLINE void 
  879 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
  880 {
  881         pt_entry_t *pte;
  882 
  883         pte = vtopte(va);
  884         pte_store(pte, pa | PG_RW | PG_V | pgeflag);
  885 }
  886 
  887 /*
  888  * Remove a page from the kernel pagetables.
  889  * Note: not SMP coherent.
  890  */
  891 PMAP_INLINE void
  892 pmap_kremove(vm_offset_t va)
  893 {
  894         pt_entry_t *pte;
  895 
  896         pte = vtopte(va);
  897         pte_clear(pte);
  898 }
  899 
  900 /*
  901  *      Used to map a range of physical addresses into kernel
  902  *      virtual address space.
  903  *
  904  *      The value passed in '*virt' is a suggested virtual address for
  905  *      the mapping. Architectures which can support a direct-mapped
  906  *      physical to virtual region can return the appropriate address
  907  *      within that region, leaving '*virt' unchanged. Other
  908  *      architectures should map the pages starting at '*virt' and
  909  *      update '*virt' with the first usable address after the mapped
  910  *      region.
  911  */
  912 vm_offset_t
  913 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
  914 {
  915         vm_offset_t va, sva;
  916 
  917         va = sva = *virt;
  918         while (start < end) {
  919                 pmap_kenter(va, start);
  920                 va += PAGE_SIZE;
  921                 start += PAGE_SIZE;
  922         }
  923         pmap_invalidate_range(kernel_pmap, sva, va);
  924         *virt = va;
  925         return (sva);
  926 }
  927 
  928 
  929 /*
  930  * Add a list of wired pages to the kva
  931  * this routine is only used for temporary
  932  * kernel mappings that do not need to have
  933  * page modification or references recorded.
  934  * Note that old mappings are simply written
  935  * over.  The page *must* be wired.
  936  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  937  */
  938 void
  939 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
  940 {
  941         vm_offset_t va;
  942 
  943         va = sva;
  944         while (count-- > 0) {
  945                 pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
  946                 va += PAGE_SIZE;
  947                 m++;
  948         }
  949         pmap_invalidate_range(kernel_pmap, sva, va);
  950 }
  951 
  952 /*
  953  * This routine tears out page mappings from the
  954  * kernel -- it is meant only for temporary mappings.
  955  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  956  */
  957 void
  958 pmap_qremove(vm_offset_t sva, int count)
  959 {
  960         vm_offset_t va;
  961 
  962         va = sva;
  963         while (count-- > 0) {
  964                 pmap_kremove(va);
  965                 va += PAGE_SIZE;
  966         }
  967         pmap_invalidate_range(kernel_pmap, sva, va);
  968 }
  969 
  970 /***************************************************
  971  * Page table page management routines.....
  972  ***************************************************/
  973 
  974 /*
  975  * This routine unholds page table pages, and if the hold count
  976  * drops to zero, then it decrements the wire count.
  977  */
  978 static PMAP_INLINE int
  979 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
  980 {
  981 
  982         --m->wire_count;
  983         if (m->wire_count == 0)
  984                 return _pmap_unwire_pte_hold(pmap, m);
  985         else
  986                 return 0;
  987 }
  988 
  989 static int 
  990 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
  991 {
  992         vm_offset_t pteva;
  993 
  994         /*
  995          * unmap the page table page
  996          */
  997         pmap->pm_pdir[m->pindex] = 0;
  998         --pmap->pm_stats.resident_count;
  999 
 1000         /*
 1001          * Do an invltlb to make the invalidated mapping
 1002          * take effect immediately.
 1003          */
 1004         pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
 1005         pmap_invalidate_page(pmap, pteva);
 1006 
 1007         vm_page_free_zero(m);
 1008         atomic_subtract_int(&cnt.v_wire_count, 1);
 1009         return 1;
 1010 }
 1011 
 1012 /*
 1013  * After removing a page table entry, this routine is used to
 1014  * conditionally free the page, and manage the hold/wire counts.
 1015  */
 1016 static int
 1017 pmap_unuse_pt(pmap_t pmap, vm_offset_t va)
 1018 {
 1019         pd_entry_t ptepde;
 1020         vm_page_t mpte;
 1021 
 1022         if (va >= VM_MAXUSER_ADDRESS)
 1023                 return 0;
 1024         ptepde = *pmap_pde(pmap, va);
 1025         mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
 1026         return pmap_unwire_pte_hold(pmap, mpte);
 1027 }
 1028 
 1029 void
 1030 pmap_pinit0(pmap)
 1031         struct pmap *pmap;
 1032 {
 1033 
 1034         PMAP_LOCK_INIT(pmap);
 1035         pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 1036 #ifdef PAE
 1037         pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 1038 #endif
 1039         pmap->pm_active = 0;
 1040         PCPU_SET(curpmap, pmap);
 1041         TAILQ_INIT(&pmap->pm_pvlist);
 1042         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1043         mtx_lock_spin(&allpmaps_lock);
 1044         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1045         mtx_unlock_spin(&allpmaps_lock);
 1046 }
 1047 
 1048 /*
 1049  * Initialize a preallocated and zeroed pmap structure,
 1050  * such as one in a vmspace structure.
 1051  */
 1052 void
 1053 pmap_pinit(pmap)
 1054         register struct pmap *pmap;
 1055 {
 1056         vm_page_t m, ptdpg[NPGPTD];
 1057         vm_paddr_t pa;
 1058         static int color;
 1059         int i;
 1060 
 1061         PMAP_LOCK_INIT(pmap);
 1062 
 1063         /*
 1064          * No need to allocate page table space yet but we do need a valid
 1065          * page directory table.
 1066          */
 1067         if (pmap->pm_pdir == NULL) {
 1068                 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
 1069                     NBPTD);
 1070 #ifdef PAE
 1071                 pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
 1072                 KASSERT(((vm_offset_t)pmap->pm_pdpt &
 1073                     ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
 1074                     ("pmap_pinit: pdpt misaligned"));
 1075                 KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
 1076                     ("pmap_pinit: pdpt above 4g"));
 1077 #endif
 1078         }
 1079 
 1080         /*
 1081          * allocate the page directory page(s)
 1082          */
 1083         for (i = 0; i < NPGPTD;) {
 1084                 m = vm_page_alloc(NULL, color++,
 1085                     VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 1086                     VM_ALLOC_ZERO);
 1087                 if (m == NULL)
 1088                         VM_WAIT;
 1089                 else {
 1090                         ptdpg[i++] = m;
 1091                 }
 1092         }
 1093 
 1094         pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 1095 
 1096         for (i = 0; i < NPGPTD; i++) {
 1097                 if ((ptdpg[i]->flags & PG_ZERO) == 0)
 1098                         bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
 1099         }
 1100 
 1101         mtx_lock_spin(&allpmaps_lock);
 1102         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1103         mtx_unlock_spin(&allpmaps_lock);
 1104         /* Wire in kernel global address entries. */
 1105         /* XXX copies current process, does not fill in MPPTDI */
 1106         bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 1107 #ifdef SMP
 1108         pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
 1109 #endif
 1110 
 1111         /* install self-referential address mapping entry(s) */
 1112         for (i = 0; i < NPGPTD; i++) {
 1113                 pa = VM_PAGE_TO_PHYS(ptdpg[i]);
 1114                 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
 1115 #ifdef PAE
 1116                 pmap->pm_pdpt[i] = pa | PG_V;
 1117 #endif
 1118         }
 1119 
 1120         pmap->pm_active = 0;
 1121         TAILQ_INIT(&pmap->pm_pvlist);
 1122         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1123 }
 1124 
 1125 /*
 1126  * this routine is called if the page table page is not
 1127  * mapped correctly.
 1128  */
 1129 static vm_page_t
 1130 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
 1131 {
 1132         vm_paddr_t ptepa;
 1133         vm_page_t m;
 1134 
 1135         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1136             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1137             ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1138 
 1139         /*
 1140          * Allocate a page table page.
 1141          */
 1142         if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 1143             VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 1144                 if (flags & M_WAITOK) {
 1145                         PMAP_UNLOCK(pmap);
 1146                         vm_page_unlock_queues();
 1147                         VM_WAIT;
 1148                         vm_page_lock_queues();
 1149                         PMAP_LOCK(pmap);
 1150                 }
 1151 
 1152                 /*
 1153                  * Indicate the need to retry.  While waiting, the page table
 1154                  * page may have been allocated.
 1155                  */
 1156                 return (NULL);
 1157         }
 1158         if ((m->flags & PG_ZERO) == 0)
 1159                 pmap_zero_page(m);
 1160 
 1161         /*
 1162          * Map the pagetable page into the process address space, if
 1163          * it isn't already there.
 1164          */
 1165 
 1166         pmap->pm_stats.resident_count++;
 1167 
 1168         ptepa = VM_PAGE_TO_PHYS(m);
 1169         pmap->pm_pdir[ptepindex] =
 1170                 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 1171 
 1172         return m;
 1173 }
 1174 
 1175 static vm_page_t
 1176 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 1177 {
 1178         unsigned ptepindex;
 1179         pd_entry_t ptepa;
 1180         vm_page_t m;
 1181 
 1182         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1183             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1184             ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1185 
 1186         /*
 1187          * Calculate pagetable page index
 1188          */
 1189         ptepindex = va >> PDRSHIFT;
 1190 retry:
 1191         /*
 1192          * Get the page directory entry
 1193          */
 1194         ptepa = pmap->pm_pdir[ptepindex];
 1195 
 1196         /*
 1197          * This supports switching from a 4MB page to a
 1198          * normal 4K page.
 1199          */
 1200         if (ptepa & PG_PS) {
 1201                 pmap->pm_pdir[ptepindex] = 0;
 1202                 ptepa = 0;
 1203                 pmap_invalidate_all(kernel_pmap);
 1204         }
 1205 
 1206         /*
 1207          * If the page table page is mapped, we just increment the
 1208          * hold count, and activate it.
 1209          */
 1210         if (ptepa) {
 1211                 m = PHYS_TO_VM_PAGE(ptepa);
 1212                 m->wire_count++;
 1213         } else {
 1214                 /*
 1215                  * Here if the pte page isn't mapped, or if it has
 1216                  * been deallocated. 
 1217                  */
 1218                 m = _pmap_allocpte(pmap, ptepindex, flags);
 1219                 if (m == NULL && (flags & M_WAITOK))
 1220                         goto retry;
 1221         }
 1222         return (m);
 1223 }
 1224 
 1225 
 1226 /***************************************************
  1227  * Pmap allocation/deallocation routines.
 1228  ***************************************************/
 1229 
 1230 #ifdef SMP
  1231 /*
  1232  * Deal with a SMP shootdown of other users of the pmap that we are
  1233  * trying to dispose of.  This can be a bit hairy.
        *
        * State shared between pmap_lazyfix() and the CPU it interrupts:
        *   lazymask - points at the pm_active mask of the pmap being
        *              released; each CPU clears its own bit in it.
        *   lazyptd  - physical address that is compared against %cr3 to
        *              decide whether a CPU is still running on that pmap.
        *   lazywait - cleared before the IPI is sent and set to 1 by
        *              pmap_lazyfix_action() once the target has finished.
  1234  */
  1235 static u_int *lazymask;
  1236 static u_int lazyptd;
  1237 static volatile u_int lazywait;
  1238 
  1239 void pmap_lazyfix_action(void);
  1240 
       /*
        * Remote half of pmap_lazyfix(): presumably run from the
        * IPI_LAZYPMAP handler on the CPU that pmap_lazyfix() targeted
        * (confirm against the IPI dispatch code).  If this CPU is still
        * loaded with the page tables being released, switch back to the
        * current pcb's %cr3, then clear this CPU's bit in *lazymask and
        * signal completion through lazywait.
        */
  1241 void
  1242 pmap_lazyfix_action(void)
  1243 {
  1244         u_int mymask = PCPU_GET(cpumask);
  1245 
  1246         if (rcr3() == lazyptd)
  1247                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
  1248         atomic_clear_int(lazymask, mymask);
               /* Release store: the waiter spins until lazywait != 0. */
  1249         atomic_store_rel_int(&lazywait, 1);
  1250 }
  1251 
       /*
        * Local half of pmap_lazyfix(), used when the CPU to be fixed is
        * the one we are running on.  Same work as pmap_lazyfix_action()
        * but without touching lazywait, since nobody is spinning on it.
        */
  1252 static void
  1253 pmap_lazyfix_self(u_int mymask)
  1254 {
  1255 
  1256         if (rcr3() == lazyptd)
  1257                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
  1258         atomic_clear_int(lazymask, mymask);
  1259 }
 1260 
  1261 
       /*
        * Make sure no CPU is still running on the page tables of 'pmap'.
        * Loops until pmap->pm_active is zero, handling the lowest set
        * bit on each pass: our own CPU is fixed directly, any other CPU
        * is sent IPI_LAZYPMAP and waited for with a bounded spin.  The
        * whole exchange is serialized by smp_ipi_mtx.
        */
  1262 static void
  1263 pmap_lazyfix(pmap_t pmap)
  1264 {
  1265         u_int mymask;
  1266         u_int mask;
  1267         register u_int spins;
  1268 
  1269         while ((mask = pmap->pm_active) != 0) {
  1270                 spins = 50000000;
  1271                 mask = mask & -mask;    /* Find least significant set bit */
  1272                 mtx_lock_spin(&smp_ipi_mtx);
                       /* Under PAE the value to match against %cr3 is the PDPT. */
  1273 #ifdef PAE
  1274                 lazyptd = vtophys(pmap->pm_pdpt);
  1275 #else
  1276                 lazyptd = vtophys(pmap->pm_pdir);
  1277 #endif
  1278                 mymask = PCPU_GET(cpumask);
  1279                 if (mask == mymask) {
  1280                         lazymask = &pmap->pm_active;
  1281                         pmap_lazyfix_self(mymask);
  1282                 } else {
                               /*
                                * Publish lazymask and reset lazywait with
                                * release stores before the IPI goes out.
                                */
  1283                         atomic_store_rel_int((u_int *)&lazymask,
  1284                             (u_int)&pmap->pm_active);
  1285                         atomic_store_rel_int(&lazywait, 0);
  1286                         ipi_selected(mask, IPI_LAZYPMAP);
                               /* Give up after 'spins' pauses; see below. */
  1287                         while (lazywait == 0) {
  1288                                 ia32_pause();
  1289                                 if (--spins == 0)
  1290                                         break;
  1291                         }
  1292                 }
  1293                 mtx_unlock_spin(&smp_ipi_mtx);
                       /*
                        * A timeout is only reported; the outer loop then
                        * re-reads pm_active and tries again.
                        */
  1294                 if (spins == 0)
  1295                         printf("pmap_lazyfix: spun for 50000000\n");
  1296         }
  1297 }
 1298 
 1299 #else   /* SMP */
 1300 
 1301 /*
 1302  * Cleaning up on uniprocessor is easy.  For various reasons, we're
 1303  * unlikely to have to even execute this code, including the fact
 1304  * that the cleanup is deferred until the parent does a wait(2), which
 1305  * means that another userland process has run.
 1306  */
 1307 static void
 1308 pmap_lazyfix(pmap_t pmap)
 1309 {
 1310         u_int cr3;
 1311 
 1312         cr3 = vtophys(pmap->pm_pdir);
 1313         if (cr3 == rcr3()) {
 1314                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1315                 pmap->pm_active &= ~(PCPU_GET(cpumask));
 1316         }
 1317 }
 1318 #endif  /* SMP */
 1319 
 1320 /*
 1321  * Release any resources held by the given physical map.
 1322  * Called when a pmap initialized by pmap_pinit is being released.
 1323  * Should only be called if the map contains no valid mappings.
 1324  */
 1325 void
 1326 pmap_release(pmap_t pmap)
 1327 {
 1328         vm_page_t m, ptdpg[NPGPTD];
 1329         int i;
 1330 
 1331         KASSERT(pmap->pm_stats.resident_count == 0,
 1332             ("pmap_release: pmap resident count %ld != 0",
 1333             pmap->pm_stats.resident_count));
 1334 
 1335         pmap_lazyfix(pmap);
 1336         mtx_lock_spin(&allpmaps_lock);
 1337         LIST_REMOVE(pmap, pm_list);
 1338         mtx_unlock_spin(&allpmaps_lock);
 1339 
 1340         for (i = 0; i < NPGPTD; i++)
 1341                 ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i]);
 1342 
 1343         bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
 1344             sizeof(*pmap->pm_pdir));
 1345 #ifdef SMP
 1346         pmap->pm_pdir[MPPTDI] = 0;
 1347 #endif
 1348 
 1349         pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
 1350 
 1351         vm_page_lock_queues();
 1352         for (i = 0; i < NPGPTD; i++) {
 1353                 m = ptdpg[i];
 1354 #ifdef PAE
 1355                 KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
 1356                     ("pmap_release: got wrong ptd page"));
 1357 #endif
 1358                 m->wire_count--;
 1359                 atomic_subtract_int(&cnt.v_wire_count, 1);
 1360                 vm_page_free_zero(m);
 1361         }
 1362         vm_page_unlock_queues();
 1363         PMAP_LOCK_DESTROY(pmap);
 1364 }
 1365 
 1366 static int
 1367 kvm_size(SYSCTL_HANDLER_ARGS)
 1368 {
 1369         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 1370 
 1371         return sysctl_handle_long(oidp, &ksize, 0, req);
 1372 }
 1373 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 1374     0, 0, kvm_size, "IU", "Size of KVM");
 1375 
 1376 static int
 1377 kvm_free(SYSCTL_HANDLER_ARGS)
 1378 {
 1379         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1380 
 1381         return sysctl_handle_long(oidp, &kfree, 0, req);
 1382 }
 1383 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 1384     0, 0, kvm_free, "IU", "Amount of KVM free");
 1385 
 1386 /*
 1387  * grow the number of kernel page table entries, if needed
 1388  */
 1389 void
 1390 pmap_growkernel(vm_offset_t addr)
 1391 {
 1392         struct pmap *pmap;
 1393         vm_paddr_t ptppaddr;
 1394         vm_page_t nkpg;
 1395         pd_entry_t newpdir;
 1396         pt_entry_t *pde;
 1397 
 1398         mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 1399         if (kernel_vm_end == 0) {
 1400                 kernel_vm_end = KERNBASE;
 1401                 nkpt = 0;
 1402                 while (pdir_pde(PTD, kernel_vm_end)) {
 1403                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1404                         nkpt++;
 1405                 }
 1406         }
 1407         addr = roundup2(addr, PAGE_SIZE * NPTEPG);
 1408         while (kernel_vm_end < addr) {
 1409                 if (pdir_pde(PTD, kernel_vm_end)) {
 1410                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1411                         continue;
 1412                 }
 1413 
 1414                 /*
 1415                  * This index is bogus, but out of the way
 1416                  */
 1417                 nkpg = vm_page_alloc(NULL, nkpt,
 1418                     VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
 1419                 if (!nkpg)
 1420                         panic("pmap_growkernel: no memory to grow kernel");
 1421 
 1422                 nkpt++;
 1423 
 1424                 pmap_zero_page(nkpg);
 1425                 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 1426                 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 1427                 pdir_pde(PTD, kernel_vm_end) = newpdir;
 1428 
 1429                 mtx_lock_spin(&allpmaps_lock);
 1430                 LIST_FOREACH(pmap, &allpmaps, pm_list) {
 1431                         pde = pmap_pde(pmap, kernel_vm_end);
 1432                         pde_store(pde, newpdir);
 1433                 }
 1434                 mtx_unlock_spin(&allpmaps_lock);
 1435                 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1436         }
 1437 }
 1438 
 1439 
 1440 /***************************************************
 1441  * page management routines.
 1442  ***************************************************/
 1443 
 1444 /*
 1445  * free the pv_entry back to the free list
 1446  */
 1447 static PMAP_INLINE void
 1448 free_pv_entry(pv_entry_t pv)
 1449 {
 1450         pv_entry_count--;
 1451         uma_zfree(pvzone, pv);
 1452 }
 1453 
 1454 /*
 1455  * get a new pv_entry, allocating a block from the system
 1456  * when needed.
 1457  * the memory allocation is performed bypassing the malloc code
 1458  * because of the possibility of allocations at interrupt time.
 1459  */
 1460 static pv_entry_t
 1461 get_pv_entry(void)
 1462 {
 1463         pv_entry_count++;
 1464         if (pv_entry_high_water &&
 1465                 (pv_entry_count > pv_entry_high_water) &&
 1466                 (pmap_pagedaemon_waken == 0)) {
 1467                 pmap_pagedaemon_waken = 1;
 1468                 wakeup (&vm_pages_needed);
 1469         }
 1470         return uma_zalloc(pvzone, M_NOWAIT);
 1471 }
 1472 
 1473 
 1474 static int
 1475 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 1476 {
 1477         pv_entry_t pv;
 1478         int rtval;
 1479 
 1480         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1481         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1482         if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 1483                 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 1484                         if (pmap == pv->pv_pmap && va == pv->pv_va) 
 1485                                 break;
 1486                 }
 1487         } else {
 1488                 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
 1489                         if (va == pv->pv_va) 
 1490                                 break;
 1491                 }
 1492         }
 1493 
 1494         rtval = 0;
 1495         if (pv) {
 1496                 rtval = pmap_unuse_pt(pmap, va);
 1497                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1498                 m->md.pv_list_count--;
 1499                 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 1500                         vm_page_flag_clear(m, PG_WRITEABLE);
 1501 
 1502                 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 1503                 free_pv_entry(pv);
 1504         }
 1505                         
 1506         return rtval;
 1507 }
 1508 
 1509 /*
 1510  * Create a pv entry for page at pa for
 1511  * (pmap, va).
 1512  */
 1513 static void
 1514 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 1515 {
 1516         pv_entry_t pv;
 1517 
 1518         pv = get_pv_entry();
 1519         if (pv == NULL)
 1520                 panic("no pv entries: increase vm.pmap.shpgperproc");
 1521         pv->pv_va = va;
 1522         pv->pv_pmap = pmap;
 1523 
 1524         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1525         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1526         TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 1527         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 1528         m->md.pv_list_count++;
 1529 }
 1530 
 1531 /*
 1532  * pmap_remove_pte: do the things to unmap a page in a process
 1533  */
 1534 static int
 1535 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
 1536 {
 1537         pt_entry_t oldpte;
 1538         vm_page_t m;
 1539 
 1540         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1541         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1542         oldpte = pte_load_clear(ptq);
 1543         if (oldpte & PG_W)
 1544                 pmap->pm_stats.wired_count -= 1;
 1545         /*
 1546          * Machines that don't support invlpg, also don't support
 1547          * PG_G.
 1548          */
 1549         if (oldpte & PG_G)
 1550                 pmap_invalidate_page(kernel_pmap, va);
 1551         pmap->pm_stats.resident_count -= 1;
 1552         if (oldpte & PG_MANAGED) {
 1553                 m = PHYS_TO_VM_PAGE(oldpte);
 1554                 if (oldpte & PG_M) {
 1555 #if defined(PMAP_DIAGNOSTIC)
 1556                         if (pmap_nw_modified((pt_entry_t) oldpte)) {
 1557                                 printf(
 1558         "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1559                                     va, oldpte);
 1560                         }
 1561 #endif
 1562                         if (pmap_track_modified(va))
 1563                                 vm_page_dirty(m);
 1564                 }
 1565                 if (oldpte & PG_A)
 1566                         vm_page_flag_set(m, PG_REFERENCED);
 1567                 return pmap_remove_entry(pmap, m, va);
 1568         } else {
 1569                 return pmap_unuse_pt(pmap, va);
 1570         }
 1571 }
 1572 
 1573 /*
 1574  * Remove a single page from a process address space
 1575  */
 1576 static void
 1577 pmap_remove_page(pmap_t pmap, vm_offset_t va)
 1578 {
 1579         pt_entry_t *pte;
 1580 
 1581         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1582         KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 1583         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1584         if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
 1585                 return;
 1586         pmap_remove_pte(pmap, pte, va);
 1587         pmap_invalidate_page(pmap, va);
 1588 }
 1589 
  1590 /*
  1591  *      Remove the given range of addresses from the specified map.
  1592  *
  1593  *      It is assumed that the start and end are properly
  1594  *      rounded to the page size.
        *
        *      The page queues lock is taken, the thread is pinned and the
        *      pmap is locked for the duration of the scan.  The range is
        *      walked one page directory entry at a time; if anything was
        *      removed by that walk, the whole pmap is invalidated at the
        *      end.
  1595  */
  1596 void
  1597 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  1598 {
  1599         vm_offset_t pdnxt;
  1600         pd_entry_t ptpaddr;
  1601         pt_entry_t *pte;
  1602         int anyvalid;
  1603 
  1604         /*
  1605          * Perform an unsynchronized read.  This is, however, safe.
  1606          */
  1607         if (pmap->pm_stats.resident_count == 0)
  1608                 return;
  1609 
  1610         anyvalid = 0;
  1611 
  1612         vm_page_lock_queues();
  1613         sched_pin();
  1614         PMAP_LOCK(pmap);
  1615 
  1616         /*
  1617          * special handling of removing one page.  a very
  1618          * common operation and easy to short circuit some
  1619          * code.
                *
                * pmap_remove_page() invalidates the page itself, so
                * anyvalid is left at 0 on this path.
  1620          */
  1621         if ((sva + PAGE_SIZE == eva) && 
  1622             ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
  1623                 pmap_remove_page(pmap, sva);
  1624                 goto out;
  1625         }
  1626 
  1627         for (; sva < eva; sva = pdnxt) {
  1628                 unsigned pdirindex;
  1629 
  1630                 /*
  1631                  * Calculate index for next page table.
  1632                  */
  1633                 pdnxt = (sva + NBPDR) & ~PDRMASK;
                       /* Nothing left in the pmap: no point in scanning on. */
  1634                 if (pmap->pm_stats.resident_count == 0)
  1635                         break;
  1636 
  1637                 pdirindex = sva >> PDRSHIFT;
  1638                 ptpaddr = pmap->pm_pdir[pdirindex];
  1639 
  1640                 /*
  1641                  * Weed out invalid mappings. Note: we assume that the page
  1642                  * directory table is always allocated, and in kernel virtual.
  1643                  */
  1644                 if (ptpaddr == 0)
  1645                         continue;
  1646 
  1647                 /*
  1648                  * Check for large page.
                        *
                        * The whole PG_PS mapping is dropped; the range is
                        * not checked against the large page's bounds here.
  1649                  */
  1650                 if ((ptpaddr & PG_PS) != 0) {
  1651                         pmap->pm_pdir[pdirindex] = 0;
  1652                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
  1653                         anyvalid = 1;
  1654                         continue;
  1655                 }
  1656 
  1657                 /*
  1658                  * Limit our scan to either the end of the va represented
  1659                  * by the current page table page, or to the end of the
  1660                  * range being removed.
  1661                  */
  1662                 if (pdnxt > eva)
  1663                         pdnxt = eva;
  1664 
                       /*
                        * A nonzero return from pmap_remove_pte() means the
                        * page table page itself was freed, so the rest of
                        * this page table is skipped; the outer loop resumes
                        * at pdnxt.
                        */
  1665                 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
  1666                     sva += PAGE_SIZE) {
  1667                         if (*pte == 0)
  1668                                 continue;
  1669                         anyvalid = 1;
  1670                         if (pmap_remove_pte(pmap, pte, sva))
  1671                                 break;
  1672                 }
  1673         }
  1674 out:
               /*
                * The TLB flush is issued after the page queues lock has
                * been dropped but while the pmap is still locked.
                */
  1675         sched_unpin();
  1676         vm_page_unlock_queues();
  1677         if (anyvalid)
  1678                 pmap_invalidate_all(pmap);
  1679         PMAP_UNLOCK(pmap);
  1680 }
 1681 
 1682 /*
 1683  *      Routine:        pmap_remove_all
 1684  *      Function:
 1685  *              Removes this physical page from
 1686  *              all physical maps in which it resides.
 1687  *              Reflects back modify bits to the pager.
 1688  *
 1689  *      Notes:
 1690  *              Original versions of this routine were very
 1691  *              inefficient because they iteratively called
 1692  *              pmap_remove (slow...)
       *
       *              The caller must own vm_page_queue_mtx (asserted on
       *              entry).  Each pv entry is torn down while holding
       *              the lock of the pmap it belongs to, and the thread
       *              stays pinned (sched_pin) for the whole walk.
 1693  */
 1694 
 1695 void
 1696 pmap_remove_all(vm_page_t m)
 1697 {
 1698         register pv_entry_t pv;
 1699         pt_entry_t *pte, tpte;
 1700 
 1701 #if defined(PMAP_DIAGNOSTIC)
 1702         /*
 1703          * XXX This makes pmap_remove_all() illegal for non-managed pages!
              * The value reported is the page's physical address; it is
              * widened to uintmax_t because vm_paddr_t need not fit in an int.
 1704          */
 1705         if (m->flags & PG_FICTITIOUS) {
 1706                 panic("pmap_remove_all: illegal for unmanaged page, pa: 0x%jx",
 1707                     (uintmax_t)VM_PAGE_TO_PHYS(m));
 1708         }
 1709 #endif
 1710         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1711         sched_pin();
              /*
               * Always take the head of the list: every iteration unlinks the
               * entry it processed, so the loop ends when the list is empty.
               */
 1712         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 1713                 PMAP_LOCK(pv->pv_pmap);
 1714                 pv->pv_pmap->pm_stats.resident_count--;
 1715                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
                      /* tpte keeps the old entry so its status bits can be examined. */
 1716                 tpte = pte_load_clear(pte);
 1717                 if (tpte & PG_W)
 1718                         pv->pv_pmap->pm_stats.wired_count--;
 1719                 if (tpte & PG_A)
 1720                         vm_page_flag_set(m, PG_REFERENCED);
 1721 
 1722                 /*
 1723                  * Update the vm_page_t clean and reference bits.
 1724                  */
 1725                 if (tpte & PG_M) {
 1726 #if defined(PMAP_DIAGNOSTIC)
 1727                         if (pmap_nw_modified((pt_entry_t) tpte)) {
 1728                                 printf(
 1729         "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%jx\n",
 1730                                     pv->pv_va, (uintmax_t)tpte);
 1731                         }
 1732 #endif
 1733                         if (pmap_track_modified(pv->pv_va))
 1734                                 vm_page_dirty(m);
 1735                 }
 1736                 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
                      /* Unlink the pv entry from both the pmap's and the page's list. */
 1737                 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 1738                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1739                 m->md.pv_list_count--;
 1740                 pmap_unuse_pt(pv->pv_pmap, pv->pv_va);
 1741                 PMAP_UNLOCK(pv->pv_pmap);
 1742                 free_pv_entry(pv);
 1743         }
              /* Every pv entry has been removed; clear PG_WRITEABLE on the page. */
 1744         vm_page_flag_clear(m, PG_WRITEABLE);
 1745         sched_unpin();
 1746 }
 1747 
 1748 /*
 1749  *      Set the physical protection on the
 1750  *      specified range of this map as requested.
       *
       *      If prot does not include VM_PROT_READ the range [sva, eva) is
       *      removed with pmap_remove().  If prot includes VM_PROT_WRITE
       *      nothing is done.  Otherwise every mapping in the range is made
       *      read-only: PG_RW and PG_M are cleared, and for managed pages
       *      the accessed/modified state is first passed on to the vm_page
       *      (PG_REFERENCED / vm_page_dirty()).
       *
       *      Write-protecting a mapping does not unmap it, so the pmap's
       *      resident_count is never changed here.
 1751  */
 1752 void
 1753 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 1754 {
 1755         vm_offset_t pdnxt;
 1756         pd_entry_t ptpaddr;
 1757         pt_entry_t *pte;
 1758         int anychanged;
 1759 
 1760         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 1761                 pmap_remove(pmap, sva, eva);
 1762                 return;
 1763         }
 1764 
              /* Write access is still wanted: there is nothing to take away. */
 1765         if (prot & VM_PROT_WRITE)
 1766                 return;
 1767 
 1768         anychanged = 0;
 1769 
 1770         vm_page_lock_queues();
 1771         sched_pin();
 1772         PMAP_LOCK(pmap);
              /* Walk the range one page-directory entry (NBPDR bytes) at a time. */
 1773         for (; sva < eva; sva = pdnxt) {
 1774                 unsigned obits, pbits, pdirindex;
 1775 
 1776                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 1777 
 1778                 pdirindex = sva >> PDRSHIFT;
 1779                 ptpaddr = pmap->pm_pdir[pdirindex];
 1780 
 1781                 /*
 1782                  * Weed out invalid mappings. Note: we assume that the page
 1783                  * directory table is always allocated, and in kernel virtual.
 1784                  */
 1785                 if (ptpaddr == 0)
 1786                         continue;
 1787 
 1788                 /*
 1789                  * Check for large page.
 1790                  */
 1791                 if ((ptpaddr & PG_PS) != 0) {
 1792                         pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
 1793                         /* Still mapped: resident_count is left unchanged. */
 1794                         anychanged = 1;
 1795                         continue;
 1796                 }
 1797 
                      /* Do not run past the end of the requested range. */
 1798                 if (pdnxt > eva)
 1799                         pdnxt = eva;
 1800 
 1801                 for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
 1802                     sva += PAGE_SIZE) {
 1803                         vm_page_t m;
 1804 
 1805 retry:
 1806                         /*
 1807                          * Regardless of whether a pte is 32 or 64 bits in
 1808                          * size, PG_RW, PG_A, and PG_M are among the least
 1809                          * significant 32 bits.
 1810                          */
 1811                         obits = pbits = *(u_int *)pte;
 1812                         if (pbits & PG_MANAGED) {
 1813                                 m = NULL;
 1814                                 if (pbits & PG_A) {
 1815                                         m = PHYS_TO_VM_PAGE(*pte);
 1816                                         vm_page_flag_set(m, PG_REFERENCED);
 1817                                         pbits &= ~PG_A;
 1818                                 }
 1819                                 if ((pbits & PG_M) != 0 &&
 1820                                     pmap_track_modified(sva)) {
 1821                                         if (m == NULL)
 1822                                                 m = PHYS_TO_VM_PAGE(*pte);
 1823                                         vm_page_dirty(m);
 1824                                 }
 1825                         }
 1826 
 1827                         pbits &= ~(PG_RW | PG_M);
 1828 
                              /*
                               * Install the new bits only if the entry still holds
                               * the value that was sampled; otherwise start over
                               * with a fresh snapshot.
                               */
 1829                         if (pbits != obits) {
 1830                                 if (!atomic_cmpset_int((u_int *)pte, obits,
 1831                                     pbits))
 1832                                         goto retry;
                                      /*
                                       * PG_G entries are invalidated one by one;
                                       * everything else is covered by the single
                                       * pmap_invalidate_all() issued after the loop.
                                       */
 1833                                 if (obits & PG_G)
 1834                                         pmap_invalidate_page(pmap, sva);
 1835                                 else
 1836                                         anychanged = 1;
 1837                         }
 1838                 }
 1839         }
 1840         sched_unpin();
 1841         vm_page_unlock_queues();
 1842         if (anychanged)
 1843                 pmap_invalidate_all(pmap);
 1844         PMAP_UNLOCK(pmap);
 1845 }
 1846 
 1847 /*
 1848  *      Insert the given physical page (p) at
 1849  *      the specified virtual address (v) in the
 1850  *      target physical map with the protection requested.
 1851  *
 1852  *      If specified, the page will be wired down, meaning
 1853  *      that the related pte can not be reclaimed.
 1854  *
 1855  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 1856  *      or lose information.  That is, this routine must actually
 1857  *      insert this page into the given map NOW.
       *
       *      The page queues lock and the pmap lock are taken here and held,
       *      with the thread pinned, until the new entry has been stored.
       *      Three cases are handled: the same physical page is already
       *      mapped at va (only protection/wiring may change), a different
       *      page is mapped at va (the old mapping is torn down first), or
       *      nothing is mapped at va.
 1858  */
 1859 void
 1860 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 1861            boolean_t wired)
 1862 {
 1863         vm_paddr_t pa;
 1864         register pt_entry_t *pte;
 1865         vm_paddr_t opa;
 1866         pt_entry_t origpte, newpte;
 1867         vm_page_t mpte, om;
 1868 
 1869         va &= PG_FRAME;
 1870 #ifdef PMAP_DIAGNOSTIC
 1871         if (va > VM_MAX_KERNEL_ADDRESS)
 1872                 panic("pmap_enter: toobig");
 1873         if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 1874                 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
 1875 #endif
 1876 
 1877         mpte = NULL;
 1878 
 1879         vm_page_lock_queues();
 1880         PMAP_LOCK(pmap);
 1881         sched_pin();
 1882 
 1883         /*
 1884          * In the case that a page table page is not
 1885          * resident, we are creating it here.
 1886          */
 1887         if (va < VM_MAXUSER_ADDRESS) {
 1888                 mpte = pmap_allocpte(pmap, va, M_WAITOK);
 1889         }
 1890 #if 0 && defined(PMAP_DIAGNOSTIC)
 1891         else {
 1892                 pd_entry_t *pdeaddr = pmap_pde(pmap, va);
 1893                 origpte = *pdeaddr;
 1894                 if ((origpte & PG_V) == 0) { 
 1895                         panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
 1896                                 pmap->pm_pdir[PTDPTDI], origpte, va);
 1897                 }
 1898         }
 1899 #endif
 1900 
 1901         pte = pmap_pte_quick(pmap, va);
 1902 
 1903         /*
 1904          * No pte could be found for va, i.e. no page table page backs
 1905          * it.  That is treated as fatal here.
 1906          */
 1907         if (pte == NULL) {
 1908                 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n",
 1909                         (uintmax_t)pmap->pm_pdir[PTDPTDI], va);
 1910         }
 1911 
 1912         pa = VM_PAGE_TO_PHYS(m);
 1913         om = NULL;
 1914         origpte = *pte;
 1915         opa = origpte & PG_FRAME;
 1916 
 1917         if (origpte & PG_PS) {
 1918                 /*
 1919                  * Yes, I know this will truncate upper address bits for PAE,
 1920                  * but I'm actually more interested in the lower bits
 1921                  */
 1922                 printf("pmap_enter: va %p, pte %p, origpte %p\n",
 1923                     (void *)va, (void *)pte, (void *)(uintptr_t)origpte);
 1924                 panic("pmap_enter: attempted pmap_enter on 4MB page");
 1925         }
 1926 
 1927         /*
 1928          * Mapping has not changed, must be protection or wiring change.
 1929          */
 1930         if (origpte && (opa == pa)) {
 1931                 /*
 1932                  * Wiring change, just update stats. We don't worry about
 1933                  * wiring PT pages as they remain resident as long as there
 1934                  * are valid mappings in them. Hence, if a user page is wired,
 1935                  * the PT page will be also.
 1936                  */
 1937                 if (wired && ((origpte & PG_W) == 0))
 1938                         pmap->pm_stats.wired_count++;
 1939                 else if (!wired && (origpte & PG_W))
 1940                         pmap->pm_stats.wired_count--;
 1941 
 1942 #if defined(PMAP_DIAGNOSTIC)
                      /* origpte is widened because a pte may be 64 bits in size. */
 1943                 if (pmap_nw_modified((pt_entry_t) origpte)) {
 1944                         printf(
 1945         "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%jx\n",
 1946                             va, (uintmax_t)origpte);
 1947                 }
 1948 #endif
 1949 
 1950                 /*
 1951                  * Remove extra pte reference
 1952                  */
 1953                 if (mpte)
 1954                         mpte->wire_count--;
 1955 
 1956                 /*
 1957                  * We might be turning off write access to the page,
 1958                  * so we go ahead and sense modify status.
 1959                  */
 1960                 if (origpte & PG_MANAGED) {
 1961                         om = m;
 1962                         pa |= PG_MANAGED;
 1963                 }
 1964                 goto validate;
 1965         } 
 1966         /*
 1967          * Mapping has changed, invalidate old range and fall through to
 1968          * handle validating new mapping.
 1969          */
 1970         if (opa) {
 1971                 int err;
 1972                 if (origpte & PG_W)
 1973                         pmap->pm_stats.wired_count--;
 1974                 if (origpte & PG_MANAGED) {
 1975                         om = PHYS_TO_VM_PAGE(opa);
 1976                         err = pmap_remove_entry(pmap, om, va);
 1977                 } else
 1978                         err = pmap_unuse_pt(pmap, va);
 1979                 if (err)
 1980                         panic("pmap_enter: pte vanished, va: 0x%x", va);
 1981         } else
 1982                 pmap->pm_stats.resident_count++;
 1983 
 1984         /*
 1985          * Enter on the PV list if part of our managed memory.  The page
 1986          * queues lock and the pmap lock taken above are held across
 1987          * this update.
 1988          */
 1989         if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
 1990                 pmap_insert_entry(pmap, va, m);
 1991                 pa |= PG_MANAGED;
 1992         }
 1993 
 1994         /*
 1995          * Increment counters
 1996          */
 1997         if (wired)
 1998                 pmap->pm_stats.wired_count++;
 1999 
 2000 validate:
 2001         /*
 2002          * Now validate mapping with desired protection/wiring.
 2003          */
 2004         newpte = (pt_entry_t)(pa | PG_V);
 2005         if ((prot & VM_PROT_WRITE) != 0)
 2006                 newpte |= PG_RW;
 2007         if (wired)
 2008                 newpte |= PG_W;
 2009         if (va < VM_MAXUSER_ADDRESS)
 2010                 newpte |= PG_U;
 2011         if (pmap == kernel_pmap)
 2012                 newpte |= pgeflag;
 2013 
 2014         /*
 2015          * if the mapping or permission bits are different, we need
 2016          * to update the pte.
              * The entry is always stored with PG_A set.  For a previously
              * managed mapping the old entry's PG_M/PG_A state is handed to
              * the old page (om) before it is lost.
 2017          */
 2018         if ((origpte & ~(PG_M|PG_A)) != newpte) {
 2019                 if (origpte & PG_MANAGED) {
 2020                         origpte = pte_load_store(pte, newpte | PG_A);
 2021                         if ((origpte & PG_M) && pmap_track_modified(va))
 2022                                 vm_page_dirty(om);
 2023                         if (origpte & PG_A)
 2024                                 vm_page_flag_set(om, PG_REFERENCED);
 2025                 } else
 2026                         pte_store(pte, newpte | PG_A);
                      /* Only a previously non-zero entry is invalidated. */
 2027                 if (origpte) {
 2028                         pmap_invalidate_page(pmap, va);
 2029                 }
 2030         }
 2031         sched_unpin();
 2032         vm_page_unlock_queues();
 2033         PMAP_UNLOCK(pmap);
 2034 }
 2034 
 2035 /*
 2036  * this code makes some *MAJOR* assumptions:
 2037  * 1. Current pmap & pmap exists.
 2038  * 2. Not wired.
 2039  * 3. Read access.
 2040  * 4. No page table pages.
 2041  * 5. Page IS managed.
 2042  * but is *MUCH* faster than pmap_enter...
       *
       * The caller must hold the page queues lock and the lock of m's object
       * (both asserted on entry); both are dropped and retaken if a page
       * table page has to be waited for.
       *
       * mpte is a hint: the page table page used by a previous call, or NULL.
       * The return value is the page table page that now backs va, or NULL
       * when va is not a user address or when a mapping already existed at
       * va (in which case nothing is entered).
 2043  */
 2044 
 2045 vm_page_t
 2046 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
 2047 {
 2048         pt_entry_t *pte;
 2049         vm_paddr_t pa;
 2050 
 2051         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2052         VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
 2053         PMAP_LOCK(pmap);
 2054 
 2055         /*
 2056          * In the case that a page table page is not
 2057          * resident, we are creating it here.
 2058          */
 2059         if (va < VM_MAXUSER_ADDRESS) {
 2060                 unsigned ptepindex;
 2061                 pd_entry_t ptepa;
 2062 
 2063                 /*
 2064                  * Calculate pagetable page index
 2065                  */
 2066                 ptepindex = va >> PDRSHIFT;
                      /* The hinted page table page already covers va: reuse it. */
 2067                 if (mpte && (mpte->pindex == ptepindex)) {
 2068                         mpte->wire_count++;
 2069                 } else {
 2070 retry:
 2071                         /*
 2072                          * Get the page directory entry
 2073                          */
 2074                         ptepa = pmap->pm_pdir[ptepindex];
 2075 
 2076                         /*
 2077                          * If the page table page is mapped, we just take
 2078                          * another reference on it (wire_count).
 2079                          */
 2080                         if (ptepa) {
 2081                                 if (ptepa & PG_PS)
 2082                                         panic("pmap_enter_quick: unexpected mapping into 4MB page");
 2083                                 mpte = PHYS_TO_VM_PAGE(ptepa);
 2084                                 mpte->wire_count++;
 2085                         } else {
 2086                                 mpte = _pmap_allocpte(pmap, ptepindex,
 2087                                     M_NOWAIT);
                                      /*
                                       * Allocation failed.  Drop every lock in
                                       * turn, with m marked busy meanwhile, wait
                                       * in VM_WAIT, then retake the locks and
                                       * re-read the page directory entry.
                                       */
 2088                                 if (mpte == NULL) {
 2089                                         PMAP_UNLOCK(pmap);
 2090                                         vm_page_busy(m);
 2091                                         vm_page_unlock_queues();
 2092                                         VM_OBJECT_UNLOCK(m->object);
 2093                                         VM_WAIT;
 2094                                         VM_OBJECT_LOCK(m->object);
 2095                                         vm_page_lock_queues();
 2096                                         vm_page_wakeup(m);
 2097                                         PMAP_LOCK(pmap);
 2098                                         goto retry;
 2099                                 }
 2100                         }
 2101                 }
 2102         } else {
 2103                 mpte = NULL;
 2104         }
 2105 
 2106         /*
 2107          * This call to vtopte makes the assumption that we are
 2108          * entering the page into the current pmap.  In order to support
 2109          * quick entry into any pmap, one would likely use pmap_pte_quick.
 2110          * But that isn't as quick as vtopte.
 2111          */
 2112         pte = vtopte(va);
              /*
               * Something is already mapped at va: give back the page table
               * page reference taken above and leave the mapping alone.
               */
 2113         if (*pte) {
 2114                 if (mpte != NULL) {
 2115                         pmap_unwire_pte_hold(pmap, mpte);
 2116                         mpte = NULL;
 2117                 }
 2118                 goto out;
 2119         }
 2120 
 2121         /*
 2122          * Enter on the PV list if part of our managed memory.  The page
 2123          * queues lock (asserted on entry) and the pmap lock are held
 2124          * here.
 2125          */
 2126         if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 2127                 pmap_insert_entry(pmap, va, m);
 2128 
 2129         /*
 2130          * Increment counters
 2131          */
 2132         pmap->pm_stats.resident_count++;
 2133 
 2134         pa = VM_PAGE_TO_PHYS(m);
 2135 
 2136         /*
 2137          * Now validate mapping with RO protection
              * (PG_RW is never set; PG_MANAGED is added only for pages that
              * were entered on the PV list above).
 2138          */
 2139         if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 2140                 pte_store(pte, pa | PG_V | PG_U);
 2141         else
 2142                 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
 2143 out:
 2144         PMAP_UNLOCK(pmap);
 2145         return mpte;
 2146 }
 2147 
 2148 /*
 2149  * Make a temporary mapping for a physical address.  This is only intended
 2150  * to be used for panic dumps.
       *
       * pa is mapped at page slot i of crashdumpmap and that address is
       * flushed from the local TLB.  Note that the value returned is always
       * the base of crashdumpmap, not the address of slot i.
 2151  */
 2152 void *
 2153 pmap_kenter_temporary(vm_paddr_t pa, int i)
 2154 {
 2155         vm_offset_t va;
 2156 
 2157         va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 2158         pmap_kenter(va, pa);
 2159         invlpg(va);
 2160         return ((void *)crashdumpmap);
 2161 }
 2162 
 2163 /*
 2164  * This code maps large physical mmap regions into the
 2165  * processor address space.  Note that some shortcuts
 2166  * are taken, but the code works.
       *
       * The object must be locked by the caller and must be a device object.
       * Work is done only when pseflag is set and both addr and size are
       * multiples of NBPDR; the region is then mapped with PG_PS page
       * directory entries, assuming the physical memory behind the object
       * is contiguous starting at the page found at pindex.  In every other
       * case, and on any failure, the function returns without mapping
       * anything.
 2167  */
 2168 void
 2169 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
 2170                     vm_object_t object, vm_pindex_t pindex,
 2171                     vm_size_t size)
 2172 {
 2173         vm_page_t p;
 2174 
 2175         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 2176         KASSERT(object->type == OBJT_DEVICE,
 2177             ("pmap_object_init_pt: non-device object"));
 2178         if (pseflag && 
 2179             ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
 2180                 int i;
 2181                 vm_page_t m[1];
 2182                 unsigned int ptepindex;
 2183                 int npdes;
 2184                 pd_entry_t ptepa;
 2185 
                      /* Nothing to do if the first directory slot is already in use. */
 2186                 PMAP_LOCK(pmap);
 2187                 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
 2188                         goto out;
 2189                 PMAP_UNLOCK(pmap);
 2190 retry:
 2191                 p = vm_page_lookup(object, pindex);
 2192                 if (p != NULL) {
 2193                         vm_page_lock_queues();
                              /*
                               * A true return sends us back to look the page up
                               * again; presumably the call slept and dropped the
                               * page queues lock -- confirm against
                               * vm_page_sleep_if_busy().
                               */
 2194                         if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
 2195                                 goto retry;
 2196                 } else {
                              /* Not resident: allocate the page and have the pager fill it. */
 2197                         p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 2198                         if (p == NULL)
 2199                                 return;
 2200                         m[0] = p;
 2201 
 2202                         if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 2203                                 vm_page_lock_queues();
 2204                                 vm_page_free(p);
 2205                                 vm_page_unlock_queues();
 2206                                 return;
 2207                         }
 2208 
 2209                         p = vm_page_lookup(object, pindex);
 2210                         vm_page_lock_queues();
 2211                         vm_page_wakeup(p);
 2212                 }
 2213                 vm_page_unlock_queues();
 2214 
                      /* A PG_PS mapping needs an NBPDR-aligned physical address. */
 2215                 ptepa = VM_PAGE_TO_PHYS(p);
 2216                 if (ptepa & (NBPDR - 1))
 2217                         return;
 2218 
 2219                 p->valid = VM_PAGE_BITS_ALL;
 2220 
 2221                 PMAP_LOCK(pmap);
 2222                 pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
 2223                 npdes = size >> PDRSHIFT;
                      /* One PG_PS directory entry per NBPDR of the region. */
 2224                 for(i = 0; i < npdes; i++) {
 2225                         pde_store(&pmap->pm_pdir[ptepindex],
 2226                             ptepa | PG_U | PG_RW | PG_V | PG_PS);
 2227                         ptepa += NBPDR;
 2228                         ptepindex += 1;
 2229                 }
 2230                 pmap_invalidate_all(pmap);
 2231 out:
 2232                 PMAP_UNLOCK(pmap);
 2233         }
 2234 }
 2235 
 2236 /*
 2237  *      Routine:        pmap_change_wiring
 2238  *      Function:       Change the wiring attribute for a map/virtual-address
 2239  *                      pair.
 2240  *      In/out conditions:
 2241  *                      The mapping must already exist in the pmap.
       *
       *      The pmap's wired_count is adjusted only when the wired state
       *      of the pte actually changes.  The pte returned by pmap_pte()
       *      is used without a NULL check, hence the precondition above.
 2242  */
 2243 void
 2244 pmap_change_wiring(pmap, va, wired)
 2245         register pmap_t pmap;
 2246         vm_offset_t va;
 2247         boolean_t wired;
 2248 {
 2249         register pt_entry_t *pte;
 2250 
 2251         PMAP_LOCK(pmap);
 2252         pte = pmap_pte(pmap, va);
 2253 
 2254         if (wired && !pmap_pte_w(pte))
 2255                 pmap->pm_stats.wired_count++;
 2256         else if (!wired && pmap_pte_w(pte))
 2257                 pmap->pm_stats.wired_count--;
 2258 
 2259         /*
 2260          * Wiring is not a hardware characteristic so there is no need to
 2261          * invalidate TLB.
 2262          */
 2263         pmap_pte_set_w(pte, wired);
              /* Pairs with the pmap_pte() lookup above. */
 2264         pmap_pte_release(pte);
 2265         PMAP_UNLOCK(pmap);
 2266 }
 2267 
 2268 
 2269 
 2270 /*
 2271  *      Copy the range specified by src_addr/len
 2272  *      from the source map to the range dst_addr/len
 2273  *      in the destination map.
 2274  *
 2275  *      This routine is only advisory and need not do anything.
       *
       *      Nothing is copied unless dst_addr equals src_addr and src_pmap
       *      is the current pmap.  Only managed 4KB mappings and PG_PS
       *      directory entries are copied, and only into slots that are
       *      still empty in the destination.  The copy stops early when
       *      free pages run low or too many pv entries are in use.
 2276  */
 2277 
 2278 void
 2279 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 2280           vm_offset_t src_addr)
 2281 {
 2282         vm_offset_t addr;
 2283         vm_offset_t end_addr = src_addr + len;
 2284         vm_offset_t pdnxt;
 2285         vm_page_t m;
 2286 
 2287         if (dst_addr != src_addr)
 2288                 return;
 2289 
 2290         if (!pmap_is_current(src_pmap))
 2291                 return;
 2292 
 2293         vm_page_lock_queues();
              /* The two pmap locks are always taken lower address first. */
 2294         if (dst_pmap < src_pmap) {
 2295                 PMAP_LOCK(dst_pmap);
 2296                 PMAP_LOCK(src_pmap);
 2297         } else {
 2298                 PMAP_LOCK(src_pmap);
 2299                 PMAP_LOCK(dst_pmap);
 2300         }
 2301         sched_pin();
              /* Walk the range one page-directory entry (NBPDR bytes) at a time. */
 2302         for (addr = src_addr; addr < end_addr; addr = pdnxt) {
 2303                 pt_entry_t *src_pte, *dst_pte;
 2304                 vm_page_t dstmpte, srcmpte;
 2305                 pd_entry_t srcptepaddr;
 2306                 unsigned ptepindex;
 2307 
 2308                 if (addr >= UPT_MIN_ADDRESS)
 2309                         panic("pmap_copy: invalid to pmap_copy page tables");
 2310 
 2311                 /*
 2312                  * Don't let optional prefaulting of pages make us go
 2313                  * way below the low water mark of free pages or way
 2314                  * above high water mark of used pv entries.
 2315                  */
 2316                 if (cnt.v_free_count < cnt.v_free_reserved ||
 2317                     pv_entry_count > pv_entry_high_water)
 2318                         break;
 2319                 
 2320                 pdnxt = (addr + NBPDR) & ~PDRMASK;
 2321                 ptepindex = addr >> PDRSHIFT;
 2322 
 2323                 srcptepaddr = src_pmap->pm_pdir[ptepindex];
 2324                 if (srcptepaddr == 0)
 2325                         continue;
 2326                         
                      /*
                       * A large-page directory entry is copied as is, provided
                       * the destination slot is empty.
                       */
 2327                 if (srcptepaddr & PG_PS) {
 2328                         if (dst_pmap->pm_pdir[ptepindex] == 0) {
 2329                                 dst_pmap->pm_pdir[ptepindex] = srcptepaddr;
 2330                                 dst_pmap->pm_stats.resident_count +=
 2331                                     NBPDR / PAGE_SIZE;
 2332                         }
 2333                         continue;
 2334                 }
 2335 
 2336                 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
 2337                 if (srcmpte->wire_count == 0)
 2338                         panic("pmap_copy: source page table page is unused");
 2339 
 2340                 if (pdnxt > end_addr)
 2341                         pdnxt = end_addr;
 2342 
                      /* vtopte() is usable because src_pmap is the current pmap. */
 2343                 src_pte = vtopte(addr);
 2344                 while (addr < pdnxt) {
 2345                         pt_entry_t ptetemp;
 2346                         ptetemp = *src_pte;
 2347                         /*
 2348                          * we only virtual copy managed pages
 2349                          */
 2350                         if ((ptetemp & PG_MANAGED) != 0) {
 2351                                 /*
 2352                                  * We have to check after allocpte for the
 2353                                  * pte still being around...  allocpte can
 2354                                  * block.
 2355                                  */
 2356                                 dstmpte = pmap_allocpte(dst_pmap, addr,
 2357                                     M_NOWAIT);
 2358                                 if (dstmpte == NULL)
 2359                                         break;
 2360                                 dst_pte = pmap_pte_quick(dst_pmap, addr);
 2361                                 if (*dst_pte == 0) {
 2362                                         /*
 2363                                          * Clear the modified and
 2364                                          * accessed (referenced) bits
 2365                                          * during the copy.
 2366                                          */
 2367                                         m = PHYS_TO_VM_PAGE(ptetemp);
 2368                                         *dst_pte = ptetemp & ~(PG_M | PG_A);
 2369                                         dst_pmap->pm_stats.resident_count++;
 2370                                         pmap_insert_entry(dst_pmap, addr, m);
 2371                                 } else
                                              /*
                                               * Slot already in use: give back the
                                               * reference pmap_allocpte() took.
                                               */
 2372                                         pmap_unwire_pte_hold(dst_pmap, dstmpte);
                                      /*
                                       * Leave this page table page once the
                                       * destination holds at least as many
                                       * references as the source.
                                       */
 2373                                 if (dstmpte->wire_count >= srcmpte->wire_count)
 2374                                         break;
 2375                         }
 2376                         addr += PAGE_SIZE;
 2377                         src_pte++;
 2378                 }
 2379         }
 2380         sched_unpin();
 2381         vm_page_unlock_queues();
 2382         PMAP_UNLOCK(src_pmap);
 2383         PMAP_UNLOCK(dst_pmap);
 2384 }       
 2385 
      /*
       * Zero one page at the given kernel virtual address.  When built with
       * I686_CPU and running on a 686-class CPU, sse2_pagezero() is used if
       * CPU_ENABLE_SSE is defined and the CPU reports SSE2, otherwise
       * i686_pagezero(); in every other case plain bzero() is used.
       */
 2386 static __inline void
 2387 pagezero(void *page)
 2388 {
 2389 #if defined(I686_CPU)
 2390         if (cpu_class == CPUCLASS_686) {
 2391 #if defined(CPU_ENABLE_SSE)
 2392                 if (cpu_feature & CPUID_SSE2)
 2393                         sse2_pagezero(page);
 2394                 else
 2395 #endif
 2396                         i686_pagezero(page);
 2397         } else
 2398 #endif
 2399                 bzero(page, PAGE_SIZE);
 2400 }
 2401 
 2402 /*
 2403  *      pmap_zero_page zeros the specified hardware page by mapping 
 2404  *      the page into KVM and using bzero to clear its contents.
       *
       *      The page is mapped through the CMAP2/CADDR2 slot of the sysmaps
       *      entry selected by the current CPU id, under that entry's mutex.
       *      The slot must be empty on entry and is cleared again on exit.
 2405  */
 2406 void
 2407 pmap_zero_page(vm_page_t m)
 2408 {
 2409         struct sysmaps *sysmaps;
 2410 
 2411         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 2412         mtx_lock(&sysmaps->lock);
 2413         if (*sysmaps->CMAP2)
 2414                 panic("pmap_zero_page: CMAP2 busy");
 2415         sched_pin();
              /* Map the page writable, flush any old translation, then zero it. */
 2416         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2417         invlcaddr(sysmaps->CADDR2);
 2418         pagezero(sysmaps->CADDR2);
 2419         *sysmaps->CMAP2 = 0;
 2420         sched_unpin();
 2421         mtx_unlock(&sysmaps->lock);
 2422 }
 2423 
 2424 /*
 2425  *      pmap_zero_page_area zeros part of the specified hardware page by
 2426  *      mapping the page into KVM and using bzero to clear its contents.
 2427  *
 2428  *      off and size may not cover an area beyond a single hardware page.
       *      When they describe the whole page (off 0, size PAGE_SIZE) the
       *      optimized pagezero() path is used instead of bzero.
       *
       *      The page is mapped through the CMAP2/CADDR2 slot of the sysmaps
       *      entry selected by the current CPU id, under that entry's mutex.
 2429  */
 2430 void
 2431 pmap_zero_page_area(vm_page_t m, int off, int size)
 2432 {
 2433         struct sysmaps *sysmaps;
 2434 
 2435         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 2436         mtx_lock(&sysmaps->lock);
 2437         if (*sysmaps->CMAP2)
 2438                 panic("pmap_zero_page_area: CMAP2 busy");
 2439         sched_pin();
              /* Map the page writable and flush any old translation first. */
 2440         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2441         invlcaddr(sysmaps->CADDR2);
 2442         if (off == 0 && size == PAGE_SIZE) 
 2443                 pagezero(sysmaps->CADDR2);
 2444         else
 2445                 bzero((char *)sysmaps->CADDR2 + off, size);
 2446         *sysmaps->CMAP2 = 0;
 2447         sched_unpin();
 2448         mtx_unlock(&sysmaps->lock);
 2449 }
 2450 
 2451 /*
 2452  *      pmap_zero_page_idle zeros the specified hardware page by mapping 
 2453  *      the page into KVM and using bzero to clear its contents.  This
 2454  *      is intended to be called from the vm_pagezero process only and
 2455  *      outside of Giant.
       *
       *      Unlike the other zeroing routines it uses the single CMAP3/CADDR3
       *      slot and takes no mutex; the slot must be empty on entry and is
       *      cleared again on exit.
 2456  */
 2457 void
 2458 pmap_zero_page_idle(vm_page_t m)
 2459 {
 2460 
 2461         if (*CMAP3)
 2462                 panic("pmap_zero_page_idle: CMAP3 busy");
 2463         sched_pin();
              /* Map the page writable, flush any old translation, then zero it. */
 2464         *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2465         invlcaddr(CADDR3);
 2466         pagezero(CADDR3);
 2467         *CMAP3 = 0;
 2468         sched_unpin();
 2469 }
 2470 
 2471 /*
 2472  *      pmap_copy_page copies the specified (machine independent)
 2473  *      page by mapping the page into virtual memory and using
 2474  *      bcopy to copy the page, one machine dependent page at a
 2475  *      time.
       *
       *      src is mapped read-only at CADDR1 and dst writable at CADDR2,
       *      both taken from the sysmaps entry selected by the current CPU
       *      id and used under that entry's mutex.  Both slots must be empty
       *      on entry and are cleared again on exit.
 2476  */
 2477 void
 2478 pmap_copy_page(vm_page_t src, vm_page_t dst)
 2479 {
 2480         struct sysmaps *sysmaps;
 2481 
 2482         sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 2483         mtx_lock(&sysmaps->lock);
 2484         if (*sysmaps->CMAP1)
 2485                 panic("pmap_copy_page: CMAP1 busy");
 2486         if (*sysmaps->CMAP2)
 2487                 panic("pmap_copy_page: CMAP2 busy");
 2488         sched_pin();
              /* Here the old translations are flushed before the slots are filled. */
 2489         invlpg((u_int)sysmaps->CADDR1);
 2490         invlpg((u_int)sysmaps->CADDR2);
 2491         *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
 2492         *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
 2493         bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
 2494         *sysmaps->CMAP1 = 0;
 2495         *sysmaps->CMAP2 = 0;
 2496         sched_unpin();
 2497         mtx_unlock(&sysmaps->lock);
 2498 }
 2499 
 2500 /*
 2501  * Returns true if the pmap's pv is one of the first
 2502  * 16 pvs linked to from this page.  This count may
 2503  * be changed upwards or downwards in the future; it
 2504  * is only necessary that true be returned for a small
 2505  * subset of pmaps for proper page aging.
       *
       * Fictitious pages always yield FALSE.  For any other page the caller
       * must own vm_page_queue_mtx (asserted below).  A FALSE result does
       * not prove that the page is unmapped in pmap, only that no match was
       * found among the entries examined.
 2506  */
 2507 boolean_t
 2508 pmap_page_exists_quick(pmap, m)
 2509         pmap_t pmap;
 2510         vm_page_t m;
 2511 {
 2512         pv_entry_t pv;
 2513         int loops = 0;
 2514 
 2515         if (m->flags & PG_FICTITIOUS)
 2516                 return FALSE;
 2517 
 2518         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2519         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2520                 if (pv->pv_pmap == pmap) {
 2521                         return TRUE;
 2522                 }
                      /* Give up once 16 entries have been examined. */
 2523                 loops++;
 2524                 if (loops >= 16)
 2525                         break;
 2526         }
 2527         return (FALSE);
 2528 }
 2529 
 2530 #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 2531 /*
 2532  * Remove every non-wired mapping that "pmap" holds in the range
 2533  * [sva, eva) by walking the pmap's pv list; this aids process
 2534  * exit speeds and is much faster than pmap_remove when an entire
 2535  * address space is run down.  While PMAP_REMOVE_PAGES_CURPROC_ONLY
 2536  * is defined (see above) only the current process' pmap is
 2537  * accepted; any other pmap just produces a warning.
 2538  */
 2539 void
 2540 pmap_remove_pages(pmap, sva, eva)
 2541         pmap_t pmap;
 2542         vm_offset_t sva, eva;
 2543 {
 2544         pt_entry_t *pte, tpte;
 2545         vm_page_t m;
 2546         pv_entry_t pv, npv;
 2547 
 2548 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2549         if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
 2550                 printf("warning: pmap_remove_pages called with non-current pmap\n");
 2551                 return;
 2552         }
 2553 #endif
 2554         vm_page_lock_queues();
 2555         PMAP_LOCK(pmap);
 2556         sched_pin();
 2557         for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
 2558                 /* Entries outside [sva, eva) are left untouched. */
 2559                 if (pv->pv_va >= eva || pv->pv_va < sva) {
 2560                         npv = TAILQ_NEXT(pv, pv_plist);
 2561                         continue;
 2562                 }
 2563 
 2564 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2565                 pte = vtopte(pv->pv_va);
 2566 #else
 2567                 pte = pmap_pte_quick(pmap, pv->pv_va);
 2568 #endif
 2569                 tpte = *pte;
 2570 
 2571                 if (tpte == 0) {
 2572                         printf("TPTE at %p  IS ZERO @ VA %08x\n",
 2573                                                         pte, pv->pv_va);
 2574                         panic("bad pte");
 2575                 }
 2576 
 2577 /*
 2578  * We cannot remove wired pages from a process' mapping at this time
 2579  */
 2580                 if (tpte & PG_W) {
 2581                         npv = TAILQ_NEXT(pv, pv_plist);
 2582                         continue;
 2583                 }
 2584 
 2585                 m = PHYS_TO_VM_PAGE(tpte);
 2586                 KASSERT(m->phys_addr == (tpte & PG_FRAME),
 2587                     ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 2588                     m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
 2589                 /* NOTE(review): the check above already dereferenced m. */
 2590                 KASSERT(m < &vm_page_array[vm_page_array_size],
 2591                         ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
 2592 
 2593                 pmap->pm_stats.resident_count--;
 2594 
 2595                 pte_clear(pte);
 2596 
 2597                 /*
 2598                  * Carry a set modified bit over to the vm_page_t.
 2599                  */
 2600                 if (tpte & PG_M) {
 2601                         vm_page_dirty(m);
 2602                 }
 2603                 /* Pick up the successor before pv is unlinked and freed. */
 2604                 npv = TAILQ_NEXT(pv, pv_plist);
 2605                 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 2606 
 2607                 m->md.pv_list_count--;
 2608                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2609                 if (TAILQ_EMPTY(&m->md.pv_list))
 2610                         vm_page_flag_clear(m, PG_WRITEABLE);
 2611 
 2612                 pmap_unuse_pt(pmap, pv->pv_va);
 2613                 free_pv_entry(pv);
 2614         }
 2615         sched_unpin();
 2616         pmap_invalidate_all(pmap);
 2617         PMAP_UNLOCK(pmap);
 2618         vm_page_unlock_queues();
 2619 }
 2620 
 2621 /*
 2622  *      pmap_is_modified:
 2623  *
 2624  *      Tell the caller whether any mapping of the given physical
 2625  *      page has the modified (PG_M) bit set in its pte.
 2626  */
 2627 boolean_t
 2628 pmap_is_modified(vm_page_t m)
 2629 {
 2630         pt_entry_t *ptep;
 2631         pv_entry_t entry;
 2632         boolean_t dirty = FALSE;
 2633 
 2634         if ((m->flags & PG_FICTITIOUS) != 0)
 2635                 return (dirty);
 2636 
 2637         sched_pin();
 2638         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2639         TAILQ_FOREACH(entry, &m->md.pv_list, pv_list) {
 2640                 /*
 2641                  * Mappings that pmap_track_modified() rejects are
 2642                  * skipped; their modified bit is not consulted.
 2643                  */
 2644                 if (!pmap_track_modified(entry->pv_va))
 2645                         continue;
 2646 #if defined(PMAP_DIAGNOSTIC)
 2647                 if (entry->pv_pmap == NULL) {
 2648                         printf("Null pmap (tb) at va: 0x%x\n", entry->pv_va);
 2649                         continue;
 2650                 }
 2651 #endif
 2652                 PMAP_LOCK(entry->pv_pmap);
 2653                 ptep = pmap_pte_quick(entry->pv_pmap, entry->pv_va);
 2654                 if ((*ptep & PG_M) != 0)
 2655                         dirty = TRUE;
 2656                 PMAP_UNLOCK(entry->pv_pmap);
 2657                 /* One modified mapping settles the answer. */
 2658                 if (dirty)
 2659                         break;
 2660         }
 2661         sched_unpin();
 2662         return (dirty);
 2663 }
 2664 
 2665 /*
 2666  *      pmap_is_prefaultable:
 2667  *
 2668  *      Report whether the given virtual address is eligible for
 2669  *      prefault: its page directory entry is set and its pte is 0.
 2670  */
 2671 boolean_t
 2672 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 2673 {
 2674         boolean_t eligible = FALSE;
 2675 
 2676         PMAP_LOCK(pmap);
 2677         /* The pte is only examined when the page directory entry is set. */
 2678         if (*pmap_pde(pmap, addr) != 0) {
 2679                 pt_entry_t *ptep = vtopte(addr);
 2680 
 2681                 eligible = (*ptep == 0) ? TRUE : FALSE;
 2682         }
 2683         PMAP_UNLOCK(pmap);
 2684         return (eligible);
 2685 }
 2686 
 2687 /*
 2688  *      Clear the given bit in each of the given page's ptes.  The bit is
 2689  *      expressed as a 32-bit mask.  Consequently, if the pte is 64 bits in
 2690  *      size, only a bit within the least significant 32 can be cleared.
       *
       *      When bit is PG_RW, PG_M is cleared together with it, a PG_M that
       *      was set is handed to vm_page_dirty(), and PG_WRITEABLE is cleared
       *      from the page flags once all mappings have been visited.
       *      Fictitious pages, and PG_RW requests on pages without
       *      PG_WRITEABLE, return immediately.
 2691  */
 2692 static __inline void
 2693 pmap_clear_ptes(vm_page_t m, int bit)
 2694 {
 2695         register pv_entry_t pv;
 2696         pt_entry_t pbits, *pte;
 2697 
 2698         if ((m->flags & PG_FICTITIOUS) ||
 2699             (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
 2700                 return;
 2701 
 2702         sched_pin();
 2703         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2704         /*
 2705          * Loop over all current mappings setting/clearing as appropriate.
 2706          * If setting RO do we need to clear the VAC?
 2707          */
 2708         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2709                 /*
 2710                  * don't write protect pager mappings
 2711                  */
 2712                 if (bit == PG_RW) {
 2713                         if (!pmap_track_modified(pv->pv_va))
 2714                                 continue;
 2715                 }
 2716 
 2717 #if defined(PMAP_DIAGNOSTIC)
 2718                 if (!pv->pv_pmap) {
 2719                         printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
 2720                         continue;
 2721                 }
 2722 #endif
 2723 
 2724                 PMAP_LOCK(pv->pv_pmap);
 2725                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2726 retry:
 2727                 pbits = *pte;
 2728                 if (pbits & bit) {
 2729                         if (bit == PG_RW) {
 2730                                 /*
 2731                                  * Regardless of whether a pte is 32 or 64 bits
 2732                                  * in size, PG_RW and PG_M are among the least
 2733                                  * significant 32 bits.
                                        *
                                        * The update is retried from the re-read of
                                        * *pte whenever atomic_cmpset_int() fails.
 2734                                  */
 2735                                 if (!atomic_cmpset_int((u_int *)pte, pbits,
 2736                                     pbits & ~(PG_RW | PG_M)))
 2737                                         goto retry;
 2738                                 if (pbits & PG_M) {
 2739                                         vm_page_dirty(m);
 2740                                 }
 2741                         } else {
 2742                                 atomic_clear_int((u_int *)pte, bit);
 2743                         }
 2744                         pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 2745                 }
 2746                 PMAP_UNLOCK(pv->pv_pmap);
 2747         }
 2748         if (bit == PG_RW)
 2749                 vm_page_flag_clear(m, PG_WRITEABLE);
 2750         sched_unpin();
 2751 }
 2752 
 2753 /*
 2754  *      pmap_page_protect:
 2755  *
 2756  *      Lower the permission for all mappings of the page "m" to "prot".
 2757  */
 2758 void
 2759 pmap_page_protect(vm_page_t m, vm_prot_t prot)
 2760 {
 2761         /* Write access stays allowed: nothing to lower. */
 2762         if ((prot & VM_PROT_WRITE) != 0)
 2763                 return;
 2764         if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0)
 2765                 pmap_remove_all(m);     /* no access left at all */
 2766         else
 2767                 pmap_clear_ptes(m, PG_RW);      /* write-protect only */
 2768 }
 2769 
 2770 /*
 2771  *      pmap_ts_referenced:
 2772  *
 2773  *      Return a count of reference bits for a page, clearing those bits.
 2774  *      It is not necessary for every reference bit to be cleared, but it
 2775  *      is necessary that 0 only be returned when there are truly no
 2776  *      reference bits set.
 2777  *
 2778  *      XXX: The exact number of bits to check and clear is a matter that
 2779  *      should be tested and standardized at some point in the future for
 2780  *      optimal aging of shared pages.
       *
       *      As written, the walk makes at most one lap over the page's pv
       *      list and stops early after the fifth cleared reference bit.
       *      Fictitious pages return 0 without being examined.
 2781  */
 2782 int
 2783 pmap_ts_referenced(vm_page_t m)
 2784 {
 2785         register pv_entry_t pv, pvf, pvn;
 2786         pt_entry_t *pte;
 2787         pt_entry_t v;   /* NOTE(review): only holds pte_load()'s result for the PG_A test */
 2788         int rtval = 0;
 2789 
 2790         if (m->flags & PG_FICTITIOUS)
 2791                 return (rtval);
 2792 
 2793         sched_pin();
 2794         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2795         if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2796                 /* Remember the first entry so the walk ends after one lap. */
 2797                 pvf = pv;
 2798 
 2799                 do {
 2800                         pvn = TAILQ_NEXT(pv, pv_list);
 2801                         /* Rotate pv to the tail of the page's pv list. */
 2802                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2803 
 2804                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2805                         /* "continue" goes to the while test, which advances pv. */
 2806                         if (!pmap_track_modified(pv->pv_va))
 2807                                 continue;
 2808 
 2809                         PMAP_LOCK(pv->pv_pmap);
 2810                         pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2811 
 2812                         if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
 2813                                 atomic_clear_int((u_int *)pte, PG_A);
 2814                                 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 2815 
 2816                                 rtval++;
 2817                                 if (rtval > 4) {
 2818                                         PMAP_UNLOCK(pv->pv_pmap);
 2819                                         break;
 2820                                 }
 2821                         }
 2822                         PMAP_UNLOCK(pv->pv_pmap);
 2823                 } while ((pv = pvn) != NULL && pv != pvf);
 2824         }
 2825         sched_unpin();
 2826 
 2827         return (rtval);
 2828 }
 2829 
 2830 /*
 2831  *      Clear the modify bits on the specified physical page.
       *
       *      Thin wrapper: the work is done by pmap_clear_ptes() with PG_M.
 2832  */
 2833 void
 2834 pmap_clear_modify(vm_page_t m)
 2835 {
 2836         pmap_clear_ptes(m, PG_M);
 2837 }
 2838 
 2839 /*
 2840  *      pmap_clear_reference:
 2841  *
 2842  *      Clear the reference bit on the specified physical page.
       *
       *      Thin wrapper: the work is done by pmap_clear_ptes() with PG_A.
 2843  */
 2844 void
 2845 pmap_clear_reference(vm_page_t m)
 2846 {
 2847         pmap_clear_ptes(m, PG_A);
 2848 }
 2849 
 2850 /*
 2851  * Miscellaneous support routines follow
 2852  */
 2853 
 2854 /*
 2855  * Map "size" bytes of physical memory starting at "pa" into the
 2856  * kernel virtual address space and return a pointer to the mapped
 2857  * location of "pa" itself (page offset included).  This routine is
 2858  * meant for mapping device memory, NOT real memory.
 2859  */
 2860 void *
 2861 pmap_mapdev(pa, size)
 2862         vm_paddr_t pa;
 2863         vm_size_t size;
 2864 {
 2865         vm_offset_t base, cursor, pgoff;
 2866 
 2867         pgoff = pa & PAGE_MASK;
 2868         pa &= PG_FRAME;
 2869         size = roundup(pgoff + size, PAGE_SIZE);
 2870 
 2871         if (pa >= KERNLOAD || pa + size > KERNLOAD)
 2872                 base = kmem_alloc_nofault(kernel_map, size);
 2873         else
 2874                 base = KERNBASE + pa;
 2875         if (base == 0)
 2876                 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 2877 
 2878         /* Enter one kernel mapping per page of the rounded-up region. */
 2879         for (cursor = base; size > 0; size -= PAGE_SIZE) {
 2880                 pmap_kenter(cursor, pa);
 2881                 cursor += PAGE_SIZE;
 2882                 pa += PAGE_SIZE;
 2883         }
 2884         pmap_invalidate_range(kernel_pmap, base, cursor);
 2885         return ((void *)(base + pgoff));
 2886 }
 2887 /* Remove the kernel mappings covering [va, va + size) and free the range. */
 2888 void
 2889 pmap_unmapdev(va, size)
 2890         vm_offset_t va;
 2891         vm_size_t size;
 2892 {
 2893         vm_offset_t first, limit, page;
 2894         /* Nothing to do for a range within [KERNBASE, KERNBASE + KERNLOAD]. */
 2895         if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
 2896                 return;
 2897         first = va & PG_FRAME;
 2898         size = roundup((va & PAGE_MASK) + size, PAGE_SIZE);
 2899         limit = first + size;
 2900         for (page = first; page < limit; page += PAGE_SIZE)
 2901                 pmap_kremove(page);
 2902         pmap_invalidate_range(kernel_pmap, va, page);
 2903         kmem_free(kernel_map, first, size);
 2904 }
 2905 
 2906 /*
 2907  * Perform the pmap work for mincore: compute the MINCORE_* flags
 2908  * for the page mapped at "addr" in "pmap".  The result is 0 when
 2909  * no pte is present and MINCORE_INCORE alone for an unmanaged
 2910  * mapping.  Note that the "referenced by others" probe goes through
 2911  * pmap_ts_referenced() and may set PG_REFERENCED on the page.
 2912  */
 2913 int
 2914 pmap_mincore(pmap, addr)
 2915         pmap_t pmap;
 2916         vm_offset_t addr;
 2917 {
 2918         pt_entry_t *slot, entry;
 2919         vm_paddr_t frame;
 2920         vm_page_t pg;
 2921         int status;
 2922 
 2923         /* Take a snapshot of the pte while the pmap is locked. */
 2924         PMAP_LOCK(pmap);
 2925         slot = pmap_pte(pmap, addr);
 2926         if (slot != NULL)
 2927                 entry = *slot;
 2928         else
 2929                 entry = 0;
 2930         pmap_pte_release(slot);
 2931         PMAP_UNLOCK(pmap);
 2932 
 2933         if (entry == 0)
 2934                 return (0);
 2935         if ((entry & PG_MANAGED) == 0)
 2936                 return (MINCORE_INCORE);
 2937 
 2938         status = MINCORE_INCORE;
 2939         frame = entry & PG_FRAME;
 2940         pg = PHYS_TO_VM_PAGE(frame);
 2941 
 2942         if ((entry & PG_M) != 0) {
 2943                 /* Modified through this very mapping. */
 2944                 status |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 2945         } else {
 2946                 /* Otherwise ask whether anyone else dirtied the page. */
 2947                 vm_page_lock_queues();
 2948                 if (pg->dirty || pmap_is_modified(pg))
 2949                         status |= MINCORE_MODIFIED_OTHER;
 2950                 vm_page_unlock_queues();
 2951         }
 2952 
 2953         if ((entry & PG_A) != 0) {
 2954                 /* Referenced through this very mapping. */
 2955                 status |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 2956         } else {
 2957                 /* Otherwise look for a reference made elsewhere. */
 2958                 vm_page_lock_queues();
 2959                 if ((pg->flags & PG_REFERENCED) != 0 ||
 2960                     pmap_ts_referenced(pg) != 0) {
 2961                         status |= MINCORE_REFERENCED_OTHER;
 2962                         vm_page_flag_set(pg, PG_REFERENCED);
 2963                 }
 2964                 vm_page_unlock_queues();
 2965         }
 2966 
 2967         return (status);
 2968 }
 2969 /*
       * Switch this CPU to the pmap of td's process.  Inside a critical
       * section the routine moves the CPU's bit from the old pmap's pm_active
       * to the new one's, stores the physical address of the new page
       * directory (pm_pdpt under PAE, pm_pdir otherwise) in pcb_cr3, loads it
       * with load_cr3() and records the pmap in the per-CPU curpmap.
       */
 2970 void
 2971 pmap_activate(struct thread *td)
 2972 {
 2973         struct proc *p = td->td_proc;
 2974         pmap_t  pmap, oldpmap;
 2975         u_int32_t  cr3;
 2976 
 2977         critical_enter();
 2978         pmap = vmspace_pmap(td->td_proc->p_vmspace);
 2979         oldpmap = PCPU_GET(curpmap);
 2980 #if defined(SMP)
 2981         atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
 2982         atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
 2983 #else
 2984         oldpmap->pm_active &= ~1;
 2985         pmap->pm_active |= 1;
 2986 #endif
 2987 #ifdef PAE
 2988         cr3 = vtophys(pmap->pm_pdpt);
 2989 #else
 2990         cr3 = vtophys(pmap->pm_pdir);
 2991 #endif
 2992         /* XXXKSE this is wrong.
 2993          * pmap_activate is for the current thread on the current cpu
 2994          */
 2995         if (p->p_flag & P_SA) {
 2996                 /* Make sure all other cr3 entries are updated. */
 2997                 /* what if they are running?  XXXKSE (maybe abort them) */
                       /* NOTE(review): the loop reuses the td parameter as its iterator. */
 2998                 FOREACH_THREAD_IN_PROC(p, td) {
 2999                         td->td_pcb->pcb_cr3 = cr3;
 3000                 }
 3001         } else {
 3002                 td->td_pcb->pcb_cr3 = cr3;
 3003         }
 3004         load_cr3(cr3);
 3005         PCPU_SET(curpmap, pmap);
 3006         critical_exit();
 3007 }
 3008 /* Suggest a mapping address; big device objects get a PDRMASK-rounded one. */
 3009 vm_offset_t
 3010 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 3011 {
 3012 
 3013         if (obj != NULL && size >= NBPDR && obj->type == OBJT_DEVICE) {
 3014                 /* Device object of at least NBPDR bytes: round addr up. */
 3015                 addr = (addr + PDRMASK) & ~PDRMASK;
 3016         }
 3017 
 3018         return (addr);
 3019 }
 3020 
 3021 
 3022 #if defined(PMAP_DEBUG)
      /*
       * Debugging aid: print the valid ptes found below
       * VM_MIN_KERNEL_ADDRESS in the pmap of the process with the given
       * pid, two per output line, and return the number printed.
       *
       * Every pointer obtained from pmap_pte() is handed back through
       * pmap_pte_release(), mirroring what pmap_mincore() does.
       */
 3023 int
 3024 pmap_pid_dump(int pid)
 3025 {
 3026         pmap_t pmap;
 3027         struct proc *p;
 3028         int npte = 0;
 3029         int index;
 3030 
 3031         sx_slock(&allproc_lock);
 3032         LIST_FOREACH(p, &allproc, p_list) {
 3033                 int i, j;
 3034 
 3035                 if (p->p_pid != pid || p->p_vmspace == NULL)
 3036                         continue;
 3037                 index = 0;
 3038                 pmap = vmspace_pmap(p->p_vmspace);
 3039                 for (i = 0; i < NPDEPTD; i++) {
 3040                         /* Shift as vm_offset_t, not as a signed int. */
 3041                         vm_offset_t base = (vm_offset_t)i << PDRSHIFT;
 3042 
 3043                         if (!pmap_pde_v(&pmap->pm_pdir[i]))
 3044                                 continue;
 3045                         for (j = 0; j < NPTEPG; j++) {
 3046                                 vm_offset_t va = base + (j << PAGE_SHIFT);
 3047                                 pt_entry_t *pte, pa;
 3048                                 vm_page_t m;
 3049 
 3050                                 if (va >= (vm_offset_t)VM_MIN_KERNEL_ADDRESS) {
 3051                                         if (index)
 3052                                                 printf("\n");
 3053                                         sx_sunlock(&allproc_lock);
 3054                                         return (npte);
 3055                                 }
 3056                                 pte = pmap_pte(pmap, va);
 3057                                 if (pte == NULL || !pmap_pte_v(pte)) {
 3058                                         pmap_pte_release(pte);
 3059                                         continue;
 3060                                 }
 3061                                 pa = *pte;
 3062                                 pmap_pte_release(pte);
 3063                                 m = PHYS_TO_VM_PAGE(pa);
 3064                                 /* A pte may be 64 bits wide: print it via %jx. */
 3065                                 printf("va: 0x%x, pt: 0x%jx, h: %d, w: %d, f: 0x%x",
 3066                                     va, (uintmax_t)pa, m->hold_count,
 3067                                     m->wire_count, m->flags);
 3068                                 npte++;
 3069                                 if (++index >= 2) {
 3070                                         index = 0;
 3071                                         printf("\n");
 3072                                 } else {
 3073                                         printf(" ");
 3074                                 }
 3075                         }
 3076                 }
 3077         }
 3078         sx_sunlock(&allproc_lock);
 3079         return (npte);
 3080 }
 3081 #endif
 3082 
 3083 #if defined(DEBUG)
 3084 
 3085 static void     pads(pmap_t pm);
 3086 void            pmap_pvdump(vm_paddr_t pa);     /* type matches the definition */
 3087 
 3088 /*
       * Print "va:pte" for every valid pte that a non-kernel pmap holds at
       * or below UPT_MAX_ADDRESS; nothing is printed for kernel_pmap.
       *
       * va is a virtual address and is therefore kept in a vm_offset_t; the
       * pte is widened to uintmax_t for printing because a pte may be 64
       * bits in size.  Each pmap_pte() result is handed to
       * pmap_pte_release() afterwards, as pmap_mincore() does.
       */
 3089 static void
 3090 pads(pm)
 3091         pmap_t pm;
 3092 {
 3093         int i, j;
 3094         vm_offset_t va;
 3095         pt_entry_t *ptep;
 3096 
 3097         if (pm == kernel_pmap)
 3098                 return;
 3099         for (i = 0; i < NPDEPTD; i++) {
 3100                 if (pm->pm_pdir[i] == 0)
 3101                         continue;
 3102                 for (j = 0; j < NPTEPG; j++) {
 3103                         va = ((vm_offset_t)i << PDRSHIFT) + (j << PAGE_SHIFT);
 3104                         if (va > UPT_MAX_ADDRESS)
 3105                                 continue;
 3106                         ptep = pmap_pte(pm, va);
 3107                         if (ptep != NULL && pmap_pte_v(ptep))
 3108                                 printf("%x:%jx ", va, (uintmax_t)*ptep);
 3109                         pmap_pte_release(ptep);
 3110                 }
 3111         }
 3112 }
 3113 /*
       * Print the physical address "pa" followed by every pv entry (pmap and
       * va) on the pv list of the page it belongs to; pads() is called for
       * each pmap found.
       */
 3114 void
 3115 pmap_pvdump(pa)
 3116         vm_paddr_t pa;
 3117 {
 3118         pv_entry_t pv;
 3119         vm_page_t m;
 3120 
 3121         printf("pa %jx", (uintmax_t)pa);        /* pa may not fit %x */
 3122         m = PHYS_TO_VM_PAGE(pa);
 3123         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 3124                 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
 3125                 pads(pv->pv_pmap);
 3126         }
 3127         printf(" ");
 3128 }
 3129 #endif

Cache object: 6bf3a57b756da6fb10549a2e31eb969c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.