
FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/pmap.c


    1 /*-
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * the Systems Programming Group of the University of Utah Computer
   11  * Science Department and William Jolitz of UUNET Technologies Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the University of
   24  *      California, Berkeley and its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   42  */
   43 /*-
   44  * Copyright (c) 2003 Networks Associates Technology, Inc.
   45  * All rights reserved.
   46  *
   47  * This software was developed for the FreeBSD Project by Jake Burkholder,
   48  * Safeport Network Services, and Network Associates Laboratories, the
   49  * Security Research Division of Network Associates, Inc. under
   50  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   51  * CHATS research program.
   52  *
   53  * Redistribution and use in source and binary forms, with or without
   54  * modification, are permitted provided that the following conditions
   55  * are met:
   56  * 1. Redistributions of source code must retain the above copyright
   57  *    notice, this list of conditions and the following disclaimer.
   58  * 2. Redistributions in binary form must reproduce the above copyright
   59  *    notice, this list of conditions and the following disclaimer in the
   60  *    documentation and/or other materials provided with the distribution.
   61  *
   62  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   72  * SUCH DAMAGE.
   73  */
   74 
   75 #include <sys/cdefs.h>
   76 __FBSDID("$FreeBSD: releng/5.3/sys/i386/i386/pmap.c 136358 2004-10-10 19:08:00Z alc $");
   77 
   78 /*
   79  *      Manages physical address maps.
   80  *
   81  *      In addition to hardware address maps, this
   82  *      module is called upon to provide software-use-only
   83  *      maps which may or may not be stored in the same
   84  *      form as hardware maps.  These pseudo-maps are
   85  *      used to store intermediate results from copy
   86  *      operations to and from address spaces.
   87  *
   88  *      Since the information managed by this module is
   89  *      also stored by the logical address mapping module,
   90  *      this module may throw away valid virtual-to-physical
   91  *      mappings at almost any time.  However, invalidations
   92  *      of virtual-to-physical mappings must be done as
   93  *      requested.
   94  *
   95  *      In order to cope with hardware architectures which
   96  *      make virtual-to-physical map invalidates expensive,
   97  *      this module may delay invalidate or reduce-protection
   98  *      operations until such time as they are actually
   99  *      necessary.  This module is given full information as
  100  *      to which processors are currently using which maps,
  101  *      and to when physical maps must be made correct.
  102  */
  103 
  104 #include "opt_cpu.h"
  105 #include "opt_pmap.h"
  106 #include "opt_msgbuf.h"
  107 #include "opt_kstack_pages.h"
  108 
  109 #include <sys/param.h>
  110 #include <sys/systm.h>
  111 #include <sys/kernel.h>
  112 #include <sys/lock.h>
  113 #include <sys/malloc.h>
  114 #include <sys/mman.h>
  115 #include <sys/msgbuf.h>
  116 #include <sys/mutex.h>
  117 #include <sys/proc.h>
  118 #include <sys/sx.h>
  119 #include <sys/user.h>
  120 #include <sys/vmmeter.h>
  121 #include <sys/sched.h>
  122 #include <sys/sysctl.h>
  123 #ifdef SMP
  124 #include <sys/smp.h>
  125 #endif
  126 
  127 #include <vm/vm.h>
  128 #include <vm/vm_param.h>
  129 #include <vm/vm_kern.h>
  130 #include <vm/vm_page.h>
  131 #include <vm/vm_map.h>
  132 #include <vm/vm_object.h>
  133 #include <vm/vm_extern.h>
  134 #include <vm/vm_pageout.h>
  135 #include <vm/vm_pager.h>
  136 #include <vm/uma.h>
  137 
  138 #include <machine/cpu.h>
  139 #include <machine/cputypes.h>
  140 #include <machine/md_var.h>
  141 #include <machine/specialreg.h>
  142 #ifdef SMP
  143 #include <machine/smp.h>
  144 #endif
  145 
  146 #if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
  147 #define CPU_ENABLE_SSE
  148 #endif
  149 #if defined(CPU_DISABLE_SSE)
  150 #undef CPU_ENABLE_SSE
  151 #endif
  152 
  153 #ifndef PMAP_SHPGPERPROC
  154 #define PMAP_SHPGPERPROC 200
  155 #endif
  156 
  157 #if defined(DIAGNOSTIC)
  158 #define PMAP_DIAGNOSTIC
  159 #endif
  160 
  161 #define MINPV 2048
  162 
  163 #if !defined(PMAP_DIAGNOSTIC)
  164 #define PMAP_INLINE __inline
  165 #else
  166 #define PMAP_INLINE
  167 #endif
  168 
  169 /*
  170  * Get PDEs and PTEs for user/kernel address space
  171  */
  172 #define pmap_pde(m, v)  (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
  173 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
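      /*
       * A rough worked illustration of the address split these macros rely
       * on (non-PAE case): a 32-bit VA is a 10-bit page-directory index, a
       * 10-bit page-table index and a 12-bit byte offset, so pmap_pde()
       * just shifts the VA right by PDRSHIFT to select a pd_entry_t.  For
       * an arbitrary va = 0xc0321000:
       *
       *      pde index:  0xc0321000 >> PDRSHIFT                     = 0x300
       *      pte index: (0xc0321000 >> PAGE_SHIFT) & (NPTEPG - 1)   = 0x321
       *
       * Under PAE the field widths differ (2 MB superpages), but PDRSHIFT
       * and NPTEPG are redefined accordingly, so the macros read the same.
       */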
  174 
  175 #define pmap_pde_v(pte)         ((*(int *)pte & PG_V) != 0)
  176 #define pmap_pte_w(pte)         ((*(int *)pte & PG_W) != 0)
  177 #define pmap_pte_m(pte)         ((*(int *)pte & PG_M) != 0)
  178 #define pmap_pte_u(pte)         ((*(int *)pte & PG_A) != 0)
  179 #define pmap_pte_v(pte)         ((*(int *)pte & PG_V) != 0)
  180 
  181 #define pmap_pte_set_w(pte, v)  ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
  182     atomic_clear_int((u_int *)(pte), PG_W))
  183 #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
  184 
  185 struct pmap kernel_pmap_store;
  186 LIST_HEAD(pmaplist, pmap);
  187 static struct pmaplist allpmaps;
  188 static struct mtx allpmaps_lock;
  189 
  190 vm_paddr_t avail_end;   /* PA of last available physical page */
  191 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  192 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  193 static boolean_t pmap_initialized = FALSE;      /* Has pmap_init completed? */
  194 int pgeflag = 0;                /* PG_G or-in */
  195 int pseflag = 0;                /* PG_PS or-in */
  196 
  197 static int nkpt;
  198 vm_offset_t kernel_vm_end;
  199 extern u_int32_t KERNend;
  200 
  201 #ifdef PAE
  202 static uma_zone_t pdptzone;
  203 #endif
  204 
  205 /*
  206  * Data for the pv entry allocation mechanism
  207  */
  208 static uma_zone_t pvzone;
  209 static struct vm_object pvzone_obj;
  210 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  211 int pmap_pagedaemon_waken;
  212 
  213 /*
  214  * All those kernel PT submaps that BSD is so fond of
  215  */
  216 pt_entry_t *CMAP1 = 0;
  217 static pt_entry_t *CMAP2, *CMAP3;
  218 caddr_t CADDR1 = 0, ptvmmap = 0;
  219 static caddr_t CADDR2, CADDR3;
  220 static struct mtx CMAPCADDR12_lock;
  221 struct msgbuf *msgbufp = 0;
  222 
  223 /*
  224  * Crashdump maps.
  225  */
  226 static caddr_t crashdumpmap;
  227 
  228 #ifdef SMP
  229 extern pt_entry_t *SMPpt;
  230 #endif
  231 static pt_entry_t *PMAP1 = 0, *PMAP2;
  232 static pt_entry_t *PADDR1 = 0, *PADDR2;
  233 #ifdef SMP
  234 static int PMAP1cpu;
  235 static int PMAP1changedcpu;
  236 SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 
  237            &PMAP1changedcpu, 0,
  238            "Number of times pmap_pte_quick changed CPU with same PMAP1");
  239 #endif
  240 static int PMAP1changed;
  241 SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 
  242            &PMAP1changed, 0,
  243            "Number of times pmap_pte_quick changed PMAP1");
  244 static int PMAP1unchanged;
  245 SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 
  246            &PMAP1unchanged, 0,
  247            "Number of times pmap_pte_quick didn't change PMAP1");
  248 static struct mtx PMAP2mutex;
  249 
  250 static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
  251 static pv_entry_t get_pv_entry(void);
  252 static void     pmap_clear_ptes(vm_page_t m, int bit);
  253 
  254 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
  255 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
  256 static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
  257                                         vm_offset_t va);
  258 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
  259 
  260 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
  261 
  262 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
  263 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m);
  264 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
  265 static void pmap_pte_release(pt_entry_t *pte);
  266 static int pmap_unuse_pt(pmap_t, vm_offset_t);
  267 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  268 #ifdef PAE
  269 static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
  270 #endif
  271 
  272 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
  273 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
  274 
  275 /*
  276  * Move the kernel virtual free pointer to the next
  277  * 4MB.  This is used to help improve performance
  278  * by using a large (4MB) page for much of the kernel
  279  * (.text, .data, .bss)
  280  */
  281 static vm_offset_t
  282 pmap_kmem_choose(vm_offset_t addr)
  283 {
  284         vm_offset_t newaddr = addr;
  285 
  286 #ifndef DISABLE_PSE
  287         if (cpu_feature & CPUID_PSE)
  288                 newaddr = (addr + PDRMASK) & ~PDRMASK;
  289 #endif
  290         return newaddr;
  291 }
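      /*
       * In other words, when PSE is available the bootstrap KVA pointer is
       * rounded up to the next large-page (NBPDR) boundary.  With 4 MB
       * pages (non-PAE, PDRMASK = 0x3fffff), an arbitrary addr of
       * 0xc0345000 becomes (0xc0345000 + 0x3fffff) & ~0x3fffff = 0xc0400000.
       */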
  292 
  293 /*
  294  *      Bootstrap the system enough to run with virtual memory.
  295  *
  296  *      On the i386 this is called after mapping has already been enabled
  297  *      and just syncs the pmap module with what has already been done.
  298  *      [We can't call it easily with mapping off since the kernel is not
  299  *      mapped with PA == VA, hence we would have to relocate every address
  300  *      from the linked base (virtual) address "KERNBASE" to the actual
  301  *      (physical) address starting relative to 0]
  302  */
  303 void
  304 pmap_bootstrap(firstaddr, loadaddr)
  305         vm_paddr_t firstaddr;
  306         vm_paddr_t loadaddr;
  307 {
  308         vm_offset_t va;
  309         pt_entry_t *pte, *unused;
  310         int i;
  311 
  312         /*
  313          * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
  314          * large. It should instead be correctly calculated in locore.s and
  315          * not based on 'first' (which is a physical address, not a virtual
  316          * address, for the start of unused physical memory). The kernel
  317          * page tables are NOT double mapped and thus should not be included
  318          * in this calculation.
  319          */
  320         virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
  321         virtual_avail = pmap_kmem_choose(virtual_avail);
  322 
  323         virtual_end = VM_MAX_KERNEL_ADDRESS;
  324 
  325         /*
  326          * Initialize the kernel pmap (which is statically allocated).
  327          */
  328         PMAP_LOCK_INIT(kernel_pmap);
  329         kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
  330 #ifdef PAE
  331         kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
  332 #endif
  333         kernel_pmap->pm_active = -1;    /* don't allow deactivation */
  334         TAILQ_INIT(&kernel_pmap->pm_pvlist);
  335         LIST_INIT(&allpmaps);
  336         mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
  337         mtx_lock_spin(&allpmaps_lock);
  338         LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
  339         mtx_unlock_spin(&allpmaps_lock);
  340         nkpt = NKPT;
  341 
  342         /*
  343          * Reserve some special page table entries/VA space for temporary
  344          * mapping of pages.
  345          */
  346 #define SYSMAP(c, p, v, n)      \
  347         v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
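      /*
       * For reference, SYSMAP(caddr_t, CMAP1, CADDR1, 1) expands to
       *
       *      CADDR1 = (caddr_t)va; va += ((1)*PAGE_SIZE); CMAP1 = pte; pte += (1);
       *
       * i.e. each invocation carves n pages of KVA out of 'va' and records
       * the matching kernel PTE slot(s), so the mapping can later be
       * installed by simply storing into *CMAP1 (and friends).
       */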
  348 
  349         va = virtual_avail;
  350         pte = vtopte(va);
  351 
  352         /*
  353          * CMAP1/CMAP2 are used for zeroing and copying pages.
  354          * CMAP3 is used for the idle process page zeroing.
  355          */
  356         SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  357         SYSMAP(caddr_t, CMAP2, CADDR2, 1)
  358         SYSMAP(caddr_t, CMAP3, CADDR3, 1)
  359         *CMAP3 = 0;
  360 
  361         mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF);
  362 
  363         /*
  364          * Crashdump maps.
  365          */
  366         SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
  367 
  368         /*
  369          * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
  370          */
  371         SYSMAP(caddr_t, unused, ptvmmap, 1)
  372 
  373         /*
  374          * msgbufp is used to map the system message buffer.
  375          */
  376         SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
  377 
  378         /*
  379          * ptemap is used for pmap_pte_quick
  380          */
  381         SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
  382         SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
  383 
  384         mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
  385 
  386         virtual_avail = va;
  387 
  388         *CMAP1 = *CMAP2 = 0;
  389         for (i = 0; i < NKPT; i++)
  390                 PTD[i] = 0;
  391 
  392         /* Turn on PG_G on kernel page(s) */
  393         pmap_set_pg();
  394 }
  395 
  396 /*
  397  * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
  398  */
  399 void
  400 pmap_set_pg(void)
  401 {
  402         pd_entry_t pdir;
  403         pt_entry_t *pte;
  404         vm_offset_t va, endva;
  405         int i; 
  406 
  407         if (pgeflag == 0)
  408                 return;
  409 
  410         i = KERNLOAD/NBPDR;
  411         endva = KERNBASE + KERNend;
  412 
  413         if (pseflag) {
  414                 va = KERNBASE + KERNLOAD;
  415                 while (va  < endva) {
  416                         pdir = kernel_pmap->pm_pdir[KPTDI+i];
  417                         pdir |= pgeflag;
  418                         kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
  419                         invltlb();      /* Play it safe, invltlb() every time */
  420                         i++;
  421                         va += NBPDR;
  422                 }
  423         } else {
  424                 va = (vm_offset_t)btext;
  425                 while (va < endva) {
  426                         pte = vtopte(va);
  427                         if (*pte)
  428                                 *pte |= pgeflag;
  429                         invltlb();      /* Play it safe, invltlb() every time */
  430                         va += PAGE_SIZE;
  431                 }
  432         }
  433 }
  434 
  435 #ifdef PAE
  436 
  437 static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt");
  438 
  439 static void *
  440 pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  441 {
  442         *flags = UMA_SLAB_PRIV;
  443         return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL,
  444             1, 0));
  445 }
  446 #endif
  447 
  448 /*
  449  *      Initialize the pmap module.
  450  *      Called by vm_init, to initialize any structures that the pmap
  451  *      system needs to map virtual memory.
   452  *      pmap_init has been enhanced to support discontiguous
   453  *      physical memory in a fairly consistent way.
  454  */
  455 void
  456 pmap_init(void)
  457 {
  458         int i;
  459 
  460         /*
  461          * Allocate memory for random pmap data structures.  Includes the
  462          * pv_head_table.
  463          */
  464 
  465         for(i = 0; i < vm_page_array_size; i++) {
  466                 vm_page_t m;
  467 
  468                 m = &vm_page_array[i];
  469                 TAILQ_INIT(&m->md.pv_list);
  470                 m->md.pv_list_count = 0;
  471         }
  472 
  473         /*
  474          * init the pv free list
  475          */
  476         pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, 
  477             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
  478         uma_prealloc(pvzone, MINPV);
  479 
  480 #ifdef PAE
  481         pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
  482             NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
  483             UMA_ZONE_VM | UMA_ZONE_NOFREE);
  484         uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
  485 #endif
  486 
  487         /*
  488          * Now it is safe to enable pv_table recording.
  489          */
  490         pmap_initialized = TRUE;
  491 }
  492 
  493 /*
  494  * Initialize the address space (zone) for the pv_entries.  Set a
  495  * high water mark so that the system can recover from excessive
  496  * numbers of pv entries.
  497  */
  498 void
  499 pmap_init2()
  500 {
  501         int shpgperproc = PMAP_SHPGPERPROC;
  502 
  503         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  504         pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
  505         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  506         pv_entry_high_water = 9 * (pv_entry_max / 10);
  507         uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
  508 }
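      /*
       * As a numeric sketch (example values only): with the default
       * PMAP_SHPGPERPROC of 200, a hypothetical maxproc of 1000 and a
       * vm_page_array_size of 262144 pages (1 GB of RAM), pv_entry_max is
       * 200 * 1000 + 262144 = 462144 and pv_entry_high_water is
       * 9 * (462144 / 10) = 415926, i.e. roughly 90% of the maximum.
       */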
  509 
  510 
  511 /***************************************************
  512  * Low level helper routines.....
  513  ***************************************************/
  514 
  515 #if defined(PMAP_DIAGNOSTIC)
  516 
  517 /*
  518  * This code checks for non-writeable/modified pages.
  519  * This should be an invalid condition.
  520  */
  521 static int
  522 pmap_nw_modified(pt_entry_t ptea)
  523 {
  524         int pte;
  525 
  526         pte = (int) ptea;
  527 
  528         if ((pte & (PG_M|PG_RW)) == PG_M)
  529                 return 1;
  530         else
  531                 return 0;
  532 }
  533 #endif
  534 
  535 
  536 /*
  537  * this routine defines the region(s) of memory that should
  538  * not be tested for the modified bit.
  539  */
  540 static PMAP_INLINE int
  541 pmap_track_modified(vm_offset_t va)
  542 {
  543         if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 
  544                 return 1;
  545         else
  546                 return 0;
  547 }
  548 
  549 #ifdef I386_CPU
  550 /*
  551  * i386 only has "invalidate everything" and no SMP to worry about.
  552  */
  553 PMAP_INLINE void
  554 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  555 {
  556 
  557         if (pmap == kernel_pmap || pmap->pm_active)
  558                 invltlb();
  559 }
  560 
  561 PMAP_INLINE void
  562 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  563 {
  564 
  565         if (pmap == kernel_pmap || pmap->pm_active)
  566                 invltlb();
  567 }
  568 
  569 PMAP_INLINE void
  570 pmap_invalidate_all(pmap_t pmap)
  571 {
  572 
  573         if (pmap == kernel_pmap || pmap->pm_active)
  574                 invltlb();
  575 }
  576 #else /* !I386_CPU */
  577 #ifdef SMP
  578 /*
  579  * For SMP, these functions have to use the IPI mechanism for coherence.
  580  */
  581 void
  582 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  583 {
  584         u_int cpumask;
  585         u_int other_cpus;
  586 
  587         if (smp_started) {
  588                 if (!(read_eflags() & PSL_I))
  589                         panic("%s: interrupts disabled", __func__);
  590                 mtx_lock_spin(&smp_rv_mtx);
  591         } else
  592                 critical_enter();
  593         /*
  594          * We need to disable interrupt preemption but MUST NOT have
  595          * interrupts disabled here.
  596          * XXX we may need to hold schedlock to get a coherent pm_active
  597          * XXX critical sections disable interrupts again
  598          */
  599         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  600                 invlpg(va);
  601                 smp_invlpg(va);
  602         } else {
  603                 cpumask = PCPU_GET(cpumask);
  604                 other_cpus = PCPU_GET(other_cpus);
  605                 if (pmap->pm_active & cpumask)
  606                         invlpg(va);
  607                 if (pmap->pm_active & other_cpus)
  608                         smp_masked_invlpg(pmap->pm_active & other_cpus, va);
  609         }
  610         if (smp_started)
  611                 mtx_unlock_spin(&smp_rv_mtx);
  612         else
  613                 critical_exit();
  614 }
  615 
  616 void
  617 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  618 {
  619         u_int cpumask;
  620         u_int other_cpus;
  621         vm_offset_t addr;
  622 
  623         if (smp_started) {
  624                 if (!(read_eflags() & PSL_I))
  625                         panic("%s: interrupts disabled", __func__);
  626                 mtx_lock_spin(&smp_rv_mtx);
  627         } else
  628                 critical_enter();
  629         /*
  630          * We need to disable interrupt preemption but MUST NOT have
  631          * interrupts disabled here.
  632          * XXX we may need to hold schedlock to get a coherent pm_active
  633          * XXX critical sections disable interrupts again
  634          */
  635         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  636                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  637                         invlpg(addr);
  638                 smp_invlpg_range(sva, eva);
  639         } else {
  640                 cpumask = PCPU_GET(cpumask);
  641                 other_cpus = PCPU_GET(other_cpus);
  642                 if (pmap->pm_active & cpumask)
  643                         for (addr = sva; addr < eva; addr += PAGE_SIZE)
  644                                 invlpg(addr);
  645                 if (pmap->pm_active & other_cpus)
  646                         smp_masked_invlpg_range(pmap->pm_active & other_cpus,
  647                             sva, eva);
  648         }
  649         if (smp_started)
  650                 mtx_unlock_spin(&smp_rv_mtx);
  651         else
  652                 critical_exit();
  653 }
  654 
  655 void
  656 pmap_invalidate_all(pmap_t pmap)
  657 {
  658         u_int cpumask;
  659         u_int other_cpus;
  660 
  661         if (smp_started) {
  662                 if (!(read_eflags() & PSL_I))
  663                         panic("%s: interrupts disabled", __func__);
  664                 mtx_lock_spin(&smp_rv_mtx);
  665         } else
  666                 critical_enter();
  667         /*
  668          * We need to disable interrupt preemption but MUST NOT have
  669          * interrupts disabled here.
  670          * XXX we may need to hold schedlock to get a coherent pm_active
  671          * XXX critical sections disable interrupts again
  672          */
  673         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  674                 invltlb();
  675                 smp_invltlb();
  676         } else {
  677                 cpumask = PCPU_GET(cpumask);
  678                 other_cpus = PCPU_GET(other_cpus);
  679                 if (pmap->pm_active & cpumask)
  680                         invltlb();
  681                 if (pmap->pm_active & other_cpus)
  682                         smp_masked_invltlb(pmap->pm_active & other_cpus);
  683         }
  684         if (smp_started)
  685                 mtx_unlock_spin(&smp_rv_mtx);
  686         else
  687                 critical_exit();
  688 }
  689 #else /* !SMP */
  690 /*
  691  * Normal, non-SMP, 486+ invalidation functions.
  692  * We inline these within pmap.c for speed.
  693  */
  694 PMAP_INLINE void
  695 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  696 {
  697 
  698         if (pmap == kernel_pmap || pmap->pm_active)
  699                 invlpg(va);
  700 }
  701 
  702 PMAP_INLINE void
  703 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  704 {
  705         vm_offset_t addr;
  706 
  707         if (pmap == kernel_pmap || pmap->pm_active)
  708                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  709                         invlpg(addr);
  710 }
  711 
  712 PMAP_INLINE void
  713 pmap_invalidate_all(pmap_t pmap)
  714 {
  715 
  716         if (pmap == kernel_pmap || pmap->pm_active)
  717                 invltlb();
  718 }
  719 #endif /* !SMP */
  720 #endif /* !I386_CPU */
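      /*
       * Summary of the three invalidation variants selected above: the
       * I386_CPU build always does a full invltlb() because the 80386
       * lacks invlpg; the SMP build invalidates locally and then uses
       * smp_invlpg()/smp_invltlb() (or their masked variants) to shoot
       * down the TLBs of the other CPUs that have the pmap active; the
       * uniprocessor 486+ build uses plain invlpg()/invltlb().
       */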
  721 
  722 /*
  723  * Are we current address space or kernel?  N.B. We return FALSE when
  724  * a pmap's page table is in use because a kernel thread is borrowing
  725  * it.  The borrowed page table can change spontaneously, making any
  726  * dependence on its continued use subject to a race condition.
  727  */
  728 static __inline int
  729 pmap_is_current(pmap_t pmap)
  730 {
  731 
  732         return (pmap == kernel_pmap ||
  733                 (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
  734             (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
  735 }
  736 
  737 /*
  738  * If the given pmap is not the current or kernel pmap, the returned pte must
  739  * be released by passing it to pmap_pte_release().
  740  */
  741 pt_entry_t *
  742 pmap_pte(pmap_t pmap, vm_offset_t va)
  743 {
  744         pd_entry_t newpf;
  745         pd_entry_t *pde;
  746 
  747         pde = pmap_pde(pmap, va);
  748         if (*pde & PG_PS)
  749                 return (pde);
  750         if (*pde != 0) {
  751                 /* are we current address space or kernel? */
  752                 if (pmap_is_current(pmap))
  753                         return (vtopte(va));
  754                 mtx_lock(&PMAP2mutex);
  755                 newpf = *pde & PG_FRAME;
  756                 if ((*PMAP2 & PG_FRAME) != newpf) {
  757                         *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
  758                         pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
  759                 }
  760                 return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
  761         }
  762         return (0);
  763 }
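      /*
       * PMAP2/PADDR2 form a one-page mapping window reserved in
       * pmap_bootstrap(): PMAP2 is the kernel PTE that backs the VA
       * PADDR2.  When the target pmap is neither current nor the kernel
       * pmap, the window is pointed at the foreign page-table page and the
       * caller gets an address inside PADDR2; PMAP2mutex keeps the window
       * single-user until pmap_pte_release() drops it.
       */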
  764 
  765 /*
  766  * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
  767  * being NULL.
  768  */
  769 static __inline void
  770 pmap_pte_release(pt_entry_t *pte)
  771 {
  772 
  773         if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
  774                 mtx_unlock(&PMAP2mutex);
  775 }
  776 
  777 static __inline void
  778 invlcaddr(void *caddr)
  779 {
  780 #ifdef I386_CPU
  781         invltlb();
  782 #else
  783         invlpg((u_int)caddr);
  784 #endif
  785 }
  786 
  787 /*
  788  * Super fast pmap_pte routine best used when scanning
  789  * the pv lists.  This eliminates many coarse-grained
  790  * invltlb calls.  Note that many of the pv list
  791  * scans are across different pmaps.  It is very wasteful
  792  * to do an entire invltlb for checking a single mapping.
  793  *
  794  * If the given pmap is not the current pmap, vm_page_queue_mtx
  795  * must be held and curthread pinned to a CPU.
  796  */
  797 static pt_entry_t *
  798 pmap_pte_quick(pmap_t pmap, vm_offset_t va)
  799 {
  800         pd_entry_t newpf;
  801         pd_entry_t *pde;
  802 
  803         pde = pmap_pde(pmap, va);
  804         if (*pde & PG_PS)
  805                 return (pde);
  806         if (*pde != 0) {
  807                 /* are we current address space or kernel? */
  808                 if (pmap_is_current(pmap))
  809                         return (vtopte(va));
  810                 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
  811                 KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
  812                 newpf = *pde & PG_FRAME;
  813                 if ((*PMAP1 & PG_FRAME) != newpf) {
  814                         *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
  815 #ifdef SMP
  816                         PMAP1cpu = PCPU_GET(cpuid);
  817 #endif
  818                         invlcaddr(PADDR1);
  819                         PMAP1changed++;
  820                 } else
  821 #ifdef SMP
  822                 if (PMAP1cpu != PCPU_GET(cpuid)) {
  823                         PMAP1cpu = PCPU_GET(cpuid);
  824                         invlcaddr(PADDR1);
  825                         PMAP1changedcpu++;
  826                 } else
  827 #endif
  828                         PMAP1unchanged++;
  829                 return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
  830         }
  831         return (0);
  832 }
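      /*
       * PMAP1/PADDR1 is the fast-path counterpart of the PMAP2 window
       * above: instead of taking PMAP2mutex it relies on the caller
       * already holding vm_page_queue_mtx and having pinned curthread to a
       * CPU, and the debug.PMAP1{changed,changedcpu,unchanged} sysctls
       * count how often the window actually had to be re-pointed or
       * re-invalidated.
       */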
  833 
  834 /*
  835  *      Routine:        pmap_extract
  836  *      Function:
  837  *              Extract the physical page address associated
  838  *              with the given map/virtual_address pair.
  839  */
  840 vm_paddr_t 
  841 pmap_extract(pmap_t pmap, vm_offset_t va)
  842 {
  843         vm_paddr_t rtval;
  844         pt_entry_t *pte;
  845         pd_entry_t pde;
  846 
  847         rtval = 0;
  848         PMAP_LOCK(pmap);
  849         pde = pmap->pm_pdir[va >> PDRSHIFT];
  850         if (pde != 0) {
  851                 if ((pde & PG_PS) != 0) {
  852                         rtval = (pde & ~PDRMASK) | (va & PDRMASK);
  853                         PMAP_UNLOCK(pmap);
  854                         return rtval;
  855                 }
  856                 pte = pmap_pte(pmap, va);
  857                 rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
  858                 pmap_pte_release(pte);
  859         }
  860         PMAP_UNLOCK(pmap);
  861         return (rtval);
  862 }
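      /*
       * For a 4 MB (PG_PS) mapping the physical address is composed as
       * (pde & ~PDRMASK) | (va & PDRMASK); e.g. with an arbitrary pde
       * frame of 0x00800000 and va 0xc0123456 (non-PAE) the result is
       * 0x00800000 | 0x123456 = 0x00923456.
       */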
  863 
  864 /*
  865  *      Routine:        pmap_extract_and_hold
  866  *      Function:
  867  *              Atomically extract and hold the physical page
  868  *              with the given pmap and virtual address pair
  869  *              if that mapping permits the given protection.
  870  */
  871 vm_page_t
  872 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
  873 {
  874         pd_entry_t pde;
  875         pt_entry_t pte;
  876         vm_page_t m;
  877 
  878         m = NULL;
  879         vm_page_lock_queues();
  880         PMAP_LOCK(pmap);
  881         pde = *pmap_pde(pmap, va);
  882         if (pde != 0) {
  883                 if (pde & PG_PS) {
  884                         if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
  885                                 m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) |
  886                                     (va & PDRMASK));
  887                                 vm_page_hold(m);
  888                         }
  889                 } else {
  890                         sched_pin();
  891                         pte = *pmap_pte_quick(pmap, va);
  892                         if (pte != 0 &&
  893                             ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
  894                                 m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
  895                                 vm_page_hold(m);
  896                         }
  897                         sched_unpin();
  898                 }
  899         }
  900         vm_page_unlock_queues();
  901         PMAP_UNLOCK(pmap);
  902         return (m);
  903 }
  904 
  905 /***************************************************
  906  * Low level mapping routines.....
  907  ***************************************************/
  908 
  909 /*
  910  * Add a wired page to the kva.
  911  * Note: not SMP coherent.
  912  */
  913 PMAP_INLINE void 
  914 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
  915 {
  916         pt_entry_t *pte;
  917 
  918         pte = vtopte(va);
  919         pte_store(pte, pa | PG_RW | PG_V | pgeflag);
  920 }
  921 
  922 /*
  923  * Remove a page from the kernel pagetables.
  924  * Note: not SMP coherent.
  925  */
  926 PMAP_INLINE void
  927 pmap_kremove(vm_offset_t va)
  928 {
  929         pt_entry_t *pte;
  930 
  931         pte = vtopte(va);
  932         pte_clear(pte);
  933 }
  934 
  935 /*
  936  *      Used to map a range of physical addresses into kernel
  937  *      virtual address space.
  938  *
  939  *      The value passed in '*virt' is a suggested virtual address for
  940  *      the mapping. Architectures which can support a direct-mapped
  941  *      physical to virtual region can return the appropriate address
  942  *      within that region, leaving '*virt' unchanged. Other
  943  *      architectures should map the pages starting at '*virt' and
  944  *      update '*virt' with the first usable address after the mapped
  945  *      region.
  946  */
  947 vm_offset_t
  948 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
  949 {
  950         vm_offset_t va, sva;
  951 
  952         va = sva = *virt;
  953         while (start < end) {
  954                 pmap_kenter(va, start);
  955                 va += PAGE_SIZE;
  956                 start += PAGE_SIZE;
  957         }
  958         pmap_invalidate_range(kernel_pmap, sva, va);
  959         *virt = va;
  960         return (sva);
  961 }
  962 
  963 
  964 /*
   965  * Add a list of wired pages to the kva.
   966  * This routine is only used for temporary
  967  * kernel mappings that do not need to have
  968  * page modification or references recorded.
  969  * Note that old mappings are simply written
  970  * over.  The page *must* be wired.
  971  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  972  */
  973 void
  974 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
  975 {
  976         vm_offset_t va;
  977 
  978         va = sva;
  979         while (count-- > 0) {
  980                 pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
  981                 va += PAGE_SIZE;
  982                 m++;
  983         }
  984         pmap_invalidate_range(kernel_pmap, sva, va);
  985 }
  986 
  987 /*
  988  * This routine tears out page mappings from the
  989  * kernel -- it is meant only for temporary mappings.
  990  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  991  */
  992 void
  993 pmap_qremove(vm_offset_t sva, int count)
  994 {
  995         vm_offset_t va;
  996 
  997         va = sva;
  998         while (count-- > 0) {
  999                 pmap_kremove(va);
 1000                 va += PAGE_SIZE;
 1001         }
 1002         pmap_invalidate_range(kernel_pmap, sva, va);
 1003 }
 1004 
 1005 /***************************************************
 1006  * Page table page management routines.....
 1007  ***************************************************/
 1008 
 1009 /*
  1010  * This routine unwires page table pages, and when the wire count
  1011  * drops to zero, frees the page table page.
 1012  */
 1013 static PMAP_INLINE int
 1014 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
 1015 {
 1016 
 1017         --m->wire_count;
 1018         if (m->wire_count == 0)
 1019                 return _pmap_unwire_pte_hold(pmap, m);
 1020         else
 1021                 return 0;
 1022 }
 1023 
 1024 static int 
 1025 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
 1026 {
 1027         vm_offset_t pteva;
 1028 
 1029         /*
 1030          * unmap the page table page
 1031          */
 1032         pmap->pm_pdir[m->pindex] = 0;
 1033         --pmap->pm_stats.resident_count;
 1034 
 1035         /*
 1036          * Do an invltlb to make the invalidated mapping
 1037          * take effect immediately.
 1038          */
 1039         pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
 1040         pmap_invalidate_page(pmap, pteva);
 1041 
 1042         vm_page_free_zero(m);
 1043         atomic_subtract_int(&cnt.v_wire_count, 1);
 1044         return 1;
 1045 }
 1046 
 1047 /*
 1048  * After removing a page table entry, this routine is used to
 1049  * conditionally free the page, and manage the hold/wire counts.
 1050  */
 1051 static int
 1052 pmap_unuse_pt(pmap_t pmap, vm_offset_t va)
 1053 {
 1054         pd_entry_t ptepde;
 1055         vm_page_t mpte;
 1056 
 1057         if (va >= VM_MAXUSER_ADDRESS)
 1058                 return 0;
 1059         ptepde = *pmap_pde(pmap, va);
 1060         mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
 1061         return pmap_unwire_pte_hold(pmap, mpte);
 1062 }
 1063 
 1064 void
 1065 pmap_pinit0(pmap)
 1066         struct pmap *pmap;
 1067 {
 1068 
 1069         PMAP_LOCK_INIT(pmap);
 1070         pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 1071 #ifdef PAE
 1072         pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 1073 #endif
 1074         pmap->pm_active = 0;
 1075         PCPU_SET(curpmap, pmap);
 1076         TAILQ_INIT(&pmap->pm_pvlist);
 1077         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1078         mtx_lock_spin(&allpmaps_lock);
 1079         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1080         mtx_unlock_spin(&allpmaps_lock);
 1081 }
 1082 
 1083 /*
 1084  * Initialize a preallocated and zeroed pmap structure,
 1085  * such as one in a vmspace structure.
 1086  */
 1087 void
 1088 pmap_pinit(pmap)
 1089         register struct pmap *pmap;
 1090 {
 1091         vm_page_t m, ptdpg[NPGPTD];
 1092         vm_paddr_t pa;
 1093         static int color;
 1094         int i;
 1095 
 1096         PMAP_LOCK_INIT(pmap);
 1097 
 1098         /*
 1099          * No need to allocate page table space yet but we do need a valid
 1100          * page directory table.
 1101          */
 1102         if (pmap->pm_pdir == NULL) {
 1103                 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
 1104                     NBPTD);
 1105 #ifdef PAE
 1106                 pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
 1107                 KASSERT(((vm_offset_t)pmap->pm_pdpt &
 1108                     ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
 1109                     ("pmap_pinit: pdpt misaligned"));
 1110                 KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
 1111                     ("pmap_pinit: pdpt above 4g"));
 1112 #endif
 1113         }
 1114 
 1115         /*
 1116          * allocate the page directory page(s)
 1117          */
 1118         for (i = 0; i < NPGPTD;) {
 1119                 m = vm_page_alloc(NULL, color++,
 1120                     VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 1121                     VM_ALLOC_ZERO);
 1122                 if (m == NULL)
 1123                         VM_WAIT;
 1124                 else {
 1125                         ptdpg[i++] = m;
 1126                 }
 1127         }
 1128 
 1129         pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 1130 
 1131         for (i = 0; i < NPGPTD; i++) {
 1132                 if ((ptdpg[i]->flags & PG_ZERO) == 0)
 1133                         bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
 1134         }
 1135 
 1136         mtx_lock_spin(&allpmaps_lock);
 1137         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1138         mtx_unlock_spin(&allpmaps_lock);
 1139         /* Wire in kernel global address entries. */
 1140         /* XXX copies current process, does not fill in MPPTDI */
 1141         bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 1142 #ifdef SMP
 1143         pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
 1144 #endif
 1145 
 1146         /* install self-referential address mapping entry(s) */
 1147         for (i = 0; i < NPGPTD; i++) {
 1148                 pa = VM_PAGE_TO_PHYS(ptdpg[i]);
 1149                 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
 1150 #ifdef PAE
 1151                 pmap->pm_pdpt[i] = pa | PG_V;
 1152 #endif
 1153         }
 1154 
 1155         pmap->pm_active = 0;
 1156         TAILQ_INIT(&pmap->pm_pvlist);
 1157         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1158 }
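      /*
       * The "self-referential" entries installed above point the PTDPTDI
       * slot(s) of the page directory back at the page-directory page(s)
       * themselves.  That is the classic i386 recursive-mapping trick: it
       * makes the pmap's page tables appear as a linear array of PTEs at a
       * fixed virtual address, which is what vtopte() and the PTD/PTDpde
       * symbols rely on when the pmap is current.
       */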
 1159 
 1160 /*
 1161  * this routine is called if the page table page is not
 1162  * mapped correctly.
 1163  */
 1164 static vm_page_t
 1165 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
 1166 {
 1167         vm_paddr_t ptepa;
 1168         vm_page_t m;
 1169 
 1170         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1171             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1172             ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1173 
 1174         /*
 1175          * Allocate a page table page.
 1176          */
 1177         if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 1178             VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 1179                 if (flags & M_WAITOK) {
 1180                         PMAP_UNLOCK(pmap);
 1181                         vm_page_unlock_queues();
 1182                         VM_WAIT;
 1183                         vm_page_lock_queues();
 1184                         PMAP_LOCK(pmap);
 1185                 }
 1186 
 1187                 /*
 1188                  * Indicate the need to retry.  While waiting, the page table
 1189                  * page may have been allocated.
 1190                  */
 1191                 return (NULL);
 1192         }
 1193         if ((m->flags & PG_ZERO) == 0)
 1194                 pmap_zero_page(m);
 1195 
 1196         /*
 1197          * Map the pagetable page into the process address space, if
 1198          * it isn't already there.
 1199          */
 1200 
 1201         pmap->pm_stats.resident_count++;
 1202 
 1203         ptepa = VM_PAGE_TO_PHYS(m);
 1204         pmap->pm_pdir[ptepindex] =
 1205                 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 1206 
 1207         return m;
 1208 }
 1209 
 1210 static vm_page_t
 1211 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 1212 {
 1213         unsigned ptepindex;
 1214         pd_entry_t ptepa;
 1215         vm_page_t m;
 1216 
 1217         KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
 1218             (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
 1219             ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
 1220 
 1221         /*
 1222          * Calculate pagetable page index
 1223          */
 1224         ptepindex = va >> PDRSHIFT;
 1225 retry:
 1226         /*
 1227          * Get the page directory entry
 1228          */
 1229         ptepa = pmap->pm_pdir[ptepindex];
 1230 
 1231         /*
 1232          * This supports switching from a 4MB page to a
 1233          * normal 4K page.
 1234          */
 1235         if (ptepa & PG_PS) {
 1236                 pmap->pm_pdir[ptepindex] = 0;
 1237                 ptepa = 0;
 1238                 pmap_invalidate_all(kernel_pmap);
 1239         }
 1240 
 1241         /*
 1242          * If the page table page is mapped, we just increment the
 1243          * hold count, and activate it.
 1244          */
 1245         if (ptepa) {
 1246                 m = PHYS_TO_VM_PAGE(ptepa);
 1247                 m->wire_count++;
 1248         } else {
 1249                 /*
 1250                  * Here if the pte page isn't mapped, or if it has
 1251                  * been deallocated. 
 1252                  */
 1253                 m = _pmap_allocpte(pmap, ptepindex, flags);
 1254                 if (m == NULL && (flags & M_WAITOK))
 1255                         goto retry;
 1256         }
 1257         return (m);
 1258 }
 1259 
 1260 
 1261 /***************************************************
 1262 * Pmap allocation/deallocation routines.
 1263  ***************************************************/
 1264 
 1265 #ifdef SMP
 1266 /*
 1267  * Deal with a SMP shootdown of other users of the pmap that we are
 1268  * trying to dispose of.  This can be a bit hairy.
 1269  */
 1270 static u_int *lazymask;
 1271 static u_int lazyptd;
 1272 static volatile u_int lazywait;
 1273 
 1274 void pmap_lazyfix_action(void);
 1275 
 1276 void
 1277 pmap_lazyfix_action(void)
 1278 {
 1279         u_int mymask = PCPU_GET(cpumask);
 1280 
 1281         if (rcr3() == lazyptd)
 1282                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1283         atomic_clear_int(lazymask, mymask);
 1284         atomic_store_rel_int(&lazywait, 1);
 1285 }
 1286 
 1287 static void
 1288 pmap_lazyfix_self(u_int mymask)
 1289 {
 1290 
 1291         if (rcr3() == lazyptd)
 1292                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1293         atomic_clear_int(lazymask, mymask);
 1294 }
 1295 
 1296 
 1297 static void
 1298 pmap_lazyfix(pmap_t pmap)
 1299 {
 1300         u_int mymask = PCPU_GET(cpumask);
 1301         u_int mask;
 1302         register u_int spins;
 1303 
 1304         while ((mask = pmap->pm_active) != 0) {
 1305                 spins = 50000000;
 1306                 mask = mask & -mask;    /* Find least significant set bit */
 1307                 mtx_lock_spin(&smp_rv_mtx);
 1308 #ifdef PAE
 1309                 lazyptd = vtophys(pmap->pm_pdpt);
 1310 #else
 1311                 lazyptd = vtophys(pmap->pm_pdir);
 1312 #endif
 1313                 if (mask == mymask) {
 1314                         lazymask = &pmap->pm_active;
 1315                         pmap_lazyfix_self(mymask);
 1316                 } else {
 1317                         atomic_store_rel_int((u_int *)&lazymask,
 1318                             (u_int)&pmap->pm_active);
 1319                         atomic_store_rel_int(&lazywait, 0);
 1320                         ipi_selected(mask, IPI_LAZYPMAP);
 1321                         while (lazywait == 0) {
 1322                                 ia32_pause();
 1323                                 if (--spins == 0)
 1324                                         break;
 1325                         }
 1326                 }
 1327                 mtx_unlock_spin(&smp_rv_mtx);
 1328                 if (spins == 0)
 1329                         printf("pmap_lazyfix: spun for 50000000\n");
 1330         }
 1331 }
 1332 
 1333 #else   /* SMP */
 1334 
 1335 /*
 1336  * Cleaning up on uniprocessor is easy.  For various reasons, we're
 1337  * unlikely to have to even execute this code, including the fact
 1338  * that the cleanup is deferred until the parent does a wait(2), which
 1339  * means that another userland process has run.
 1340  */
 1341 static void
 1342 pmap_lazyfix(pmap_t pmap)
 1343 {
 1344         u_int cr3;
 1345 
 1346         cr3 = vtophys(pmap->pm_pdir);
 1347         if (cr3 == rcr3()) {
 1348                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1349                 pmap->pm_active &= ~(PCPU_GET(cpumask));
 1350         }
 1351 }
 1352 #endif  /* SMP */
 1353 
 1354 /*
 1355  * Release any resources held by the given physical map.
 1356  * Called when a pmap initialized by pmap_pinit is being released.
 1357  * Should only be called if the map contains no valid mappings.
 1358  */
 1359 void
 1360 pmap_release(pmap_t pmap)
 1361 {
 1362         vm_page_t m, ptdpg[NPGPTD];
 1363         int i;
 1364 
 1365         KASSERT(pmap->pm_stats.resident_count == 0,
 1366             ("pmap_release: pmap resident count %ld != 0",
 1367             pmap->pm_stats.resident_count));
 1368 
 1369         pmap_lazyfix(pmap);
 1370         mtx_lock_spin(&allpmaps_lock);
 1371         LIST_REMOVE(pmap, pm_list);
 1372         mtx_unlock_spin(&allpmaps_lock);
 1373 
 1374         for (i = 0; i < NPGPTD; i++)
 1375                 ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i]);
 1376 
 1377         bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
 1378             sizeof(*pmap->pm_pdir));
 1379 #ifdef SMP
 1380         pmap->pm_pdir[MPPTDI] = 0;
 1381 #endif
 1382 
 1383         pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
 1384 
 1385         vm_page_lock_queues();
 1386         for (i = 0; i < NPGPTD; i++) {
 1387                 m = ptdpg[i];
 1388 #ifdef PAE
 1389                 KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
 1390                     ("pmap_release: got wrong ptd page"));
 1391 #endif
 1392                 m->wire_count--;
 1393                 atomic_subtract_int(&cnt.v_wire_count, 1);
 1394                 vm_page_free_zero(m);
 1395         }
 1396         vm_page_unlock_queues();
 1397         PMAP_LOCK_DESTROY(pmap);
 1398 }
 1399 
 1400 static int
 1401 kvm_size(SYSCTL_HANDLER_ARGS)
 1402 {
 1403         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 1404 
 1405         return sysctl_handle_long(oidp, &ksize, 0, req);
 1406 }
 1407 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 1408     0, 0, kvm_size, "IU", "Size of KVM");
 1409 
 1410 static int
 1411 kvm_free(SYSCTL_HANDLER_ARGS)
 1412 {
 1413         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1414 
 1415         return sysctl_handle_long(oidp, &kfree, 0, req);
 1416 }
 1417 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 1418     0, 0, kvm_free, "IU", "Amount of KVM free");
 1419 
 1420 /*
 1421  * grow the number of kernel page table entries, if needed
 1422  */
 1423 void
 1424 pmap_growkernel(vm_offset_t addr)
 1425 {
 1426         struct pmap *pmap;
 1427         vm_paddr_t ptppaddr;
 1428         vm_page_t nkpg;
 1429         pd_entry_t newpdir;
 1430         pt_entry_t *pde;
 1431 
 1432         mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 1433         if (kernel_vm_end == 0) {
 1434                 kernel_vm_end = KERNBASE;
 1435                 nkpt = 0;
 1436                 while (pdir_pde(PTD, kernel_vm_end)) {
 1437                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1438                         nkpt++;
 1439                 }
 1440         }
 1441         addr = roundup2(addr, PAGE_SIZE * NPTEPG);
 1442         while (kernel_vm_end < addr) {
 1443                 if (pdir_pde(PTD, kernel_vm_end)) {
 1444                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1445                         continue;
 1446                 }
 1447 
 1448                 /*
 1449                  * This index is bogus, but out of the way
 1450                  */
 1451                 nkpg = vm_page_alloc(NULL, nkpt,
 1452                     VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
 1453                 if (!nkpg)
 1454                         panic("pmap_growkernel: no memory to grow kernel");
 1455 
 1456                 nkpt++;
 1457 
 1458                 pmap_zero_page(nkpg);
 1459                 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 1460                 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 1461                 pdir_pde(PTD, kernel_vm_end) = newpdir;
 1462 
 1463                 mtx_lock_spin(&allpmaps_lock);
 1464                 LIST_FOREACH(pmap, &allpmaps, pm_list) {
 1465                         pde = pmap_pde(pmap, kernel_vm_end);
 1466                         pde_store(pde, newpdir);
 1467                 }
 1468                 mtx_unlock_spin(&allpmaps_lock);
 1469                 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1470         }
 1471 }
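      /*
       * Kernel KVA therefore grows one page-table page at a time
       * (PAGE_SIZE * NPTEPG, i.e. 4 MB of KVA without PAE), and each new
       * kernel PDE is copied into every pmap on the allpmaps list so that
       * all address spaces keep an identical view of the kernel portion of
       * the page directory.
       */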
 1472 
 1473 
 1474 /***************************************************
 1475  * page management routines.
 1476  ***************************************************/
 1477 
 1478 /*
 1479  * free the pv_entry back to the free list
 1480  */
 1481 static PMAP_INLINE void
 1482 free_pv_entry(pv_entry_t pv)
 1483 {
 1484         pv_entry_count--;
 1485         uma_zfree(pvzone, pv);
 1486 }
 1487 
 1488 /*
 1489  * get a new pv_entry, allocating a block from the system
 1490  * when needed.
 1491  * the memory allocation is performed bypassing the malloc code
 1492  * because of the possibility of allocations at interrupt time.
 1493  */
 1494 static pv_entry_t
 1495 get_pv_entry(void)
 1496 {
 1497         pv_entry_count++;
 1498         if (pv_entry_high_water &&
 1499                 (pv_entry_count > pv_entry_high_water) &&
 1500                 (pmap_pagedaemon_waken == 0)) {
 1501                 pmap_pagedaemon_waken = 1;
 1502                 wakeup (&vm_pages_needed);
 1503         }
 1504         return uma_zalloc(pvzone, M_NOWAIT);
 1505 }
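      /*
       * Each managed physical page keeps a list of pv entries, one per
       * (pmap, va) mapping of that page, threaded both on the vm_page's
       * md.pv_list and on the owning pmap's pm_pvlist.  That is what lets
       * routines like pmap_remove_entry() below find and tear down every
       * mapping of a page.  get_pv_entry() wakes the pagedaemon once the
       * global count crosses pv_entry_high_water so the system can recover
       * before the pv entry zone is exhausted.
       */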
 1506 
 1507 
 1508 static int
 1509 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 1510 {
 1511         pv_entry_t pv;
 1512         int rtval;
 1513 
 1514         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1515         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1516         if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 1517                 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 1518                         if (pmap == pv->pv_pmap && va == pv->pv_va) 
 1519                                 break;
 1520                 }
 1521         } else {
 1522                 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
 1523                         if (va == pv->pv_va) 
 1524                                 break;
 1525                 }
 1526         }
 1527 
 1528         rtval = 0;
 1529         if (pv) {
 1530                 rtval = pmap_unuse_pt(pmap, va);
 1531                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1532                 m->md.pv_list_count--;
 1533                 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 1534                         vm_page_flag_clear(m, PG_WRITEABLE);
 1535 
 1536                 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 1537                 free_pv_entry(pv);
 1538         }
 1539                         
 1540         return rtval;
 1541 }
 1542 
 1543 /*
 1544  * Create a pv entry for the page m mapped at
 1545  * (pmap, va).
 1546  */
 1547 static void
 1548 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
 1549 {
 1550         pv_entry_t pv;
 1551 
 1552         pv = get_pv_entry();
 1553         pv->pv_va = va;
 1554         pv->pv_pmap = pmap;
 1555 
 1556         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1557         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1558         TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 1559         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 1560         m->md.pv_list_count++;
 1561 }
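
      /*
       * Illustrative sketch (not from this file; "pmap", "va" and "m" are
       * placeholders): a pv entry is created when a managed mapping is
       * established and freed when it is torn down, always with both the
       * page queues lock and the pmap lock held:
       */
#if 0
              vm_page_lock_queues();
              PMAP_LOCK(pmap);
              pmap_insert_entry(pmap, va, m);         /* managed mapping created */
              /* ... */
              pmap_remove_entry(pmap, m, va);         /* managed mapping destroyed */
              PMAP_UNLOCK(pmap);
              vm_page_unlock_queues();
#endif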
 1562 
 1563 /*
 1564  * pmap_remove_pte: tear down the pte that maps a page in a process
 1565  */
 1566 static int
 1567 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
 1568 {
 1569         pt_entry_t oldpte;
 1570         vm_page_t m;
 1571 
 1572         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1573         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1574         oldpte = pte_load_clear(ptq);
 1575         if (oldpte & PG_W)
 1576                 pmap->pm_stats.wired_count -= 1;
 1577         /*
 1578          * Machines that don't support invlpg also don't support
 1579          * PG_G.
 1580          */
 1581         if (oldpte & PG_G)
 1582                 pmap_invalidate_page(kernel_pmap, va);
 1583         pmap->pm_stats.resident_count -= 1;
 1584         if (oldpte & PG_MANAGED) {
 1585                 m = PHYS_TO_VM_PAGE(oldpte);
 1586                 if (oldpte & PG_M) {
 1587 #if defined(PMAP_DIAGNOSTIC)
 1588                         if (pmap_nw_modified((pt_entry_t) oldpte)) {
 1589                                 printf(
 1590         "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1591                                     va, oldpte);
 1592                         }
 1593 #endif
 1594                         if (pmap_track_modified(va))
 1595                                 vm_page_dirty(m);
 1596                 }
 1597                 if (oldpte & PG_A)
 1598                         vm_page_flag_set(m, PG_REFERENCED);
 1599                 return pmap_remove_entry(pmap, m, va);
 1600         } else {
 1601                 return pmap_unuse_pt(pmap, va);
 1602         }
 1603 }
 1604 
 1605 /*
 1606  * Remove a single page from a process address space
 1607  */
 1608 static void
 1609 pmap_remove_page(pmap_t pmap, vm_offset_t va)
 1610 {
 1611         pt_entry_t *pte;
 1612 
 1613         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1614         KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
 1615         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1616         if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
 1617                 return;
 1618         pmap_remove_pte(pmap, pte, va);
 1619         pmap_invalidate_page(pmap, va);
 1620 }
 1621 
 1622 /*
 1623  *      Remove the given range of addresses from the specified map.
 1624  *
 1625  *      It is assumed that the start and end are properly
 1626  *      rounded to the page size.
 1627  */
 1628 void
 1629 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 1630 {
 1631         vm_offset_t pdnxt;
 1632         pd_entry_t ptpaddr;
 1633         pt_entry_t *pte;
 1634         int anyvalid;
 1635 
 1636         /*
 1637          * Perform an unsynchronized read.  This is, however, safe.
 1638          */
 1639         if (pmap->pm_stats.resident_count == 0)
 1640                 return;
 1641 
 1642         anyvalid = 0;
 1643 
 1644         vm_page_lock_queues();
 1645         sched_pin();
 1646         PMAP_LOCK(pmap);
 1647 
 1648         /*
 1649          * Special handling for removing a single page: a very
 1650          * common operation for which it is easy to short-circuit
 1651          * the general loop below.
 1652          */
 1653         if ((sva + PAGE_SIZE == eva) && 
 1654             ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 1655                 pmap_remove_page(pmap, sva);
 1656                 goto out;
 1657         }
 1658 
 1659         for (; sva < eva; sva = pdnxt) {
 1660                 unsigned pdirindex;
 1661 
 1662                 /*
 1663                  * Calculate index for next page table.
 1664                  */
 1665                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 1666                 if (pmap->pm_stats.resident_count == 0)
 1667                         break;
 1668 
 1669                 pdirindex = sva >> PDRSHIFT;
 1670                 ptpaddr = pmap->pm_pdir[pdirindex];
 1671 
 1672                 /*
 1673                  * Weed out invalid mappings. Note: we assume that the page
 1674                  * directory table is always allocated, and in kernel virtual.
 1675                  */
 1676                 if (ptpaddr == 0)
 1677                         continue;
 1678 
 1679                 /*
 1680                  * Check for large page.
 1681                  */
 1682                 if ((ptpaddr & PG_PS) != 0) {
 1683                         pmap->pm_pdir[pdirindex] = 0;
 1684                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1685                         anyvalid = 1;
 1686                         continue;
 1687                 }
 1688 
 1689                 /*
 1690                  * Limit our scan to either the end of the va represented
 1691                  * by the current page table page, or to the end of the
 1692                  * range being removed.
 1693                  */
 1694                 if (pdnxt > eva)
 1695                         pdnxt = eva;
 1696 
 1697                 for (; sva != pdnxt; sva += PAGE_SIZE) {
 1698                         if ((pte = pmap_pte_quick(pmap, sva)) == NULL ||
 1699                             *pte == 0)
 1700                                 continue;
 1701                         anyvalid = 1;
 1702                         if (pmap_remove_pte(pmap, pte, sva))
 1703                                 break;
 1704                 }
 1705         }
 1706 out:
 1707         sched_unpin();
 1708         vm_page_unlock_queues();
 1709         if (anyvalid)
 1710                 pmap_invalidate_all(pmap);
 1711         PMAP_UNLOCK(pmap);
 1712 }
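
      /*
       * Illustrative sketch (not from this file; "uva" and "size" are
       * placeholders): callers are expected to pass page-aligned bounds,
       * e.g. to tear down a user mapping of "size" bytes at "uva":
       */
#if 0
              pmap_remove(pmap, trunc_page(uva), round_page(uva + size));
#endif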
 1713 
 1714 /*
 1715  *      Routine:        pmap_remove_all
 1716  *      Function:
 1717  *              Removes this physical page from
 1718  *              all physical maps in which it resides.
 1719  *              Reflects back modify bits to the pager.
 1720  *
 1721  *      Notes:
 1722  *              Original versions of this routine were very
 1723  *              inefficient because they iteratively called
 1724  *              pmap_remove (slow...)
 1725  */
 1726 
 1727 void
 1728 pmap_remove_all(vm_page_t m)
 1729 {
 1730         register pv_entry_t pv;
 1731         pt_entry_t *pte, tpte;
 1732 
 1733 #if defined(PMAP_DIAGNOSTIC)
 1734         /*
 1735          * XXX This makes pmap_remove_all() illegal for non-managed pages!
 1736          */
 1737         if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
 1738                 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x",
 1739                     VM_PAGE_TO_PHYS(m));
 1740         }
 1741 #endif
 1742         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1743         sched_pin();
 1744         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 1745                 PMAP_LOCK(pv->pv_pmap);
 1746                 pv->pv_pmap->pm_stats.resident_count--;
 1747                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 1748                 tpte = pte_load_clear(pte);
 1749                 if (tpte & PG_W)
 1750                         pv->pv_pmap->pm_stats.wired_count--;
 1751                 if (tpte & PG_A)
 1752                         vm_page_flag_set(m, PG_REFERENCED);
 1753 
 1754                 /*
 1755                  * Update the vm_page_t clean and reference bits.
 1756                  */
 1757                 if (tpte & PG_M) {
 1758 #if defined(PMAP_DIAGNOSTIC)
 1759                         if (pmap_nw_modified((pt_entry_t) tpte)) {
 1760                                 printf(
 1761         "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1762                                     pv->pv_va, tpte);
 1763                         }
 1764 #endif
 1765                         if (pmap_track_modified(pv->pv_va))
 1766                                 vm_page_dirty(m);
 1767                 }
 1768                 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 1769                 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 1770                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1771                 m->md.pv_list_count--;
 1772                 pmap_unuse_pt(pv->pv_pmap, pv->pv_va);
 1773                 PMAP_UNLOCK(pv->pv_pmap);
 1774                 free_pv_entry(pv);
 1775         }
 1776         vm_page_flag_clear(m, PG_WRITEABLE);
 1777         sched_unpin();
 1778 }
 1779 
 1780 /*
 1781  *      Set the physical protection on the
 1782  *      specified range of this map as requested.
 1783  */
 1784 void
 1785 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 1786 {
 1787         vm_offset_t pdnxt;
 1788         pd_entry_t ptpaddr;
 1789         int anychanged;
 1790 
 1791         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 1792                 pmap_remove(pmap, sva, eva);
 1793                 return;
 1794         }
 1795 
 1796         if (prot & VM_PROT_WRITE)
 1797                 return;
 1798 
 1799         anychanged = 0;
 1800 
 1801         vm_page_lock_queues();
 1802         sched_pin();
 1803         PMAP_LOCK(pmap);
 1804         for (; sva < eva; sva = pdnxt) {
 1805                 unsigned obits, pbits, pdirindex;
 1806 
 1807                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 1808 
 1809                 pdirindex = sva >> PDRSHIFT;
 1810                 ptpaddr = pmap->pm_pdir[pdirindex];
 1811 
 1812                 /*
 1813                  * Weed out invalid mappings. Note: we assume that the page
 1814                  * directory table is always allocated, and in kernel virtual.
 1815                  */
 1816                 if (ptpaddr == 0)
 1817                         continue;
 1818 
 1819                 /*
 1820                  * Check for large page.
 1821                  */
 1822                 if ((ptpaddr & PG_PS) != 0) {
 1823                         pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
 1824                         /* Residency is unchanged by a protection change. */
 1825                         anychanged = 1;
 1826                         continue;
 1827                 }
 1828 
 1829                 if (pdnxt > eva)
 1830                         pdnxt = eva;
 1831 
 1832                 for (; sva != pdnxt; sva += PAGE_SIZE) {
 1833                         pt_entry_t *pte;
 1834                         vm_page_t m;
 1835 
 1836                         if ((pte = pmap_pte_quick(pmap, sva)) == NULL)
 1837                                 continue;
 1838 retry:
 1839                         /*
 1840                          * Regardless of whether a pte is 32 or 64 bits in
 1841                          * size, PG_RW, PG_A, and PG_M are among the least
 1842                          * significant 32 bits.
 1843                          */
 1844                         obits = pbits = *(u_int *)pte;
 1845                         if (pbits & PG_MANAGED) {
 1846                                 m = NULL;
 1847                                 if (pbits & PG_A) {
 1848                                         m = PHYS_TO_VM_PAGE(pbits);
 1849                                         vm_page_flag_set(m, PG_REFERENCED);
 1850                                         pbits &= ~PG_A;
 1851                                 }
 1852                                 if ((pbits & PG_M) != 0 &&
 1853                                     pmap_track_modified(sva)) {
 1854                                         if (m == NULL)
 1855                                                 m = PHYS_TO_VM_PAGE(pbits);
 1856                                         vm_page_dirty(m);
 1857                                 }
 1858                         }
 1859 
 1860                         pbits &= ~(PG_RW | PG_M);
 1861 
 1862                         if (pbits != obits) {
 1863                                 if (!atomic_cmpset_int((u_int *)pte, obits,
 1864                                     pbits))
 1865                                         goto retry;
 1866                                 anychanged = 1;
 1867                         }
 1868                 }
 1869         }
 1870         sched_unpin();
 1871         vm_page_unlock_queues();
 1872         if (anychanged)
 1873                 pmap_invalidate_all(pmap);
 1874         PMAP_UNLOCK(pmap);
 1875 }
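
      /*
       * Illustrative sketch (not from this file): downgrading a range to
       * read-only versus dropping all access, which the code above turns
       * into a pmap_remove():
       */
#if 0
              pmap_protect(pmap, sva, eva, VM_PROT_READ);     /* write-protect */
              pmap_protect(pmap, sva, eva, VM_PROT_NONE);     /* remove mappings */
#endif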
 1876 
 1877 /*
 1878  *      Insert the given physical page (p) at
 1879  *      the specified virtual address (v) in the
 1880  *      target physical map with the protection requested.
 1881  *
 1882  *      If specified, the page will be wired down, meaning
 1883  *      that the related pte can not be reclaimed.
 1884  *
 1885  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 1886  *      or lose information.  That is, this routine must actually
 1887  *      insert this page into the given map NOW.
 1888  */
 1889 void
 1890 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 1891            boolean_t wired)
 1892 {
 1893         vm_paddr_t pa;
 1894         register pt_entry_t *pte;
 1895         vm_paddr_t opa;
 1896         pt_entry_t origpte, newpte;
 1897         vm_page_t mpte, om;
 1898 
 1899         va &= PG_FRAME;
 1900 #ifdef PMAP_DIAGNOSTIC
 1901         if (va > VM_MAX_KERNEL_ADDRESS)
 1902                 panic("pmap_enter: toobig");
 1903         if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 1904                 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
 1905 #endif
 1906 
 1907         mpte = NULL;
 1908 
 1909         vm_page_lock_queues();
 1910         PMAP_LOCK(pmap);
 1911         sched_pin();
 1912 
 1913         /*
 1914          * In the case that a page table page is not
 1915          * resident, we are creating it here.
 1916          */
 1917         if (va < VM_MAXUSER_ADDRESS) {
 1918                 mpte = pmap_allocpte(pmap, va, M_WAITOK);
 1919         }
 1920 #if 0 && defined(PMAP_DIAGNOSTIC)
 1921         else {
 1922                 pd_entry_t *pdeaddr = pmap_pde(pmap, va);
 1923                 origpte = *pdeaddr;
 1924                 if ((origpte & PG_V) == 0) { 
 1925                         panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
 1926                                 pmap->pm_pdir[PTDPTDI], origpte, va);
 1927                 }
 1928         }
 1929 #endif
 1930 
 1931         pte = pmap_pte_quick(pmap, va);
 1932 
 1933         /*
 1934          * A page table page must exist by now; panic if the pde is invalid.
 1935          */
 1936         if (pte == NULL) {
 1937                 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n",
 1938                         (uintmax_t)pmap->pm_pdir[PTDPTDI], va);
 1939         }
 1940 
 1941         pa = VM_PAGE_TO_PHYS(m);
 1942         om = NULL;
 1943         origpte = *pte;
 1944         opa = origpte & PG_FRAME;
 1945 
 1946         if (origpte & PG_PS) {
 1947                 /*
 1948                  * Yes, I know this will truncate upper address bits for PAE,
 1949                  * but I'm actually more interested in the lower bits
 1950                  */
 1951                 printf("pmap_enter: va %p, pte %p, origpte %p\n",
 1952                     (void *)va, (void *)pte, (void *)(uintptr_t)origpte);
 1953                 panic("pmap_enter: attempted pmap_enter on 4MB page");
 1954         }
 1955 
 1956         /*
 1957          * Mapping has not changed, must be protection or wiring change.
 1958          */
 1959         if (origpte && (opa == pa)) {
 1960                 /*
 1961                  * Wiring change, just update stats. We don't worry about
 1962                  * wiring PT pages as they remain resident as long as there
 1963                  * are valid mappings in them. Hence, if a user page is wired,
 1964                  * the PT page will be also.
 1965                  */
 1966                 if (wired && ((origpte & PG_W) == 0))
 1967                         pmap->pm_stats.wired_count++;
 1968                 else if (!wired && (origpte & PG_W))
 1969                         pmap->pm_stats.wired_count--;
 1970 
 1971 #if defined(PMAP_DIAGNOSTIC)
 1972                 if (pmap_nw_modified((pt_entry_t) origpte)) {
 1973                         printf(
 1974         "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1975                             va, origpte);
 1976                 }
 1977 #endif
 1978 
 1979                 /*
 1980                  * Remove extra pte reference
 1981                  */
 1982                 if (mpte)
 1983                         mpte->wire_count--;
 1984 
 1985                 /*
 1986                  * We might be turning off write access to the page,
 1987                  * so we go ahead and sense modify status.
 1988                  */
 1989                 if (origpte & PG_MANAGED) {
 1990                         om = m;
 1991                         pa |= PG_MANAGED;
 1992                 }
 1993                 goto validate;
 1994         } 
 1995         /*
 1996          * Mapping has changed, invalidate old range and fall through to
 1997          * handle validating new mapping.
 1998          */
 1999         if (opa) {
 2000                 int err;
 2001                 if (origpte & PG_W)
 2002                         pmap->pm_stats.wired_count--;
 2003                 if (origpte & PG_MANAGED) {
 2004                         om = PHYS_TO_VM_PAGE(opa);
 2005                         err = pmap_remove_entry(pmap, om, va);
 2006                 } else
 2007                         err = pmap_unuse_pt(pmap, va);
 2008                 if (err)
 2009                         panic("pmap_enter: pte vanished, va: 0x%x", va);
 2010         } else
 2011                 pmap->pm_stats.resident_count++;
 2012 
 2013         /*
 2014          * Enter on the PV list if part of our managed memory.  The pv
 2015          * lists are protected by the page queues lock, which is held
 2016          * here.
 2017          */
 2018         if (pmap_initialized && 
 2019             (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
 2020                 pmap_insert_entry(pmap, va, m);
 2021                 pa |= PG_MANAGED;
 2022         }
 2023 
 2024         /*
 2025          * Increment counters
 2026          */
 2027         if (wired)
 2028                 pmap->pm_stats.wired_count++;
 2029 
 2030 validate:
 2031         /*
 2032          * Now validate mapping with desired protection/wiring.
 2033          */
 2034         newpte = (pt_entry_t)(pa | PG_V);
 2035         if ((prot & VM_PROT_WRITE) != 0)
 2036                 newpte |= PG_RW;
 2037         if (wired)
 2038                 newpte |= PG_W;
 2039         if (va < VM_MAXUSER_ADDRESS)
 2040                 newpte |= PG_U;
 2041         if (pmap == kernel_pmap)
 2042                 newpte |= pgeflag;
 2043 
 2044         /*
 2045          * if the mapping or permission bits are different, we need
 2046          * to update the pte.
 2047          */
 2048         if ((origpte & ~(PG_M|PG_A)) != newpte) {
 2049                 if (origpte & PG_MANAGED) {
 2050                         origpte = pte_load_store(pte, newpte | PG_A);
 2051                         if ((origpte & PG_M) && pmap_track_modified(va))
 2052                                 vm_page_dirty(om);
 2053                         if (origpte & PG_A)
 2054                                 vm_page_flag_set(om, PG_REFERENCED);
 2055                 } else
 2056                         pte_store(pte, newpte | PG_A);
 2057                 if (origpte) {
 2058                         pmap_invalidate_page(pmap, va);
 2059                 }
 2060         }
 2061         sched_unpin();
 2062         vm_page_unlock_queues();
 2063         PMAP_UNLOCK(pmap);
 2064 }
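
      /*
       * Illustrative sketch (not from this file; names are placeholders):
       * the fault path enters the resolved page with the protection granted
       * by the map entry, wiring it only for wired entries:
       */
#if 0
              pmap_enter(map->pmap, vaddr, m, prot, wired);
#endif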
 2065 
 2066 /*
 2067  * This code makes some *MAJOR* assumptions:
 2068  * 1. The pmap is the current pmap and it exists.
 2069  * 2. Not wired.
 2070  * 3. Read access.
 2071  * 4. No page table pages.
 2072  * 5. The TLB flush is deferred to the calling procedure.
 2073  * 6. The page IS managed.
 2074  * but it is *MUCH* faster than pmap_enter...
 2075  */
 2076 
 2077 vm_page_t
 2078 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
 2079 {
 2080         pt_entry_t *pte;
 2081         vm_paddr_t pa;
 2082 
 2083         vm_page_lock_queues();
 2084         PMAP_LOCK(pmap);
 2085 
 2086         /*
 2087          * In the case that a page table page is not
 2088          * resident, we are creating it here.
 2089          */
 2090         if (va < VM_MAXUSER_ADDRESS) {
 2091                 unsigned ptepindex;
 2092                 pd_entry_t ptepa;
 2093 
 2094                 /*
 2095                  * Calculate pagetable page index
 2096                  */
 2097                 ptepindex = va >> PDRSHIFT;
 2098                 if (mpte && (mpte->pindex == ptepindex)) {
 2099                         mpte->wire_count++;
 2100                 } else {
 2101 retry:
 2102                         /*
 2103                          * Get the page directory entry
 2104                          */
 2105                         ptepa = pmap->pm_pdir[ptepindex];
 2106 
 2107                         /*
 2108                          * If the page table page is mapped, we just increment
 2109                          * the hold count, and activate it.
 2110                          */
 2111                         if (ptepa) {
 2112                                 if (ptepa & PG_PS)
 2113                                         panic("pmap_enter_quick: unexpected mapping into 4MB page");
 2114                                 mpte = PHYS_TO_VM_PAGE(ptepa);
 2115                                 mpte->wire_count++;
 2116                         } else {
 2117                                 mpte = _pmap_allocpte(pmap, ptepindex,
 2118                                     M_WAITOK);
 2119                                 if (mpte == NULL)
 2120                                         goto retry;
 2121                         }
 2122                 }
 2123         } else {
 2124                 mpte = NULL;
 2125         }
 2126 
 2127         /*
 2128          * This call to vtopte makes the assumption that we are
 2129          * entering the page into the current pmap.  In order to support
 2130          * quick entry into any pmap, one would likely use pmap_pte_quick.
 2131          * But that isn't as quick as vtopte.
 2132          */
 2133         pte = vtopte(va);
 2134         if (*pte) {
 2135                 if (mpte != NULL) {
 2136                         pmap_unwire_pte_hold(pmap, mpte);
 2137                         mpte = NULL;
 2138                 }
 2139                 goto out;
 2140         }
 2141 
 2142         /*
 2143          * Enter on the PV list if part of our managed memory.  The pv
 2144          * lists are protected by the page queues lock, which is held
 2145          * here.
 2146          */
 2147         if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 2148                 pmap_insert_entry(pmap, va, m);
 2149 
 2150         /*
 2151          * Increment counters
 2152          */
 2153         pmap->pm_stats.resident_count++;
 2154 
 2155         pa = VM_PAGE_TO_PHYS(m);
 2156 
 2157         /*
 2158          * Now validate mapping with RO protection
 2159          */
 2160         if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 2161                 pte_store(pte, pa | PG_V | PG_U);
 2162         else
 2163                 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
 2164 out:
 2165         vm_page_unlock_queues();
 2166         PMAP_UNLOCK(pmap);
 2167         return mpte;
 2168 }
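
      /*
       * Illustrative sketch (not from this file): prefault loops feed the
       * page table page returned by one call back in as "mpte", so the PDE
       * lookup is skipped for consecutive addresses in the same 4MB region:
       */
#if 0
              mpte = NULL;
              for (i = 0; i < npages; i++)
                      mpte = pmap_enter_quick(pmap, va + ptoa(i), ma[i], mpte);
#endif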
 2169 
 2170 /*
 2171  * Make a temporary mapping for a physical address.  This is only intended
 2172  * to be used for panic dumps.
 2173  */
 2174 void *
 2175 pmap_kenter_temporary(vm_paddr_t pa, int i)
 2176 {
 2177         vm_offset_t va;
 2178 
 2179         va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 2180         pmap_kenter(va, pa);
 2181 #ifndef I386_CPU
 2182         invlpg(va);
 2183 #else
 2184         invltlb();
 2185 #endif
 2186         return ((void *)crashdumpmap);
 2187 }
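
      /*
       * Note that the mapping is installed at crashdumpmap + i * PAGE_SIZE
       * while the base of the crashdumpmap window is returned; the dump
       * code is expected to map consecutive pages at consecutive indices
       * and address them relative to that base.
       */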
 2188 
 2189 /*
 2190  * This code maps large physical mmap regions into the
 2191  * processor address space.  Note that some shortcuts
 2192  * are taken, but the code works.
 2193  */
 2194 void
 2195 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
 2196                     vm_object_t object, vm_pindex_t pindex,
 2197                     vm_size_t size)
 2198 {
 2199         vm_page_t p;
 2200 
 2201         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 2202         KASSERT(object->type == OBJT_DEVICE,
 2203             ("pmap_object_init_pt: non-device object"));
 2204         if (pseflag && 
 2205             ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
 2206                 int i;
 2207                 vm_page_t m[1];
 2208                 unsigned int ptepindex;
 2209                 int npdes;
 2210                 pd_entry_t ptepa;
 2211 
 2212                 PMAP_LOCK(pmap);
 2213                 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
 2214                         goto out;
 2215                 PMAP_UNLOCK(pmap);
 2216 retry:
 2217                 p = vm_page_lookup(object, pindex);
 2218                 if (p != NULL) {
 2219                         vm_page_lock_queues();
 2220                         if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
 2221                                 goto retry;
 2222                 } else {
 2223                         p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 2224                         if (p == NULL)
 2225                                 return;
 2226                         m[0] = p;
 2227 
 2228                         if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 2229                                 vm_page_lock_queues();
 2230                                 vm_page_free(p);
 2231                                 vm_page_unlock_queues();
 2232                                 return;
 2233                         }
 2234 
 2235                         p = vm_page_lookup(object, pindex);
 2236                         vm_page_lock_queues();
 2237                         vm_page_wakeup(p);
 2238                 }
 2239                 vm_page_unlock_queues();
 2240 
 2241                 ptepa = VM_PAGE_TO_PHYS(p);
 2242                 if (ptepa & (NBPDR - 1))
 2243                         return;
 2244 
 2245                 p->valid = VM_PAGE_BITS_ALL;
 2246 
 2247                 PMAP_LOCK(pmap);
 2248                 pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
 2249                 npdes = size >> PDRSHIFT;
 2250                 for(i = 0; i < npdes; i++) {
 2251                         pde_store(&pmap->pm_pdir[ptepindex],
 2252                             ptepa | PG_U | PG_RW | PG_V | PG_PS);
 2253                         ptepa += NBPDR;
 2254                         ptepindex += 1;
 2255                 }
 2256                 pmap_invalidate_all(pmap);
 2257 out:
 2258                 PMAP_UNLOCK(pmap);
 2259         }
 2260 }
 2261 
 2262 /*
 2263  *      Routine:        pmap_change_wiring
 2264  *      Function:       Change the wiring attribute for a map/virtual-address
 2265  *                      pair.
 2266  *      In/out conditions:
 2267  *                      The mapping must already exist in the pmap.
 2268  */
 2269 void
 2270 pmap_change_wiring(pmap, va, wired)
 2271         register pmap_t pmap;
 2272         vm_offset_t va;
 2273         boolean_t wired;
 2274 {
 2275         register pt_entry_t *pte;
 2276 
 2277         PMAP_LOCK(pmap);
 2278         pte = pmap_pte(pmap, va);
 2279 
 2280         if (wired && !pmap_pte_w(pte))
 2281                 pmap->pm_stats.wired_count++;
 2282         else if (!wired && pmap_pte_w(pte))
 2283                 pmap->pm_stats.wired_count--;
 2284 
 2285         /*
 2286          * Wiring is not a hardware characteristic so there is no need to
 2287          * invalidate TLB.
 2288          */
 2289         pmap_pte_set_w(pte, wired);
 2290         pmap_pte_release(pte);
 2291         PMAP_UNLOCK(pmap);
 2292 }
 2293 
 2294 
 2295 
 2296 /*
 2297  *      Copy the range specified by src_addr/len
 2298  *      from the source map to the range dst_addr/len
 2299  *      in the destination map.
 2300  *
 2301  *      This routine is only advisory and need not do anything.
 2302  */
 2303 
 2304 void
 2305 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 2306           vm_offset_t src_addr)
 2307 {
 2308         vm_offset_t addr;
 2309         vm_offset_t end_addr = src_addr + len;
 2310         vm_offset_t pdnxt;
 2311         vm_page_t m;
 2312 
 2313         if (dst_addr != src_addr)
 2314                 return;
 2315 
 2316         if (!pmap_is_current(src_pmap))
 2317                 return;
 2318 
 2319         vm_page_lock_queues();
 2320         if (dst_pmap < src_pmap) {
 2321                 PMAP_LOCK(dst_pmap);
 2322                 PMAP_LOCK(src_pmap);
 2323         } else {
 2324                 PMAP_LOCK(src_pmap);
 2325                 PMAP_LOCK(dst_pmap);
 2326         }
 2327         sched_pin();
 2328         for (addr = src_addr; addr < end_addr; addr = pdnxt) {
 2329                 pt_entry_t *src_pte, *dst_pte;
 2330                 vm_page_t dstmpte, srcmpte;
 2331                 pd_entry_t srcptepaddr;
 2332                 unsigned ptepindex;
 2333 
 2334                 if (addr >= UPT_MIN_ADDRESS)
 2335                         panic("pmap_copy: invalid to pmap_copy page tables");
 2336 
 2337                 /*
 2338                  * Don't let optional prefaulting of pages make us go
 2339                  * way below the low water mark of free pages or way
 2340                  * above high water mark of used pv entries.
 2341                  */
 2342                 if (cnt.v_free_count < cnt.v_free_reserved ||
 2343                     pv_entry_count > pv_entry_high_water)
 2344                         break;
 2345                 
 2346                 pdnxt = (addr + NBPDR) & ~PDRMASK;
 2347                 ptepindex = addr >> PDRSHIFT;
 2348 
 2349                 srcptepaddr = src_pmap->pm_pdir[ptepindex];
 2350                 if (srcptepaddr == 0)
 2351                         continue;
 2352                         
 2353                 if (srcptepaddr & PG_PS) {
 2354                         if (dst_pmap->pm_pdir[ptepindex] == 0) {
 2355                                 dst_pmap->pm_pdir[ptepindex] = srcptepaddr;
 2356                                 dst_pmap->pm_stats.resident_count +=
 2357                                     NBPDR / PAGE_SIZE;
 2358                         }
 2359                         continue;
 2360                 }
 2361 
 2362                 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
 2363                 if (srcmpte->wire_count == 0)
 2364                         panic("pmap_copy: source page table page is unused");
 2365 
 2366                 if (pdnxt > end_addr)
 2367                         pdnxt = end_addr;
 2368 
 2369                 src_pte = vtopte(addr);
 2370                 while (addr < pdnxt) {
 2371                         pt_entry_t ptetemp;
 2372                         ptetemp = *src_pte;
 2373                         /*
 2374                          * we only virtual copy managed pages
 2375                          */
 2376                         if ((ptetemp & PG_MANAGED) != 0) {
 2377                                 /*
 2378                                  * We have to check after allocpte for the
 2379                                  * pte still being around...  allocpte can
 2380                                  * block.
 2381                                  */
 2382                                 dstmpte = pmap_allocpte(dst_pmap, addr,
 2383                                     M_NOWAIT);
 2384                                 if (dstmpte == NULL)
 2385                                         break;
 2386                                 dst_pte = pmap_pte_quick(dst_pmap, addr);
 2387                                 if (*dst_pte == 0) {
 2388                                         /*
 2389                                          * Clear the modified and
 2390                                          * accessed (referenced) bits
 2391                                          * during the copy.
 2392                                          */
 2393                                         m = PHYS_TO_VM_PAGE(ptetemp);
 2394                                         *dst_pte = ptetemp & ~(PG_M | PG_A);
 2395                                         dst_pmap->pm_stats.resident_count++;
 2396                                         pmap_insert_entry(dst_pmap, addr, m);
 2397                                 } else
 2398                                         pmap_unwire_pte_hold(dst_pmap, dstmpte);
 2399                                 if (dstmpte->wire_count >= srcmpte->wire_count)
 2400                                         break;
 2401                         }
 2402                         addr += PAGE_SIZE;
 2403                         src_pte++;
 2404                 }
 2405         }
 2406         sched_unpin();
 2407         vm_page_unlock_queues();
 2408         PMAP_UNLOCK(src_pmap);
 2409         PMAP_UNLOCK(dst_pmap);
 2410 }       
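
      /*
       * This is typically reached from vm_map_copy_entry() during fork();
       * because the copy is purely advisory, bailing out early (low free
       * pages, too many pv entries, or a non-current source pmap) is
       * always acceptable.
       */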
 2411 
 2412 static __inline void
 2413 pagezero(void *page)
 2414 {
 2415 #if defined(I686_CPU)
 2416         if (cpu_class == CPUCLASS_686) {
 2417 #if defined(CPU_ENABLE_SSE)
 2418                 if (cpu_feature & CPUID_SSE2)
 2419                         sse2_pagezero(page);
 2420                 else
 2421 #endif
 2422                         i686_pagezero(page);
 2423         } else
 2424 #endif
 2425                 bzero(page, PAGE_SIZE);
 2426 }
 2427 
 2428 /*
 2429  *      pmap_zero_page zeros the specified hardware page by mapping 
 2430  *      the page into KVM and using pagezero() to clear its contents.
 2431  */
 2432 void
 2433 pmap_zero_page(vm_page_t m)
 2434 {
 2435 
 2436         mtx_lock(&CMAPCADDR12_lock);
 2437         if (*CMAP2)
 2438                 panic("pmap_zero_page: CMAP2 busy");
 2439         sched_pin();
 2440         *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2441         invlcaddr(CADDR2);
 2442         pagezero(CADDR2);
 2443         *CMAP2 = 0;
 2444         sched_unpin();
 2445         mtx_unlock(&CMAPCADDR12_lock);
 2446 }
 2447 
 2448 /*
 2449  *      pmap_zero_page_area zeros the specified hardware page by mapping 
 2450  *      the page into KVM and using bzero to clear its contents.
 2451  *
 2452  *      off and size may not cover an area beyond a single hardware page.
 2453  */
 2454 void
 2455 pmap_zero_page_area(vm_page_t m, int off, int size)
 2456 {
 2457 
 2458         mtx_lock(&CMAPCADDR12_lock);
 2459         if (*CMAP2)
 2460                 panic("pmap_zero_page_area: CMAP2 busy");
 2461         sched_pin();
 2462         *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2463         invlcaddr(CADDR2);
 2464         if (off == 0 && size == PAGE_SIZE) 
 2465                 pagezero(CADDR2);
 2466         else
 2467                 bzero((char *)CADDR2 + off, size);
 2468         *CMAP2 = 0;
 2469         sched_unpin();
 2470         mtx_unlock(&CMAPCADDR12_lock);
 2471 }
 2472 
 2473 /*
 2474  *      pmap_zero_page_idle zeros the specified hardware page by mapping 
 2475  *      the page into KVM and using pagezero() to clear its contents.  This
 2476  *      is intended to be called from the vm_pagezero process only and
 2477  *      outside of Giant.
 2478  */
 2479 void
 2480 pmap_zero_page_idle(vm_page_t m)
 2481 {
 2482 
 2483         if (*CMAP3)
 2484                 panic("pmap_zero_page_idle: CMAP3 busy");
 2485         sched_pin();
 2486         *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2487         invlcaddr(CADDR3);
 2488         pagezero(CADDR3);
 2489         *CMAP3 = 0;
 2490         sched_unpin();
 2491 }
 2492 
 2493 /*
 2494  *      pmap_copy_page copies the specified (machine independent)
 2495  *      page by mapping the page into virtual memory and using
 2496  *      bcopy to copy the page, one machine dependent page at a
 2497  *      time.
 2498  */
 2499 void
 2500 pmap_copy_page(vm_page_t src, vm_page_t dst)
 2501 {
 2502 
 2503         mtx_lock(&CMAPCADDR12_lock);
 2504         if (*CMAP1)
 2505                 panic("pmap_copy_page: CMAP1 busy");
 2506         if (*CMAP2)
 2507                 panic("pmap_copy_page: CMAP2 busy");
 2508         sched_pin();
 2509 #ifdef I386_CPU
 2510         invltlb();
 2511 #else
 2512         invlpg((u_int)CADDR1);
 2513         invlpg((u_int)CADDR2);
 2514 #endif
 2515         *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
 2516         *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
 2517         bcopy(CADDR1, CADDR2, PAGE_SIZE);
 2518         *CMAP1 = 0;
 2519         *CMAP2 = 0;
 2520         sched_unpin();
 2521         mtx_unlock(&CMAPCADDR12_lock);
 2522 }
 2523 
 2524 /*
 2525  * Returns true if the pmap's pv is one of the first
 2526  * 16 pvs linked to from this page.  This count may
 2527  * be changed upwards or downwards in the future; it
 2528  * is only necessary that true be returned for a small
 2529  * subset of pmaps for proper page aging.
 2530  */
 2531 boolean_t
 2532 pmap_page_exists_quick(pmap, m)
 2533         pmap_t pmap;
 2534         vm_page_t m;
 2535 {
 2536         pv_entry_t pv;
 2537         int loops = 0;
 2538 
 2539         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2540                 return FALSE;
 2541 
 2542         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2543         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2544                 if (pv->pv_pmap == pmap) {
 2545                         return TRUE;
 2546                 }
 2547                 loops++;
 2548                 if (loops >= 16)
 2549                         break;
 2550         }
 2551         return (FALSE);
 2552 }
 2553 
 2554 #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 2555 /*
 2556  * Remove all pages from the specified address space;
 2557  * this aids process exit speeds.  Also, this code
 2558  * is special-cased for the current process only, but
 2559  * can have the more generic (and slightly slower)
 2560  * mode enabled.  This is much faster than pmap_remove
 2561  * in the case of running down an entire address space.
 2562  */
 2563 void
 2564 pmap_remove_pages(pmap, sva, eva)
 2565         pmap_t pmap;
 2566         vm_offset_t sva, eva;
 2567 {
 2568         pt_entry_t *pte, tpte;
 2569         vm_page_t m;
 2570         pv_entry_t pv, npv;
 2571 
 2572 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2573         if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
 2574                 printf("warning: pmap_remove_pages called with non-current pmap\n");
 2575                 return;
 2576         }
 2577 #endif
 2578         vm_page_lock_queues();
 2579         PMAP_LOCK(pmap);
 2580         sched_pin();
 2581         for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
 2582 
 2583                 if (pv->pv_va >= eva || pv->pv_va < sva) {
 2584                         npv = TAILQ_NEXT(pv, pv_plist);
 2585                         continue;
 2586                 }
 2587 
 2588 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2589                 pte = vtopte(pv->pv_va);
 2590 #else
 2591                 pte = pmap_pte_quick(pmap, pv->pv_va);
 2592 #endif
 2593                 tpte = *pte;
 2594 
 2595                 if (tpte == 0) {
 2596                         printf("TPTE at %p  IS ZERO @ VA %08x\n",
 2597                                                         pte, pv->pv_va);
 2598                         panic("bad pte");
 2599                 }
 2600 
 2601 /*
 2602  * We cannot remove wired pages from a process' mapping at this time
 2603  */
 2604                 if (tpte & PG_W) {
 2605                         npv = TAILQ_NEXT(pv, pv_plist);
 2606                         continue;
 2607                 }
 2608 
 2609                 m = PHYS_TO_VM_PAGE(tpte);
 2610                 KASSERT(m->phys_addr == (tpte & PG_FRAME),
 2611                     ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 2612                     m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
 2613 
 2614                 KASSERT(m < &vm_page_array[vm_page_array_size],
 2615                         ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
 2616 
 2617                 pmap->pm_stats.resident_count--;
 2618 
 2619                 pte_clear(pte);
 2620 
 2621                 /*
 2622                  * Update the vm_page_t clean and reference bits.
 2623                  */
 2624                 if (tpte & PG_M) {
 2625                         vm_page_dirty(m);
 2626                 }
 2627 
 2628                 npv = TAILQ_NEXT(pv, pv_plist);
 2629                 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 2630 
 2631                 m->md.pv_list_count--;
 2632                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2633                 if (TAILQ_EMPTY(&m->md.pv_list))
 2634                         vm_page_flag_clear(m, PG_WRITEABLE);
 2635 
 2636                 pmap_unuse_pt(pmap, pv->pv_va);
 2637                 free_pv_entry(pv);
 2638         }
 2639         sched_unpin();
 2640         pmap_invalidate_all(pmap);
 2641         PMAP_UNLOCK(pmap);
 2642         vm_page_unlock_queues();
 2643 }
 2644 
 2645 /*
 2646  *      pmap_is_modified:
 2647  *
 2648  *      Return whether or not the specified physical page was modified
 2649  *      in any physical maps.
 2650  */
 2651 boolean_t
 2652 pmap_is_modified(vm_page_t m)
 2653 {
 2654         pv_entry_t pv;
 2655         pt_entry_t *pte;
 2656         boolean_t rv;
 2657 
 2658         rv = FALSE;
 2659         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2660                 return (rv);
 2661 
 2662         sched_pin();
 2663         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2664         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2665                 /*
 2666                  * Modifications are not tracked for mappings that
 2667                  * fall within the kernel's clean submap, so skip
 2668                  * them here.
 2669                  */
 2670                 if (!pmap_track_modified(pv->pv_va))
 2671                         continue;
 2672 #if defined(PMAP_DIAGNOSTIC)
 2673                 if (!pv->pv_pmap) {
 2674                         printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
 2675                         continue;
 2676                 }
 2677 #endif
 2678                 PMAP_LOCK(pv->pv_pmap);
 2679                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2680                 rv = (*pte & PG_M) != 0;
 2681                 PMAP_UNLOCK(pv->pv_pmap);
 2682                 if (rv)
 2683                         break;
 2684         }
 2685         sched_unpin();
 2686         return (rv);
 2687 }
 2688 
 2689 /*
 2690  *      pmap_is_prefaultable:
 2691  *
 2692  *      Return whether or not the specified virtual address is eligible
 2693  *      for prefault.
 2694  */
 2695 boolean_t
 2696 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 2697 {
 2698         pt_entry_t *pte;
 2699         boolean_t rv;
 2700 
 2701         rv = FALSE;
 2702         PMAP_LOCK(pmap);
 2703         if (*pmap_pde(pmap, addr)) {
 2704                 pte = vtopte(addr);
 2705                 rv = *pte == 0;
 2706         }
 2707         PMAP_UNLOCK(pmap);
 2708         return (rv);
 2709 }
 2710 
 2711 /*
 2712  *      Clear the given bit in each of the given page's ptes.  The bit is
 2713  *      expressed as a 32-bit mask.  Consequently, if the pte is 64 bits in
 2714  *      size, only a bit within the least significant 32 can be cleared.
 2715  */
 2716 static __inline void
 2717 pmap_clear_ptes(vm_page_t m, int bit)
 2718 {
 2719         register pv_entry_t pv;
 2720         pt_entry_t pbits, *pte;
 2721 
 2722         if (!pmap_initialized || (m->flags & PG_FICTITIOUS) ||
 2723             (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
 2724                 return;
 2725 
 2726         sched_pin();
 2727         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2728         /*
 2729          * Loop over all current mappings, setting/clearing as appropriate.
 2730          * If setting RO, do we need to clear the VAC?
 2731          */
 2732         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2733                 /*
 2734                  * don't write protect pager mappings
 2735                  */
 2736                 if (bit == PG_RW) {
 2737                         if (!pmap_track_modified(pv->pv_va))
 2738                                 continue;
 2739                 }
 2740 
 2741 #if defined(PMAP_DIAGNOSTIC)
 2742                 if (!pv->pv_pmap) {
 2743                         printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
 2744                         continue;
 2745                 }
 2746 #endif
 2747 
 2748                 PMAP_LOCK(pv->pv_pmap);
 2749                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2750 retry:
 2751                 pbits = *pte;
 2752                 if (pbits & bit) {
 2753                         if (bit == PG_RW) {
 2754                                 /*
 2755                                  * Regardless of whether a pte is 32 or 64 bits
 2756                                  * in size, PG_RW and PG_M are among the least
 2757                                  * significant 32 bits.
 2758                                  */
 2759                                 if (!atomic_cmpset_int((u_int *)pte, pbits,
 2760                                     pbits & ~(PG_RW | PG_M)))
 2761                                         goto retry;
 2762                                 if (pbits & PG_M) {
 2763                                         vm_page_dirty(m);
 2764                                 }
 2765                         } else {
 2766                                 atomic_clear_int((u_int *)pte, bit);
 2767                         }
 2768                         pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 2769                 }
 2770                 PMAP_UNLOCK(pv->pv_pmap);
 2771         }
 2772         if (bit == PG_RW)
 2773                 vm_page_flag_clear(m, PG_WRITEABLE);
 2774         sched_unpin();
 2775 }
 2776 
 2777 /*
 2778  *      pmap_page_protect:
 2779  *
 2780  *      Lower the permission for all mappings to a given page.
 2781  */
 2782 void
 2783 pmap_page_protect(vm_page_t m, vm_prot_t prot)
 2784 {
 2785         if ((prot & VM_PROT_WRITE) == 0) {
 2786                 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
 2787                         pmap_clear_ptes(m, PG_RW);
 2788                 } else {
 2789                         pmap_remove_all(m);
 2790                 }
 2791         }
 2792 }
 2793 
 2794 /*
 2795  *      pmap_ts_referenced:
 2796  *
 2797  *      Return a count of reference bits for a page, clearing those bits.
 2798  *      It is not necessary for every reference bit to be cleared, but it
 2799  *      is necessary that 0 only be returned when there are truly no
 2800  *      reference bits set.
 2801  *
 2802  *      XXX: The exact number of bits to check and clear is a matter that
 2803  *      should be tested and standardized at some point in the future for
 2804  *      optimal aging of shared pages.
 2805  */
 2806 int
 2807 pmap_ts_referenced(vm_page_t m)
 2808 {
 2809         register pv_entry_t pv, pvf, pvn;
 2810         pt_entry_t *pte;
 2811         pt_entry_t v;
 2812         int rtval = 0;
 2813 
 2814         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2815                 return (rtval);
 2816 
 2817         sched_pin();
 2818         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2819         if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2820 
 2821                 pvf = pv;
 2822 
 2823                 do {
 2824                         pvn = TAILQ_NEXT(pv, pv_list);
 2825 
 2826                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2827 
 2828                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2829 
 2830                         if (!pmap_track_modified(pv->pv_va))
 2831                                 continue;
 2832 
 2833                         PMAP_LOCK(pv->pv_pmap);
 2834                         pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2835 
 2836                         if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
 2837                                 atomic_clear_int((u_int *)pte, PG_A);
 2838                                 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 2839 
 2840                                 rtval++;
 2841                                 if (rtval > 4) {
 2842                                         PMAP_UNLOCK(pv->pv_pmap);
 2843                                         break;
 2844                                 }
 2845                         }
 2846                         PMAP_UNLOCK(pv->pv_pmap);
 2847                 } while ((pv = pvn) != NULL && pv != pvf);
 2848         }
 2849         sched_unpin();
 2850 
 2851         return (rtval);
 2852 }
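
      /*
       * The page daemon uses this count for page aging.  Rotating each pv
       * to the tail of the list above spreads the cleared reference bits
       * (at most five per call) across a page's mappings on successive
       * calls.
       */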
 2853 
 2854 /*
 2855  *      Clear the modify bits on the specified physical page.
 2856  */
 2857 void
 2858 pmap_clear_modify(vm_page_t m)
 2859 {
 2860         pmap_clear_ptes(m, PG_M);
 2861 }
 2862 
 2863 /*
 2864  *      pmap_clear_reference:
 2865  *
 2866  *      Clear the reference bit on the specified physical page.
 2867  */
 2868 void
 2869 pmap_clear_reference(vm_page_t m)
 2870 {
 2871         pmap_clear_ptes(m, PG_A);
 2872 }
 2873 
 2874 /*
 2875  * Miscellaneous support routines follow
 2876  */
 2877 
 2878 /*
 2879  * Map a set of physical memory pages into the kernel virtual
 2880  * address space. Return a pointer to where it is mapped. This
 2881  * routine is intended to be used for mapping device memory,
 2882  * NOT real memory.
 2883  */
 2884 void *
 2885 pmap_mapdev(pa, size)
 2886         vm_paddr_t pa;
 2887         vm_size_t size;
 2888 {
 2889         vm_offset_t va, tmpva, offset;
 2890 
 2891         offset = pa & PAGE_MASK;
 2892         size = roundup(offset + size, PAGE_SIZE);
 2893         pa = pa & PG_FRAME;
 2894 
 2895         if (pa < KERNLOAD && pa + size <= KERNLOAD)
 2896                 va = KERNBASE + pa;
 2897         else
 2898                 va = kmem_alloc_nofault(kernel_map, size);
 2899         if (!va)
 2900                 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 2901 
 2902         for (tmpva = va; size > 0; ) {
 2903                 pmap_kenter(tmpva, pa);
 2904                 size -= PAGE_SIZE;
 2905                 tmpva += PAGE_SIZE;
 2906                 pa += PAGE_SIZE;
 2907         }
 2908         pmap_invalidate_range(kernel_pmap, va, tmpva);
 2909         return ((void *)(va + offset));
 2910 }
 2911 
 2912 void
 2913 pmap_unmapdev(va, size)
 2914         vm_offset_t va;
 2915         vm_size_t size;
 2916 {
 2917         vm_offset_t base, offset, tmpva;
 2918 
 2919         if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
 2920                 return;
 2921         base = va & PG_FRAME;
 2922         offset = va & PAGE_MASK;
 2923         size = roundup(offset + size, PAGE_SIZE);
 2924         for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
 2925                 pmap_kremove(tmpva);
 2926         pmap_invalidate_range(kernel_pmap, va, tmpva);
 2927         kmem_free(kernel_map, base, size);
 2928 }
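
      /*
       * Illustrative sketch (not from this file; "bar_pa"/"bar_size" are
       * placeholders): a driver maps a device's register window for
       * programmed access and releases it on detach:
       */
#if 0
              regs = pmap_mapdev(bar_pa, bar_size);
              /* ... access the device through "regs" ... */
              pmap_unmapdev((vm_offset_t)regs, bar_size);
#endif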
 2929 
 2930 /*
 2931  * Perform the pmap work for mincore(2).
 2932  */
 2933 int
 2934 pmap_mincore(pmap, addr)
 2935         pmap_t pmap;
 2936         vm_offset_t addr;
 2937 {
 2938         pt_entry_t *ptep, pte;
 2939         vm_page_t m;
 2940         int val = 0;
 2941         
 2942         PMAP_LOCK(pmap);
 2943         ptep = pmap_pte(pmap, addr);
 2944         pte = (ptep != NULL) ? *ptep : 0;
 2945         pmap_pte_release(ptep);
 2946         PMAP_UNLOCK(pmap);
 2947 
 2948         if (pte != 0) {
 2949                 vm_paddr_t pa;
 2950 
 2951                 val = MINCORE_INCORE;
 2952                 if ((pte & PG_MANAGED) == 0)
 2953                         return val;
 2954 
 2955                 pa = pte & PG_FRAME;
 2956 
 2957                 m = PHYS_TO_VM_PAGE(pa);
 2958 
 2959                 /*
 2960                  * Modified by us
 2961                  */
 2962                 if (pte & PG_M)
 2963                         val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 2964                 else {
 2965                         /*
 2966                          * Modified by someone else
 2967                          */
 2968                         vm_page_lock_queues();
 2969                         if (m->dirty || pmap_is_modified(m))
 2970                                 val |= MINCORE_MODIFIED_OTHER;
 2971                         vm_page_unlock_queues();
 2972                 }
 2973                 /*
 2974                  * Referenced by us
 2975                  */
 2976                 if (pte & PG_A)
 2977                         val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 2978                 else {
 2979                         /*
 2980                          * Referenced by someone else
 2981                          */
 2982                         vm_page_lock_queues();
 2983                         if ((m->flags & PG_REFERENCED) ||
 2984                             pmap_ts_referenced(m)) {
 2985                                 val |= MINCORE_REFERENCED_OTHER;
 2986                                 vm_page_flag_set(m, PG_REFERENCED);
 2987                         }
 2988                         vm_page_unlock_queues();
 2989                 }
 2990         } 
 2991         return val;
 2992 }
 2993 
 2994 void
 2995 pmap_activate(struct thread *td)
 2996 {
 2997         struct proc *p = td->td_proc;
 2998         pmap_t  pmap, oldpmap;
 2999         u_int32_t  cr3;
 3000 
 3001         critical_enter();
 3002         pmap = vmspace_pmap(td->td_proc->p_vmspace);
 3003         oldpmap = PCPU_GET(curpmap);
 3004 #if defined(SMP)
 3005         atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
 3006         atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
 3007 #else
 3008         oldpmap->pm_active &= ~1;
 3009         pmap->pm_active |= 1;
 3010 #endif
 3011 #ifdef PAE
 3012         cr3 = vtophys(pmap->pm_pdpt);
 3013 #else
 3014         cr3 = vtophys(pmap->pm_pdir);
 3015 #endif
 3016         /* XXXKSE this is wrong.
 3017          * pmap_activate is for the current thread on the current cpu
 3018          */
 3019         if (p->p_flag & P_SA) {
 3020                 /* Make sure all other cr3 entries are updated. */
 3021                 /* what if they are running?  XXXKSE (maybe abort them) */
 3022                 FOREACH_THREAD_IN_PROC(p, td) {
 3023                         td->td_pcb->pcb_cr3 = cr3;
 3024                 }
 3025         } else {
 3026                 td->td_pcb->pcb_cr3 = cr3;
 3027         }
 3028         load_cr3(cr3);
 3029         PCPU_SET(curpmap, pmap);
 3030         critical_exit();
 3031 }
 3032 
 3033 vm_offset_t
 3034 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 3035 {
 3036 
 3037         if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
 3038                 return addr;
 3039         }
 3040 
 3041         addr = (addr + PDRMASK) & ~PDRMASK;
 3042         return addr;
 3043 }
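
      /*
       * A worked example of the rounding above with 4MB (non-PAE)
       * superpages, where PDRMASK == 0x3fffff:
       *
       *      (0x20123000 + 0x3fffff) & ~0x3fffff == 0x20400000
       *
       * i.e. the hint is advanced to the next superpage boundary so that
       * large device objects can be mapped with PG_PS mappings.
       */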
 3044 
 3045 
 3046 #if defined(PMAP_DEBUG)
 3047 int pmap_pid_dump(int pid)
 3048 {
 3049         pmap_t pmap;
 3050         struct proc *p;
 3051         int npte = 0;
 3052         int index;
 3053 
 3054         sx_slock(&allproc_lock);
 3055         LIST_FOREACH(p, &allproc, p_list) {
 3056                 if (p->p_pid != pid)
 3057                         continue;
 3058 
 3059                 if (p->p_vmspace) {
 3060                         int i,j;
 3061                         index = 0;
 3062                         pmap = vmspace_pmap(p->p_vmspace);
 3063                         for (i = 0; i < NPDEPTD; i++) {
 3064                                 pd_entry_t *pde;
 3065                                 pt_entry_t *pte;
 3066                                 vm_offset_t base = i << PDRSHIFT;
 3067                                 
 3068                                 pde = &pmap->pm_pdir[i];
 3069                                 if (pde && pmap_pde_v(pde)) {
 3070                                         for (j = 0; j < NPTEPG; j++) {
 3071                                                 vm_offset_t va = base + (j << PAGE_SHIFT);
 3072                                                 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 3073                                                         if (index) {
 3074                                                                 index = 0;
 3075                                                                 printf("\n");
 3076                                                         }
 3077                                                         sx_sunlock(&allproc_lock);
 3078                                                         return npte;
 3079                                                 }
 3080                                                 pte = pmap_pte(pmap, va);
 3081                                                 if (pte && pmap_pte_v(pte)) {
 3082                                                         pt_entry_t pa;
 3083                                                         vm_page_t m;
 3084                                                         pa = *pte;
 3085                                                         m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
 3086                                                         printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 3087                                                                 va, pa, m->hold_count, m->wire_count, m->flags);
 3088                                                         npte++;
 3089                                                         index++;
 3090                                                         if (index >= 2) {
 3091                                                                 index = 0;
 3092                                                                 printf("\n");
 3093                                                         } else {
 3094                                                                 printf(" ");
 3095                                                         }
 3096                                                 }
 3097                                         }
 3098                                 }
 3099                         }
 3100                 }
 3101         }
 3102         sx_sunlock(&allproc_lock);
 3103         return npte;
 3104 }
 3105 #endif
 3106 
 3107 #if defined(DEBUG)
 3108 
 3109 static void     pads(pmap_t pm);
 3110 void            pmap_pvdump(vm_paddr_t pa);
 3111 
 3112 /* Print the address space of a pmap. */
 3113 static void
 3114 pads(pmap_t pm)
 3116 {
 3117         int i, j;
 3118         vm_offset_t va;
 3119         pt_entry_t *ptep;
 3120 
 3121         if (pm == kernel_pmap)
 3122                 return;
 3123         for (i = 0; i < NPDEPTD; i++)
 3124                 if (pm->pm_pdir[i])
 3125                         for (j = 0; j < NPTEPG; j++) {
 3126                                 va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
 3127                                 if (pm == kernel_pmap && va < KERNBASE)
 3128                                         continue;
 3129                                 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
 3130                                         continue;
 3131                                 ptep = pmap_pte(pm, va);
 3132                                 if (pmap_pte_v(ptep))
 3133                                         printf("%x:%x ", va, *ptep);
 3134                         }
 3135 
 3136 }
 3137 
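      /*
       * Debug helper: print every (pmap, virtual address) pair that maps the
       * given physical address, as recorded in the pv list of the backing
       * vm_page, and dump each mapping pmap's address space via pads().
       */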
 3138 void
 3139 pmap_pvdump(vm_paddr_t pa)
 3141 {
 3142         pv_entry_t pv;
 3143         vm_page_t m;
 3144 
 3145         printf("pa %x", pa);
 3146         m = PHYS_TO_VM_PAGE(pa);
 3147         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 3148                 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
 3149                 pads(pv->pv_pmap);
 3150         }
 3151         printf(" ");
 3152 }
 3153 #endif

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.