The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/pmap.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * the Systems Programming Group of the University of Utah Computer
   11  * Science Department and William Jolitz of UUNET Technologies Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the University of
   24  *      California, Berkeley and its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   42  */
   43 /*-
   44  * Copyright (c) 2003 Networks Associates Technology, Inc.
   45  * All rights reserved.
   46  *
   47  * This software was developed for the FreeBSD Project by Jake Burkholder,
   48  * Safeport Network Services, and Network Associates Laboratories, the
   49  * Security Research Division of Network Associates, Inc. under
   50  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   51  * CHATS research program.
   52  *
   53  * Redistribution and use in source and binary forms, with or without
   54  * modification, are permitted provided that the following conditions
   55  * are met:
   56  * 1. Redistributions of source code must retain the above copyright
   57  *    notice, this list of conditions and the following disclaimer.
   58  * 2. Redistributions in binary form must reproduce the above copyright
   59  *    notice, this list of conditions and the following disclaimer in the
   60  *    documentation and/or other materials provided with the distribution.
   61  *
   62  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   72  * SUCH DAMAGE.
   73  */
   74 
   75 #include <sys/cdefs.h>
   76 __FBSDID("$FreeBSD: releng/5.2/sys/i386/i386/pmap.c 122284 2003-11-08 03:01:26Z alc $");
   77 
   78 /*
   79  *      Manages physical address maps.
   80  *
   81  *      In addition to hardware address maps, this
   82  *      module is called upon to provide software-use-only
   83  *      maps which may or may not be stored in the same
   84  *      form as hardware maps.  These pseudo-maps are
   85  *      used to store intermediate results from copy
   86  *      operations to and from address spaces.
   87  *
   88  *      Since the information managed by this module is
   89  *      also stored by the logical address mapping module,
   90  *      this module may throw away valid virtual-to-physical
   91  *      mappings at almost any time.  However, invalidations
   92  *      of virtual-to-physical mappings must be done as
   93  *      requested.
   94  *
   95  *      In order to cope with hardware architectures which
   96  *      make virtual-to-physical map invalidates expensive,
   97  *      this module may delay invalidate or reduced protection
   98  *      operations until such time as they are actually
   99  *      necessary.  This module is given full information as
  100  *      to which processors are currently using which maps,
  101  *      and to when physical maps must be made correct.
  102  */
  103 
  104 #include "opt_pmap.h"
  105 #include "opt_msgbuf.h"
  106 #include "opt_kstack_pages.h"
  107 
  108 #include <sys/param.h>
  109 #include <sys/systm.h>
  110 #include <sys/kernel.h>
  111 #include <sys/lock.h>
  112 #include <sys/mman.h>
  113 #include <sys/msgbuf.h>
  114 #include <sys/mutex.h>
  115 #include <sys/proc.h>
  116 #include <sys/sx.h>
  117 #include <sys/user.h>
  118 #include <sys/vmmeter.h>
  119 #include <sys/sysctl.h>
  120 #ifdef SMP
  121 #include <sys/smp.h>
  122 #endif
  123 
  124 #include <vm/vm.h>
  125 #include <vm/vm_param.h>
  126 #include <vm/vm_kern.h>
  127 #include <vm/vm_page.h>
  128 #include <vm/vm_map.h>
  129 #include <vm/vm_object.h>
  130 #include <vm/vm_extern.h>
  131 #include <vm/vm_pageout.h>
  132 #include <vm/vm_pager.h>
  133 #include <vm/uma.h>
  134 
  135 #include <machine/cpu.h>
  136 #include <machine/cputypes.h>
  137 #include <machine/md_var.h>
  138 #include <machine/specialreg.h>
  139 #ifdef SMP
  140 #include <machine/smp.h>
  141 #endif
  142 
  143 #define PMAP_KEEP_PDIRS
  144 #ifndef PMAP_SHPGPERPROC
  145 #define PMAP_SHPGPERPROC 200
  146 #endif
  147 
  148 #if defined(DIAGNOSTIC)
  149 #define PMAP_DIAGNOSTIC
  150 #endif
  151 
  152 #define MINPV 2048
  153 
  154 #if !defined(PMAP_DIAGNOSTIC)
  155 #define PMAP_INLINE __inline
  156 #else
  157 #define PMAP_INLINE
  158 #endif
  159 
  160 /*
  161  * Get PDEs and PTEs for user/kernel address space
  162  */
  163 #define pmap_pde(m, v)  (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
  164 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
  165 
  166 #define pmap_pde_v(pte)         ((*(int *)pte & PG_V) != 0)
  167 #define pmap_pte_w(pte)         ((*(int *)pte & PG_W) != 0)
  168 #define pmap_pte_m(pte)         ((*(int *)pte & PG_M) != 0)
  169 #define pmap_pte_u(pte)         ((*(int *)pte & PG_A) != 0)
  170 #define pmap_pte_v(pte)         ((*(int *)pte & PG_V) != 0)
  171 
  172 #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
  173 #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
  174 
  175 /*
  176  * Given a map and a machine independent protection code,
  177  * convert to a vax protection code.
  178  */
  179 #define pte_prot(m, p)  (protection_codes[p])
  180 static int protection_codes[8];
  181 
  182 struct pmap kernel_pmap_store;
  183 LIST_HEAD(pmaplist, pmap);
  184 static struct pmaplist allpmaps;
  185 static struct mtx allpmaps_lock;
  186 #ifdef SMP
  187 static struct mtx lazypmap_lock;
  188 #endif
  189 
  190 vm_paddr_t avail_start; /* PA of first available physical page */
  191 vm_paddr_t avail_end;   /* PA of last available physical page */
  192 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  193 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  194 static boolean_t pmap_initialized = FALSE;      /* Has pmap_init completed? */
  195 int pgeflag = 0;                /* PG_G or-in */
  196 int pseflag = 0;                /* PG_PS or-in */
  197 
  198 static int nkpt;
  199 vm_offset_t kernel_vm_end;
  200 extern u_int32_t KERNend;
  201 
  202 #ifdef PAE
  203 static uma_zone_t pdptzone;
  204 #endif
  205 
  206 /*
  207  * Data for the pv entry allocation mechanism
  208  */
  209 static uma_zone_t pvzone;
  210 static struct vm_object pvzone_obj;
  211 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  212 int pmap_pagedaemon_waken;
  213 
  214 /*
  215  * All those kernel PT submaps that BSD is so fond of
  216  */
  217 pt_entry_t *CMAP1 = 0;
  218 static pt_entry_t *CMAP2, *CMAP3, *ptmmap;
  219 caddr_t CADDR1 = 0, ptvmmap = 0;
  220 static caddr_t CADDR2, CADDR3;
  221 static struct mtx CMAPCADDR12_lock;
  222 static pt_entry_t *msgbufmap;
  223 struct msgbuf *msgbufp = 0;
  224 
  225 /*
  226  * Crashdump maps.
  227  */
  228 static pt_entry_t *pt_crashdumpmap;
  229 static caddr_t crashdumpmap;
  230 
  231 #ifdef SMP
  232 extern pt_entry_t *SMPpt;
  233 #endif
  234 static pt_entry_t *PMAP1 = 0, *PMAP2;
  235 static pt_entry_t *PADDR1 = 0, *PADDR2;
  236 
  237 static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
  238 static pv_entry_t get_pv_entry(void);
  239 static void     i386_protection_init(void);
  240 static void     pmap_clear_ptes(vm_page_t m, int bit)
  241     __always_inline;
  242 
  243 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
  244 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
  245 static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
  246                                         vm_offset_t va);
  247 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va,
  248                 vm_page_t mpte, vm_page_t m);
  249 
  250 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va);
  251 
  252 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex);
  253 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
  254 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
  255 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  256 static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
  257 #ifdef PAE
  258 static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
  259 #endif
  260 
  261 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
  262 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
  263 
  264 /*
  265  * Move the kernel virtual free pointer to the next
  266  * 4MB.  This is used to help improve performance
  267  * by using a large (4MB) page for much of the kernel
  268  * (.text, .data, .bss)
  269  */
  270 static vm_offset_t
  271 pmap_kmem_choose(vm_offset_t addr)
  272 {
  273         vm_offset_t newaddr = addr;
  274 
  275 #ifndef DISABLE_PSE
  276         if (cpu_feature & CPUID_PSE)
  277                 newaddr = (addr + PDRMASK) & ~PDRMASK;
  278 #endif
  279         return newaddr;
  280 }
  281 
  282 /*
  283  *      Bootstrap the system enough to run with virtual memory.
  284  *
  285  *      On the i386 this is called after mapping has already been enabled
  286  *      and just syncs the pmap module with what has already been done.
  287  *      [We can't call it easily with mapping off since the kernel is not
  288  *      mapped with PA == VA, hence we would have to relocate every address
  289  *      from the linked base (virtual) address "KERNBASE" to the actual
  290  *      (physical) address starting relative to 0]
  291  */
  292 void
  293 pmap_bootstrap(firstaddr, loadaddr)
  294         vm_paddr_t firstaddr;
  295         vm_paddr_t loadaddr;
  296 {
  297         vm_offset_t va;
  298         pt_entry_t *pte;
  299         int i;
  300 
  301         avail_start = firstaddr;
  302 
  303         /*
  304          * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
  305          * large. It should instead be correctly calculated in locore.s and
  306          * not based on 'first' (which is a physical address, not a virtual
  307          * address, for the start of unused physical memory). The kernel
  308          * page tables are NOT double mapped and thus should not be included
  309          * in this calculation.
  310          */
  311         virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
  312         virtual_avail = pmap_kmem_choose(virtual_avail);
  313 
  314         virtual_end = VM_MAX_KERNEL_ADDRESS;
  315 
  316         /*
  317          * Initialize protection array.
  318          */
  319         i386_protection_init();
  320 
  321         /*
  322          * Initialize the kernel pmap (which is statically allocated).
  323          */
  324         kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
  325 #ifdef PAE
  326         kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
  327 #endif
  328         kernel_pmap->pm_active = -1;    /* don't allow deactivation */
  329         TAILQ_INIT(&kernel_pmap->pm_pvlist);
  330         LIST_INIT(&allpmaps);
  331 #ifdef SMP
  332         mtx_init(&lazypmap_lock, "lazypmap", NULL, MTX_SPIN);
  333 #endif
  334         mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
  335         mtx_lock_spin(&allpmaps_lock);
  336         LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
  337         mtx_unlock_spin(&allpmaps_lock);
  338         nkpt = NKPT;
  339 
  340         /*
  341          * Reserve some special page table entries/VA space for temporary
  342          * mapping of pages.
  343          */
  344 #define SYSMAP(c, p, v, n)      \
  345         v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
  346 
  347         va = virtual_avail;
  348         pte = vtopte(va);
  349 
  350         /*
  351          * CMAP1/CMAP2 are used for zeroing and copying pages.
  352          * CMAP3 is used for the idle process page zeroing.
  353          */
  354         SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  355         SYSMAP(caddr_t, CMAP2, CADDR2, 1)
  356         SYSMAP(caddr_t, CMAP3, CADDR3, 1)
  357         *CMAP3 = 0;
  358 
  359         mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF);
  360 
  361         /*
  362          * Crashdump maps.
  363          */
  364         SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
  365 
  366         /*
  367          * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
  368          * XXX ptmmap is not used.
  369          */
  370         SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
  371 
  372         /*
  373          * msgbufp is used to map the system message buffer.
  374          * XXX msgbufmap is not used.
  375          */
  376         SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
  377                atop(round_page(MSGBUF_SIZE)))
  378 
  379         /*
  380          * ptemap is used for pmap_pte_quick
  381          */
  382         SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
  383         SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
  384 
  385         virtual_avail = va;
  386 
  387         *CMAP1 = *CMAP2 = 0;
  388         for (i = 0; i < NKPT; i++)
  389                 PTD[i] = 0;
  390 
  391         /* Turn on PG_G on kernel page(s) */
  392         pmap_set_pg();
  393 }
  394 
  395 /*
  396  * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
  397  */
  398 void
  399 pmap_set_pg(void)
  400 {
  401         pd_entry_t pdir;
  402         pt_entry_t *pte;
  403         vm_offset_t va, endva;
  404         int i; 
  405 
  406         if (pgeflag == 0)
  407                 return;
  408 
  409         i = KERNLOAD/NBPDR;
  410         endva = KERNBASE + KERNend;
  411 
  412         if (pseflag) {
  413                 va = KERNBASE + KERNLOAD;
  414                 while (va  < endva) {
  415                         pdir = kernel_pmap->pm_pdir[KPTDI+i];
  416                         pdir |= pgeflag;
  417                         kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
  418                         invltlb();      /* Play it safe, invltlb() every time */
  419                         i++;
  420                         va += NBPDR;
  421                 }
  422         } else {
  423                 va = (vm_offset_t)btext;
  424                 while (va < endva) {
  425                         pte = vtopte(va);
  426                         if (*pte)
  427                                 *pte |= pgeflag;
  428                         invltlb();      /* Play it safe, invltlb() every time */
  429                         va += PAGE_SIZE;
  430                 }
  431         }
  432 }
  433 
  434 static void *
  435 pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  436 {
  437         *flags = UMA_SLAB_PRIV;
  438         return (void *)kmem_alloc(kernel_map, bytes);
  439 }
  440 
  441 #ifdef PAE
  442 static void *
  443 pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
  444 {
  445         *flags = UMA_SLAB_PRIV;
  446         return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0));
  447 }
  448 #endif
  449 
  450 /*
  451  *      Initialize the pmap module.
  452  *      Called by vm_init, to initialize any structures that the pmap
  453  *      system needs to map virtual memory.
  454  *      pmap_init has been enhanced to support in a fairly consistant
  455  *      way, discontiguous physical memory.
  456  */
  457 void
  458 pmap_init(phys_start, phys_end)
  459         vm_paddr_t phys_start, phys_end;
  460 {
  461         int i;
  462         int initial_pvs;
  463 
  464         /*
  465          * Allocate memory for random pmap data structures.  Includes the
  466          * pv_head_table.
  467          */
  468 
  469         for(i = 0; i < vm_page_array_size; i++) {
  470                 vm_page_t m;
  471 
  472                 m = &vm_page_array[i];
  473                 TAILQ_INIT(&m->md.pv_list);
  474                 m->md.pv_list_count = 0;
  475         }
  476 
  477         /*
  478          * init the pv free list
  479          */
  480         initial_pvs = vm_page_array_size;
  481         if (initial_pvs < MINPV)
  482                 initial_pvs = MINPV;
  483         pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, 
  484             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
  485         uma_zone_set_allocf(pvzone, pmap_pv_allocf);
  486         uma_prealloc(pvzone, initial_pvs);
  487 
  488 #ifdef PAE
  489         pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
  490             NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
  491             UMA_ZONE_VM | UMA_ZONE_NOFREE);
  492         uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
  493 #endif
  494 
  495         /*
  496          * Now it is safe to enable pv_table recording.
  497          */
  498         pmap_initialized = TRUE;
  499 }
  500 
  501 /*
  502  * Initialize the address space (zone) for the pv_entries.  Set a
  503  * high water mark so that the system can recover from excessive
  504  * numbers of pv entries.
  505  */
  506 void
  507 pmap_init2()
  508 {
  509         int shpgperproc = PMAP_SHPGPERPROC;
  510 
  511         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  512         pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
  513         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  514         pv_entry_high_water = 9 * (pv_entry_max / 10);
  515         uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
  516 }
  517 
  518 
  519 /***************************************************
  520  * Low level helper routines.....
  521  ***************************************************/
  522 
  523 #if defined(PMAP_DIAGNOSTIC)
  524 
  525 /*
  526  * This code checks for non-writeable/modified pages.
  527  * This should be an invalid condition.
  528  */
  529 static int
  530 pmap_nw_modified(pt_entry_t ptea)
  531 {
  532         int pte;
  533 
  534         pte = (int) ptea;
  535 
  536         if ((pte & (PG_M|PG_RW)) == PG_M)
  537                 return 1;
  538         else
  539                 return 0;
  540 }
  541 #endif
  542 
  543 
  544 /*
  545  * this routine defines the region(s) of memory that should
  546  * not be tested for the modified bit.
  547  */
  548 static PMAP_INLINE int
  549 pmap_track_modified(vm_offset_t va)
  550 {
  551         if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 
  552                 return 1;
  553         else
  554                 return 0;
  555 }
  556 
  557 #ifdef I386_CPU
  558 /*
  559  * i386 only has "invalidate everything" and no SMP to worry about.
  560  */
  561 PMAP_INLINE void
  562 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  563 {
  564 
  565         if (pmap == kernel_pmap || pmap->pm_active)
  566                 invltlb();
  567 }
  568 
  569 PMAP_INLINE void
  570 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  571 {
  572 
  573         if (pmap == kernel_pmap || pmap->pm_active)
  574                 invltlb();
  575 }
  576 
  577 PMAP_INLINE void
  578 pmap_invalidate_all(pmap_t pmap)
  579 {
  580 
  581         if (pmap == kernel_pmap || pmap->pm_active)
  582                 invltlb();
  583 }
  584 #else /* !I386_CPU */
  585 #ifdef SMP
  586 /*
  587  * For SMP, these functions have to use the IPI mechanism for coherence.
  588  */
  589 void
  590 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  591 {
  592         u_int cpumask;
  593         u_int other_cpus;
  594 
  595         if (smp_started) {
  596                 if (!(read_eflags() & PSL_I))
  597                         panic("%s: interrupts disabled", __func__);
  598                 mtx_lock_spin(&smp_tlb_mtx);
  599         } else
  600                 critical_enter();
  601         /*
  602          * We need to disable interrupt preemption but MUST NOT have
  603          * interrupts disabled here.
  604          * XXX we may need to hold schedlock to get a coherent pm_active
  605          * XXX critical sections disable interrupts again
  606          */
  607         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  608                 invlpg(va);
  609                 smp_invlpg(va);
  610         } else {
  611                 cpumask = PCPU_GET(cpumask);
  612                 other_cpus = PCPU_GET(other_cpus);
  613                 if (pmap->pm_active & cpumask)
  614                         invlpg(va);
  615                 if (pmap->pm_active & other_cpus)
  616                         smp_masked_invlpg(pmap->pm_active & other_cpus, va);
  617         }
  618         if (smp_started)
  619                 mtx_unlock_spin(&smp_tlb_mtx);
  620         else
  621                 critical_exit();
  622 }
  623 
  624 void
  625 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  626 {
  627         u_int cpumask;
  628         u_int other_cpus;
  629         vm_offset_t addr;
  630 
  631         if (smp_started) {
  632                 if (!(read_eflags() & PSL_I))
  633                         panic("%s: interrupts disabled", __func__);
  634                 mtx_lock_spin(&smp_tlb_mtx);
  635         } else
  636                 critical_enter();
  637         /*
  638          * We need to disable interrupt preemption but MUST NOT have
  639          * interrupts disabled here.
  640          * XXX we may need to hold schedlock to get a coherent pm_active
  641          * XXX critical sections disable interrupts again
  642          */
  643         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  644                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  645                         invlpg(addr);
  646                 smp_invlpg_range(sva, eva);
  647         } else {
  648                 cpumask = PCPU_GET(cpumask);
  649                 other_cpus = PCPU_GET(other_cpus);
  650                 if (pmap->pm_active & cpumask)
  651                         for (addr = sva; addr < eva; addr += PAGE_SIZE)
  652                                 invlpg(addr);
  653                 if (pmap->pm_active & other_cpus)
  654                         smp_masked_invlpg_range(pmap->pm_active & other_cpus,
  655                             sva, eva);
  656         }
  657         if (smp_started)
  658                 mtx_unlock_spin(&smp_tlb_mtx);
  659         else
  660                 critical_exit();
  661 }
  662 
  663 void
  664 pmap_invalidate_all(pmap_t pmap)
  665 {
  666         u_int cpumask;
  667         u_int other_cpus;
  668 
  669         if (smp_started) {
  670                 if (!(read_eflags() & PSL_I))
  671                         panic("%s: interrupts disabled", __func__);
  672                 mtx_lock_spin(&smp_tlb_mtx);
  673         } else
  674                 critical_enter();
  675         /*
  676          * We need to disable interrupt preemption but MUST NOT have
  677          * interrupts disabled here.
  678          * XXX we may need to hold schedlock to get a coherent pm_active
  679          * XXX critical sections disable interrupts again
  680          */
  681         if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
  682                 invltlb();
  683                 smp_invltlb();
  684         } else {
  685                 cpumask = PCPU_GET(cpumask);
  686                 other_cpus = PCPU_GET(other_cpus);
  687                 if (pmap->pm_active & cpumask)
  688                         invltlb();
  689                 if (pmap->pm_active & other_cpus)
  690                         smp_masked_invltlb(pmap->pm_active & other_cpus);
  691         }
  692         if (smp_started)
  693                 mtx_unlock_spin(&smp_tlb_mtx);
  694         else
  695                 critical_exit();
  696 }
  697 #else /* !SMP */
  698 /*
  699  * Normal, non-SMP, 486+ invalidation functions.
  700  * We inline these within pmap.c for speed.
  701  */
  702 PMAP_INLINE void
  703 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  704 {
  705 
  706         if (pmap == kernel_pmap || pmap->pm_active)
  707                 invlpg(va);
  708 }
  709 
  710 PMAP_INLINE void
  711 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  712 {
  713         vm_offset_t addr;
  714 
  715         if (pmap == kernel_pmap || pmap->pm_active)
  716                 for (addr = sva; addr < eva; addr += PAGE_SIZE)
  717                         invlpg(addr);
  718 }
  719 
  720 PMAP_INLINE void
  721 pmap_invalidate_all(pmap_t pmap)
  722 {
  723 
  724         if (pmap == kernel_pmap || pmap->pm_active)
  725                 invltlb();
  726 }
  727 #endif /* !SMP */
  728 #endif /* !I386_CPU */
  729 
  730 /*
  731  * Are we current address space or kernel?  N.B. We return FALSE when
  732  * a pmap's page table is in use because a kernel thread is borrowing
  733  * it.  The borrowed page table can change spontaneously, making any
  734  * dependence on its continued use subject to a race condition.
  735  */
  736 static __inline int
  737 pmap_is_current(pmap_t pmap)
  738 {
  739 
  740         return (pmap == kernel_pmap ||
  741                 (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
  742             (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
  743 }
  744 
  745 /*
  746  * If the given pmap is not the current pmap, Giant must be held.
  747  */
  748 pt_entry_t *
  749 pmap_pte(pmap_t pmap, vm_offset_t va)
  750 {
  751         pd_entry_t newpf;
  752         pd_entry_t *pde;
  753 
  754         pde = pmap_pde(pmap, va);
  755         if (*pde & PG_PS)
  756                 return (pde);
  757         if (*pde != 0) {
  758                 /* are we current address space or kernel? */
  759                 if (pmap_is_current(pmap))
  760                         return (vtopte(va));
  761                 GIANT_REQUIRED;
  762                 newpf = *pde & PG_FRAME;
  763                 if ((*PMAP2 & PG_FRAME) != newpf) {
  764                         *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
  765                         pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
  766                 }
  767                 return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
  768         }
  769         return (0);
  770 }
  771 
  772 /*
  773  * Super fast pmap_pte routine best used when scanning
  774  * the pv lists.  This eliminates many coarse-grained
  775  * invltlb calls.  Note that many of the pv list
  776  * scans are across different pmaps.  It is very wasteful
  777  * to do an entire invltlb for checking a single mapping.
  778  *
  779  * If the given pmap is not the current pmap, vm_page_queue_mtx
  780  * must be held.
  781  */
  782 static pt_entry_t *
  783 pmap_pte_quick(pmap_t pmap, vm_offset_t va)
  784 {
  785         pd_entry_t newpf;
  786         pd_entry_t *pde;
  787 
  788         pde = pmap_pde(pmap, va);
  789         if (*pde & PG_PS)
  790                 return (pde);
  791         if (*pde != 0) {
  792                 /* are we current address space or kernel? */
  793                 if (pmap_is_current(pmap))
  794                         return (vtopte(va));
  795                 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
  796                 newpf = *pde & PG_FRAME;
  797                 if ((*PMAP1 & PG_FRAME) != newpf) {
  798                         *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
  799                         pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1);
  800                 }
  801                 return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
  802         }
  803         return (0);
  804 }
  805 
  806 /*
  807  *      Routine:        pmap_extract
  808  *      Function:
  809  *              Extract the physical page address associated
  810  *              with the given map/virtual_address pair.
  811  */
  812 vm_paddr_t 
  813 pmap_extract(pmap, va)
  814         register pmap_t pmap;
  815         vm_offset_t va;
  816 {
  817         vm_paddr_t rtval;
  818         pt_entry_t *pte;
  819         pd_entry_t pde;
  820 
  821         if (pmap == 0)
  822                 return 0;
  823         pde = pmap->pm_pdir[va >> PDRSHIFT];
  824         if (pde != 0) {
  825                 if ((pde & PG_PS) != 0) {
  826                         rtval = (pde & ~PDRMASK) | (va & PDRMASK);
  827                         return rtval;
  828                 }
  829                 pte = pmap_pte(pmap, va);
  830                 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
  831                 return rtval;
  832         }
  833         return 0;
  834 
  835 }
  836 
  837 /*
  838  *      Routine:        pmap_extract_and_hold
  839  *      Function:
  840  *              Atomically extract and hold the physical page
  841  *              with the given pmap and virtual address pair
  842  *              if that mapping permits the given protection.
  843  */
  844 vm_page_t
  845 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
  846 {
  847         vm_paddr_t pa;
  848         vm_page_t m;
  849 
  850         m = NULL;
  851         mtx_lock(&Giant);
  852         if ((pa = pmap_extract(pmap, va)) != 0) {
  853                 m = PHYS_TO_VM_PAGE(pa);
  854                 vm_page_lock_queues();
  855                 vm_page_hold(m);
  856                 vm_page_unlock_queues();
  857         }
  858         mtx_unlock(&Giant);
  859         return (m);
  860 }
  861 
  862 /***************************************************
  863  * Low level mapping routines.....
  864  ***************************************************/
  865 
  866 /*
  867  * Add a wired page to the kva.
  868  * Note: not SMP coherent.
  869  */
  870 PMAP_INLINE void 
  871 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
  872 {
  873         pt_entry_t *pte;
  874 
  875         pte = vtopte(va);
  876         pte_store(pte, pa | PG_RW | PG_V | pgeflag);
  877 }
  878 
  879 /*
  880  * Remove a page from the kernel pagetables.
  881  * Note: not SMP coherent.
  882  */
  883 PMAP_INLINE void
  884 pmap_kremove(vm_offset_t va)
  885 {
  886         pt_entry_t *pte;
  887 
  888         pte = vtopte(va);
  889         pte_clear(pte);
  890 }
  891 
  892 /*
  893  *      Used to map a range of physical addresses into kernel
  894  *      virtual address space.
  895  *
  896  *      The value passed in '*virt' is a suggested virtual address for
  897  *      the mapping. Architectures which can support a direct-mapped
  898  *      physical to virtual region can return the appropriate address
  899  *      within that region, leaving '*virt' unchanged. Other
  900  *      architectures should map the pages starting at '*virt' and
  901  *      update '*virt' with the first usable address after the mapped
  902  *      region.
  903  */
  904 vm_offset_t
  905 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
  906 {
  907         vm_offset_t va, sva;
  908 
  909         va = sva = *virt;
  910         while (start < end) {
  911                 pmap_kenter(va, start);
  912                 va += PAGE_SIZE;
  913                 start += PAGE_SIZE;
  914         }
  915         pmap_invalidate_range(kernel_pmap, sva, va);
  916         *virt = va;
  917         return (sva);
  918 }
  919 
  920 
  921 /*
  922  * Add a list of wired pages to the kva
  923  * this routine is only used for temporary
  924  * kernel mappings that do not need to have
  925  * page modification or references recorded.
  926  * Note that old mappings are simply written
  927  * over.  The page *must* be wired.
  928  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  929  */
  930 void
  931 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
  932 {
  933         vm_offset_t va;
  934 
  935         va = sva;
  936         while (count-- > 0) {
  937                 pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
  938                 va += PAGE_SIZE;
  939                 m++;
  940         }
  941         pmap_invalidate_range(kernel_pmap, sva, va);
  942 }
  943 
  944 /*
  945  * This routine tears out page mappings from the
  946  * kernel -- it is meant only for temporary mappings.
  947  * Note: SMP coherent.  Uses a ranged shootdown IPI.
  948  */
  949 void
  950 pmap_qremove(vm_offset_t sva, int count)
  951 {
  952         vm_offset_t va;
  953 
  954         va = sva;
  955         while (count-- > 0) {
  956                 pmap_kremove(va);
  957                 va += PAGE_SIZE;
  958         }
  959         pmap_invalidate_range(kernel_pmap, sva, va);
  960 }
  961 
  962 /***************************************************
  963  * Page table page management routines.....
  964  ***************************************************/
  965 
  966 /*
  967  * This routine unholds page table pages, and if the hold count
  968  * drops to zero, then it decrements the wire count.
  969  */
  970 static int 
  971 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
  972 {
  973 
  974         while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt"))
  975                 vm_page_lock_queues();
  976 
  977         if (m->hold_count == 0) {
  978                 vm_offset_t pteva;
  979                 /*
  980                  * unmap the page table page
  981                  */
  982                 pmap->pm_pdir[m->pindex] = 0;
  983                 --pmap->pm_stats.resident_count;
  984                 /*
  985                  * We never unwire a kernel page table page, making a
  986                  * check for the kernel_pmap unnecessary.
  987                  */
  988                 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)) {
  989                         /*
  990                          * Do an invltlb to make the invalidated mapping
  991                          * take effect immediately.
  992                          */
  993                         pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
  994                         pmap_invalidate_page(pmap, pteva);
  995                 }
  996 
  997                 /*
  998                  * If the page is finally unwired, simply free it.
  999                  */
 1000                 --m->wire_count;
 1001                 if (m->wire_count == 0) {
 1002                         vm_page_busy(m);
 1003                         vm_page_free_zero(m);
 1004                         atomic_subtract_int(&cnt.v_wire_count, 1);
 1005                 }
 1006                 return 1;
 1007         }
 1008         return 0;
 1009 }
 1010 
 1011 static PMAP_INLINE int
 1012 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
 1013 {
 1014         vm_page_unhold(m);
 1015         if (m->hold_count == 0)
 1016                 return _pmap_unwire_pte_hold(pmap, m);
 1017         else
 1018                 return 0;
 1019 }
 1020 
 1021 /*
 1022  * After removing a page table entry, this routine is used to
 1023  * conditionally free the page, and manage the hold/wire counts.
 1024  */
 1025 static int
 1026 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
 1027 {
 1028 
 1029         if (va >= VM_MAXUSER_ADDRESS)
 1030                 return 0;
 1031 
 1032         return pmap_unwire_pte_hold(pmap, mpte);
 1033 }
 1034 
 1035 void
 1036 pmap_pinit0(pmap)
 1037         struct pmap *pmap;
 1038 {
 1039 
 1040         pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 1041 #ifdef PAE
 1042         pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 1043 #endif
 1044         pmap->pm_active = 0;
 1045         PCPU_SET(curpmap, pmap);
 1046         TAILQ_INIT(&pmap->pm_pvlist);
 1047         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1048         mtx_lock_spin(&allpmaps_lock);
 1049         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1050         mtx_unlock_spin(&allpmaps_lock);
 1051 }
 1052 
 1053 /*
 1054  * Initialize a preallocated and zeroed pmap structure,
 1055  * such as one in a vmspace structure.
 1056  */
 1057 void
 1058 pmap_pinit(pmap)
 1059         register struct pmap *pmap;
 1060 {
 1061         vm_page_t m, ptdpg[NPGPTD];
 1062         vm_paddr_t pa;
 1063         static int color;
 1064         int i;
 1065 
 1066         /*
 1067          * No need to allocate page table space yet but we do need a valid
 1068          * page directory table.
 1069          */
 1070         if (pmap->pm_pdir == NULL) {
 1071                 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
 1072                     NBPTD);
 1073 #ifdef PAE
 1074                 pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
 1075                 KASSERT(((vm_offset_t)pmap->pm_pdpt &
 1076                     ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
 1077                     ("pmap_pinit: pdpt misaligned"));
 1078                 KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
 1079                     ("pmap_pinit: pdpt above 4g"));
 1080 #endif
 1081         }
 1082 
 1083         /*
 1084          * allocate the page directory page(s)
 1085          */
 1086         for (i = 0; i < NPGPTD;) {
 1087                 m = vm_page_alloc(NULL, color++,
 1088                     VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 1089                     VM_ALLOC_ZERO);
 1090                 if (m == NULL)
 1091                         VM_WAIT;
 1092                 else {
 1093                         vm_page_lock_queues();
 1094                         vm_page_flag_clear(m, PG_BUSY);
 1095                         vm_page_unlock_queues();
 1096                         ptdpg[i++] = m;
 1097                 }
 1098         }
 1099 
 1100         pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 1101 
 1102         for (i = 0; i < NPGPTD; i++) {
 1103                 if ((ptdpg[i]->flags & PG_ZERO) == 0)
 1104                         bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
 1105         }
 1106 
 1107         mtx_lock_spin(&allpmaps_lock);
 1108         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1109         mtx_unlock_spin(&allpmaps_lock);
 1110         /* Wire in kernel global address entries. */
 1111         /* XXX copies current process, does not fill in MPPTDI */
 1112         bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 1113 #ifdef SMP
 1114         pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
 1115 #endif
 1116 
 1117         /* install self-referential address mapping entry(s) */
 1118         for (i = 0; i < NPGPTD; i++) {
 1119                 pa = VM_PAGE_TO_PHYS(ptdpg[i]);
 1120                 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
 1121 #ifdef PAE
 1122                 pmap->pm_pdpt[i] = pa | PG_V;
 1123 #endif
 1124         }
 1125 
 1126         pmap->pm_active = 0;
 1127         TAILQ_INIT(&pmap->pm_pvlist);
 1128         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1129 }
 1130 
 1131 /*
 1132  * Wire in kernel global address entries.  To avoid a race condition
 1133  * between pmap initialization and pmap_growkernel, this procedure
 1134  * should be called after the vmspace is attached to the process
 1135  * but before this pmap is activated.
 1136  */
 1137 void
 1138 pmap_pinit2(pmap)
 1139         struct pmap *pmap;
 1140 {
 1141         /* XXX: Remove this stub when no longer called */
 1142 }
 1143 
 1144 /*
 1145  * this routine is called if the page table page is not
 1146  * mapped correctly.
 1147  */
 1148 static vm_page_t
 1149 _pmap_allocpte(pmap, ptepindex)
 1150         pmap_t  pmap;
 1151         unsigned ptepindex;
 1152 {
 1153         vm_paddr_t ptepa;
 1154         vm_page_t m;
 1155 
 1156         /*
 1157          * Allocate a page table page.
 1158          */
 1159         if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 1160             VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 1161                 VM_WAIT;
 1162                 /*
 1163                  * Indicate the need to retry.  While waiting, the page table
 1164                  * page may have been allocated.
 1165                  */
 1166                 return (NULL);
 1167         }
 1168         if ((m->flags & PG_ZERO) == 0)
 1169                 pmap_zero_page(m);
 1170 
 1171         KASSERT(m->queue == PQ_NONE,
 1172                 ("_pmap_allocpte: %p->queue != PQ_NONE", m));
 1173 
 1174         /*
 1175          * Increment the hold count for the page table page
 1176          * (denoting a new mapping.)
 1177          */
 1178         m->hold_count++;
 1179 
 1180         /*
 1181          * Map the pagetable page into the process address space, if
 1182          * it isn't already there.
 1183          */
 1184 
 1185         pmap->pm_stats.resident_count++;
 1186 
 1187         ptepa = VM_PAGE_TO_PHYS(m);
 1188         pmap->pm_pdir[ptepindex] =
 1189                 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 1190 
 1191         vm_page_lock_queues();
 1192         vm_page_flag_clear(m, PG_ZERO);
 1193         vm_page_wakeup(m);
 1194         vm_page_unlock_queues();
 1195 
 1196         return m;
 1197 }
 1198 
 1199 static vm_page_t
 1200 pmap_allocpte(pmap_t pmap, vm_offset_t va)
 1201 {
 1202         unsigned ptepindex;
 1203         pd_entry_t ptepa;
 1204         vm_page_t m;
 1205 
 1206         /*
 1207          * Calculate pagetable page index
 1208          */
 1209         ptepindex = va >> PDRSHIFT;
 1210 retry:
 1211         /*
 1212          * Get the page directory entry
 1213          */
 1214         ptepa = pmap->pm_pdir[ptepindex];
 1215 
 1216         /*
 1217          * This supports switching from a 4MB page to a
 1218          * normal 4K page.
 1219          */
 1220         if (ptepa & PG_PS) {
 1221                 pmap->pm_pdir[ptepindex] = 0;
 1222                 ptepa = 0;
 1223                 pmap_invalidate_all(kernel_pmap);
 1224         }
 1225 
 1226         /*
 1227          * If the page table page is mapped, we just increment the
 1228          * hold count, and activate it.
 1229          */
 1230         if (ptepa) {
 1231                 m = PHYS_TO_VM_PAGE(ptepa);
 1232                 m->hold_count++;
 1233         } else {
 1234                 /*
 1235                  * Here if the pte page isn't mapped, or if it has
 1236                  * been deallocated. 
 1237                  */
 1238                 m = _pmap_allocpte(pmap, ptepindex);
 1239                 if (m == NULL)
 1240                         goto retry;
 1241         }
 1242         return (m);
 1243 }
 1244 
 1245 
 1246 /***************************************************
 1247 * Pmap allocation/deallocation routines.
 1248  ***************************************************/
 1249 
 1250 #ifdef SMP
 1251 /*
 1252  * Deal with a SMP shootdown of other users of the pmap that we are
 1253  * trying to dispose of.  This can be a bit hairy.
 1254  */
 1255 static u_int *lazymask;
 1256 static u_int lazyptd;
 1257 static volatile u_int lazywait;
 1258 
 1259 void pmap_lazyfix_action(void);
 1260 
 1261 void
 1262 pmap_lazyfix_action(void)
 1263 {
 1264         u_int mymask = PCPU_GET(cpumask);
 1265 
 1266         if (rcr3() == lazyptd)
 1267                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1268         atomic_clear_int(lazymask, mymask);
 1269         atomic_store_rel_int(&lazywait, 1);
 1270 }
 1271 
 1272 static void
 1273 pmap_lazyfix_self(u_int mymask)
 1274 {
 1275 
 1276         if (rcr3() == lazyptd)
 1277                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1278         atomic_clear_int(lazymask, mymask);
 1279 }
 1280 
 1281 
 1282 static void
 1283 pmap_lazyfix(pmap_t pmap)
 1284 {
 1285         u_int mymask = PCPU_GET(cpumask);
 1286         u_int mask;
 1287         register u_int spins;
 1288 
 1289         while ((mask = pmap->pm_active) != 0) {
 1290                 spins = 50000000;
 1291                 mask = mask & -mask;    /* Find least significant set bit */
 1292                 mtx_lock_spin(&lazypmap_lock);
 1293 #ifdef PAE
 1294                 lazyptd = vtophys(pmap->pm_pdpt);
 1295 #else
 1296                 lazyptd = vtophys(pmap->pm_pdir);
 1297 #endif
 1298                 if (mask == mymask) {
 1299                         lazymask = &pmap->pm_active;
 1300                         pmap_lazyfix_self(mymask);
 1301                 } else {
 1302                         atomic_store_rel_int((u_int *)&lazymask,
 1303                             (u_int)&pmap->pm_active);
 1304                         atomic_store_rel_int(&lazywait, 0);
 1305                         ipi_selected(mask, IPI_LAZYPMAP);
 1306                         while (lazywait == 0) {
 1307                                 ia32_pause();
 1308                                 if (--spins == 0)
 1309                                         break;
 1310                         }
 1311                 }
 1312                 mtx_unlock_spin(&lazypmap_lock);
 1313                 if (spins == 0)
 1314                         printf("pmap_lazyfix: spun for 50000000\n");
 1315         }
 1316 }
 1317 
 1318 #else   /* SMP */
 1319 
 1320 /*
 1321  * Cleaning up on uniprocessor is easy.  For various reasons, we're
 1322  * unlikely to have to even execute this code, including the fact
 1323  * that the cleanup is deferred until the parent does a wait(2), which
 1324  * means that another userland process has run.
 1325  */
 1326 static void
 1327 pmap_lazyfix(pmap_t pmap)
 1328 {
 1329         u_int cr3;
 1330 
 1331         cr3 = vtophys(pmap->pm_pdir);
 1332         if (cr3 == rcr3()) {
 1333                 load_cr3(PCPU_GET(curpcb)->pcb_cr3);
 1334                 pmap->pm_active &= ~(PCPU_GET(cpumask));
 1335         }
 1336 }
 1337 #endif  /* SMP */
 1338 
 1339 /*
 1340  * Release any resources held by the given physical map.
 1341  * Called when a pmap initialized by pmap_pinit is being released.
 1342  * Should only be called if the map contains no valid mappings.
 1343  */
 1344 void
 1345 pmap_release(pmap_t pmap)
 1346 {
 1347         vm_page_t m, ptdpg[NPGPTD];
 1348         int i;
 1349 
 1350         KASSERT(pmap->pm_stats.resident_count == 0,
 1351             ("pmap_release: pmap resident count %ld != 0",
 1352             pmap->pm_stats.resident_count));
 1353 
 1354         pmap_lazyfix(pmap);
 1355         mtx_lock_spin(&allpmaps_lock);
 1356         LIST_REMOVE(pmap, pm_list);
 1357         mtx_unlock_spin(&allpmaps_lock);
 1358 
 1359         for (i = 0; i < NPGPTD; i++)
 1360                 ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i]);
 1361 
 1362         bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
 1363             sizeof(*pmap->pm_pdir));
 1364 #ifdef SMP
 1365         pmap->pm_pdir[MPPTDI] = 0;
 1366 #endif
 1367 
 1368         pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
 1369 
 1370         vm_page_lock_queues();
 1371         for (i = 0; i < NPGPTD; i++) {
 1372                 m = ptdpg[i];
 1373 #ifdef PAE
 1374                 KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
 1375                     ("pmap_release: got wrong ptd page"));
 1376 #endif
 1377                 m->wire_count--;
 1378                 atomic_subtract_int(&cnt.v_wire_count, 1);
 1379                 vm_page_busy(m);
 1380                 vm_page_free_zero(m);
 1381         }
 1382         vm_page_unlock_queues();
 1383 }
 1384 
 1385 static int
 1386 kvm_size(SYSCTL_HANDLER_ARGS)
 1387 {
 1388         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 1389 
 1390         return sysctl_handle_long(oidp, &ksize, 0, req);
 1391 }
 1392 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 1393     0, 0, kvm_size, "IU", "Size of KVM");
 1394 
 1395 static int
 1396 kvm_free(SYSCTL_HANDLER_ARGS)
 1397 {
 1398         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1399 
 1400         return sysctl_handle_long(oidp, &kfree, 0, req);
 1401 }
 1402 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 1403     0, 0, kvm_free, "IU", "Amount of KVM free");
 1404 
 1405 /*
 1406  * grow the number of kernel page table entries, if needed
 1407  */
 1408 void
 1409 pmap_growkernel(vm_offset_t addr)
 1410 {
 1411         struct pmap *pmap;
 1412         int s;
 1413         vm_paddr_t ptppaddr;
 1414         vm_page_t nkpg;
 1415         pd_entry_t newpdir;
 1416         pt_entry_t *pde;
 1417 
 1418         s = splhigh();
 1419         mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 1420         if (kernel_vm_end == 0) {
 1421                 kernel_vm_end = KERNBASE;
 1422                 nkpt = 0;
 1423                 while (pdir_pde(PTD, kernel_vm_end)) {
 1424                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1425                         nkpt++;
 1426                 }
 1427         }
 1428         addr = roundup2(addr, PAGE_SIZE * NPTEPG);
 1429         while (kernel_vm_end < addr) {
 1430                 if (pdir_pde(PTD, kernel_vm_end)) {
 1431                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1432                         continue;
 1433                 }
 1434 
 1435                 /*
 1436                  * This index is bogus, but out of the way
 1437                  */
 1438                 nkpg = vm_page_alloc(NULL, nkpt,
 1439                     VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
 1440                 if (!nkpg)
 1441                         panic("pmap_growkernel: no memory to grow kernel");
 1442 
 1443                 nkpt++;
 1444 
 1445                 pmap_zero_page(nkpg);
 1446                 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 1447                 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 1448                 pdir_pde(PTD, kernel_vm_end) = newpdir;
 1449 
 1450                 mtx_lock_spin(&allpmaps_lock);
 1451                 LIST_FOREACH(pmap, &allpmaps, pm_list) {
 1452                         pde = pmap_pde(pmap, kernel_vm_end);
 1453                         pde_store(pde, newpdir);
 1454                 }
 1455                 mtx_unlock_spin(&allpmaps_lock);
 1456                 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1457         }
 1458         splx(s);
 1459 }
 1460 
 1461 
 1462 /***************************************************
 1463  * page management routines.
 1464  ***************************************************/
 1465 
 1466 /*
 1467  * free the pv_entry back to the free list
 1468  */
 1469 static PMAP_INLINE void
 1470 free_pv_entry(pv_entry_t pv)
 1471 {
 1472         pv_entry_count--;
 1473         uma_zfree(pvzone, pv);
 1474 }
 1475 
 1476 /*
 1477  * get a new pv_entry, allocating a block from the system
 1478  * when needed.
 1479  * the memory allocation is performed bypassing the malloc code
 1480  * because of the possibility of allocations at interrupt time.
 1481  */
 1482 static pv_entry_t
 1483 get_pv_entry(void)
 1484 {
 1485         pv_entry_count++;
 1486         if (pv_entry_high_water &&
 1487                 (pv_entry_count > pv_entry_high_water) &&
 1488                 (pmap_pagedaemon_waken == 0)) {
 1489                 pmap_pagedaemon_waken = 1;
 1490                 wakeup (&vm_pages_needed);
 1491         }
 1492         return uma_zalloc(pvzone, M_NOWAIT);
 1493 }
 1494 
 1495 /*
 1496  * If it is the first entry on the list, it is actually
 1497  * in the header and we must copy the following entry up
 1498  * to the header.  Otherwise we must search the list for
 1499  * the entry.  In either case we free the now unused entry.
 1500  */
 1501 
 1502 static int
 1503 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 1504 {
 1505         pv_entry_t pv;
 1506         int rtval;
 1507         int s;
 1508 
 1509         s = splvm();
 1510         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1511         if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 1512                 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 1513                         if (pmap == pv->pv_pmap && va == pv->pv_va) 
 1514                                 break;
 1515                 }
 1516         } else {
 1517                 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
 1518                         if (va == pv->pv_va) 
 1519                                 break;
 1520                 }
 1521         }
 1522 
 1523         rtval = 0;
 1524         if (pv) {
 1525                 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
 1526                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1527                 m->md.pv_list_count--;
 1528                 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 1529                         vm_page_flag_clear(m, PG_WRITEABLE);
 1530 
 1531                 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 1532                 free_pv_entry(pv);
 1533         }
 1534                         
 1535         splx(s);
 1536         return rtval;
 1537 }
 1538 
 1539 /*
 1540  * Create a pv entry for page at pa for
 1541  * (pmap, va).
 1542  */
 1543 static void
 1544 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
 1545 {
 1546 
 1547         int s;
 1548         pv_entry_t pv;
 1549 
 1550         s = splvm();
 1551         pv = get_pv_entry();
 1552         pv->pv_va = va;
 1553         pv->pv_pmap = pmap;
 1554         pv->pv_ptem = mpte;
 1555 
 1556         vm_page_lock_queues();
 1557         TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 1558         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 1559         m->md.pv_list_count++;
 1560 
 1561         vm_page_unlock_queues();
 1562         splx(s);
 1563 }
 1564 
 1565 /*
 1566  * pmap_remove_pte: do the things to unmap a page in a process
 1567  */
 1568 static int
 1569 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
 1570 {
 1571         pt_entry_t oldpte;
 1572         vm_page_t m, mpte;
 1573 
 1574         oldpte = pte_load_clear(ptq);
 1575         if (oldpte & PG_W)
 1576                 pmap->pm_stats.wired_count -= 1;
 1577         /*
 1578          * Machines that don't support invlpg, also don't support
 1579          * PG_G.
 1580          */
 1581         if (oldpte & PG_G)
 1582                 pmap_invalidate_page(kernel_pmap, va);
 1583         pmap->pm_stats.resident_count -= 1;
 1584         if (oldpte & PG_MANAGED) {
 1585                 m = PHYS_TO_VM_PAGE(oldpte);
 1586                 if (oldpte & PG_M) {
 1587 #if defined(PMAP_DIAGNOSTIC)
 1588                         if (pmap_nw_modified((pt_entry_t) oldpte)) {
 1589                                 printf(
 1590         "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1591                                     va, oldpte);
 1592                         }
 1593 #endif
 1594                         if (pmap_track_modified(va))
 1595                                 vm_page_dirty(m);
 1596                 }
 1597                 if (oldpte & PG_A)
 1598                         vm_page_flag_set(m, PG_REFERENCED);
 1599                 return pmap_remove_entry(pmap, m, va);
 1600         } else {
 1601                 mpte = PHYS_TO_VM_PAGE(*pmap_pde(pmap, va));
 1602                 return pmap_unuse_pt(pmap, va, mpte);
 1603         }
 1604 }
 1605 
 1606 /*
 1607  * Remove a single page from a process address space
 1608  */
 1609 static void
 1610 pmap_remove_page(pmap_t pmap, vm_offset_t va)
 1611 {
 1612         pt_entry_t *pte;
 1613 
 1614         if ((pte = pmap_pte(pmap, va)) == NULL || *pte == 0)
 1615                 return;
 1616         pmap_remove_pte(pmap, pte, va);
 1617         pmap_invalidate_page(pmap, va);
 1618 }
 1619 
 1620 /*
 1621  *      Remove the given range of addresses from the specified map.
 1622  *
 1623  *      It is assumed that the start and end are properly
 1624  *      rounded to the page size.
 1625  */
 1626 void
 1627 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 1628 {
 1629         vm_offset_t pdnxt;
 1630         pd_entry_t ptpaddr;
 1631         pt_entry_t *pte;
 1632         int anyvalid;
 1633 
 1634         if (pmap == NULL)
 1635                 return;
 1636 
 1637         if (pmap->pm_stats.resident_count == 0)
 1638                 return;
 1639 
 1640         /*
 1641          * special handling of removing one page.  a very
 1642          * common operation and easy to short circuit some
 1643          * code.
 1644          */
 1645         if ((sva + PAGE_SIZE == eva) && 
 1646             ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 1647                 pmap_remove_page(pmap, sva);
 1648                 return;
 1649         }
 1650 
 1651         anyvalid = 0;
 1652 
 1653         for (; sva < eva; sva = pdnxt) {
 1654                 unsigned pdirindex;
 1655 
 1656                 /*
 1657                  * Calculate index for next page table.
 1658                  */
 1659                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 1660                 if (pmap->pm_stats.resident_count == 0)
 1661                         break;
 1662 
 1663                 pdirindex = sva >> PDRSHIFT;
 1664                 ptpaddr = pmap->pm_pdir[pdirindex];
 1665 
 1666                 /*
 1667                  * Weed out invalid mappings. Note: we assume that the page
 1668                  * directory table is always allocated, and in kernel virtual.
 1669                  */
 1670                 if (ptpaddr == 0)
 1671                         continue;
 1672 
 1673                 /*
 1674                  * Check for large page.
 1675                  */
 1676                 if ((ptpaddr & PG_PS) != 0) {
 1677                         pmap->pm_pdir[pdirindex] = 0;
 1678                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1679                         anyvalid = 1;
 1680                         continue;
 1681                 }
 1682 
 1683                 /*
 1684                  * Limit our scan to either the end of the va represented
 1685                  * by the current page table page, or to the end of the
 1686                  * range being removed.
 1687                  */
 1688                 if (pdnxt > eva)
 1689                         pdnxt = eva;
 1690 
 1691                 for (; sva != pdnxt; sva += PAGE_SIZE) {
 1692                         if ((pte = pmap_pte(pmap, sva)) == NULL ||
 1693                             *pte == 0)
 1694                                 continue;
 1695                         anyvalid = 1;
 1696                         if (pmap_remove_pte(pmap, pte, sva))
 1697                                 break;
 1698                 }
 1699         }
 1700 
 1701         if (anyvalid)
 1702                 pmap_invalidate_all(pmap);
 1703 }
 1704 
 1705 /*
 1706  *      Routine:        pmap_remove_all
 1707  *      Function:
 1708  *              Removes this physical page from
 1709  *              all physical maps in which it resides.
 1710  *              Reflects back modify bits to the pager.
 1711  *
 1712  *      Notes:
 1713  *              Original versions of this routine were very
 1714  *              inefficient because they iteratively called
 1715  *              pmap_remove (slow...)
 1716  */
 1717 
 1718 void
 1719 pmap_remove_all(vm_page_t m)
 1720 {
 1721         register pv_entry_t pv;
 1722         pt_entry_t *pte, tpte;
 1723         int s;
 1724 
 1725 #if defined(PMAP_DIAGNOSTIC)
 1726         /*
 1727          * XXX This makes pmap_remove_all() illegal for non-managed pages!
 1728          */
 1729         if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
 1730                 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x",
 1731                     VM_PAGE_TO_PHYS(m));
 1732         }
 1733 #endif
 1734         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 1735         s = splvm();
 1736         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 1737                 pv->pv_pmap->pm_stats.resident_count--;
 1738                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 1739                 tpte = pte_load_clear(pte);
 1740                 if (tpte & PG_W)
 1741                         pv->pv_pmap->pm_stats.wired_count--;
 1742                 if (tpte & PG_A)
 1743                         vm_page_flag_set(m, PG_REFERENCED);
 1744 
 1745                 /*
 1746                  * Update the vm_page_t clean and reference bits.
 1747                  */
 1748                 if (tpte & PG_M) {
 1749 #if defined(PMAP_DIAGNOSTIC)
 1750                         if (pmap_nw_modified((pt_entry_t) tpte)) {
 1751                                 printf(
 1752         "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1753                                     pv->pv_va, tpte);
 1754                         }
 1755 #endif
 1756                         if (pmap_track_modified(pv->pv_va))
 1757                                 vm_page_dirty(m);
 1758                 }
 1759                 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 1760                 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 1761                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1762                 m->md.pv_list_count--;
 1763                 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
 1764                 free_pv_entry(pv);
 1765         }
 1766         vm_page_flag_clear(m, PG_WRITEABLE);
 1767         splx(s);
 1768 }
 1769 
 1770 /*
 1771  *      Set the physical protection on the
 1772  *      specified range of this map as requested.
 1773  */
 1774 void
 1775 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 1776 {
 1777         vm_offset_t pdnxt;
 1778         pd_entry_t ptpaddr;
 1779         int anychanged;
 1780 
 1781         if (pmap == NULL)
 1782                 return;
 1783 
 1784         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 1785                 pmap_remove(pmap, sva, eva);
 1786                 return;
 1787         }
 1788 
 1789         if (prot & VM_PROT_WRITE)
 1790                 return;
 1791 
 1792         anychanged = 0;
 1793 
 1794         for (; sva < eva; sva = pdnxt) {
 1795                 unsigned pdirindex;
 1796 
 1797                 pdnxt = (sva + NBPDR) & ~PDRMASK;
 1798 
 1799                 pdirindex = sva >> PDRSHIFT;
 1800                 ptpaddr = pmap->pm_pdir[pdirindex];
 1801 
 1802                 /*
 1803                  * Weed out invalid mappings. Note: we assume that the page
 1804                  * directory table is always allocated, and in kernel virtual.
 1805                  */
 1806                 if (ptpaddr == 0)
 1807                         continue;
 1808 
 1809                 /*
 1810                  * Check for large page.
 1811                  */
 1812                 if ((ptpaddr & PG_PS) != 0) {
 1813                         pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
 1814                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1815                         anychanged = 1;
 1816                         continue;
 1817                 }
 1818 
 1819                 if (pdnxt > eva)
 1820                         pdnxt = eva;
 1821 
 1822                 for (; sva != pdnxt; sva += PAGE_SIZE) {
 1823                         pt_entry_t pbits;
 1824                         pt_entry_t *pte;
 1825                         vm_page_t m;
 1826 
 1827                         if ((pte = pmap_pte(pmap, sva)) == NULL)
 1828                                 continue;
 1829                         pbits = *pte;
 1830                         if (pbits & PG_MANAGED) {
 1831                                 m = NULL;
 1832                                 if (pbits & PG_A) {
 1833                                         m = PHYS_TO_VM_PAGE(pbits);
 1834                                         vm_page_flag_set(m, PG_REFERENCED);
 1835                                         pbits &= ~PG_A;
 1836                                 }
 1837                                 if ((pbits & PG_M) != 0 &&
 1838                                     pmap_track_modified(sva)) {
 1839                                         if (m == NULL)
 1840                                                 m = PHYS_TO_VM_PAGE(pbits);
 1841                                         vm_page_dirty(m);
 1842                                         pbits &= ~PG_M;
 1843                                 }
 1844                         }
 1845 
 1846                         pbits &= ~PG_RW;
 1847 
 1848                         if (pbits != *pte) {
 1849                                 pte_store(pte, pbits);
 1850                                 anychanged = 1;
 1851                         }
 1852                 }
 1853         }
 1854         if (anychanged)
 1855                 pmap_invalidate_all(pmap);
 1856 }
 1857 
 1858 /*
 1859  *      Insert the given physical page (p) at
 1860  *      the specified virtual address (v) in the
 1861  *      target physical map with the protection requested.
 1862  *
 1863  *      If specified, the page will be wired down, meaning
 1864  *      that the related pte can not be reclaimed.
 1865  *
 1866  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 1867  *      or lose information.  That is, this routine must actually
 1868  *      insert this page into the given map NOW.
 1869  */
 1870 void
 1871 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 1872            boolean_t wired)
 1873 {
 1874         vm_paddr_t pa;
 1875         register pt_entry_t *pte;
 1876         vm_paddr_t opa;
 1877         pt_entry_t origpte, newpte;
 1878         vm_page_t mpte;
 1879 
 1880         if (pmap == NULL)
 1881                 return;
 1882 
 1883         va &= PG_FRAME;
 1884 #ifdef PMAP_DIAGNOSTIC
 1885         if (va > VM_MAX_KERNEL_ADDRESS)
 1886                 panic("pmap_enter: toobig");
 1887         if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 1888                 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
 1889 #endif
 1890 
 1891         mpte = NULL;
 1892         /*
 1893          * In the case that a page table page is not
 1894          * resident, we are creating it here.
 1895          */
 1896         if (va < VM_MAXUSER_ADDRESS) {
 1897                 mpte = pmap_allocpte(pmap, va);
 1898         }
 1899 #if 0 && defined(PMAP_DIAGNOSTIC)
 1900         else {
 1901                 pd_entry_t *pdeaddr = pmap_pde(pmap, va);
 1902                 origpte = *pdeaddr;
 1903                 if ((origpte & PG_V) == 0) { 
 1904                         panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
 1905                                 pmap->pm_pdir[PTDPTDI], origpte, va);
 1906                 }
 1907         }
 1908 #endif
 1909 
 1910         pte = pmap_pte(pmap, va);
 1911 
 1912         /*
 1913          * Page Directory table entry not valid, we need a new PT page
 1914          */
 1915         if (pte == NULL) {
 1916                 panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n",
 1917                         (uintmax_t)pmap->pm_pdir[PTDPTDI], va);
 1918         }
 1919 
 1920         pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
 1921         origpte = *pte;
 1922         opa = origpte & PG_FRAME;
 1923 
 1924         if (origpte & PG_PS) {
 1925                 /*
 1926                  * Yes, I know this will truncate upper address bits for PAE,
 1927                  * but I'm actually more interested in the lower bits
 1928                  */
 1929                 printf("pmap_enter: va %p, pte %p, origpte %p\n",
 1930                     (void *)va, (void *)pte, (void *)(uintptr_t)origpte);
 1931                 panic("pmap_enter: attempted pmap_enter on 4MB page");
 1932         }
 1933 
 1934         /*
 1935          * Mapping has not changed, must be protection or wiring change.
 1936          */
 1937         if (origpte && (opa == pa)) {
 1938                 /*
 1939                  * Wiring change, just update stats. We don't worry about
 1940                  * wiring PT pages as they remain resident as long as there
 1941                  * are valid mappings in them. Hence, if a user page is wired,
 1942                  * the PT page will be also.
 1943                  */
 1944                 if (wired && ((origpte & PG_W) == 0))
 1945                         pmap->pm_stats.wired_count++;
 1946                 else if (!wired && (origpte & PG_W))
 1947                         pmap->pm_stats.wired_count--;
 1948 
 1949 #if defined(PMAP_DIAGNOSTIC)
 1950                 if (pmap_nw_modified((pt_entry_t) origpte)) {
 1951                         printf(
 1952         "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1953                             va, origpte);
 1954                 }
 1955 #endif
 1956 
 1957                 /*
 1958                  * Remove extra pte reference
 1959                  */
 1960                 if (mpte)
 1961                         mpte->hold_count--;
 1962 
 1963                 if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
 1964                         if ((origpte & PG_RW) == 0) {
 1965                                 pte_store(pte, origpte | PG_RW);
 1966                                 pmap_invalidate_page(pmap, va);
 1967                         }
 1968                         return;
 1969                 }
 1970 
 1971                 /*
 1972                  * We might be turning off write access to the page,
 1973                  * so we go ahead and sense modify status.
 1974                  */
 1975                 if (origpte & PG_MANAGED) {
 1976                         if ((origpte & PG_M) && pmap_track_modified(va)) {
 1977                                 vm_page_t om;
 1978                                 om = PHYS_TO_VM_PAGE(opa);
 1979                                 vm_page_dirty(om);
 1980                         }
 1981                         pa |= PG_MANAGED;
 1982                 }
 1983                 goto validate;
 1984         } 
 1985         /*
 1986          * Mapping has changed, invalidate old range and fall through to
 1987          * handle validating new mapping.
 1988          */
 1989         if (opa) {
 1990                 int err;
 1991                 vm_page_lock_queues();
 1992                 err = pmap_remove_pte(pmap, pte, va);
 1993                 vm_page_unlock_queues();
 1994                 if (err)
 1995                         panic("pmap_enter: pte vanished, va: 0x%x", va);
 1996         }
 1997 
 1998         /*
 1999          * Enter on the PV list if part of our managed memory. Note that we
 2000          * raise IPL while manipulating pv_table since pmap_enter can be
 2001          * called at interrupt time.
 2002          */
 2003         if (pmap_initialized && 
 2004             (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
 2005                 pmap_insert_entry(pmap, va, mpte, m);
 2006                 pa |= PG_MANAGED;
 2007         }
 2008 
 2009         /*
 2010          * Increment counters
 2011          */
 2012         pmap->pm_stats.resident_count++;
 2013         if (wired)
 2014                 pmap->pm_stats.wired_count++;
 2015 
 2016 validate:
 2017         /*
 2018          * Now validate mapping with desired protection/wiring.
 2019          */
 2020         newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V);
 2021 
 2022         if (wired)
 2023                 newpte |= PG_W;
 2024         if (va < VM_MAXUSER_ADDRESS)
 2025                 newpte |= PG_U;
 2026         if (pmap == kernel_pmap)
 2027                 newpte |= pgeflag;
 2028 
 2029         /*
 2030          * if the mapping or permission bits are different, we need
 2031          * to update the pte.
 2032          */
 2033         if ((origpte & ~(PG_M|PG_A)) != newpte) {
 2034                 pte_store(pte, newpte | PG_A);
 2035                 /*if (origpte)*/ {
 2036                         pmap_invalidate_page(pmap, va);
 2037                 }
 2038         }
 2039 }
 2040 
 2041 /*
 2042  * this code makes some *MAJOR* assumptions:
 2043  * 1. Current pmap & pmap exists.
 2044  * 2. Not wired.
 2045  * 3. Read access.
 2046  * 4. No page table pages.
 2047  * 5. Tlbflush is deferred to calling procedure.
 2048  * 6. Page IS managed.
 2049  * but is *MUCH* faster than pmap_enter...
 2050  */
 2051 
 2052 vm_page_t
 2053 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
 2054 {
 2055         pt_entry_t *pte;
 2056         vm_paddr_t pa;
 2057 
 2058         /*
 2059          * In the case that a page table page is not
 2060          * resident, we are creating it here.
 2061          */
 2062         if (va < VM_MAXUSER_ADDRESS) {
 2063                 unsigned ptepindex;
 2064                 pd_entry_t ptepa;
 2065 
 2066                 /*
 2067                  * Calculate pagetable page index
 2068                  */
 2069                 ptepindex = va >> PDRSHIFT;
 2070                 if (mpte && (mpte->pindex == ptepindex)) {
 2071                         mpte->hold_count++;
 2072                 } else {
 2073 retry:
 2074                         /*
 2075                          * Get the page directory entry
 2076                          */
 2077                         ptepa = pmap->pm_pdir[ptepindex];
 2078 
 2079                         /*
 2080                          * If the page table page is mapped, we just increment
 2081                          * the hold count, and activate it.
 2082                          */
 2083                         if (ptepa) {
 2084                                 if (ptepa & PG_PS)
 2085                                         panic("pmap_enter_quick: unexpected mapping into 4MB page");
 2086                                 mpte = PHYS_TO_VM_PAGE(ptepa);
 2087                                 mpte->hold_count++;
 2088                         } else {
 2089                                 mpte = _pmap_allocpte(pmap, ptepindex);
 2090                                 if (mpte == NULL)
 2091                                         goto retry;
 2092                         }
 2093                 }
 2094         } else {
 2095                 mpte = NULL;
 2096         }
 2097 
 2098         /*
 2099          * This call to vtopte makes the assumption that we are
 2100          * entering the page into the current pmap.  In order to support
 2101          * quick entry into any pmap, one would likely use pmap_pte_quick.
 2102          * But that isn't as quick as vtopte.
 2103          */
 2104         pte = vtopte(va);
 2105         if (*pte) {
 2106                 if (mpte != NULL) {
 2107                         vm_page_lock_queues();
 2108                         pmap_unwire_pte_hold(pmap, mpte);
 2109                         vm_page_unlock_queues();
 2110                 }
 2111                 return 0;
 2112         }
 2113 
 2114         /*
 2115          * Enter on the PV list if part of our managed memory. Note that we
 2116          * raise IPL while manipulating pv_table since pmap_enter can be
 2117          * called at interrupt time.
 2118          */
 2119         if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 2120                 pmap_insert_entry(pmap, va, mpte, m);
 2121 
 2122         /*
 2123          * Increment counters
 2124          */
 2125         pmap->pm_stats.resident_count++;
 2126 
 2127         pa = VM_PAGE_TO_PHYS(m);
 2128 
 2129         /*
 2130          * Now validate mapping with RO protection
 2131          */
 2132         if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 2133                 pte_store(pte, pa | PG_V | PG_U);
 2134         else
 2135                 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
 2136 
 2137         return mpte;
 2138 }
 2139 
 2140 /*
 2141  * Make a temporary mapping for a physical address.  This is only intended
 2142  * to be used for panic dumps.
 2143  */
 2144 void *
 2145 pmap_kenter_temporary(vm_offset_t pa, int i)
 2146 {
 2147         vm_offset_t va;
 2148 
 2149         va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 2150         pmap_kenter(va, pa);
 2151 #ifndef I386_CPU
 2152         invlpg(va);
 2153 #else
 2154         invltlb();
 2155 #endif
 2156         return ((void *)crashdumpmap);
 2157 }
 2158 
 2159 /*
 2160  * This code maps large physical mmap regions into the
 2161  * processor address space.  Note that some shortcuts
 2162  * are taken, but the code works.
 2163  */
 2164 void
 2165 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
 2166                     vm_object_t object, vm_pindex_t pindex,
 2167                     vm_size_t size)
 2168 {
 2169         vm_page_t p;
 2170 
 2171         VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 2172         KASSERT(object->type == OBJT_DEVICE,
 2173             ("pmap_object_init_pt: non-device object"));
 2174         if (pseflag && 
 2175             ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
 2176                 int i;
 2177                 vm_page_t m[1];
 2178                 unsigned int ptepindex;
 2179                 int npdes;
 2180                 pd_entry_t ptepa;
 2181 
 2182                 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
 2183                         return;
 2184 retry:
 2185                 p = vm_page_lookup(object, pindex);
 2186                 if (p != NULL) {
 2187                         vm_page_lock_queues();
 2188                         if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
 2189                                 goto retry;
 2190                 } else {
 2191                         p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 2192                         if (p == NULL)
 2193                                 return;
 2194                         m[0] = p;
 2195 
 2196                         if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 2197                                 vm_page_lock_queues();
 2198                                 vm_page_free(p);
 2199                                 vm_page_unlock_queues();
 2200                                 return;
 2201                         }
 2202 
 2203                         p = vm_page_lookup(object, pindex);
 2204                         vm_page_lock_queues();
 2205                         vm_page_wakeup(p);
 2206                 }
 2207                 vm_page_unlock_queues();
 2208 
 2209                 ptepa = VM_PAGE_TO_PHYS(p);
 2210                 if (ptepa & (NBPDR - 1))
 2211                         return;
 2212 
 2213                 p->valid = VM_PAGE_BITS_ALL;
 2214 
 2215                 pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
 2216                 npdes = size >> PDRSHIFT;
 2217                 for(i = 0; i < npdes; i++) {
 2218                         pde_store(&pmap->pm_pdir[ptepindex],
 2219                             ptepa | PG_U | PG_RW | PG_V | PG_PS);
 2220                         ptepa += NBPDR;
 2221                         ptepindex += 1;
 2222                 }
 2223                 pmap_invalidate_all(pmap);
 2224         }
 2225 }
 2226 
 2227 /*
 2228  *      Routine:        pmap_change_wiring
 2229  *      Function:       Change the wiring attribute for a map/virtual-address
 2230  *                      pair.
 2231  *      In/out conditions:
 2232  *                      The mapping must already exist in the pmap.
 2233  */
 2234 void
 2235 pmap_change_wiring(pmap, va, wired)
 2236         register pmap_t pmap;
 2237         vm_offset_t va;
 2238         boolean_t wired;
 2239 {
 2240         register pt_entry_t *pte;
 2241 
 2242         if (pmap == NULL)
 2243                 return;
 2244 
 2245         pte = pmap_pte(pmap, va);
 2246 
 2247         if (wired && !pmap_pte_w(pte))
 2248                 pmap->pm_stats.wired_count++;
 2249         else if (!wired && pmap_pte_w(pte))
 2250                 pmap->pm_stats.wired_count--;
 2251 
 2252         /*
 2253          * Wiring is not a hardware characteristic so there is no need to
 2254          * invalidate TLB.
 2255          */
 2256         pmap_pte_set_w(pte, wired);
 2257 }
 2258 
 2259 
 2260 
 2261 /*
 2262  *      Copy the range specified by src_addr/len
 2263  *      from the source map to the range dst_addr/len
 2264  *      in the destination map.
 2265  *
 2266  *      This routine is only advisory and need not do anything.
 2267  */
 2268 
 2269 void
 2270 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 2271           vm_offset_t src_addr)
 2272 {
 2273         vm_offset_t addr;
 2274         vm_offset_t end_addr = src_addr + len;
 2275         vm_offset_t pdnxt;
 2276         vm_page_t m;
 2277 
 2278         if (dst_addr != src_addr)
 2279                 return;
 2280 
 2281         if (!pmap_is_current(src_pmap))
 2282                 return;
 2283 
 2284         for (addr = src_addr; addr < end_addr; addr = pdnxt) {
 2285                 pt_entry_t *src_pte, *dst_pte;
 2286                 vm_page_t dstmpte, srcmpte;
 2287                 pd_entry_t srcptepaddr;
 2288                 unsigned ptepindex;
 2289 
 2290                 if (addr >= UPT_MIN_ADDRESS)
 2291                         panic("pmap_copy: invalid to pmap_copy page tables\n");
 2292 
 2293                 /*
 2294                  * Don't let optional prefaulting of pages make us go
 2295                  * way below the low water mark of free pages or way
 2296                  * above high water mark of used pv entries.
 2297                  */
 2298                 if (cnt.v_free_count < cnt.v_free_reserved ||
 2299                     pv_entry_count > pv_entry_high_water)
 2300                         break;
 2301                 
 2302                 pdnxt = (addr + NBPDR) & ~PDRMASK;
 2303                 ptepindex = addr >> PDRSHIFT;
 2304 
 2305                 srcptepaddr = src_pmap->pm_pdir[ptepindex];
 2306                 if (srcptepaddr == 0)
 2307                         continue;
 2308                         
 2309                 if (srcptepaddr & PG_PS) {
 2310                         if (dst_pmap->pm_pdir[ptepindex] == 0) {
 2311                                 dst_pmap->pm_pdir[ptepindex] = srcptepaddr;
 2312                                 dst_pmap->pm_stats.resident_count +=
 2313                                     NBPDR / PAGE_SIZE;
 2314                         }
 2315                         continue;
 2316                 }
 2317 
 2318                 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
 2319                 if (srcmpte->hold_count == 0 || (srcmpte->flags & PG_BUSY))
 2320                         continue;
 2321 
 2322                 if (pdnxt > end_addr)
 2323                         pdnxt = end_addr;
 2324 
 2325                 src_pte = vtopte(addr);
 2326                 while (addr < pdnxt) {
 2327                         pt_entry_t ptetemp;
 2328                         ptetemp = *src_pte;
 2329                         /*
 2330                          * we only virtual copy managed pages
 2331                          */
 2332                         if ((ptetemp & PG_MANAGED) != 0) {
 2333                                 /*
 2334                                  * We have to check after allocpte for the
 2335                                  * pte still being around...  allocpte can
 2336                                  * block.
 2337                                  */
 2338                                 dstmpte = pmap_allocpte(dst_pmap, addr);
 2339                                 dst_pte = pmap_pte(dst_pmap, addr);
 2340                                 if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
 2341                                         /*
 2342                                          * Clear the modified and
 2343                                          * accessed (referenced) bits
 2344                                          * during the copy.
 2345                                          */
 2346                                         m = PHYS_TO_VM_PAGE(ptetemp);
 2347                                         *dst_pte = ptetemp & ~(PG_M | PG_A);
 2348                                         dst_pmap->pm_stats.resident_count++;
 2349                                         pmap_insert_entry(dst_pmap, addr,
 2350                                                 dstmpte, m);
 2351                                 } else {
 2352                                         vm_page_lock_queues();
 2353                                         pmap_unwire_pte_hold(dst_pmap, dstmpte);
 2354                                         vm_page_unlock_queues();
 2355                                 }
 2356                                 if (dstmpte->hold_count >= srcmpte->hold_count)
 2357                                         break;
 2358                         }
 2359                         addr += PAGE_SIZE;
 2360                         src_pte++;
 2361                 }
 2362         }
 2363 }       
 2364 
 2365 #ifdef SMP
 2366 
 2367 /*
 2368  *      pmap_zpi_switchout*()
 2369  *
 2370  *      These functions allow us to avoid doing IPIs alltogether in certain
 2371  *      temporary page-mapping situations (page zeroing).  Instead to deal
 2372  *      with being preempted and moved onto a different cpu we invalidate
 2373  *      the page when the scheduler switches us in.  This does not occur
 2374  *      very often so we remain relatively optimal with very little effort.
 2375  */
 2376 static void
 2377 pmap_zpi_switchout12(void)
 2378 {
 2379         invlpg((u_int)CADDR1);
 2380         invlpg((u_int)CADDR2);
 2381 }
 2382 
 2383 static void
 2384 pmap_zpi_switchout2(void)
 2385 {
 2386         invlpg((u_int)CADDR2);
 2387 }
 2388 
 2389 static void
 2390 pmap_zpi_switchout3(void)
 2391 {
 2392         invlpg((u_int)CADDR3);
 2393 }
 2394 
 2395 #endif
 2396 
 2397 static __inline void
 2398 pagezero(void *page)
 2399 {
 2400 #if defined(I686_CPU)
 2401         if (cpu_class == CPUCLASS_686) {
 2402 #if defined(CPU_ENABLE_SSE)
 2403                 if (cpu_feature & CPUID_SSE2)
 2404                         sse2_pagezero(page);
 2405                 else
 2406 #endif
 2407                         i686_pagezero(page);
 2408         } else
 2409 #endif
 2410                 bzero(page, PAGE_SIZE);
 2411 }
 2412 
 2413 static __inline void
 2414 invlcaddr(void *caddr)
 2415 {
 2416 #ifdef I386_CPU
 2417         invltlb();
 2418 #else
 2419         invlpg((u_int)caddr);
 2420 #endif
 2421 }
 2422 
 2423 /*
 2424  *      pmap_zero_page zeros the specified hardware page by mapping 
 2425  *      the page into KVM and using bzero to clear its contents.
 2426  */
 2427 void
 2428 pmap_zero_page(vm_page_t m)
 2429 {
 2430 
 2431         mtx_lock(&CMAPCADDR12_lock);
 2432         if (*CMAP2)
 2433                 panic("pmap_zero_page: CMAP2 busy");
 2434 #ifdef SMP
 2435         curthread->td_pcb->pcb_switchout = pmap_zpi_switchout2;
 2436 #endif
 2437         *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2438 #ifdef SMP
 2439         invlpg((u_int)CADDR2);
 2440 #endif
 2441         pagezero(CADDR2);
 2442         *CMAP2 = 0;
 2443         invlcaddr(CADDR2);
 2444 #ifdef SMP
 2445         curthread->td_pcb->pcb_switchout = NULL;
 2446 #endif
 2447         mtx_unlock(&CMAPCADDR12_lock);
 2448 }
 2449 
 2450 /*
 2451  *      pmap_zero_page_area zeros the specified hardware page by mapping 
 2452  *      the page into KVM and using bzero to clear its contents.
 2453  *
 2454  *      off and size may not cover an area beyond a single hardware page.
 2455  */
 2456 void
 2457 pmap_zero_page_area(vm_page_t m, int off, int size)
 2458 {
 2459 
 2460         mtx_lock(&CMAPCADDR12_lock);
 2461         if (*CMAP2)
 2462                 panic("pmap_zero_page: CMAP2 busy");
 2463 #ifdef SMP
 2464         curthread->td_pcb->pcb_switchout = pmap_zpi_switchout2;
 2465 #endif
 2466         *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2467 #ifdef SMP
 2468         invlpg((u_int)CADDR2);
 2469 #endif
 2470         if (off == 0 && size == PAGE_SIZE) 
 2471                 pagezero(CADDR2);
 2472         else
 2473                 bzero((char *)CADDR2 + off, size);
 2474         *CMAP2 = 0;
 2475         invlcaddr(CADDR2);
 2476 #ifdef SMP
 2477         curthread->td_pcb->pcb_switchout = NULL;
 2478 #endif
 2479         mtx_unlock(&CMAPCADDR12_lock);
 2480 }
 2481 
 2482 /*
 2483  *      pmap_zero_page_idle zeros the specified hardware page by mapping 
 2484  *      the page into KVM and using bzero to clear its contents.  This
 2485  *      is intended to be called from the vm_pagezero process only and
 2486  *      outside of Giant.
 2487  */
 2488 void
 2489 pmap_zero_page_idle(vm_page_t m)
 2490 {
 2491 
 2492         if (*CMAP3)
 2493                 panic("pmap_zero_page: CMAP3 busy");
 2494 #ifdef SMP
 2495         curthread->td_pcb->pcb_switchout = pmap_zpi_switchout3;
 2496 #endif
 2497         *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
 2498 #ifdef SMP
 2499         invlpg((u_int)CADDR3);
 2500 #endif
 2501         pagezero(CADDR3);
 2502         *CMAP3 = 0;
 2503         invlcaddr(CADDR3);
 2504 #ifdef SMP
 2505         curthread->td_pcb->pcb_switchout = NULL;
 2506 #endif
 2507 }
 2508 
 2509 /*
 2510  *      pmap_copy_page copies the specified (machine independent)
 2511  *      page by mapping the page into virtual memory and using
 2512  *      bcopy to copy the page, one machine dependent page at a
 2513  *      time.
 2514  */
 2515 void
 2516 pmap_copy_page(vm_page_t src, vm_page_t dst)
 2517 {
 2518 
 2519         mtx_lock(&CMAPCADDR12_lock);
 2520         if (*CMAP1)
 2521                 panic("pmap_copy_page: CMAP1 busy");
 2522         if (*CMAP2)
 2523                 panic("pmap_copy_page: CMAP2 busy");
 2524 #ifdef SMP
 2525         curthread->td_pcb->pcb_switchout = pmap_zpi_switchout12;
 2526 #endif
 2527         *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
 2528         *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
 2529 #ifdef SMP
 2530         invlpg((u_int)CADDR1);
 2531         invlpg((u_int)CADDR2);
 2532 #endif
 2533         bcopy(CADDR1, CADDR2, PAGE_SIZE);
 2534         *CMAP1 = 0;
 2535         *CMAP2 = 0;
 2536 #ifdef I386_CPU
 2537         invltlb();
 2538 #else
 2539         invlpg((u_int)CADDR1);
 2540         invlpg((u_int)CADDR2);
 2541 #endif
 2542 #ifdef SMP
 2543         curthread->td_pcb->pcb_switchout = NULL;
 2544 #endif
 2545         mtx_unlock(&CMAPCADDR12_lock);
 2546 }
 2547 
 2548 /*
 2549  * Returns true if the pmap's pv is one of the first
 2550  * 16 pvs linked to from this page.  This count may
 2551  * be changed upwards or downwards in the future; it
 2552  * is only necessary that true be returned for a small
 2553  * subset of pmaps for proper page aging.
 2554  */
 2555 boolean_t
 2556 pmap_page_exists_quick(pmap, m)
 2557         pmap_t pmap;
 2558         vm_page_t m;
 2559 {
 2560         pv_entry_t pv;
 2561         int loops = 0;
 2562         int s;
 2563 
 2564         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2565                 return FALSE;
 2566 
 2567         s = splvm();
 2568         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2569         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2570                 if (pv->pv_pmap == pmap) {
 2571                         splx(s);
 2572                         return TRUE;
 2573                 }
 2574                 loops++;
 2575                 if (loops >= 16)
 2576                         break;
 2577         }
 2578         splx(s);
 2579         return (FALSE);
 2580 }
 2581 
 2582 #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 2583 /*
 2584  * Remove all pages from specified address space
 2585  * this aids process exit speeds.  Also, this code
 2586  * is special cased for current process only, but
 2587  * can have the more generic (and slightly slower)
 2588  * mode enabled.  This is much faster than pmap_remove
 2589  * in the case of running down an entire address space.
 2590  */
 2591 void
 2592 pmap_remove_pages(pmap, sva, eva)
 2593         pmap_t pmap;
 2594         vm_offset_t sva, eva;
 2595 {
 2596         pt_entry_t *pte, tpte;
 2597         vm_page_t m;
 2598         pv_entry_t pv, npv;
 2599         int s;
 2600 
 2601 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2602         if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
 2603                 printf("warning: pmap_remove_pages called with non-current pmap\n");
 2604                 return;
 2605         }
 2606 #endif
 2607         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2608         s = splvm();
 2609         for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
 2610 
 2611                 if (pv->pv_va >= eva || pv->pv_va < sva) {
 2612                         npv = TAILQ_NEXT(pv, pv_plist);
 2613                         continue;
 2614                 }
 2615 
 2616 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2617                 pte = vtopte(pv->pv_va);
 2618 #else
 2619                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2620 #endif
 2621                 tpte = *pte;
 2622 
 2623                 if (tpte == 0) {
 2624                         printf("TPTE at %p  IS ZERO @ VA %08x\n",
 2625                                                         pte, pv->pv_va);
 2626                         panic("bad pte");
 2627                 }
 2628 
 2629 /*
 2630  * We cannot remove wired pages from a process' mapping at this time
 2631  */
 2632                 if (tpte & PG_W) {
 2633                         npv = TAILQ_NEXT(pv, pv_plist);
 2634                         continue;
 2635                 }
 2636 
 2637                 m = PHYS_TO_VM_PAGE(tpte);
 2638                 KASSERT(m->phys_addr == (tpte & PG_FRAME),
 2639                     ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 2640                     m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
 2641 
 2642                 KASSERT(m < &vm_page_array[vm_page_array_size],
 2643                         ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
 2644 
 2645                 pv->pv_pmap->pm_stats.resident_count--;
 2646 
 2647                 pte_clear(pte);
 2648 
 2649                 /*
 2650                  * Update the vm_page_t clean and reference bits.
 2651                  */
 2652                 if (tpte & PG_M) {
 2653                         vm_page_dirty(m);
 2654                 }
 2655 
 2656                 npv = TAILQ_NEXT(pv, pv_plist);
 2657                 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 2658 
 2659                 m->md.pv_list_count--;
 2660                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2661                 if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
 2662                         vm_page_flag_clear(m, PG_WRITEABLE);
 2663                 }
 2664 
 2665                 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
 2666                 free_pv_entry(pv);
 2667         }
 2668         splx(s);
 2669         pmap_invalidate_all(pmap);
 2670 }
 2671 
 2672 /*
 2673  *      pmap_is_modified:
 2674  *
 2675  *      Return whether or not the specified physical page was modified
 2676  *      in any physical maps.
 2677  */
 2678 boolean_t
 2679 pmap_is_modified(vm_page_t m)
 2680 {
 2681         pv_entry_t pv;
 2682         pt_entry_t *pte;
 2683         int s;
 2684 
 2685         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2686                 return FALSE;
 2687 
 2688         s = splvm();
 2689         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2690         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2691                 /*
 2692                  * if the bit being tested is the modified bit, then
 2693                  * mark clean_map and ptes as never
 2694                  * modified.
 2695                  */
 2696                 if (!pmap_track_modified(pv->pv_va))
 2697                         continue;
 2698 #if defined(PMAP_DIAGNOSTIC)
 2699                 if (!pv->pv_pmap) {
 2700                         printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
 2701                         continue;
 2702                 }
 2703 #endif
 2704                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2705                 if (*pte & PG_M) {
 2706                         splx(s);
 2707                         return TRUE;
 2708                 }
 2709         }
 2710         splx(s);
 2711         return (FALSE);
 2712 }
 2713 
 2714 /*
 2715  *      pmap_is_prefaultable:
 2716  *
 2717  *      Return whether or not the specified virtual address is elgible
 2718  *      for prefault.
 2719  */
 2720 boolean_t
 2721 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 2722 {
 2723         pt_entry_t *pte;
 2724 
 2725         if ((*pmap_pde(pmap, addr)) == 0) 
 2726                 return (FALSE);
 2727         pte = vtopte(addr);
 2728         if (*pte)
 2729                 return (FALSE);
 2730         return (TRUE);
 2731 }
 2732 
 2733 /*
 2734  *      Clear the given bit in each of the given page's ptes.
 2735  */
 2736 static __inline void
 2737 pmap_clear_ptes(vm_page_t m, int bit)
 2738 {
 2739         register pv_entry_t pv;
 2740         pt_entry_t pbits, *pte;
 2741         int s;
 2742 
 2743         if (!pmap_initialized || (m->flags & PG_FICTITIOUS) ||
 2744             (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
 2745                 return;
 2746 
 2747         s = splvm();
 2748         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2749         /*
 2750          * Loop over all current mappings setting/clearing as appropos If
 2751          * setting RO do we need to clear the VAC?
 2752          */
 2753         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2754                 /*
 2755                  * don't write protect pager mappings
 2756                  */
 2757                 if (bit == PG_RW) {
 2758                         if (!pmap_track_modified(pv->pv_va))
 2759                                 continue;
 2760                 }
 2761 
 2762 #if defined(PMAP_DIAGNOSTIC)
 2763                 if (!pv->pv_pmap) {
 2764                         printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
 2765                         continue;
 2766                 }
 2767 #endif
 2768 
 2769                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2770                 pbits = *pte;
 2771                 if (pbits & bit) {
 2772                         if (bit == PG_RW) {
 2773                                 if (pbits & PG_M) {
 2774                                         vm_page_dirty(m);
 2775                                 }
 2776                                 pte_store(pte, pbits & ~(PG_M|PG_RW));
 2777                         } else {
 2778                                 pte_store(pte, pbits & ~bit);
 2779                         }
 2780                         pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 2781                 }
 2782         }
 2783         if (bit == PG_RW)
 2784                 vm_page_flag_clear(m, PG_WRITEABLE);
 2785         splx(s);
 2786 }
 2787 
 2788 /*
 2789  *      pmap_page_protect:
 2790  *
 2791  *      Lower the permission for all mappings to a given page.
 2792  */
 2793 void
 2794 pmap_page_protect(vm_page_t m, vm_prot_t prot)
 2795 {
 2796         if ((prot & VM_PROT_WRITE) == 0) {
 2797                 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
 2798                         pmap_clear_ptes(m, PG_RW);
 2799                 } else {
 2800                         pmap_remove_all(m);
 2801                 }
 2802         }
 2803 }
 2804 
 2805 /*
 2806  *      pmap_ts_referenced:
 2807  *
 2808  *      Return a count of reference bits for a page, clearing those bits.
 2809  *      It is not necessary for every reference bit to be cleared, but it
 2810  *      is necessary that 0 only be returned when there are truly no
 2811  *      reference bits set.
 2812  *
 2813  *      XXX: The exact number of bits to check and clear is a matter that
 2814  *      should be tested and standardized at some point in the future for
 2815  *      optimal aging of shared pages.
 2816  */
 2817 int
 2818 pmap_ts_referenced(vm_page_t m)
 2819 {
 2820         register pv_entry_t pv, pvf, pvn;
 2821         pt_entry_t *pte;
 2822         pt_entry_t v;
 2823         int s;
 2824         int rtval = 0;
 2825 
 2826         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2827                 return (rtval);
 2828 
 2829         s = splvm();
 2830         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 2831         if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2832 
 2833                 pvf = pv;
 2834 
 2835                 do {
 2836                         pvn = TAILQ_NEXT(pv, pv_list);
 2837 
 2838                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2839 
 2840                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2841 
 2842                         if (!pmap_track_modified(pv->pv_va))
 2843                                 continue;
 2844 
 2845                         pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2846 
 2847                         if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
 2848                                 pte_store(pte, v & ~PG_A);
 2849                                 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
 2850 
 2851                                 rtval++;
 2852                                 if (rtval > 4) {
 2853                                         break;
 2854                                 }
 2855                         }
 2856                 } while ((pv = pvn) != NULL && pv != pvf);
 2857         }
 2858         splx(s);
 2859 
 2860         return (rtval);
 2861 }
 2862 
 2863 /*
 2864  *      Clear the modify bits on the specified physical page.
 2865  */
 2866 void
 2867 pmap_clear_modify(vm_page_t m)
 2868 {
 2869         pmap_clear_ptes(m, PG_M);
 2870 }
 2871 
 2872 /*
 2873  *      pmap_clear_reference:
 2874  *
 2875  *      Clear the reference bit on the specified physical page.
 2876  */
 2877 void
 2878 pmap_clear_reference(vm_page_t m)
 2879 {
 2880         pmap_clear_ptes(m, PG_A);
 2881 }
 2882 
 2883 /*
 2884  * Miscellaneous support routines follow
 2885  */
 2886 
 2887 static void
 2888 i386_protection_init()
 2889 {
 2890         register int *kp, prot;
 2891 
 2892         kp = protection_codes;
 2893         for (prot = 0; prot < 8; prot++) {
 2894                 switch (prot) {
 2895                 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
 2896                         /*
 2897                          * Read access is also 0. There isn't any execute bit,
 2898                          * so just make it readable.
 2899                          */
 2900                 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
 2901                 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
 2902                 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
 2903                         *kp++ = 0;
 2904                         break;
 2905                 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
 2906                 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
 2907                 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
 2908                 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
 2909                         *kp++ = PG_RW;
 2910                         break;
 2911                 }
 2912         }
 2913 }
 2914 
 2915 /*
 2916  * Map a set of physical memory pages into the kernel virtual
 2917  * address space. Return a pointer to where it is mapped. This
 2918  * routine is intended to be used for mapping device memory,
 2919  * NOT real memory.
 2920  */
 2921 void *
 2922 pmap_mapdev(pa, size)
 2923         vm_paddr_t pa;
 2924         vm_size_t size;
 2925 {
 2926         vm_offset_t va, tmpva, offset;
 2927 
 2928         offset = pa & PAGE_MASK;
 2929         size = roundup(offset + size, PAGE_SIZE);
 2930         pa = pa & PG_FRAME;
 2931 
 2932         if (pa < KERNLOAD && pa + size <= KERNLOAD)
 2933                 va = KERNBASE + pa;
 2934         else
 2935                 va = kmem_alloc_nofault(kernel_map, size);
 2936         if (!va)
 2937                 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 2938 
 2939         for (tmpva = va; size > 0; ) {
 2940                 pmap_kenter(tmpva, pa);
 2941                 size -= PAGE_SIZE;
 2942                 tmpva += PAGE_SIZE;
 2943                 pa += PAGE_SIZE;
 2944         }
 2945         pmap_invalidate_range(kernel_pmap, va, tmpva);
 2946         return ((void *)(va + offset));
 2947 }
 2948 
 2949 void
 2950 pmap_unmapdev(va, size)
 2951         vm_offset_t va;
 2952         vm_size_t size;
 2953 {
 2954         vm_offset_t base, offset, tmpva;
 2955 
 2956         if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
 2957                 return;
 2958         base = va & PG_FRAME;
 2959         offset = va & PAGE_MASK;
 2960         size = roundup(offset + size, PAGE_SIZE);
 2961         for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
 2962                 pmap_kremove(tmpva);
 2963         pmap_invalidate_range(kernel_pmap, va, tmpva);
 2964         kmem_free(kernel_map, base, size);
 2965 }
 2966 
 2967 /*
 2968  * perform the pmap work for mincore
 2969  */
 2970 int
 2971 pmap_mincore(pmap, addr)
 2972         pmap_t pmap;
 2973         vm_offset_t addr;
 2974 {
 2975         pt_entry_t *ptep, pte;
 2976         vm_page_t m;
 2977         int val = 0;
 2978         
 2979         ptep = pmap_pte(pmap, addr);
 2980         if (ptep == 0) {
 2981                 return 0;
 2982         }
 2983 
 2984         if ((pte = *ptep) != 0) {
 2985                 vm_paddr_t pa;
 2986 
 2987                 val = MINCORE_INCORE;
 2988                 if ((pte & PG_MANAGED) == 0)
 2989                         return val;
 2990 
 2991                 pa = pte & PG_FRAME;
 2992 
 2993                 m = PHYS_TO_VM_PAGE(pa);
 2994 
 2995                 /*
 2996                  * Modified by us
 2997                  */
 2998                 if (pte & PG_M)
 2999                         val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 3000                 else {
 3001                         /*
 3002                          * Modified by someone else
 3003                          */
 3004                         vm_page_lock_queues();
 3005                         if (m->dirty || pmap_is_modified(m))
 3006                                 val |= MINCORE_MODIFIED_OTHER;
 3007                         vm_page_unlock_queues();
 3008                 }
 3009                 /*
 3010                  * Referenced by us
 3011                  */
 3012                 if (pte & PG_A)
 3013                         val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 3014                 else {
 3015                         /*
 3016                          * Referenced by someone else
 3017                          */
 3018                         vm_page_lock_queues();
 3019                         if ((m->flags & PG_REFERENCED) ||
 3020                             pmap_ts_referenced(m)) {
 3021                                 val |= MINCORE_REFERENCED_OTHER;
 3022                                 vm_page_flag_set(m, PG_REFERENCED);
 3023                         }
 3024                         vm_page_unlock_queues();
 3025                 }
 3026         } 
 3027         return val;
 3028 }
 3029 
 3030 void
 3031 pmap_activate(struct thread *td)
 3032 {
 3033         struct proc *p = td->td_proc;
 3034         pmap_t  pmap, oldpmap;
 3035         u_int32_t  cr3;
 3036 
 3037         critical_enter();
 3038         pmap = vmspace_pmap(td->td_proc->p_vmspace);
 3039         oldpmap = PCPU_GET(curpmap);
 3040 #if defined(SMP)
 3041         atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
 3042         atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
 3043 #else
 3044         oldpmap->pm_active &= ~1;
 3045         pmap->pm_active |= 1;
 3046 #endif
 3047 #ifdef PAE
 3048         cr3 = vtophys(pmap->pm_pdpt);
 3049 #else
 3050         cr3 = vtophys(pmap->pm_pdir);
 3051 #endif
 3052         /* XXXKSE this is wrong.
 3053          * pmap_activate is for the current thread on the current cpu
 3054          */
 3055         if (p->p_flag & P_SA) {
 3056                 /* Make sure all other cr3 entries are updated. */
 3057                 /* what if they are running?  XXXKSE (maybe abort them) */
 3058                 FOREACH_THREAD_IN_PROC(p, td) {
 3059                         td->td_pcb->pcb_cr3 = cr3;
 3060                 }
 3061         } else {
 3062                 td->td_pcb->pcb_cr3 = cr3;
 3063         }
 3064         load_cr3(cr3);
 3065         PCPU_SET(curpmap, pmap);
 3066         critical_exit();
 3067 }
 3068 
 3069 vm_offset_t
 3070 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 3071 {
 3072 
 3073         if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
 3074                 return addr;
 3075         }
 3076 
 3077         addr = (addr + PDRMASK) & ~PDRMASK;
 3078         return addr;
 3079 }
 3080 
 3081 
 3082 #if defined(PMAP_DEBUG)
 3083 pmap_pid_dump(int pid)
 3084 {
 3085         pmap_t pmap;
 3086         struct proc *p;
 3087         int npte = 0;
 3088         int index;
 3089 
 3090         sx_slock(&allproc_lock);
 3091         LIST_FOREACH(p, &allproc, p_list) {
 3092                 if (p->p_pid != pid)
 3093                         continue;
 3094 
 3095                 if (p->p_vmspace) {
 3096                         int i,j;
 3097                         index = 0;
 3098                         pmap = vmspace_pmap(p->p_vmspace);
 3099                         for (i = 0; i < NPDEPTD; i++) {
 3100                                 pd_entry_t *pde;
 3101                                 pt_entry_t *pte;
 3102                                 vm_offset_t base = i << PDRSHIFT;
 3103                                 
 3104                                 pde = &pmap->pm_pdir[i];
 3105                                 if (pde && pmap_pde_v(pde)) {
 3106                                         for (j = 0; j < NPTEPG; j++) {
 3107                                                 vm_offset_t va = base + (j << PAGE_SHIFT);
 3108                                                 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 3109                                                         if (index) {
 3110                                                                 index = 0;
 3111                                                                 printf("\n");
 3112                                                         }
 3113                                                         sx_sunlock(&allproc_lock);
 3114                                                         return npte;
 3115                                                 }
 3116                                                 pte = pmap_pte(pmap, va);
 3117                                                 if (pte && pmap_pte_v(pte)) {
 3118                                                         pt_entry_t pa;
 3119                                                         vm_page_t m;
 3120                                                         pa = *pte;
 3121                                                         m = PHYS_TO_VM_PAGE(pa);
 3122                                                         printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 3123                                                                 va, pa, m->hold_count, m->wire_count, m->flags);
 3124                                                         npte++;
 3125                                                         index++;
 3126                                                         if (index >= 2) {
 3127                                                                 index = 0;
 3128                                                                 printf("\n");
 3129                                                         } else {
 3130                                                                 printf(" ");
 3131                                                         }
 3132                                                 }
 3133                                         }
 3134                                 }
 3135                         }
 3136                 }
 3137         }
 3138         sx_sunlock(&allproc_lock);
 3139         return npte;
 3140 }
 3141 #endif
 3142 
 3143 #if defined(DEBUG)
 3144 
 3145 static void     pads(pmap_t pm);
 3146 void            pmap_pvdump(vm_offset_t pa);
 3147 
 3148 /* print address space of pmap*/
 3149 static void
 3150 pads(pm)
 3151         pmap_t pm;
 3152 {
 3153         int i, j;
 3154         vm_paddr_t va;
 3155         pt_entry_t *ptep;
 3156 
 3157         if (pm == kernel_pmap)
 3158                 return;
 3159         for (i = 0; i < NPDEPTD; i++)
 3160                 if (pm->pm_pdir[i])
 3161                         for (j = 0; j < NPTEPG; j++) {
 3162                                 va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
 3163                                 if (pm == kernel_pmap && va < KERNBASE)
 3164                                         continue;
 3165                                 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
 3166                                         continue;
 3167                                 ptep = pmap_pte(pm, va);
 3168                                 if (pmap_pte_v(ptep))
 3169                                         printf("%x:%x ", va, *ptep);
 3170                         };
 3171 
 3172 }
 3173 
 3174 void
 3175 pmap_pvdump(pa)
 3176         vm_paddr_t pa;
 3177 {
 3178         pv_entry_t pv;
 3179         vm_page_t m;
 3180 
 3181         printf("pa %x", pa);
 3182         m = PHYS_TO_VM_PAGE(pa);
 3183         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 3184                 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
 3185                 pads(pv->pv_pmap);
 3186         }
 3187         printf(" ");
 3188 }
 3189 #endif

Cache object: cac0b30ebf2ce76f4c4abc2c4960d89d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.