FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/pmap.c
1 /*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgement:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
42 * $FreeBSD: releng/5.0/sys/i386/i386/pmap.c 107882 2002-12-14 21:08:30Z alc $
43 */
44
45 /*
46 * Manages physical address maps.
47 *
48 * In addition to hardware address maps, this
49 * module is called upon to provide software-use-only
50 * maps which may or may not be stored in the same
51 * form as hardware maps. These pseudo-maps are
52 * used to store intermediate results from copy
53 * operations to and from address spaces.
54 *
55 * Since the information managed by this module is
56 * also stored by the logical address mapping module,
57 * this module may throw away valid virtual-to-physical
58 * mappings at almost any time. However, invalidations
59 * of virtual-to-physical mappings must be done as
60 * requested.
61 *
62 * In order to cope with hardware architectures which
63 * make virtual-to-physical map invalidates expensive,
64 * this module may delay invalidation or reduced-protection
65 * operations until such time as they are actually
66 * necessary. This module is given full information as
67 * to which processors are currently using which maps,
68 * and to when physical maps must be made correct.
69 */
70
71 #include "opt_pmap.h"
72 #include "opt_msgbuf.h"
73 #include "opt_kstack_pages.h"
74
75 #include <sys/param.h>
76 #include <sys/systm.h>
77 #include <sys/kernel.h>
78 #include <sys/lock.h>
79 #include <sys/mman.h>
80 #include <sys/msgbuf.h>
81 #include <sys/mutex.h>
82 #include <sys/proc.h>
83 #include <sys/sx.h>
84 #include <sys/user.h>
85 #include <sys/vmmeter.h>
86 #include <sys/sysctl.h>
87 #ifdef SMP
88 #include <sys/smp.h>
89 #endif
90
91 #include <vm/vm.h>
92 #include <vm/vm_param.h>
93 #include <vm/vm_kern.h>
94 #include <vm/vm_page.h>
95 #include <vm/vm_map.h>
96 #include <vm/vm_object.h>
97 #include <vm/vm_extern.h>
98 #include <vm/vm_pageout.h>
99 #include <vm/vm_pager.h>
100 #include <vm/uma.h>
101
102 #include <machine/cpu.h>
103 #include <machine/cputypes.h>
104 #include <machine/md_var.h>
105 #include <machine/specialreg.h>
106 #if defined(SMP) || defined(APIC_IO)
107 #include <machine/smp.h>
108 #include <machine/apic.h>
109 #include <machine/segments.h>
110 #include <machine/tss.h>
111 #endif /* SMP || APIC_IO */
112
113 #define PMAP_KEEP_PDIRS
114 #ifndef PMAP_SHPGPERPROC
115 #define PMAP_SHPGPERPROC 200
116 #endif
117
118 #if defined(DIAGNOSTIC)
119 #define PMAP_DIAGNOSTIC
120 #endif
121
122 #define MINPV 2048
123
124 #if !defined(PMAP_DIAGNOSTIC)
125 #define PMAP_INLINE __inline
126 #else
127 #define PMAP_INLINE
128 #endif
129
130 /*
131 * Get PDEs and PTEs for user/kernel address space
132 */
133 #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
134 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
135
136 #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
137 #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
138 #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
139 #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
140 #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
141
142 #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
143 #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
144
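/*
 * A minimal sketch (not part of the original file) of the arithmetic
 * behind the macros above, assuming the usual i386 constants
 * PDRSHIFT == 22, PAGE_SHIFT == 12 and NPTEPG == 1024: for example,
 * va 0x00403000 has directory index 1 and table index 3.  The helper
 * names are illustrative only.
 */
static __inline unsigned
va_to_pdi(vm_offset_t va)
{
	return (va >> PDRSHIFT);		/* top 10 bits: pde slot */
}

static __inline unsigned
va_to_pti(vm_offset_t va)
{
	return ((va >> PAGE_SHIFT) & (NPTEPG - 1));	/* next 10 bits */
}
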
145 /*
146 * Given a map and a machine-independent protection code,
147 * convert to an i386 protection code.
148 */
149 #define pte_prot(m, p) (protection_codes[p])
150 static int protection_codes[8];
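
/*
 * A sketch of the idea behind protection_codes[] (the real table is
 * filled in by i386_protection_init(), declared below): i386 page
 * tables have no execute bit, so read and execute permissions both
 * map to an empty bit set and only VM_PROT_WRITE adds PG_RW.  The
 * helper below is illustrative, not part of this file.
 */
static __inline int
prot_to_pte_bits_sketch(vm_prot_t prot)
{
	return ((prot & VM_PROT_WRITE) ? PG_RW : 0);
}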
151
152 struct pmap kernel_pmap_store;
153 LIST_HEAD(pmaplist, pmap);
154 struct pmaplist allpmaps;
155
156 vm_offset_t avail_start; /* PA of first available physical page */
157 vm_offset_t avail_end; /* PA of last available physical page */
158 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
159 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
160 static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
161 static int pgeflag; /* PG_G or-in */
162 static int pseflag; /* PG_PS or-in */
163
164 static vm_object_t kptobj;
165
166 static int nkpt;
167 vm_offset_t kernel_vm_end;
168 extern u_int32_t KERNend;
169
170 /*
171 * Data for the pv entry allocation mechanism
172 */
173 static uma_zone_t pvzone;
174 static struct vm_object pvzone_obj;
175 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
176 int pmap_pagedaemon_waken;
177
178 /*
179 * All those kernel PT submaps that BSD is so fond of
180 */
181 pt_entry_t *CMAP1 = 0;
182 static pt_entry_t *CMAP2, *CMAP3, *ptmmap;
183 caddr_t CADDR1 = 0, ptvmmap = 0;
184 static caddr_t CADDR2, CADDR3;
185 static pt_entry_t *msgbufmap;
186 struct msgbuf *msgbufp = 0;
187
188 /*
189 * Crashdump maps.
190 */
191 static pt_entry_t *pt_crashdumpmap;
192 static caddr_t crashdumpmap;
193
194 #ifdef SMP
195 extern pt_entry_t *SMPpt;
196 #endif
197 static pt_entry_t *PMAP1 = 0;
198 static pt_entry_t *PADDR1 = 0;
199
200 static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
201 static pt_entry_t *get_ptbase(pmap_t pmap);
202 static pv_entry_t get_pv_entry(void);
203 static void i386_protection_init(void);
204 static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
205
206 static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va,
207 vm_page_t m, vm_page_t mpte);
208 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
209 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
210 static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
211 vm_offset_t va);
212 static boolean_t pmap_testbit(vm_page_t m, int bit);
213 static void pmap_insert_entry(pmap_t pmap, vm_offset_t va,
214 vm_page_t mpte, vm_page_t m);
215
216 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va);
217
218 static int pmap_release_free_page(pmap_t pmap, vm_page_t p);
219 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex);
220 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
221 static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex);
222 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
223 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
224 static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
225
226 static pd_entry_t pdir4mb;
227
228 /*
229 * Routine: pmap_pte
230 * Function:
231 * Extract the page table entry associated
232 * with the given map/virtual_address pair.
233 */
234
235 PMAP_INLINE pt_entry_t *
236 pmap_pte(pmap, va)
237 register pmap_t pmap;
238 vm_offset_t va;
239 {
240 pd_entry_t *pdeaddr;
241
242 if (pmap) {
243 pdeaddr = pmap_pde(pmap, va);
244 if (*pdeaddr & PG_PS)
245 return pdeaddr;
246 if (*pdeaddr) {
247 return get_ptbase(pmap) + i386_btop(va);
248 }
249 }
250 return (0);
251 }
252
253 /*
254 * Move the kernel virtual free pointer to the next
255 * 4MB. This is used to help improve performance
256 * by using a large (4MB) page for much of the kernel
257 * (.text, .data, .bss)
258 */
259 static vm_offset_t
260 pmap_kmem_choose(vm_offset_t addr)
261 {
262 vm_offset_t newaddr = addr;
263
264 #ifndef DISABLE_PSE
265 if (cpu_feature & CPUID_PSE)
266 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
267 #endif
268 return newaddr;
269 }
270
271 /*
272 * Bootstrap the system enough to run with virtual memory.
273 *
274 * On the i386 this is called after mapping has already been enabled
275 * and just syncs the pmap module with what has already been done.
276 * [We can't call it easily with mapping off since the kernel is not
277 * mapped with PA == VA, hence we would have to relocate every address
278 * from the linked base (virtual) address "KERNBASE" to the actual
279 * (physical) address starting relative to 0]
280 */
281 void
282 pmap_bootstrap(firstaddr, loadaddr)
283 vm_offset_t firstaddr;
284 vm_offset_t loadaddr;
285 {
286 vm_offset_t va;
287 pt_entry_t *pte;
288 int i;
289
290 avail_start = firstaddr;
291
292 /*
293 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
294 * large. It should instead be correctly calculated in locore.s and
295 * not based on 'first' (which is a physical address, not a virtual
296 * address, for the start of unused physical memory). The kernel
297 * page tables are NOT double mapped and thus should not be included
298 * in this calculation.
299 */
300 virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
301 virtual_avail = pmap_kmem_choose(virtual_avail);
302
303 virtual_end = VM_MAX_KERNEL_ADDRESS;
304
305 /*
306 * Initialize protection array.
307 */
308 i386_protection_init();
309
310 /*
311 * Initialize the kernel pmap (which is statically allocated).
312 */
313 kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
314 kernel_pmap->pm_active = -1; /* don't allow deactivation */
315 TAILQ_INIT(&kernel_pmap->pm_pvlist);
316 LIST_INIT(&allpmaps);
317 LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
318 nkpt = NKPT;
319
320 /*
321 * Reserve some special page table entries/VA space for temporary
322 * mapping of pages.
323 */
324 #define SYSMAP(c, p, v, n) \
325 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
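
	/*
	 * For reference, the first use below, SYSMAP(caddr_t, CMAP1,
	 * CADDR1, 1), expands to roughly:
	 *
	 *	CADDR1 = (caddr_t)va; va += 1 * PAGE_SIZE;
	 *	CMAP1 = pte; pte += 1;
	 *
	 * i.e. it hands out one page of KVA (CADDR1) together with a
	 * pointer to the pte that maps it (CMAP1), without installing
	 * any mapping yet.
	 */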
326
327 va = virtual_avail;
328 pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
329
330 /*
331 * CMAP1/CMAP2 are used for zeroing and copying pages.
332 * CMAP3 is used for the idle process page zeroing.
333 */
334 SYSMAP(caddr_t, CMAP1, CADDR1, 1)
335 SYSMAP(caddr_t, CMAP2, CADDR2, 1)
336 SYSMAP(caddr_t, CMAP3, CADDR3, 1)
337
338 /*
339 * Crashdump maps.
340 */
341 SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
342
343 /*
344 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
345 * XXX ptmmap is not used.
346 */
347 SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
348
349 /*
350 * msgbufp is used to map the system message buffer.
351 * XXX msgbufmap is not used.
352 */
353 SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
354 atop(round_page(MSGBUF_SIZE)))
355
356 /*
357 * ptemap is used for pmap_pte_quick
358 */
359 SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
360
361 virtual_avail = va;
362
363 *CMAP1 = *CMAP2 = 0;
364 for (i = 0; i < NKPT; i++)
365 PTD[i] = 0;
366
367 pgeflag = 0;
368 #ifndef DISABLE_PG_G
369 if (cpu_feature & CPUID_PGE)
370 pgeflag = PG_G;
371 #endif
372
373 /*
374 * Initialize the 4MB page size flag
375 */
376 pseflag = 0;
377 /*
378 * The 4MB page version of the initial
379 * kernel page mapping.
380 */
381 pdir4mb = 0;
382
383 #ifndef DISABLE_PSE
384 if (cpu_feature & CPUID_PSE) {
385 pd_entry_t ptditmp;
386 /*
387 * Note that we have enabled PSE mode
388 */
389 pseflag = PG_PS;
390 ptditmp = *(PTmap + i386_btop(KERNBASE));
391 ptditmp &= ~(NBPDR - 1);
392 ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
393 pdir4mb = ptditmp;
394 }
395 #endif
396 #ifndef SMP
397 /*
398 * Turn on PGE/PSE. SMP does this later on since the
399 * 4K page tables are required for AP boot (for now).
400 * XXX fixme.
401 */
402 pmap_set_opt();
403 #endif
404 #ifdef SMP
405 if (cpu_apic_address == 0)
406 panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
407
408 /* local apic is mapped on last page */
409 SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
410 (cpu_apic_address & PG_FRAME));
411 #endif
412 invltlb();
413 }
414
415 /*
416 * Enable 4MB page mode for MP startup. Turn on PG_G support.
417 * BSP will run this after all the AP's have started up.
418 */
419 void
420 pmap_set_opt(void)
421 {
422 pt_entry_t *pte;
423 vm_offset_t va, endva;
424
425 if (pgeflag && (cpu_feature & CPUID_PGE)) {
426 load_cr4(rcr4() | CR4_PGE);
427 invltlb(); /* Insurance */
428 }
429 #ifndef DISABLE_PSE
430 if (pseflag && (cpu_feature & CPUID_PSE)) {
431 load_cr4(rcr4() | CR4_PSE);
432 invltlb(); /* Insurance */
433 }
434 #endif
435 if (PCPU_GET(cpuid) == 0) {
436 #ifndef DISABLE_PSE
437 if (pdir4mb) {
438 kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
439 invltlb(); /* Insurance */
440 }
441 #endif
442 if (pgeflag) {
443 /* Turn on PG_G for text, data, bss pages. */
444 va = (vm_offset_t)btext;
445 #ifndef DISABLE_PSE
446 if (pseflag && (cpu_feature & CPUID_PSE)) {
447 if (va < KERNBASE + (1 << PDRSHIFT))
448 va = KERNBASE + (1 << PDRSHIFT);
449 }
450 #endif
451 endva = KERNBASE + KERNend;
452 while (va < endva) {
453 pte = vtopte(va);
454 if (*pte)
455 *pte |= pgeflag;
456 va += PAGE_SIZE;
457 }
458 invltlb(); /* Insurance */
459 }
460 /*
461 * We do not need to broadcast the invltlb here, because
462 * each AP does it the moment it is released from the boot
463 * lock. See ap_init().
464 */
465 }
466 }
467
468 static void *
469 pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
470 {
471 *flags = UMA_SLAB_PRIV;
472 return (void *)kmem_alloc(kernel_map, bytes);
473 }
474
475 /*
476 * Initialize the pmap module.
477 * Called by vm_init, to initialize any structures that the pmap
478 * system needs to map virtual memory.
479 * pmap_init has been enhanced to support in a fairly consistant
480 * way, discontiguous physical memory.
481 */
482 void
483 pmap_init(phys_start, phys_end)
484 vm_offset_t phys_start, phys_end;
485 {
486 int i;
487 int initial_pvs;
488
489 /*
490 * object for kernel page table pages
491 */
492 kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
493
494 /*
495 * Allocate memory for random pmap data structures. Includes the
496 * pv_head_table.
497 */
498
499 for(i = 0; i < vm_page_array_size; i++) {
500 vm_page_t m;
501
502 m = &vm_page_array[i];
503 TAILQ_INIT(&m->md.pv_list);
504 m->md.pv_list_count = 0;
505 }
506
507 /*
508 * init the pv free list
509 */
510 initial_pvs = vm_page_array_size;
511 if (initial_pvs < MINPV)
512 initial_pvs = MINPV;
513 pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
514 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
515 uma_zone_set_allocf(pvzone, pmap_allocf);
516 uma_prealloc(pvzone, initial_pvs);
517
518 /*
519 * Now it is safe to enable pv_table recording.
520 */
521 pmap_initialized = TRUE;
522 }
523
524 /*
525 * Initialize the address space (zone) for the pv_entries. Set a
526 * high water mark so that the system can recover from excessive
527 * numbers of pv entries.
528 */
529 void
530 pmap_init2()
531 {
532 int shpgperproc = PMAP_SHPGPERPROC;
533
534 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
535 pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
536 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
537 pv_entry_high_water = 9 * (pv_entry_max / 10);
538 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
539 }
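
/*
 * Worked example of the sizing above (the numbers are illustrative):
 * with the default PMAP_SHPGPERPROC of 200, maxproc == 512 and
 * vm_page_array_size == 65536, pv_entry_max is
 * 200 * 512 + 65536 == 167936 and pv_entry_high_water is
 * 9 * (167936 / 10) == 151137, past which get_pv_entry() wakes the
 * pagedaemon.
 */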
540
541
542 /***************************************************
543 * Low level helper routines.....
544 ***************************************************/
545
546 #if defined(PMAP_DIAGNOSTIC)
547
548 /*
549 * This code checks for non-writeable/modified pages.
550 * This should be an invalid condition.
551 */
552 static int
553 pmap_nw_modified(pt_entry_t ptea)
554 {
555 int pte;
556
557 pte = (int) ptea;
558
559 if ((pte & (PG_M|PG_RW)) == PG_M)
560 return 1;
561 else
562 return 0;
563 }
564 #endif
565
566
567 /*
568 * this routine defines the region(s) of memory that should
569 * not be tested for the modified bit.
570 */
571 static PMAP_INLINE int
572 pmap_track_modified(vm_offset_t va)
573 {
574 if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
575 return 1;
576 else
577 return 0;
578 }
579
580 #ifdef I386_CPU
581 /*
582 * i386 only has "invalidate everything" and no SMP to worry about.
583 */
584 PMAP_INLINE void
585 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
586 {
587
588 if (pmap == kernel_pmap || pmap->pm_active)
589 invltlb();
590 }
591
592 PMAP_INLINE void
593 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
594 {
595
596 if (pmap == kernel_pmap || pmap->pm_active)
597 invltlb();
598 }
599
600 PMAP_INLINE void
601 pmap_invalidate_all(pmap_t pmap)
602 {
603
604 if (pmap == kernel_pmap || pmap->pm_active)
605 invltlb();
606 }
607 #else /* !I386_CPU */
608 #ifdef SMP
609 /*
610 * For SMP, these functions have to use the IPI mechanism for coherence.
611 */
612 void
613 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
614 {
615 u_int cpumask;
616 u_int other_cpus;
617
618 critical_enter();
619 /*
620 * We need to disable interrupt preemption but MUST NOT have
621 * interrupts disabled here.
622 * XXX we may need to hold schedlock to get a coherent pm_active
623 */
624 if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
625 invlpg(va);
626 smp_invlpg(va);
627 } else {
628 cpumask = PCPU_GET(cpumask);
629 other_cpus = PCPU_GET(other_cpus);
630 if (pmap->pm_active & cpumask)
631 invlpg(va);
632 if (pmap->pm_active & other_cpus)
633 smp_masked_invlpg(pmap->pm_active & other_cpus, va);
634 }
635 critical_exit();
636 }
637
638 void
639 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
640 {
641 u_int cpumask;
642 u_int other_cpus;
643 vm_offset_t addr;
644
645 critical_enter();
646 /*
647 * We need to disable interrupt preemption but MUST NOT have
648 * interrupts disabled here.
649 * XXX we may need to hold schedlock to get a coherent pm_active
650 */
651 if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
652 for (addr = sva; addr < eva; addr += PAGE_SIZE)
653 invlpg(addr);
654 smp_invlpg_range(sva, eva);
655 } else {
656 cpumask = PCPU_GET(cpumask);
657 other_cpus = PCPU_GET(other_cpus);
658 if (pmap->pm_active & cpumask)
659 for (addr = sva; addr < eva; addr += PAGE_SIZE)
660 invlpg(addr);
661 if (pmap->pm_active & other_cpus)
662 smp_masked_invlpg_range(pmap->pm_active & other_cpus,
663 sva, eva);
664 }
665 critical_exit();
666 }
667
668 void
669 pmap_invalidate_all(pmap_t pmap)
670 {
671 u_int cpumask;
672 u_int other_cpus;
673
674 #ifdef SWTCH_OPTIM_STATS
675 tlb_flush_count++;
676 #endif
677 critical_enter();
678 /*
679 * We need to disable interrupt preemption but MUST NOT have
680 * interrupts disabled here.
681 * XXX we may need to hold schedlock to get a coherent pm_active
682 */
683 if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
684 invltlb();
685 smp_invltlb();
686 } else {
687 cpumask = PCPU_GET(cpumask);
688 other_cpus = PCPU_GET(other_cpus);
689 if (pmap->pm_active & cpumask)
690 invltlb();
691 if (pmap->pm_active & other_cpus)
692 smp_masked_invltlb(pmap->pm_active & other_cpus);
693 }
694 critical_exit();
695 }
696 #else /* !SMP */
697 /*
698 * Normal, non-SMP, 486+ invalidation functions.
699 * We inline these within pmap.c for speed.
700 */
701 PMAP_INLINE void
702 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
703 {
704
705 if (pmap == kernel_pmap || pmap->pm_active)
706 invlpg(va);
707 }
708
709 PMAP_INLINE void
710 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
711 {
712 vm_offset_t addr;
713
714 if (pmap == kernel_pmap || pmap->pm_active)
715 for (addr = sva; addr < eva; addr += PAGE_SIZE)
716 invlpg(addr);
717 }
718
719 PMAP_INLINE void
720 pmap_invalidate_all(pmap_t pmap)
721 {
722
723 if (pmap == kernel_pmap || pmap->pm_active)
724 invltlb();
725 }
726 #endif /* !SMP */
727 #endif /* !I386_CPU */
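
/*
 * The range functions above issue one invlpg per page regardless of
 * the size of the range.  A common refinement (not used in this
 * file) is to fall back to a full TLB flush once per-page
 * invalidation would cost more than refilling the TLB; a sketch,
 * with an assumed crossover of 16 pages:
 */
#define	INVLRNG_MAX_PAGES	16	/* illustrative threshold */

static __inline void
pmap_invalidate_range_sketch(vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (eva - sva > INVLRNG_MAX_PAGES * PAGE_SIZE) {
		invltlb();	/* cheaper than many invlpg's */
		return;
	}
	for (addr = sva; addr < eva; addr += PAGE_SIZE)
		invlpg(addr);
}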
728
729 /*
730 * Return an address which is the base of the virtual mapping of
731 * all the PTEs for the given pmap. Note this doesn't say that
732 * all the PTEs will be present or that the pages there are valid.
733 * The PTEs are made available by the recursive mapping trick.
734 * It will map in the alternate PTE space if needed.
735 */
736 static pt_entry_t *
737 get_ptbase(pmap)
738 pmap_t pmap;
739 {
740 pd_entry_t frame;
741
742 /* are we current address space or kernel? */
743 if (pmap == kernel_pmap)
744 return PTmap;
745 frame = pmap->pm_pdir[PTDPTDI] & PG_FRAME;
746 if (frame == (PTDpde & PG_FRAME))
747 return PTmap;
748 /* otherwise, we are alternate address space */
749 if (frame != (APTDpde & PG_FRAME)) {
750 APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
751 pmap_invalidate_all(kernel_pmap); /* XXX Bandaid */
752 }
753 return APTmap;
754 }
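
/*
 * A sketch of why the recursive mapping trick works: pde slot
 * PTDPTDI points back at the page directory itself, so the MMU
 * treats the directory as a page table and every pte in the system
 * becomes visible at a fixed virtual address.  The helper below is
 * illustrative only; it should compute the same address as
 * PTmap + i386_btop(va) used above.
 */
static __inline pt_entry_t *
pte_va_sketch(vm_offset_t va)
{
	/* base of the recursive window, plus the page index of va */
	return ((pt_entry_t *)(PTDPTDI << PDRSHIFT) + i386_btop(va));
}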
755
756 /*
757 * Super fast pmap_pte routine best used when scanning
758 * the pv lists. This eliminates many coarse-grained
759 * invltlb calls. Note that many of the pv list
760 * scans are across different pmaps. It is very wasteful
761 * to do an entire invltlb for checking a single mapping.
762 */
763
764 static pt_entry_t *
765 pmap_pte_quick(pmap, va)
766 register pmap_t pmap;
767 vm_offset_t va;
768 {
769 pd_entry_t pde, newpf;
770 pde = pmap->pm_pdir[va >> PDRSHIFT];
771 if (pde != 0) {
772 pd_entry_t frame = pmap->pm_pdir[PTDPTDI] & PG_FRAME;
773 unsigned index = i386_btop(va);
774 /* are we current address space or kernel? */
775 if (pmap == kernel_pmap || frame == (PTDpde & PG_FRAME))
776 return PTmap + index;
777 newpf = pde & PG_FRAME;
778 if (((*PMAP1) & PG_FRAME) != newpf) {
779 *PMAP1 = newpf | PG_RW | PG_V;
780 pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1);
781 }
782 return PADDR1 + (index & (NPTEPG - 1));
783 }
784 return (0);
785 }
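
/*
 * PMAP1/PADDR1 above are one instance of a general pattern: reserve
 * a single kernel pte (and its va) at boot, then re-point that pte
 * at whatever physical frame needs touching, paying only a one-page
 * invalidation when the target frame changes.  A distilled sketch of
 * the pattern, with illustrative names:
 */
static __inline caddr_t
map_one_frame_sketch(pt_entry_t *window_pte, caddr_t window_va,
    vm_offset_t pa)
{
	if ((*window_pte & PG_FRAME) != (pa & PG_FRAME)) {
		*window_pte = (pa & PG_FRAME) | PG_RW | PG_V;
		pmap_invalidate_page(kernel_pmap, (vm_offset_t)window_va);
	}
	return (window_va);
}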
786
787 /*
788 * Routine: pmap_extract
789 * Function:
790 * Extract the physical page address associated
791 * with the given map/virtual_address pair.
792 */
793 vm_offset_t
794 pmap_extract(pmap, va)
795 register pmap_t pmap;
796 vm_offset_t va;
797 {
798 vm_offset_t rtval; /* XXX FIXME */
799 vm_offset_t pdirindex;
800
801 if (pmap == 0)
802 return 0;
803 pdirindex = va >> PDRSHIFT;
804 rtval = pmap->pm_pdir[pdirindex];
805 if (rtval != 0) {
806 pt_entry_t *pte;
807 if ((rtval & PG_PS) != 0) {
808 rtval &= ~(NBPDR - 1);
809 rtval |= va & (NBPDR - 1);
810 return rtval;
811 }
812 pte = get_ptbase(pmap) + i386_btop(va);
813 rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
814 return rtval;
815 }
816 return 0;
817
818 }
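
/*
 * Worked example of the PG_PS branch above: suppose the pde holds
 * 4MB frame 0x00800000 and va == 0x00401234 falls inside that
 * mapping.  Masking the pde with ~(NBPDR - 1) keeps 0x00800000,
 * masking the va with (NBPDR - 1) keeps the offset 0x001234, and
 * the extracted pa is 0x00800000 | 0x001234 == 0x00801234.
 */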
819
820 /***************************************************
821 * Low level mapping routines.....
822 ***************************************************/
823
824 /*
825 * Add a wired page to the kva.
826 * Note: not SMP coherent.
827 */
828 PMAP_INLINE void
829 pmap_kenter(vm_offset_t va, vm_offset_t pa)
830 {
831 pt_entry_t *pte;
832
833 pte = vtopte(va);
834 *pte = pa | PG_RW | PG_V | pgeflag;
835 }
836
837 /*
838 * Remove a page from the kernel pagetables.
839 * Note: not SMP coherent.
840 */
841 PMAP_INLINE void
842 pmap_kremove(vm_offset_t va)
843 {
844 pt_entry_t *pte;
845
846 pte = vtopte(va);
847 *pte = 0;
848 }
849
850 /*
851 * Used to map a range of physical addresses into kernel
852 * virtual address space.
853 *
854 * The value passed in '*virt' is a suggested virtual address for
855 * the mapping. Architectures which can support a direct-mapped
856 * physical to virtual region can return the appropriate address
857 * within that region, leaving '*virt' unchanged. Other
858 * architectures should map the pages starting at '*virt' and
859 * update '*virt' with the first usable address after the mapped
860 * region.
861 */
862 vm_offset_t
863 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
864 {
865 vm_offset_t va, sva;
866
867 va = sva = *virt;
868 while (start < end) {
869 pmap_kenter(va, start);
870 va += PAGE_SIZE;
871 start += PAGE_SIZE;
872 }
873 pmap_invalidate_range(kernel_pmap, sva, va);
874 *virt = va;
875 return (sva);
876 }
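
/*
 * Typical boot-time use of pmap_map() (values are illustrative): the
 * caller offers KVA via *virt and takes back both the mapped base
 * and the advanced pointer.
 *
 *	vm_offset_t va = virtual_avail;
 *	vm_offset_t mapped;
 *
 *	mapped = pmap_map(&va, phys_start, phys_start + 4 * PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *	virtual_avail = va;	(va now points past the four pages)
 */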
877
878
879 /*
880 * Add a list of wired pages to the kva.
881 * This routine is only used for temporary
882 * kernel mappings that do not need to have
883 * page modification or references recorded.
884 * Note that old mappings are simply written
885 * over. The page *must* be wired.
886 * Note: SMP coherent. Uses a ranged shootdown IPI.
887 */
888 void
889 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
890 {
891 vm_offset_t va;
892
893 va = sva;
894 while (count-- > 0) {
895 pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
896 va += PAGE_SIZE;
897 m++;
898 }
899 pmap_invalidate_range(kernel_pmap, sva, va);
900 }
901
902 /*
903 * This routine tears out page mappings from the
904 * kernel -- it is meant only for temporary mappings.
905 * Note: SMP coherent. Uses a ranged shootdown IPI.
906 */
907 void
908 pmap_qremove(vm_offset_t sva, int count)
909 {
910 vm_offset_t va;
911
912 va = sva;
913 while (count-- > 0) {
914 pmap_kremove(va);
915 va += PAGE_SIZE;
916 }
917 pmap_invalidate_range(kernel_pmap, sva, va);
918 }
919
920 static vm_page_t
921 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
922 {
923 vm_page_t m;
924
925 retry:
926 m = vm_page_lookup(object, pindex);
927 if (m != NULL) {
928 vm_page_lock_queues();
929 if (vm_page_sleep_if_busy(m, FALSE, "pplookp"))
930 goto retry;
931 vm_page_unlock_queues();
932 }
933 return m;
934 }
935
936 #ifndef KSTACK_MAX_PAGES
937 #define KSTACK_MAX_PAGES 32
938 #endif
939
940 /*
941 * Create the kernel stack (including pcb for i386) for a new thread.
942 * This routine directly affects fork performance for a process
943 * and thread-creation performance.
944 */
945 void
946 pmap_new_thread(struct thread *td, int pages)
947 {
948 int i;
949 vm_page_t ma[KSTACK_MAX_PAGES];
950 vm_object_t ksobj;
951 vm_page_t m;
952 vm_offset_t ks;
953
954 /* Bounds check */
955 if (pages <= 1)
956 pages = KSTACK_PAGES;
957 else if (pages > KSTACK_MAX_PAGES)
958 pages = KSTACK_MAX_PAGES;
959
960 /*
961 * allocate object for the kstack
962 */
963 ksobj = vm_object_allocate(OBJT_DEFAULT, pages);
964 td->td_kstack_obj = ksobj;
965
966 /* get a kernel virtual address for the kstack for this thread */
967 #ifdef KSTACK_GUARD
968 ks = kmem_alloc_nofault(kernel_map, (pages + 1) * PAGE_SIZE);
969 if (ks == 0)
970 panic("pmap_new_thread: kstack allocation failed");
971 if (*vtopte(ks) != 0)
972 pmap_qremove(ks, 1);
973 ks += PAGE_SIZE;
974 td->td_kstack = ks;
975 #else
976 /* get a kernel virtual address for the kstack for this thread */
977 ks = kmem_alloc_nofault(kernel_map, pages * PAGE_SIZE);
978 if (ks == 0)
979 panic("pmap_new_thread: kstack allocation failed");
980 td->td_kstack = ks;
981 #endif
982 /*
983 * Knowing the number of pages allocated is useful when you
984 * want to deallocate them.
985 */
986 td->td_kstack_pages = pages;
987
988 /*
989 * For the length of the stack, link in a real page of ram for each
990 * page of stack.
991 */
992 for (i = 0; i < pages; i++) {
993 /*
994 * Get a kernel stack page
995 */
996 m = vm_page_grab(ksobj, i,
997 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED);
998 ma[i] = m;
999
1000 vm_page_wakeup(m);
1001 vm_page_flag_clear(m, PG_ZERO);
1002 m->valid = VM_PAGE_BITS_ALL;
1003 }
1004 pmap_qenter(ks, ma, pages);
1005 }
1006
1007 /*
1008 * Dispose the kernel stack for a thread that has exited.
1009 * This routine directly impacts the exit performance of a process and thread.
1010 */
1011 void
1012 pmap_dispose_thread(td)
1013 struct thread *td;
1014 {
1015 int i;
1016 int pages;
1017 vm_object_t ksobj;
1018 vm_offset_t ks;
1019 vm_page_t m;
1020
1021 pages = td->td_kstack_pages;
1022 ksobj = td->td_kstack_obj;
1023 ks = td->td_kstack;
1024 pmap_qremove(ks, pages);
1025 for (i = 0; i < pages; i++) {
1026 m = vm_page_lookup(ksobj, i);
1027 if (m == NULL)
1028 panic("pmap_dispose_thread: kstack already missing?");
1029 vm_page_lock_queues();
1030 vm_page_busy(m);
1031 vm_page_unwire(m, 0);
1032 vm_page_free(m);
1033 vm_page_unlock_queues();
1034 }
1035 /*
1036 * Free the space that this stack was mapped to in the kernel
1037 * address map.
1038 */
1039 #ifdef KSTACK_GUARD
1040 kmem_free(kernel_map, ks - PAGE_SIZE, (pages + 1) * PAGE_SIZE);
1041 #else
1042 kmem_free(kernel_map, ks, pages * PAGE_SIZE);
1043 #endif
1044 vm_object_deallocate(ksobj);
1045 }
1046
1047 /*
1048 * Set up a variable-sized alternate kstack. Though it may look MI, it may
1049 * need to be different on certain arches like ia64.
1050 */
1051 void
1052 pmap_new_altkstack(struct thread *td, int pages)
1053 {
1054 /* shuffle the original stack */
1055 td->td_altkstack_obj = td->td_kstack_obj;
1056 td->td_altkstack = td->td_kstack;
1057 td->td_altkstack_pages = td->td_kstack_pages;
1058
1059 pmap_new_thread(td, pages);
1060 }
1061
1062 void
1063 pmap_dispose_altkstack(td)
1064 struct thread *td;
1065 {
1066 pmap_dispose_thread(td);
1067
1068 /* restore the original kstack */
1069 td->td_kstack = td->td_altkstack;
1070 td->td_kstack_obj = td->td_altkstack_obj;
1071 td->td_kstack_pages = td->td_altkstack_pages;
1072 td->td_altkstack = 0;
1073 td->td_altkstack_obj = NULL;
1074 td->td_altkstack_pages = 0;
1075 }
1076
1077 /*
1078 * Allow the kernel stack for a thread to be prejudicially paged out.
1079 */
1080 void
1081 pmap_swapout_thread(td)
1082 struct thread *td;
1083 {
1084 int i;
1085 int pages;
1086 vm_object_t ksobj;
1087 vm_offset_t ks;
1088 vm_page_t m;
1089
1090 pages = td->td_kstack_pages;
1091 ksobj = td->td_kstack_obj;
1092 ks = td->td_kstack;
1093 pmap_qremove(ks, pages);
1094 for (i = 0; i < pages; i++) {
1095 m = vm_page_lookup(ksobj, i);
1096 if (m == NULL)
1097 panic("pmap_swapout_thread: kstack already missing?");
1098 vm_page_lock_queues();
1099 vm_page_dirty(m);
1100 vm_page_unwire(m, 0);
1101 vm_page_unlock_queues();
1102 }
1103 }
1104
1105 /*
1106 * Bring the kernel stack for a specified thread back in.
1107 */
1108 void
1109 pmap_swapin_thread(td)
1110 struct thread *td;
1111 {
1112 int i, rv;
1113 int pages;
1114 vm_page_t ma[KSTACK_MAX_PAGES];
1115 vm_object_t ksobj;
1116 vm_offset_t ks;
1117 vm_page_t m;
1118
1119 pages = td->td_kstack_pages;
1120 ksobj = td->td_kstack_obj;
1121 ks = td->td_kstack;
1122 for (i = 0; i < pages; i++) {
1123 m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
1124 if (m->valid != VM_PAGE_BITS_ALL) {
1125 rv = vm_pager_get_pages(ksobj, &m, 1, 0);
1126 if (rv != VM_PAGER_OK)
1127 panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid);
1128 m = vm_page_lookup(ksobj, i);
1129 m->valid = VM_PAGE_BITS_ALL;
1130 }
1131 ma[i] = m;
1132 vm_page_lock_queues();
1133 vm_page_wire(m);
1134 vm_page_wakeup(m);
1135 vm_page_unlock_queues();
1136 }
1137 pmap_qenter(ks, ma, pages);
1138 }
1139
1140 /***************************************************
1141 * Page table page management routines.....
1142 ***************************************************/
1143
1144 /*
1145 * This routine unholds page table pages, and if the hold count
1146 * drops to zero, then it decrements the wire count.
1147 */
1148 static int
1149 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
1150 {
1151
1152 while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt"))
1153 vm_page_lock_queues();
1154
1155 if (m->hold_count == 0) {
1156 vm_offset_t pteva;
1157 /*
1158 * unmap the page table page
1159 */
1160 pmap->pm_pdir[m->pindex] = 0;
1161 --pmap->pm_stats.resident_count;
1162 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) ==
1163 (PTDpde & PG_FRAME)) {
1164 /*
1165 * Do an invltlb to make the invalidated mapping
1166 * take effect immediately.
1167 */
1168 pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
1169 pmap_invalidate_page(pmap, pteva);
1170 }
1171
1172 if (pmap->pm_ptphint == m)
1173 pmap->pm_ptphint = NULL;
1174
1175 /*
1176 * If the page is finally unwired, simply free it.
1177 */
1178 --m->wire_count;
1179 if (m->wire_count == 0) {
1180 vm_page_busy(m);
1181 vm_page_free_zero(m);
1182 --cnt.v_wire_count;
1183 }
1184 return 1;
1185 }
1186 return 0;
1187 }
1188
1189 static PMAP_INLINE int
1190 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
1191 {
1192 vm_page_unhold(m);
1193 if (m->hold_count == 0)
1194 return _pmap_unwire_pte_hold(pmap, m);
1195 else
1196 return 0;
1197 }
1198
1199 /*
1200 * After removing a page table entry, this routine is used to
1201 * conditionally free the page, and manage the hold/wire counts.
1202 */
1203 static int
1204 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
1205 {
1206 unsigned ptepindex;
1207 if (va >= VM_MAXUSER_ADDRESS)
1208 return 0;
1209
1210 if (mpte == NULL) {
1211 ptepindex = (va >> PDRSHIFT);
1212 if (pmap->pm_ptphint &&
1213 (pmap->pm_ptphint->pindex == ptepindex)) {
1214 mpte = pmap->pm_ptphint;
1215 } else {
1216 while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL &&
1217 vm_page_sleep_if_busy(mpte, FALSE, "pulook"))
1218 vm_page_lock_queues();
1219 pmap->pm_ptphint = mpte;
1220 }
1221 }
1222
1223 return pmap_unwire_pte_hold(pmap, mpte);
1224 }
1225
1226 void
1227 pmap_pinit0(pmap)
1228 struct pmap *pmap;
1229 {
1230 pmap->pm_pdir =
1231 (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1232 pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t)IdlePTD);
1233 #ifndef I386_CPU
1234 invlpg((vm_offset_t)pmap->pm_pdir);
1235 #else
1236 invltlb();
1237 #endif
1238 pmap->pm_ptphint = NULL;
1239 pmap->pm_active = 0;
1240 TAILQ_INIT(&pmap->pm_pvlist);
1241 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1242 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
1243 }
1244
1245 /*
1246 * Initialize a preallocated and zeroed pmap structure,
1247 * such as one in a vmspace structure.
1248 */
1249 void
1250 pmap_pinit(pmap)
1251 register struct pmap *pmap;
1252 {
1253 vm_page_t ptdpg;
1254
1255 /*
1256 * No need to allocate page table space yet but we do need a valid
1257 * page directory table.
1258 */
1259 if (pmap->pm_pdir == NULL)
1260 pmap->pm_pdir =
1261 (pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
1262
1263 /*
1264 * allocate object for the ptes
1265 */
1266 if (pmap->pm_pteobj == NULL)
1267 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1);
1268
1269 /*
1270 * allocate the page directory page
1271 */
1272 ptdpg = vm_page_grab(pmap->pm_pteobj, PTDPTDI,
1273 VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1274 vm_page_flag_clear(ptdpg, PG_BUSY);
1275 ptdpg->valid = VM_PAGE_BITS_ALL;
1276
1277 pmap_qenter((vm_offset_t) pmap->pm_pdir, &ptdpg, 1);
1278 if ((ptdpg->flags & PG_ZERO) == 0)
1279 bzero(pmap->pm_pdir, PAGE_SIZE);
1280
1281 LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
1282 /* Wire in kernel global address entries. */
1283 /* XXX copies current process, does not fill in MPPTDI */
1284 bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
1285 #ifdef SMP
1286 pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
1287 #endif
1288
1289 /* install self-referential address mapping entry */
1290 pmap->pm_pdir[PTDPTDI] =
1291 VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
1292
1293 pmap->pm_active = 0;
1294 pmap->pm_ptphint = NULL;
1295 TAILQ_INIT(&pmap->pm_pvlist);
1296 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1297 }
1298
1299 /*
1300 * Wire in kernel global address entries. To avoid a race condition
1301 * between pmap initialization and pmap_growkernel, this procedure
1302 * should be called after the vmspace is attached to the process
1303 * but before this pmap is activated.
1304 */
1305 void
1306 pmap_pinit2(pmap)
1307 struct pmap *pmap;
1308 {
1309 /* XXX: Remove this stub when no longer called */
1310 }
1311
1312 static int
1313 pmap_release_free_page(pmap_t pmap, vm_page_t p)
1314 {
1315 pd_entry_t *pde = pmap->pm_pdir;
1316
1317 /*
1318 * This code optimizes the case of freeing non-busy
1319 * page-table pages. Those pages are zero now, and
1320 * might as well be placed directly into the zero queue.
1321 */
1322 vm_page_lock_queues();
1323 if (vm_page_sleep_if_busy(p, FALSE, "pmaprl"))
1324 return (0);
1325 vm_page_busy(p);
1326
1327 /*
1328 * Remove the page table page from the process's address space.
1329 */
1330 pde[p->pindex] = 0;
1331 pmap->pm_stats.resident_count--;
1332
1333 if (p->hold_count) {
1334 panic("pmap_release: freeing held page table page");
1335 }
1336 /*
1337 * Page directory pages need to have the kernel
1338 * stuff cleared, so they can go into the zero queue also.
1339 */
1340 if (p->pindex == PTDPTDI) {
1341 bzero(pde + KPTDI, nkpt * PTESIZE);
1342 #ifdef SMP
1343 pde[MPPTDI] = 0;
1344 #endif
1345 pde[APTDPTDI] = 0;
1346 pmap_kremove((vm_offset_t) pmap->pm_pdir);
1347 }
1348
1349 if (pmap->pm_ptphint == p)
1350 pmap->pm_ptphint = NULL;
1351
1352 p->wire_count--;
1353 cnt.v_wire_count--;
1354 vm_page_free_zero(p);
1355 vm_page_unlock_queues();
1356 return 1;
1357 }
1358
1359 /*
1360 * this routine is called if the page table page is not
1361 * mapped correctly.
1362 */
1363 static vm_page_t
1364 _pmap_allocpte(pmap, ptepindex)
1365 pmap_t pmap;
1366 unsigned ptepindex;
1367 {
1368 vm_offset_t pteva, ptepa; /* XXXPA */
1369 vm_page_t m;
1370
1371 /*
1372 * Find or fabricate a new pagetable page
1373 */
1374 m = vm_page_grab(pmap->pm_pteobj, ptepindex,
1375 VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
1376
1377 KASSERT(m->queue == PQ_NONE,
1378 ("_pmap_allocpte: %p->queue != PQ_NONE", m));
1379
1380 /*
1381 * Increment the hold count for the page table page
1382 * (denoting a new mapping.)
1383 */
1384 m->hold_count++;
1385
1386 /*
1387 * Map the pagetable page into the process address space, if
1388 * it isn't already there.
1389 */
1390
1391 pmap->pm_stats.resident_count++;
1392
1393 ptepa = VM_PAGE_TO_PHYS(m);
1394 pmap->pm_pdir[ptepindex] =
1395 (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
1396
1397 /*
1398 * Set the page table hint
1399 */
1400 pmap->pm_ptphint = m;
1401
1402 /*
1403 * Try to use the new mapping, but if we cannot, then
1404 * do it with the routine that maps the page explicitly.
1405 */
1406 if ((m->flags & PG_ZERO) == 0) {
1407 if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) ==
1408 (PTDpde & PG_FRAME)) {
1409 pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex);
1410 bzero((caddr_t) pteva, PAGE_SIZE);
1411 } else {
1412 pmap_zero_page(m);
1413 }
1414 }
1415
1416 m->valid = VM_PAGE_BITS_ALL;
1417 vm_page_flag_clear(m, PG_ZERO);
1418 vm_page_wakeup(m);
1419
1420 return m;
1421 }
1422
1423 static vm_page_t
1424 pmap_allocpte(pmap_t pmap, vm_offset_t va)
1425 {
1426 unsigned ptepindex;
1427 pd_entry_t ptepa;
1428 vm_page_t m;
1429
1430 /*
1431 * Calculate pagetable page index
1432 */
1433 ptepindex = va >> PDRSHIFT;
1434
1435 /*
1436 * Get the page directory entry
1437 */
1438 ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
1439
1440 /*
1441 * This supports switching from a 4MB page to a
1442 * normal 4K page.
1443 */
1444 if (ptepa & PG_PS) {
1445 pmap->pm_pdir[ptepindex] = 0;
1446 ptepa = 0;
1447 pmap_invalidate_all(kernel_pmap);
1448 }
1449
1450 /*
1451 * If the page table page is mapped, we just increment the
1452 * hold count, and activate it.
1453 */
1454 if (ptepa) {
1455 /*
1456 * In order to get the page table page, try the
1457 * hint first.
1458 */
1459 if (pmap->pm_ptphint &&
1460 (pmap->pm_ptphint->pindex == ptepindex)) {
1461 m = pmap->pm_ptphint;
1462 } else {
1463 m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
1464 pmap->pm_ptphint = m;
1465 }
1466 m->hold_count++;
1467 return m;
1468 }
1469 /*
1470 * Here if the pte page isn't mapped, or if it has been deallocated.
1471 */
1472 return _pmap_allocpte(pmap, ptepindex);
1473 }
1474
1475
1476 /***************************************************
1477 * Pmap allocation/deallocation routines.
1478 ***************************************************/
1479
1480 /*
1481 * Release any resources held by the given physical map.
1482 * Called when a pmap initialized by pmap_pinit is being released.
1483 * Should only be called if the map contains no valid mappings.
1484 */
1485 void
1486 pmap_release(pmap_t pmap)
1487 {
1488 vm_page_t p,n,ptdpg;
1489 vm_object_t object = pmap->pm_pteobj;
1490 int curgeneration;
1491
1492 #if defined(DIAGNOSTIC)
1493 if (object->ref_count != 1)
1494 panic("pmap_release: pteobj reference count != 1");
1495 #endif
1496
1497 ptdpg = NULL;
1498 LIST_REMOVE(pmap, pm_list);
1499 retry:
1500 curgeneration = object->generation;
1501 for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
1502 n = TAILQ_NEXT(p, listq);
1503 if (p->pindex == PTDPTDI) {
1504 ptdpg = p;
1505 continue;
1506 }
1507 while (1) {
1508 if (!pmap_release_free_page(pmap, p) &&
1509 (object->generation != curgeneration))
1510 goto retry;
1511 }
1512 }
1513
1514 if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
1515 goto retry;
1516 }
1517
1518 static int
1519 kvm_size(SYSCTL_HANDLER_ARGS)
1520 {
1521 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
1522
1523 return sysctl_handle_long(oidp, &ksize, 0, req);
1524 }
1525 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
1526 0, 0, kvm_size, "IU", "Size of KVM");
1527
1528 static int
1529 kvm_free(SYSCTL_HANDLER_ARGS)
1530 {
1531 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
1532
1533 return sysctl_handle_long(oidp, &kfree, 0, req);
1534 }
1535 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
1536 0, 0, kvm_free, "IU", "Amount of KVM free");
1537
1538 /*
1539 * grow the number of kernel page table entries, if needed
1540 */
1541 void
1542 pmap_growkernel(vm_offset_t addr)
1543 {
1544 struct pmap *pmap;
1545 int s;
1546 vm_offset_t ptppaddr;
1547 vm_page_t nkpg;
1548 pd_entry_t newpdir;
1549
1550 s = splhigh();
1551 if (kernel_vm_end == 0) {
1552 kernel_vm_end = KERNBASE;
1553 nkpt = 0;
1554 while (pdir_pde(PTD, kernel_vm_end)) {
1555 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1556 nkpt++;
1557 }
1558 }
1559 addr = roundup2(addr, PAGE_SIZE * NPTEPG);
1560 while (kernel_vm_end < addr) {
1561 if (pdir_pde(PTD, kernel_vm_end)) {
1562 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1563 continue;
1564 }
1565
1566 /*
1567 * This index is bogus, but out of the way
1568 */
1569 nkpg = vm_page_alloc(kptobj, nkpt,
1570 VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
1571 if (!nkpg)
1572 panic("pmap_growkernel: no memory to grow kernel");
1573
1574 nkpt++;
1575
1576 pmap_zero_page(nkpg);
1577 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
1578 newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
1579 pdir_pde(PTD, kernel_vm_end) = newpdir;
1580
1581 LIST_FOREACH(pmap, &allpmaps, pm_list) {
1582 *pmap_pde(pmap, kernel_vm_end) = newpdir;
1583 }
1584 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
1585 }
1586 splx(s);
1587 }
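
/*
 * Worked example of the stepping above: each pde maps
 * PAGE_SIZE * NPTEPG == 4MB of KVA, so kernel_vm_end is always
 * advanced to the next 4MB boundary.  For instance, with
 * kernel_vm_end == 0xc0a01000, (0xc0a01000 + 0x400000) & ~0x3fffff
 * == 0xc0c00000, the start of the next unbacked 4MB slot.
 */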
1588
1589
1590 /***************************************************
1591 * page management routines.
1592 ***************************************************/
1593
1594 /*
1595 * free the pv_entry back to the free list
1596 */
1597 static PMAP_INLINE void
1598 free_pv_entry(pv_entry_t pv)
1599 {
1600 pv_entry_count--;
1601 uma_zfree(pvzone, pv);
1602 }
1603
1604 /*
1605 * get a new pv_entry, allocating a block from the system
1606 * when needed.
1607 * the memory allocation bypasses the malloc code
1608 * because of the possibility of allocations at interrupt time.
1609 */
1610 static pv_entry_t
1611 get_pv_entry(void)
1612 {
1613 pv_entry_count++;
1614 if (pv_entry_high_water &&
1615 (pv_entry_count > pv_entry_high_water) &&
1616 (pmap_pagedaemon_waken == 0)) {
1617 pmap_pagedaemon_waken = 1;
1618 wakeup (&vm_pages_needed);
1619 }
1620 return uma_zalloc(pvzone, M_NOWAIT);
1621 }
1622
1623 /*
1624 * Locate the pv entry for (pmap, va): search whichever list
1625 * (the page's pv list or the pmap's) is likely to be shorter,
1626 * unlink the entry from both lists, and free it back to the
1627 * pv zone.
1628 */
1629
1630 static int
1631 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1632 {
1633 pv_entry_t pv;
1634 int rtval;
1635 int s;
1636
1637 s = splvm();
1638 if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
1639 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1640 if (pmap == pv->pv_pmap && va == pv->pv_va)
1641 break;
1642 }
1643 } else {
1644 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1645 if (va == pv->pv_va)
1646 break;
1647 }
1648 }
1649
1650 rtval = 0;
1651 if (pv) {
1652 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
1653 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1654 m->md.pv_list_count--;
1655 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
1656 vm_page_flag_clear(m, PG_WRITEABLE);
1657
1658 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1659 free_pv_entry(pv);
1660 }
1661
1662 splx(s);
1663 return rtval;
1664 }
1665
1666 /*
1667 * Create a pv entry for page at pa for
1668 * (pmap, va).
1669 */
1670 static void
1671 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
1672 {
1673
1674 int s;
1675 pv_entry_t pv;
1676
1677 s = splvm();
1678 pv = get_pv_entry();
1679 pv->pv_va = va;
1680 pv->pv_pmap = pmap;
1681 pv->pv_ptem = mpte;
1682
1683 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1684 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1685 m->md.pv_list_count++;
1686
1687 splx(s);
1688 }
1689
1690 /*
1691 * pmap_remove_pte: do the things to unmap a page in a process
1692 */
1693 static int
1694 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
1695 {
1696 pt_entry_t oldpte;
1697 vm_page_t m;
1698
1699 oldpte = atomic_readandclear_int(ptq);
1700 if (oldpte & PG_W)
1701 pmap->pm_stats.wired_count -= 1;
1702 /*
1703 * Machines that don't support invlpg, also don't support
1704 * PG_G.
1705 */
1706 if (oldpte & PG_G)
1707 pmap_invalidate_page(kernel_pmap, va);
1708 pmap->pm_stats.resident_count -= 1;
1709 if (oldpte & PG_MANAGED) {
1710 m = PHYS_TO_VM_PAGE(oldpte);
1711 if (oldpte & PG_M) {
1712 #if defined(PMAP_DIAGNOSTIC)
1713 if (pmap_nw_modified((pt_entry_t) oldpte)) {
1714 printf(
1715 "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
1716 va, oldpte);
1717 }
1718 #endif
1719 if (pmap_track_modified(va))
1720 vm_page_dirty(m);
1721 }
1722 if (oldpte & PG_A)
1723 vm_page_flag_set(m, PG_REFERENCED);
1724 return pmap_remove_entry(pmap, m, va);
1725 } else {
1726 return pmap_unuse_pt(pmap, va, NULL);
1727 }
1728
1729 return 0;
1730 }
1731
1732 /*
1733 * Remove a single page from a process address space
1734 */
1735 static void
1736 pmap_remove_page(pmap_t pmap, vm_offset_t va)
1737 {
1738 register pt_entry_t *ptq;
1739
1740 /*
1741 * if there is no pte for this address, just skip it!!!
1742 */
1743 if (*pmap_pde(pmap, va) == 0) {
1744 return;
1745 }
1746
1747 /*
1748 * get a local va for mappings for this pmap.
1749 */
1750 ptq = get_ptbase(pmap) + i386_btop(va);
1751 if (*ptq) {
1752 (void) pmap_remove_pte(pmap, ptq, va);
1753 pmap_invalidate_page(pmap, va);
1754 }
1755 return;
1756 }
1757
1758 /*
1759 * Remove the given range of addresses from the specified map.
1760 *
1761 * It is assumed that the start and end are properly
1762 * rounded to the page size.
1763 */
1764 void
1765 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1766 {
1767 register pt_entry_t *ptbase;
1768 vm_offset_t pdnxt;
1769 pd_entry_t ptpaddr;
1770 vm_offset_t sindex, eindex;
1771 int anyvalid;
1772
1773 if (pmap == NULL)
1774 return;
1775
1776 if (pmap->pm_stats.resident_count == 0)
1777 return;
1778
1779 /*
1780 * Special handling for removing a single page: a very
1781 * common operation for which it is easy to short-circuit
1782 * some code.
1783 */
1784 if ((sva + PAGE_SIZE == eva) &&
1785 ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
1786 pmap_remove_page(pmap, sva);
1787 return;
1788 }
1789
1790 anyvalid = 0;
1791
1792 /*
1793 * Get a local virtual address for the mappings that are being
1794 * worked with.
1795 */
1796 ptbase = get_ptbase(pmap);
1797
1798 sindex = i386_btop(sva);
1799 eindex = i386_btop(eva);
1800
1801 for (; sindex < eindex; sindex = pdnxt) {
1802 unsigned pdirindex;
1803
1804 /*
1805 * Calculate index for next page table.
1806 */
1807 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1808 if (pmap->pm_stats.resident_count == 0)
1809 break;
1810
1811 pdirindex = sindex / NPDEPG;
1812 ptpaddr = pmap->pm_pdir[pdirindex];
1813 if ((ptpaddr & PG_PS) != 0) {
1814 pmap->pm_pdir[pdirindex] = 0;
1815 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1816 anyvalid++;
1817 continue;
1818 }
1819
1820 /*
1821 * Weed out invalid mappings. Note: we assume that the page
1822 * directory table is always allocated, and in kernel virtual.
1823 */
1824 if (ptpaddr == 0)
1825 continue;
1826
1827 /*
1828 * Limit our scan to either the end of the va represented
1829 * by the current page table page, or to the end of the
1830 * range being removed.
1831 */
1832 if (pdnxt > eindex) {
1833 pdnxt = eindex;
1834 }
1835
1836 for (; sindex != pdnxt; sindex++) {
1837 vm_offset_t va;
1838 if (ptbase[sindex] == 0) {
1839 continue;
1840 }
1841 va = i386_ptob(sindex);
1842
1843 anyvalid++;
1844 if (pmap_remove_pte(pmap,
1845 ptbase + sindex, va))
1846 break;
1847 }
1848 }
1849
1850 if (anyvalid)
1851 pmap_invalidate_all(pmap);
1852 }
1853
1854 /*
1855 * Routine: pmap_remove_all
1856 * Function:
1857 * Removes this physical page from
1858 * all physical maps in which it resides.
1859 * Reflects back modify bits to the pager.
1860 *
1861 * Notes:
1862 * Original versions of this routine were very
1863 * inefficient because they iteratively called
1864 * pmap_remove (slow...)
1865 */
1866
1867 void
1868 pmap_remove_all(vm_page_t m)
1869 {
1870 register pv_entry_t pv;
1871 pt_entry_t *pte, tpte;
1872 int s;
1873
1874 #if defined(PMAP_DIAGNOSTIC)
1875 /*
1876 * XXX This makes pmap_remove_all() illegal for non-managed pages!
1877 */
1878 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
1879 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x",
1880 VM_PAGE_TO_PHYS(m));
1881 }
1882 #endif
1883 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1884 s = splvm();
1885 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1886 pv->pv_pmap->pm_stats.resident_count--;
1887 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
1888 tpte = atomic_readandclear_int(pte);
1889 if (tpte & PG_W)
1890 pv->pv_pmap->pm_stats.wired_count--;
1891 if (tpte & PG_A)
1892 vm_page_flag_set(m, PG_REFERENCED);
1893
1894 /*
1895 * Update the vm_page_t clean and reference bits.
1896 */
1897 if (tpte & PG_M) {
1898 #if defined(PMAP_DIAGNOSTIC)
1899 if (pmap_nw_modified((pt_entry_t) tpte)) {
1900 printf(
1901 "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
1902 pv->pv_va, tpte);
1903 }
1904 #endif
1905 if (pmap_track_modified(pv->pv_va))
1906 vm_page_dirty(m);
1907 }
1908 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1909 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1910 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1911 m->md.pv_list_count--;
1912 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1913 free_pv_entry(pv);
1914 }
1915 vm_page_flag_clear(m, PG_WRITEABLE);
1916 splx(s);
1917 }
1918
1919 /*
1920 * Set the physical protection on the
1921 * specified range of this map as requested.
1922 */
1923 void
1924 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1925 {
1926 register pt_entry_t *ptbase;
1927 vm_offset_t pdnxt;
1928 pd_entry_t ptpaddr;
1929 vm_offset_t sindex, eindex;
1930 int anychanged;
1931
1932 if (pmap == NULL)
1933 return;
1934
1935 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1936 pmap_remove(pmap, sva, eva);
1937 return;
1938 }
1939
1940 if (prot & VM_PROT_WRITE)
1941 return;
1942
1943 anychanged = 0;
1944
1945 ptbase = get_ptbase(pmap);
1946
1947 sindex = i386_btop(sva);
1948 eindex = i386_btop(eva);
1949
1950 for (; sindex < eindex; sindex = pdnxt) {
1951
1952 unsigned pdirindex;
1953
1954 pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
1955
1956 pdirindex = sindex / NPDEPG;
1957 ptpaddr = pmap->pm_pdir[pdirindex];
1958 if ((ptpaddr & PG_PS) != 0) {
1959 pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
1960 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
1961 anychanged++;
1962 continue;
1963 }
1964
1965 /*
1966 * Weed out invalid mappings. Note: we assume that the page
1967 * directory table is always allocated, and in kernel virtual.
1968 */
1969 if (ptpaddr == 0)
1970 continue;
1971
1972 if (pdnxt > eindex) {
1973 pdnxt = eindex;
1974 }
1975
1976 for (; sindex != pdnxt; sindex++) {
1977
1978 pt_entry_t pbits;
1979 vm_page_t m;
1980
1981 pbits = ptbase[sindex];
1982
1983 if (pbits & PG_MANAGED) {
1984 m = NULL;
1985 if (pbits & PG_A) {
1986 m = PHYS_TO_VM_PAGE(pbits);
1987 vm_page_flag_set(m, PG_REFERENCED);
1988 pbits &= ~PG_A;
1989 }
1990 if (pbits & PG_M) {
1991 if (pmap_track_modified(i386_ptob(sindex))) {
1992 if (m == NULL)
1993 m = PHYS_TO_VM_PAGE(pbits);
1994 vm_page_dirty(m);
1995 pbits &= ~PG_M;
1996 }
1997 }
1998 }
1999
2000 pbits &= ~PG_RW;
2001
2002 if (pbits != ptbase[sindex]) {
2003 ptbase[sindex] = pbits;
2004 anychanged = 1;
2005 }
2006 }
2007 }
2008 if (anychanged)
2009 pmap_invalidate_all(pmap);
2010 }
2011
2012 /*
2013 * Insert the given physical page (p) at
2014 * the specified virtual address (v) in the
2015 * target physical map with the protection requested.
2016 *
2017 * If specified, the page will be wired down, meaning
2018 * that the related pte can not be reclaimed.
2019 *
2020 * NB: This is the only routine which MAY NOT lazy-evaluate
2021 * or lose information. That is, this routine must actually
2022 * insert this page into the given map NOW.
2023 */
2024 void
2025 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2026 boolean_t wired)
2027 {
2028 vm_offset_t pa;
2029 register pt_entry_t *pte;
2030 vm_offset_t opa;
2031 pt_entry_t origpte, newpte;
2032 vm_page_t mpte;
2033
2034 if (pmap == NULL)
2035 return;
2036
2037 va &= PG_FRAME;
2038 #ifdef PMAP_DIAGNOSTIC
2039 if (va > VM_MAX_KERNEL_ADDRESS)
2040 panic("pmap_enter: toobig");
2041 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
2042 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
2043 #endif
2044
2045 mpte = NULL;
2046 /*
2047 * In the case that a page table page is not
2048 * resident, we are creating it here.
2049 */
2050 if (va < VM_MAXUSER_ADDRESS) {
2051 mpte = pmap_allocpte(pmap, va);
2052 }
2053 #if 0 && defined(PMAP_DIAGNOSTIC)
2054 else {
2055 pd_entry_t *pdeaddr = pmap_pde(pmap, va);
2056 origpte = *pdeaddr;
2057 if ((origpte & PG_V) == 0) {
2058 panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
2059 pmap->pm_pdir[PTDPTDI], origpte, va);
2060 }
2061 }
2062 #endif
2063
2064 pte = pmap_pte(pmap, va);
2065
2066 /*
2067 * Page Directory table entry not valid, we need a new PT page
2068 */
2069 if (pte == NULL) {
2070 panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
2071 (void *)pmap->pm_pdir[PTDPTDI], va);
2072 }
2073
2074 pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
2075 origpte = *(vm_offset_t *)pte;
2076 opa = origpte & PG_FRAME;
2077
2078 if (origpte & PG_PS)
2079 panic("pmap_enter: attempted pmap_enter on 4MB page");
2080
2081 /*
2082 * Mapping has not changed, must be protection or wiring change.
2083 */
2084 if (origpte && (opa == pa)) {
2085 /*
2086 * Wiring change, just update stats. We don't worry about
2087 * wiring PT pages as they remain resident as long as there
2088 * are valid mappings in them. Hence, if a user page is wired,
2089 * the PT page will be also.
2090 */
2091 if (wired && ((origpte & PG_W) == 0))
2092 pmap->pm_stats.wired_count++;
2093 else if (!wired && (origpte & PG_W))
2094 pmap->pm_stats.wired_count--;
2095
2096 #if defined(PMAP_DIAGNOSTIC)
2097 if (pmap_nw_modified((pt_entry_t) origpte)) {
2098 printf(
2099 "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
2100 va, origpte);
2101 }
2102 #endif
2103
2104 /*
2105 * Remove extra pte reference
2106 */
2107 if (mpte)
2108 mpte->hold_count--;
2109
2110 if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
2111 if ((origpte & PG_RW) == 0) {
2112 *pte |= PG_RW;
2113 pmap_invalidate_page(pmap, va);
2114 }
2115 return;
2116 }
2117
2118 /*
2119 * We might be turning off write access to the page,
2120 * so we go ahead and sense modify status.
2121 */
2122 if (origpte & PG_MANAGED) {
2123 if ((origpte & PG_M) && pmap_track_modified(va)) {
2124 vm_page_t om;
2125 om = PHYS_TO_VM_PAGE(opa);
2126 vm_page_dirty(om);
2127 }
2128 pa |= PG_MANAGED;
2129 }
2130 goto validate;
2131 }
2132 /*
2133 * Mapping has changed, invalidate old range and fall through to
2134 * handle validating new mapping.
2135 */
2136 if (opa) {
2137 int err;
2138 vm_page_lock_queues();
2139 err = pmap_remove_pte(pmap, pte, va);
2140 vm_page_unlock_queues();
2141 if (err)
2142 panic("pmap_enter: pte vanished, va: 0x%x", va);
2143 }
2144
2145 /*
2146 * Enter on the PV list if part of our managed memory. Note that we
2147 * raise IPL while manipulating pv_table since pmap_enter can be
2148 * called at interrupt time.
2149 */
2150 if (pmap_initialized &&
2151 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
2152 pmap_insert_entry(pmap, va, mpte, m);
2153 pa |= PG_MANAGED;
2154 }
2155
2156 /*
2157 * Increment counters
2158 */
2159 pmap->pm_stats.resident_count++;
2160 if (wired)
2161 pmap->pm_stats.wired_count++;
2162
2163 validate:
2164 /*
2165 * Now validate mapping with desired protection/wiring.
2166 */
2167 newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
2168
2169 if (wired)
2170 newpte |= PG_W;
2171 if (va < VM_MAXUSER_ADDRESS)
2172 newpte |= PG_U;
2173 if (pmap == kernel_pmap)
2174 newpte |= pgeflag;
2175
2176 /*
2177 * if the mapping or permission bits are different, we need
2178 * to update the pte.
2179 */
2180 if ((origpte & ~(PG_M|PG_A)) != newpte) {
2181 *pte = newpte | PG_A;
2182 /*if (origpte)*/ {
2183 pmap_invalidate_page(pmap, va);
2184 }
2185 }
2186 }
2187
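/*
 * Editorial sketch (not part of the original source): a caller that
 * needs a guaranteed, immediately valid mapping -- e.g. while wiring
 * a user buffer -- uses pmap_enter() as below.  The helper name
 * "example_wire_mapping" is hypothetical.
 */
#if 0	/* example only */
static void
example_wire_mapping(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	/*
	 * pmap_enter() may not fail or lazy-evaluate; on return the
	 * translation for va is valid and the pte is marked wired.
	 */
	pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE, TRUE);
}
#endif
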
2188 /*
2189  * This code makes some *MAJOR* assumptions:
2190  * 1. The pmap is the current pmap, and it exists.
2191  * 2. Not wired.
2192  * 3. Read access.
2193  * 4. No page table pages.
2194  * 5. The TLB flush is deferred to the calling procedure.
2195  * 6. The page IS managed.
2196  * It is, however, *MUCH* faster than pmap_enter...
2197 */
2198
2199 static vm_page_t
2200 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
2201 {
2202 pt_entry_t *pte;
2203 vm_offset_t pa;
2204
2205 /*
2206 * In the case that a page table page is not
2207 * resident, we are creating it here.
2208 */
2209 if (va < VM_MAXUSER_ADDRESS) {
2210 unsigned ptepindex;
2211 pd_entry_t ptepa;
2212
2213 /*
2214 * Calculate pagetable page index
2215 */
2216 ptepindex = va >> PDRSHIFT;
2217 if (mpte && (mpte->pindex == ptepindex)) {
2218 mpte->hold_count++;
2219 } else {
2220 retry:
2221 /*
2222 * Get the page directory entry
2223 */
2224 ptepa = pmap->pm_pdir[ptepindex];
2225
2226 /*
2227 * If the page table page is mapped, we just increment
2228 * the hold count, and activate it.
2229 */
2230 if (ptepa) {
2231 if (ptepa & PG_PS)
2232 panic("pmap_enter_quick: unexpected mapping into 4MB page");
2233 if (pmap->pm_ptphint &&
2234 (pmap->pm_ptphint->pindex == ptepindex)) {
2235 mpte = pmap->pm_ptphint;
2236 } else {
2237 mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
2238 pmap->pm_ptphint = mpte;
2239 }
2240 if (mpte == NULL)
2241 goto retry;
2242 mpte->hold_count++;
2243 } else {
2244 mpte = _pmap_allocpte(pmap, ptepindex);
2245 }
2246 }
2247 } else {
2248 mpte = NULL;
2249 }
2250
2251 /*
2252 * This call to vtopte makes the assumption that we are
2253 * entering the page into the current pmap. In order to support
2254 * quick entry into any pmap, one would likely use pmap_pte_quick.
2255 * But that isn't as quick as vtopte.
2256 */
2257 pte = vtopte(va);
2258 if (*pte) {
2259 if (mpte != NULL) {
2260 vm_page_lock_queues();
2261 pmap_unwire_pte_hold(pmap, mpte);
2262 vm_page_unlock_queues();
2263 }
2264 return 0;
2265 }
2266
2267 /*
2268 * Enter on the PV list if part of our managed memory. Note that we
2269 * raise IPL while manipulating pv_table since pmap_enter can be
2270 * called at interrupt time.
2271 */
2272 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
2273 pmap_insert_entry(pmap, va, mpte, m);
2274
2275 /*
2276 * Increment counters
2277 */
2278 pmap->pm_stats.resident_count++;
2279
2280 pa = VM_PAGE_TO_PHYS(m);
2281
2282 /*
2283 * Now validate mapping with RO protection
2284 */
2285 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
2286 *pte = pa | PG_V | PG_U;
2287 else
2288 *pte = pa | PG_V | PG_U | PG_MANAGED;
2289
2290 return mpte;
2291 }
2292
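/*
 * Editorial sketch (not part of the original source): prefaulting
 * callers such as pmap_object_init_pt() and pmap_prefault() thread the
 * returned page table page back into the next call so that repeated
 * entries under the same page directory entry avoid a lookup.  The
 * helper name is hypothetical; real callers also busy the pages and
 * take the page queues lock as required.
 */
#if 0	/* example only */
static void
example_prefault_range(pmap_t pmap, vm_offset_t va, vm_page_t *pages, int n)
{
	vm_page_t mpte = NULL;
	int i;

	for (i = 0; i < n; i++)
		mpte = pmap_enter_quick(pmap, va + i * PAGE_SIZE,
		    pages[i], mpte);
}
#endif
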
2293 /*
2294 * Make a temporary mapping for a physical address. This is only intended
2295 * to be used for panic dumps.
2296 */
2297 void *
2298 pmap_kenter_temporary(vm_offset_t pa, int i)
2299 {
2300 vm_offset_t va;
2301
2302 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
2303 pmap_kenter(va, pa);
2304 #ifndef I386_CPU
2305 invlpg(va);
2306 #else
2307 invltlb();
2308 #endif
2309 return ((void *)crashdumpmap);
2310 }
2311
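/*
 * Editorial sketch (not part of the original source): a dump routine
 * can window arbitrary physical memory through crashdumpmap one page
 * at a time; the helper name and buffer are hypothetical.
 */
#if 0	/* example only */
static void
example_dump_page(vm_offset_t pa, char *buf)
{
	void *va;

	va = pmap_kenter_temporary(pa, 0);	/* slot 0 of crashdumpmap */
	bcopy(va, buf, PAGE_SIZE);
}
#endif
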
2312 #define MAX_INIT_PT (96)
2313 /*
2314 * pmap_object_init_pt preloads the ptes for a given object
2315 * into the specified pmap. This eliminates the blast of soft
2316 * faults on process startup and immediately after an mmap.
2317 */
2318 void
2319 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2320 vm_object_t object, vm_pindex_t pindex,
2321 vm_size_t size, int limit)
2322 {
2323 vm_offset_t tmpidx;
2324 int psize;
2325 vm_page_t p, mpte;
2326
2327 if (pmap == NULL || object == NULL)
2328 return;
2329
2330 /*
2331 * This code maps large physical mmap regions into the
2332 * processor address space. Note that some shortcuts
2333 * are taken, but the code works.
2334 */
2335 if (pseflag && (object->type == OBJT_DEVICE) &&
2336 ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
2337 int i;
2338 vm_page_t m[1];
2339 unsigned int ptepindex;
2340 int npdes;
2341 pd_entry_t ptepa;
2342
2343 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
2344 return;
2345
2346 retry:
2347 p = vm_page_lookup(object, pindex);
2348 if (p != NULL) {
2349 vm_page_lock_queues();
2350 if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
2351 goto retry;
2352 vm_page_unlock_queues();
2353 } else {
2354 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
2355 if (p == NULL)
2356 return;
2357 m[0] = p;
2358
2359 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
2360 vm_page_lock_queues();
2361 vm_page_free(p);
2362 vm_page_unlock_queues();
2363 return;
2364 }
2365
2366 p = vm_page_lookup(object, pindex);
2367 vm_page_wakeup(p);
2368 }
2369
2370 ptepa = VM_PAGE_TO_PHYS(p);
2371 if (ptepa & (NBPDR - 1)) {
2372 return;
2373 }
2374
2375 p->valid = VM_PAGE_BITS_ALL;
2376
2377 pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
2378 npdes = size >> PDRSHIFT;
2379 for(i = 0; i < npdes; i++) {
2380 pmap->pm_pdir[ptepindex] =
2381 ptepa | PG_U | PG_RW | PG_V | PG_PS;
2382 ptepa += NBPDR;
2383 ptepindex += 1;
2384 }
2385 pmap_invalidate_all(kernel_pmap);
2386 return;
2387 }
2388
2389 psize = i386_btop(size);
2390
2391 if ((object->type != OBJT_VNODE) ||
2392 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
2393 (object->resident_page_count > MAX_INIT_PT))) {
2394 return;
2395 }
2396
2397 if (psize + pindex > object->size) {
2398 if (object->size < pindex)
2399 return;
2400 psize = object->size - pindex;
2401 }
2402
2403 mpte = NULL;
2404
2405 if ((p = TAILQ_FIRST(&object->memq)) != NULL) {
2406 if (p->pindex < pindex) {
2407 p = vm_page_splay(pindex, object->root);
2408 if ((object->root = p)->pindex < pindex)
2409 p = TAILQ_NEXT(p, listq);
2410 }
2411 }
2412 /*
2413 * Assert: the variable p is either (1) the page with the
2414 * least pindex greater than or equal to the parameter pindex
2415 * or (2) NULL.
2416 */
2417 for (;
2418 p != NULL && (tmpidx = p->pindex - pindex) < psize;
2419 p = TAILQ_NEXT(p, listq)) {
2420 /*
2421  * don't allow a madvise to blow away our really
2422  * free pages by allocating pv entries.
2423 */
2424 if ((limit & MAP_PREFAULT_MADVISE) &&
2425 cnt.v_free_count < cnt.v_free_reserved) {
2426 break;
2427 }
2428 vm_page_lock_queues();
2429 if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
2430 (p->busy == 0) &&
2431 (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2432 if ((p->queue - p->pc) == PQ_CACHE)
2433 vm_page_deactivate(p);
2434 vm_page_busy(p);
2435 vm_page_unlock_queues();
2436 mpte = pmap_enter_quick(pmap,
2437 addr + i386_ptob(tmpidx), p, mpte);
2438 vm_page_lock_queues();
2439 vm_page_wakeup(p);
2440 }
2441 vm_page_unlock_queues();
2442 }
2443 return;
2444 }
2445
2446 /*
2447 * pmap_prefault provides a quick way of clustering
2448  * page faults into a process's address space.  It is a "cousin"
2449 * of pmap_object_init_pt, except it runs at page fault time instead
2450 * of mmap time.
2451 */
2452 #define PFBAK 4
2453 #define PFFOR 4
2454 #define PAGEORDER_SIZE (PFBAK+PFFOR)
2455
2456 static int pmap_prefault_pageorder[] = {
2457 -1 * PAGE_SIZE, 1 * PAGE_SIZE,
2458 -2 * PAGE_SIZE, 2 * PAGE_SIZE,
2459 -3 * PAGE_SIZE, 3 * PAGE_SIZE,
2460 -4 * PAGE_SIZE, 4 * PAGE_SIZE
2461 };
2462
2463 void
2464 pmap_prefault(pmap, addra, entry)
2465 pmap_t pmap;
2466 vm_offset_t addra;
2467 vm_map_entry_t entry;
2468 {
2469 int i;
2470 vm_offset_t starta;
2471 vm_offset_t addr;
2472 vm_pindex_t pindex;
2473 vm_page_t m, mpte;
2474 vm_object_t object;
2475
2476 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
2477 return;
2478
2479 object = entry->object.vm_object;
2480
2481 starta = addra - PFBAK * PAGE_SIZE;
2482 if (starta < entry->start) {
2483 starta = entry->start;
2484 } else if (starta > addra) {
2485 starta = 0;
2486 }
2487
2488 mpte = NULL;
2489 for (i = 0; i < PAGEORDER_SIZE; i++) {
2490 vm_object_t lobject;
2491 pt_entry_t *pte;
2492
2493 addr = addra + pmap_prefault_pageorder[i];
2494 if (addr > addra + (PFFOR * PAGE_SIZE))
2495 addr = 0;
2496
2497 if (addr < starta || addr >= entry->end)
2498 continue;
2499
2500 if ((*pmap_pde(pmap, addr)) == 0)
2501 continue;
2502
2503 pte = vtopte(addr);
2504 if (*pte)
2505 continue;
2506
2507 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
2508 lobject = object;
2509 for (m = vm_page_lookup(lobject, pindex);
2510 (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
2511 lobject = lobject->backing_object) {
2512 if (lobject->backing_object_offset & PAGE_MASK)
2513 break;
2514 pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
2515 m = vm_page_lookup(lobject->backing_object, pindex);
2516 }
2517
2518 /*
2519  * give up when a page is not in memory
2520 */
2521 if (m == NULL)
2522 break;
2523 vm_page_lock_queues();
2524 if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
2525 (m->busy == 0) &&
2526 (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
2527
2528 if ((m->queue - m->pc) == PQ_CACHE) {
2529 vm_page_deactivate(m);
2530 }
2531 vm_page_busy(m);
2532 vm_page_unlock_queues();
2533 mpte = pmap_enter_quick(pmap, addr, m, mpte);
2534 vm_page_lock_queues();
2535 vm_page_wakeup(m);
2536 }
2537 vm_page_unlock_queues();
2538 }
2539 }
2540
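/*
 * Editorial sketch (not part of the original source): the pageorder
 * table makes pmap_prefault() probe outward from the faulting address,
 * alternating one page behind, one ahead, two behind, and so on, so
 * the pages most likely to be touched next are tried first.
 */
#if 0	/* example only */
static void
example_show_prefault_order(vm_offset_t addra)
{
	int i;

	for (i = 0; i < PAGEORDER_SIZE; i++)
		printf("probe va: 0x%x\n",
		    (u_int)(addra + pmap_prefault_pageorder[i]));
}
#endif
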
2541 /*
2542 * Routine: pmap_change_wiring
2543 * Function: Change the wiring attribute for a map/virtual-address
2544 * pair.
2545 * In/out conditions:
2546 * The mapping must already exist in the pmap.
2547 */
2548 void
2549 pmap_change_wiring(pmap, va, wired)
2550 register pmap_t pmap;
2551 vm_offset_t va;
2552 boolean_t wired;
2553 {
2554 register pt_entry_t *pte;
2555
2556 if (pmap == NULL)
2557 return;
2558
2559 pte = pmap_pte(pmap, va);
2560
2561 if (wired && !pmap_pte_w(pte))
2562 pmap->pm_stats.wired_count++;
2563 else if (!wired && pmap_pte_w(pte))
2564 pmap->pm_stats.wired_count--;
2565
2566 /*
2567 * Wiring is not a hardware characteristic so there is no need to
2568 * invalidate TLB.
2569 */
2570 pmap_pte_set_w(pte, wired);
2571 }
2572
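/*
 * Editorial sketch (not part of the original source): wiring toggles
 * only PG_W and the per-pmap statistics; since PG_W is a
 * software-defined bit, no TLB invalidation is required.
 */
#if 0	/* example only */
static void
example_wire_toggle(pmap_t pmap, vm_offset_t va)
{
	pmap_change_wiring(pmap, va, TRUE);	/* wired_count++ */
	pmap_change_wiring(pmap, va, FALSE);	/* wired_count-- */
}
#endif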
2573
2574
2575 /*
2576 * Copy the range specified by src_addr/len
2577 * from the source map to the range dst_addr/len
2578 * in the destination map.
2579 *
2580 * This routine is only advisory and need not do anything.
2581 */
2582
2583 void
2584 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2585 vm_offset_t src_addr)
2586 {
2587 vm_offset_t addr;
2588 vm_offset_t end_addr = src_addr + len;
2589 vm_offset_t pdnxt;
2590 pd_entry_t src_frame, dst_frame;
2591 vm_page_t m;
2592
2593 if (dst_addr != src_addr)
2594 return;
2595
2596 src_frame = src_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
2597 if (src_frame != (PTDpde & PG_FRAME))
2598 return;
2599
2600 dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
2601 for (addr = src_addr; addr < end_addr; addr = pdnxt) {
2602 pt_entry_t *src_pte, *dst_pte;
2603 vm_page_t dstmpte, srcmpte;
2604 pd_entry_t srcptepaddr;
2605 unsigned ptepindex;
2606
2607 if (addr >= UPT_MIN_ADDRESS)
2608 panic("pmap_copy: invalid to pmap_copy page tables\n");
2609
2610 /*
2611 * Don't let optional prefaulting of pages make us go
2612 * way below the low water mark of free pages or way
2613  * above the high water mark of used pv entries.
2614 */
2615 if (cnt.v_free_count < cnt.v_free_reserved ||
2616 pv_entry_count > pv_entry_high_water)
2617 break;
2618
2619 pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
2620 ptepindex = addr >> PDRSHIFT;
2621
2622 srcptepaddr = src_pmap->pm_pdir[ptepindex];
2623 if (srcptepaddr == 0)
2624 continue;
2625
2626 if (srcptepaddr & PG_PS) {
2627 if (dst_pmap->pm_pdir[ptepindex] == 0) {
2628 dst_pmap->pm_pdir[ptepindex] = srcptepaddr;
2629 dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
2630 }
2631 continue;
2632 }
2633
2634 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
2635 if ((srcmpte == NULL) ||
2636 (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
2637 continue;
2638
2639 if (pdnxt > end_addr)
2640 pdnxt = end_addr;
2641
2642 /*
2643 * Have to recheck this before every avtopte() call below
2644 * in case we have blocked and something else used APTDpde.
2645 */
2646 if (dst_frame != (APTDpde & PG_FRAME)) {
2647 APTDpde = dst_frame | PG_RW | PG_V;
2648 pmap_invalidate_all(kernel_pmap); /* XXX Bandaid */
2649 }
2650 src_pte = vtopte(addr);
2651 dst_pte = avtopte(addr);
2652 while (addr < pdnxt) {
2653 pt_entry_t ptetemp;
2654 ptetemp = *src_pte;
2655 /*
2656  * we only virtual-copy managed pages
2657 */
2658 if ((ptetemp & PG_MANAGED) != 0) {
2659 /*
2660 * We have to check after allocpte for the
2661 * pte still being around... allocpte can
2662 * block.
2663 */
2664 dstmpte = pmap_allocpte(dst_pmap, addr);
2665 if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
2666 /*
2667 * Clear the modified and
2668 * accessed (referenced) bits
2669 * during the copy.
2670 */
2671 m = PHYS_TO_VM_PAGE(ptetemp);
2672 *dst_pte = ptetemp & ~(PG_M | PG_A);
2673 dst_pmap->pm_stats.resident_count++;
2674 pmap_insert_entry(dst_pmap, addr,
2675 dstmpte, m);
2676 } else {
2677 vm_page_lock_queues();
2678 pmap_unwire_pte_hold(dst_pmap, dstmpte);
2679 vm_page_unlock_queues();
2680 }
2681 if (dstmpte->hold_count >= srcmpte->hold_count)
2682 break;
2683 }
2684 addr += PAGE_SIZE;
2685 src_pte++;
2686 dst_pte++;
2687 }
2688 }
2689 }
2690
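/*
 * Editorial sketch (not part of the original source): fork-style
 * callers invoke pmap_copy() with identical source and destination
 * addresses; the routine is advisory and returns early otherwise.
 * The helper name is hypothetical.
 */
#if 0	/* example only */
static void
example_copy_on_fork(pmap_t dst, pmap_t src, vm_offset_t start,
    vm_offset_t end)
{
	pmap_copy(dst, src, start, end - start, start);
}
#endif
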
2691 #ifdef SMP
2692
2693 /*
2694 * pmap_zpi_switchin*()
2695 *
2696  * These functions allow us to avoid doing IPIs altogether in certain
2697  * temporary page-mapping situations (page zeroing).  Instead, to deal
2698  * with being preempted and moved onto a different cpu, we invalidate
2699 * the page when the scheduler switches us in. This does not occur
2700 * very often so we remain relatively optimal with very little effort.
2701 */
2702 static void
2703 pmap_zpi_switchin12(void)
2704 {
2705 invlpg((u_int)CADDR1);
2706 invlpg((u_int)CADDR2);
2707 }
2708
2709 static void
2710 pmap_zpi_switchin2(void)
2711 {
2712 invlpg((u_int)CADDR2);
2713 }
2714
2715 static void
2716 pmap_zpi_switchin3(void)
2717 {
2718 invlpg((u_int)CADDR3);
2719 }
2720
2721 #endif
2722
2723 /*
2724 * pmap_zero_page zeros the specified hardware page by mapping
2725 * the page into KVM and using bzero to clear its contents.
2726 */
2727 void
2728 pmap_zero_page(vm_page_t m)
2729 {
2730 vm_offset_t phys;
2731
2732 phys = VM_PAGE_TO_PHYS(m);
2733 if (*CMAP2)
2734 panic("pmap_zero_page: CMAP2 busy");
2735 *CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
2736 #ifdef I386_CPU
2737 invltlb();
2738 #else
2739 #ifdef SMP
2740 curthread->td_switchin = pmap_zpi_switchin2;
2741 #endif
2742 invlpg((u_int)CADDR2);
2743 #endif
2744 #if defined(I686_CPU)
2745 if (cpu_class == CPUCLASS_686)
2746 i686_pagezero(CADDR2);
2747 else
2748 #endif
2749 bzero(CADDR2, PAGE_SIZE);
2750 #ifdef SMP
2751 curthread->td_switchin = NULL;
2752 #endif
2753 *CMAP2 = 0;
2754 }
2755
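/*
 * Editorial sketch (not part of the original source): the CMAP/CADDR
 * window pattern used by the zeroing and copying routines -- claim the
 * pte, map, invalidate, operate, release.  The real routines also
 * handle the I386_CPU case with invltlb(); the helper name is
 * hypothetical.
 */
#if 0	/* example only */
static void
example_window_copyout(vm_page_t m, char *out)
{
	if (*CMAP2)
		panic("example_window_copyout: CMAP2 busy");
	*CMAP2 = PG_V | VM_PAGE_TO_PHYS(m) | PG_A;
#ifdef SMP
	curthread->td_switchin = pmap_zpi_switchin2; /* re-invalidate after preemption */
#endif
	invlpg((u_int)CADDR2);
	bcopy(CADDR2, out, PAGE_SIZE);
#ifdef SMP
	curthread->td_switchin = NULL;
#endif
	*CMAP2 = 0;
}
#endif
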
2756 /*
2757 * pmap_zero_page_area zeros the specified hardware page by mapping
2758 * the page into KVM and using bzero to clear its contents.
2759 *
2760 * off and size may not cover an area beyond a single hardware page.
2761 */
2762 void
2763 pmap_zero_page_area(vm_page_t m, int off, int size)
2764 {
2765 vm_offset_t phys;
2766
2767 phys = VM_PAGE_TO_PHYS(m);
2768 if (*CMAP2)
2769 panic("pmap_zero_page: CMAP2 busy");
2770 *CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
2771 #ifdef I386_CPU
2772 invltlb();
2773 #else
2774 #ifdef SMP
2775 curthread->td_switchin = pmap_zpi_switchin2;
2776 #endif
2777 invlpg((u_int)CADDR2);
2778 #endif
2779 #if defined(I686_CPU)
2780 if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
2781 i686_pagezero(CADDR2);
2782 else
2783 #endif
2784 bzero((char *)CADDR2 + off, size);
2785 #ifdef SMP
2786 curthread->td_switchin = NULL;
2787 #endif
2788 *CMAP2 = 0;
2789 }
2790
2791 /*
2792 * pmap_zero_page_idle zeros the specified hardware page by mapping
2793 * the page into KVM and using bzero to clear its contents. This
2794 * is intended to be called from the vm_pagezero process only and
2795 * outside of Giant.
2796 */
2797 void
2798 pmap_zero_page_idle(vm_page_t m)
2799 {
2800 vm_offset_t phys;
2801
2802 phys = VM_PAGE_TO_PHYS(m);
2803 if (*CMAP3)
2804 panic("pmap_zero_page: CMAP3 busy");
2805 *CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
2806 #ifdef I386_CPU
2807 invltlb();
2808 #else
2809 #ifdef SMP
2810 curthread->td_switchin = pmap_zpi_switchin3;
2811 #endif
2812 invlpg((u_int)CADDR3);
2813 #endif
2814 #if defined(I686_CPU)
2815 if (cpu_class == CPUCLASS_686)
2816 i686_pagezero(CADDR3);
2817 else
2818 #endif
2819 bzero(CADDR3, PAGE_SIZE);
2820 #ifdef SMP
2821 curthread->td_switchin = NULL;
2822 #endif
2823 *CMAP3 = 0;
2824 }
2825
2826 /*
2827 * pmap_copy_page copies the specified (machine independent)
2828 * page by mapping the page into virtual memory and using
2829 * bcopy to copy the page, one machine dependent page at a
2830 * time.
2831 */
2832 void
2833 pmap_copy_page(vm_page_t src, vm_page_t dst)
2834 {
2835
2836 if (*CMAP1)
2837 panic("pmap_copy_page: CMAP1 busy");
2838 if (*CMAP2)
2839 panic("pmap_copy_page: CMAP2 busy");
2840 *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
2841 *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
2842 #ifdef I386_CPU
2843 invltlb();
2844 #else
2845 #ifdef SMP
2846 curthread->td_switchin = pmap_zpi_switchin12;
2847 #endif
2848 invlpg((u_int)CADDR1);
2849 invlpg((u_int)CADDR2);
2850 #endif
2851 bcopy(CADDR1, CADDR2, PAGE_SIZE);
2852 #ifdef SMP
2853 curthread->td_switchin = NULL;
2854 #endif
2855 *CMAP1 = 0;
2856 *CMAP2 = 0;
2857 }
2858
2859 /*
2860 * Returns true if the pmap's pv is one of the first
2861 * 16 pvs linked to from this page. This count may
2862 * be changed upwards or downwards in the future; it
2863 * is only necessary that true be returned for a small
2864 * subset of pmaps for proper page aging.
2865 */
2866 boolean_t
2867 pmap_page_exists_quick(pmap, m)
2868 pmap_t pmap;
2869 vm_page_t m;
2870 {
2871 pv_entry_t pv;
2872 int loops = 0;
2873 int s;
2874
2875 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2876 return FALSE;
2877
2878 s = splvm();
2879 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2880 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2881 if (pv->pv_pmap == pmap) {
2882 splx(s);
2883 return TRUE;
2884 }
2885 loops++;
2886 if (loops >= 16)
2887 break;
2888 }
2889 splx(s);
2890 return (FALSE);
2891 }
2892
2893 #define PMAP_REMOVE_PAGES_CURPROC_ONLY
2894 /*
2895  * Remove all pages from the specified address space;
2896  * this aids process exit speed.  Also, this code is
2897  * special-cased for the current process only, but
2898  * can have the more generic (and slightly slower)
2899  * mode enabled.  This is much faster than pmap_remove
2900  * in the case of running down an entire address space.
2901 */
2902 void
2903 pmap_remove_pages(pmap, sva, eva)
2904 pmap_t pmap;
2905 vm_offset_t sva, eva;
2906 {
2907 pt_entry_t *pte, tpte;
2908 vm_page_t m;
2909 pv_entry_t pv, npv;
2910 int s;
2911
2912 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2913 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
2914 printf("warning: pmap_remove_pages called with non-current pmap\n");
2915 return;
2916 }
2917 #endif
2918 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2919 s = splvm();
2920 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2921
2922 if (pv->pv_va >= eva || pv->pv_va < sva) {
2923 npv = TAILQ_NEXT(pv, pv_plist);
2924 continue;
2925 }
2926
2927 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
2928 pte = vtopte(pv->pv_va);
2929 #else
2930 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
2931 #endif
2932 tpte = *pte;
2933
2934 if (tpte == 0) {
2935 printf("TPTE at %p IS ZERO @ VA %08x\n",
2936 pte, pv->pv_va);
2937 panic("bad pte");
2938 }
2939
2940 /*
2941 * We cannot remove wired pages from a process' mapping at this time
2942 */
2943 if (tpte & PG_W) {
2944 npv = TAILQ_NEXT(pv, pv_plist);
2945 continue;
2946 }
2947
2948 m = PHYS_TO_VM_PAGE(tpte);
2949 KASSERT(m->phys_addr == (tpte & PG_FRAME),
2950 ("vm_page_t %p phys_addr mismatch %08x %08x",
2951 m, m->phys_addr, tpte));
2952
2953 KASSERT(m < &vm_page_array[vm_page_array_size],
2954 ("pmap_remove_pages: bad tpte %x", tpte));
2955
2956 pv->pv_pmap->pm_stats.resident_count--;
2957
2958 *pte = 0;
2959
2960 /*
2961 * Update the vm_page_t clean and reference bits.
2962 */
2963 if (tpte & PG_M) {
2964 vm_page_dirty(m);
2965 }
2966
2967 npv = TAILQ_NEXT(pv, pv_plist);
2968 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2969
2970 m->md.pv_list_count--;
2971 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2972 if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
2973 vm_page_flag_clear(m, PG_WRITEABLE);
2974 }
2975
2976 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2977 free_pv_entry(pv);
2978 }
2979 splx(s);
2980 pmap_invalidate_all(pmap);
2981 }
2982
2983 /*
2984 * pmap_testbit tests bits in pte's
2985 * note that the testbit/changebit routines are inline,
2986 * and a lot of things compile-time evaluate.
2987 */
2988 static boolean_t
2989 pmap_testbit(m, bit)
2990 vm_page_t m;
2991 int bit;
2992 {
2993 pv_entry_t pv;
2994 pt_entry_t *pte;
2995 int s;
2996
2997 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
2998 return FALSE;
2999
3000 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
3001 return FALSE;
3002
3003 s = splvm();
3004
3005 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3006 /*
3007  * if the bit being tested is the modified or accessed
3008  * bit, then skip addresses whose modified state is
3009  * not tracked (i.e. treat them as never modified).
3010 */
3011 if (bit & (PG_A|PG_M)) {
3012 if (!pmap_track_modified(pv->pv_va))
3013 continue;
3014 }
3015
3016 #if defined(PMAP_DIAGNOSTIC)
3017 if (!pv->pv_pmap) {
3018 printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
3019 continue;
3020 }
3021 #endif
3022 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3023 if (*pte & bit) {
3024 splx(s);
3025 return TRUE;
3026 }
3027 }
3028 splx(s);
3029 return (FALSE);
3030 }
3031
3032 /*
3033 * this routine is used to modify bits in ptes
3034 */
3035 static __inline void
3036 pmap_changebit(vm_page_t m, int bit, boolean_t setem)
3037 {
3038 register pv_entry_t pv;
3039 register pt_entry_t *pte;
3040 int s;
3041
3042 if (!pmap_initialized || (m->flags & PG_FICTITIOUS) ||
3043 (!setem && bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
3044 return;
3045
3046 s = splvm();
3047 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
3048 /*
3049  * Loop over all current mappings, setting/clearing as appropriate.
3050  * If setting RO, do we need to clear the VAC?
3051 */
3052 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3053 /*
3054 * don't write protect pager mappings
3055 */
3056 if (!setem && (bit == PG_RW)) {
3057 if (!pmap_track_modified(pv->pv_va))
3058 continue;
3059 }
3060
3061 #if defined(PMAP_DIAGNOSTIC)
3062 if (!pv->pv_pmap) {
3063 printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
3064 continue;
3065 }
3066 #endif
3067
3068 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3069
3070 if (setem) {
3071 *pte |= bit;
3072 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
3073 } else {
3074 pt_entry_t pbits = *pte;
3075 if (pbits & bit) {
3076 if (bit == PG_RW) {
3077 if (pbits & PG_M) {
3078 vm_page_dirty(m);
3079 }
3080 *pte = pbits & ~(PG_M|PG_RW);
3081 } else {
3082 *pte = pbits & ~bit;
3083 }
3084 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
3085 }
3086 }
3087 }
3088 if (!setem && bit == PG_RW)
3089 vm_page_flag_clear(m, PG_WRITEABLE);
3090 splx(s);
3091 }
3092
3093 /*
3094 * pmap_page_protect:
3095 *
3096 * Lower the permission for all mappings to a given page.
3097 */
3098 void
3099 pmap_page_protect(vm_page_t m, vm_prot_t prot)
3100 {
3101 if ((prot & VM_PROT_WRITE) == 0) {
3102 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
3103 pmap_changebit(m, PG_RW, FALSE);
3104 } else {
3105 pmap_remove_all(m);
3106 }
3107 }
3108 }
3109
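/*
 * Editorial sketch (not part of the original source): the two cases
 * above reduce to "write-protect every mapping" versus "remove every
 * mapping" of the page.
 */
#if 0	/* example only */
static void
example_page_protect(vm_page_t m)
{
	pmap_page_protect(m, VM_PROT_READ);	/* clears PG_RW everywhere */
	pmap_page_protect(m, VM_PROT_NONE);	/* removes all mappings */
}
#endif
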
3110 vm_offset_t
3111 pmap_phys_address(ppn)
3112 int ppn;
3113 {
3114 return (i386_ptob(ppn));
3115 }
3116
3117 /*
3118 * pmap_ts_referenced:
3119 *
3120 * Return a count of reference bits for a page, clearing those bits.
3121 * It is not necessary for every reference bit to be cleared, but it
3122 * is necessary that 0 only be returned when there are truly no
3123 * reference bits set.
3124 *
3125 * XXX: The exact number of bits to check and clear is a matter that
3126 * should be tested and standardized at some point in the future for
3127 * optimal aging of shared pages.
3128 */
3129 int
3130 pmap_ts_referenced(vm_page_t m)
3131 {
3132 register pv_entry_t pv, pvf, pvn;
3133 pt_entry_t *pte;
3134 int s;
3135 int rtval = 0;
3136
3137 if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
3138 return (rtval);
3139
3140 s = splvm();
3141 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
3142 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
3143
3144 pvf = pv;
3145
3146 do {
3147 pvn = TAILQ_NEXT(pv, pv_list);
3148
3149 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
3150
3151 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
3152
3153 if (!pmap_track_modified(pv->pv_va))
3154 continue;
3155
3156 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
3157
3158 if (pte && (*pte & PG_A)) {
3159 *pte &= ~PG_A;
3160
3161 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
3162
3163 rtval++;
3164 if (rtval > 4) {
3165 break;
3166 }
3167 }
3168 } while ((pv = pvn) != NULL && pv != pvf);
3169 }
3170 splx(s);
3171
3172 return (rtval);
3173 }
3174
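/*
 * Editorial sketch (not part of the original source): a pageout-style
 * aging check combines the cleared reference count with the page's
 * PG_REFERENCED flag, much as pmap_mincore() does below.  The helper
 * name is hypothetical; the caller must hold vm_page_queue_mtx.
 */
#if 0	/* example only */
static int
example_page_activity(vm_page_t m)
{
	int refs;

	refs = pmap_ts_referenced(m);	/* clears a handful of PG_A bits */
	if (m->flags & PG_REFERENCED)
		refs++;
	return (refs);
}
#endif
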
3175 /*
3176 * pmap_is_modified:
3177 *
3178 * Return whether or not the specified physical page was modified
3179 * in any physical maps.
3180 */
3181 boolean_t
3182 pmap_is_modified(vm_page_t m)
3183 {
3184 return pmap_testbit(m, PG_M);
3185 }
3186
3187 /*
3188 * Clear the modify bits on the specified physical page.
3189 */
3190 void
3191 pmap_clear_modify(vm_page_t m)
3192 {
3193 pmap_changebit(m, PG_M, FALSE);
3194 }
3195
3196 /*
3197 * pmap_clear_reference:
3198 *
3199 * Clear the reference bit on the specified physical page.
3200 */
3201 void
3202 pmap_clear_reference(vm_page_t m)
3203 {
3204 pmap_changebit(m, PG_A, FALSE);
3205 }
3206
3207 /*
3208 * Miscellaneous support routines follow
3209 */
3210
3211 static void
3212 i386_protection_init()
3213 {
3214 register int *kp, prot;
3215
3216 kp = protection_codes;
3217 for (prot = 0; prot < 8; prot++) {
3218 switch (prot) {
3219 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
3220 /*
3221 * Read access is also 0. There isn't any execute bit,
3222 * so just make it readable.
3223 */
3224 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
3225 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
3226 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
3227 *kp++ = 0;
3228 break;
3229 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
3230 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
3231 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
3232 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
3233 *kp++ = PG_RW;
3234 break;
3235 }
3236 }
3237 }
3238
3239 /*
3240 * Map a set of physical memory pages into the kernel virtual
3241 * address space. Return a pointer to where it is mapped. This
3242 * routine is intended to be used for mapping device memory,
3243 * NOT real memory.
3244 */
3245 void *
3246 pmap_mapdev(pa, size)
3247 vm_offset_t pa;
3248 vm_size_t size;
3249 {
3250 vm_offset_t va, tmpva, offset;
3251 pt_entry_t *pte;
3252
3253 offset = pa & PAGE_MASK;
3254 size = roundup(offset + size, PAGE_SIZE);
3255
3256 GIANT_REQUIRED;
3257
3258 va = kmem_alloc_pageable(kernel_map, size);
3259 if (!va)
3260 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3261
3262 pa = pa & PG_FRAME;
3263 for (tmpva = va; size > 0; ) {
3264 pte = vtopte(tmpva);
3265 *pte = pa | PG_RW | PG_V | pgeflag;
3266 size -= PAGE_SIZE;
3267 tmpva += PAGE_SIZE;
3268 pa += PAGE_SIZE;
3269 }
3270 pmap_invalidate_range(kernel_pmap, va, tmpva);
3271 return ((void *)(va + offset));
3272 }
3273
3274 void
3275 pmap_unmapdev(va, size)
3276 vm_offset_t va;
3277 vm_size_t size;
3278 {
3279 vm_offset_t base, offset, tmpva;
3280 pt_entry_t *pte;
3281
3282 base = va & PG_FRAME;
3283 offset = va & PAGE_MASK;
3284 size = roundup(offset + size, PAGE_SIZE);
3285 for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
3286 pte = vtopte(tmpva);
3287 *pte = 0;
3288 }
3289 pmap_invalidate_range(kernel_pmap, va, tmpva);
3290 kmem_free(kernel_map, base, size);
3291 }
3292
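/*
 * Editorial sketch (not part of the original source): a driver maps a
 * device register window and later releases it; the physical address
 * here is hypothetical.
 */
#if 0	/* example only */
static void
example_map_registers(void)
{
	void *regs;

	regs = pmap_mapdev(0xfee00000, PAGE_SIZE); /* device memory, NOT RAM */
	/* ... access the registers through regs ... */
	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
}
#endif
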
3293 /*
3294 * perform the pmap work for mincore
3295 */
3296 int
3297 pmap_mincore(pmap, addr)
3298 pmap_t pmap;
3299 vm_offset_t addr;
3300 {
3301 pt_entry_t *ptep, pte;
3302 vm_page_t m;
3303 int val = 0;
3304
3305 ptep = pmap_pte(pmap, addr);
3306 if (ptep == 0) {
3307 return 0;
3308 }
3309
3310 if ((pte = *ptep) != 0) {
3311 vm_offset_t pa;
3312
3313 val = MINCORE_INCORE;
3314 if ((pte & PG_MANAGED) == 0)
3315 return val;
3316
3317 pa = pte & PG_FRAME;
3318
3319 m = PHYS_TO_VM_PAGE(pa);
3320
3321 /*
3322 * Modified by us
3323 */
3324 if (pte & PG_M)
3325 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
3326 else {
3327 /*
3328 * Modified by someone else
3329 */
3330 vm_page_lock_queues();
3331 if (m->dirty || pmap_is_modified(m))
3332 val |= MINCORE_MODIFIED_OTHER;
3333 vm_page_unlock_queues();
3334 }
3335 /*
3336 * Referenced by us
3337 */
3338 if (pte & PG_A)
3339 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
3340 else {
3341 /*
3342 * Referenced by someone else
3343 */
3344 vm_page_lock_queues();
3345 if ((m->flags & PG_REFERENCED) ||
3346 pmap_ts_referenced(m)) {
3347 val |= MINCORE_REFERENCED_OTHER;
3348 vm_page_flag_set(m, PG_REFERENCED);
3349 }
3350 vm_page_unlock_queues();
3351 }
3352 }
3353 return val;
3354 }
3355
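/*
 * Editorial sketch (not part of the original source): mincore(2)
 * consumers test individual bits of the return value, e.g.:
 */
#if 0	/* example only */
static boolean_t
example_is_resident(pmap_t pmap, vm_offset_t addr)
{
	return ((pmap_mincore(pmap, addr) & MINCORE_INCORE) != 0);
}
#endif
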
3356 void
3357 pmap_activate(struct thread *td)
3358 {
3359 struct proc *p = td->td_proc;
3360 pmap_t pmap;
3361 u_int32_t cr3;
3362
3363 pmap = vmspace_pmap(td->td_proc->p_vmspace);
3364 #if defined(SMP)
3365 pmap->pm_active |= PCPU_GET(cpumask);
3366 #else
3367 pmap->pm_active |= 1;
3368 #endif
3369 cr3 = vtophys(pmap->pm_pdir);
3370 /* XXXKSE this is wrong.
3371 * pmap_activate is for the current thread on the current cpu
3372 */
3373 if (p->p_flag & P_KSES) {
3374 /* Make sure all other cr3 entries are updated. */
3375 /* what if they are running? XXXKSE (maybe abort them) */
3376 FOREACH_THREAD_IN_PROC(p, td) {
3377 td->td_pcb->pcb_cr3 = cr3;
3378 }
3379 } else {
3380 td->td_pcb->pcb_cr3 = cr3;
3381 }
3382 load_cr3(cr3);
3383 #ifdef SWTCH_OPTIM_STATS
3384 tlb_flush_count++;
3385 #endif
3386 }
3387
3388 vm_offset_t
3389 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
3390 {
3391
3392 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
3393 return addr;
3394 }
3395
3396 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
3397 return addr;
3398 }
3399
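/*
 * Editorial note (not part of the original source): rounding the hint
 * up to the next 4MB boundary lets eligible device objects be mapped
 * with PG_PS superpages; e.g. with NBPDR == 4MB, a hint of 0x00500000
 * becomes 0x00800000.
 */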
3400
3401 #if defined(PMAP_DEBUG)
3402 pmap_pid_dump(int pid)
3403 {
3404 pmap_t pmap;
3405 struct proc *p;
3406 int npte = 0;
3407 int index;
3408
3409 sx_slock(&allproc_lock);
3410 LIST_FOREACH(p, &allproc, p_list) {
3411 if (p->p_pid != pid)
3412 continue;
3413
3414 if (p->p_vmspace) {
3415 int i,j;
3416 index = 0;
3417 pmap = vmspace_pmap(p->p_vmspace);
3418 for (i = 0; i < NPDEPG; i++) {
3419 pd_entry_t *pde;
3420 pt_entry_t *pte;
3421 vm_offset_t base = i << PDRSHIFT;
3422
3423 pde = &pmap->pm_pdir[i];
3424 if (pde && pmap_pde_v(pde)) {
3425 for (j = 0; j < NPTEPG; j++) {
3426 vm_offset_t va = base + (j << PAGE_SHIFT);
3427 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
3428 if (index) {
3429 index = 0;
3430 printf("\n");
3431 }
3432 sx_sunlock(&allproc_lock);
3433 return npte;
3434 }
3435 pte = pmap_pte_quick(pmap, va);
3436 if (pte && pmap_pte_v(pte)) {
3437 pt_entry_t pa;
3438 vm_page_t m;
3439 pa = *pte;
3440 m = PHYS_TO_VM_PAGE(pa);
3441 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
3442 va, pa, m->hold_count, m->wire_count, m->flags);
3443 npte++;
3444 index++;
3445 if (index >= 2) {
3446 index = 0;
3447 printf("\n");
3448 } else {
3449 printf(" ");
3450 }
3451 }
3452 }
3453 }
3454 }
3455 }
3456 }
3457 sx_sunlock(&allproc_lock);
3458 return npte;
3459 }
3460 #endif
3461
3462 #if defined(DEBUG)
3463
3464 static void pads(pmap_t pm);
3465 void pmap_pvdump(vm_offset_t pa);
3466
3467 /* print address space of pmap*/
3468 static void
3469 pads(pm)
3470 pmap_t pm;
3471 {
3472 int i, j;
3473 vm_offset_t va;
3474 pt_entry_t *ptep;
3475
3476 if (pm == kernel_pmap)
3477 return;
3478 for (i = 0; i < NPDEPG; i++)
3479 if (pm->pm_pdir[i])
3480 for (j = 0; j < NPTEPG; j++) {
3481 va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
3482 if (pm == kernel_pmap && va < KERNBASE)
3483 continue;
3484 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
3485 continue;
3486 ptep = pmap_pte_quick(pm, va);
3487 if (pmap_pte_v(ptep))
3488 printf("%x:%x ", va, *ptep);
3489 };
3490
3491 }
3492
3493 void
3494 pmap_pvdump(pa)
3495 vm_offset_t pa;
3496 {
3497 pv_entry_t pv;
3498 vm_page_t m;
3499
3500 printf("pa %x", pa);
3501 m = PHYS_TO_VM_PAGE(pa);
3502 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3503 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3504 pads(pv->pv_pmap);
3505 }
3506 printf(" ");
3507 }
3508 #endif