FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/pmap.c


    1 /*
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * the Systems Programming Group of the University of Utah Computer
   11  * Science Department and William Jolitz of UUNET Technologies Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the University of
   24  *      California, Berkeley and its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   42  * $FreeBSD$
   43  */
   44 
   45 /*-
   46  * Copyright (c) 2003 Networks Associates Technology, Inc.
   47  * All rights reserved.
   48  *
   49  * This software was developed for the FreeBSD Project by Jake Burkholder,
   50  * Safeport Network Services, and Network Associates Laboratories, the
   51  * Security Research Division of Network Associates, Inc. under
   52  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   53  * CHATS research program.
   54  *
   55  * Redistribution and use in source and binary forms, with or without
   56  * modification, are permitted provided that the following conditions
   57  * are met:
   58  * 1. Redistributions of source code must retain the above copyright
   59  *    notice, this list of conditions and the following disclaimer.
   60  * 2. Redistributions in binary form must reproduce the above copyright
   61  *    notice, this list of conditions and the following disclaimer in the
   62  *    documentation and/or other materials provided with the distribution.
   63  *
   64  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   65  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   66  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   67  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   68  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   69  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   70  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   71  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   72  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   73  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   74  * SUCH DAMAGE.
   75  */
   76 
   77 /*
   78  *      Manages physical address maps.
   79  *
   80  *      In addition to hardware address maps, this
   81  *      module is called upon to provide software-use-only
   82  *      maps which may or may not be stored in the same
   83  *      form as hardware maps.  These pseudo-maps are
   84  *      used to store intermediate results from copy
   85  *      operations to and from address spaces.
   86  *
   87  *      Since the information managed by this module is
   88  *      also stored by the logical address mapping module,
   89  *      this module may throw away valid virtual-to-physical
   90  *      mappings at almost any time.  However, invalidations
   91  *      of virtual-to-physical mappings must be done as
   92  *      requested.
   93  *
   94  *      In order to cope with hardware architectures which
   95  *      make virtual-to-physical map invalidates expensive,
    96  *      this module may delay invalidation or protection
    97  *      reduction operations until they are actually
    98  *      necessary.  This module is given full information as
    99  *      to which processors are currently using which maps,
   100  *      and as to when physical maps must be made correct.
  101  */
  102 
  103 #include "opt_disable_pse.h"
  104 #include "opt_pmap.h"
  105 #include "opt_msgbuf.h"
  106 #include "opt_user_ldt.h"
  107 
  108 #include <sys/param.h>
  109 #include <sys/systm.h>
  110 #include <sys/kernel.h>
  111 #include <sys/proc.h>
  112 #include <sys/msgbuf.h>
  113 #include <sys/vmmeter.h>
  114 #include <sys/mman.h>
  115 #include <sys/malloc.h>
  116 
  117 #include <machine/cpu.h>
  118 #include <machine/ipl.h>
  119 #include <vm/vm.h>
  120 #include <vm/vm_param.h>
  121 #include <sys/sysctl.h>
  122 #include <sys/lock.h>
  123 #include <vm/vm_kern.h>
  124 #include <vm/vm_page.h>
  125 #include <vm/vm_map.h>
  126 #include <vm/vm_object.h>
  127 #include <vm/vm_extern.h>
  128 #include <vm/vm_pageout.h>
  129 #include <vm/vm_pager.h>
  130 #include <vm/vm_zone.h>
  131 
  132 #include <sys/user.h>
  133 
  134 #include <machine/cpu.h>
  135 #include <machine/cputypes.h>
  136 #include <machine/md_var.h>
  137 #include <machine/specialreg.h>
  138 #if defined(SMP) || defined(APIC_IO)
  139 #include <machine/smp.h>
  140 #include <machine/apic.h>
  141 #include <machine/segments.h>
  142 #include <machine/tss.h>
  143 #include <machine/globaldata.h>
  144 #endif /* SMP || APIC_IO */
  145 
  146 #define PMAP_KEEP_PDIRS
  147 #ifndef PMAP_SHPGPERPROC
  148 #define PMAP_SHPGPERPROC 200
  149 #endif
  150 
  151 #if defined(DIAGNOSTIC)
  152 #define PMAP_DIAGNOSTIC
  153 #endif
  154 
  155 #define MINPV 2048
  156 
  157 #if !defined(PMAP_DIAGNOSTIC)
  158 #define PMAP_INLINE __inline
  159 #else
  160 #define PMAP_INLINE
  161 #endif
  162 
  163 /*
  164  * Get PDEs and PTEs for user/kernel address space
  165  */
  166 #define pmap_pde(m, v)  (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
  167 #define pdir_pde(m, v)  (m[(vm_offset_t)(v) >> PDRSHIFT])
  168 
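/*
 * Illustrative note (worked example, assuming the non-PAE i386 layout where
 * PDRSHIFT == 22, PAGE_SHIFT == 12 and NPTEPG == 1024): a 32-bit virtual
 * address decomposes as
 *
 *      va                  = 0xC0A1B123
 *      page directory slot = va >> 22             = 0x302
 *      page table slot     = (va >> 12) & 0x3FF   = 0x21B
 *      page offset         = va & 0xFFF           = 0x123
 *
 * so pmap_pde(m, va) above returns &m->pm_pdir[0x302].
 */
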
  169 #define pmap_pde_v(pte)         ((*pte & PG_V) != 0)
  170 #define pmap_pte_w(pte)         ((*pte & PG_W) != 0)
  171 #define pmap_pte_m(pte)         ((*pte & PG_M) != 0)
  172 #define pmap_pte_u(pte)         ((*pte & PG_A) != 0)
  173 #define pmap_pte_v(pte)         ((*pte & PG_V) != 0)
  174 
  175 #define pmap_pte_set_w(pte, v)  ((v) ? (*pte |= PG_W) : (*pte &= ~PG_W))
  176 #define pmap_pte_set_prot(pte, v) (*pte = (*pte & ~PG_PROT) | (v))
  177 
  178 /*
   179  * Given a map and a machine-independent protection code,
   180  * convert to an i386 protection code.
  181  */
  182 #define pte_prot(m, p)  (protection_codes[p])
  183 static int protection_codes[8];
  184 
  185 static struct pmap kernel_pmap_store;
  186 pmap_t kernel_pmap;
  187 
  188 vm_paddr_t avail_start; /* PA of first available physical page */
  189 vm_paddr_t avail_end;   /* PA of last available physical page */
  190 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  191 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  192 static boolean_t pmap_initialized = FALSE;      /* Has pmap_init completed? */
  193 static int pgeflag;             /* PG_G or-in */
  194 static int pseflag;             /* PG_PS or-in */
  195 
  196 static vm_object_t kptobj;
  197 
  198 static int nkpt;
  199 vm_offset_t kernel_vm_end;
  200 
  201 /*
  202  * Data for the pv entry allocation mechanism
  203  */
  204 static vm_zone_t pvzone;
  205 static struct vm_zone pvzone_store;
  206 static struct vm_object pvzone_obj;
  207 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  208 static int pmap_pagedaemon_waken = 0;
  209 static struct pv_entry *pvinit;
  210 
  211 /*
  212  * All those kernel PT submaps that BSD is so fond of
  213  */
  214 #ifdef SMP
  215 extern pt_entry_t *SMPpt;
  216 #define CMAP1   prv_CMAP1
  217 #define CMAP2   prv_CMAP2
  218 #define CMAP3   prv_CMAP3
  219 #define PMAP1   prv_PMAP1
  220 #define PMAP2   prv_PMAP2
  221 #define CADDR1  prv_CADDR1
  222 #define CADDR2  prv_CADDR2
  223 #define CADDR3  prv_CADDR3
  224 #define PADDR1  prv_PADDR1
  225 #define PADDR2  prv_PADDR2
  226 #else
  227 static pt_entry_t *CMAP1, *CMAP2, *CMAP3;
  228 static caddr_t CADDR1, CADDR2, CADDR3;
  229 static pd_entry_t *PMAP1;
  230 static pt_entry_t *PADDR1;
  231 static pd_entry_t *PMAP2;
  232 static pt_entry_t *PADDR2;
  233 #endif
  234 
  235 static pt_entry_t *ptmmap;
  236 caddr_t ptvmmap = 0;
  237 static pt_entry_t *msgbufmap;
  238 struct msgbuf *msgbufp = 0;
  239 
  240 /*
  241  * Crashdump maps.
  242  */
  243 static pt_entry_t *pt_crashdumpmap;
  244 static caddr_t crashdumpmap;
  245 
  246 static pd_entry_t pdir4mb;
  247 
  248 static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv));
  249 static pv_entry_t get_pv_entry __P((void));
  250 static void     i386_protection_init __P((void));
  251 static __inline void    pmap_changebit __P((vm_page_t m, int bit, boolean_t setem));
  252 static void     pmap_remove_all __P((vm_page_t m));
  253 static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
  254                                       vm_page_t m, vm_page_t mpte));
  255 static int pmap_remove_pte __P((pmap_t pmap, pt_entry_t* ptq,
  256                                         vm_offset_t sva));
  257 static void pmap_remove_page __P((pmap_t pmap, vm_offset_t va));
  258 static int pmap_remove_entry __P((pmap_t pmap, vm_page_t m,
  259                                         vm_offset_t va));
  260 static boolean_t pmap_testbit __P((vm_page_t m, int bit));
  261 static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
  262                 vm_page_t mpte, vm_page_t m));
  263 
  264 static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
  265 static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
  266 static pt_entry_t *pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
  267 static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
  268 static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
  269 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
  270 
  271 static int      pmap_is_current(pmap_t);
  272 
  273 #ifdef PAE
  274 static pdpt_entry_t *pmap_alloc_pdpt(void);
  275 static void     pmap_free_pdpt(pdpt_entry_t *);
  276 #endif
  277 #if defined(I686_CPU) && !defined(NO_PSE_HACK)
  278 static int has_pse_bug = 0;     /* Initialized so that it can be patched. */
  279 #endif
  280 
  281 /*
  282  * Move the kernel virtual free pointer to the next
  283  * 4MB.  This is used to help improve performance
  284  * by using a large (4MB) page for much of the kernel
  285  * (.text, .data, .bss)
  286  */
  287 static vm_offset_t
  288 pmap_kmem_choose(vm_offset_t addr)
  289 {
  290         vm_offset_t newaddr = addr;
  291 #if defined(I686_CPU) && !defined(NO_PSE_HACK)
   292         /* Deal with unresolved Pentium 4 issues. */
  293         if (cpu == CPU_686 && (cpu_id & 0xf00) == 0xf00 &&
  294             strcmp(cpu_vendor, "GenuineIntel") == 0) {
  295                 has_pse_bug = 1;
  296                 return newaddr;
  297         }
  298 #endif
  299 #ifndef DISABLE_PSE
  300         if (cpu_feature & CPUID_PSE) {
  301                 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
  302         }
  303 #endif
  304         return newaddr;
  305 }
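
/*
 * Illustrative note (worked example, assuming the non-PAE NBPDR of 4MB,
 * i.e. 0x400000): the rounding above behaves as
 *
 *      addr    = 0xC0345678
 *      newaddr = (0xC0345678 + 0x3FFFFF) & ~0x3FFFFF = 0xC0400000
 *
 * advancing the kernel virtual free pointer to the next 4MB boundary so
 * the region below it can be covered by large (PSE) pages; an address
 * that is already aligned is left unchanged.
 */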
  306 
  307 /*
  308  *      Bootstrap the system enough to run with virtual memory.
  309  *
  310  *      On the i386 this is called after mapping has already been enabled
  311  *      and just syncs the pmap module with what has already been done.
  312  *      [We can't call it easily with mapping off since the kernel is not
  313  *      mapped with PA == VA, hence we would have to relocate every address
  314  *      from the linked base (virtual) address "KERNBASE" to the actual
  315  *      (physical) address starting relative to 0]
  316  */
  317 void
  318 pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr)
  319 {
  320         vm_offset_t va;
  321         pt_entry_t *pte;
  322 #ifdef SMP
  323         struct globaldata *gd;
  324 #endif
  325         int i;
  326 
  327         avail_start = firstaddr;
  328 
  329         /*
  330          * The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
  331          * large. It should instead be correctly calculated in locore.s and
  332          * not based on 'first' (which is a physical address, not a virtual
  333          * address, for the start of unused physical memory). The kernel
  334          * page tables are NOT double mapped and thus should not be included
  335          * in this calculation.
  336          */
  337         virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
  338         virtual_avail = pmap_kmem_choose(virtual_avail);
  339 
  340         virtual_end = VM_MAX_KERNEL_ADDRESS;
  341 
  342         /*
  343          * Initialize protection array.
  344          */
  345         i386_protection_init();
  346 
  347         /*
  348          * The kernel's pmap is statically allocated so we don't have to use
  349          * pmap_create, which is unlikely to work correctly at this part of
  350          * the boot sequence (XXX and which no longer exists).
  351          */
  352         kernel_pmap = &kernel_pmap_store;
  353 
  354         kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);
  355         kernel_pmap->pm_active = -1;    /* don't allow deactivation */
  356 #ifdef PAE
  357         kernel_pmap->pm_pdpt = IdlePDPT;
  358 #endif
  359         TAILQ_INIT(&kernel_pmap->pm_pvlist);
  360         nkpt = NKPT;
  361 
  362         /*
  363          * Reserve some special page table entries/VA space for temporary
  364          * mapping of pages.
  365          */
  366 #define SYSMAP(c, p, v, n)      \
  367         v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
  368 
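/*
 * Illustrative note: the SYSMAP() macro simply carves n pages out of the
 * va/pte cursors set up below.  For example,
 *
 *      SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 *
 * expands to
 *
 *      CADDR1 = (caddr_t)va; va += ((1)*PAGE_SIZE); CMAP1 = pte; pte += (1);
 *
 * leaving CADDR1 as the reserved virtual address and CMAP1 pointing at the
 * page table entry that maps it.
 */
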
  369         va = virtual_avail;
  370         pte = vtopte(va);
  371 
  372 #ifndef SMP
  373         /*
  374          * CMAP1/CMAP2/CMAP3 are used for zeroing and copying pages.
  375          */
  376         SYSMAP(caddr_t, CMAP1, CADDR1, 1)
  377         SYSMAP(caddr_t, CMAP2, CADDR2, 1)
  378         SYSMAP(caddr_t, CMAP3, CADDR3, 1)
  379         *CMAP1 = *CMAP2 = *CMAP3 = 0;
  380 
   381         /*
   382          * PMAP1/PADDR1 and PMAP2/PADDR2 are used by pmap_pte_quick/pmap_pte.
   383          */
  384         SYSMAP(pd_entry_t *, PMAP1, PADDR1, 1);
  385         SYSMAP(pd_entry_t *, PMAP2, PADDR2, 1);
  386 #endif
  387 
  388         /*
  389          * Crashdump maps.
  390          */
  391         SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
  392 
  393         /*
  394          * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
  395          * XXX ptmmap is not used.
  396          */
  397         SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
  398 
  399         /*
  400          * msgbufp is used to map the system message buffer.
  401          * XXX msgbufmap is not used.
  402          */
  403         SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
  404                atop(round_page(MSGBUF_SIZE)))
  405 
  406         virtual_avail = va;
  407 
  408         for (i = 0; i < NKPT; i++)
  409                 PTD[i] = 0;
  410 
  411         /*
  412          * Initialize the global page flag
  413          */
  414         pgeflag = 0;
  415 #if !defined(SMP)                       /* XXX - see also mp_machdep.c */
  416         if (cpu_feature & CPUID_PGE)
  417                 pgeflag = PG_G;
  418 #endif
  419 
  420         /*
  421          * Initialize the 4MB page size flag
  422          */
  423         pseflag = 0;
  424 #ifndef DISABLE_PSE
  425         if (cpu_feature & CPUID_PSE)
  426                 pseflag = PG_PS;
  427 #endif
  428 #if defined(I686_CPU) && !defined(NO_PSE_HACK)
   429         /* Deal with unresolved Pentium 4 issues. */
  430         if (has_pse_bug)
  431                 pseflag = 0;
  432 #endif
  433         /*
  434          * The 4MB page version of the initial
  435          * kernel page mapping.
  436          */
  437         if (pseflag) {
  438                 pd_entry_t ptditmp;
  439                 /*
  440                  * Note that we have enabled PSE mode
  441                  */
  442                 ptditmp = *(PTmap + i386_btop(KERNBASE));
  443                 ptditmp &= ~(NBPDR - 1);
  444                 ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
  445                 pdir4mb = ptditmp;
  446 
  447 #if !defined(SMP)
  448                 /*
  449                  * Enable the PSE mode.
  450                  */
  451                 load_cr4(rcr4() | CR4_PSE);
  452 
  453                 /*
  454                  * We can do the mapping here for the single processor
  455                  * case.  We simply ignore the old page table page from
  456                  * now on.
  457                  */
  458                 /*
  459                  * For SMP, we still need 4K pages to bootstrap APs,
  460                  * PSE will be enabled as soon as all APs are up.
  461                  */
  462                 kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
  463                 invltlb();
  464 #endif
  465         }
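
        /*
         * Illustrative note (hypothetical values, assuming the standard i386
         * bit assignments PG_V=0x001, PG_RW=0x002, PG_U=0x004, PG_PS=0x080,
         * PG_G=0x100): a 4MB mapping of the physical 4MB frame at 0x00400000
         * would be encoded as
         *
         *      0x00400000 | PG_V | PG_RW | PG_PS | PG_U = 0x00400087
         *
         * (plus PG_G when pgeflag is set); bits 31..22 carry the 4MB frame
         * number and no second-level page table is consulted.
         */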
  466 
  467 #ifdef SMP
  468         if (cpu_apic_address == 0)
  469                 panic("pmap_bootstrap: no local apic!");
  470 
  471         /* local apic is mapped on last page */
  472         SMPpt[NPTEPG - 1] = PG_V | PG_RW | PG_N | pgeflag |
  473             (cpu_apic_address & PG_FRAME);
  474 
   475         /* The BSP does this itself; APs get it pre-set. */
  476         gd = &SMP_prvspace[0].globaldata;
  477         gd->gd_prv_CMAP1 = &SMPpt[1];
  478         gd->gd_prv_CMAP2 = &SMPpt[2];
  479         gd->gd_prv_CMAP3 = &SMPpt[3];
  480         gd->gd_prv_PMAP1 = &SMPpt[4];
  481         gd->gd_prv_PMAP2 = &SMPpt[5];
  482         gd->gd_prv_CADDR1 = SMP_prvspace[0].CPAGE1;
  483         gd->gd_prv_CADDR2 = SMP_prvspace[0].CPAGE2;
  484         gd->gd_prv_CADDR3 = SMP_prvspace[0].CPAGE3;
  485         gd->gd_prv_PADDR1 = (pt_entry_t *)SMP_prvspace[0].PPAGE1;
  486         gd->gd_prv_PADDR2 = (pt_entry_t *)SMP_prvspace[0].PPAGE2;
  487 #endif
  488 
  489         invltlb();
  490 }
  491 
  492 #ifdef SMP
  493 /*
  494  * Set 4mb pdir for mp startup
  495  */
  496 void
  497 pmap_set_opt(void)
  498 {
  499         if (pseflag && (cpu_feature & CPUID_PSE)) {
  500                 load_cr4(rcr4() | CR4_PSE);
  501                 if (pdir4mb && cpuid == 0) {    /* only on BSP */
  502                         kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
  503                         cpu_invltlb();
  504                 }
  505         }
  506 }
  507 #endif
  508 
  509 /*
  510  *      Initialize the pmap module.
  511  *      Called by vm_init, to initialize any structures that the pmap
  512  *      system needs to map virtual memory.
   513  *      pmap_init has been enhanced to support discontiguous
   514  *      physical memory in a fairly consistent way.
  515  */
  516 void
  517 pmap_init(vm_paddr_t phys_start, vm_paddr_t phys_end)
  518 {
  519         int i;
  520         int initial_pvs;
  521 
  522         /*
  523          * object for kernel page table pages
  524          */
  525         kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
  526 
  527         /*
  528          * Allocate memory for random pmap data structures.  Includes the
  529          * pv_head_table.
  530          */
  531 
  532         for(i = 0; i < vm_page_array_size; i++) {
  533                 vm_page_t m;
  534 
  535                 m = &vm_page_array[i];
  536                 TAILQ_INIT(&m->md.pv_list);
  537                 m->md.pv_list_count = 0;
  538         }
  539 
  540         /*
  541          * init the pv free list
  542          */
  543         initial_pvs = vm_page_array_size;
  544         if (initial_pvs < MINPV)
  545                 initial_pvs = MINPV;
  546         pvzone = &pvzone_store;
  547         pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
  548                 initial_pvs * sizeof (struct pv_entry));
  549         zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
  550             vm_page_array_size);
  551 
  552         /*
  553          * Now it is safe to enable pv_table recording.
  554          */
  555         pmap_initialized = TRUE;
  556 }
  557 
  558 /*
  559  * Initialize the address space (zone) for the pv_entries.  Set a
  560  * high water mark so that the system can recover from excessive
  561  * numbers of pv entries.
  562  */
  563 void
  564 pmap_init2()
  565 {
  566         int shpgperproc = PMAP_SHPGPERPROC;
  567 
  568         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
  569         pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
  570         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
  571         pv_entry_high_water = 9 * (pv_entry_max / 10);
  572         zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
  573 }
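
/*
 * Illustrative note (hypothetical values): with the default PMAP_SHPGPERPROC
 * of 200, a maxproc of 512 and 65536 managed pages (256MB of 4K pages), the
 * limits above work out to
 *
 *      pv_entry_max        = 200 * 512 + 65536 = 167936
 *      pv_entry_high_water = 9 * (167936 / 10) = 151137
 *
 * Crossing the high-water mark makes get_pv_entry() wake the pagedaemon so
 * that pmap_collect() can reclaim pv entries.
 */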
  574 
  575 
  576 /***************************************************
  577  * Low level helper routines.....
  578  ***************************************************/
  579 
  580 #if defined(PMAP_DIAGNOSTIC)
  581 /*
  582  * This code checks for non-writeable/modified pages.
  583  * This should be an invalid condition.
  584  */
  585 static int
  586 pmap_nw_modified(pt_entry_t pte)
  587 {
  588         return ((pte & (PG_M|PG_RW)) == PG_M);
  589 }
  590 #endif
  591 
  592 /*
   593  * This routine determines whether the modified bit should be
   594  * tracked for the given va; the clean submap range is excluded.
  595  */
  596 static PMAP_INLINE int
  597 pmap_track_modified(vm_offset_t va)
  598 {
  599         return (va < clean_sva) || (va >= clean_eva);
  600 }
  601 
  602 static PMAP_INLINE void
  603 invltlb_1pg(vm_offset_t va)
  604 {
  605 #if defined(I386_CPU)
  606         if (cpu_class == CPUCLASS_386) {
  607                 invltlb();
  608         } else
  609 #endif
  610         {
  611                 invlpg(va);
  612         }
  613 }
  614 
  615 static __inline void
  616 pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va)
  617 {
  618 #if defined(SMP)
  619         if (pmap->pm_active & (1 << cpuid))
  620                 cpu_invlpg((void *)va);
  621         if (pmap->pm_active & other_cpus)
  622                 smp_invltlb();
  623 #else
  624         if (pmap->pm_active)
  625                 invltlb_1pg(va);
  626 #endif
  627 }
  628 
  629 static __inline void
  630 pmap_TLB_invalidate_all(pmap_t pmap)
  631 {
  632 #if defined(SMP)
  633         if (pmap->pm_active & (1 << cpuid))
  634                 cpu_invltlb();
  635         if (pmap->pm_active & other_cpus)
  636                 smp_invltlb();
  637 #else
  638         if (pmap->pm_active)
  639                 invltlb();
  640 #endif
  641 }
  642 
  643 #ifdef PAE
  644 static __inline pt_entry_t
  645 pte_load(pt_entry_t *pte)
  646 {
  647         pt_entry_t rv = 0;
  648         __asm __volatile(MPLOCKED "cmpxchg8b %1"
  649             : "+A" (rv) : "m" (*pte), "b" (0), "c" (0));
  650         return rv;
  651 }
  652 
  653 static __inline pt_entry_t
  654 pte_store(pt_entry_t *pte, pt_entry_t v)
  655 {
  656         pt_entry_t rv = *pte;
  657         __asm __volatile("1:;" MPLOCKED "cmpxchg8b %1; jnz 1b"
  658             : "+A" (rv)
  659             : "m" (*pte), "b" ((u_int32_t)v), "c" ((u_int32_t)(v >> 32)));
  660         return rv;
  661 }
  662 #else
  663 static __inline pt_entry_t
  664 pte_load(pt_entry_t *pte)
  665 {
  666         return *pte;
  667 }
  668 
  669 static __inline pt_entry_t
  670 pte_store(pt_entry_t *pte, pt_entry_t v)
  671 {
  672         __asm __volatile("xchgl %1,%0" : "+r" (v) : "m" (*pte));
  673         return v;
  674 }
  675 #endif
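
/*
 * Illustrative note: with PAE a pt_entry_t is 64 bits wide, and a 32-bit
 * processor cannot read or write it atomically with ordinary moves.  A torn
 * access that mixed the halves of an old entry 0x00000001234A7067 and a new
 * entry 0x00000002567B8067 could observe a frame that was never mapped.
 * cmpxchg8b always transfers all eight bytes at once: pte_load offers a
 * compare value of 0 (storing 0 back only if the entry was already 0, and
 * otherwise returning the current contents), while pte_store loops until
 * its exchange of the old value for v succeeds and returns the old value.
 * The non-PAE variants above need only a plain load and an xchgl.
 */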
  676 
  677 /*
  678  * Are we current address space or kernel?
  679  */
  680 static __inline int
  681 pmap_is_current(pmap_t pmap)
  682 {
  683         return (pmap == kernel_pmap ||
  684             (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
  685 }
  686 
  687 /*
   688  *      Routine:        pmap_pte_quick
  689  *      Function:
  690  *              Extract the page table entry associated
  691  *              with the given map/virtual_address pair.
  692  *      Note: Must be protected by splvm()
  693  */
  694 
  695 static pt_entry_t *
  696 pmap_pte_quick(pmap_t pmap, vm_offset_t va)
  697 {
  698         pd_entry_t *pde, newpf;
  699 
  700 #ifdef INVARIANTS
  701         if (~cpl & (net_imask | bio_imask | cam_imask))
  702                 panic("pmap_pte_quick not protected by splvm()");
  703 #endif
  704         pde = pmap_pde(pmap, va);
  705         if (*pde & PG_V) {
  706                 if (*pde & PG_PS)
  707                         return (pt_entry_t *)pde;
  708                 if (pmap_is_current(pmap))
  709                         return vtopte(va);
  710                 newpf = *pde & PG_FRAME;
  711                 if ((*PMAP1 & PG_FRAME) != newpf) {
  712                         *PMAP1 = newpf | PG_RW | PG_V;
  713 #ifdef SMP
  714                         cpu_invlpg(PADDR1);
  715 #else
  716                         invltlb_1pg((vm_offset_t) PADDR1);
  717 #endif
  718                 }
  719                 return PADDR1 + (i386_btop(va) & (NPTEPG - 1));
  720         }
  721         return (0);
  722 }
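
/*
 * Illustrative note: PADDR1 is a single page of KVA reserved in
 * pmap_bootstrap(), and PMAP1 points at the PTE that maps it.  Writing the
 * foreign page table's physical frame into *PMAP1 and invalidating PADDR1's
 * TLB entry makes that page table readable at PADDR1, so the wanted entry
 * is PADDR1[i386_btop(va) & (NPTEPG - 1)]; e.g. for va = 0x0804B000
 * (non-PAE) this is PADDR1[0x4B].
 */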
  723 
  724 /*
  725  *      Routine:        pmap_pte
  726  *      Function:
  727  *              Extract the page table entry associated
  728  *              with the given map/virtual_address pair.
  729  *      Note: Must not be called from interrupts on non-current pmap
  730  */
  731 
  732 pt_entry_t *
  733 pmap_pte(pmap_t pmap, vm_offset_t va)
  734 {
  735         pd_entry_t *pde, newpf;
  736 
  737         pde = pmap_pde(pmap, va);
  738         if (*pde & PG_V) {
  739                 if (*pde & PG_PS)
  740                         return (pt_entry_t *)pde;
  741                 if (pmap_is_current(pmap))
  742                         return vtopte(va);
  743 #ifdef INVARIANTS
  744                 if (intr_nesting_level != 0) {
  745                         panic("pmap_pte called from interrupt");
  746                 }
  747 #endif
  748                 newpf = *pde & PG_FRAME;
  749                 if ((*PMAP2 & PG_FRAME) != newpf) {
  750                         *PMAP2 = newpf | PG_RW | PG_V;
  751 #ifdef SMP
  752                         cpu_invlpg(PADDR2);
  753 #else
  754                         invltlb_1pg((vm_offset_t) PADDR2);
  755 #endif
  756                 }
  757                 return PADDR2 + (i386_btop(va) & (NPTEPG - 1));
  758         }
  759         return (0);
  760 }
  761 
  762 /*
  763  *      Routine:        pmap_extract
  764  *      Function:
  765  *              Extract the physical page address associated
  766  *              with the given map/virtual_address pair.
  767  */
  768 vm_paddr_t 
  769 pmap_extract(pmap_t pmap, vm_offset_t va)
  770 {
  771         pt_entry_t *pte;
  772 
  773         if (pmap == 0)
  774                 return 0;
  775         pte = pmap_pte(pmap, va);
  776         if (pte) {
  777                 if (*pte & PG_PS)
  778                         return (*pte & ~PDRMASK) | (va & PDRMASK);
  779                 return (*pte & PG_FRAME) | (va & PAGE_MASK);
  780         }
  781         return 0;
  782 }
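
/*
 * Illustrative note (worked example, assuming the non-PAE constants
 * PG_FRAME == 0xFFFFF000, PAGE_MASK == 0xFFF and PDRMASK == 0x3FFFFF):
 * a 4K mapping with pte = 0x001A3067 yields, for va = 0x0804B123,
 *
 *      pa = (0x001A3067 & 0xFFFFF000) | (0x0804B123 & 0xFFF) = 0x001A3123
 *
 * while a 4MB (PG_PS) mapping keeps the low 22 bits of va instead.
 */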
  783 
  784 /***************************************************
  785  * Low level mapping routines.....
  786  ***************************************************/
  787 
  788 /*
   789  * Add a wired page to the kva.
   790  * Note that in order for the mapping to take effect -- you
   791  * should do an invltlb after doing the pmap_kenter.
  792  */
  793 PMAP_INLINE void 
  794 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
  795 {
  796         pt_entry_t *pte;
  797 
  798         pte = vtopte(va);
  799         *pte = pa | PG_RW | PG_V | pgeflag;
  800         invltlb_1pg(va);
  801 }
  802 
  803 /*
  804  * remove a page from the kernel pagetables
  805  */
  806 PMAP_INLINE void
  807 pmap_kremove(vm_offset_t va)
  808 {
  809         pt_entry_t *pte;
  810 
  811         pte = vtopte(va);
  812         *pte = 0;
  813         invltlb_1pg(va);
  814 }
  815 
  816 /*
  817  *      Used to map a range of physical addresses into kernel
  818  *      virtual address space.
  819  *
  820  *      For now, VM is already on, we only need to map the
  821  *      specified memory.
  822  */
  823 vm_offset_t
  824 pmap_map(vm_offset_t virt, vm_paddr_t start, vm_paddr_t end, int prot)
  825 {
  826         while (start < end) {
  827                 pmap_kenter(virt, start);
  828                 virt += PAGE_SIZE;
  829                 start += PAGE_SIZE;
  830         }
  831         return (virt);
  832 }
  833 
  834 
  835 /*
  836  * Add a list of wired pages to the kva
  837  * this routine is only used for temporary
  838  * kernel mappings that do not need to have
  839  * page modification or references recorded.
  840  * Note that old mappings are simply written
  841  * over.  The page *must* be wired.
  842  */
  843 void
  844 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
  845 {
  846         while (count-- > 0) {
  847                 pt_entry_t *pte = vtopte(va);
  848                 *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
  849 #ifdef SMP
  850                 cpu_invlpg((void *)va);
  851 #else
  852                 invltlb_1pg(va);
  853 #endif
  854                 va += PAGE_SIZE;
  855                 m++;
  856         }
  857 #ifdef SMP
  858         smp_invltlb();
  859 #endif
  860 }
  861 
  862 /*
  863  * this routine jerks page mappings from the
  864  * kernel -- it is meant only for temporary mappings.
  865  */
  866 void
  867 pmap_qremove(vm_offset_t va, int count)
  868 {
  869         while (count-- > 0) {
  870                 pt_entry_t *pte = vtopte(va);
  871                 *pte = 0;
  872 #ifdef SMP
  873                 cpu_invlpg((void *)va);
  874 #else
  875                 invltlb_1pg(va);
  876 #endif
  877                 va += PAGE_SIZE;
  878         }
  879 #ifdef SMP
  880         smp_invltlb();
  881 #endif
  882 }
  883 
  884 static vm_page_t
  885 pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
  886 {
  887         vm_page_t m;
  888 retry:
  889         m = vm_page_lookup(object, pindex);
  890         if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
  891                 goto retry;
  892         return m;
  893 }
  894 
  895 /*
  896  * Create the UPAGES for a new process.
  897  * This routine directly affects the fork perf for a process.
  898  */
  899 void
  900 pmap_new_proc(struct proc *p)
  901 {
  902         int i;
  903         vm_object_t upobj;
  904         vm_page_t m, ma[UPAGES];
  905         vm_offset_t up;
  906 
  907         /*
  908          * allocate object for the upages
  909          */
  910         if ((upobj = p->p_upages_obj) == NULL) {
  911                 upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
  912                 p->p_upages_obj = upobj;
  913         }
  914 
  915         /* get a kernel virtual address for the UPAGES for this proc */
  916         if ((up = (vm_offset_t) p->p_addr) == 0) {
  917                 up = kmem_alloc_nofault(kernel_map, UPAGES * PAGE_SIZE);
  918                 if (up == 0)
  919                         panic("pmap_new_proc: u_map allocation failed");
  920                 p->p_addr = (struct user *) up;
  921         }
  922 
  923         for(i = 0; i < UPAGES; i++) {
  924                 /*
  925                  * Get a kernel stack page
  926                  */
  927                 m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
  928                 ma[i] = m;
  929 
  930                 /*
  931                  * Wire the page
  932                  */
  933                 m->wire_count++;
  934                 cnt.v_wire_count++;
  935 
  936                 vm_page_wakeup(m);
  937                 vm_page_flag_clear(m, PG_ZERO);
  938                 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
  939                 m->valid = VM_PAGE_BITS_ALL;
  940         }
  941         pmap_qenter(up, ma, UPAGES);
  942 }
  943 
  944 /*
  945  * Dispose the UPAGES for a process that has exited.
  946  * This routine directly impacts the exit perf of a process.
  947  */
  948 void
  949 pmap_dispose_proc(struct proc *p)
  950 {
  951         int i;
  952         vm_object_t upobj;
  953         vm_page_t m;
  954 
  955         upobj = p->p_upages_obj;
  956         pmap_qremove((vm_offset_t) p->p_addr, UPAGES);
  957 
  958         for(i = 0; i < UPAGES; i++) {
  959                 if ((m = vm_page_lookup(upobj, i)) == NULL)
  960                         panic("pmap_dispose_proc: upage already missing???");
  961 
  962                 vm_page_busy(m);
  963                 vm_page_unwire(m, 0);
  964                 vm_page_free(m);
  965         }
  966 
  967         /*
  968          * If the process got swapped out some of its UPAGES might have gotten
  969          * swapped.  Just get rid of the object to clean up the swap use
  970          * proactively.  NOTE! might block waiting for paging I/O to complete.
  971          */
  972         if (upobj->type == OBJT_SWAP) {
  973                 p->p_upages_obj = NULL;
  974                 vm_object_deallocate(upobj);
  975         }
  976 }
  977 
  978 /*
  979  * Allow the UPAGES for a process to be prejudicially paged out.
  980  */
  981 void
  982 pmap_swapout_proc(struct proc *p)
  983 {
  984         int i;
  985         vm_object_t upobj;
  986         vm_page_t m;
  987 
  988         upobj = p->p_upages_obj;
  989         pmap_qremove((vm_offset_t) p->p_addr, UPAGES);
  990 
  991         /*
  992          * let the upages be paged
  993          */
  994         for(i = 0; i < UPAGES; i++) {
  995                 if ((m = vm_page_lookup(upobj, i)) == NULL)
  996                         panic("pmap_swapout_proc: upage already missing???");
  997                 vm_page_dirty(m);
  998                 vm_page_unwire(m, 0);
  999         }
 1000 }
 1001 
 1002 /*
 1003  * Bring the UPAGES for a specified process back in.
 1004  */
 1005 void
 1006 pmap_swapin_proc(struct proc *p)
 1007 {
 1008         int i, rv;
 1009         vm_object_t upobj;
 1010         vm_page_t m, ma[UPAGES];
 1011 
 1012         upobj = p->p_upages_obj;
 1013 
 1014         for(i = 0; i < UPAGES; i++) {
 1015                 m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 1016                 ma[i] = m;
 1017 
 1018                 if (m->valid != VM_PAGE_BITS_ALL) {
 1019                         rv = vm_pager_get_pages(upobj, &m, 1, 0);
 1020                         if (rv != VM_PAGER_OK)
 1021                                 panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
 1022                         m = vm_page_lookup(upobj, i);
 1023                         m->valid = VM_PAGE_BITS_ALL;
 1024                 }
 1025 
 1026                 vm_page_wire(m);
 1027                 vm_page_wakeup(m);
 1028                 vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
 1029         }
 1030 
 1031         pmap_qenter((vm_offset_t) p->p_addr, ma, UPAGES);
 1032 }
 1033 
 1034 /***************************************************
 1035  * Page table page management routines.....
 1036  ***************************************************/
 1037 
 1038 /*
 1039  * This routine unholds page table pages, and if the hold count
 1040  * drops to zero, then it decrements the wire count.
 1041  */
 1042 static int 
 1043 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
 1044 {
 1045         while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
 1046                 ;
 1047 
 1048         if (m->hold_count == 0) {
 1049                 vm_offset_t pteva;
 1050                 /*
 1051                  * unmap the page table page
 1052                  */
 1053                 pmap->pm_pdir[m->pindex] = 0;
 1054                 --pmap->pm_stats.resident_count;
 1055                 if (pmap_is_current(pmap)) {
 1056                         /*
 1057                          * Do a invltlb to make the invalidated mapping
 1058                          * take effect immediately.
 1059                          */
 1060                         pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
 1061                         pmap_TLB_invalidate(pmap, pteva);
 1062                 }
 1063 
 1064                 if (pmap->pm_ptphint == m)
 1065                         pmap->pm_ptphint = NULL;
 1066 
 1067                 /*
 1068                  * If the page is finally unwired, simply free it.
 1069                  */
 1070                 --m->wire_count;
 1071                 if (m->wire_count == 0) {
 1072                         vm_page_flash(m);
 1073                         vm_page_busy(m);
 1074                         vm_page_free_zero(m);
 1075                         --cnt.v_wire_count;
 1076                 }
 1077                 return 1;
 1078         }
 1079         return 0;
 1080 }
 1081 
 1082 static PMAP_INLINE int
 1083 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
 1084 {
 1085         vm_page_unhold(m);
 1086         if (m->hold_count == 0)
 1087                 return _pmap_unwire_pte_hold(pmap, m);
 1088         else
 1089                 return 0;
 1090 }
 1091 
 1092 /*
 1093  * After removing a page table entry, this routine is used to
 1094  * conditionally free the page, and manage the hold/wire counts.
 1095  */
 1096 static int
 1097 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
 1098 {
 1099         unsigned ptepindex;
 1100 
 1101         if (va >= UPT_MIN_ADDRESS)
 1102                 return 0;
 1103 
 1104         if (mpte == NULL) {
 1105                 ptepindex = (va >> PDRSHIFT);
 1106                 if (pmap->pm_ptphint &&
 1107                     (pmap->pm_ptphint->pindex == ptepindex)) {
 1108                         mpte = pmap->pm_ptphint;
 1109                 } else {
 1110                         mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
 1111                         pmap->pm_ptphint = mpte;
 1112                 }
 1113         }
 1114 
 1115         return pmap_unwire_pte_hold(pmap, mpte);
 1116 }
 1117 
 1118 void
 1119 pmap_pinit0(pmap_t pmap)
 1120 {
 1121         pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD);
 1122         pmap->pm_active = 0;
 1123         pmap->pm_ptphint = NULL;
 1124         TAILQ_INIT(&pmap->pm_pvlist);
 1125         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1126 #ifdef PAE
 1127         pmap->pm_pdpt = IdlePDPT;
 1128 #endif
 1129 }
 1130 
 1131 /*
 1132  * Initialize a preallocated and zeroed pmap structure,
 1133  * such as one in a vmspace structure.
 1134  */
 1135 void
 1136 pmap_pinit(pmap_t pmap)
 1137 {
 1138         vm_page_t m, ma[NPGPTD];
 1139         vm_paddr_t pa;
 1140         int i;
 1141 
 1142         /*
 1143          * No need to allocate page table space yet but we do need a valid
 1144          * page directory table.
 1145          */
 1146         if (pmap->pm_pdir == NULL) {
 1147                 pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map,
 1148                     NPGPTD * PAGE_SIZE);
 1149 #ifdef PAE
 1150                 pmap->pm_pdpt = pmap_alloc_pdpt();
 1151 #endif
 1152         }
 1153 
 1154         /*
 1155          * allocate object for the ptes
 1156          */
 1157         if (pmap->pm_pteobj == NULL)
 1158                 pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT,
 1159                     PTDPTDI + NPGPTD);
 1160 
 1161         /*
 1162          * allocate the page directory page
 1163          */
 1164         for (i = 0; i < NPGPTD; i++) {
 1165                 m = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i,
 1166                     VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 1167                 ma[i] = m;
 1168 
 1169                 m->wire_count = 1;
 1170                 ++cnt.v_wire_count;
 1171 
 1172                 vm_page_flag_clear(m, PG_MAPPED | PG_BUSY);
 1173                 m->valid = VM_PAGE_BITS_ALL;
 1174         }
 1175 
 1176         pmap_qenter((vm_offset_t)pmap->pm_pdir, ma, NPGPTD);
 1177 
 1178         for (i = 0; i < NPGPTD; i++) {
 1179                 if ((ma[i]->flags & PG_ZERO) == 0)
 1180                         bzero(pmap->pm_pdir + i * NPDEPG, PAGE_SIZE);
 1181         }
 1182 
 1183 #ifdef SMP
 1184         pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
 1185 #endif
 1186 
 1187         /* install self-referential address mapping entry */
 1188         for (i = 0; i < NPGPTD; i++) {
 1189                 pa = VM_PAGE_TO_PHYS(ma[i]);
 1190                 pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
 1191 #ifdef PAE
 1192                 pmap->pm_pdpt[i] = pa | PG_V;
 1193 #endif
 1194         }
 1195 
 1196         pmap->pm_active = 0;
 1197         pmap->pm_ptphint = NULL;
 1198         TAILQ_INIT(&pmap->pm_pvlist);
 1199         bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 1200 }
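
/*
 * Illustrative note: the "self-referential" entries installed above point
 * the PDE slots starting at PTDPTDI back at the page directory pages
 * themselves.  Treated as a page table, the directory then exposes every
 * page table page of this pmap in one fixed virtual window: for the current
 * pmap the PTE for va can be found at
 *
 *      (pt_entry_t *)(PTDPTDI << PDRSHIFT) + i386_btop(va)
 *
 * which is the arrangement vtopte() relies on in pmap_pte()/pmap_pte_quick().
 */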
 1201 
 1202 /*
 1203  * Wire in kernel global address entries.  To avoid a race condition
 1204  * between pmap initialization and pmap_growkernel, this procedure
 1205  * should be called after the vmspace is attached to the process
 1206  * but before this pmap is activated.
 1207  */
 1208 void
 1209 pmap_pinit2(pmap_t pmap)
 1210 {
 1211         /* XXX copies current process, does not fill in MPPTDI */
 1212         bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PDESIZE);
 1213 }
 1214 
 1215 /*
 1216  * this routine is called if the page table page is not
 1217  * mapped correctly.
 1218  */
 1219 static vm_page_t
 1220 _pmap_allocpte(pmap_t pmap, unsigned ptepindex)
 1221 {
 1222         vm_paddr_t ptepa;
 1223         vm_page_t m;
 1224 
 1225         /*
 1226          * Find or fabricate a new pagetable page
 1227          */
 1228         m = vm_page_grab(pmap->pm_pteobj, ptepindex,
 1229                         VM_ALLOC_ZERO);
 1230         if (m == NULL) {
 1231                 VM_WAIT;
 1232                 /*
 1233                  * Indicate the need to retry.  While waiting, the page table
 1234                  * page may have been allocated.
 1235                  */
 1236                 return (NULL);
 1237         }
 1238         if ((m->flags & PG_ZERO) == 0)
 1239                 pmap_zero_page(VM_PAGE_TO_PHYS(m));
 1240 
 1241         KASSERT(m->queue == PQ_NONE,
 1242                 ("_pmap_allocpte: %p->queue != PQ_NONE", m));
 1243 
 1244         if (m->wire_count == 0)
 1245                 cnt.v_wire_count++;
 1246         m->wire_count++;
 1247 
 1248         /*
 1249          * Increment the hold count for the page table page
 1250          * (denoting a new mapping.)
 1251          */
 1252         m->hold_count++;
 1253 
 1254         /*
 1255          * Map the pagetable page into the process address space, if
 1256          * it isn't already there.
 1257          */
 1258 
 1259         pmap->pm_stats.resident_count++;
 1260 
 1261         ptepa = VM_PAGE_TO_PHYS(m);
 1262         pmap->pm_pdir[ptepindex] = ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M;
 1263 
 1264         /*
 1265          * Set the page table hint
 1266          */
 1267         pmap->pm_ptphint = m;
 1268 
 1269         m->valid = VM_PAGE_BITS_ALL;
 1270         vm_page_flag_clear(m, PG_ZERO);
 1271         vm_page_flag_set(m, PG_MAPPED);
 1272         vm_page_wakeup(m);
 1273 
 1274         return m;
 1275 }
 1276 
 1277 static vm_page_t
 1278 pmap_allocpte(pmap_t pmap, vm_offset_t va)
 1279 {
 1280         unsigned ptepindex;
 1281         pd_entry_t pde;
 1282         vm_page_t m;
 1283 
 1284         /*
 1285          * Calculate pagetable page index
 1286          */
 1287         ptepindex = va >> PDRSHIFT;
 1288 
 1289 retry:
 1290         /*
 1291          * Get the page directory entry
 1292          */
 1293         pde = pmap->pm_pdir[ptepindex];
 1294 
 1295         /*
 1296          * This supports switching from a 4MB page to a
 1297          * normal 4K page.
 1298          */
 1299         if (pde & PG_PS) {
 1300                 pmap->pm_pdir[ptepindex] = 0;
 1301                 pde = 0;
 1302                 invltlb();
 1303         }
 1304 
 1305         /*
 1306          * If the page table page is mapped, we just increment the
 1307          * hold count, and activate it.
 1308          */
 1309         if (pde & PG_V) {
 1310                 /*
 1311                  * In order to get the page table page, try the
 1312                  * hint first.
 1313                  */
 1314                 if (pmap->pm_ptphint && pmap->pm_ptphint->pindex == ptepindex) {
 1315                         m = pmap->pm_ptphint;
 1316                 } else {
 1317                         m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
 1318                         pmap->pm_ptphint = m;
 1319                 }
 1320                 m->hold_count++;
 1321         } else {
 1322                 /*
 1323                  * Here if the pte page isn't mapped, or if it has
 1324                  * been deallocated.
 1325                  */
 1326                 m = _pmap_allocpte(pmap, ptepindex);
 1327                 if (m == NULL)
 1328                         goto retry;
 1329         }
 1330         return (m);
 1331 }
 1332 
 1333 
 1334 /***************************************************
  1335  * Pmap allocation/deallocation routines.
 1336  ***************************************************/
 1337 
 1338 /*
 1339  * Release any resources held by the given physical map.
 1340  * Called when a pmap initialized by pmap_pinit is being released.
 1341  * Should only be called if the map contains no valid mappings.
 1342  */
 1343 void
 1344 pmap_release(pmap_t pmap)
 1345 {
 1346         vm_page_t m;
 1347         vm_object_t object = pmap->pm_pteobj;
 1348 
 1349         bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * PDESIZE);
 1350 #ifdef SMP
 1351         pmap->pm_pdir[MPPTDI] = 0;
 1352 #endif
 1353         pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
 1354 
 1355         while ((m = TAILQ_FIRST(&object->memq))) {
 1356                 if (m->pindex < PTDPTDI || m->pindex >= KPTDI)
 1357                         panic("pmap_release: non ptd page");
 1358                 m->wire_count--;
 1359                 cnt.v_wire_count--;
 1360                 vm_page_busy(m);
 1361                 vm_page_free_zero(m);
 1362         }
 1363 }
 1364 
 1365 
 1366 static int
 1367 kvm_size(SYSCTL_HANDLER_ARGS)
 1368 {
 1369         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 1370 
 1371         return sysctl_handle_long(oidp, &ksize, 0, req);
 1372 }
 1373 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 1374     0, 0, kvm_size, "IU", "Size of KVM");
 1375 
 1376 static int
 1377 kvm_free(SYSCTL_HANDLER_ARGS)
 1378 {
 1379         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1380 
 1381         return sysctl_handle_long(oidp, &kfree, 0, req);
 1382 }
 1383 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 1384     0, 0, kvm_free, "IU", "Amount of KVM free");
 1385 
 1386 /*
 1387  * grow the number of kernel page table entries, if needed
 1388  */
 1389 void
 1390 pmap_growkernel(vm_offset_t addr)
 1391 {
 1392         struct proc *p;
 1393         struct pmap *pmap;
 1394         int s;
 1395         vm_paddr_t ptppaddr;
 1396         vm_page_t nkpg;
 1397         pd_entry_t newpdir;
 1398 
 1399         s = splhigh();
 1400         if (kernel_vm_end == 0) {
 1401                 kernel_vm_end = KERNBASE;
 1402                 nkpt = 0;
 1403                 while (pdir_pde(PTD, kernel_vm_end) & PG_V) {
 1404                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
 1405                             ~(PAGE_SIZE * NPTEPG - 1);
 1406                         nkpt++;
 1407                 }
 1408         }
 1409         addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 1410         while (kernel_vm_end < addr) {
 1411                 if (pdir_pde(PTD, kernel_vm_end) & PG_V) {
 1412                         kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
 1413                             ~(PAGE_SIZE * NPTEPG - 1);
 1414                         continue;
 1415                 }
 1416 
 1417                 /*
 1418                  * This index is bogus, but out of the way
 1419                  */
 1420                 nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
 1421                 if (!nkpg)
 1422                         panic("pmap_growkernel: no memory to grow kernel");
 1423 
 1424                 nkpt++;
 1425 
 1426                 vm_page_wire(nkpg);
 1427                 ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 1428                 pmap_zero_page(ptppaddr);
 1429                 newpdir = ptppaddr | PG_V | PG_RW | PG_A | PG_M;
 1430                 pdir_pde(PTD, kernel_vm_end) = newpdir;
 1431 
 1432                 LIST_FOREACH(p, &allproc, p_list) {
 1433                         if (p->p_vmspace) {
 1434                                 pmap = vmspace_pmap(p->p_vmspace);
 1435                                 *pmap_pde(pmap, kernel_vm_end) = newpdir;
 1436                         }
 1437                 }
 1438                 *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
 1439                 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
 1440                     ~(PAGE_SIZE * NPTEPG - 1);
 1441         }
 1442         splx(s);
 1443 }
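
/*
 * Illustrative note (worked example, assuming the non-PAE case where each
 * kernel PDE covers PAGE_SIZE * NPTEPG = 4MB): the rounding above advances
 * past the 4MB region containing addr, e.g.
 *
 *      addr = 0xC1234567  ->  (0xC1234567 + 0x400000) & ~0x3FFFFF = 0xC1400000
 *
 * so the loop allocates a kernel page table for every 4MB region up to and
 * including the one that holds the requested address.
 */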
 1444 
 1445 /***************************************************
  1446  * Page management routines.
 1447  ***************************************************/
 1448 
 1449 /*
 1450  * free the pv_entry back to the free list
 1451  */
 1452 static PMAP_INLINE void
 1453 free_pv_entry(pv_entry_t pv)
 1454 {
 1455         pv_entry_count--;
 1456         zfreei(pvzone, pv);
 1457 }
 1458 
 1459 /*
 1460  * get a new pv_entry, allocating a block from the system
 1461  * when needed.
 1462  * the memory allocation is performed bypassing the malloc code
 1463  * because of the possibility of allocations at interrupt time.
 1464  */
 1465 static pv_entry_t
 1466 get_pv_entry(void)
 1467 {
 1468         pv_entry_count++;
 1469         if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) &&
 1470             (pmap_pagedaemon_waken == 0)) {
 1471                 pmap_pagedaemon_waken = 1;
 1472                 wakeup (&vm_pages_needed);
 1473         }
 1474         return zalloci(pvzone);
 1475 }
 1476 
 1477 /*
 1478  * This routine is very drastic, but can save the system
 1479  * in a pinch.
 1480  */
 1481 void
 1482 pmap_collect()
 1483 {
 1484         int i;
 1485         vm_page_t m;
 1486         static int warningdone=0;
 1487 
 1488         if (pmap_pagedaemon_waken == 0)
 1489                 return;
 1490 
 1491         if (warningdone < 5) {
 1492                 printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
 1493                 warningdone++;
 1494         }
 1495 
 1496         for(i = 0; i < vm_page_array_size; i++) {
 1497                 m = &vm_page_array[i];
 1498                 if (m->wire_count || m->hold_count || m->busy ||
 1499                     (m->flags & PG_BUSY))
 1500                         continue;
 1501                 pmap_remove_all(m);
 1502         }
 1503         pmap_pagedaemon_waken = 0;
 1504 }
 1505         
 1506 
 1507 /*
 1508  * If it is the first entry on the list, it is actually
 1509  * in the header and we must copy the following entry up
 1510  * to the header.  Otherwise we must search the list for
 1511  * the entry.  In either case we free the now unused entry.
 1512  */
 1513 
 1514 static int
 1515 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
 1516 {
 1517         pv_entry_t pv;
 1518         int rtval;
 1519         int s;
 1520 
 1521         s = splvm();
 1522         if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 1523                 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 1524                         if (pmap == pv->pv_pmap && va == pv->pv_va) 
 1525                                 break;
 1526                 }
 1527         } else {
 1528                 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
 1529                         if (va == pv->pv_va) 
 1530                                 break;
 1531                 }
 1532         }
 1533 
 1534         rtval = 0;
 1535         if (pv) {
 1536                 rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
 1537                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1538                 m->md.pv_list_count--;
 1539                 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 1540                         vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 1541 
 1542                 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 1543                 free_pv_entry(pv);
 1544         }
 1545                         
 1546         splx(s);
 1547         return rtval;
 1548 }
 1549 
 1550 /*
 1551  * Create a pv entry for page at pa for
 1552  * (pmap, va).
 1553  */
 1554 static void
 1555 pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
 1556 {
 1557         int s;
 1558         pv_entry_t pv;
 1559 
 1560         s = splvm();
 1561         pv = get_pv_entry();
 1562         pv->pv_va = va;
 1563         pv->pv_pmap = pmap;
 1564         pv->pv_ptem = mpte;
 1565 
 1566         TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 1567         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 1568         m->md.pv_list_count++;
 1569 
 1570         splx(s);
 1571 }
 1572 
 1573 /*
 1574  * pmap_remove_pte: do the things to unmap a page in a process
 1575  */
 1576 static int
 1577 pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
 1578 {
 1579         pt_entry_t oldpte;
 1580         vm_page_t m;
 1581 
 1582         oldpte = pte_store(ptq, 0);
 1583         if (oldpte & PG_W)
 1584                 pmap->pm_stats.wired_count -= 1;
 1585         /*
  1586          * Machines that don't support invlpg also don't support
 1587          * PG_G.
 1588          */
 1589         if (oldpte & PG_G)
 1590                 invlpg(va);
 1591         pmap->pm_stats.resident_count -= 1;
 1592         if (oldpte & PG_MANAGED) {
 1593                 m = PHYS_TO_VM_PAGE(oldpte);
 1594                 if (oldpte & PG_M) {
 1595 #if defined(PMAP_DIAGNOSTIC)
 1596                         if (pmap_nw_modified(oldpte)) {
 1597                                 printf(
 1598         "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1599                                     va, oldpte);
 1600                         }
 1601 #endif
 1602                         if (pmap_track_modified(va))
 1603                                 vm_page_dirty(m);
 1604                 }
 1605                 if (oldpte & PG_A)
 1606                         vm_page_flag_set(m, PG_REFERENCED);
 1607                 return pmap_remove_entry(pmap, m, va);
 1608         } else {
 1609                 return pmap_unuse_pt(pmap, va, NULL);
 1610         }
 1611 
 1612         return 0;
 1613 }
 1614 
 1615 /*
 1616  * Remove a single page from a process address space
 1617  */
 1618 static void
 1619 pmap_remove_page(pmap_t pmap, vm_offset_t va)
 1620 {
 1621         pt_entry_t *pte;
 1622 
 1623         /*
 1624          * get a local va for mappings for this pmap.
 1625          */
 1626         pte = pmap_pte(pmap, va);
 1627         if (!pte)
 1628                 return;
 1629         if (*pte & PG_V) {
 1630                 (void) pmap_remove_pte(pmap, pte, va);
 1631                 pmap_TLB_invalidate(pmap, va);
 1632         }
 1633         return;
 1634 }
 1635 
 1636 /*
 1637  *      Remove the given range of addresses from the specified map.
 1638  *
 1639  *      It is assumed that the start and end are properly
 1640  *      rounded to the page size.
 1641  */
 1642 void
 1643 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 1644 {
 1645         pt_entry_t *pte;
 1646         pd_entry_t pde;
 1647         vm_offset_t nva;
 1648         int anyvalid;
 1649 
 1650         if (pmap == NULL)
 1651                 return;
 1652 
 1653         if (pmap->pm_stats.resident_count == 0)
 1654                 return;
 1655 
 1656         /*
 1657          * Special handling for removing a single page: this is a very
 1658          * common operation, so it is worth short-circuiting the more
 1659          * general code below.
 1660          */
 1661         if (sva + PAGE_SIZE == eva && 
 1662             (pmap->pm_pdir[sva >> PDRSHIFT] & PG_PS) == 0) {
 1663                 pmap_remove_page(pmap, sva);
 1664                 return;
 1665         }
 1666 
 1667         anyvalid = 0;
 1668 
 1669         /*
 1670          * Get a local virtual address for the mappings that are being
 1671          * worked with.
 1672          */
 1673 
 1674         for (; sva < eva; sva = nva) {
 1675                 unsigned pdirindex;
 1676 
 1677                 /*
 1678                  * Calculate address for next page table.
 1679                  */
 1680                 nva = (sva + NBPDR) & ~PDRMASK;
 1681 
 1682                 if (pmap->pm_stats.resident_count == 0)
 1683                         break;
 1684 
 1685                 pdirindex = sva >> PDRSHIFT;
 1686                 if (((pde = pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 1687                         pmap->pm_pdir[pdirindex] = 0;
 1688                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1689                         anyvalid++;
 1690                         continue;
 1691                 }
 1692 
 1693                 /*
 1694                  * Weed out invalid mappings. Note: we assume that the page
 1695                  * directory table is always allocated, and in kernel virtual.
 1696                  */
 1697                 if ((pde & PG_V) == 0)
 1698                         continue;
 1699 
 1700                 /*
 1701                  * Limit our scan to either the end of the va represented
 1702                  * by the current page table page, or to the end of the
 1703                  * range being removed.
 1704                  */
 1705                 if (nva > eva)
 1706                         nva = eva;
 1707 
 1708                 pte = pmap_pte(pmap, sva);
 1709                 for (; sva < nva; sva += PAGE_SIZE, pte++) {
 1710                         if ((*pte & PG_V) == 0)
 1711                                 continue;
 1712                         
 1713                         anyvalid++;
 1714                         if (pmap_remove_pte(pmap, pte, sva))
 1715                                 break;
 1716                 }
 1717         }
 1718 
 1719         if (anyvalid)
 1720                 pmap_TLB_invalidate_all(pmap);
 1721 }
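
/*
 * A minimal usage sketch with hypothetical locals "start" and "len"; as the
 * comment above notes, the bounds handed to pmap_remove() are expected to
 * be page aligned:
 *
 *	vm_offset_t sva = trunc_page(start);
 *	vm_offset_t eva = round_page(start + len);
 *
 *	pmap_remove(pmap, sva, eva);
 */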
 1722 
 1723 /*
 1724  *      Routine:        pmap_remove_all
 1725  *      Function:
 1726  *              Removes this physical page from
 1727  *              all physical maps in which it resides.
 1728  *              Reflects back modify bits to the pager.
 1729  *
 1730  *      Notes:
 1731  *              Original versions of this routine were very
 1732  *              inefficient because they iteratively called
 1733  *              pmap_remove (slow...)
 1734  */
 1735 
 1736 static void
 1737 pmap_remove_all(vm_page_t m)
 1738 {
 1739         pv_entry_t pv;
 1740         pt_entry_t *pte, tpte;
 1741         int s;
 1742 
 1743 #if defined(PMAP_DIAGNOSTIC)
 1744         /*
 1745          * XXX this makes pmap_page_protect(NONE) illegal for non-managed
 1746          * pages!
 1747          */
 1748         if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
 1749                 panic("pmap_remove_all: illegal for unmanaged page, pa: 0x%x", VM_PAGE_TO_PHYS(m));
 1750         }
 1751 #endif
 1752 
 1753         s = splvm();
 1754         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 1755                 pv->pv_pmap->pm_stats.resident_count--;
 1756 
 1757                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 1758 
 1759                 tpte = pte_store(pte, 0);
 1760                 if (tpte & PG_W)
 1761                         pv->pv_pmap->pm_stats.wired_count--;
 1762 
 1763                 if (tpte & PG_A)
 1764                         vm_page_flag_set(m, PG_REFERENCED);
 1765 
 1766                 /*
 1767                  * Update the vm_page_t clean and reference bits.
 1768                  */
 1769                 if (tpte & PG_M) {
 1770 #if defined(PMAP_DIAGNOSTIC)
 1771                         if (pmap_nw_modified(tpte)) {
 1772                                 printf(
 1773         "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1774                                     pv->pv_va, tpte);
 1775                         }
 1776 #endif
 1777                         if (pmap_track_modified(pv->pv_va))
 1778                                 vm_page_dirty(m);
 1779                 }
 1780                 pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 1781 
 1782                 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 1783                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 1784                 m->md.pv_list_count--;
 1785                 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
 1786                 free_pv_entry(pv);
 1787         }
 1788 
 1789         vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 1790 
 1791         splx(s);
 1792 }
 1793 
 1794 /*
 1795  *      Set the physical protection on the
 1796  *      specified range of this map as requested.
 1797  */
 1798 void
 1799 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 1800 {
 1801         pt_entry_t *pte;
 1802         pd_entry_t pde;
 1803         vm_offset_t nva;
 1804         int anychanged;
 1805 
 1806         if (pmap == NULL)
 1807                 return;
 1808 
 1809         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 1810                 pmap_remove(pmap, sva, eva);
 1811                 return;
 1812         }
 1813 
 1814         if (prot & VM_PROT_WRITE)
 1815                 return;
 1816 
 1817         anychanged = 0;
 1818 
 1819         for (; sva < eva; sva = nva) {
 1820 
 1821                 unsigned pdirindex;
 1822 
 1823                 nva = (sva + NBPDR) & ~PDRMASK;
 1824 
 1825                 pdirindex = sva >> PDRSHIFT;
 1826                 if (((pde = pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 1827                         pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
 1828                         pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 1829                         anychanged++;
 1830                         continue;
 1831                 }
 1832 
 1833                 /*
 1834                  * Weed out invalid mappings. Note: we assume that the page
 1835                  * directory table is always allocated, and in kernel virtual.
 1836                  */
 1837                 if ((pde & PG_V) == 0)
 1838                         continue;
 1839 
 1840                 if (nva > eva)
 1841                         nva = eva;
 1842 
 1843                 pte = pmap_pte(pmap, sva);
 1844                 for (; sva < nva; sva += PAGE_SIZE, pte++) {
 1845                         pt_entry_t pbits;
 1846                         vm_page_t m;
 1847 
 1848                         pbits = *pte;
 1849 
 1850                         if (pbits & PG_MANAGED) {
 1851                                 m = NULL;
 1852                                 if (pbits & PG_A) {
 1853                                         m = PHYS_TO_VM_PAGE(pbits);
 1854                                         vm_page_flag_set(m, PG_REFERENCED);
 1855                                         pbits &= ~PG_A;
 1856                                 }
 1857                                 if (pbits & PG_M) {
 1858                                         if (pmap_track_modified(sva)) {
 1859                                                 if (m == NULL)
 1860                                                         m = PHYS_TO_VM_PAGE(pbits);
 1861                                                 vm_page_dirty(m);
 1862                                                 pbits &= ~PG_M;
 1863                                         }
 1864                                 }
 1865                         }
 1866 
 1867                         pbits &= ~PG_RW;
 1868 
 1869                         if (pbits != *pte) {
 1870                                 *pte = pbits;
 1871                                 anychanged = 1;
 1872                         }
 1873                 }
 1874         }
 1875         if (anychanged)
 1876                 pmap_TLB_invalidate_all(pmap);
 1877 }
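
/*
 * A minimal sketch of how pmap_protect() is driven, with hypothetical
 * locals.  Because of the early checks above, the caller simply passes the
 * new protection: VM_PROT_READ downgrades the range to read-only, a
 * protection that still includes VM_PROT_WRITE is a no-op, and VM_PROT_NONE
 * (no read access) falls through to pmap_remove():
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);
 *	pmap_protect(pmap, sva, eva, VM_PROT_NONE);
 */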
 1878 
 1879 /*
 1880  *      Insert the given physical page (p) at
 1881  *      the specified virtual address (v) in the
 1882  *      target physical map with the protection requested.
 1883  *
 1884  *      If specified, the page will be wired down, meaning
 1885  *      that the related pte can not be reclaimed.
 1886  *
 1887  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 1888  *      or lose information.  That is, this routine must actually
 1889  *      insert this page into the given map NOW.
 1890  */
 1891 void
 1892 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 1893            boolean_t wired)
 1894 {
 1895         vm_paddr_t pa, opa;
 1896         pt_entry_t *pte, origpte, newpte;
 1897         vm_page_t mpte;
 1898 
 1899         if (pmap == NULL)
 1900                 return;
 1901 
 1902         va &= PG_FRAME;
 1903 #ifdef PMAP_DIAGNOSTIC
 1904         if (va > VM_MAX_KERNEL_ADDRESS)
 1905                 panic("pmap_enter: toobig");
 1906         if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 1907                 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
 1908 #endif
 1909 
 1910         mpte = NULL;
 1911         /*
 1912          * In the case that a page table page is not
 1913          * resident, we are creating it here.
 1914          */
 1915         if (va < UPT_MIN_ADDRESS) {
 1916                 mpte = pmap_allocpte(pmap, va);
 1917         }
 1918 #if 0 && defined(PMAP_DIAGNOSTIC)
 1919         else {
 1920                 pd_entry_t *pdeaddr = pmap_pde(pmap, va);
 1921                 if (((origpte = *pdeaddr) & PG_V) == 0) { 
 1922                         panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
 1923                                 pmap->pm_pdir[PTDPTDI], origpte, va);
 1924                 }
 1925                 if (smp_active) {
 1926                         pdeaddr = IdlePTDS[cpuid];
 1927                         if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
 1928                                 if (my_idlePTD != vtophys(pdeaddr))
 1929                                         printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
 1930                                 printf("cpuid: %d, pdeaddr: 0x%x\n", cpuid, pdeaddr);
 1931                                 panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
 1932                                         pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
 1933                         }
 1934                 }
 1935         }
 1936 #endif
 1937 
 1938         pte = pmap_pte(pmap, va);
 1939 
 1940         /*
 1941          * Page Directory table entry not valid, we need a new PT page
 1942          */
 1943         if (pte == NULL) {
 1944                 panic("pmap_enter: invalid page directory pdir=%#llx, va=%#x\n",
 1945                         (u_int64_t)pmap->pm_pdir[PTDPTDI], va);
 1946         }
 1947 
 1948         pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
 1949         origpte = *pte;
 1950         opa = origpte & PG_FRAME;
 1951 
 1952         if (origpte & PG_PS)
 1953                 panic("pmap_enter: attempted pmap_enter on 4MB page");
 1954 
 1955         /*
 1956          * Mapping has not changed, must be protection or wiring change.
 1957          */
 1958         if ((origpte & PG_V) && (opa == pa)) {
 1959                 /*
 1960                  * Wiring change, just update stats. We don't worry about
 1961                  * wiring PT pages as they remain resident as long as there
 1962                  * are valid mappings in them. Hence, if a user page is wired,
 1963                  * the PT page will be also.
 1964                  */
 1965                 if (wired && ((origpte & PG_W) == 0))
 1966                         pmap->pm_stats.wired_count++;
 1967                 else if (!wired && (origpte & PG_W))
 1968                         pmap->pm_stats.wired_count--;
 1969 
 1970 #if defined(PMAP_DIAGNOSTIC)
 1971                 if (pmap_nw_modified((pt_entry_t) origpte)) {
 1972                         printf(
 1973         "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
 1974                             va, origpte);
 1975                 }
 1976 #endif
 1977 
 1978                 /*
 1979                  * Remove extra pte reference
 1980                  */
 1981                 if (mpte)
 1982                         mpte->hold_count--;
 1983 
 1984                 if ((prot & VM_PROT_WRITE)) {
 1985                         if ((origpte & PG_RW) == 0) {
 1986                                 *pte |= PG_RW;
 1987                                 pmap_TLB_invalidate(pmap, va);
 1988                         }
 1989                         return;
 1990                 }
 1991 
 1992                 /*
 1993                  * We might be turning off write access to the page,
 1994                  * so we go ahead and sense modify status.
 1995                  */
 1996                 if (origpte & PG_MANAGED) {
 1997                         if ((origpte & PG_M) && pmap_track_modified(va)) {
 1998                                 vm_page_t om;
 1999                                 om = PHYS_TO_VM_PAGE(opa);
 2000                                 vm_page_dirty(om);
 2001                         }
 2002                         pa |= PG_MANAGED;
 2003                 }
 2004                 goto validate;
 2005         }
 2006         /*
 2007          * Mapping has changed, invalidate old range and fall through to
 2008          * handle validating new mapping.
 2009          */
 2010         if ((origpte & PG_V)) {
 2011                 int err;
 2012                 err = pmap_remove_pte(pmap, pte, va);
 2013                 if (err)
 2014                         panic("pmap_enter: pte vanished, va: 0x%x", va);
 2015         }
 2016 
 2017         /*
 2018          * Enter on the PV list if part of our managed memory. Note that we
 2019          * raise IPL while manipulating pv_table since pmap_enter can be
 2020          * called at interrupt time.
 2021          */
 2022         if (pmap_initialized && !(m->flags & (PG_FICTITIOUS|PG_UNMANAGED))) {
 2023                 pmap_insert_entry(pmap, va, mpte, m);
 2024                 pa |= PG_MANAGED;
 2025         }
 2026 
 2027         /*
 2028          * Increment counters
 2029          */
 2030         pmap->pm_stats.resident_count++;
 2031         if (wired)
 2032                 pmap->pm_stats.wired_count++;
 2033 
 2034 validate:
 2035         /*
 2036          * Now validate mapping with desired protection/wiring.
 2037          */
 2038         newpte = pa | pte_prot(pmap, prot) | PG_V;
 2039 
 2040         if (wired)
 2041                 newpte |= PG_W;
 2042         if (va < UPT_MIN_ADDRESS)
 2043                 newpte |= PG_U;
 2044         if (pmap == kernel_pmap)
 2045                 newpte |= pgeflag;
 2046 
 2047         /*
 2048          * if the mapping or permission bits are different, we need
 2049          * to update the pte.
 2050          */
 2051         if ((origpte & ~(PG_M|PG_A)) != newpte) {
 2052                 *pte = newpte | PG_A;
 2053                 pmap_TLB_invalidate(pmap, va);
 2054         }
 2055 }
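
/*
 * A minimal sketch of the usual fault-time call, with hypothetical locals
 * for the target pmap, faulting address, backing page, protection and
 * wiring; the caller is assumed to keep the page valid across the call:
 *
 *	pmap_enter(pmap, trunc_page(vaddr), m, prot, wired);
 *
 * As the code above shows, an existing mapping to the same physical page is
 * revalidated in place (protection or wiring change); the pte is only torn
 * down and rebuilt when the backing physical page changes.
 */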
 2056 
 2057 /*
 2058  * This code makes some *MAJOR* assumptions:
 2059  * 1. The current pmap and the given pmap exist.
 2060  * 2. The mapping is not wired.
 2061  * 3. Read access only.
 2062  * 4. No page table pages.
 2063  * 5. The TLB flush is deferred to the calling procedure.
 2064  * 6. The page IS managed.
 2065  * Given those assumptions, it is *MUCH* faster than pmap_enter...
 2066  */
 2067 
 2068 static vm_page_t
 2069 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
 2070 {
 2071         pt_entry_t *pte;
 2072         vm_paddr_t pa;
 2073 
 2074         /*
 2075          * In the case that a page table page is not
 2076          * resident, we are creating it here.
 2077          */
 2078         if (va < UPT_MIN_ADDRESS) {
 2079                 unsigned ptepindex;
 2080                 pd_entry_t pde;
 2081 
 2082                 /*
 2083                  * Calculate pagetable page index
 2084                  */
 2085                 ptepindex = va >> PDRSHIFT;
 2086                 if (mpte && (mpte->pindex == ptepindex)) {
 2087                         mpte->hold_count++;
 2088                 } else {
 2089 retry:
 2090                         /*
 2091                          * Get the page directory entry
 2092                          */
 2093                         pde = pmap->pm_pdir[ptepindex];
 2094 
 2095                         /*
 2096                          * If the page table page is mapped, we just increment
 2097                          * the hold count, and activate it.
 2098                          */
 2099                         if ((pde & PG_V)) {
 2100                                 if (pde & PG_PS)
 2101                                         panic("pmap_enter_quick: unexpected mapping into 4MB page");
 2102                                 if (pmap->pm_ptphint &&
 2103                                     (pmap->pm_ptphint->pindex == ptepindex)) {
 2104                                         mpte = pmap->pm_ptphint;
 2105                                 } else {
 2106                                         mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
 2107                                         pmap->pm_ptphint = mpte;
 2108                                 }
 2109                                 if (mpte == NULL)
 2110                                         goto retry;
 2111                                 mpte->hold_count++;
 2112                         } else {
 2113                                 mpte = _pmap_allocpte(pmap, ptepindex);
 2114                                 if (mpte == NULL)
 2115                                         goto retry;
 2116                         }
 2117                 }
 2118         } else {
 2119                 mpte = NULL;
 2120         }
 2121 
 2122         /*
 2123          * This call to vtopte makes the assumption that we are
 2124          * entering the page into the current pmap.  In order to support
 2125          * quick entry into any pmap, one would likely use pmap_pte.
 2126          * But that isn't as quick as vtopte.
 2127          */
 2128         pte = vtopte(va);
 2129         if (*pte) {
 2130                 if (mpte)
 2131                         pmap_unwire_pte_hold(pmap, mpte);
 2132                 return 0;
 2133         }
 2134 
 2135         /*
 2136          * Enter on the PV list if part of our managed memory. Note that we
 2137          * raise IPL while manipulating pv_table since pmap_enter can be
 2138          * called at interrupt time.
 2139          */
 2140         if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 2141                 pmap_insert_entry(pmap, va, mpte, m);
 2142 
 2143         /*
 2144          * Increment counters
 2145          */
 2146         pmap->pm_stats.resident_count++;
 2147 
 2148         pa = VM_PAGE_TO_PHYS(m);
 2149 
 2150         /*
 2151          * Now validate mapping with RO protection
 2152          */
 2153         if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 2154                 *pte = pa | PG_V | PG_U;
 2155         else
 2156                 *pte = pa | PG_V | PG_U | PG_MANAGED;
 2157 
 2158         return mpte;
 2159 }
 2160 
 2161 /*
 2162  * Make a temporary mapping for a physical address.  This is only intended
 2163  * to be used for panic dumps.
 2164  */
 2165 void *
 2166 pmap_kenter_temporary(vm_paddr_t pa, int i)
 2167 {
 2168         pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa);
 2169         return ((void *)crashdumpmap);
 2170 }
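
/*
 * A minimal sketch of the intended dump-time usage, with hypothetical
 * locals and a hypothetical dump_write() helper: successive physical pages
 * are entered at consecutive slots of crashdumpmap and the window is then
 * written out in one go.  Note that the return value is always the base of
 * crashdumpmap, regardless of i:
 *
 *	for (i = 0; i < chunk_pages; i++)
 *		va = pmap_kenter_temporary(pa + ptoa(i), i);
 *	dump_write(va, chunk_pages * PAGE_SIZE);
 */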
 2171 
 2172 #define MAX_INIT_PT (96)
 2173 /*
 2174  * pmap_object_init_pt preloads the ptes for a given object
 2175  * into the specified pmap.  This eliminates the blast of soft
 2176  * faults on process startup and immediately after an mmap.
 2177  */
 2178 void
 2179 pmap_object_init_pt(pmap, addr, prot, object, pindex, size, limit)
 2180         pmap_t pmap;
 2181         vm_offset_t addr;
 2182         vm_prot_t prot;
 2183         vm_object_t object;
 2184         vm_pindex_t pindex;
 2185         vm_size_t size;
 2186         int limit;
 2187 {
 2188         vm_offset_t tmpidx;
 2189         int psize;
 2190         vm_page_t p, mpte;
 2191         int objpgs;
 2192 
 2193         if ((prot & VM_PROT_READ) == 0 || pmap == NULL || object == NULL)
 2194                 return;
 2195 
 2196         /*
 2197          * This code maps large physical mmap regions into the
 2198          * processor address space.  Note that some shortcuts
 2199          * are taken, but the code works.
 2200          */
 2201         if (pseflag &&
 2202             (object->type == OBJT_DEVICE) &&
 2203             ((addr & (NBPDR - 1)) == 0) &&
 2204             ((size & (NBPDR - 1)) == 0) ) {
 2205                 int i;
 2206                 vm_page_t m[1];
 2207                 unsigned int ptepindex;
 2208                 int npdes;
 2209                 pd_entry_t ptepa;
 2210 
 2211                 if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)] & PG_V)
 2212                         return;
 2213 
 2214 retry:
 2215                 p = vm_page_lookup(object, pindex);
 2216                 if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
 2217                         goto retry;
 2218 
 2219                 if (p == NULL) {
 2220                         p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 2221                         if (p == NULL)
 2222                                 return;
 2223                         m[0] = p;
 2224 
 2225                         if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 2226                                 vm_page_free(p);
 2227                                 return;
 2228                         }
 2229 
 2230                         p = vm_page_lookup(object, pindex);
 2231                         vm_page_wakeup(p);
 2232                 }
 2233 
 2234                 ptepa = VM_PAGE_TO_PHYS(p);
 2235                 if (ptepa & (NBPDR - 1)) {
 2236                         return;
 2237                 }
 2238 
 2239                 p->valid = VM_PAGE_BITS_ALL;
 2240 
 2241                 pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
 2242                 npdes = size >> PDRSHIFT;
 2243                 for(i = 0; i < npdes; i++) {
 2244                         pmap->pm_pdir[ptepindex] =
 2245                                 ptepa | PG_U | PG_RW | PG_V | PG_PS;
 2246                         ptepa += NBPDR;
 2247                         ptepindex += 1;
 2248                 }
 2249                 vm_page_flag_set(p, PG_MAPPED);
 2250                 invltlb();
 2251                 return;
 2252         }
 2253 
 2254         psize = i386_btop(size);
 2255 
 2256         if ((object->type != OBJT_VNODE) ||
 2257                 ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) &&
 2258                         (object->resident_page_count > MAX_INIT_PT))) {
 2259                 return;
 2260         }
 2261 
 2262         if (psize + pindex > object->size) {
 2263                 if (object->size < pindex)
 2264                         return;           
 2265                 psize = object->size - pindex;
 2266         }
 2267 
 2268         mpte = NULL;
 2269         /*
 2270          * if we are processing a major portion of the object, then scan the
 2271          * entire thing.
 2272          */
 2273         if (psize > (object->resident_page_count >> 2)) {
 2274                 objpgs = psize;
 2275 
 2276                 for (p = TAILQ_FIRST(&object->memq);
 2277                     ((objpgs > 0) && (p != NULL));
 2278                     p = TAILQ_NEXT(p, listq)) {
 2279 
 2280                         tmpidx = p->pindex;
 2281                         if (tmpidx < pindex) {
 2282                                 continue;
 2283                         }
 2284                         tmpidx -= pindex;
 2285                         if (tmpidx >= psize) {
 2286                                 continue;
 2287                         }
 2288                         /*
 2289                          * Don't allow an madvise to exhaust our truly
 2290                          * free pages by allocating pv entries.
 2291                          */
 2292                         if ((limit & MAP_PREFAULT_MADVISE) &&
 2293                             cnt.v_free_count < cnt.v_free_reserved) {
 2294                                 break;
 2295                         }
 2296                         if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
 2297                                 (p->busy == 0) &&
 2298                             (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2299                                 if ((p->queue - p->pc) == PQ_CACHE)
 2300                                         vm_page_deactivate(p);
 2301                                 vm_page_busy(p);
 2302                                 mpte = pmap_enter_quick(pmap, 
 2303                                         addr + i386_ptob(tmpidx), p, mpte);
 2304                                 vm_page_flag_set(p, PG_MAPPED);
 2305                                 vm_page_wakeup(p);
 2306                         }
 2307                         objpgs -= 1;
 2308                 }
 2309         } else {
 2310                 /*
 2311                  * else lookup the pages one-by-one.
 2312                  */
 2313                 for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
 2314                         /*
 2315                          * Don't allow an madvise to exhaust our truly
 2316                          * free pages by allocating pv entries.
 2317                          */
 2318                         if ((limit & MAP_PREFAULT_MADVISE) &&
 2319                             cnt.v_free_count < cnt.v_free_reserved) {
 2320                                 break;
 2321                         }
 2322                         p = vm_page_lookup(object, tmpidx + pindex);
 2323                         if (p &&
 2324                             (p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
 2325                                 (p->busy == 0) &&
 2326                             (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2327                                 if ((p->queue - p->pc) == PQ_CACHE)
 2328                                         vm_page_deactivate(p);
 2329                                 vm_page_busy(p);
 2330                                 mpte = pmap_enter_quick(pmap, 
 2331                                         addr + i386_ptob(tmpidx), p, mpte);
 2332                                 vm_page_flag_set(p, PG_MAPPED);
 2333                                 vm_page_wakeup(p);
 2334                         }
 2335                 }
 2336         }
 2337 }
 2338 
 2339 /*
 2340  * pmap_prefault provides a quick way of clustering
 2341  * page faults into a process's address space.  It is a "cousin"
 2342  * of pmap_object_init_pt, except it runs at page fault time instead
 2343  * of mmap time.
 2344  */
 2345 #define PFBAK 4
 2346 #define PFFOR 4
 2347 #define PAGEORDER_SIZE (PFBAK+PFFOR)
 2348 
 2349 static int pmap_prefault_pageorder[] = {
 2350         -PAGE_SIZE, PAGE_SIZE,
 2351         -2 * PAGE_SIZE, 2 * PAGE_SIZE,
 2352         -3 * PAGE_SIZE, 3 * PAGE_SIZE,
 2353         -4 * PAGE_SIZE, 4 * PAGE_SIZE
 2354 };
 2355 
 2356 void
 2357 pmap_prefault(pmap, addra, entry)
 2358         pmap_t pmap;
 2359         vm_offset_t addra;
 2360         vm_map_entry_t entry;
 2361 {
 2362         int i;
 2363         vm_offset_t starta;
 2364         vm_offset_t addr;
 2365         vm_pindex_t pindex;
 2366         vm_page_t m, mpte;
 2367         vm_object_t object;
 2368 
 2369         if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace)))
 2370                 return;
 2371 
 2372         object = entry->object.vm_object;
 2373 
 2374         starta = addra - PFBAK * PAGE_SIZE;
 2375         if (starta < entry->start) {
 2376                 starta = entry->start;
 2377         } else if (starta > addra) {
 2378                 starta = 0;
 2379         }
 2380 
 2381         mpte = NULL;
 2382         for (i = 0; i < PAGEORDER_SIZE; i++) {
 2383                 vm_object_t lobject;
 2384                 pt_entry_t *pte;
 2385 
 2386                 addr = addra + pmap_prefault_pageorder[i];
 2387                 if (addr > addra + (PFFOR * PAGE_SIZE))
 2388                         addr = 0;
 2389 
 2390                 if (addr < starta || addr >= entry->end)
 2391                         continue;
 2392 
 2393                 if ((*pmap_pde(pmap, addr) & PG_V) == 0) 
 2394                         continue;
 2395 
 2396                 pte = vtopte(addr);
 2397                 if ((*pte & PG_V))
 2398                         continue;
 2399 
 2400                 pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
 2401                 lobject = object;
 2402                 for (m = vm_page_lookup(lobject, pindex);
 2403                     (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
 2404                     lobject = lobject->backing_object) {
 2405                         if (lobject->backing_object_offset & PAGE_MASK)
 2406                                 break;
 2407                         pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
 2408                         m = vm_page_lookup(lobject->backing_object, pindex);
 2409                 }
 2410 
 2411                 /*
 2412                  * give up when a page is not in memory
 2413                  */
 2414                 if (m == NULL)
 2415                         break;
 2416 
 2417                 if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL &&
 2418                         (m->busy == 0) &&
 2419                     (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 2420 
 2421                         if ((m->queue - m->pc) == PQ_CACHE) {
 2422                                 vm_page_deactivate(m);
 2423                         }
 2424                         vm_page_busy(m);
 2425                         mpte = pmap_enter_quick(pmap, addr, m, mpte);
 2426                         vm_page_flag_set(m, PG_MAPPED);
 2427                         vm_page_wakeup(m);
 2428                 }
 2429         }
 2430 }
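
/*
 * A short worked example of the prefault window, assuming 4KB pages and a
 * fault at addra = 0x08055000: pmap_prefault_pageorder makes the loop visit
 * 0x08054000, 0x08056000, 0x08053000, 0x08057000, 0x08052000, 0x08058000,
 * 0x08051000 and 0x08059000, i.e. the four pages on either side of the
 * faulting page, nearest first.  Candidates that fall outside
 * [starta, entry->end), lack a valid page directory entry, or are already
 * mapped are simply skipped, and the scan stops early at the first page
 * that is not resident in the object chain.
 */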
 2431 
 2432 /*
 2433  *      Routine:        pmap_change_wiring
 2434  *      Function:       Change the wiring attribute for a map/virtual-address
 2435  *                      pair.
 2436  *      In/out conditions:
 2437  *                      The mapping must already exist in the pmap.
 2438  */
 2439 void
 2440 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 2441 {
 2442         pt_entry_t *pte;
 2443 
 2444         if (pmap == NULL)
 2445                 return;
 2446 
 2447         pte = pmap_pte(pmap, va);
 2448 
 2449         if (wired && !pmap_pte_w(pte))
 2450                 pmap->pm_stats.wired_count++;
 2451         else if (!wired && pmap_pte_w(pte))
 2452                 pmap->pm_stats.wired_count--;
 2453 
 2454         /*
 2455          * Wiring is not a hardware characteristic so there is no need to
 2456          * invalidate TLB.
 2457          */
 2458         pmap_pte_set_w(pte, wired);
 2459 }
 2460 
 2461 
 2462 
 2463 /*
 2464  *      Copy the range specified by src_addr/len
 2465  *      from the source map to the range dst_addr/len
 2466  *      in the destination map.
 2467  *
 2468  *      This routine is only advisory and need not do anything.
 2469  */
 2470 
 2471 void
 2472 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
 2473         pmap_t dst_pmap, src_pmap;
 2474         vm_offset_t dst_addr;
 2475         vm_size_t len;
 2476         vm_offset_t src_addr;
 2477 {
 2478         vm_offset_t addr;
 2479         vm_offset_t end_addr = src_addr + len;
 2480         vm_offset_t pdnxt;
 2481         vm_paddr_t src_frame;
 2482         vm_page_t m;
 2483 
 2484         if (dst_addr != src_addr)
 2485                 return;
 2486 
 2487         src_frame = src_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 2488         if (src_frame != (PTDpde[0] & PG_FRAME)) {
 2489                 return;
 2490         }
 2491 
 2492         for(addr = src_addr; addr < end_addr; addr = pdnxt) {
 2493                 pt_entry_t *src_pte, *dst_pte;
 2494                 vm_page_t dstmpte, srcmpte;
 2495                 pd_entry_t srcptepaddr;
 2496                 unsigned ptepindex;
 2497 
 2498                 if (addr >= UPT_MIN_ADDRESS)
 2499                         panic("pmap_copy: invalid to pmap_copy page tables\n");
 2500 
 2501                 /*
 2502                  * Don't let optional prefaulting of pages make us go
 2503                  * way below the low water mark of free pages or way
 2504                  * above high water mark of used pv entries.
 2505                  * above the high water mark of used pv entries.
 2506                 if (cnt.v_free_count < cnt.v_free_reserved ||
 2507                     pv_entry_count > pv_entry_high_water)
 2508                         break;
 2509                 
 2510                 pdnxt = (addr + NBPDR) & ~(NBPDR - 1);
 2511                 ptepindex = addr >> PDRSHIFT;
 2512 
 2513                 srcptepaddr = src_pmap->pm_pdir[ptepindex];
 2514                 if (srcptepaddr == 0)
 2515                         continue;
 2516                         
 2517                 if (srcptepaddr & PG_PS) {
 2518                         if (dst_pmap->pm_pdir[ptepindex] == 0) {
 2519                                 dst_pmap->pm_pdir[ptepindex] = srcptepaddr;
 2520                                 dst_pmap->pm_stats.resident_count += NPDEPG;
 2521                         }
 2522                         continue;
 2523                 }
 2524 
 2525                 srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
 2526                 if ((srcmpte == NULL) ||
 2527                     (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
 2528                         continue;
 2529 
 2530                 if (pdnxt > end_addr)
 2531                         pdnxt = end_addr;
 2532 
 2533                 src_pte = vtopte(addr);
 2534                 while (addr < pdnxt) {
 2535                         pt_entry_t ptetemp;
 2536                         ptetemp = *src_pte;
 2537                         /*
 2538                          * we only virtual copy managed pages
 2539                          */
 2540                         if ((ptetemp & PG_MANAGED) != 0) {
 2541                                 /*
 2542                                  * We have to check after allocpte for the
 2543                                  * pte still being around...  allocpte can
 2544                                  * block.
 2545                                  */
 2546                                 dstmpte = pmap_allocpte(dst_pmap, addr);
 2547                                 dst_pte = pmap_pte(dst_pmap, addr);
 2548                                 if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
 2549                                         /*
 2550                                          * Clear the modified and
 2551                                          * accessed (referenced) bits
 2552                                          * during the copy.
 2553                                          */
 2554                                         m = PHYS_TO_VM_PAGE(ptetemp);
 2555                                         *dst_pte = ptetemp & ~(PG_M | PG_A);
 2556                                         dst_pmap->pm_stats.resident_count++;
 2557                                         pmap_insert_entry(dst_pmap, addr,
 2558                                                 dstmpte, m);
 2559                                 } else {
 2560                                         pmap_unwire_pte_hold(dst_pmap, dstmpte);
 2561                                 }
 2562                                 if (dstmpte->hold_count >= srcmpte->hold_count)
 2563                                         break;
 2564                         }
 2565                         addr += PAGE_SIZE;
 2566                         src_pte++;
 2567                 }
 2568         }
 2569 }
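
/*
 * A minimal sketch of how pmap_copy() is typically driven at fork time,
 * with hypothetical vmspace and map-entry locals; as the check at the top
 * of the routine shows, it only does work when the source and destination
 * ranges coincide:
 *
 *	pmap_copy(vmspace_pmap(new_vm), vmspace_pmap(old_vm),
 *	    entry->start, entry->end - entry->start, entry->start);
 *
 * Because the routine is purely advisory, a caller may also skip it and let
 * pmap_enter() repopulate the mappings on demand.
 */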
 2570 
 2571 /*
 2572  *      Routine:        pmap_kernel
 2573  *      Function:
 2574  *              Returns the physical map handle for the kernel.
 2575  */
 2576 pmap_t
 2577 pmap_kernel()
 2578 {
 2579         return (kernel_pmap);
 2580 }
 2581 
 2582 /*
 2583  *      pmap_zero_page zeros the specified hardware page by mapping 
 2584  *      the page into KVM and using bzero to clear its contents.
 2585  */
 2586 void
 2587 pmap_zero_page(vm_paddr_t phys)
 2588 {
 2589         if (*CMAP3)
 2590                 panic("pmap_zero_page: CMAP3 busy");
 2591 
 2592         *CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 2593 #ifdef SMP
 2594         cpu_invlpg(CADDR3);
 2595 #else
 2596         invltlb_1pg((vm_offset_t)CADDR3);
 2597 #endif
 2598 
 2599 #if defined(I686_CPU)
 2600         if (cpu_class == CPUCLASS_686)
 2601                 i686_pagezero(CADDR3);
 2602         else
 2603 #endif
 2604                 bzero(CADDR3, PAGE_SIZE);
 2605         *CMAP3 = 0;
 2606 }
 2607 
 2608 /*
 2609  *      pmap_zero_page_area zeros the specified hardware page by mapping 
 2610  *      the page into KVM and using bzero to clear its contents.
 2611  *
 2612  *      off and size may not cover an area beyond a single hardware page.
 2613  */
 2614 void
 2615 pmap_zero_page_area(vm_paddr_t phys, int off, int size)
 2616 {
 2617         if (*CMAP3)
 2618                 panic("pmap_zero_page_area: CMAP3 busy");
 2619 
 2620         *CMAP3 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 2621 #ifdef SMP
 2622         cpu_invlpg(CADDR3);
 2623 #else
 2624         invltlb_1pg((vm_offset_t)CADDR3);
 2625 #endif
 2626 
 2627 #if defined(I686_CPU)
 2628         if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
 2629                 i686_pagezero(CADDR3);
 2630         else
 2631 #endif
 2632                 bzero(CADDR3 + off, size);
 2633         *CMAP3 = 0;
 2634 }
 2635 
 2636 /*
 2637  *      pmap_copy_page copies the specified (machine independent)
 2638  *      page by mapping the page into virtual memory and using
 2639  *      bcopy to copy the page, one machine dependent page at a
 2640  *      time.
 2641  */
 2642 void
 2643 pmap_copy_page(vm_paddr_t src, vm_paddr_t dst)
 2644 {
 2645         if (*CMAP1)
 2646                 panic("pmap_copy_page: CMAP1 busy");
 2647         if (*CMAP2)
 2648                 panic("pmap_copy_page: CMAP2 busy");
 2649 
 2650         *CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
 2651         *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
 2652 
 2653 #ifdef SMP
 2654         cpu_invlpg(CADDR1);
 2655         cpu_invlpg(CADDR2);
 2656 #else
 2657         invltlb_1pg((vm_offset_t)CADDR1);
 2658         invltlb_1pg((vm_offset_t)CADDR2);
 2659 #endif
 2660 
 2661         bcopy(CADDR1, CADDR2, PAGE_SIZE);
 2662 
 2663         *CMAP1 = 0;
 2664         *CMAP2 = 0;
 2665 }
 2666 
 2667 
 2668 /*
 2669  *      Routine:        pmap_pageable
 2670  *      Function:
 2671  *              Make the specified pages (by pmap, offset)
 2672  *              pageable (or not) as requested.
 2673  *
 2674  *              A page which is not pageable may not take
 2675  *              a fault; therefore, its page table entry
 2676  *              must remain valid for the duration.
 2677  *
 2678  *              This routine is merely advisory; pmap_enter
 2679  *              will specify that these pages are to be wired
 2680  *              down (or not) as appropriate.
 2681  */
 2682 void
 2683 pmap_pageable(pmap, sva, eva, pageable)
 2684         pmap_t pmap;
 2685         vm_offset_t sva, eva;
 2686         boolean_t pageable;
 2687 {
 2688 }
 2689 
 2690 /*
 2691  * Returns true if the pmap's pv is one of the first
 2692  * 16 pvs linked to from this page.  This count may
 2693  * be changed upwards or downwards in the future; it
 2694  * is only necessary that true be returned for a small
 2695  * subset of pmaps for proper page aging.
 2696  */
 2697 boolean_t
 2698 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 2699 {
 2700         pv_entry_t pv;
 2701         int loops = 0;
 2702         int s;
 2703 
 2704         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2705                 return FALSE;
 2706 
 2707         s = splvm();
 2708 
 2709         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2710                 if (pv->pv_pmap == pmap) {
 2711                         splx(s);
 2712                         return TRUE;
 2713                 }
 2714                 loops++;
 2715                 if (loops >= 16)
 2716                         break;
 2717         }
 2718         splx(s);
 2719         return (FALSE);
 2720 }
 2721 
 2722 #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 2723 /*
 2724  * Remove all pages from the specified address space;
 2725  * this aids process exit speeds.  Also, this code
 2726  * is special-cased for the current process only, but
 2727  * can have the more generic (and slightly slower)
 2728  * mode enabled.  This is much faster than pmap_remove
 2729  * in the case of running down an entire address space.
 2730  */
 2731 void
 2732 pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 2733 {
 2734         pt_entry_t *pte, tpte;
 2735         pv_entry_t pv, npv;
 2736         int s;
 2737         vm_page_t m;
 2738 
 2739 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2740         if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) {
 2741                 printf("warning: pmap_remove_pages called with non-current pmap\n");
 2742                 return;
 2743         }
 2744 #endif
 2745 
 2746         s = splvm();
 2747         for(pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
 2748 
 2749                 if (pv->pv_va >= eva || pv->pv_va < sva) {
 2750                         npv = TAILQ_NEXT(pv, pv_plist);
 2751                         continue;
 2752                 }
 2753 
 2754 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 2755                 pte = vtopte(pv->pv_va);
 2756 #else
 2757                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2758 #endif
 2759                 tpte = *pte;
 2760 
 2761 /*
 2762  * We cannot remove wired pages from a process' mapping at this time
 2763  */
 2764                 if (tpte & PG_W) {
 2765                         npv = TAILQ_NEXT(pv, pv_plist);
 2766                         continue;
 2767                 }
 2768                 *pte = 0;
 2769 
 2770                 m = PHYS_TO_VM_PAGE(tpte);
 2771 
 2772                 KASSERT(m < &vm_page_array[vm_page_array_size],
 2773                         ("pmap_remove_pages: bad tpte %x", tpte));
 2774 
 2775                 pv->pv_pmap->pm_stats.resident_count--;
 2776 
 2777                 /*
 2778                  * Update the vm_page_t clean and reference bits.
 2779                  */
 2780                 if (tpte & PG_M) {
 2781                         vm_page_dirty(m);
 2782                 }
 2783 
 2784 
 2785                 npv = TAILQ_NEXT(pv, pv_plist);
 2786                 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 2787 
 2788                 m->md.pv_list_count--;
 2789                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2790                 if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
 2791                         vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 2792                 }
 2793 
 2794                 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
 2795                 free_pv_entry(pv);
 2796         }
 2797         splx(s);
 2798         pmap_TLB_invalidate_all(pmap);
 2799 }
 2800 
 2801 /*
 2802  * pmap_testbit tests bits in PTEs.
 2803  * Note that the testbit/changebit routines are inline,
 2804  * so a lot of things are evaluated at compile time.
 2805  */
 2806 static boolean_t
 2807 pmap_testbit(vm_page_t m, int bit)
 2808 {
 2809         pv_entry_t pv;
 2810         pt_entry_t *pte;
 2811         int s;
 2812 
 2813         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2814                 return FALSE;
 2815 
 2816         if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 2817                 return FALSE;
 2818 
 2819         s = splvm();
 2820 
 2821         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2822                 /*
 2823                  * If the bit being tested is the modified or accessed
 2824                  * bit, skip mappings whose modified state we do not
 2825                  * track (such as those in the kernel clean map).
 2826                  */
 2827                 if (bit & (PG_A|PG_M)) {
 2828                         if (!pmap_track_modified(pv->pv_va))
 2829                                 continue;
 2830                 }
 2831 
 2832 #if defined(PMAP_DIAGNOSTIC)
 2833                 if (!pv->pv_pmap) {
 2834                         printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
 2835                         continue;
 2836                 }
 2837 #endif
 2838                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2839                 if (*pte & bit) {
 2840                         splx(s);
 2841                         return TRUE;
 2842                 }
 2843         }
 2844         splx(s);
 2845         return (FALSE);
 2846 }
 2847 
 2848 /*
 2849  * this routine is used to modify bits in ptes
 2850  */
 2851 static __inline void
 2852 pmap_changebit(vm_page_t m, int bit, boolean_t setem)
 2853 {
 2854         pv_entry_t pv;
 2855         pt_entry_t *pte;
 2856         int s;
 2857 
 2858         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2859                 return;
 2860 
 2861         s = splvm();
 2862 
 2863         /*
 2864          * Loop over all current mappings, setting/clearing as appropriate.
 2865          * If setting RO, do we need to clear the VAC?
 2866          */
 2867         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 2868                 /*
 2869                  * don't write protect pager mappings
 2870                  */
 2871                 if (!setem && (bit == PG_RW)) {
 2872                         if (!pmap_track_modified(pv->pv_va))
 2873                                 continue;
 2874                 }
 2875 
 2876 #if defined(PMAP_DIAGNOSTIC)
 2877                 if (!pv->pv_pmap) {
 2878                         printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
 2879                         continue;
 2880                 }
 2881 #endif
 2882 
 2883                 pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2884 
 2885                 if (setem) {
 2886                         *pte |= bit;
 2887                         pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 2888                 } else {
 2889                         pt_entry_t pbits = *pte;
 2890                         if (pbits & bit) {
 2891                                 if (bit == PG_RW) {
 2892                                         if (pbits & PG_M) {
 2893                                                 vm_page_dirty(m);
 2894                                         }
 2895                                         *pte = pbits & ~(PG_M|PG_RW);
 2896                                 } else {
 2897                                         *pte = pbits & ~bit;
 2898                                 }
 2899                                 pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 2900                         }
 2901                 }
 2902         }
 2903         splx(s);
 2904 }
 2905 
 2906 /*
 2907  *      pmap_page_protect:
 2908  *
 2909  *      Lower the permission for all mappings to a given page.
 2910  */
 2911 void
 2912 pmap_page_protect(vm_page_t m, vm_prot_t prot)
 2913 {
 2914         if ((prot & VM_PROT_WRITE) == 0) {
 2915                 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
 2916                         pmap_changebit(m, PG_RW, FALSE);
 2917                 } else {
 2918                         pmap_remove_all(m);
 2919                 }
 2920         }
 2921 }
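
/*
 * A minimal usage sketch for a managed page m, following directly from the
 * cases above:
 *
 *	pmap_page_protect(m, VM_PROT_READ);	write-protects every mapping
 *	pmap_page_protect(m, VM_PROT_NONE);	removes every mapping
 *
 * Any protection that still includes VM_PROT_WRITE leaves the page alone.
 */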
 2922 
 2923 vm_paddr_t
 2924 pmap_phys_address(ppn)
 2925         int ppn;
 2926 {
 2927         return (i386_ptob((vm_paddr_t)ppn));
 2928 }
 2929 
 2930 /*
 2931  *      pmap_ts_referenced:
 2932  *
 2933  *      Return a count of reference bits for a page, clearing those bits.
 2934  *      It is not necessary for every reference bit to be cleared, but it
 2935  *      is necessary that 0 only be returned when there are truly no
 2936  *      reference bits set.
 2937  *
 2938  *      XXX: The exact number of bits to check and clear is a matter that
 2939  *      should be tested and standardized at some point in the future for
 2940  *      optimal aging of shared pages.
 2941  */
 2942 int
 2943 pmap_ts_referenced(vm_page_t m)
 2944 {
 2945         pv_entry_t pv, pvf, pvn;
 2946         pt_entry_t *pte;
 2947         int s;
 2948         int rtval = 0;
 2949 
 2950         if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 2951                 return (rtval);
 2952 
 2953         s = splvm();
 2954 
 2955         if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2956 
 2957                 pvf = pv;
 2958 
 2959                 do {
 2960                         pvn = TAILQ_NEXT(pv, pv_list);
 2961 
 2962                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 2963 
 2964                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 2965 
 2966                         if (!pmap_track_modified(pv->pv_va))
 2967                                 continue;
 2968 
 2969                         pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 2970 
 2971                         if (pte && (*pte & PG_A)) {
 2972                                 *pte &= ~PG_A;
 2973 
 2974                                 pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 2975 
 2976                                 rtval++;
 2977                                 if (rtval > 4) {
 2978                                         break;
 2979                                 }
 2980                         }
 2981                 } while ((pv = pvn) != NULL && pv != pvf);
 2982         }
 2983         splx(s);
 2984 
 2985         return (rtval);
 2986 }
 2987 
 2988 /*
 2989  *      pmap_is_modified:
 2990  *
 2991  *      Return whether or not the specified physical page was modified
 2992  *      in any physical maps.
 2993  */
 2994 boolean_t
 2995 pmap_is_modified(vm_page_t m)
 2996 {
 2997         return pmap_testbit(m, PG_M);
 2998 }
 2999 
 3000 /*
 3001  *      Clear the modify bits on the specified physical page.
 3002  */
 3003 void
 3004 pmap_clear_modify(vm_page_t m)
 3005 {
 3006         pmap_changebit(m, PG_M, FALSE);
 3007 }
 3008 
 3009 /*
 3010  *      pmap_clear_reference:
 3011  *
 3012  *      Clear the reference bit on the specified physical page.
 3013  */
 3014 void
 3015 pmap_clear_reference(vm_page_t m)
 3016 {
 3017         pmap_changebit(m, PG_A, FALSE);
 3018 }
 3019 
 3020 /*
 3021  * Miscellaneous support routines follow
 3022  */
 3023 
 3024 static void
 3025 i386_protection_init()
 3026 {
 3027         register int *kp, prot;
 3028 
 3029         kp = protection_codes;
 3030         for (prot = 0; prot < 8; prot++) {
 3031                 switch (prot) {
 3032                 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
 3033                         /*
 3034                          * Read access is also 0. There isn't any execute bit,
 3035                          * so just make it readable.
 3036                          */
 3037                 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
 3038                 case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
 3039                 case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
 3040                         *kp++ = 0;
 3041                         break;
 3042                 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
 3043                 case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
 3044                 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
 3045                 case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
 3046                         *kp++ = PG_RW;
 3047                         break;
 3048                 }
 3049         }
 3050 }
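
/*
 * The table built above is indexed by the 3-bit VM_PROT_READ/WRITE/EXECUTE
 * combination and collapses it to the only distinction an i386 PTE can
 * express: any protection that includes VM_PROT_WRITE yields PG_RW, and
 * everything else (including execute-only) yields read-only.  Assuming
 * pte_prot(), used by pmap_enter() above, simply indexes protection_codes:
 *
 *	protection_codes[VM_PROT_READ]                 == 0
 *	protection_codes[VM_PROT_READ | VM_PROT_WRITE] == PG_RW
 */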
 3051 
 3052 /*
 3053  * Map a set of physical memory pages into the kernel virtual
 3054  * address space. Return a pointer to where it is mapped. This
 3055  * routine is intended to be used for mapping device memory,
 3056  * NOT real memory.
 3057  */
 3058 void *
 3059 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
 3060 {
 3061         vm_offset_t va, tmpva, offset;
 3062         pt_entry_t *pte;
 3063 
 3064         offset = pa & PAGE_MASK;
 3065         size = roundup(offset + size, PAGE_SIZE);
 3066 
 3067         va = kmem_alloc_pageable(kernel_map, size);
 3068         if (!va)
 3069                 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 3070 
 3071         pa = pa & PG_FRAME;
 3072         for (tmpva = va; size > 0;) {
 3073                 pte = vtopte(tmpva);
 3074                 *pte = pa | PG_RW | PG_V | pgeflag;
 3075                 size -= PAGE_SIZE;
 3076                 tmpva += PAGE_SIZE;
 3077                 pa += PAGE_SIZE;
 3078         }
 3079         invltlb();
 3080 
 3081         return ((void *)(va + offset));
 3082 }
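
/*
 * A minimal sketch of driver-style usage, with a hypothetical register base
 * and size; the returned pointer already includes the sub-page offset of
 * pa, and the mapping is released with pmap_unmapdev() below:
 *
 *	volatile u_int32_t *regs;
 *
 *	regs = pmap_mapdev(0xfed00000, PAGE_SIZE);
 *	(void)regs[0];			read a hypothetical device register
 *	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
 */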
 3083 
 3084 void
 3085 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 3086 {
 3087         vm_offset_t base, offset;
 3088 
 3089         base = va & PG_FRAME;
 3090         offset = va & PAGE_MASK;
 3091         size = roundup(offset + size, PAGE_SIZE);
 3092         kmem_free(kernel_map, base, size);
 3093 }
 3094 
 3095 /*
 3096  * perform the pmap work for mincore
 3097  */
 3098 int
 3099 pmap_mincore(pmap_t pmap, vm_offset_t addr)
 3100 {
 3101         pt_entry_t *ptep, pte;
 3102         vm_page_t m;
 3103         int val = 0;
 3104         
 3105         ptep = pmap_pte(pmap, addr);
 3106         if (ptep == 0) {
 3107                 return 0;
 3108         }
 3109 
 3110         if ((pte = *ptep) != 0) {
 3111                 vm_paddr_t pa;
 3112 
 3113                 val = MINCORE_INCORE;
 3114                 if ((pte & PG_MANAGED) == 0)
 3115                         return val;
 3116 
 3117                 pa = pte & PG_FRAME;
 3118 
 3119                 m = PHYS_TO_VM_PAGE(pa);
 3120 
 3121                 /*
 3122                  * Modified by us
 3123                  */
 3124                 if (pte & PG_M)
 3125                         val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 3126                 /*
 3127                  * Modified by someone
 3128                  */
 3129                 else if (m->dirty || pmap_is_modified(m))
 3130                         val |= MINCORE_MODIFIED_OTHER;
 3131                 /*
 3132                  * Referenced by us
 3133                  */
 3134                 if (pte & PG_A)
 3135                         val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 3136 
 3137                 /*
 3138                  * Referenced by someone
 3139                  */
 3140                 else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
 3141                         val |= MINCORE_REFERENCED_OTHER;
 3142                         vm_page_flag_set(m, PG_REFERENCED);
 3143                 }
 3144         } 
 3145         return val;
 3146 }
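
      /*
       * Illustrative sketch, not part of the original file: the flags
       * assembled above reach userland through mincore(2), one status byte
       * per page.  A hypothetical userland caller (built with <sys/mman.h>)
       * could inspect them as follows; the address is an assumed example.
       */
      #if 0
              char vec[1];
              void *addr = (void *)0x8048000;         /* assumed user address */

              if (mincore(addr, PAGE_SIZE, vec) == 0 &&
                  (vec[0] & MINCORE_INCORE))
                      printf("resident%s\n",
                          (vec[0] & MINCORE_MODIFIED) ? ", dirty" : "");
      #endif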
 3147 
 3148 void
 3149 pmap_activate(struct proc *p)
 3150 {
 3151         pmap_t  pmap;
 3152 
 3153         pmap = vmspace_pmap(p->p_vmspace);
 3154 #if defined(SMP)
 3155         pmap->pm_active |= 1 << cpuid;
 3156 #else
 3157         pmap->pm_active |= 1;
 3158 #endif
 3159 #if defined(SWTCH_OPTIM_STATS)
 3160         tlb_flush_count++;
 3161 #endif
 3162 #ifdef PAE
 3163         load_cr3(p->p_addr->u_pcb.pcb_cr3 = vtophys(pmap->pm_pdpt));
 3164 #else
 3165         load_cr3(p->p_addr->u_pcb.pcb_cr3 = vtophys(pmap->pm_pdir));
 3166 #endif
 3167 }
 3168 
 3169 vm_offset_t
 3170 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 3171 {
 3172 
 3173         if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
 3174                 return addr;
 3175         }
 3176 
 3177         addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
 3178         return addr;
 3179 }
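
      /*
       * Illustrative example, not part of the original file: with 4 MB
       * superpages (non-PAE i386, NBPDR == 4 MB), a hint of 0x00c01000 for a
       * large OBJT_DEVICE object is rounded up to the next superpage
       * boundary,
       *
       *      (0x00c01000 + 0x003fffff) & ~0x003fffff == 0x01000000,
       *
       * so the resulting mapping can later be backed by 4 MB (PG_PS) pages
       * where the hardware supports them.
       */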
 3180 
 3181 #ifdef PAE
 3182 /*
 3183  * Allocate PDPTs from physical memory below 4 GB; with PAE enabled,
       * %cr3 holds a 32-bit physical pointer to the PDPT.
 3184  */
 3185 struct pdpt_page {
 3186         SLIST_ENTRY(pdpt_page)  link;
 3187         u_int32_t               avail;
 3188         vm_paddr_t              phys;
 3189         u_int32_t               bits[4];
 3190 };
 3191 
 3192 SLIST_HEAD(, pdpt_page) pdpt_pages = SLIST_HEAD_INITIALIZER(pdpt_pages);
 3193 int pdpt_avail = 0;
 3194 
 3195 static pdpt_entry_t *
 3196 pmap_alloc_pdpt(void)
 3197 {
 3198         struct pdpt_page *pp;
 3199         pdpt_entry_t *pdpt = 0;
 3200         int i;
 3201 
 3202         if (pdpt_avail == 0) {
 3203                 pp = (struct pdpt_page *)contigmalloc(PAGE_SIZE, M_DEVBUF,
 3204                     M_WAITOK, 0ull, 0xffffffffull, PAGE_SIZE, 0);
 3205                 if (!pp)
 3206                         panic("pmap_alloc_pdpt: alloc failed");
 3207                 pp->phys = vtophys(pp);
 3208                 pp->avail = PAGE_SIZE / 32 - 1;
 3209                 pp->bits[0] = 1;
 3210                 pp->bits[1] = pp->bits[2] = pp->bits[3] = 0;
 3211                 SLIST_INSERT_HEAD(&pdpt_pages, pp, link);
 3212                 pdpt_avail += pp->avail;
 3213         } else {
 3214                 SLIST_FOREACH(pp, &pdpt_pages, link) {
 3215                         if (pp->avail > 0)
 3216                                 break;
 3217                 }
 3218         }
 3219 
 3220         for (i = 0; i < 4; i++) {
 3221                 int j = ffs(~pp->bits[i]);
 3222                 if (j == 0)
 3223                         continue;
 3224                 pp->bits[i] |= 1 << (j - 1);
 3225                 pp->avail--;
 3226                 pdpt_avail--;
 3227                 pdpt = (pdpt_entry_t *)pp + (32 * i + j - 1) * NPGPTD;
                      break;
 3228         }
 3229 
 3230         return pdpt;
 3231 }
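
      /*
       * Illustrative note, not part of the original file: each pdpt_page
       * carves its 4 KB page into PAGE_SIZE / 32 == 128 slots of 32 bytes,
       * i.e. NPGPTD (4) pdpt_entry_t entries of 8 bytes apiece.  Slot 0 is
       * the bookkeeping header itself, which is why bits[0] starts at 1 and
       * avail at 127.  A free bit j (1-based, from ffs()) in word i selects
       * slot 32 * i + j - 1; for example, i == 0, j == 2 selects slot 1,
       * which begins 32 bytes into the page.
       */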
 3232 
 3233 #if 0
 3234 static void
 3235 pmap_free_pdpt(pdpt_entry_t *pdpt)
 3236 {
 3237         struct pdpt_page *pp;
 3238         int i;
 3239 
 3240         pp = (struct pdpt_page *)((vm_offset_t)pdpt & ~PAGE_MASK);
 3241         i = (pdpt - (pdpt_entry_t *)pp) / NPGPTD;
 3242         pp->bits[i / 32] &= ~(1 << (i % 32));
 3243         pp->avail++;
 3244         pdpt_avail++;
 3245 }
 3246 #endif
 3247 #endif
 3248 
 3249 #if defined(PMAP_DEBUG)
 3250 int
      pmap_pid_dump(int pid)
 3251 {
 3252         pmap_t pmap;
 3253         struct proc *p;
 3254         int npte = 0;
 3255         int index;
 3256         int s;
 3257         LIST_FOREACH(p, &allproc, p_list) {
 3258                 if (p->p_pid != pid)
 3259                         continue;
 3260 
 3261                 if (p->p_vmspace) {
 3262                         int i,j;
 3263                         index = 0;
 3264                         pmap = vmspace_pmap(p->p_vmspace);
 3265                         for(i=0;i<1024;i++) {
 3266                                 pd_entry_t *pde;
 3267                                 unsigned *pte;
 3268                                 unsigned base = i << PDRSHIFT;
 3269                                 
 3270                                 pde = &pmap->pm_pdir[i];
 3271                                 if (pde && pmap_pde_v(pde)) {
 3272                                         s = splvm();
 3273                                         for(j=0;j<1024;j++) {
 3274                                                 unsigned va = base + (j << PAGE_SHIFT);
 3275                                                 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 3276                                                         if (index) {
 3277                                                                 index = 0;
 3278                                                                 printf("\n");
 3279                                                         }
 3280                                                         splx(s);
 3281                                                         return npte;
 3282                                                 }
 3283                                                 pte = pmap_pte_quick(pmap, va);
 3284                                                 if (pte && pmap_pte_v(pte)) {
 3285                                                         vm_offset_t pa;
 3286                                                         vm_page_t m;
 3287                                                         pa = *(int *)pte;
 3288                                                         m = PHYS_TO_VM_PAGE(pa);
 3289                                                         printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 3290                                                                 va, pa, m->hold_count, m->wire_count, m->flags);
 3291                                                         npte++;
 3292                                                         index++;
 3293                                                         if (index >= 2) {
 3294                                                                 index = 0;
 3295                                                                 printf("\n");
 3296                                                         } else {
 3297                                                                 printf(" ");
 3298                                                         }
 3299                                                 }
 3300                                         }
 3301                                         splx(s);
 3302                                 }
 3303                         }
 3304                 }
 3305         }
 3306         return npte;
 3307 }
 3308 #endif
 3309 
 3310 #if defined(DEBUG)
 3311 
 3312 static void     pads(pmap_t pm);
 3313 void            pmap_pvdump(vm_paddr_t pa);
 3314 
 3315 /* Print the valid mappings in a pmap's address space. */
 3316 static void
 3317 pads(pmap_t pm)
 3319 {
 3320         unsigned va, i, j;
 3321         unsigned *ptep;
 3322         int s;
 3323 
 3324         if (pm == kernel_pmap)
 3325                 return;
 3326         s = splvm();
 3327         for (i = 0; i < 1024; i++)
 3328                 if (pm->pm_pdir[i])
 3329                         for (j = 0; j < 1024; j++) {
 3330                                 va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
 3331                                 if (pm == kernel_pmap && va < KERNBASE)
 3332                                         continue;
 3333                                 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
 3334                                         continue;
 3335                                 ptep = pmap_pte_quick(pm, va);
 3336                                 if (pmap_pte_v(ptep))
 3337                                         printf("%x:%x ", va, *(int *) ptep);
 3338                         }
 3339         splx(s);
 3340 
 3341 }
 3342 
 3343 void
 3344 pmap_pvdump(vm_paddr_t pa)
 3346 {
 3347         register pv_entry_t pv;
 3348         vm_page_t m;
 3349 
 3350         printf("pa %llx", (unsigned long long)pa);
 3351         m = PHYS_TO_VM_PAGE(pa);
 3352         TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
 3353 #ifdef used_to_be
 3354                 printf(" -> pmap %p, va %x, flags %x",
 3355                     (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
 3356 #endif
 3357                 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
 3358                 pads(pv->pv_pmap);
 3359         }
 3360         printf(" ");
 3361 }
 3362 #endif
 3363 
 3364 #if defined(I686_CPU) && !defined(NO_PSE_HACK)
 3365 static void note_pse_hack(void *unused);
 3366 SYSINIT(note_pse_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, note_pse_hack, NULL);
 3367 
 3368 static void
 3369 note_pse_hack(void *unused)
      {
 3370         if (!has_pse_bug)
 3371                 return;
 3372         printf("Warning: Pentium 4 CPU: PSE disabled\n");
 3373 }
 3374 #endif
 3375 
