FreeBSD/Linux Kernel Cross Reference
sys/osfmk/i386/pmap.c

    1 /*
    2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
    3  *
    4  * @APPLE_LICENSE_HEADER_START@
    5  * 
    6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
    7  * 
    8  * This file contains Original Code and/or Modifications of Original Code
    9  * as defined in and that are subject to the Apple Public Source License
   10  * Version 2.0 (the 'License'). You may not use this file except in
   11  * compliance with the License. Please obtain a copy of the License at
   12  * http://www.opensource.apple.com/apsl/ and read it before using this
   13  * file.
   14  * 
   15  * The Original Code and all software distributed under the License are
   16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   20  * Please see the License for the specific language governing rights and
   21  * limitations under the License.
   22  * 
   23  * @APPLE_LICENSE_HEADER_END@
   24  */
   25 /*
   26  * @OSF_COPYRIGHT@
   27  */
   28 /*
   29  * Mach Operating System
   30  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
   31  * All Rights Reserved.
   32  * 
   33  * Permission to use, copy, modify and distribute this software and its
   34  * documentation is hereby granted, provided that both the copyright
   35  * notice and this permission notice appear in all copies of the
   36  * software, derivative works or modified versions, and any portions
   37  * thereof, and that both notices appear in supporting documentation.
   38  * 
   39  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   40  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
   41  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   42  * 
   43  * Carnegie Mellon requests users of this software to return to
   44  * 
   45  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   46  *  School of Computer Science
   47  *  Carnegie Mellon University
   48  *  Pittsburgh PA 15213-3890
   49  * 
   50  * any improvements or extensions that they make and grant Carnegie Mellon
   51  * the rights to redistribute these changes.
   52  */
   53 /*
   54  */
   55 
   56 /*
   57  *      File:   pmap.c
   58  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
   59  *      (These guys wrote the Vax version)
   60  *
   61  *      Physical Map management code for Intel i386, i486, and i860.
   62  *
   63  *      Manages physical address maps.
   64  *
   65  *      In addition to hardware address maps, this
   66  *      module is called upon to provide software-use-only
   67  *      maps which may or may not be stored in the same
   68  *      form as hardware maps.  These pseudo-maps are
   69  *      used to store intermediate results from copy
   70  *      operations to and from address spaces.
   71  *
   72  *      Since the information managed by this module is
   73  *      also stored by the logical address mapping module,
   74  *      this module may throw away valid virtual-to-physical
   75  *      mappings at almost any time.  However, invalidations
   76  *      of virtual-to-physical mappings must be done as
   77  *      requested.
   78  *
   79  *      In order to cope with hardware architectures which
   80  *      make virtual-to-physical map invalidates expensive,
   81  *      this module may delay invalidate or reduced protection
   82  *      operations until such time as they are actually
   83  *      necessary.  This module is given full information as
   84  *      to which processors are currently using which maps,
   85  *      and to when physical maps must be made correct.
   86  */
   87 
   88 #include <cpus.h>
   89 
   90 #include <string.h>
   91 #include <norma_vm.h>
   92 #include <mach_kdb.h>
   93 #include <mach_ldebug.h>
   94 
   95 #include <mach/machine/vm_types.h>
   96 
   97 #include <mach/boolean.h>
   98 #include <kern/thread.h>
   99 #include <kern/zalloc.h>
  100 
  101 #include <kern/lock.h>
  102 #include <kern/spl.h>
  103 
  104 #include <vm/pmap.h>
  105 #include <vm/vm_map.h>
  106 #include <vm/vm_kern.h>
  107 #include <mach/vm_param.h>
  108 #include <mach/vm_prot.h>
  109 #include <vm/vm_object.h>
  110 #include <vm/vm_page.h>
  111 
  112 #include <mach/machine/vm_param.h>
  113 #include <machine/thread.h>
  114 
  115 #include <kern/misc_protos.h>                   /* prototyping */
  116 #include <i386/misc_protos.h>
  117 
  118 #include <i386/cpuid.h>
  119 #include <i386/cpu_number.h>
  120 #include <i386/machine_cpu.h>
  121 
  122 #if     MACH_KDB
  123 #include <ddb/db_command.h>
  124 #include <ddb/db_output.h>
  125 #include <ddb/db_sym.h>
  126 #include <ddb/db_print.h>
  127 #endif  /* MACH_KDB */
  128 
  129 #include <kern/xpr.h>
  130 
  131 #if NCPUS > 1
  132 #include <i386/mp_events.h>
  133 #endif
  134 
  135 /*
  136  * Forward declarations for internal functions.
  137  */
  138 void    pmap_expand(
  139                         pmap_t          map,
  140                         vm_offset_t     v);
  141 
  142 extern void     pmap_remove_range(
  143                         pmap_t          pmap,
  144                         vm_offset_t     va,
  145                         pt_entry_t      *spte,
  146                         pt_entry_t      *epte);
  147 
  148 void    phys_attribute_clear(
  149                         vm_offset_t     phys,
  150                         int             bits);
  151 
  152 boolean_t phys_attribute_test(
  153                         vm_offset_t     phys,
  154                         int             bits);
  155 
  156 void pmap_set_modify(ppnum_t            pn);
  157 
  158 void phys_attribute_set(
  159                         vm_offset_t     phys,
  160                         int             bits);
  161 
  162 
  163 #ifndef set_dirbase
  164 void    set_dirbase(vm_offset_t dirbase);
  165 #endif  /* set_dirbase */
  166 
  167 #define PA_TO_PTE(pa)   (pa_to_pte((pa) - VM_MIN_KERNEL_ADDRESS))
  168 #define iswired(pte)    ((pte) & INTEL_PTE_WIRED)
  169 
  170 pmap_t  real_pmap[NCPUS];
  171 
  172 #define WRITE_PTE(pte_p, pte_entry)             *(pte_p) = (pte_entry);
  173 #define WRITE_PTE_FAST(pte_p, pte_entry)        *(pte_p) = (pte_entry);
  174 
  175 #define value_64bit(value)  ((value) & 0xFFFFFFFF00000000LL)
  176 #define low32(x) ((unsigned int)((x) & 0x00000000ffffffffLL))
  177 
  178 /*
  179  *      Private data structures.
  180  */
  181 
  182 /*
  183  *      For each vm_page_t, there is a list of all currently
  184  *      valid virtual mappings of that page.  An entry is
  185  *      a pv_entry_t; the list is the pv_table.
  186  */
  187 
  188 typedef struct pv_entry {
  189         struct pv_entry *next;          /* next pv_entry */
  190         pmap_t          pmap;           /* pmap where mapping lies */
  191         vm_offset_t     va;             /* virtual address for mapping */
  192 } *pv_entry_t;
  193 
  194 #define PV_ENTRY_NULL   ((pv_entry_t) 0)
  195 
  196 pv_entry_t      pv_head_table;          /* array of entries, one per page */
  197 
  198 /*
  199  *      pv_list entries are kept on a list that can only be accessed
  200  *      with the pmap system locked (at SPLVM, not in the cpus_active set).
  201  *      The list is refilled from the pv_list_zone if it becomes empty.
  202  */
  203 pv_entry_t      pv_free_list;           /* free list at SPLVM */
  204 decl_simple_lock_data(,pv_free_list_lock)
  205 
  206 #define PV_ALLOC(pv_e) { \
  207         simple_lock(&pv_free_list_lock); \
  208         if ((pv_e = pv_free_list) != 0) { \
  209             pv_free_list = pv_e->next; \
  210         } \
  211         simple_unlock(&pv_free_list_lock); \
  212 }
  213 
  214 #define PV_FREE(pv_e) { \
  215         simple_lock(&pv_free_list_lock); \
  216         pv_e->next = pv_free_list; \
  217         pv_free_list = pv_e; \
  218         simple_unlock(&pv_free_list_lock); \
  219 }
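       /*
        *      Usage sketch (hypothetical caller, not a routine in this file):
        *      PV_ALLOC only dequeues from pv_free_list, so when it comes back
        *      empty the caller is expected to drop its pmap locks (zalloc may
        *      block), refill from pv_list_zone, and retry:
        *
        *              PV_ALLOC(pv_e);
        *              if (pv_e == PV_ENTRY_NULL) {
        *                      PMAP_READ_UNLOCK(pmap, spl);
        *                      pv_e = (pv_entry_t) zalloc(pv_list_zone);
        *                      PMAP_READ_LOCK(pmap, spl);
        *              }
        *              ...
        *              PV_FREE(pv_e);
        */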
  220 
  221 zone_t          pv_list_zone;           /* zone of pv_entry structures */
  222 
  223 /*
  224  *      Each entry in the pv_head_table is locked by a bit in the
  225  *      pv_lock_table.  The lock bits are accessed by the physical
  226  *      address of the page they lock.
  227  */
  228 
  229 char    *pv_lock_table;         /* pointer to array of bits */
  230 #define pv_lock_table_size(n)   (((n)+BYTE_SIZE-1)/BYTE_SIZE)
  231 
  232 /*
  233  *      First and last physical addresses that we maintain any information
  234  *      for.  Initialized to zero so that pmap operations done before
  235  *      pmap_init won't touch any non-existent structures.
  236  */
  237 vm_offset_t     vm_first_phys = (vm_offset_t) 0;
  238 vm_offset_t     vm_last_phys  = (vm_offset_t) 0;
  239 boolean_t       pmap_initialized = FALSE;/* Has pmap_init completed? */
  240 
  241 /*
  242  *      Index into pv_head table, its lock bits, and the modify/reference
  243  *      bits starting at vm_first_phys.
  244  */
  245 
  246 #define pa_index(pa)    (atop(pa - vm_first_phys))
  247 
  248 #define pai_to_pvh(pai)         (&pv_head_table[pai])
  249 #define lock_pvh_pai(pai)       bit_lock(pai, (void *)pv_lock_table)
  250 #define unlock_pvh_pai(pai)     bit_unlock(pai, (void *)pv_lock_table)
  251 
  252 /*
   253  *      Array of physical page attributes for managed pages.
  254  *      One byte per physical page.
  255  */
  256 char    *pmap_phys_attributes;
  257 
  258 /*
  259  *      Physical page attributes.  Copy bits from PTE definition.
  260  */
  261 #define PHYS_MODIFIED   INTEL_PTE_MOD   /* page modified */
  262 #define PHYS_REFERENCED INTEL_PTE_REF   /* page referenced */
  263 #define PHYS_NCACHE     INTEL_PTE_NCACHE
  264 
  265 /*
  266  *      Amount of virtual memory mapped by one
  267  *      page-directory entry.
  268  */
  269 #define PDE_MAPPED_SIZE         (pdetova(1))
  270 
  271 /*
  272  *      We allocate page table pages directly from the VM system
  273  *      through this object.  It maps physical memory.
  274  */
  275 vm_object_t     pmap_object = VM_OBJECT_NULL;
  276 
  277 /*
  278  *      Locking and TLB invalidation
  279  */
  280 
  281 /*
  282  *      Locking Protocols:
  283  *
  284  *      There are two structures in the pmap module that need locking:
  285  *      the pmaps themselves, and the per-page pv_lists (which are locked
  286  *      by locking the pv_lock_table entry that corresponds to the pv_head
  287  *      for the list in question.)  Most routines want to lock a pmap and
  288  *      then do operations in it that require pv_list locking -- however
  289  *      pmap_remove_all and pmap_copy_on_write operate on a physical page
  290  *      basis and want to do the locking in the reverse order, i.e. lock
  291  *      a pv_list and then go through all the pmaps referenced by that list.
  292  *      To protect against deadlock between these two cases, the pmap_lock
  293  *      is used.  There are three different locking protocols as a result:
  294  *
  295  *  1.  pmap operations only (pmap_extract, pmap_access, ...)  Lock only
  296  *              the pmap.
  297  *
  298  *  2.  pmap-based operations (pmap_enter, pmap_remove, ...)  Get a read
  299  *              lock on the pmap_lock (shared read), then lock the pmap
  300  *              and finally the pv_lists as needed [i.e. pmap lock before
  301  *              pv_list lock.]
  302  *
  303  *  3.  pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
  304  *              Get a write lock on the pmap_lock (exclusive write); this
   305  *              also guarantees exclusive access to the pv_lists.  Lock the
  306  *              pmaps as needed.
  307  *
  308  *      At no time may any routine hold more than one pmap lock or more than
  309  *      one pv_list lock.  Because interrupt level routines can allocate
  310  *      mbufs and cause pmap_enter's, the pmap_lock and the lock on the
  311  *      kernel_pmap can only be held at splhigh.
  312  */
  313 
  314 #if     NCPUS > 1
  315 /*
  316  *      We raise the interrupt level to splvm, to block interprocessor
  317  *      interrupts during pmap operations.  We must take the CPU out of
  318  *      the cpus_active set while interrupts are blocked.
  319  */
  320 #define SPLVM(spl)      { \
  321         spl = splhigh(); \
  322         mp_disable_preemption(); \
  323         i_bit_clear(cpu_number(), &cpus_active); \
  324         mp_enable_preemption(); \
  325 }
  326 
  327 #define SPLX(spl)       { \
  328         mp_disable_preemption(); \
  329         i_bit_set(cpu_number(), &cpus_active); \
  330         mp_enable_preemption(); \
  331         splx(spl); \
  332 }
  333 
  334 /*
  335  *      Lock on pmap system
  336  */
  337 lock_t  pmap_system_lock;
  338 
  339 #define PMAP_READ_LOCK(pmap, spl) {     \
  340         SPLVM(spl);                     \
  341         lock_read(&pmap_system_lock);   \
  342         simple_lock(&(pmap)->lock);     \
  343 }
  344 
  345 #define PMAP_WRITE_LOCK(spl) {          \
  346         SPLVM(spl);                     \
  347         lock_write(&pmap_system_lock);  \
  348 }
  349 
  350 #define PMAP_READ_UNLOCK(pmap, spl) {           \
  351         simple_unlock(&(pmap)->lock);           \
  352         lock_read_done(&pmap_system_lock);      \
  353         SPLX(spl);                              \
  354 }
  355 
  356 #define PMAP_WRITE_UNLOCK(spl) {                \
  357         lock_write_done(&pmap_system_lock);     \
  358         SPLX(spl);                              \
  359 }
  360 
  361 #define PMAP_WRITE_TO_READ_LOCK(pmap) {         \
  362         simple_lock(&(pmap)->lock);             \
  363         lock_write_to_read(&pmap_system_lock);  \
  364 }
  365 
  366 #define LOCK_PVH(index)         lock_pvh_pai(index)
  367 
  368 #define UNLOCK_PVH(index)       unlock_pvh_pai(index)
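       /*
        *      Sketch of locking protocol 2 from the "Locking Protocols" comment
        *      above: a pmap-based operation that edits one managed page's pv
        *      list (pa and pai are illustrative locals):
        *
        *              PMAP_READ_LOCK(pmap, spl);
        *              pai = pa_index(pa);
        *              LOCK_PVH(pai);
        *              ... edit pv_head_table[pai] ...
        *              UNLOCK_PVH(pai);
        *              PMAP_READ_UNLOCK(pmap, spl);
        *
        *      The pmap lock is always taken before any pv list lock, matching
        *      the ordering rule stated above.
        */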
  369 
  370 #if     USLOCK_DEBUG
  371 extern int      max_lock_loops;
  372 #define LOOP_VAR        int     loop_count = 0
  373 #define LOOP_CHECK(msg, pmap)                                           \
  374         if (loop_count++ > max_lock_loops) {                            \
  375                 mp_disable_preemption();                                \
  376                 kprintf("%s: cpu %d pmap %x, cpus_active %d\n",         \
  377                           msg, cpu_number(), pmap, cpus_active);        \
  378                 Debugger("deadlock detection");                         \
  379                 mp_enable_preemption();                                 \
  380                 loop_count = 0;                                         \
  381         }
  382 #else   /* USLOCK_DEBUG */
  383 #define LOOP_VAR
  384 #define LOOP_CHECK(msg, pmap)
  385 #endif  /* USLOCK_DEBUG */
  386 
  387 #define PMAP_UPDATE_TLBS(pmap, s, e)                                    \
  388 {                                                                       \
  389         cpu_set cpu_mask;                                               \
  390         cpu_set users;                                                  \
  391                                                                         \
  392         mp_disable_preemption();                                        \
  393         cpu_mask = 1 << cpu_number();                                   \
  394                                                                         \
  395         /* Since the pmap is locked, other updates are locked */        \
  396         /* out, and any pmap_activate has finished. */                  \
  397                                                                         \
  398         /* find other cpus using the pmap */                            \
  399         users = (pmap)->cpus_using & ~cpu_mask;                         \
  400         if (users) {                                                    \
  401             LOOP_VAR;                                                   \
  402             /* signal them, and wait for them to finish */              \
  403             /* using the pmap */                                        \
  404             signal_cpus(users, (pmap), (s), (e));                       \
  405             while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) {    \
  406                 LOOP_CHECK("PMAP_UPDATE_TLBS", pmap);                   \
  407                 cpu_pause();                                            \
  408             }                                                           \
  409         }                                                               \
  410         /* invalidate our own TLB if pmap is in use */                  \
  411                                                                         \
  412         if ((pmap)->cpus_using & cpu_mask) {                            \
  413             INVALIDATE_TLB((pmap), (s), (e));                           \
  414         }                                                               \
  415                                                                         \
  416         mp_enable_preemption();                                         \
  417 }
  418 
  419 #else   /* NCPUS > 1 */
  420 
  421 #if     MACH_RT
  422 #define SPLVM(spl)                      { (spl) = splhigh(); }
  423 #define SPLX(spl)                       splx (spl)
  424 #else   /* MACH_RT */
  425 #define SPLVM(spl)
  426 #define SPLX(spl)
  427 #endif  /* MACH_RT */
  428 
  429 #define PMAP_READ_LOCK(pmap, spl)       SPLVM(spl)
  430 #define PMAP_WRITE_LOCK(spl)            SPLVM(spl)
  431 #define PMAP_READ_UNLOCK(pmap, spl)     SPLX(spl)
  432 #define PMAP_WRITE_UNLOCK(spl)          SPLX(spl)
  433 #define PMAP_WRITE_TO_READ_LOCK(pmap)
  434 
  435 #if     MACH_RT
  436 #define LOCK_PVH(index)                 disable_preemption()
  437 #define UNLOCK_PVH(index)               enable_preemption()
  438 #else   /* MACH_RT */
  439 #define LOCK_PVH(index)
  440 #define UNLOCK_PVH(index)
  441 #endif  /* MACH_RT */
  442 
  443 #define PMAP_FLUSH_TLBS()       flush_tlb()
  444 #define PMAP_RELOAD_TLBS()      set_cr3(kernel_pmap->pdirbase)
  445 #define PMAP_INVALIDATE_PAGE(map, saddr, eaddr) {       \
  446                 if (map == kernel_pmap)                 \
  447                         invlpg((vm_offset_t) saddr);    \
  448                 else                                    \
  449                         flush_tlb();                    \
  450 }
  451 
  452 #endif  /* NCPUS > 1 */
  453 
  454 #define MAX_TBIS_SIZE   32              /* > this -> TBIA */ /* XXX */
  455 
  456 #define INVALIDATE_TLB(m, s, e) {       \
  457         flush_tlb();                    \
  458 }
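       /*
        *      Note: although INVALIDATE_TLB takes a map and a range, this i386
        *      version ignores them and flushes the whole TLB; MAX_TBIS_SIZE is
        *      not consulted by the macro as written.
        */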
  459 
  460 #if     NCPUS > 1
  461 /*
  462  *      Structures to keep track of pending TLB invalidations
  463  */
  464 cpu_set                 cpus_active;
  465 cpu_set                 cpus_idle;
  466 volatile boolean_t      cpu_update_needed[NCPUS];
  467 
  468 #define UPDATE_LIST_SIZE        4
  469 
  470 struct pmap_update_item {
  471         pmap_t          pmap;           /* pmap to invalidate */
  472         vm_offset_t     start;          /* start address to invalidate */
  473         vm_offset_t     end;            /* end address to invalidate */
  474 };
  475 
  476 typedef struct pmap_update_item *pmap_update_item_t;
  477 
  478 /*
  479  *      List of pmap updates.  If the list overflows,
  480  *      the last entry is changed to invalidate all.
  481  */
  482 struct pmap_update_list {
  483         decl_simple_lock_data(,lock)
  484         int                     count;
  485         struct pmap_update_item item[UPDATE_LIST_SIZE];
  486 } ;
  487 typedef struct pmap_update_list *pmap_update_list_t;
  488 
  489 struct pmap_update_list cpu_update_list[NCPUS];
  490 
  491 extern void signal_cpus(
  492                         cpu_set         use_list,
  493                         pmap_t          pmap,
  494                         vm_offset_t     start,
  495                         vm_offset_t     end);
  496 
  497 #endif  /* NCPUS > 1 */
  498 
  499 /*
  500  *      Other useful macros.
  501  */
  502 #define current_pmap()          (vm_map_pmap(current_act()->map))
  503 #define pmap_in_use(pmap, cpu)  (((pmap)->cpus_using & (1 << (cpu))) != 0)
  504 
  505 struct pmap     kernel_pmap_store;
  506 pmap_t          kernel_pmap;
  507 
  508 struct zone     *pmap_zone;             /* zone of pmap structures */
  509 
  510 int             pmap_debug = 0;         /* flag for debugging prints */
  511 int             ptes_per_vm_page;       /* number of hardware ptes needed
  512                                            to map one VM page. */
  513 unsigned int    inuse_ptepages_count = 0;       /* debugging */
  514 
  515 /*
  516  *      Pmap cache.  Cache is threaded through ref_count field of pmap.
  517  *      Max will eventually be constant -- variable for experimentation.
  518  */
  519 int             pmap_cache_max = 32;
  520 int             pmap_alloc_chunk = 8;
  521 pmap_t          pmap_cache_list;
  522 int             pmap_cache_count;
  523 decl_simple_lock_data(,pmap_cache_lock)
  524 
  525 extern  vm_offset_t     hole_start, hole_end;
  526 
  527 extern char end;
  528 
  529 /*
  530  * Page directory for kernel.
  531  */
  532 pt_entry_t      *kpde = 0;      /* set by start.s - keep out of bss */
  533 
  534 #if  DEBUG_ALIAS
  535 #define PMAP_ALIAS_MAX 32
  536 struct pmap_alias {
  537         vm_offset_t rpc;
  538         pmap_t pmap;
  539         vm_offset_t va;
  540         int cookie;
  541 #define PMAP_ALIAS_COOKIE 0xdeadbeef
  542 } pmap_aliasbuf[PMAP_ALIAS_MAX];
  543 int pmap_alias_index = 0;
  544 extern vm_offset_t get_rpc();
  545 
  546 #endif  /* DEBUG_ALIAS */
  547 
  548 /*
  549  *      Given an offset and a map, compute the address of the
  550  *      pte.  If the address is invalid with respect to the map
  551  *      then PT_ENTRY_NULL is returned (and the map may need to grow).
  552  *
  553  *      This is only used in machine-dependent code.
  554  */
  555 
  556 pt_entry_t *
  557 pmap_pte(
  558         register pmap_t         pmap,
  559         register vm_offset_t    addr)
  560 {
  561         register pt_entry_t     *ptp;
  562         register pt_entry_t     pte;
  563 
  564         pte = pmap->dirbase[pdenum(pmap, addr)];
  565         if ((pte & INTEL_PTE_VALID) == 0)
  566                 return(PT_ENTRY_NULL);
  567         ptp = (pt_entry_t *)ptetokv(pte);
  568         return(&ptp[ptenum(addr)]);
  569 
  570 }
  571 
  572 #define pmap_pde(pmap, addr) (&(pmap)->dirbase[pdenum(pmap, addr)])
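       /*
        *      Usage sketch (hypothetical kernel_pmap lookup): pmap_pte returns
        *      PT_ENTRY_NULL when the covering page-directory entry is not yet
        *      valid, so callers must check before dereferencing:
        *
        *              pt_entry_t      *pte = pmap_pte(kernel_pmap, va);
        *
        *              if (pte != PT_ENTRY_NULL && (*pte & INTEL_PTE_VALID))
        *                      pa = pte_to_pa(*pte) + (va & (INTEL_PGBYTES - 1));
        */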
  573 
  574 #define DEBUG_PTE_PAGE  0
  575 
  576 #if     DEBUG_PTE_PAGE
  577 void
  578 ptep_check(
  579         ptep_t  ptep)
  580 {
  581         register pt_entry_t     *pte, *epte;
  582         int                     ctu, ctw;
  583 
  584         /* check the use and wired counts */
  585         if (ptep == PTE_PAGE_NULL)
  586                 return;
  587         pte = pmap_pte(ptep->pmap, ptep->va);
  588         epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
  589         ctu = 0;
  590         ctw = 0;
  591         while (pte < epte) {
  592                 if (pte->pfn != 0) {
  593                         ctu++;
  594                         if (pte->wired)
  595                                 ctw++;
  596                 }
  597                 pte += ptes_per_vm_page;
  598         }
  599 
  600         if (ctu != ptep->use_count || ctw != ptep->wired_count) {
  601                 printf("use %d wired %d - actual use %d wired %d\n",
  602                         ptep->use_count, ptep->wired_count, ctu, ctw);
  603                 panic("pte count");
  604         }
  605 }
  606 #endif  /* DEBUG_PTE_PAGE */
  607 
  608 /*
  609  *      Map memory at initialization.  The physical addresses being
  610  *      mapped are not managed and are never unmapped.
  611  *
  612  *      For now, VM is already on, we only need to map the
  613  *      specified memory.
  614  */
  615 vm_offset_t
  616 pmap_map(
  617         register vm_offset_t    virt,
  618         register vm_offset_t    start,
  619         register vm_offset_t    end,
  620         register vm_prot_t      prot)
  621 {
  622         register int            ps;
  623 
  624         ps = PAGE_SIZE;
  625         while (start < end) {
  626                 pmap_enter(kernel_pmap, virt, (ppnum_t)i386_btop(start), prot, 0, FALSE);
  627                 virt += ps;
  628                 start += ps;
  629         }
  630         return(virt);
  631 }
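       /*
        *      Usage sketch (illustrative values): wiring a physical range into
        *      the kernel at boot and advancing the virtual cursor:
        *
        *              virt = pmap_map(virt, start_pa, start_pa + size,
        *                              VM_PROT_READ | VM_PROT_WRITE);
        *
        *      The return value is the first virtual address beyond the new
        *      mapping.
        */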
  632 
  633 /*
  634  *      Back-door routine for mapping kernel VM at initialization.  
  635  *      Useful for mapping memory outside the range
   636  *      [vm_first_phys, vm_last_phys) (i.e., devices).
   637  *      Sets no-cache, A, D.
  638  *      Otherwise like pmap_map.
  639  */
  640 vm_offset_t
  641 pmap_map_bd(
  642         register vm_offset_t    virt,
  643         register vm_offset_t    start,
  644         register vm_offset_t    end,
  645         vm_prot_t               prot)
  646 {
  647         register pt_entry_t     template;
  648         register pt_entry_t     *pte;
  649 
  650         template = pa_to_pte(start)
  651                 | INTEL_PTE_NCACHE
  652                 | INTEL_PTE_REF
  653                 | INTEL_PTE_MOD
  654                 | INTEL_PTE_WIRED
  655                 | INTEL_PTE_VALID;
  656         if (prot & VM_PROT_WRITE)
  657             template |= INTEL_PTE_WRITE;
  658 
  659         while (start < end) {
  660                 pte = pmap_pte(kernel_pmap, virt);
  661                 if (pte == PT_ENTRY_NULL)
  662                         panic("pmap_map_bd: Invalid kernel address\n");
  663                 WRITE_PTE_FAST(pte, template)
  664                 pte_increment_pa(template);
  665                 virt += PAGE_SIZE;
  666                 start += PAGE_SIZE;
  667         }
  668 
  669         flush_tlb();
  670         return(virt);
  671 }
  672 
  673 extern int              cnvmem;
  674 extern  char            *first_avail;
  675 extern  vm_offset_t     virtual_avail, virtual_end;
  676 extern  vm_offset_t     avail_start, avail_end, avail_next;
  677 
  678 /*
  679  *      Bootstrap the system enough to run with virtual memory.
  680  *      Map the kernel's code and data, and allocate the system page table.
  681  *      Called with mapping OFF.  Page_size must already be set.
  682  *
  683  *      Parameters:
  684  *      load_start:     PA where kernel was loaded
  685  *      avail_start     PA of first available physical page -
  686  *                         after kernel page tables
  687  *      avail_end       PA of last available physical page
  688  *      virtual_avail   VA of first available page -
  689  *                         after kernel page tables
  690  *      virtual_end     VA of last available page -
  691  *                         end of kernel address space
  692  *
  693  *      &start_text     start of kernel text
  694  *      &etext          end of kernel text
  695  */
  696 
  697 void
  698 pmap_bootstrap(
  699         vm_offset_t     load_start)
  700 {
  701         vm_offset_t     va, tva, paddr;
  702         ppnum_t         pn;
  703         pt_entry_t      template;
  704         pt_entry_t      *pde, *pte, *ptend;
  705         vm_size_t       morevm;         /* VM space for kernel map */
  706 
  707         vm_last_addr = VM_MAX_KERNEL_ADDRESS;                                   /* Set the highest address known to VM */
  708 
  709         /*
  710          *      Set ptes_per_vm_page for general use.
  711          */
  712         ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES;
  713 
  714         /*
  715          *      The kernel's pmap is statically allocated so we don't
  716          *      have to use pmap_create, which is unlikely to work
  717          *      correctly at this part of the boot sequence.
  718          */
  719 
  720         kernel_pmap = &kernel_pmap_store;
  721 
  722 #if     NCPUS > 1
  723         lock_init(&pmap_system_lock,
  724                   FALSE,                /* NOT a sleep lock */
  725                   ETAP_VM_PMAP_SYS,
  726                   ETAP_VM_PMAP_SYS_I);
  727 #endif  /* NCPUS > 1 */
  728 
  729         simple_lock_init(&kernel_pmap->lock, ETAP_VM_PMAP_KERNEL);
  730         simple_lock_init(&pv_free_list_lock, ETAP_VM_PMAP_FREE);
  731 
  732         kernel_pmap->ref_count = 1;
  733 
  734         /*
  735          *      The kernel page directory has been allocated;
  736          *      its virtual address is in kpde.
  737          *
  738          *      Enough kernel page table pages have been allocated
  739          *      to map low system memory, kernel text, kernel data/bss,
  740          *      kdb's symbols, and the page directory and page tables.
  741          *
  742          *      No other physical memory has been allocated.
  743          */
  744 
  745         /*
  746          * Start mapping virtual memory to physical memory, 1-1,
  747          * at end of mapped memory.
  748          */
  749 
  750         virtual_avail = phystokv(avail_start);
  751         virtual_end = phystokv(avail_end);
  752 
  753         pde = kpde;
  754         pde += pdenum(kernel_pmap, virtual_avail);
  755 
  756         if (pte_to_pa(*pde) == 0) {
  757             /* This pte has not been allocated */
  758             pte = 0; ptend = 0;
  759         }
  760         else {
  761             pte = (pt_entry_t *)ptetokv(*pde);
  762                                                 /* first pte of page */
  763             ptend = pte+NPTES;                  /* last pte of page */
  764             pte += ptenum(virtual_avail);       /* point to pte that
  765                                                    maps first avail VA */
  766             pde++;      /* point pde to first empty slot */
  767         }
  768 
  769         template = pa_to_pte(avail_start)
  770                 | INTEL_PTE_VALID
  771                 | INTEL_PTE_WRITE;
  772 
  773         for (va = virtual_avail; va < virtual_end; va += INTEL_PGBYTES) {
  774             if (pte >= ptend) {
  775                 pte = (pt_entry_t *)phystokv(virtual_avail);
  776                 ptend = pte + NPTES;
  777                 virtual_avail = (vm_offset_t)ptend;
  778                 if (virtual_avail == hole_start)
  779                   virtual_avail = hole_end;
  780                 *pde = PA_TO_PTE((vm_offset_t) pte)
  781                         | INTEL_PTE_VALID
  782                         | INTEL_PTE_WRITE;
  783                 pde++;
  784             }
  785             WRITE_PTE_FAST(pte, template)
  786             pte++;
  787             pte_increment_pa(template);
  788         }
  789 
  790         avail_start = virtual_avail - VM_MIN_KERNEL_ADDRESS;
  791         avail_next = avail_start;
  792 
  793         /*
  794          *      Figure out maximum kernel address.
  795          *      Kernel virtual space is:
  796          *              - at least three times physical memory
  797          *              - at least VM_MIN_KERNEL_ADDRESS
  798          *              - limited by VM_MAX_KERNEL_ADDRESS
  799          */
  800 
  801         morevm = 3*avail_end;
  802         if (virtual_end + morevm > VM_MAX_KERNEL_ADDRESS)
  803           morevm = VM_MAX_KERNEL_ADDRESS - virtual_end + 1;
  804 
  805 /*
  806  *      startup requires additional virtual memory (for tables, buffers, 
  807  *      etc.).  The kd driver may also require some of that memory to
  808  *      access the graphics board.
  809  *
  810  */
  811         *(int *)&template = 0;
  812 
  813         /*
  814          * Leave room for kernel-loaded servers, which have been linked at
  815          * addresses from VM_MIN_KERNEL_LOADED_ADDRESS to
  816          * VM_MAX_KERNEL_LOADED_ADDRESS.
  817          */
  818         if (virtual_end + morevm < VM_MAX_KERNEL_LOADED_ADDRESS + 1)
  819                 morevm = VM_MAX_KERNEL_LOADED_ADDRESS + 1 - virtual_end;
  820 
  821         virtual_end += morevm;
  822         for (tva = va; tva < virtual_end; tva += INTEL_PGBYTES) {
  823             if (pte >= ptend) {
  824                 pmap_next_page(&pn);
  825                 paddr = i386_ptob(pn);
  826                 pte = (pt_entry_t *)phystokv(paddr);
  827                 ptend = pte + NPTES;
  828                 *pde = PA_TO_PTE((vm_offset_t) pte)
  829                         | INTEL_PTE_VALID
  830                         | INTEL_PTE_WRITE;
  831                 pde++;
  832             }
  833             WRITE_PTE_FAST(pte, template)
  834             pte++;
  835         }
  836 
  837         virtual_avail = va;
  838 
  839         /* Push the virtual avail address above hole_end */
  840         if (virtual_avail < hole_end)
  841                 virtual_avail = hole_end;
  842 
  843         /*
  844          *      c.f. comment above
  845          *
  846          */
  847         virtual_end = va + morevm;
  848         while (pte < ptend)
  849             *pte++ = 0;
  850 
  851         /*
  852          *      invalidate user virtual addresses 
  853          */
  854         memset((char *)kpde,
  855                0,
  856                pdenum(kernel_pmap,VM_MIN_KERNEL_ADDRESS)*sizeof(pt_entry_t));
  857         kernel_pmap->dirbase = kpde;
  858         printf("Kernel virtual space from 0x%x to 0x%x.\n",
  859                         VM_MIN_KERNEL_ADDRESS, virtual_end);
  860 
  861         avail_start = avail_next;
  862         printf("Available physical space from 0x%x to 0x%x\n",
  863                         avail_start, avail_end);
  864 
  865         kernel_pmap->pdirbase = kvtophys((vm_offset_t)kernel_pmap->dirbase);
  866 
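               /*
                * Descriptive note: when the CPU advertises the Page Attribute
                * Table, the code below rewrites the IA32_PAT MSR (0x277),
                * clearing the low nibble of PAT entry 6 (bits 48..51) and
                * setting it to 0x01, i.e. write-combining.  The encoding is an
                * Intel architectural constant, not defined in this file.
                */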
  867         if (cpuid_features() & CPUID_FEATURE_PAT)
  868         {
  869                 uint64_t pat;
  870                 uint32_t msr;
  871             
  872                 msr = 0x277;
  873                 asm volatile("rdmsr" : "=A" (pat) : "c" (msr));
  874             
  875                 pat &= ~(0xfULL << 48);
  876                 pat |= 0x01ULL << 48;
  877             
  878                 asm volatile("wrmsr" :: "A" (pat), "c" (msr));
  879         }
  880 }
  881 
  882 void
  883 pmap_virtual_space(
  884         vm_offset_t *startp,
  885         vm_offset_t *endp)
  886 {
  887         *startp = virtual_avail;
  888         *endp = virtual_end;
  889 }
  890 
  891 /*
  892  *      Initialize the pmap module.
  893  *      Called by vm_init, to initialize any structures that the pmap
  894  *      system needs to map virtual memory.
  895  */
  896 void
  897 pmap_init(void)
  898 {
  899         register long           npages;
  900         vm_offset_t             addr;
  901         register vm_size_t      s;
  902         int                     i;
  903 
  904         /*
  905          *      Allocate memory for the pv_head_table and its lock bits,
  906          *      the modify bit array, and the pte_page table.
  907          */
  908 
  909         npages = atop(avail_end - avail_start);
  910         s = (vm_size_t) (sizeof(struct pv_entry) * npages
  911                                 + pv_lock_table_size(npages)
  912                                 + npages);
  913 
  914         s = round_page(s);
  915         if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
  916                 panic("pmap_init");
  917 
  918         memset((char *)addr, 0, s);
  919 
  920         /*
  921          *      Allocate the structures first to preserve word-alignment.
  922          */
  923         pv_head_table = (pv_entry_t) addr;
  924         addr = (vm_offset_t) (pv_head_table + npages);
  925 
  926         pv_lock_table = (char *) addr;
  927         addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
  928 
  929         pmap_phys_attributes = (char *) addr;
  930 
  931         /*
  932          *      Create the zone of physical maps,
  933          *      and of the physical-to-virtual entries.
  934          */
  935         s = (vm_size_t) sizeof(struct pmap);
  936         pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
  937         s = (vm_size_t) sizeof(struct pv_entry);
  938         pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
  939 
  940 #if     NCPUS > 1
  941         /*
  942          *      Set up the pmap request lists
  943          */
  944         for (i = 0; i < NCPUS; i++) {
  945             pmap_update_list_t  up = &cpu_update_list[i];
  946 
  947             simple_lock_init(&up->lock, ETAP_VM_PMAP_UPDATE);
  948             up->count = 0;
  949         }
  950 #endif  /* NCPUS > 1 */
  951 
  952         /*
  953          *      Only now, when all of the data structures are allocated,
  954          *      can we set vm_first_phys and vm_last_phys.  If we set them
  955          *      too soon, the kmem_alloc_wired above will try to use these
  956          *      data structures and blow up.
  957          */
  958 
  959         vm_first_phys = avail_start;
  960         vm_last_phys = avail_end;
  961         pmap_initialized = TRUE;
  962 
  963         /*
   964  *      Initialize pmap cache.
  965          */
  966         pmap_cache_list = PMAP_NULL;
  967         pmap_cache_count = 0;
  968         simple_lock_init(&pmap_cache_lock, ETAP_VM_PMAP_CACHE);
  969 }
  970 
  971 
  972 #define pmap_valid_page(x)      ((avail_start <= x) && (x < avail_end))
  973 
  974 
  975 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
  976 
  977 boolean_t
  978 pmap_verify_free(
  979                  ppnum_t pn)
  980 {
  981         vm_offset_t     phys;
  982         pv_entry_t      pv_h;
  983         int             pai;
  984         spl_t           spl;
  985         boolean_t       result;
  986 
  987         assert(pn != vm_page_fictitious_addr);
  988         phys = (vm_offset_t)i386_ptob(pn);
  989         if (!pmap_initialized)
  990                 return(TRUE);
  991 
  992         if (!pmap_valid_page(phys))
  993                 return(FALSE);
  994 
  995         PMAP_WRITE_LOCK(spl);
  996 
  997         pai = pa_index(phys);
  998         pv_h = pai_to_pvh(pai);
  999 
 1000         result = (pv_h->pmap == PMAP_NULL);
 1001         PMAP_WRITE_UNLOCK(spl);
 1002 
 1003         return(result);
 1004 }
 1005 
 1006 /*
 1007  *      Create and return a physical map.
 1008  *
 1009  *      If the size specified for the map
 1010  *      is zero, the map is an actual physical
 1011  *      map, and may be referenced by the
 1012  *      hardware.
 1013  *
 1014  *      If the size specified is non-zero,
 1015  *      the map will be used in software only, and
 1016  *      is bounded by that size.
 1017  */
 1018 pmap_t
 1019 pmap_create(
 1020         vm_size_t       size)
 1021 {
 1022         register pmap_t                 p;
 1023         register pmap_statistics_t      stats;
 1024 
 1025         /*
 1026          *      A software use-only map doesn't even need a map.
 1027          */
 1028 
 1029         if (size != 0) {
 1030                 return(PMAP_NULL);
 1031         }
 1032 
 1033         /*
 1034          *      Try to get cached pmap, if this fails,
 1035          *      allocate a pmap struct from the pmap_zone.  Then allocate
 1036          *      the page descriptor table from the pd_zone.
 1037          */
 1038 
 1039         simple_lock(&pmap_cache_lock);
 1040         while ((p = pmap_cache_list) == PMAP_NULL) {
 1041 
 1042                 vm_offset_t             dirbases;
 1043                 register int            i;
 1044 
 1045                 simple_unlock(&pmap_cache_lock);
 1046 
 1047 #if     NCPUS > 1
 1048         /*
 1049          * XXX  NEEDS MP DOING ALLOC logic so that if multiple processors
 1050          * XXX  get here, only one allocates a chunk of pmaps.
 1051          * (for now we'll just let it go - safe but wasteful)
 1052          */
 1053 #endif
 1054 
 1055                 /*
  1056                  *      Allocate a chunk of pmaps.  A single kmem_alloc_wired
 1057                  *      operation reduces kernel map fragmentation.
 1058                  */
 1059 
 1060                 if (kmem_alloc_wired(kernel_map, &dirbases,
 1061                                      pmap_alloc_chunk * INTEL_PGBYTES)
 1062                                                         != KERN_SUCCESS)
 1063                         panic("pmap_create.1");
 1064 
 1065                 for (i = pmap_alloc_chunk; i > 0 ; i--) {
 1066                         p = (pmap_t) zalloc(pmap_zone);
 1067                         if (p == PMAP_NULL)
 1068                                 panic("pmap_create.2");
 1069 
 1070                         /*
 1071                          *      Initialize pmap.  Don't bother with
 1072                          *      ref count as cache list is threaded
 1073                          *      through it.  It'll be set on cache removal.
 1074                          */
 1075                         p->dirbase = (pt_entry_t *) dirbases;
 1076                         dirbases += INTEL_PGBYTES;
 1077                         memcpy(p->dirbase, kpde, INTEL_PGBYTES);
 1078                         p->pdirbase = kvtophys((vm_offset_t)p->dirbase);
 1079 
 1080                         simple_lock_init(&p->lock, ETAP_VM_PMAP);
 1081                         p->cpus_using = 0;
 1082 
 1083                         /*
 1084                          *      Initialize statistics.
 1085                          */
 1086                         stats = &p->stats;
 1087                         stats->resident_count = 0;
 1088                         stats->wired_count = 0;
 1089                         
 1090                         /*
 1091                          *      Insert into cache
 1092                          */
 1093                         simple_lock(&pmap_cache_lock);
 1094                         p->ref_count = (int) pmap_cache_list;
 1095                         pmap_cache_list = p;
 1096                         pmap_cache_count++;
 1097                         simple_unlock(&pmap_cache_lock);
 1098                 }
 1099                 simple_lock(&pmap_cache_lock);
 1100         }
 1101 
 1102         p->stats.resident_count = 0;
 1103         p->stats.wired_count = 0;
 1104 
 1105         pmap_cache_list = (pmap_t) p->ref_count;
 1106         p->ref_count = 1;
 1107         pmap_cache_count--;
 1108         simple_unlock(&pmap_cache_lock);
 1109 
 1110         return(p);
 1111 }
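       /*
        *      Usage sketch: a hardware-usable address space is obtained with a
        *      zero size argument; any non-zero size returns PMAP_NULL, since
        *      software-only maps are not implemented here.
        *
        *              pmap_t  p = pmap_create(0);
        *              ...
        *              pmap_destroy(p);
        */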
 1112 
 1113 /*
 1114  *      Retire the given physical map from service.
 1115  *      Should only be called if the map contains
 1116  *      no valid mappings.
 1117  */
 1118 
 1119 void
 1120 pmap_destroy(
 1121         register pmap_t p)
 1122 {
 1123         register pt_entry_t     *pdep;
 1124         register vm_offset_t    pa;
 1125         register int            c;
 1126         spl_t                   s;
 1127         register vm_page_t      m;
 1128 
 1129         if (p == PMAP_NULL)
 1130                 return;
 1131 
 1132         SPLVM(s);
 1133         simple_lock(&p->lock);
 1134         c = --p->ref_count;
 1135         if (c == 0) {
 1136                 register int    my_cpu;
 1137 
 1138                 mp_disable_preemption();
 1139                 my_cpu = cpu_number();
 1140 
 1141                 /* 
 1142                  * If some cpu is not using the physical pmap pointer that it
 1143                  * is supposed to be (see set_dirbase), we might be using the
 1144                  * pmap that is being destroyed! Make sure we are
 1145                  * physically on the right pmap:
 1146                  */
 1147 
 1148 #if     NCPUS > 1
 1149                 /* force pmap/cr3 update */
 1150                 PMAP_UPDATE_TLBS(p,
 1151                                  VM_MIN_ADDRESS,
 1152                                  VM_MAX_KERNEL_ADDRESS);
 1153 #endif  /* NCPUS > 1 */
 1154 
 1155                 if (real_pmap[my_cpu] == p) {
 1156                         PMAP_CPU_CLR(p, my_cpu);
 1157                         real_pmap[my_cpu] = kernel_pmap;
 1158                         set_cr3(kernel_pmap->pdirbase);
 1159                 }
 1160                 mp_enable_preemption();
 1161         }
 1162         simple_unlock(&p->lock);
 1163         SPLX(s);
 1164 
 1165         if (c != 0) {
 1166             return;     /* still in use */
 1167         }
 1168 
 1169         /*
 1170          *      Free the memory maps, then the
 1171          *      pmap structure.
 1172          */
 1173         pdep = p->dirbase;
 1174         while (pdep < &p->dirbase[pdenum(p, LINEAR_KERNEL_ADDRESS)]) {
 1175             if (*pdep & INTEL_PTE_VALID) {
 1176                 pa = pte_to_pa(*pdep);
 1177                 vm_object_lock(pmap_object);
 1178                 m = vm_page_lookup(pmap_object, pa);
 1179                 if (m == VM_PAGE_NULL)
 1180                     panic("pmap_destroy: pte page not in object");
 1181                 vm_page_lock_queues();
 1182                 vm_page_free(m);
 1183                 inuse_ptepages_count--;
 1184                 vm_object_unlock(pmap_object);
 1185                 vm_page_unlock_queues();
 1186 
 1187                 /*
 1188                  *      Clear pdes, this might be headed for the cache.
 1189                  */
 1190                 c = ptes_per_vm_page;
 1191                 do {
 1192                     *pdep = 0;
 1193                     pdep++;
 1194                 } while (--c > 0);
 1195             }
 1196             else {
 1197                 pdep += ptes_per_vm_page;
 1198             }
 1199         
 1200         }
 1201 
 1202         /*
 1203          * XXX These asserts fail on system shutdown.
 1204          *
 1205         assert(p->stats.resident_count == 0);
 1206         assert(p->stats.wired_count == 0);
 1207          *
 1208          */
 1209 
 1210         /*
 1211          *      Add to cache if not already full
 1212          */
 1213         simple_lock(&pmap_cache_lock);
 1214         if (pmap_cache_count <= pmap_cache_max) {
 1215                 p->ref_count = (int) pmap_cache_list;
 1216                 pmap_cache_list = p;
 1217                 pmap_cache_count++;
 1218                 simple_unlock(&pmap_cache_lock);
 1219         }
 1220         else {
 1221                 simple_unlock(&pmap_cache_lock);
 1222                 kmem_free(kernel_map, (vm_offset_t)p->dirbase, INTEL_PGBYTES);
 1223                 zfree(pmap_zone, (vm_offset_t) p);
 1224         }
 1225 }
 1226 
 1227 /*
 1228  *      Add a reference to the specified pmap.
 1229  */
 1230 
 1231 void
 1232 pmap_reference(
 1233         register pmap_t p)
 1234 {
 1235         spl_t   s;
 1236 
 1237         if (p != PMAP_NULL) {
 1238                 SPLVM(s);
 1239                 simple_lock(&p->lock);
 1240                 p->ref_count++;
 1241                 simple_unlock(&p->lock);
 1242                 SPLX(s);
 1243         }
 1244 }
 1245 
 1246 /*
 1247  *      Remove a range of hardware page-table entries.
 1248  *      The entries given are the first (inclusive)
 1249  *      and last (exclusive) entries for the VM pages.
 1250  *      The virtual address is the va for the first pte.
 1251  *
 1252  *      The pmap must be locked.
 1253  *      If the pmap is not the kernel pmap, the range must lie
 1254  *      entirely within one pte-page.  This is NOT checked.
 1255  *      Assumes that the pte-page exists.
 1256  */
 1257 
 1258 /* static */
 1259 void
 1260 pmap_remove_range(
 1261         pmap_t                  pmap,
 1262         vm_offset_t             va,
 1263         pt_entry_t              *spte,
 1264         pt_entry_t              *epte)
 1265 {
 1266         register pt_entry_t     *cpte;
 1267         int                     num_removed, num_unwired;
 1268         int                     pai;
 1269         vm_offset_t             pa;
 1270 
 1271 #if     DEBUG_PTE_PAGE
 1272         if (pmap != kernel_pmap)
 1273                 ptep_check(get_pte_page(spte));
 1274 #endif  /* DEBUG_PTE_PAGE */
 1275         num_removed = 0;
 1276         num_unwired = 0;
 1277 
 1278         for (cpte = spte; cpte < epte;
 1279              cpte += ptes_per_vm_page, va += PAGE_SIZE) {
 1280 
 1281             pa = pte_to_pa(*cpte);
 1282             if (pa == 0)
 1283                 continue;
 1284 
 1285             num_removed++;
 1286             if (iswired(*cpte))
 1287                 num_unwired++;
 1288 
 1289             if (!valid_page(pa)) {
 1290 
 1291                 /*
 1292                  *      Outside range of managed physical memory.
 1293                  *      Just remove the mappings.
 1294                  */
 1295                 register int    i = ptes_per_vm_page;
 1296                 register pt_entry_t     *lpte = cpte;
 1297                 do {
 1298                     *lpte = 0;
 1299                     lpte++;
 1300                 } while (--i > 0);
 1301                 continue;
 1302             }
 1303 
 1304             pai = pa_index(pa);
 1305             LOCK_PVH(pai);
 1306 
 1307             /*
 1308              *  Get the modify and reference bits.
 1309              */
 1310             {
 1311                 register int            i;
 1312                 register pt_entry_t     *lpte;
 1313 
 1314                 i = ptes_per_vm_page;
 1315                 lpte = cpte;
 1316                 do {
 1317                     pmap_phys_attributes[pai] |=
 1318                         *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
 1319                     *lpte = 0;
 1320                     lpte++;
 1321                 } while (--i > 0);
 1322             }
 1323 
 1324             /*
 1325              *  Remove the mapping from the pvlist for
 1326              *  this physical page.
 1327              */
 1328             {
 1329                 register pv_entry_t     pv_h, prev, cur;
 1330 
 1331                 pv_h = pai_to_pvh(pai);
 1332                 if (pv_h->pmap == PMAP_NULL) {
 1333                     panic("pmap_remove: null pv_list!");
 1334                 }
 1335                 if (pv_h->va == va && pv_h->pmap == pmap) {
 1336                     /*
 1337                      * Header is the pv_entry.  Copy the next one
 1338                      * to header and free the next one (we cannot
 1339                      * free the header)
 1340                      */
 1341                     cur = pv_h->next;
 1342                     if (cur != PV_ENTRY_NULL) {
 1343                         *pv_h = *cur;
 1344                         PV_FREE(cur);
 1345                     }
 1346                     else {
 1347                         pv_h->pmap = PMAP_NULL;
 1348                     }
 1349                 }
 1350                 else {
 1351                     cur = pv_h;
 1352                     do {
 1353                         prev = cur;
 1354                         if ((cur = prev->next) == PV_ENTRY_NULL) {
  1355                           panic("pmap_remove: mapping not in pv_list!");
 1356                         }
 1357                     } while (cur->va != va || cur->pmap != pmap);
 1358                     prev->next = cur->next;
 1359                     PV_FREE(cur);
 1360                 }
 1361                 UNLOCK_PVH(pai);
 1362             }
 1363         }
 1364 
 1365         /*
 1366          *      Update the counts
 1367          */
 1368         assert(pmap->stats.resident_count >= num_removed);
 1369         pmap->stats.resident_count -= num_removed;
 1370         assert(pmap->stats.wired_count >= num_unwired);
 1371         pmap->stats.wired_count -= num_unwired;
 1372 }
 1373 
 1374 /*
 1375  *      Remove phys addr if mapped in specified map
 1376  *
 1377  */
 1378 void
 1379 pmap_remove_some_phys(
 1380         pmap_t          map,
 1381         ppnum_t         pn)
 1382 {
 1383 
 1384 /* Implement to support working set code */
 1385 
 1386 }
 1387 
 1388 
 1389 /*
 1390  *      Remove the given range of addresses
 1391  *      from the specified map.
 1392  *
 1393  *      It is assumed that the start and end are properly
 1394  *      rounded to the hardware page size.
 1395  */
 1396 
 1397 
 1398 void
 1399 pmap_remove(
 1400         pmap_t          map,
 1401         addr64_t        s64,
 1402         addr64_t        e64)
 1403 {
 1404         spl_t                   spl;
 1405         register pt_entry_t     *pde;
 1406         register pt_entry_t     *spte, *epte;
 1407         vm_offset_t             l;
 1408         vm_offset_t    s, e;
 1409 
 1410         if (map == PMAP_NULL)
 1411                 return;
 1412 
 1413         PMAP_READ_LOCK(map, spl);
 1414 
 1415         if (value_64bit(s64) || value_64bit(e64)) {
 1416           panic("pmap_remove addr overflow");
 1417         }
 1418 
 1419         s = (vm_offset_t)low32(s64);
 1420         e = (vm_offset_t)low32(e64);
 1421 
 1422         /*
 1423          *      Invalidate the translation buffer first
 1424          */
 1425         PMAP_UPDATE_TLBS(map, s, e);
 1426 
 1427         pde = pmap_pde(map, s);
 1428 
 1429         while (s < e) {
 1430             l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
 1431             if (l > e)
 1432                 l = e;
 1433             if (*pde & INTEL_PTE_VALID) {
 1434                 spte = (pt_entry_t *)ptetokv(*pde);
 1435                 spte = &spte[ptenum(s)];
 1436                 epte = &spte[intel_btop(l-s)];
 1437                 pmap_remove_range(map, s, spte, epte);
 1438             }
 1439             s = l;
 1440             pde++;
 1441         }
 1442 
 1443         PMAP_READ_UNLOCK(map, spl);
 1444 }
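       /*
        *      Usage sketch: callers pass page-rounded 64-bit addresses; on this
        *      32-bit pmap any address with the upper 32 bits set trips the
        *      value_64bit() panic above.
        *
        *              pmap_remove(map, (addr64_t)s, (addr64_t)e);
        */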
 1445 
 1446 /*
 1447  *      Routine:        pmap_page_protect
 1448  *
 1449  *      Function:
 1450  *              Lower the permission for all mappings to a given
 1451  *              page.
 1452  */
 1453 void
 1454 pmap_page_protect(
 1455         ppnum_t         pn,
 1456         vm_prot_t       prot)
 1457 {
 1458         pv_entry_t              pv_h, prev;
 1459         register pv_entry_t     pv_e;
 1460         register pt_entry_t     *pte;
 1461         int                     pai;
 1462         register pmap_t         pmap;
 1463         spl_t                   spl;
 1464         boolean_t               remove;
 1465         vm_offset_t             phys;
 1466 
 1467         assert(pn != vm_page_fictitious_addr);
 1468         phys = (vm_offset_t)i386_ptob(pn);
 1469         if (!valid_page(phys)) {
 1470             /*
 1471              *  Not a managed page.
 1472              */
 1473             return;
 1474         }
 1475 
 1476         /*
 1477          * Determine the new protection.
 1478          */
 1479         switch (prot) {
 1480             case VM_PROT_READ:
 1481             case VM_PROT_READ|VM_PROT_EXECUTE:
 1482                 remove = FALSE;
 1483                 break;
 1484             case VM_PROT_ALL:
 1485                 return; /* nothing to do */
 1486             default:
 1487                 remove = TRUE;
 1488                 break;
 1489         }
 1490 
 1491         /*
 1492          *      Lock the pmap system first, since we will be changing
 1493          *      several pmaps.
 1494          */
 1495 
 1496         PMAP_WRITE_LOCK(spl);
 1497 
 1498         pai = pa_index(phys);
 1499         pv_h = pai_to_pvh(pai);
 1500 
 1501         /*
 1502          * Walk down PV list, changing or removing all mappings.
 1503          * We do not have to lock the pv_list because we have
 1504          * the entire pmap system locked.
 1505          */
 1506         if (pv_h->pmap != PMAP_NULL) {
 1507 
 1508             prev = pv_e = pv_h;
 1509             do {
 1510                 pmap = pv_e->pmap;
 1511                 /*
 1512                  * Lock the pmap to block pmap_extract and similar routines.
 1513                  */
 1514                 simple_lock(&pmap->lock);
 1515 
 1516                 {
 1517                     register vm_offset_t va;
 1518 
 1519                     va = pv_e->va;
 1520                     pte = pmap_pte(pmap, va);
 1521 
 1522                     /*
 1523                      * Consistency checks.
 1524                      */
 1525                     /* assert(*pte & INTEL_PTE_VALID); XXX */
 1526                     /* assert(pte_to_phys(*pte) == phys); */
 1527 
 1528                     /*
 1529                      * Invalidate TLBs for all CPUs using this mapping.
 1530                      */
 1531                     PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
 1532                 }
 1533 
 1534                 /*
 1535                  * Remove the mapping if new protection is NONE
 1536                  * or if write-protecting a kernel mapping.
 1537                  */
 1538                 if (remove || pmap == kernel_pmap) {
 1539                     /*
 1540                      * Remove the mapping, collecting any modify bits.
 1541                      */
 1542                     {
 1543                         register int    i = ptes_per_vm_page;
 1544 
 1545                         do {
 1546                             pmap_phys_attributes[pai] |=
 1547                                 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
 1548                             *pte++ = 0;
 1549                         } while (--i > 0);
 1550                     }
 1551 
 1552                     assert(pmap->stats.resident_count >= 1);
 1553                     pmap->stats.resident_count--;
 1554 
 1555                     /*
 1556                      * Remove the pv_entry.
 1557                      */
 1558                     if (pv_e == pv_h) {
 1559                         /*
 1560                          * Fix up head later.
 1561                          */
 1562                         pv_h->pmap = PMAP_NULL;
 1563                     }
 1564                     else {
 1565                         /*
 1566                          * Delete this entry.
 1567                          */
 1568                         prev->next = pv_e->next;
 1569                         PV_FREE(pv_e);
 1570                     }
 1571                 }
 1572                 else {
 1573                     /*
 1574                      * Write-protect.
 1575                      */
 1576                     register int i = ptes_per_vm_page;
 1577 
 1578                     do {
 1579                         *pte &= ~INTEL_PTE_WRITE;
 1580                         pte++;
 1581                     } while (--i > 0);
 1582 
 1583                     /*
 1584                      * Advance prev.
 1585                      */
 1586                     prev = pv_e;
 1587                 }
 1588 
 1589                 simple_unlock(&pmap->lock);
 1590 
 1591             } while ((pv_e = prev->next) != PV_ENTRY_NULL);
 1592 
 1593             /*
 1594              * If pv_head mapping was removed, fix it up.
 1595              */
 1596             if (pv_h->pmap == PMAP_NULL) {
 1597                 pv_e = pv_h->next;
 1598                 if (pv_e != PV_ENTRY_NULL) {
 1599                     *pv_h = *pv_e;
 1600                     PV_FREE(pv_e);
 1601                 }
 1602             }
 1603         }
 1604 
 1605         PMAP_WRITE_UNLOCK(spl);
 1606 }
 1607 
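/*
 * Illustrative sketch only: a hypothetical caller lowering the protection
 * on every mapping of a physical page, as the VM layer might before
 * cleaning or freeing it.  VM_PROT_NONE removes the mappings outright;
 * VM_PROT_READ leaves them read-only.  The wrapper name is an assumption.
 */
#if	0
static void
example_protect_page(ppnum_t pn, boolean_t remove_all)
{
	pmap_page_protect(pn, remove_all ? VM_PROT_NONE : VM_PROT_READ);
}
#endif	/* 0 */
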
 1608 /*
 1609  *      Set the physical protection on the
 1610  *      specified range of this map as requested.
 1611  *      Will not increase permissions.
 1612  */
 1613 void
 1614 pmap_protect(
 1615         pmap_t          map,
 1616         vm_offset_t     s,
 1617         vm_offset_t     e,
 1618         vm_prot_t       prot)
 1619 {
 1620         register pt_entry_t     *pde;
 1621         register pt_entry_t     *spte, *epte;
 1622         vm_offset_t             l;
 1623         spl_t           spl;
 1624 
 1625 
 1626         if (map == PMAP_NULL)
 1627                 return;
 1628 
 1629         /*
 1630          * Determine the new protection.
 1631          */
 1632         switch (prot) {
 1633             case VM_PROT_READ:
 1634             case VM_PROT_READ|VM_PROT_EXECUTE:
 1635                 break;
 1636             case VM_PROT_READ|VM_PROT_WRITE:
 1637             case VM_PROT_ALL:
 1638                 return; /* nothing to do */
 1639             default:
 1640                 pmap_remove(map, (addr64_t)s, (addr64_t)e);
 1641                 return;
 1642         }
 1643 
 1644         /*
 1645          * If write-protecting in the kernel pmap,
 1646          * remove the mappings; the i386 ignores
 1647          * the write-permission bit in kernel mode.
 1648          *
 1649          * XXX should be #if'd for i386
 1650          */
 1651 
 1652         if (cpuid_family() == CPUID_FAMILY_386)
 1653             if (map == kernel_pmap) {
 1654                     pmap_remove(map, (addr64_t)s, (addr64_t)e);
 1655                     return;
 1656             }
 1657 
 1658         SPLVM(spl);
 1659         simple_lock(&map->lock);
 1660 
 1661         /*
 1662          *      Invalidate the translation buffer first
 1663          */
 1664         PMAP_UPDATE_TLBS(map, s, e);
 1665 
 1666         pde = pmap_pde(map, s);
 1667         while (s < e) {
 1668             l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
 1669             if (l > e)
 1670                 l = e;
 1671             if (*pde & INTEL_PTE_VALID) {
 1672                 spte = (pt_entry_t *)ptetokv(*pde);
 1673                 spte = &spte[ptenum(s)];
 1674                 epte = &spte[intel_btop(l-s)];
 1675 
 1676                 while (spte < epte) {
 1677                     if (*spte & INTEL_PTE_VALID)
 1678                         *spte &= ~INTEL_PTE_WRITE;
 1679                     spte++;
 1680                 }
 1681             }
 1682             s = l;
 1683             pde++;
 1684         }
 1685 
 1686         simple_unlock(&map->lock);
 1687         SPLX(spl);
 1688 }
 1689 
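/*
 * Illustrative sketch only: write-protecting a range, as a caller might
 * when setting up copy-on-write.  pmap_protect never raises permissions,
 * so a read/write or VM_PROT_ALL request is a no-op, and a VM_PROT_NONE
 * request falls through to pmap_remove, as shown above.  The wrapper name
 * is an assumption.
 */
#if	0
static void
example_write_protect(pmap_t map, vm_offset_t s, vm_offset_t e)
{
	pmap_protect(map, s, e, VM_PROT_READ | VM_PROT_EXECUTE);
}
#endif	/* 0 */
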
 1690 
 1691 
 1692 /*
 1693  *      Insert the given physical page (p) at
 1694  *      the specified virtual address (v) in the
 1695  *      target physical map with the protection requested.
 1696  *
 1697  *      If specified, the page will be wired down, meaning
 1698  *      that the related pte cannot be reclaimed.
 1699  *
 1700  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 1701  *      or lose information.  That is, this routine must actually
 1702  *      insert this page into the given map NOW.
 1703  */
 1704 void
 1705 pmap_enter(
 1706         register pmap_t         pmap,
 1707         vm_offset_t             v,
 1708         ppnum_t                 pn,
 1709         vm_prot_t               prot,
 1710         unsigned int            flags,
 1711         boolean_t               wired)
 1712 {
 1713         register pt_entry_t     *pte;
 1714         register pv_entry_t     pv_h;
 1715         register int            i, pai;
 1716         pv_entry_t              pv_e;
 1717         pt_entry_t              template;
 1718         spl_t                   spl;
 1719         vm_offset_t             old_pa;
 1720         vm_offset_t             pa = (vm_offset_t)i386_ptob(pn);
 1721 
 1722         XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n",
 1723             current_thread()->top_act,
 1724             current_thread(), 
 1725             pmap, v, pn);
 1726 
 1727         assert(pn != vm_page_fictitious_addr);
 1728         if (pmap_debug)
 1729                 printf("pmap(%x, %x)\n", v, pn);
 1730         if (pmap == PMAP_NULL)
 1731                 return;
 1732 
 1733         if (cpuid_family() == CPUID_FAMILY_386)
 1734         if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
 1735             && !wired /* hack for io_wire */ ) {
 1736             /*
 1737              *  Because the 386 ignores write protection in kernel mode,
 1738              *  we cannot enter a read-only kernel mapping, and must
 1739              *  remove an existing mapping if changing it.
 1740              *
 1741              *  XXX should be #if'd for i386
 1742              */
 1743             PMAP_READ_LOCK(pmap, spl);
 1744 
 1745             pte = pmap_pte(pmap, v);
 1746             if (pte != PT_ENTRY_NULL && pte_to_pa(*pte) != 0) {
 1747                 /*
 1748                  *      Invalidate the translation buffer,
 1749                  *      then remove the mapping.
 1750                  */
 1751                 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
 1752                 pmap_remove_range(pmap, v, pte,
 1753                                   pte + ptes_per_vm_page);
 1754             }
 1755             PMAP_READ_UNLOCK(pmap, spl);
 1756             return;
 1757         }
 1758 
 1759         /*
 1760          *      Must allocate a new pvlist entry while we're unlocked;
 1761          *      zalloc may cause pageout (which will lock the pmap system).
 1762          *      If we determine we need a pvlist entry, we will unlock
 1763          *      and allocate one.  Then we will retry, throwing away
 1764          *      the allocated entry later (if we no longer need it).
 1765          */
 1766         pv_e = PV_ENTRY_NULL;
 1767 Retry:
 1768         PMAP_READ_LOCK(pmap, spl);
 1769 
 1770         /*
 1771          *      Expand pmap to include this pte.  Assume that
 1772          *      pmap is always expanded to include enough hardware
 1773          *      pages to map one VM page.
 1774          */
 1775 
 1776         while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
 1777                 /*
 1778                  *      Must unlock to expand the pmap.
 1779                  */
 1780                 PMAP_READ_UNLOCK(pmap, spl);
 1781 
 1782                 pmap_expand(pmap, v);
 1783 
 1784                 PMAP_READ_LOCK(pmap, spl);
 1785         }
 1786         /*
 1787          *      Special case if the physical page is already mapped
 1788          *      at this address.
 1789          */
 1790         old_pa = pte_to_pa(*pte);
 1791         if (old_pa == pa) {
 1792             /*
 1793              *  May be changing its wired attribute or protection
 1794              */
 1795         
 1796             template = pa_to_pte(pa) | INTEL_PTE_VALID;
 1797 
 1798             if(flags & VM_MEM_NOT_CACHEABLE) {
 1799                 if(!(flags & VM_MEM_GUARDED))
 1800                         template |= INTEL_PTE_PTA;
 1801                 template |= INTEL_PTE_NCACHE;
 1802             }
 1803 
 1804             if (pmap != kernel_pmap)
 1805                 template |= INTEL_PTE_USER;
 1806             if (prot & VM_PROT_WRITE)
 1807                 template |= INTEL_PTE_WRITE;
 1808             if (wired) {
 1809                 template |= INTEL_PTE_WIRED;
 1810                 if (!iswired(*pte))
 1811                     pmap->stats.wired_count++;
 1812             }
 1813             else {
 1814                 if (iswired(*pte)) {
 1815                     assert(pmap->stats.wired_count >= 1);
 1816                     pmap->stats.wired_count--;
 1817                 }
 1818             }
 1819 
 1820             PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
 1821             i = ptes_per_vm_page;
 1822             do {
 1823                 if (*pte & INTEL_PTE_MOD)
 1824                     template |= INTEL_PTE_MOD;
 1825                 WRITE_PTE(pte, template)
 1826                 pte++;
 1827                 pte_increment_pa(template);
 1828             } while (--i > 0);
 1829 
 1830             goto Done;
 1831         }
 1832 
 1833         /*
 1834          *      Outline of code from here:
 1835          *         1) If va was mapped, update TLBs, remove the mapping
 1836          *            and remove old pvlist entry.
 1837          *         2) Add pvlist entry for new mapping
 1838          *         3) Enter new mapping.
 1839          *
 1840          *      SHARING_FAULTS complicates this slightly in that it cannot
 1841          *      replace the mapping, but must remove it (because adding the
 1842          *      pvlist entry for the new mapping may remove others), and
 1843          *      hence always enters the new mapping at step 3)
 1844          *
 1845          *      If the old physical page is not managed step 1) is skipped
 1846          *      (except for updating the TLBs), and the mapping is
 1847          *      overwritten at step 3).  If the new physical page is not
 1848          *      managed, step 2) is skipped.
 1849          */
 1850 
 1851         if (old_pa != (vm_offset_t) 0) {
 1852 
 1853             PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
 1854 
 1855 #if     DEBUG_PTE_PAGE
 1856             if (pmap != kernel_pmap)
 1857                 ptep_check(get_pte_page(pte));
 1858 #endif  /* DEBUG_PTE_PAGE */
 1859 
 1860             /*
 1861              *  Don't do anything to pages outside valid memory here.
 1862              *  Instead convince the code that enters a new mapping
 1863              *  to overwrite the old one.
 1864              */
 1865 
 1866             if (valid_page(old_pa)) {
 1867 
 1868                 pai = pa_index(old_pa);
 1869                 LOCK_PVH(pai);
 1870 
 1871                 assert(pmap->stats.resident_count >= 1);
 1872                 pmap->stats.resident_count--;
 1873                 if (iswired(*pte)) {
 1874                     assert(pmap->stats.wired_count >= 1);
 1875                     pmap->stats.wired_count--;
 1876                 }
 1877                 i = ptes_per_vm_page;
 1878                 do {
 1879                     pmap_phys_attributes[pai] |=
 1880                         *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
 1881                     WRITE_PTE(pte, 0)
 1882                     pte++;
 1883                     pte_increment_pa(template);
 1884                 } while (--i > 0);
 1885 
 1886                 /*
 1887                  * Put pte back to beginning of page since it'll be
 1888                  * used later to enter the new page.
 1889                  */
 1890                 pte -= ptes_per_vm_page;
 1891 
 1892                 /*
 1893                  *      Remove the mapping from the pvlist for
 1894                  *      this physical page.
 1895                  */
 1896                 {
 1897                     register pv_entry_t prev, cur;
 1898 
 1899                     pv_h = pai_to_pvh(pai);
 1900                     if (pv_h->pmap == PMAP_NULL) {
 1901                         panic("pmap_enter: null pv_list!");
 1902                     }
 1903                     if (pv_h->va == v && pv_h->pmap == pmap) {
 1904                         /*
 1905                          * Header is the pv_entry.  Copy the next one
 1906                          * to header and free the next one (we cannot
 1907                          * free the header)
 1908                          */
 1909                         cur = pv_h->next;
 1910                         if (cur != PV_ENTRY_NULL) {
 1911                             *pv_h = *cur;
 1912                             pv_e = cur;
 1913                         }
 1914                         else {
 1915                             pv_h->pmap = PMAP_NULL;
 1916                         }
 1917                     }
 1918                     else {
 1919                         cur = pv_h;
 1920                         do {
 1921                             prev = cur;
 1922                             if ((cur = prev->next) == PV_ENTRY_NULL) {
 1923                                 panic("pmap_enter: mapping not in pv_list!");
 1924                             }
 1925                         } while (cur->va != v || cur->pmap != pmap);
 1926                         prev->next = cur->next;
 1927                         pv_e = cur;
 1928                     }
 1929                 }
 1930                 UNLOCK_PVH(pai);
 1931             }
 1932             else {
 1933 
 1934                 /*
 1935                  *      old_pa is not managed.  Pretend it's zero so code
 1936                  *      at Step 3) will enter new mapping (overwriting old
 1937                  *      one).  Do removal part of accounting.
 1938                  */
 1939                 old_pa = (vm_offset_t) 0;
 1940                 assert(pmap->stats.resident_count >= 1);
 1941                 pmap->stats.resident_count--;
 1942                 if (iswired(*pte)) {
 1943                     assert(pmap->stats.wired_count >= 1);
 1944                     pmap->stats.wired_count--;
 1945                 }
 1946             }
 1947         }
 1948 
 1949         if (valid_page(pa)) {
 1950 
 1951             /*
 1952              *  Step 2) Enter the mapping in the PV list for this
 1953              *  physical page.
 1954              */
 1955 
 1956             pai = pa_index(pa);
 1957 
 1958 
 1959 #if SHARING_FAULTS
 1960 RetryPvList:
 1961             /*
 1962              * We can return here from the sharing fault code below
 1963              * in case we removed the only entry on the pv list and thus
 1964              * must enter the new one in the list header.
 1965              */
 1966 #endif /* SHARING_FAULTS */
 1967             LOCK_PVH(pai);
 1968             pv_h = pai_to_pvh(pai);
 1969 
 1970             if (pv_h->pmap == PMAP_NULL) {
 1971                 /*
 1972                  *      No mappings yet
 1973                  */
 1974                 pv_h->va = v;
 1975                 pv_h->pmap = pmap;
 1976                 pv_h->next = PV_ENTRY_NULL;
 1977             }
 1978             else {
 1979 #if     DEBUG
 1980                 {
 1981                     /*
 1982                      * check that this mapping is not already there
 1983                      * or there is no alias for this mapping in the same map
 1984                      */
 1985                     pv_entry_t  e = pv_h;
 1986                     while (e != PV_ENTRY_NULL) {
 1987                         if (e->pmap == pmap && e->va == v)
 1988                             panic("pmap_enter: already in pv_list");
 1989                         e = e->next;
 1990                     }
 1991                 }
 1992 #endif  /* DEBUG */
 1993 #if SHARING_FAULTS
 1994                 {
 1995                     /*
 1996                      * do sharing faults.
 1997                      * if we find an entry on this pv list in the same address
 1998                      * space, remove it.  we know there will not be more
 1999                      * than one. 
 2000                      */
 2001                     pv_entry_t  e = pv_h;
 2002                     pt_entry_t      *opte;
 2003 
 2004                     while (e != PV_ENTRY_NULL) {
 2005                         if (e->pmap == pmap) {
 2006                             /*
 2007                              *  Remove it, drop pv list lock first.
 2008                              */
 2009                             UNLOCK_PVH(pai);
 2010 
 2011                             opte = pmap_pte(pmap, e->va);
 2012                             assert(opte != PT_ENTRY_NULL);
 2013                             /*
 2014                              *  Invalidate the translation buffer,
 2015                              *  then remove the mapping.
 2016                              */
 2017                              PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE);
 2018                              pmap_remove_range(pmap, e->va, opte,
 2019                                                       opte + ptes_per_vm_page);
 2020                              /*
 2021                               * We could have removed the head entry,
 2022                               * so there could be no more entries
 2023                               * and so we have to use the pv head entry.
 2024                               * So, go back to the top and try the entry
 2025                               * again.
 2026                               */
 2027                              goto RetryPvList;
 2028                         }
 2029                         e = e->next;
 2030                     }
 2031 
 2032                     /*
 2033                      * check that this mapping is not already there
 2034                      */
 2035                     e = pv_h;
 2036                     while (e != PV_ENTRY_NULL) {
 2037                         if (e->pmap == pmap)
 2038                             panic("pmap_enter: alias in pv_list");
 2039                         e = e->next;
 2040                     }
 2041                 }
 2042 #endif /* SHARING_FAULTS */
 2043 #if DEBUG_ALIAS
 2044                 {
 2045                     /*
 2046                      * check for aliases within the same address space.
 2047                      */
 2048                     pv_entry_t  e = pv_h;
 2049                     vm_offset_t     rpc = get_rpc();
 2050 
 2051                     while (e != PV_ENTRY_NULL) {
 2052                         if (e->pmap == pmap) {
 2053                             /*
 2054                              * log this entry in the alias ring buffer
 2055                              * if it's not there already.
 2056                              */
 2057                             struct pmap_alias *pma;
 2058                             int ii, logit;
 2059 
 2060                             logit = TRUE;
 2061                             for (ii = 0; ii < pmap_alias_index; ii++) {
 2062                                 if (pmap_aliasbuf[ii].rpc == rpc) {
 2063                                     /* found it in the log already */
 2064                                     logit = FALSE;
 2065                                     break;
 2066                                 }
 2067                             }
 2068                             if (logit) {
 2069                                 pma = &pmap_aliasbuf[pmap_alias_index];
 2070                                 pma->pmap = pmap;
 2071                                 pma->va = v;
 2072                                 pma->rpc = rpc;
 2073                                 pma->cookie = PMAP_ALIAS_COOKIE;
 2074                                 if (++pmap_alias_index >= PMAP_ALIAS_MAX)
 2075                                     panic("pmap_enter: exhausted alias log");
 2076                             }
 2077                         }
 2078                         e = e->next;
 2079                     }
 2080                 }
 2081 #endif /* DEBUG_ALIAS */
 2082                 /*
 2083                  *      Add new pv_entry after header.
 2084                  */
 2085                 if (pv_e == PV_ENTRY_NULL) {
 2086                     PV_ALLOC(pv_e);
 2087                     if (pv_e == PV_ENTRY_NULL) {
 2088                         UNLOCK_PVH(pai);
 2089                         PMAP_READ_UNLOCK(pmap, spl);
 2090 
 2091                         /*
 2092                          * Refill from zone.
 2093                          */
 2094                         pv_e = (pv_entry_t) zalloc(pv_list_zone);
 2095                         goto Retry;
 2096                     }
 2097                 }
 2098                 pv_e->va = v;
 2099                 pv_e->pmap = pmap;
 2100                 pv_e->next = pv_h->next;
 2101                 pv_h->next = pv_e;
 2102                 /*
 2103                  *      Remember that we used the pvlist entry.
 2104                  */
 2105                 pv_e = PV_ENTRY_NULL;
 2106             }
 2107             UNLOCK_PVH(pai);
 2108         }
 2109 
 2110         /*
 2111          * Step 3) Enter and count the mapping.
 2112          */
 2113 
 2114         pmap->stats.resident_count++;
 2115 
 2116         /*
 2117          *      Build a template to speed up entering -
 2118          *      only the pfn changes.
 2119          */
 2120         template = pa_to_pte(pa) | INTEL_PTE_VALID;
 2121 
 2122         if(flags & VM_MEM_NOT_CACHEABLE) {
 2123                 if(!(flags & VM_MEM_GUARDED))
 2124                         template |= INTEL_PTE_PTA;
 2125                 template |= INTEL_PTE_NCACHE;
 2126         }
 2127 
 2128         if (pmap != kernel_pmap)
 2129                 template |= INTEL_PTE_USER;
 2130         if (prot & VM_PROT_WRITE)
 2131                 template |= INTEL_PTE_WRITE;
 2132         if (wired) {
 2133                 template |= INTEL_PTE_WIRED;
 2134                 pmap->stats.wired_count++;
 2135         }
 2136         i = ptes_per_vm_page;
 2137         do {
 2138                 WRITE_PTE(pte, template)
 2139                 pte++;
 2140                 pte_increment_pa(template);
 2141         } while (--i > 0);
 2142 Done:
 2143         if (pv_e != PV_ENTRY_NULL) {
 2144             PV_FREE(pv_e);
 2145         }
 2146 
 2147         PMAP_READ_UNLOCK(pmap, spl);
 2148 }
 2149 
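/*
 * Illustrative sketch only: a hypothetical caller establishing a wired,
 * writable mapping of physical page pn at virtual address va.  flags is
 * 0 here (normal cached memory); VM_MEM_NOT_CACHEABLE, optionally with
 * VM_MEM_GUARDED, would request the uncached variants handled above.
 */
#if	0
static void
example_enter_wired(pmap_t pmap, vm_offset_t va, ppnum_t pn)
{
	pmap_enter(pmap, va, pn, VM_PROT_READ | VM_PROT_WRITE,
		   0 /* flags */, TRUE /* wired */);
}
#endif	/* 0 */
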
 2150 /*
 2151  *      Routine:        pmap_change_wiring
 2152  *      Function:       Change the wiring attribute for a map/virtual-address
 2153  *                      pair.
 2154  *      In/out conditions:
 2155  *                      The mapping must already exist in the pmap.
 2156  */
 2157 void
 2158 pmap_change_wiring(
 2159         register pmap_t map,
 2160         vm_offset_t     v,
 2161         boolean_t       wired)
 2162 {
 2163         register pt_entry_t     *pte;
 2164         register int            i;
 2165         spl_t                   spl;
 2166 
 2167 #if 1
 2168         /*
 2169          *      We must grab the pmap system lock because we may
 2170          *      change a pte_page queue.
 2171          */
 2172         PMAP_READ_LOCK(map, spl);
 2173 
 2174         if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
 2175                 panic("pmap_change_wiring: pte missing");
 2176 
 2177         if (wired && !iswired(*pte)) {
 2178             /*
 2179              *  wiring down mapping
 2180              */
 2181             map->stats.wired_count++;
 2182             i = ptes_per_vm_page;
 2183             do {
 2184                 *pte++ |= INTEL_PTE_WIRED;
 2185             } while (--i > 0);
 2186         }
 2187         else if (!wired && iswired(*pte)) {
 2188             /*
 2189              *  unwiring mapping
 2190              */
 2191             assert(map->stats.wired_count >= 1);
 2192             map->stats.wired_count--;
 2193             i = ptes_per_vm_page;
 2194             do {
 2195                 *pte++ &= ~INTEL_PTE_WIRED;
 2196             } while (--i > 0);
 2197         }
 2198 
 2199         PMAP_READ_UNLOCK(map, spl);
 2200 
 2201 #else
 2202         return;
 2203 #endif
 2204 
 2205 }
 2206 
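/*
 * Illustrative sketch only: unwiring a mapping that was entered with
 * wired == TRUE, so that its pte may again be reclaimed.  The mapping
 * must already exist, or the routine above panics.  The wrapper name is
 * an assumption.
 */
#if	0
static void
example_unwire(pmap_t map, vm_offset_t va)
{
	pmap_change_wiring(map, va, FALSE);
}
#endif	/* 0 */
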
 2207 ppnum_t 
 2208 pmap_find_phys(pmap_t pmap, addr64_t va)
 2209 {
 2210   pt_entry_t *ptp;
 2211   vm_offset_t a32;
 2212   ppnum_t ppn;
 2213 
 2214   if (value_64bit(va)) panic("pmap_find_phys 64 bit value");
 2215   a32 = (vm_offset_t)low32(va);
 2216   ptp = pmap_pte(pmap, a32);
 2217   if (PT_ENTRY_NULL == ptp)
 2218     return 0;
 2219   ppn = (ppnum_t)i386_btop(pte_to_pa(*ptp));
 2220   return ppn;
 2221 }
 2222 
 2223 /*
 2224  *      Routine:        pmap_extract
 2225  *      Function:
 2226  *              Extract the physical page address associated
 2227  *              with the given map/virtual_address pair.
 2228  */
 2229 
 2230 vm_offset_t
 2231 pmap_extract(
 2232         register pmap_t pmap,
 2233         vm_offset_t     va)
 2234 {
 2235         register pt_entry_t     *pte;
 2236         register vm_offset_t    pa;
 2237         spl_t                   spl;
 2238 
 2239         SPLVM(spl);
 2240         simple_lock(&pmap->lock);
 2241         if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
 2242             pa = (vm_offset_t) 0;
 2243         else if (!(*pte & INTEL_PTE_VALID))
 2244             pa = (vm_offset_t) 0;
 2245         else
 2246             pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
 2247         simple_unlock(&pmap->lock);
 2248         SPLX(spl);
 2249         return(pa);
 2250 }
 2251 
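/*
 * Illustrative sketch only: a hypothetical helper built on pmap_extract.
 * A return value of 0 means no valid translation exists for the address.
 */
#if	0
static boolean_t
example_is_mapped(pmap_t pmap, vm_offset_t va)
{
	return (pmap_extract(pmap, va) != (vm_offset_t) 0);
}
#endif	/* 0 */
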
 2252 /*
 2253  *      Routine:        pmap_expand
 2254  *
 2255  *      Expands a pmap to be able to map the specified virtual address.
 2256  *
 2257  *      Allocates a physical page to hold the page table entries that
 2258  *      map the given address, enters it in the pmap_object so it can
 2259  *      be found later, and installs the page directory entries.
 2260  *
 2261  *      Must be called with the pmap system and the pmap unlocked,
 2262  *      since these must be unlocked to use vm_allocate or vm_deallocate.
 2263  *      Thus it must be called in a loop that checks whether the map
 2264  *      has been expanded enough.
 2265  *      (We won't loop forever, since page tables aren't shrunk.)
 2266  */
 2267 void
 2268 pmap_expand(
 2269         register pmap_t         map,
 2270         register vm_offset_t    v)
 2271 {
 2272         pt_entry_t              *pdp;
 2273         register vm_page_t      m;
 2274         register vm_offset_t    pa;
 2275         register int            i;
 2276         spl_t                   spl;
 2277         ppnum_t                 pn;
 2278 
 2279         if (map == kernel_pmap)
 2280             panic("pmap_expand");
 2281 
 2282         /*
 2283          *      We cannot allocate the pmap_object in pmap_init,
 2284          *      because it is called before the zone package is up.
 2285          *      Allocate it now if it is missing.
 2286          */
 2287         if (pmap_object == VM_OBJECT_NULL)
 2288             pmap_object = vm_object_allocate(avail_end);
 2289 
 2290         /*
 2291          *      Allocate a VM page for the level 2 page table entries.
 2292          */
 2293         while ((m = vm_page_grab()) == VM_PAGE_NULL)
 2294                 VM_PAGE_WAIT();
 2295 
 2296         /*
 2297          *      Map the page to its physical address so that it
 2298          *      can be found later.
 2299          */
 2300         pn = m->phys_page;
 2301         pa = i386_ptob(pn);
 2302         vm_object_lock(pmap_object);
 2303         vm_page_insert(m, pmap_object, (vm_object_offset_t)pa);
 2304         vm_page_lock_queues();
 2305         vm_page_wire(m);
 2306         inuse_ptepages_count++;
 2307         vm_object_unlock(pmap_object);
 2308         vm_page_unlock_queues();
 2309 
 2310         /*
 2311          *      Zero the page.
 2312          */
 2313         memset((void *)phystokv(pa), 0, PAGE_SIZE);
 2314 
 2315         PMAP_READ_LOCK(map, spl);
 2316         /*
 2317          *      See if someone else expanded us first
 2318          */
 2319         if (pmap_pte(map, v) != PT_ENTRY_NULL) {
 2320                 PMAP_READ_UNLOCK(map, spl);
 2321                 vm_object_lock(pmap_object);
 2322                 vm_page_lock_queues();
 2323                 vm_page_free(m);
 2324                 inuse_ptepages_count--;
 2325                 vm_page_unlock_queues();
 2326                 vm_object_unlock(pmap_object);
 2327                 return;
 2328         }
 2329 
 2330         /*
 2331          *      Set the page directory entry for this page table.
 2332          *      If we have allocated more than one hardware page,
 2333          *      set several page directory entries.
 2334          */
 2335 
 2336         i = ptes_per_vm_page;
 2337         pdp = &map->dirbase[pdenum(map, v) & ~(i-1)];
 2338         do {
 2339             *pdp = pa_to_pte(pa)
 2340                 | INTEL_PTE_VALID
 2341                 | INTEL_PTE_USER
 2342                 | INTEL_PTE_WRITE;
 2343             pdp++;
 2344             pa += INTEL_PGBYTES;
 2345         } while (--i > 0);
 2346 
 2347         PMAP_READ_UNLOCK(map, spl);
 2348         return;
 2349 }
 2350 
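/*
 * Illustrative sketch only: the unlock/expand/relock retry loop described
 * above, as pmap_enter uses it.  The caller rechecks after reacquiring the
 * lock because another thread may have expanded the pmap first.
 */
#if	0
	while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
		PMAP_READ_UNLOCK(pmap, spl);
		pmap_expand(pmap, v);
		PMAP_READ_LOCK(pmap, spl);
	}
#endif	/* 0 */
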
 2351 /*
 2352  *      Copy the range specified by src_addr/len
 2353  *      from the source map to the range dst_addr/len
 2354  *      in the destination map.
 2355  *
 2356  *      This routine is only advisory and need not do anything.
 2357  */
 2358 #if     0
 2359 void
 2360 pmap_copy(
 2361         pmap_t          dst_pmap,
 2362         pmap_t          src_pmap,
 2363         vm_offset_t     dst_addr,
 2364         vm_size_t       len,
 2365         vm_offset_t     src_addr)
 2366 {
 2367 #ifdef  lint
 2368         dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
 2369 #endif  /* lint */
 2370 }
 2371 #endif/*        0 */
 2372 
 2373 /*
 2374  * pmap_sync_caches_phys(ppnum_t pa)
 2375  * 
 2376  * Invalidates all of the instruction cache on a physical page and
 2377  * pushes any dirty data from the data cache for the same physical page
 2378  */
 2379  
 2380 void pmap_sync_caches_phys(ppnum_t pa)
 2381 {
 2382 //      if (!(cpuid_features() & CPUID_FEATURE_SS))
 2383         {
 2384                 __asm__ volatile("wbinvd");     
 2385         }
 2386         return;
 2387 }
 2388 
 2389 int     collect_ref;
 2390 int     collect_unref;
 2391 
 2392 /*
 2393  *      Routine:        pmap_collect
 2394  *      Function:
 2395  *              Garbage collects the physical map system for
 2396  *              pages which are no longer used.
 2397  *              Success need not be guaranteed -- that is, there
 2398  *              may well be pages which are not referenced, but
 2399  *              others may be collected.
 2400  *      Usage:
 2401  *              Called by the pageout daemon when pages are scarce.
 2402  */
 2403 void
 2404 pmap_collect(
 2405         pmap_t          p)
 2406 {
 2407         register pt_entry_t     *pdp, *ptp;
 2408         pt_entry_t              *eptp;
 2409         vm_offset_t             pa;
 2410         int                     wired;
 2411         spl_t                   spl;
 2412 
 2413         if (p == PMAP_NULL)
 2414                 return;
 2415 
 2416         if (p == kernel_pmap)
 2417                 return;
 2418 
 2419         /*
 2420          *      Garbage collect map.
 2421          */
 2422         PMAP_READ_LOCK(p, spl);
 2423         PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
 2424 
 2425         for (pdp = p->dirbase;
 2426              pdp < &p->dirbase[pdenum(p, LINEAR_KERNEL_ADDRESS)];
 2427              pdp += ptes_per_vm_page)
 2428         {
 2429             if (*pdp & INTEL_PTE_VALID) 
 2430               if(*pdp & INTEL_PTE_REF) {
 2431                 *pdp &= ~INTEL_PTE_REF;
 2432                 collect_ref++;
 2433               } else {
 2434                 collect_unref++;
 2435                 pa = pte_to_pa(*pdp);
 2436                 ptp = (pt_entry_t *)phystokv(pa);
 2437                 eptp = ptp + NPTES*ptes_per_vm_page;
 2438 
 2439                 /*
 2440                  * If the pte page has any wired mappings, we cannot
 2441                  * free it.
 2442                  */
 2443                 wired = 0;
 2444                 {
 2445                     register pt_entry_t *ptep;
 2446                     for (ptep = ptp; ptep < eptp; ptep++) {
 2447                         if (iswired(*ptep)) {
 2448                             wired = 1;
 2449                             break;
 2450                         }
 2451                     }
 2452                 }
 2453                 if (!wired) {
 2454                     /*
 2455                      * Remove the virtual addresses mapped by this pte page.
 2456                      */
 2457                     pmap_remove_range(p,
 2458                                 pdetova(pdp - p->dirbase),
 2459                                 ptp,
 2460                                 eptp);
 2461 
 2462                     /*
 2463                      * Invalidate the page directory pointer.
 2464                      */
 2465                     {
 2466                         register int i = ptes_per_vm_page;
 2467                         register pt_entry_t *pdep = pdp;
 2468                         do {
 2469                             *pdep++ = 0;
 2470                         } while (--i > 0);
 2471                     }
 2472 
 2473                     PMAP_READ_UNLOCK(p, spl);
 2474 
 2475                     /*
 2476                      * And free the pte page itself.
 2477                      */
 2478                     {
 2479                         register vm_page_t m;
 2480 
 2481                         vm_object_lock(pmap_object);
 2482                         m = vm_page_lookup(pmap_object, pa);
 2483                         if (m == VM_PAGE_NULL)
 2484                             panic("pmap_collect: pte page not in object");
 2485                         vm_page_lock_queues();
 2486                         vm_page_free(m);
 2487                         inuse_ptepages_count--;
 2488                         vm_page_unlock_queues();
 2489                         vm_object_unlock(pmap_object);
 2490                     }
 2491 
 2492                     PMAP_READ_LOCK(p, spl);
 2493                 }
 2494             }
 2495         }
 2496         PMAP_READ_UNLOCK(p, spl);
 2497         return;
 2498 
 2499 }
 2500 
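/*
 * Illustrative sketch only: as the comment above notes, the pageout daemon
 * may call this opportunistically when pages are scarce, and it is
 * acceptable for the call to reclaim nothing.  The policy check and the
 * task_pmap variable are assumptions, not part of this file.
 */
#if	0
	if (page_shortage_detected())		/* hypothetical policy check */
		pmap_collect(task_pmap);	/* task_pmap: caller-supplied */
#endif	/* 0 */
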
 2501 /*
 2502  *      Routine:        pmap_kernel
 2503  *      Function:
 2504  *              Returns the physical map handle for the kernel.
 2505  */
 2506 #if     0
 2507 pmap_t
 2508 pmap_kernel(void)
 2509 {
 2510         return (kernel_pmap);
 2511 }
 2512 #endif/*        0 */
 2513 
 2514 /*
 2515  *      pmap_zero_page zeros the specified (machine independent) page.
 2516  *      See machine/phys.c or machine/phys.s for implementation.
 2517  */
 2518 #if     0
 2519 void
 2520 pmap_zero_page(
 2521         register vm_offset_t    phys)
 2522 {
 2523         register int    i;
 2524 
 2525         assert(phys != vm_page_fictitious_addr);
 2526         i = PAGE_SIZE / INTEL_PGBYTES;
 2527         phys = intel_pfn(phys);
 2528 
 2529         while (i--)
 2530                 zero_phys(phys++);
 2531 }
 2532 #endif/*        0 */
 2533 
 2534 /*
 2535  *      pmap_copy_page copies the specified (machine independent) page.
 2536  *      See machine/phys.c or machine/phys.s for implementation.
 2537  */
 2538 #if     0
 2539 void
 2540 pmap_copy_page(
 2541         vm_offset_t     src,
 2542         vm_offset_t     dst)
 2543 {
 2544         int     i;
 2545 
 2546         assert(src != vm_page_fictitious_addr);
 2547         assert(dst != vm_page_fictitious_addr);
 2548         i = PAGE_SIZE / INTEL_PGBYTES;
 2549 
 2550         while (i--) {
 2551                 copy_phys(intel_pfn(src), intel_pfn(dst));
 2552                 src += INTEL_PGBYTES;
 2553                 dst += INTEL_PGBYTES;
 2554         }
 2555 }
 2556 #endif/*        0 */
 2557 
 2558 /*
 2559  *      Routine:        pmap_pageable
 2560  *      Function:
 2561  *              Make the specified pages (by pmap, offset)
 2562  *              pageable (or not) as requested.
 2563  *
 2564  *              A page which is not pageable may not take
 2565  *              a fault; therefore, its page table entry
 2566  *              must remain valid for the duration.
 2567  *
 2568  *              This routine is merely advisory; pmap_enter
 2569  *              will specify that these pages are to be wired
 2570  *              down (or not) as appropriate.
 2571  */
 2572 void
 2573 pmap_pageable(
 2574         pmap_t          pmap,
 2575         vm_offset_t     start,
 2576         vm_offset_t     end,
 2577         boolean_t       pageable)
 2578 {
 2579 #ifdef  lint
 2580         pmap++; start++; end++; pageable++;
 2581 #endif  /* lint */
 2582 }
 2583 
 2584 /*
 2585  *      Clear specified attribute bits.
 2586  */
 2587 void
 2588 phys_attribute_clear(
 2589         vm_offset_t     phys,
 2590         int             bits)
 2591 {
 2592         pv_entry_t              pv_h;
 2593         register pv_entry_t     pv_e;
 2594         register pt_entry_t     *pte;
 2595         int                     pai;
 2596         register pmap_t         pmap;
 2597         spl_t                   spl;
 2598 
 2599         assert(phys != vm_page_fictitious_addr);
 2600         if (!valid_page(phys)) {
 2601             /*
 2602              *  Not a managed page.
 2603              */
 2604             return;
 2605         }
 2606 
 2607         /*
 2608          *      Lock the pmap system first, since we will be changing
 2609          *      several pmaps.
 2610          */
 2611 
 2612         PMAP_WRITE_LOCK(spl);
 2613 
 2614         pai = pa_index(phys);
 2615         pv_h = pai_to_pvh(pai);
 2616 
 2617         /*
 2618          * Walk down PV list, clearing all modify or reference bits.
 2619          * We do not have to lock the pv_list because we have
 2620          * the entire pmap system locked.
 2621          */
 2622         if (pv_h->pmap != PMAP_NULL) {
 2623             /*
 2624              * There are some mappings.
 2625              */
 2626             for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
 2627 
 2628                 pmap = pv_e->pmap;
 2629                 /*
 2630                  * Lock the pmap to block pmap_extract and similar routines.
 2631                  */
 2632                 simple_lock(&pmap->lock);
 2633 
 2634                 {
 2635                     register vm_offset_t va;
 2636 
 2637                     va = pv_e->va;
 2638                     pte = pmap_pte(pmap, va);
 2639 
 2640 #if     0
 2641                     /*
 2642                      * Consistency checks.
 2643                      */
 2644                     assert(*pte & INTEL_PTE_VALID);
 2645                     /* assert(pte_to_phys(*pte) == phys); */
 2646 #endif
 2647 
 2648                     /*
 2649                      * Invalidate TLBs for all CPUs using this mapping.
 2650                      */
 2651                     PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
 2652                 }
 2653 
 2654                 /*
 2655                  * Clear modify or reference bits.
 2656                  */
 2657                 {
 2658                     register int        i = ptes_per_vm_page;
 2659                     do {
 2660                         *pte++ &= ~bits;
 2661                     } while (--i > 0);
 2662                 }
 2663                 simple_unlock(&pmap->lock);
 2664             }
 2665         }
 2666 
 2667         pmap_phys_attributes[pai] &= ~bits;
 2668 
 2669         PMAP_WRITE_UNLOCK(spl);
 2670 }
 2671 
 2672 /*
 2673  *      Check specified attribute bits.
 2674  */
 2675 boolean_t
 2676 phys_attribute_test(
 2677         vm_offset_t     phys,
 2678         int             bits)
 2679 {
 2680         pv_entry_t              pv_h;
 2681         register pv_entry_t     pv_e;
 2682         register pt_entry_t     *pte;
 2683         int                     pai;
 2684         register pmap_t         pmap;
 2685         spl_t                   spl;
 2686 
 2687         assert(phys != vm_page_fictitious_addr);
 2688         if (!valid_page(phys)) {
 2689             /*
 2690              *  Not a managed page.
 2691              */
 2692             return (FALSE);
 2693         }
 2694 
 2695         /*
 2696          *      Lock the pmap system first, since we will be checking
 2697          *      several pmaps.
 2698          */
 2699 
 2700         PMAP_WRITE_LOCK(spl);
 2701 
 2702         pai = pa_index(phys);
 2703         pv_h = pai_to_pvh(pai);
 2704 
 2705         if (pmap_phys_attributes[pai] & bits) {
 2706             PMAP_WRITE_UNLOCK(spl);
 2707             return (TRUE);
 2708         }
 2709 
 2710         /*
 2711          * Walk down PV list, checking all mappings.
 2712          * We do not have to lock the pv_list because we have
 2713          * the entire pmap system locked.
 2714          */
 2715         if (pv_h->pmap != PMAP_NULL) {
 2716             /*
 2717              * There are some mappings.
 2718              */
 2719             for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
 2720 
 2721                 pmap = pv_e->pmap;
 2722                 /*
 2723                  * Lock the pmap to block pmap_extract and similar routines.
 2724                  */
 2725                 simple_lock(&pmap->lock);
 2726 
 2727                 {
 2728                     register vm_offset_t va;
 2729 
 2730                     va = pv_e->va;
 2731                     pte = pmap_pte(pmap, va);
 2732 
 2733 #if     0
 2734                     /*
 2735                      * Consistency checks.
 2736                      */
 2737                     assert(*pte & INTEL_PTE_VALID);
 2738                     /* assert(pte_to_phys(*pte) == phys); */
 2739 #endif
 2740                 }
 2741 
 2742                 /*
 2743                  * Check modify or reference bits.
 2744                  */
 2745                 {
 2746                     register int        i = ptes_per_vm_page;
 2747 
 2748                     do {
 2749                         if (*pte++ & bits) {
 2750                             simple_unlock(&pmap->lock);
 2751                             PMAP_WRITE_UNLOCK(spl);
 2752                             return (TRUE);
 2753                         }
 2754                     } while (--i > 0);
 2755                 }
 2756                 simple_unlock(&pmap->lock);
 2757             }
 2758         }
 2759         PMAP_WRITE_UNLOCK(spl);
 2760         return (FALSE);
 2761 }
 2762 
 2763 /*
 2764  *      Set specified attribute bits.
 2765  */
 2766 void
 2767 phys_attribute_set(
 2768         vm_offset_t     phys,
 2769         int             bits)
 2770 {
 2771         int                     spl;
 2772 
 2773         assert(phys != vm_page_fictitious_addr);
 2774         if (!valid_page(phys)) {
 2775             /*
 2776              *  Not a managed page.
 2777              */
 2778             return;
 2779         }
 2780 
 2781         /*
 2782          *      Lock the pmap system and set the requested bits in
 2783          *      the phys attributes array.  Don't need to bother with
 2784          *      ptes because the test routine looks here first.
 2785          */
 2786 
 2787         PMAP_WRITE_LOCK(spl);
 2788         pmap_phys_attributes[pa_index(phys)] |= bits;
 2789         PMAP_WRITE_UNLOCK(spl);
 2790 }
 2791 
 2792 /*
 2793  *      Set the modify bit on the specified physical page.
 2794  */
 2795 
 2796 void pmap_set_modify(
 2797                      ppnum_t pn)
 2798 {
 2799         vm_offset_t phys = (vm_offset_t)i386_ptob(pn);
 2800         phys_attribute_set(phys, PHYS_MODIFIED);
 2801 }
 2802 
 2803 /*
 2804  *      Clear the modify bits on the specified physical page.
 2805  */
 2806 
 2807 void
 2808 pmap_clear_modify(
 2809                   ppnum_t pn)
 2810 {
 2811         vm_offset_t phys = (vm_offset_t)i386_ptob(pn);
 2812         phys_attribute_clear(phys, PHYS_MODIFIED);
 2813 }
 2814 
 2815 /*
 2816  *      pmap_is_modified:
 2817  *
 2818  *      Return whether or not the specified physical page is modified
 2819  *      by any physical maps.
 2820  */
 2821 
 2822 boolean_t
 2823 pmap_is_modified(
 2824                  ppnum_t pn)
 2825 {
 2826         vm_offset_t phys = (vm_offset_t)i386_ptob(pn);
 2827         return (phys_attribute_test(phys, PHYS_MODIFIED));
 2828 }
 2829 
 2830 /*
 2831  *      pmap_clear_reference:
 2832  *
 2833  *      Clear the reference bit on the specified physical page.
 2834  */
 2835 
 2836 void
 2837 pmap_clear_reference(
 2838                      ppnum_t pn)
 2839 {
 2840         vm_offset_t phys = (vm_offset_t)i386_ptob(pn);
 2841         phys_attribute_clear(phys, PHYS_REFERENCED);
 2842 }
 2843 
 2844 /*
 2845  *      pmap_is_referenced:
 2846  *
 2847  *      Return whether or not the specified physical page is referenced
 2848  *      by any physical maps.
 2849  */
 2850 
 2851 boolean_t
 2852 pmap_is_referenced(
 2853                    ppnum_t pn)
 2854 {
 2855         vm_offset_t phys = (vm_offset_t)i386_ptob(pn);
 2856         return (phys_attribute_test(phys, PHYS_REFERENCED));
 2857 }
 2858 
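/*
 * Illustrative sketch only: the check-then-clear pattern the VM layer
 * typically applies to the attribute wrappers above, for example when
 * deciding whether a page must be cleaned before it is reclaimed.  The
 * helper name is an assumption.
 */
#if	0
static boolean_t
example_page_needs_cleaning(ppnum_t pn)
{
	boolean_t	dirty = pmap_is_modified(pn);

	if (dirty)
		pmap_clear_modify(pn);	/* start a fresh observation window */
	return (dirty);
}
#endif	/* 0 */
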
 2859 /*
 2860  *      Set the modify bit on the specified range
 2861  *      of this map as requested.
 2862  *
 2863  *      This optimization stands only if each time the dirty bit
 2864  *      in vm_page_t is tested, it is also tested in the pmap.
 2865  */
 2866 void
 2867 pmap_modify_pages(
 2868         pmap_t          map,
 2869         vm_offset_t     s,
 2870         vm_offset_t     e)
 2871 {
 2872         spl_t                   spl;
 2873         register pt_entry_t     *pde;
 2874         register pt_entry_t     *spte, *epte;
 2875         vm_offset_t             l;
 2876 
 2877         if (map == PMAP_NULL)
 2878                 return;
 2879 
 2880         PMAP_READ_LOCK(map, spl);
 2881 
 2882         /*
 2883          *      Invalidate the translation buffer first
 2884          */
 2885         PMAP_UPDATE_TLBS(map, s, e);
 2886 
 2887         pde = pmap_pde(map, s);
 2888         while (s && s < e) {
 2889             l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
 2890             if (l > e)
 2891                 l = e;
 2892             if (*pde & INTEL_PTE_VALID) {
 2893                 spte = (pt_entry_t *)ptetokv(*pde);
 2894                 if (l) {
 2895                    spte = &spte[ptenum(s)];
 2896                    epte = &spte[intel_btop(l-s)];
 2897                 } else {
 2898                    epte = &spte[intel_btop(PDE_MAPPED_SIZE)];
 2899                    spte = &spte[ptenum(s)];
 2900                 }
 2901                 while (spte < epte) {
 2902                     if (*spte & INTEL_PTE_VALID) {
 2903                         *spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE);
 2904                     }
 2905                     spte++;
 2906                 }
 2907             }
 2908             s = l;
 2909             pde++;
 2910         }
 2911         PMAP_READ_UNLOCK(map, spl);
 2912 }
 2913 
 2914 
 2915 void 
 2916 invalidate_icache(vm_offset_t addr, unsigned cnt, int phys)
 2917 {
 2918         return;
 2919 }
 2920 void 
 2921 flush_dcache(vm_offset_t addr, unsigned count, int phys)
 2922 {
 2923         return;
 2924 }
 2925 
 2926 #if     NCPUS > 1
 2927 /*
 2928 *           TLB Coherence Code (TLB "shootdown" code)
 2929 * 
 2930 * Threads that belong to the same task share the same address space and
 2931 * hence share a pmap.  However, they  may run on distinct cpus and thus
 2932 * have distinct TLBs that cache page table entries. In order to guarantee
 2933 * the TLBs are consistent, whenever a pmap is changed, all threads that
 2934 * are active in that pmap must have their TLB updated. To keep track of
 2935 * this information, the set of cpus that are currently using a pmap is
 2936 * maintained within each pmap structure (cpus_using). Pmap_activate() and
 2937 * pmap_deactivate() add and remove, respectively, a cpu from this set.
 2938 * Since the TLBs are not addressable over the bus, each processor must
 2939 * flush its own TLB; a processor that needs to invalidate another TLB
 2940 * needs to interrupt the processor that owns that TLB to signal the
 2941 * update.
 2942 * 
 2943 * Whenever a pmap is updated, the lock on that pmap is locked, and all
 2944 * cpus using the pmap are signaled to invalidate. All threads that need
 2945 * to activate a pmap must wait for the lock to clear to await any updates
 2946 * in progress before using the pmap. They must ACQUIRE the lock to add
 2947 * their cpu to the cpus_using set. An implicit assumption made
 2948 * throughout the TLB code is that all kernel code that runs at or higher
 2949 * than splvm blocks out update interrupts, and that such code does not
 2950 * touch pageable pages.
 2951 * 
 2952 * A shootdown interrupt serves another function besides signaling a
 2953 * processor to invalidate. The interrupt routine (pmap_update_interrupt)
 2954 * waits for both the pmap lock (and the kernel pmap lock) to clear,
 2955 * preventing user code from making implicit pmap updates while the
 2956 * sending processor is performing its update. (This could happen via a
 2957 * user data write reference that turns on the modify bit in the page
 2958 * table). It must wait for any kernel updates that may have started
 2959 * concurrently with a user pmap update because the IPC code
 2960 * changes mappings.
 2961 * Spinning on the VALUES of the locks is sufficient (rather than
 2962 * having to acquire the locks) because any updates that occur subsequent
 2963 * to finding the lock unlocked will be signaled via another interrupt.
 2964 * (This assumes the interrupt is cleared before the low level interrupt code 
 2965 * calls pmap_update_interrupt()). 
 2966 * 
 2967 * The signaling processor must wait for any implicit updates in progress
 2968 * to terminate before continuing with its update. Thus it must wait for an
 2969 * acknowledgement of the interrupt from each processor for which such
 2970 * references could be made. For maintaining this information, a set
 2971 * cpus_active is used. A cpu is in this set if and only if it can 
 2972 * use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
 2973 * this set; when all such cpus are removed, it is safe to update.
 2974 * 
 2975 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
 2976 * be at least at the priority of the interprocessor interrupt
 2977 * (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
 2978 * kernel update; it would spin forever in pmap_update_interrupt() trying
 2979 * to acquire the user pmap lock it had already acquired. Furthermore A
 2980 * must remove itself from cpus_active.  Otherwise, another cpu holding
 2981 * the lock (B) could be in the process of sending an update signal to A,
 2982 * and thus be waiting for A to remove itself from cpus_active. If A is
 2983 * spinning on the lock at priority this will never happen and a deadlock
 2984 * will result.
 2985 */
 2986 
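/*
 * Illustrative sketch only: the shootdown sequence the comment above
 * describes, reduced to its two steps.  The real work is driven by the
 * PMAP_UPDATE_TLBS macro together with the routines below; the wrapper
 * name is an assumption.
 */
#if	0
static void
example_shootdown(pmap_t pmap, vm_offset_t s, vm_offset_t e)
{
	signal_cpus(pmap->cpus_using, pmap, s, e);	/* interrupt other users */
	INVALIDATE_TLB(pmap, s, e);			/* flush our own TLB */
}
#endif	/* 0 */
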
 2987 /*
 2988  *      Signal another CPU that it must flush its TLB
 2989  */
 2990 void
 2991 signal_cpus(
 2992         cpu_set         use_list,
 2993         pmap_t          pmap,
 2994         vm_offset_t     start,
 2995         vm_offset_t     end)
 2996 {
 2997         register int            which_cpu, j;
 2998         register pmap_update_list_t     update_list_p;
 2999 
 3000         while ((which_cpu = ffs((unsigned long)use_list)) != 0) {
 3001             which_cpu -= 1;     /* convert to 0 origin */
 3002 
 3003             update_list_p = &cpu_update_list[which_cpu];
 3004             simple_lock(&update_list_p->lock);
 3005 
 3006             j = update_list_p->count;
 3007             if (j >= UPDATE_LIST_SIZE) {
 3008                 /*
 3009                  *      list overflowed.  Change last item to
 3010                  *      indicate overflow.
 3011                  */
 3012                 update_list_p->item[UPDATE_LIST_SIZE-1].pmap  = kernel_pmap;
 3013                 update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
 3014                 update_list_p->item[UPDATE_LIST_SIZE-1].end   = VM_MAX_KERNEL_ADDRESS;
 3015             }
 3016             else {
 3017                 update_list_p->item[j].pmap  = pmap;
 3018                 update_list_p->item[j].start = start;
 3019                 update_list_p->item[j].end   = end;
 3020                 update_list_p->count = j+1;
 3021             }
 3022             cpu_update_needed[which_cpu] = TRUE;
 3023             simple_unlock(&update_list_p->lock);
 3024 
 3025             /* if it's the kernel pmap, ignore cpus_idle */
 3026             if (((cpus_idle & (1 << which_cpu)) == 0) ||
 3027                 (pmap == kernel_pmap) || real_pmap[which_cpu] == pmap)
 3028               {
 3029                 i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC);
 3030               }
 3031             use_list &= ~(1 << which_cpu);
 3032         }
 3033 }
 3034 
 3035 void
 3036 process_pmap_updates(
 3037         register pmap_t         my_pmap)
 3038 {
 3039         register int            my_cpu;
 3040         register pmap_update_list_t     update_list_p;
 3041         register int            j;
 3042         register pmap_t         pmap;
 3043 
 3044         mp_disable_preemption();
 3045         my_cpu = cpu_number();
 3046         update_list_p = &cpu_update_list[my_cpu];
 3047         simple_lock(&update_list_p->lock);
 3048 
 3049         for (j = 0; j < update_list_p->count; j++) {
 3050             pmap = update_list_p->item[j].pmap;
 3051             if (pmap == my_pmap ||
 3052                 pmap == kernel_pmap) {
 3053 
 3054                 if (pmap->ref_count <= 0) {
 3055                         PMAP_CPU_CLR(pmap, my_cpu);
 3056                         real_pmap[my_cpu] = kernel_pmap;
 3057                         set_cr3(kernel_pmap->pdirbase);
 3058                 } else
 3059                         INVALIDATE_TLB(pmap,
 3060                                        update_list_p->item[j].start,
 3061                                        update_list_p->item[j].end);
 3062             }
 3063         }       
 3064         update_list_p->count = 0;
 3065         cpu_update_needed[my_cpu] = FALSE;
 3066         simple_unlock(&update_list_p->lock);
 3067         mp_enable_preemption();
 3068 }
 3069 
 3070 /*
 3071  *      Interrupt routine for a TLB invalidate (TBIA) requested by another
 3072  *      processor.  This routine may also be called from any interrupt if the
 3073  *      cpu was idle, since some driver interrupt routines can touch newly
 3074  *      allocated vm (this is the case for the hd driver).
 3075  */
 3076 void
 3077 pmap_update_interrupt(void)
 3078 {
 3079         register int            my_cpu;
 3080         spl_t                   s;
 3081         register pmap_t         my_pmap;
 3082 
 3083         mp_disable_preemption();
 3084         my_cpu = cpu_number();
 3085 
 3086         /*
 3087          *      Raise spl to splvm (above splip) to block out pmap_extract
 3088          *      from IO code (which would put this cpu back in the active
 3089          *      set).
 3090          */
 3091         s = splhigh();
 3092         
 3093         my_pmap = real_pmap[my_cpu];
 3094 
 3095         if (!(my_pmap && pmap_in_use(my_pmap, my_cpu)))
 3096                 my_pmap = kernel_pmap;
 3097 
 3098         do {
 3099             LOOP_VAR;
 3100 
 3101             /*
 3102              *  Indicate that we're not using either user or kernel
 3103              *  pmap.
 3104              */
 3105             i_bit_clear(my_cpu, &cpus_active);
 3106 
 3107             /*
 3108              *  Wait for any pmap updates in progress, on either user
 3109              *  or kernel pmap.
 3110              */
 3111             while (*(volatile hw_lock_t)&my_pmap->lock.interlock ||
 3112                    *(volatile hw_lock_t)&kernel_pmap->lock.interlock) {
 3113                 LOOP_CHECK("pmap_update_interrupt", my_pmap);
 3114                 cpu_pause();
 3115             }
 3116 
 3117             process_pmap_updates(my_pmap);
 3118 
 3119             i_bit_set(my_cpu, &cpus_active);
 3120 
 3121         } while (cpu_update_needed[my_cpu]);
 3122         
 3123         splx(s);
 3124         mp_enable_preemption();
 3125 }
 3126 #endif  /* NCPUS > 1 */
 3127 
 3128 #if     MACH_KDB
 3129 
 3130 /* show phys page mappings and attributes */
 3131 
 3132 extern void     db_show_page(vm_offset_t pa);
 3133 
 3134 void
 3135 db_show_page(vm_offset_t pa)
 3136 {
 3137         pv_entry_t      pv_h;
 3138         int             pai;
 3139         char            attr;
 3140         
 3141         pai = pa_index(pa);
 3142         pv_h = pai_to_pvh(pai);
 3143 
 3144         attr = pmap_phys_attributes[pai];
 3145         printf("phys page %x ", pa);
 3146         if (attr & PHYS_MODIFIED)
 3147                 printf("modified, ");
 3148         if (attr & PHYS_REFERENCED)
 3149                 printf("referenced, ");
 3150         if (pv_h->pmap || pv_h->next)
 3151                 printf(" mapped at\n");
 3152         else
 3153                 printf(" not mapped\n");
 3154         for (; pv_h; pv_h = pv_h->next)
 3155                 if (pv_h->pmap)
 3156                         printf("%x in pmap %x\n", pv_h->va, pv_h->pmap);
 3157 }
 3158 
 3159 #endif /* MACH_KDB */
 3160 
 3161 #if     MACH_KDB
 3162 void db_kvtophys(vm_offset_t);
 3163 void db_show_vaddrs(pt_entry_t  *);
 3164 
 3165 /*
 3166  *      print out the results of kvtophys(arg)
 3167  */
 3168 void
 3169 db_kvtophys(
 3170         vm_offset_t     vaddr)
 3171 {
 3172         db_printf("0x%x", kvtophys(vaddr));
 3173 }
 3174 
 3175 /*
 3176  *      Walk the page tables.
 3177  */
 3178 void
 3179 db_show_vaddrs(
 3180         pt_entry_t      *dirbase)
 3181 {
 3182         pt_entry_t      *ptep, *pdep, tmp;
 3183         int             x, y, pdecnt, ptecnt;
 3184 
 3185         if (dirbase == 0) {
 3186                 dirbase = kernel_pmap->dirbase;
 3187         }
 3188         if (dirbase == 0) {
 3189                 db_printf("need a dirbase...\n");
 3190                 return;
 3191         }
 3192         dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK);
 3193 
 3194         db_printf("dirbase: 0x%x\n", dirbase);
 3195 
 3196         pdecnt = ptecnt = 0;
 3197         pdep = &dirbase[0];
 3198         for (y = 0; y < NPDES; y++, pdep++) {
 3199                 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
 3200                         continue;
 3201                 }
 3202                 pdecnt++;
 3203                 ptep = (pt_entry_t *) ((*pdep) & ~INTEL_OFFMASK);
 3204                 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
 3205                 for (x = 0; x < NPTES; x++, ptep++) {
 3206                         if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
 3207                                 continue;
 3208                         }
 3209                         ptecnt++;
 3210                         db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
 3211                                 x,
 3212                                 *ptep,
 3213                                 (y << 22) | (x << 12),
 3214                                 *ptep & ~INTEL_OFFMASK);
 3215                 }
 3216         }
 3217 
 3218         db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
 3219 
 3220 }
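
/*
 * Illustrative sketch (not part of the original file): the 32-bit, non-PAE
 * address arithmetic that db_show_vaddrs() above reverses.  A virtual
 * address splits into a 10-bit directory index, a 10-bit table index and a
 * 12-bit byte offset; the helper names are assumptions for exposition only.
 */
static inline unsigned int
example_pdenum(vm_offset_t va)
{
        return (va >> 22) & 0x3ff;              /* page directory index (y above) */
}

static inline unsigned int
example_ptenum(vm_offset_t va)
{
        return (va >> 12) & 0x3ff;              /* page table index (x above) */
}

static inline vm_offset_t
example_rebuild_va(unsigned int y, unsigned int x)
{
        /* Matches the (y << 22) | (x << 12) computation in db_show_vaddrs(). */
        return ((vm_offset_t)y << 22) | ((vm_offset_t)x << 12);
}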
 3221 #endif  /* MACH_KDB */
 3222 
 3223 #include <mach_vm_debug.h>
 3224 #if     MACH_VM_DEBUG
 3225 #include <vm/vm_debug.h>
 3226 
 3227 int
 3228 pmap_list_resident_pages(
 3229         register pmap_t         pmap,
 3230         register vm_offset_t    *listp,
 3231         register int            space)
 3232 {
 3233         return 0;
 3234 }
 3235 #endif  /* MACH_VM_DEBUG */
 3236 
 3237 #ifdef MACH_BSD
 3238 /*
 3239  * pmap_movepage
 3240  *
 3241  * BSD support routine (pmap_pagemove) to reassign kernel virtual addresses.
 3242  */
 3243 
 3244 void
 3245 pmap_movepage(unsigned long from, unsigned long to, vm_size_t size)
 3246 {
 3247         spl_t   spl;
 3248         pt_entry_t      *pte, saved_pte;
 3249         vm_size_t       orig_size = size;       /* full extent, for the final TLB flush */
 3250         /* Lock the kernel map */
 3251         PMAP_READ_LOCK(kernel_pmap, spl);
 3252 
 3253 
 3254         while (size > 0) {
 3255                 pte = pmap_pte(kernel_pmap, from);
 3256                 if (pte == NULL)
 3257                         panic("pmap_pagemove from pte NULL");
 3258                 saved_pte = *pte;
 3259                 PMAP_READ_UNLOCK(kernel_pmap, spl);
 3260 
 3261                 pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(saved_pte)),
 3262                         VM_PROT_READ|VM_PROT_WRITE, 0, saved_pte & INTEL_PTE_WIRED);
 3263 
 3264                 pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE));
 3265 
 3266                 PMAP_READ_LOCK(kernel_pmap, spl);
 3267                 pte = pmap_pte(kernel_pmap, to);
 3268                 if (pte == NULL)
 3269                         panic("pmap_pagemove 'to' pte NULL");
 3270 
 3271                 *pte = saved_pte;
 3272 
 3273                 from += PAGE_SIZE;
 3274                 to += PAGE_SIZE;
 3275                 size -= PAGE_SIZE;
 3276         }
 3277 
 3278         /* Get the processors to update the TLBs for the ranges just moved */
 3279         PMAP_UPDATE_TLBS(kernel_pmap, from - orig_size, from);
 3280         PMAP_UPDATE_TLBS(kernel_pmap, to - orig_size, to);
 3281 
 3282         PMAP_READ_UNLOCK(kernel_pmap, spl);
 3283 
 3284 }
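
/*
 * Illustrative sketch (not part of the original file): a possible caller of
 * pmap_movepage() relocating an npages-long run of kernel mappings; the
 * function and variable names are assumptions for exposition only.
 */
static void
example_move_kernel_range(vm_offset_t old_va, vm_offset_t new_va, int npages)
{
        /* Both ranges should be page aligned; the size is given in bytes. */
        pmap_movepage((unsigned long)old_va, (unsigned long)new_va,
                      (vm_size_t)npages * PAGE_SIZE);
}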
 3285 
 3286 kern_return_t bmapvideo(vm_offset_t *info);
 3287 kern_return_t bmapvideo(vm_offset_t *info) {
 3288 
 3289         extern struct vc_info vinfo;
 3290 #ifdef NOTIMPLEMENTED
 3291         (void)copyout((char *)&vinfo, (char *)info, sizeof(struct vc_info));    /* Copy out the video info */
 3292 #endif
 3293         return KERN_SUCCESS;
 3294 }
 3295 
 3296 kern_return_t bmapmap(vm_offset_t va, vm_offset_t pa, vm_size_t size, vm_prot_t prot, int attr);
 3297 kern_return_t bmapmap(vm_offset_t va, vm_offset_t pa, vm_size_t size, vm_prot_t prot, int attr) {
 3298         
 3299 #ifdef NOTIMPLEMENTED
 3300         pmap_map_block(current_act()->task->map->pmap, va, pa, size, prot, attr);       /* Map it in */
 3301 #endif
 3302         return KERN_SUCCESS;
 3303 }
 3304 
 3305 kern_return_t bmapmapr(vm_offset_t va);
 3306 kern_return_t bmapmapr(vm_offset_t va) {
 3307         
 3308 #ifdef NOTIMPLEMENTED
 3309         mapping_remove(current_act()->task->map->pmap, va);     /* Remove map */
 3310 #endif
 3311         return KERN_SUCCESS;
 3312 }
 3313 #endif
 3314 
 3315 /* temporary workaround */
 3316 boolean_t
 3317 coredumpok(vm_map_t map, vm_offset_t va)
 3318 {
 3319   pt_entry_t *ptep;
 3320   ptep = pmap_pte(map->pmap, va);
 3321   if (0 == ptep) return FALSE;
 3322   return ((*ptep & (INTEL_PTE_NCACHE|INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE|INTEL_PTE_WIRED));
 3323 }
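
/*
 * Illustrative sketch (not part of the original file): how a core-dump
 * writer might use coredumpok() to skip pages whose PTEs are both
 * non-cacheable and wired; the function name is an assumption for
 * exposition only.
 */
static boolean_t
example_range_dumpable(vm_map_t map, vm_offset_t start, vm_size_t len)
{
        vm_offset_t va;

        for (va = start; va < start + len; va += PAGE_SIZE) {
                if (!coredumpok(map, va))
                        return FALSE;   /* unmapped, or wired non-cached page */
        }
        return TRUE;
}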
