
FreeBSD/Linux Kernel Cross Reference
sys/osfmk/i386/pmap.c


    1 /*
    2  * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
    3  *
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  * 
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  * 
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  * 
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  * 
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /*
   29  * @OSF_COPYRIGHT@
   30  */
   31 /*
   32  * Mach Operating System
   33  * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
   34  * All Rights Reserved.
   35  * 
   36  * Permission to use, copy, modify and distribute this software and its
   37  * documentation is hereby granted, provided that both the copyright
   38  * notice and this permission notice appear in all copies of the
   39  * software, derivative works or modified versions, and any portions
   40  * thereof, and that both notices appear in supporting documentation.
   41  * 
   42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
   44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   45  * 
   46  * Carnegie Mellon requests users of this software to return to
   47  * 
   48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   49  *  School of Computer Science
   50  *  Carnegie Mellon University
   51  *  Pittsburgh PA 15213-3890
   52  * 
   53  * any improvements or extensions that they make and grant Carnegie Mellon
   54  * the rights to redistribute these changes.
   55  */
   56 /*
   57  */
   58 
   59 /*
   60  *      File:   pmap.c
   61  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
   62  *      (These guys wrote the Vax version)
   63  *
   64  *      Physical Map management code for Intel i386, i486, and i860.
   65  *
   66  *      Manages physical address maps.
   67  *
   68  *      In addition to hardware address maps, this
   69  *      module is called upon to provide software-use-only
   70  *      maps which may or may not be stored in the same
   71  *      form as hardware maps.  These pseudo-maps are
   72  *      used to store intermediate results from copy
   73  *      operations to and from address spaces.
   74  *
   75  *      Since the information managed by this module is
   76  *      also stored by the logical address mapping module,
   77  *      this module may throw away valid virtual-to-physical
   78  *      mappings at almost any time.  However, invalidations
   79  *      of virtual-to-physical mappings must be done as
   80  *      requested.
   81  *
   82  *      In order to cope with hardware architectures which
   83  *      make virtual-to-physical map invalidates expensive,
    84  *      this module may delay invalidation or protection-reduction
   85  *      operations until such time as they are actually
   86  *      necessary.  This module is given full information as
   87  *      to which processors are currently using which maps,
   88  *      and to when physical maps must be made correct.
   89  */
   90 
   91 #include <string.h>
   92 #include <norma_vm.h>
   93 #include <mach_kdb.h>
   94 #include <mach_ldebug.h>
   95 
   96 #include <libkern/OSAtomic.h>
   97 
   98 #include <mach/machine/vm_types.h>
   99 
  100 #include <mach/boolean.h>
  101 #include <kern/thread.h>
  102 #include <kern/zalloc.h>
  103 #include <kern/queue.h>
  104 
  105 #include <kern/lock.h>
  106 #include <kern/kalloc.h>
  107 #include <kern/spl.h>
  108 
  109 #include <vm/pmap.h>
  110 #include <vm/vm_map.h>
  111 #include <vm/vm_kern.h>
  112 #include <mach/vm_param.h>
  113 #include <mach/vm_prot.h>
  114 #include <vm/vm_object.h>
  115 #include <vm/vm_page.h>
  116 
  117 #include <mach/machine/vm_param.h>
  118 #include <machine/thread.h>
  119 
  120 #include <kern/misc_protos.h>                   /* prototyping */
  121 #include <i386/misc_protos.h>
  122 
  123 #include <i386/cpuid.h>
  124 #include <i386/cpu_data.h>
  125 #include <i386/cpu_number.h>
  126 #include <i386/machine_cpu.h>
  127 #include <i386/seg.h>
  128 #include <i386/serial_io.h>
  129 #include <i386/cpu_capabilities.h>
  130 #include <i386/machine_routines.h>
  131 #include <i386/proc_reg.h>
  132 #include <i386/tsc.h>
  133 #include <i386/acpi.h>
  134 #include <i386/pmap_internal.h>
  135 
  136 #if     MACH_KDB
  137 #include <ddb/db_command.h>
  138 #include <ddb/db_output.h>
  139 #include <ddb/db_sym.h>
  140 #include <ddb/db_print.h>
  141 #endif  /* MACH_KDB */
  142 
  143 #include <vm/vm_protos.h>
  144 
  145 #include <i386/mp.h>
  146 #include <i386/mp_desc.h>
  147 #include <i386/i386_lowmem.h>
  148 
  149 
  150 /* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
  151 #ifdef DEBUGINTERRUPTS
  152 #define pmap_intr_assert() {if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);}
  153 #else
  154 #define pmap_intr_assert()
  155 #endif
  156 
  157 #ifdef IWANTTODEBUG
  158 #undef  DEBUG
  159 #define DEBUG 1
  160 #define POSTCODE_DELAY 1
  161 #include <i386/postcode.h>
  162 #endif /* IWANTTODEBUG */
  163 
  164 /*
  165  * Forward declarations for internal functions.
  166  */
  167 
  168 void            pmap_remove_range(
  169                         pmap_t          pmap,
  170                         vm_map_offset_t va,
  171                         pt_entry_t      *spte,
  172                         pt_entry_t      *epte);
  173 
  174 void            phys_attribute_clear(
  175                         ppnum_t         phys,
  176                         int             bits);
  177 
  178 int             phys_attribute_test(
  179                         ppnum_t         phys,
  180                         int             bits);
  181 
  182 void            phys_attribute_set(
  183                         ppnum_t         phys,
  184                         int             bits);
  185 
  186 void            pmap_set_reference(
  187                         ppnum_t pn);
  188 
  189 boolean_t       phys_page_exists(
  190                         ppnum_t pn);
  191 
  192 
  193 #ifdef PMAP_DEBUG
  194 void dump_pmap(pmap_t);
  195 void dump_4GB_pdpt(pmap_t p);
  196 void dump_4GB_pdpt_thread(thread_t tp);
  197 #endif
  198 
  199 int nx_enabled = 1;                     /* enable no-execute protection */
  200 #ifdef CONFIG_EMBEDDED
  201 int allow_data_exec  = 0;       /* no exec from data, embedded is hardcore like that */
  202 #else
  203 int allow_data_exec  = VM_ABI_32;       /* 32-bit apps may execute data by default, 64-bit apps may not */
  204 #endif
  205 int allow_stack_exec = 0;               /* No apps may execute from the stack by default */
  206 
  207 boolean_t cpu_64bit  = FALSE;
  208 boolean_t pmap_trace = FALSE;
  209 
  210 /*
  211  * when spinning through pmap_remove
  212  * ensure that we don't spend too much
  213  * time with preemption disabled.
  214  * I'm setting the current threshold
  215  * to 20us
  216  */
  217 #define MAX_PREEMPTION_LATENCY_NS 20000
  218 
  219 uint64_t max_preemption_latency_tsc = 0;
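
/*
 * Illustrative sketch (not from the original source): the threshold above is
 * converted to TSC ticks at init time (see pmap_init) and a long-running
 * removal loop can then bound its preemption-disabled window roughly like
 * this.  The variable names below are hypothetical.
 */
#if 0
	uint64_t deadline = rdtsc64() + max_preemption_latency_tsc;

	/* ... work with preemption disabled ... */
	if (rdtsc64() > deadline) {
		/* drop the pmap lock, allow preemption briefly, then resume */
		deadline = rdtsc64() + max_preemption_latency_tsc;
	}
#endif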
  220 
  221 
  222 /*
  223  *      Private data structures.
  224  */
  225 
  226 /*
  227  *      For each vm_page_t, there is a list of all currently
  228  *      valid virtual mappings of that page.  An entry is
  229  *      a pv_rooted_entry_t; the list is the pv_table.
  230  *
  231  *      N.B.  with the new combo rooted/hashed scheme it is
   232  *      only possible to remove individual non-rooted entries
  233  *      if they are found via the hashed chains as there is no
  234  *      way to unlink the singly linked hashed entries if navigated to
  235  *      via the queue list off the rooted entries.  Think of it as
  236  *      hash/walk/pull, keeping track of the prev pointer while walking
  237  *      the singly linked hash list.  All of this is to save memory and
  238  *      keep both types of pv_entries as small as possible.
  239  */
  240 
  241 /*
  242 
  243 PV HASHING Changes - JK 1/2007
  244 
  245 Pve's establish physical to virtual mappings.  These are used for aliasing of a 
  246 physical page to (potentially many) virtual addresses within pmaps. In the previous 
  247 implementation the structure of the pv_entries (each 16 bytes in size) was
  248 
  249 typedef struct pv_entry {
   250     struct pv_entry      *next;
  251     pmap_t                    pmap;
  252     vm_map_offset_t   va;
  253 } *pv_entry_t;
  254 
  255 An initial array of these is created at boot time, one per physical page of memory, 
  256 indexed by the physical page number. Additionally, a pool of entries is created from a 
  257 pv_zone to be used as needed by pmap_enter() when it is creating new mappings.  
  258 Originally, we kept this pool around because the code in pmap_enter() was unable to 
  259 block if it needed an entry and none were available - we'd panic.  Some time ago I 
  260 restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing 
  261 a pv structure and restart, removing a panic from the code (in the case of the kernel 
  262 pmap we cannot block and still panic, so, we keep a separate hot pool for use only on 
  263 kernel pmaps).  The pool has not been removed since there is a large performance gain 
  264 keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need.
  265 
  266 As pmap_enter() created new mappings it linked the new pve's for them off the fixed 
  267 pv array for that ppn (off the next pointer).  These pve's are accessed for several 
  268 operations, one of them being address space teardown.  In that case, we basically do this
  269 
  270         for (every page/pte in the space) {
  271                 calc pve_ptr from the ppn in the pte
  272                 for (every pv in the list for the ppn) {
  273                         if (this pv is for this pmap/vaddr) {
  274                                 do housekeeping
  275                                 unlink/free the pv
  276                         }
  277                 }
  278         }
  279 
  280 The problem arose when we were running, say 8000 (or even 2000) apache or other processes 
  281 and one or all terminate. The list hanging off each pv array entry could have thousands of 
  282 entries.  We were continuously linearly searching each of these lists as we stepped through 
  283 the address space we were tearing down.  Because of the locks we hold, likely taking a cache 
  284 miss for each node,  and interrupt disabling for MP issues the system became completely 
  285 unresponsive for many seconds while we did this.
  286 
  287 Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn 
  288 for operations like pmap_page_protect and finding and modifying/removing a single pve as 
  289 part of pmap_enter processing) has led to modifying the pve structures and databases.
  290 
  291 There are now two types of pve structures.  A "rooted" structure which is basically the 
   292 original structure accessed in an array by ppn, and a "hashed" structure accessed on a 
  293 hash list via a hash of [pmap, vaddr].  These have been designed with the two goals of 
  294 minimizing wired memory and making the lookup of a ppn faster.  Since a vast majority of 
  295 pages in the system are not aliased and hence represented by a single pv entry I've kept 
  296 the rooted entry size as small as possible because there is one of these dedicated for 
  297 every physical page of memory.  The hashed pve's are larger due to the addition of the hash 
  298 link and the ppn entry needed for matching while running the hash list to find the entry we 
  299 are looking for.  This way, only systems that have lots of aliasing (like 2000+ httpd procs) 
  300 will pay the extra memory price. Both structures have the same first three fields allowing 
  301 some simplification in the code.
  302 
  303 They have these shapes
  304 
  305 typedef struct pv_rooted_entry {
  306         queue_head_t qlink;
  307         vm_map_offset_t va;
  308         pmap_t          pmap;
  309 } *pv_rooted_entry_t;
  310 
  311 
  312 typedef struct pv_hashed_entry {
  313   queue_head_t qlink;
  314   vm_map_offset_t va;
  315   pmap_t        pmap;
  316   ppnum_t ppn;
  317   struct pv_hashed_entry *nexth;
  318 } *pv_hashed_entry_t;
  319 
  320 The main flow difference is that the code is now aware of the rooted entry and the hashed 
  321 entries.  Code that runs the pv list still starts with the rooted entry and then continues 
  322 down the qlink onto the hashed entries.  Code that is looking up a specific pv entry first 
  323 checks the rooted entry and then hashes and runs the hash list for the match. The hash list 
  324 lengths are much smaller than the original pv lists that contained all aliases for the specific ppn.
  325 
  326 */
  327 
  328 typedef struct pv_rooted_entry {     /* first three entries must match pv_hashed_entry_t */
  329         queue_head_t qlink;
  330         vm_map_offset_t va;             /* virtual address for mapping */
  331         pmap_t          pmap;           /* pmap where mapping lies */
  332 } *pv_rooted_entry_t;
  333 
  334 #define PV_ROOTED_ENTRY_NULL    ((pv_rooted_entry_t) 0)
  335 
  336 pv_rooted_entry_t       pv_head_table;          /* array of entries, one per page */
  337 
  338 typedef struct pv_hashed_entry {     /* first three entries must match pv_rooted_entry_t */
  339   queue_head_t qlink;
  340   vm_map_offset_t va;
  341   pmap_t        pmap;
  342   ppnum_t ppn;
  343   struct pv_hashed_entry *nexth;
  344 } *pv_hashed_entry_t;
  345 
  346 #define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
  347 
  348 #define NPVHASH 4095   /* MUST BE 2^N - 1 */
  349 pv_hashed_entry_t     *pv_hash_table;  /* hash lists */
  350 
  351 uint32_t npvhash = 0;
  352 
  353 /* #define PV_DEBUG 1   uncomment to enable some PV debugging code */
  354 #ifdef PV_DEBUG
  355 #define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
  356 #else
  357 #define CHK_NPVHASH()
  358 #endif
  359 
  360 /*
  361  *      pv_list entries are kept on a list that can only be accessed
  362  *      with the pmap system locked (at SPLVM, not in the cpus_active set).
  363  *      The list is refilled from the pv_hashed_list_zone if it becomes empty.
  364  */
  365 pv_rooted_entry_t       pv_free_list = PV_ROOTED_ENTRY_NULL;            /* free list at SPLVM */
  366 pv_hashed_entry_t       pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
  367 pv_hashed_entry_t      pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
  368 decl_simple_lock_data(,pv_hashed_free_list_lock)
  369 decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
  370 decl_simple_lock_data(,pv_hash_table_lock)
  371 
  372 int pv_free_count = 0;
  373 int pv_hashed_free_count = 0;
  374 int pv_kern_free_count = 0;
  375 int pv_hashed_kern_free_count = 0;
  376 #define PV_HASHED_LOW_WATER_MARK 5000
  377 #define PV_HASHED_KERN_LOW_WATER_MARK 100
  378 #define PV_HASHED_ALLOC_CHUNK 2000
  379 #define PV_HASHED_KERN_ALLOC_CHUNK 50
  380 thread_call_t  mapping_adjust_call;
  381 static thread_call_data_t  mapping_adjust_call_data;
  382 uint32_t mappingrecurse = 0;
  383 
  384 #define PV_HASHED_ALLOC(pvh_e) { \
  385         simple_lock(&pv_hashed_free_list_lock); \
  386         if ((pvh_e = pv_hashed_free_list) != 0) { \
  387           pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;   \
  388             pv_hashed_free_count--; \
  389             if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \
  390               if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
  391                 thread_call_enter(mapping_adjust_call); \
  392         } \
  393         simple_unlock(&pv_hashed_free_list_lock); \
  394 }
  395 
  396 #define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {   \
  397         simple_lock(&pv_hashed_free_list_lock); \
  398         pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list;        \
  399         pv_hashed_free_list = pvh_eh; \
  400         pv_hashed_free_count += pv_cnt; \
  401         simple_unlock(&pv_hashed_free_list_lock); \
  402 }
  403 
  404 #define PV_HASHED_KERN_ALLOC(pvh_e) { \
  405         simple_lock(&pv_hashed_kern_free_list_lock); \
  406         if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
  407           pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next;      \
  408             pv_hashed_kern_free_count--; \
  409             if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \
  410               if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
  411                 thread_call_enter(mapping_adjust_call); \
  412         } \
  413         simple_unlock(&pv_hashed_kern_free_list_lock); \
  414 }
  415 
  416 #define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) {       \
  417         simple_lock(&pv_hashed_kern_free_list_lock); \
  418         pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list;   \
  419         pv_hashed_kern_free_list = pvh_eh; \
  420         pv_hashed_kern_free_count += pv_cnt; \
  421         simple_unlock(&pv_hashed_kern_free_list_lock); \
  422 }
  423 
  424 zone_t          pv_hashed_list_zone;    /* zone of pv_hashed_entry structures */
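
/*
 * Illustrative sketch (not from the original source) of the allocation
 * pattern described in the PV HASHING comment above: a new mapping first
 * tries the hot free list via PV_HASHED_ALLOC(); only if that is empty does
 * it fall back to zalloc() from pv_hashed_list_zone (user pmaps may block
 * here; the kernel pmap uses the separate kernel pool instead).
 */
#if 0	/* sketch, assuming a caller shaped like pmap_enter() */
	pv_hashed_entry_t	pvh_e;

	PV_HASHED_ALLOC(pvh_e);
	if (pvh_e == PV_HASHED_ENTRY_NULL)
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
	/* ... fill in pvh_e->va, pvh_e->pmap, pvh_e->ppn and link it in ... */
#endif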
  425 
  426 static zone_t pdpt_zone;
  427 
  428 /*
  429  *      Each entry in the pv_head_table is locked by a bit in the
  430  *      pv_lock_table.  The lock bits are accessed by the physical
  431  *      address of the page they lock.
  432  */
  433 
  434 char    *pv_lock_table;         /* pointer to array of bits */
  435 #define pv_lock_table_size(n)   (((n)+BYTE_SIZE-1)/BYTE_SIZE)
  436 
  437 char    *pv_hash_lock_table;
  438 #define pv_hash_lock_table_size(n)  (((n)+BYTE_SIZE-1)/BYTE_SIZE)
  439 
  440 /*
  441  *      First and last physical addresses that we maintain any information
  442  *      for.  Initialized to zero so that pmap operations done before
  443  *      pmap_init won't touch any non-existent structures.
  444  */
  445 boolean_t       pmap_initialized = FALSE;/* Has pmap_init completed? */
  446 
  447 static struct vm_object kptobj_object_store;
  448 static vm_object_t kptobj;
  449 
  450 /*
  451  *      Index into pv_head table, its lock bits, and the modify/reference and managed bits
  452  */
  453 
  454 #define pa_index(pa)    (i386_btop(pa))
  455 #define ppn_to_pai(ppn) ((int)ppn)
  456 
  457 #define pai_to_pvh(pai)         (&pv_head_table[pai])
  458 #define lock_pvh_pai(pai)       bit_lock(pai, (void *)pv_lock_table)
  459 #define unlock_pvh_pai(pai)     bit_unlock(pai, (void *)pv_lock_table)
  460 
  461 #define pvhashidx(pmap, va) (((uint32_t)pmap ^ ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) & npvhash)
  462 #define pvhash(idx)         (&pv_hash_table[idx])
  463 
  464 #define lock_hash_hash(hash)            bit_lock(hash, (void *)pv_hash_lock_table)
  465 #define unlock_hash_hash(hash)  bit_unlock(hash, (void *)pv_hash_lock_table)
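
/*
 * Illustrative sketch (not from the original source) of the lookup flow
 * described in the PV HASHING comment above: check the rooted entry for the
 * page first, then hash [pmap, vaddr] and walk the (short) hash chain.
 * The function name is hypothetical and the caller is assumed to hold the
 * appropriate pv locks.
 */
#if 0
static pv_hashed_entry_t
pv_lookup_sketch(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t ppn)
{
	pv_rooted_entry_t	pv_h = pai_to_pvh(ppn_to_pai(ppn));
	pv_hashed_entry_t	pvh_e;
	int			pvhash_idx;

	/* Common case: a single mapping, recorded in the rooted entry itself. */
	if (pv_h->pmap == pmap && pv_h->va == vaddr)
		return (pv_hashed_entry_t) pv_h;	/* first three fields match */

	/* Otherwise hash [pmap, vaddr] and run the singly linked hash chain. */
	CHK_NPVHASH();
	pvhash_idx = pvhashidx(pmap, vaddr);
	for (pvh_e = *pvhash(pvhash_idx); pvh_e != PV_HASHED_ENTRY_NULL;
	     pvh_e = pvh_e->nexth) {
		if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn)
			break;
	}
	return pvh_e;	/* PV_HASHED_ENTRY_NULL if not found */
}
#endif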
  466 
  467 /*
   468  *      Array of physical page attributes for managed pages.
  469  *      One byte per physical page.
  470  */
  471 char    *pmap_phys_attributes;
  472 unsigned int    last_managed_page = 0;
  473 
  474 /*
  475  *      Physical page attributes.  Copy bits from PTE definition.
  476  */
  477 #define PHYS_MODIFIED   INTEL_PTE_MOD   /* page modified */
  478 #define PHYS_REFERENCED INTEL_PTE_REF   /* page referenced */
  479 #define PHYS_MANAGED    INTEL_PTE_VALID /* page is managed */
  480 
  481 /*
  482  *      Amount of virtual memory mapped by one
  483  *      page-directory entry.
  484  */
  485 #define PDE_MAPPED_SIZE         (pdetova(1))
  486 uint64_t pde_mapped_size;
  487 
  488 /*
  489  *      Locking and TLB invalidation
  490  */
  491 
  492 /*
  493  *      Locking Protocols: (changed 2/2007 JK)
  494  *
  495  *      There are two structures in the pmap module that need locking:
  496  *      the pmaps themselves, and the per-page pv_lists (which are locked
  497  *      by locking the pv_lock_table entry that corresponds to the pv_head
  498  *      for the list in question.)  Most routines want to lock a pmap and
  499  *      then do operations in it that require pv_list locking -- however
  500  *      pmap_remove_all and pmap_copy_on_write operate on a physical page
  501  *      basis and want to do the locking in the reverse order, i.e. lock
  502  *      a pv_list and then go through all the pmaps referenced by that list.
  503  *
  504  *      The system wide pmap lock has been removed. Now, paths take a lock
  505  *      on the pmap before changing its 'shape' and the reverse order lockers
  506  *      (coming in by phys ppn) take a lock on the corresponding pv and then
  507  *      retest to be sure nothing changed during the window before they locked
  508  *      and can then run up/down the pv lists holding the list lock. This also
  509  *      lets the pmap layer run (nearly completely) interrupt enabled, unlike
  510  *      previously.
  511  */
  512 
  513 
  514 /*
  515  * PV locking
  516  */
  517 
  518 #define LOCK_PVH(index)         {       \
  519     mp_disable_preemption();           \
  520     lock_pvh_pai(index);               \
  521 }
  522 
  523 #define UNLOCK_PVH(index)  {      \
  524     unlock_pvh_pai(index);        \
  525     mp_enable_preemption();       \
  526 }
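
/*
 * Illustrative sketch (not from the original source) of the reverse-order
 * locking described above: a physical-page path takes the pv lock first,
 * then retests that the page is still mapped before walking the pv list.
 */
#if 0	/* sketch; "pai" is a physical page index from the caller */
	pv_rooted_entry_t	pv_h;

	LOCK_PVH(pai);
	pv_h = pai_to_pvh(pai);
	if (pv_h->pmap != PMAP_NULL) {
		/* still mapped: safe to run up/down the pv list under the lock */
	}
	UNLOCK_PVH(pai);
#endif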
  527 
  528 /*
  529  * PV hash locking
  530  */
  531 
  532 #define LOCK_PV_HASH(hash)         lock_hash_hash(hash)
  533 
  534 #define UNLOCK_PV_HASH(hash)       unlock_hash_hash(hash)
  535 
  536 #if     USLOCK_DEBUG
  537 extern int      max_lock_loops;
  538 #define LOOP_VAR                                                        \
  539         unsigned int    loop_count;                                     \
  540         loop_count = disable_serial_output ? max_lock_loops             \
  541                                         : max_lock_loops*100
  542 #define LOOP_CHECK(msg, pmap)                                           \
  543         if (--loop_count == 0) {                                        \
  544                 mp_disable_preemption();                                \
  545                 kprintf("%s: cpu %d pmap %x\n",                         \
  546                           msg, cpu_number(), pmap);                     \
  547                 Debugger("deadlock detection");                         \
  548                 mp_enable_preemption();                                 \
  549                 loop_count = max_lock_loops;                            \
  550         }
  551 #else   /* USLOCK_DEBUG */
  552 #define LOOP_VAR
  553 #define LOOP_CHECK(msg, pmap)
  554 #endif  /* USLOCK_DEBUG */
  555 
  556 unsigned pmap_memory_region_count;
  557 unsigned pmap_memory_region_current;
  558 
  559 pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];
  560 
  561 /*
  562  *      Other useful macros.
  563  */
  564 #define current_pmap()          (vm_map_pmap(current_thread()->map))
  565 
  566 struct pmap     kernel_pmap_store;
  567 pmap_t          kernel_pmap;
  568 
  569 pd_entry_t    high_shared_pde;
  570 pd_entry_t    commpage64_pde;
  571 
  572 struct zone     *pmap_zone;             /* zone of pmap structures */
  573 
  574 int             pmap_debug = 0;         /* flag for debugging prints */
  575 
  576 unsigned int    inuse_ptepages_count = 0;
  577 
  578 addr64_t        kernel64_cr3;
  579 boolean_t       no_shared_cr3 = FALSE;  /* -no_shared_cr3 boot arg */
  580 
  581 
  582 /*
  583  *      Pmap cache.  Cache is threaded through ref_count field of pmap.
  584  *      Max will eventually be constant -- variable for experimentation.
  585  */
  586 int             pmap_cache_max = 32;
  587 int             pmap_alloc_chunk = 8;
  588 pmap_t          pmap_cache_list;
  589 int             pmap_cache_count;
  590 decl_simple_lock_data(,pmap_cache_lock)
  591 
  592 extern char end;
  593 
  594 static int nkpt;
  595 
  596 pt_entry_t     *DMAP1, *DMAP2;
  597 caddr_t         DADDR1;
  598 caddr_t         DADDR2;
  599 
  600 static inline
  601 void pmap_pvh_unlink(pv_hashed_entry_t pv);
  602 
  603 /*
  604  * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
  605  * properly deals with the anchor.
  606  * must be called with the hash locked, does not unlock it
  607  */
  608 
  609 static inline
  610 void pmap_pvh_unlink(pv_hashed_entry_t pvh)
  611 {
  612   pv_hashed_entry_t curh;
  613   pv_hashed_entry_t *pprevh;
  614   int pvhash_idx;
  615 
  616   CHK_NPVHASH();
  617   pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
  618 
  619   pprevh = pvhash(pvhash_idx);
  620 
  621 #if PV_DEBUG
  622   if (NULL == *pprevh) panic("pvh_unlink null anchor"); /* JK DEBUG */
  623 #endif
  624   curh = *pprevh;
  625 
  626   while (PV_HASHED_ENTRY_NULL != curh) {
  627     if (pvh == curh)
  628       break;
  629     pprevh = &curh->nexth;
  630     curh = curh->nexth;
  631   }
  632   if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
  633   *pprevh = pvh->nexth;
  634   return;
  635 }
  636 
  637 /*
  638  * for legacy, returns the address of the pde entry.
  639  * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
  640  * then returns the mapped address of the pde entry in that page
  641  */
  642 pd_entry_t *
  643 pmap_pde(pmap_t m, vm_map_offset_t v)
  644 {
  645   pd_entry_t *pde;
  646         if (!cpu_64bit || (m == kernel_pmap)) {
  647           pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
  648         } else {
  649           assert(m);
  650           assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
  651           pde = pmap64_pde(m, v);
  652         }
  653         return pde;
  654 }
  655 
  656 
  657 /*
   658  * The single pml4 page per pmap is allocated at pmap create time and exists
   659  * for the duration of the pmap.  We allocate this page in kernel vm (to save us one
   660  * level of page table dynamic mapping).
   661  * This returns the address of the requested pml4 entry in the top-level page.
  662  */
  663 static inline
  664 pml4_entry_t *
  665 pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
  666 {
  667   return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1))));
  668 }
  669 
  670 /*
  671  * maps in the pml4 page, if any, containing the pdpt entry requested
  672  * and returns the address of the pdpt entry in that mapped page
  673  */
  674 pdpt_entry_t *
  675 pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
  676 {
  677   pml4_entry_t newpf;
  678   pml4_entry_t *pml4;
  679   int i;
  680 
  681   assert(pmap);
  682   assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
  683   if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
  684     return(0);
  685   }
  686 
  687   pml4 = pmap64_pml4(pmap, vaddr);
  688 
  689         if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {
  690 
  691                 newpf = *pml4 & PG_FRAME;
  692 
  693 
  694                 for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) {
  695                   if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
  696                   return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + 
  697                          ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
  698                   }
  699                 }
  700 
  701                   current_cpu_datap()->cpu_pmap->pdpt_window_index++;
  702                   if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1))
  703                     current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
  704                   pmap_store_pte(
  705                                  (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP),
  706                                  newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
  707                   invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR));
  708                   return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) +
  709                           ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
  710         }
  711 
  712         return (NULL);
  713 }
  714 
  715 /*
  716  * maps in the pdpt page, if any, containing the pde entry requested
  717  * and returns the address of the pde entry in that mapped page
  718  */
  719 pd_entry_t *
  720 pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
  721 {
  722   pdpt_entry_t newpf;
  723   pdpt_entry_t *pdpt;
  724   int i;
  725 
  726   assert(pmap);
  727   assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
  728   if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
  729     return(0);
  730   }
  731 
  732   /*  if (vaddr & (1ULL << 63)) panic("neg addr");*/
  733   pdpt = pmap64_pdpt(pmap, vaddr);
  734 
  735           if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {
  736 
  737                 newpf = *pdpt & PG_FRAME;
  738 
  739                 for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) {
  740                   if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
  741                   return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + 
  742                          ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
  743                   }
  744                 }
  745 
  746                   current_cpu_datap()->cpu_pmap->pde_window_index++;
  747                   if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1))
  748                     current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW;
  749                   pmap_store_pte(
  750                                  (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP),
  751                                  newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
  752                   invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR));
  753                   return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) +
  754                           ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
  755         }
  756 
  757         return (NULL);
  758 }
  759 
  760 /*
  761  * Because the page tables (top 3 levels) are mapped into per cpu windows,
  762  * callers must either disable interrupts or disable preemption before calling
  763  * one of the pte mapping routines (e.g. pmap_pte()) as the returned vaddr
  764  * is in one of those mapped windows and that cannot be allowed to change until
  765  * the caller is done using the returned pte pointer. When done, the caller
  766  * restores interrupts or preemption to its previous state after which point the
  767  * vaddr for the returned pte can no longer be used
  768  */
  769 
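/*
 * Illustrative sketch (not from the original source): reading a pte in a
 * user pmap while honoring the window rules above.  The function name is
 * hypothetical; only the copied value, never the returned pointer, may be
 * used once preemption is re-enabled.
 */
#if 0
static pt_entry_t
pmap_pte_value_sketch(pmap_t map, vm_map_offset_t vaddr)
{
	pt_entry_t	*ptep;
	pt_entry_t	pte_val = 0;

	mp_disable_preemption();	/* keep the per-cpu mapping window stable */
	ptep = pmap_pte(map, vaddr);
	if (ptep != PT_ENTRY_NULL)
		pte_val = *ptep;
	mp_enable_preemption();		/* ptep is no longer valid past this point */

	return pte_val;
}
#endif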
  770 
  771 /*
   772  * Return the address of the mapped pte for vaddr va in pmap pmap.
   773  * Must be called with preemption or interrupts disabled if the
   774  * targeted pmap is not the kernel pmap,
   775  * since we may be passing back a virtual address that is
   776  * associated with this cpu; preemption or interrupts
   777  * must remain disabled until the caller is done using
   778  * the pointer that was passed back.
   779  *
   780  * Maps in the pde page, if any, containing the pte and returns
   781  * the address of the pte in that mapped page.
  782  */
  783 pt_entry_t     *
  784 pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
  785 {
  786         pd_entry_t     *pde;
  787         pd_entry_t     newpf;
  788         int i;
  789 
  790         assert(pmap);
  791         pde = pmap_pde(pmap,vaddr);
  792 
  793         if (pde && ((*pde & INTEL_PTE_VALID))) {
  794            if (*pde & INTEL_PTE_PS)
  795                 return pde;
  796             if (pmap == kernel_pmap)
  797                 return (vtopte(vaddr)); /* compat kernel still has pte's mapped */
  798 #if TESTING
  799             if (ml_get_interrupts_enabled() && get_preemption_level() == 0)
  800                 panic("pmap_pte: unsafe call");
  801 #endif
  802                 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
  803 
  804                 newpf = *pde & PG_FRAME;
  805 
  806                 for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) {
  807                   if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
  808                   return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + 
  809                          ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
  810                   }
  811                 }
  812 
  813                   current_cpu_datap()->cpu_pmap->pte_window_index++;
  814                   if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1))
  815                     current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW;
  816                   pmap_store_pte(
  817                                  (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP),
  818                                  newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
  819                   invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR));
  820                   return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) +
  821                           ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
  822         }
  823 
  824         return(NULL);
  825 }
  826         
  827 
  828 /*
  829  *      Map memory at initialization.  The physical addresses being
  830  *      mapped are not managed and are never unmapped.
  831  *
   832  *      For now, VM is already on; we only need to map the
  833  *      specified memory.
  834  */
  835 vm_offset_t
  836 pmap_map(
  837         vm_offset_t     virt,
  838         vm_map_offset_t start_addr,
  839         vm_map_offset_t end_addr,
  840         vm_prot_t       prot,
  841         unsigned int    flags)
  842 {
  843         int             ps;
  844 
  845         ps = PAGE_SIZE;
  846         while (start_addr < end_addr) {
  847                 pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
  848                            (ppnum_t) i386_btop(start_addr), prot, flags, FALSE);
  849                 virt += ps;
  850                 start_addr += ps;
  851         }
  852         return(virt);
  853 }
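
/*
 * Illustrative sketch (not from the original source): boot-time use of
 * pmap_map() to wire a physically contiguous range into the kernel pmap.
 * The addresses shown are hypothetical placeholders.
 */
#if 0
	vm_offset_t	vstart = 0;				/* hypothetical kernel VA */
	vm_map_offset_t	pstart = (vm_map_offset_t)0x00100000;	/* hypothetical physical start */
	vm_map_offset_t	pend   = pstart + 4 * PAGE_SIZE;	/* hypothetical physical end */

	/* Returns the first virtual address after the new mappings. */
	vstart = pmap_map(vstart, pstart, pend,
			  VM_PROT_READ | VM_PROT_WRITE,
			  VM_WIMG_USE_DEFAULT);
#endif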
  854 
  855 /*
  856  *      Back-door routine for mapping kernel VM at initialization.  
  857  *      Useful for mapping memory outside the range
  858  *      Sets no-cache, A, D.
  859  *      Otherwise like pmap_map.
  860  */
  861 vm_offset_t
  862 pmap_map_bd(
  863         vm_offset_t     virt,
  864         vm_map_offset_t start_addr,
  865         vm_map_offset_t end_addr,
  866         vm_prot_t       prot,
  867         unsigned int    flags)
  868 {
  869         pt_entry_t      template;
  870         pt_entry_t      *pte;
  871         spl_t           spl;
  872 
  873         template = pa_to_pte(start_addr)
  874                 | INTEL_PTE_REF
  875                 | INTEL_PTE_MOD
  876                 | INTEL_PTE_WIRED
  877                 | INTEL_PTE_VALID;
  878 
  879         if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) {
  880             template |= INTEL_PTE_NCACHE;
  881             if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT)))
  882                     template |= INTEL_PTE_PTA;
  883         }
  884 
  885         if (prot & VM_PROT_WRITE)
  886             template |= INTEL_PTE_WRITE;
  887 
  888 
  889         while (start_addr < end_addr) {
  890                 spl = splhigh();
  891                 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
  892                 if (pte == PT_ENTRY_NULL) {
  893                         panic("pmap_map_bd: Invalid kernel address\n");
  894                 }
  895                 pmap_store_pte(pte, template);
  896                 splx(spl);
  897                 pte_increment_pa(template);
  898                 virt += PAGE_SIZE;
  899                 start_addr += PAGE_SIZE;
  900         } 
  901 
  902 
  903         flush_tlb();
  904         return(virt);
  905 }
  906 
  907 extern  char                    *first_avail;
  908 extern  vm_offset_t             virtual_avail, virtual_end;
  909 extern  pmap_paddr_t            avail_start, avail_end;
  910 
  911 void
  912 pmap_cpu_init(void)
  913 {
  914         /*
  915          * Here early in the life of a processor (from cpu_mode_init()).
  916          * If we're not in 64-bit mode, enable the global TLB feature.
  917          * Note: regardless of mode we continue to set the global attribute
  918          * bit in ptes for all (32-bit) global pages such as the commpage.
  919          */
  920         if (!cpu_64bit) {
  921                 set_cr4(get_cr4() | CR4_PGE);
  922         }
  923 
  924         /*
  925          * Initialize the per-cpu, TLB-related fields.
  926          */
  927         current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
  928         current_cpu_datap()->cpu_tlb_invalid = FALSE;
  929 }
  930 
  931 vm_offset_t
  932 pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz)
  933 {
  934   vm_offset_t ve = pmap_index_to_virt(e);
  935   pt_entry_t *ptep;
  936   pmap_paddr_t pa;
  937   int i;
  938   spl_t s;
  939 
  940   assert(0 == (va & PAGE_MASK));  /* expecting page aligned */
  941   s = splhigh();
  942   ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve);
  943 
  944   for (i=0; i< sz; i++) {
  945     pa = (pmap_paddr_t) kvtophys(va);
  946     pmap_store_pte(ptep, (pa & PG_FRAME)
  947                                 | INTEL_PTE_VALID
  948                                 | INTEL_PTE_GLOBAL
  949                                 | INTEL_PTE_RW
  950                                 | INTEL_PTE_REF
  951                                 | INTEL_PTE_MOD);
  952     va+= PAGE_SIZE;
  953     ptep++;
  954   }
  955   splx(s);
  956   return ve;
  957 }
  958 
  959 vm_offset_t
  960 pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz)
  961 { 
  962   enum high_fixed_addresses     a = e + HIGH_CPU_END * cpu;
  963   return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz);
  964 }
  965 
  966 void pmap_init_high_shared(void);
  967 
  968 extern vm_offset_t gdtptr, idtptr;
  969 
  970 extern uint32_t low_intstack;
  971 
  972 extern struct fake_descriptor ldt_desc_pattern;
  973 extern struct fake_descriptor tss_desc_pattern;
  974 
  975 extern char hi_remap_text, hi_remap_etext;
  976 extern char t_zero_div;
  977 
  978 pt_entry_t *pte_unique_base;
  979 
  980 void
  981 pmap_init_high_shared(void)
  982 {
  983 
  984         vm_offset_t haddr;
  985         spl_t s;
  986 #if MACH_KDB
  987         struct i386_tss *ttss;
  988 #endif
  989 
  990         cpu_desc_index_t * cdi = &cpu_data_master.cpu_desc_index;
  991 
  992         kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n", 
  993                 HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
  994         s = splhigh();
  995         pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
  996         splx(s);
  997 
  998         if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) >
  999                                 HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1)
 1000                 panic("tramps too large");
 1001         haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS,
 1002                                         (vm_offset_t) &hi_remap_text, 3);
 1003         kprintf("tramp: 0x%x, ",haddr);
 1004         /* map gdt up high and update ptr for reload */
 1005         haddr = pmap_high_shared_remap(HIGH_FIXED_GDT,
 1006                                         (vm_offset_t) master_gdt, 1);
 1007         cdi->cdi_gdt.ptr = (void *)haddr;
 1008         kprintf("GDT: 0x%x, ",haddr);
 1009         /* map ldt up high */
 1010         haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN,
 1011                                         (vm_offset_t) master_ldt,
 1012                                         HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1);
 1013         cdi->cdi_ldt = (struct fake_descriptor *)haddr;
 1014         kprintf("LDT: 0x%x, ",haddr);
 1015         /* put new ldt addr into gdt */
 1016         struct fake_descriptor temp_fake_desc;
 1017         temp_fake_desc = ldt_desc_pattern;
 1018         temp_fake_desc.offset = (vm_offset_t) haddr;
 1019         fix_desc(&temp_fake_desc, 1);
 1020         
 1021         *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = temp_fake_desc;
 1022         *(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = temp_fake_desc;
 1023 
 1024         /* map idt up high */
 1025         haddr = pmap_high_shared_remap(HIGH_FIXED_IDT,
 1026                                         (vm_offset_t) master_idt, 1);
 1027         cdi->cdi_idt.ptr = (void *)haddr;
 1028         kprintf("IDT: 0x%x, ", haddr);
 1029         /* remap ktss up high and put new high addr into gdt */
 1030         haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS,
 1031                                         (vm_offset_t) &master_ktss, 1);
 1032 
 1033         temp_fake_desc = tss_desc_pattern;
 1034         temp_fake_desc.offset = (vm_offset_t) haddr;
 1035         fix_desc(&temp_fake_desc, 1);
 1036         *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = temp_fake_desc;
 1037         kprintf("KTSS: 0x%x, ",haddr);
 1038 #if MACH_KDB
 1039         /* remap dbtss up high and put new high addr into gdt */
 1040         haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS,
 1041                                         (vm_offset_t) &master_dbtss, 1);
 1042         temp_fake_desc = tss_desc_pattern;
 1043         temp_fake_desc.offset = (vm_offset_t) haddr;
 1044         fix_desc(&temp_fake_desc, 1);
 1045         *(struct fake_descriptor *)&master_gdt[sel_idx(DEBUG_TSS)] = temp_fake_desc;
 1046         ttss = (struct i386_tss *)haddr;
 1047         kprintf("DBTSS: 0x%x, ",haddr);
 1048 #endif  /* MACH_KDB */
 1049 
 1050         /* remap dftss up high and put new high addr into gdt */
 1051         haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
 1052                                         (vm_offset_t) &master_dftss, 1);
 1053         temp_fake_desc = tss_desc_pattern;
 1054         temp_fake_desc.offset = (vm_offset_t) haddr;
 1055         fix_desc(&temp_fake_desc, 1);
 1056         *(struct fake_descriptor *) &master_gdt[sel_idx(DF_TSS)] = temp_fake_desc;
 1057         kprintf("DFTSS: 0x%x\n",haddr);
 1058 
 1059         /* remap mctss up high and put new high addr into gdt */
 1060         haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
 1061                                         (vm_offset_t) &master_mctss, 1);
 1062         temp_fake_desc = tss_desc_pattern;
 1063         temp_fake_desc.offset = (vm_offset_t) haddr;
 1064         fix_desc(&temp_fake_desc, 1);
 1065         *(struct fake_descriptor *) &master_gdt[sel_idx(MC_TSS)] = temp_fake_desc;
 1066         kprintf("MCTSS: 0x%x\n",haddr);
 1067 
 1068         cpu_desc_load(&cpu_data_master);
 1069 }
 1070 
 1071 
 1072 /*
 1073  *      Bootstrap the system enough to run with virtual memory.
 1074  *      Map the kernel's code and data, and allocate the system page table.
 1075  *      Called with mapping OFF.  Page_size must already be set.
 1076  */
 1077 
 1078 void
 1079 pmap_bootstrap(
 1080         __unused vm_offset_t    load_start,
 1081         boolean_t               IA32e)
 1082 {
 1083         vm_offset_t     va;
 1084         pt_entry_t      *pte;
 1085         int i;
 1086         pdpt_entry_t *pdpt;
 1087         spl_t s;
 1088 
 1089         vm_last_addr = VM_MAX_KERNEL_ADDRESS;   /* Set the highest address
 1090                                                  * known to VM */
 1091         /*
 1092          *      The kernel's pmap is statically allocated so we don't
 1093          *      have to use pmap_create, which is unlikely to work
 1094          *      correctly at this part of the boot sequence.
 1095          */
 1096 
 1097 
 1098         kernel_pmap = &kernel_pmap_store;
 1099         kernel_pmap->ref_count = 1;
 1100         kernel_pmap->nx_enabled = FALSE;
 1101         kernel_pmap->pm_task_map = TASK_MAP_32BIT;
 1102         kernel_pmap->pm_obj = (vm_object_t) NULL;
 1103         kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
 1104         kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD);
 1105         pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
 1106         kernel_pmap->pm_pdpt = pdpt;
 1107         kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT);
 1108 
 1109 
 1110         va = (vm_offset_t)kernel_pmap->dirbase;
 1111         /* setup self referential mapping(s) */
 1112         for (i = 0; i< NPGPTD; i++, pdpt++) {
 1113           pmap_paddr_t pa;
 1114           pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
 1115           pmap_store_pte(
 1116             (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i),
 1117             (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
 1118               INTEL_PTE_MOD | INTEL_PTE_WIRED) ;
 1119           pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
 1120         }
 1121 
 1122         cpu_64bit = IA32e;
 1123         
 1124         lo_kernel_cr3 = kernel_pmap->pm_cr3;
 1125         current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;
 1126 
 1127         /* save the value we stuff into created pmaps to share the gdts etc */
 1128         high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE);
 1129         /* make sure G bit is on for high shared pde entry */
 1130         high_shared_pde |= INTEL_PTE_GLOBAL;
 1131         s = splhigh();
 1132         pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde);
 1133         splx(s);
 1134 
 1135         nkpt = NKPT;
 1136         OSAddAtomic(NKPT, &inuse_ptepages_count);
 1137 
 1138         virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
 1139         virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);
 1140 
 1141         /*
 1142          * Reserve some special page table entries/VA space for temporary
 1143          * mapping of pages.
 1144          */
 1145 #define SYSMAP(c, p, v, n)      \
 1146         v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)
 1147 
 1148         va = virtual_avail;
 1149         pte = vtopte(va);
 1150 
 1151         for (i=0; i<PMAP_NWINDOWS; i++) {
 1152             SYSMAP(caddr_t,
 1153                    (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
 1154                    (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
 1155                    1);
 1156             *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
 1157         }
 1158 
  1159         /* DMAP used for debugger */
 1160         SYSMAP(caddr_t, DMAP1, DADDR1, 1);
 1161         SYSMAP(caddr_t, DMAP2, DADDR2, 1);  /* XXX temporary - can remove */
 1162 
 1163         virtual_avail = va;
 1164 
 1165         if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) {
 1166           if (0 != ((npvhash+1) & npvhash)) {
 1167             kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n",npvhash,NPVHASH);
 1168             npvhash = NPVHASH;
 1169           }
 1170         } else {
 1171           npvhash = NPVHASH;
 1172         }
 1173         printf("npvhash=%d\n",npvhash);
 1174 
 1175         simple_lock_init(&kernel_pmap->lock, 0);
 1176         simple_lock_init(&pv_hashed_free_list_lock, 0);
 1177         simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
 1178         simple_lock_init(&pv_hash_table_lock,0);
 1179 
 1180         pmap_init_high_shared();
 1181 
 1182         pde_mapped_size = PDE_MAPPED_SIZE;
 1183 
 1184         if (cpu_64bit) {
 1185           pdpt_entry_t *ppdpt   = IdlePDPT;
 1186           pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64;
 1187           pdpt_entry_t *ppml4   = (pdpt_entry_t *)IdlePML4;
 1188           int istate = ml_set_interrupts_enabled(FALSE);
 1189 
 1190           /*
 1191            * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
 1192            * with page bits set for the correct IA-32e operation and so that
 1193            * the legacy-mode IdlePDPT is retained for slave processor start-up.
 1194            * This is necessary due to the incompatible use of page bits between
 1195            * 64-bit and legacy modes.
 1196            */
 1197           kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */
 1198           kernel_pmap->pm_pml4 = IdlePML4;
 1199           kernel_pmap->pm_pdpt = (pd_entry_t *)
 1200                                         ((unsigned int)IdlePDPT64 | KERNBASE );
 1201 #define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
 1202           pmap_store_pte(kernel_pmap->pm_pml4,
 1203                          (uint32_t)IdlePDPT64 | PAGE_BITS);
 1204           pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS);
 1205           pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS);
 1206           pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS);
 1207           pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS);
 1208 
 1209           /*
  1210            * The kernel is also mapped in the uber-space, the 4GB starting at
  1211            * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
 1212            */
 1213           pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0));
 1214 
 1215           kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3;
 1216 
 1217           /* Re-initialize descriptors and prepare to switch modes */
 1218           cpu_desc_init64(&cpu_data_master);
 1219           current_cpu_datap()->cpu_is64bit = TRUE;
 1220           current_cpu_datap()->cpu_active_cr3 = kernel64_cr3;
 1221 
 1222           pde_mapped_size = 512*4096 ; 
 1223 
 1224           ml_set_interrupts_enabled(istate);
 1225         }
 1226 
 1227         /* Sets 64-bit mode if required. */
 1228         cpu_mode_init(&cpu_data_master);
 1229         /* Update in-kernel CPUID information if we're now in 64-bit mode */
 1230         if (IA32e)
 1231                 cpuid_set_info();
 1232 
 1233         kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4;
 1234 
 1235         kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
 1236                         VADDR(KPTDI,0), virtual_end);
 1237         printf("PAE enabled\n");
 1238         if (cpu_64bit){
 1239           printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); }
 1240 
 1241         kprintf("Available physical space from 0x%llx to 0x%llx\n",
 1242                         avail_start, avail_end);
 1243 
 1244         /*
 1245          * By default for 64-bit users loaded at 4GB, share kernel mapping.
 1246          * But this may be overridden by the -no_shared_cr3 boot-arg.
 1247          */
 1248         if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) {
 1249                 kprintf("Shared kernel address space disabled\n");
 1250         }       
 1251 
 1252 #ifdef  PMAP_TRACES
 1253         if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
 1254                 kprintf("Kernel traces for pmap operations enabled\n");
 1255         }       
 1256 #endif  /* PMAP_TRACES */
 1257 }
 1258 
 1259 void
 1260 pmap_virtual_space(
 1261         vm_offset_t *startp,
 1262         vm_offset_t *endp)
 1263 {
 1264         *startp = virtual_avail;
 1265         *endp = virtual_end;
 1266 }
 1267 
 1268 /*
 1269  *      Initialize the pmap module.
 1270  *      Called by vm_init, to initialize any structures that the pmap
 1271  *      system needs to map virtual memory.
 1272  */
 1273 void
 1274 pmap_init(void)
 1275 {
 1276         register long           npages;
 1277         vm_offset_t             addr;
 1278         register vm_size_t      s;
 1279         vm_map_offset_t         vaddr;
 1280         ppnum_t ppn;
 1281 
 1282         /*
 1283          *      Allocate memory for the pv_head_table and its lock bits,
 1284          *      the modify bit array, and the pte_page table.
 1285          */
 1286 
 1287         /*
 1288          * zero bias all these arrays now instead of off avail_start
 1289          * so we cover all memory
 1290          */
 1291 
 1292         npages = (long)i386_btop(avail_end);
 1293         s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages
 1294                          + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1))
 1295                          + pv_lock_table_size(npages)
 1296                          + pv_hash_lock_table_size((npvhash+1))
 1297                                 + npages);
 1298 
 1299         s = round_page(s);
 1300         if (kernel_memory_allocate(kernel_map, &addr, s, 0,
 1301                                    KMA_KOBJECT | KMA_PERMANENT)
 1302             != KERN_SUCCESS)
 1303                 panic("pmap_init");
 1304 
 1305         memset((char *)addr, 0, s);
 1306 
 1307 #if PV_DEBUG
 1308         if (0 == npvhash) panic("npvhash not initialized");
 1309 #endif
 1310 
 1311         /*
 1312          *      Allocate the structures first to preserve word-alignment.
 1313          */
 1314         pv_head_table = (pv_rooted_entry_t) addr;
 1315         addr = (vm_offset_t) (pv_head_table + npages);
 1316 
 1317         pv_hash_table = (pv_hashed_entry_t *)addr;
 1318         addr = (vm_offset_t) (pv_hash_table + (npvhash + 1));
 1319 
 1320         pv_lock_table = (char *) addr;
 1321         addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
 1322 
 1323         pv_hash_lock_table = (char *) addr;
 1324         addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1)));
 1325 
 1326         pmap_phys_attributes = (char *) addr;
 1327         {
 1328                 unsigned int i;
 1329                 unsigned int pn;
 1330                 ppnum_t  last_pn;
 1331                 pmap_memory_region_t *pmptr = pmap_memory_regions;
 1332 
 1333                 last_pn = (ppnum_t)i386_btop(avail_end);
 1334 
 1335                 for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
 1336                         if (pmptr->type == kEfiConventionalMemory) {
 1337 
 1338                                 for (pn = pmptr->base; pn <= pmptr->end; pn++) {
 1339                                         if (pn < last_pn) {
 1340                                                 pmap_phys_attributes[pn] |= PHYS_MANAGED;
 1341 
 1342                                                 if (pn > last_managed_page)
 1343                                                         last_managed_page = pn;
 1344                                         }
 1345                                 }
 1346                         }
 1347                 }
 1348         }
 1349 
 1350         /*
 1351          *      Create the zone of physical maps,
 1352          *      and of the physical-to-virtual entries.
 1353          */
 1354         s = (vm_size_t) sizeof(struct pmap);
 1355         pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
 1356         s = (vm_size_t) sizeof(struct pv_hashed_entry);
 1357         pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */
 1358         s = 63;
 1359         pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
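        /*
         * Note on the 63-byte element size used for pdpt_zone above: it leaves
         * room to round each allocation up to a 32-byte boundary, since PAE
         * requires the 4-entry PDPT to be 32-byte aligned.  pmap_create()
         * below performs that rounding with "(pm_hold + 31) & ~31".
         */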
 1360 
 1361         kptobj = &kptobj_object_store;
 1362         _vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG), kptobj);
 1363         kernel_pmap->pm_obj = kptobj;
 1364 
 1365         /* create pv entries for kernel pages mapped by low level
 1366            startup code.  these have to exist so we can pmap_remove()
 1367            e.g. kext pages from the middle of our addr space */
 1368 
 1369         vaddr = (vm_map_offset_t)0;
 1370         for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
 1371           pv_rooted_entry_t     pv_e;
 1372 
 1373           pv_e = pai_to_pvh(ppn);
 1374           pv_e->va = vaddr;
 1375           vaddr += PAGE_SIZE;
 1376           pv_e->pmap = kernel_pmap;
 1377           queue_init(&pv_e->qlink);
 1378         }
 1379 
 1380         pmap_initialized = TRUE;
 1381 
 1382         /*
 1383          *      Initialize pmap cache.
 1384          */
 1385         pmap_cache_list = PMAP_NULL;
 1386         pmap_cache_count = 0;
 1387         simple_lock_init(&pmap_cache_lock, 0);
 1388 
 1389         max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
 1390 
 1391 }
 1392 
 1393 
 1394 #define managed_page(x) ( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) )
 1395 
 1396 /*
 1397  * this function is only used for debugging fron the vm layer
 1398  */
 1399 boolean_t
 1400 pmap_verify_free(
 1401                  ppnum_t pn)
 1402 {
 1403         pv_rooted_entry_t       pv_h;
 1404         int             pai;
 1405         boolean_t       result;
 1406 
 1407         assert(pn != vm_page_fictitious_addr);
 1408 
 1409         if (!pmap_initialized)
 1410                 return(TRUE);
 1411 
 1412         if (pn == vm_page_guard_addr)
 1413                 return TRUE;
 1414 
 1415         pai = ppn_to_pai(pn);
 1416         if (!managed_page(pai))
 1417                 return(FALSE);
 1418         pv_h = pai_to_pvh(pn);
 1419         result = (pv_h->pmap == PMAP_NULL);
 1420         return(result);
 1421 }
 1422 
 1423 boolean_t
 1424 pmap_is_empty(
 1425        pmap_t          pmap,
 1426        vm_map_offset_t va_start,
 1427        vm_map_offset_t va_end)
 1428 {
 1429         vm_map_offset_t offset;
 1430         ppnum_t         phys_page;
 1431 
 1432         if (pmap == PMAP_NULL) {
 1433                 return TRUE;
 1434         }
 1435 
 1436         /*
 1437          * Check the resident page count
 1438          * - if it's zero, the pmap is completely empty.
 1439          * This short-circuit test prevents a virtual address scan which is
 1440          * painfully slow for 64-bit spaces.
  1441          * This assumes the count is correct; the debug kernel
  1442          * ought to verify it, perhaps by walking the page tables.
 1443          */
 1444         if (pmap->stats.resident_count == 0)
 1445                 return TRUE;
 1446 
 1447         for (offset = va_start;
 1448              offset < va_end;
 1449              offset += PAGE_SIZE_64) {
 1450                 phys_page = pmap_find_phys(pmap, offset);
 1451                 if (phys_page) {
 1452                         if (pmap != kernel_pmap &&
 1453                             pmap->pm_task_map == TASK_MAP_32BIT &&
 1454                             offset >= HIGH_MEM_BASE) {
 1455                                 /*
 1456                                  * The "high_shared_pde" is used to share
 1457                                  * the entire top-most 2MB of address space
 1458                                  * between the kernel and all 32-bit tasks.
 1459                                  * So none of this can be removed from 32-bit
 1460                                  * tasks.
 1461                                  * Let's pretend there's nothing up
 1462                                  * there...
 1463                                  */
 1464                                 return TRUE;
 1465                         }
 1466                         kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
 1467                                 "page %d at 0x%llx\n",
 1468                                 pmap, va_start, va_end, phys_page, offset);
 1469                         return FALSE;
 1470                 }
 1471         }
 1472 
 1473         return TRUE;
 1474 }
 1475 
 1476 
 1477 /*
 1478  *      Create and return a physical map.
 1479  *
 1480  *      If the size specified for the map
 1481  *      is zero, the map is an actual physical
 1482  *      map, and may be referenced by the
 1483  *      hardware.
 1484  *
 1485  *      If the size specified is non-zero,
 1486  *      the map will be used in software only, and
 1487  *      is bounded by that size.
 1488  */
 1489 pmap_t
 1490 pmap_create(
 1491             vm_map_size_t       sz,
 1492             boolean_t           is_64bit)
 1493 {
 1494         pmap_t                  p;
 1495         int             i;
 1496         vm_offset_t     va;
 1497         vm_size_t       size;
 1498         pdpt_entry_t    *pdpt;
 1499         pml4_entry_t    *pml4p;
 1500         pd_entry_t      *pdp;
 1501         int template;
 1502         spl_t s;
 1503 
 1504         PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
 1505                    (int) (sz>>32), (int) sz, (int) is_64bit, 0, 0);
 1506 
 1507         size = (vm_size_t) sz;
 1508 
 1509         /*
 1510          *      A software use-only map doesn't even need a map.
 1511          */
 1512 
 1513         if (size != 0) {
 1514                 return(PMAP_NULL);
 1515         }
 1516 
 1517         p = (pmap_t) zalloc(pmap_zone);
 1518         if (PMAP_NULL == p)
 1519                 panic("pmap_create zalloc");
 1520 
 1521         /* init counts now since we'll be bumping some */
 1522         simple_lock_init(&p->lock, 0);
 1523         p->stats.resident_count = 0;
 1524         p->stats.resident_max = 0;
 1525         p->stats.wired_count = 0;
 1526         p->ref_count = 1;
 1527         p->nx_enabled = 1;
 1528         p->pm_shared = FALSE;
 1529 
 1530         assert(!is_64bit || cpu_64bit);
  1531         p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;
 1532 
 1533         if (!cpu_64bit) {
 1534                 /* legacy 32 bit setup */
 1535                 /* in the legacy case the pdpt layer is hardwired to 4 entries and each
 1536                  * entry covers 1GB of addr space */
 1537                 if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
 1538                         panic("pmap_create kmem_alloc_kobject");
 1539                 p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
 1540                 if ((vm_offset_t)NULL == p->pm_hold) {
 1541                         panic("pdpt zalloc");
 1542                 }
 1543                 pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
 1544                 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt);
 1545                 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG))))
 1546                         panic("pmap_create vm_object_allocate");
 1547 
 1548                 memset((char *)p->dirbase, 0, NBPTD);
 1549 
 1550                 va = (vm_offset_t)p->dirbase;
 1551                 p->pdirbase = kvtophys(va);
 1552 
 1553                 template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID;
 1554                 for (i = 0; i< NPGPTD; i++, pdpt++ ) {
 1555                         pmap_paddr_t pa;
 1556                         pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
 1557                         pmap_store_pte(pdpt, pa | template);
 1558                 }
 1559 
 1560                 /* map the high shared pde */
 1561                 s = splhigh();
 1562                 pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde);
 1563                 splx(s);
 1564 
 1565         } else {
 1566                 /* 64 bit setup  */
 1567 
 1568                 /* alloc the pml4 page in kernel vm */
 1569                 if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE))
 1570                         panic("pmap_create kmem_alloc_kobject pml4");
 1571 
 1572                 memset((char *)p->pm_hold, 0, PAGE_SIZE);
 1573                 p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);
 1574 
 1575                 OSAddAtomic(1,  &inuse_ptepages_count);
 1576 
 1577                 /* allocate the vm_objs to hold the pdpt, pde and pte pages */
 1578 
 1579                 if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS))))
 1580                         panic("pmap_create pdpt obj");
 1581 
 1582                 if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS))))
 1583                         panic("pmap_create pdpt obj");
 1584 
 1585                 if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS))))
 1586                         panic("pmap_create pte obj");
 1587 
 1588                 /* uber space points to uber mapped kernel */
 1589                 s = splhigh();
 1590                 pml4p = pmap64_pml4(p, 0ULL);
 1591                 pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4);
 1592 
 1593 
 1594                 if (!is_64bit) {
 1595                         while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
 1596                                 splx(s);
 1597                                 pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */
 1598                                 s = splhigh();
 1599                         }
 1600                         pmap_store_pte(pdp, high_shared_pde);
 1601                 }
 1602                 splx(s);
 1603         }
 1604 
  1605         PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
 1606                    (int) p, is_64bit, 0, 0, 0);
 1607 
 1608         return(p);
 1609 }
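
/*
 * Illustrative sketch (not part of the original source): the expected
 * lifecycle of a pmap returned by pmap_create() above.  A zero size yields
 * a real, hardware-referenced map; pmap_reference() and pmap_destroy()
 * (defined later in this file) manage its ref_count.  The function name
 * below is hypothetical.
 */
#if 0   /* example only */
static void
pmap_lifecycle_example(void)
{
        pmap_t  map;

        /* size 0 => an actual physical map, usable by the hardware */
        map = pmap_create((vm_map_size_t)0, /* is_64bit */ FALSE);

        pmap_reference(map);    /* ref_count: 1 -> 2 */
        pmap_destroy(map);      /* ref_count: 2 -> 1, map still valid */
        pmap_destroy(map);      /* ref_count: 1 -> 0, structures freed */
}
#endif  /* example only */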
 1610 
 1611 /*
  1612  * The following routines implement the shared address optimization for 64-bit
 1613  * users with a 4GB page zero.
 1614  *
 1615  * pmap_set_4GB_pagezero()
 1616  *      is called in the exec and fork paths to mirror the kernel's
 1617  *      mapping in the bottom 4G of the user's pmap. The task mapping changes
 1618  *      from TASK_MAP_64BIT to TASK_MAP_64BIT_SHARED. This routine returns
 1619  *      without doing anything if the -no_shared_cr3 boot-arg is set.
 1620  *
 1621  * pmap_clear_4GB_pagezero()
 1622  *      is called in the exec/exit paths to undo this mirror. The task mapping
 1623  *      reverts to TASK_MAP_64BIT. In addition, we switch to the kernel's
 1624  *      CR3 by calling pmap_load_kernel_cr3(). 
 1625  *
 1626  * pmap_load_kernel_cr3()
 1627  *      loads cr3 with the kernel's page table. In addition to being called
 1628  *      by pmap_clear_4GB_pagezero(), it is used both prior to teardown and
 1629  *      when we go idle in the context of a shared map.
 1630  *
 1631  * Further notes on per-cpu data used:
 1632  *
 1633  *      cpu_kernel_cr3  is the cr3 for the kernel's pmap.
 1634  *                      This is loaded in a trampoline on entering the kernel
 1635  *                      from a 32-bit user (or non-shared-cr3 64-bit user).
 1636  *      cpu_task_cr3    is the cr3 for the current thread.
 1637  *                      This is loaded in a trampoline as we exit the kernel.
 1638  *      cpu_active_cr3  reflects the cr3 currently loaded.
 1639  *                      However, the low order bit is set when the
 1640  *                      processor is idle or interrupts are disabled
 1641  *                      while the system pmap lock is held. It is used by
 1642  *                      tlb shoot-down.
 1643  *      cpu_task_map    indicates whether the task cr3 belongs to
 1644  *                      a 32-bit, a 64-bit or a 64-bit shared map.
 1645  *                      The latter allows the avoidance of the cr3 load
 1646  *                      on kernel entry and exit.
 1647  *      cpu_tlb_invalid set TRUE when a tlb flush is requested.
 1648  *                      If the cr3 is "inactive" (the cpu is idle or the
  1650  *                      system-wide pmap lock is held) this is not serviced
  1651  *                      by an IPI but at the time when the cr3 becomes "active".
 1651  */ 
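
/*
 * Illustrative sketch (not part of the original source): how the exec/exit
 * paths described above might drive these routines for a 64-bit task's pmap.
 * The surrounding task and exec plumbing is assumed, not shown, and the
 * function name is hypothetical.
 */
#if 0   /* example only */
static void
shared_cr3_example(pmap_t p)
{
        /* exec/fork path: mirror the kernel in the task's low 4GB */
        pmap_set_4GB_pagezero(p);       /* no-op if -no_shared_cr3 was given */

        /* ... task runs with pm_task_map == TASK_MAP_64BIT_SHARED ... */

        /* exec/exit path: undo the mirror; the task map reverts to
         * TASK_MAP_64BIT and cr3 is reloaded via pmap_load_kernel_cr3() */
        pmap_clear_4GB_pagezero(p);
}
#endif  /* example only */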
 1652 
 1653 void
 1654 pmap_set_4GB_pagezero(pmap_t p)
 1655 {
 1656         pdpt_entry_t    *user_pdptp;
 1657         pdpt_entry_t    *kern_pdptp;
 1658 
 1659         assert(p->pm_task_map != TASK_MAP_32BIT);
 1660 
 1661         /* Kernel-shared cr3 may be disabled by boot arg. */
 1662         if (no_shared_cr3)
 1663                 return;
 1664 
 1665         /*
 1666          * Set the bottom 4 3rd-level pte's to be the kernel's.
 1667          */
 1668         PMAP_LOCK(p);
 1669         while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
 1670                 PMAP_UNLOCK(p);
 1671                 pmap_expand_pml4(p, 0x0);
 1672                 PMAP_LOCK(p);
 1673         }
 1674         kern_pdptp = kernel_pmap->pm_pdpt;
 1675         pmap_store_pte(user_pdptp+0, *(kern_pdptp+0));
 1676         pmap_store_pte(user_pdptp+1, *(kern_pdptp+1));
 1677         pmap_store_pte(user_pdptp+2, *(kern_pdptp+2));
 1678         pmap_store_pte(user_pdptp+3, *(kern_pdptp+3));
 1679         p->pm_task_map = TASK_MAP_64BIT_SHARED;
 1680         PMAP_UNLOCK(p);
 1681 }
 1682 
 1683 void
 1684 pmap_clear_4GB_pagezero(pmap_t p)
 1685 {
 1686         pdpt_entry_t    *user_pdptp;
 1687 
 1688         if (p->pm_task_map != TASK_MAP_64BIT_SHARED)
 1689                 return;
 1690 
 1691         PMAP_LOCK(p);
 1692 
 1693         p->pm_task_map = TASK_MAP_64BIT;
 1694 
 1695         pmap_load_kernel_cr3();
 1696 
 1697         user_pdptp = pmap64_pdpt(p, 0x0);
 1698         pmap_store_pte(user_pdptp+0, 0);
 1699         pmap_store_pte(user_pdptp+1, 0);
 1700         pmap_store_pte(user_pdptp+2, 0);
 1701         pmap_store_pte(user_pdptp+3, 0);
 1702 
 1703         PMAP_UNLOCK(p);
 1704 }
 1705 
 1706 void
 1707 pmap_load_kernel_cr3(void)
 1708 {
 1709         uint64_t        kernel_cr3;
 1710 
 1711         assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
 1712 
 1713         /*
 1714          * Reload cr3 with the true kernel cr3.
 1715          */
 1716         kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3;
 1717         set64_cr3(kernel_cr3);
 1718         current_cpu_datap()->cpu_active_cr3 = kernel_cr3;
 1719         current_cpu_datap()->cpu_tlb_invalid = FALSE;
 1720         __asm__ volatile("mfence");
 1721 }
 1722 
 1723 /*
 1724  *      Retire the given physical map from service.
 1725  *      Should only be called if the map contains
 1726  *      no valid mappings.
 1727  */
 1728 
 1729 void
 1730 pmap_destroy(
 1731         register pmap_t p)
 1732 {
 1733         register int            c;
 1734 
 1735         if (p == PMAP_NULL)
 1736                 return;
 1737 
 1738         PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
 1739                    (int) p, 0, 0, 0, 0);
 1740 
 1741         PMAP_LOCK(p);
 1742 
 1743         c = --p->ref_count;
 1744 
 1745         if (c == 0) {
 1746                 /* 
 1747                  * If some cpu is not using the physical pmap pointer that it
 1748                  * is supposed to be (see set_dirbase), we might be using the
 1749                  * pmap that is being destroyed! Make sure we are
 1750                  * physically on the right pmap:
 1751                  */
 1752                 PMAP_UPDATE_TLBS(p,
 1753                                  0x0ULL,
 1754                                  0xFFFFFFFFFFFFF000ULL);
 1755         }
 1756 
 1757         PMAP_UNLOCK(p);
 1758 
 1759         if (c != 0) {
 1760                 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
 1761                            (int) p, 1, 0, 0, 0);
 1762                 return; /* still in use */
 1763         }
 1764 
 1765         /*
 1766          *      Free the memory maps, then the
 1767          *      pmap structure.
 1768          */
 1769         if (!cpu_64bit) {
 1770                 OSAddAtomic(-p->pm_obj->resident_page_count,  &inuse_ptepages_count);
 1771 
 1772                 kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
 1773                 zfree(pdpt_zone, (void *)p->pm_hold);
 1774 
 1775                 vm_object_deallocate(p->pm_obj);
 1776         } else {
 1777                 /* 64 bit */
 1778                 int inuse_ptepages = 0;
 1779 
 1780                 /* free 64 bit mode structs */
 1781                 inuse_ptepages++;
 1782                 kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);
 1783 
 1784                 inuse_ptepages += p->pm_obj_pml4->resident_page_count;
 1785                 vm_object_deallocate(p->pm_obj_pml4);
 1786 
 1787                 inuse_ptepages += p->pm_obj_pdpt->resident_page_count;
 1788                 vm_object_deallocate(p->pm_obj_pdpt);
 1789 
 1790                 inuse_ptepages += p->pm_obj->resident_page_count;
 1791                 vm_object_deallocate(p->pm_obj);
 1792 
 1793                 OSAddAtomic(-inuse_ptepages,  &inuse_ptepages_count);
 1794         }
 1795         zfree(pmap_zone, p);
 1796 
 1797         PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
 1798                    0, 0, 0, 0, 0);
 1799 
 1800 }
 1801 
 1802 /*
 1803  *      Add a reference to the specified pmap.
 1804  */
 1805 
 1806 void
 1807 pmap_reference(
 1808         register pmap_t p)
 1809 {
 1810 
 1811         if (p != PMAP_NULL) {
 1812                 PMAP_LOCK(p);
 1813                 p->ref_count++;
  1814                 PMAP_UNLOCK(p);
 1815         }
 1816 }
 1817 
 1818 /*
 1819  *      Remove a range of hardware page-table entries.
 1820  *      The entries given are the first (inclusive)
 1821  *      and last (exclusive) entries for the VM pages.
 1822  *      The virtual address is the va for the first pte.
 1823  *
 1824  *      The pmap must be locked.
 1825  *      If the pmap is not the kernel pmap, the range must lie
 1826  *      entirely within one pte-page.  This is NOT checked.
 1827  *      Assumes that the pte-page exists.
 1828  */
 1829 
 1830 void
 1831 pmap_remove_range(
 1832         pmap_t                  pmap,
 1833         vm_map_offset_t         start_vaddr,
 1834         pt_entry_t              *spte,
 1835         pt_entry_t              *epte)
 1836 {
 1837         register pt_entry_t     *cpte;
 1838         pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
 1839         pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
 1840         pv_hashed_entry_t       pvh_e;
 1841         int                     pvh_cnt = 0;
 1842         int                     num_removed, num_unwired, num_found;
 1843         int                     pai;
 1844         pmap_paddr_t            pa;
 1845         vm_map_offset_t         vaddr;
 1846         int                     pvhash_idx;
 1847         uint32_t                pv_cnt;
 1848 
 1849         num_removed = 0;
 1850         num_unwired = 0;
 1851         num_found   = 0;
 1852 
 1853         if (pmap != kernel_pmap &&
 1854             pmap->pm_task_map == TASK_MAP_32BIT &&
 1855             start_vaddr >= HIGH_MEM_BASE) {
 1856                 /*
 1857                  * The range is in the "high_shared_pde" which is shared
 1858                  * between the kernel and all 32-bit tasks.  It holds
 1859                  * the 32-bit commpage but also the trampolines, GDT, etc...
 1860                  * so we can't let user tasks remove anything from it.
 1861                  */
 1862                 return;
 1863         }
 1864 
 1865         /* invalidate the PTEs first to "freeze" them */
 1866         for (cpte = spte, vaddr = start_vaddr;
 1867              cpte < epte;
 1868              cpte++, vaddr += PAGE_SIZE_64) {
 1869 
 1870             pa = pte_to_pa(*cpte);
 1871             if (pa == 0)
 1872                 continue;
 1873             num_found++;
 1874 
 1875             if (iswired(*cpte))
 1876                 num_unwired++;
 1877 
 1878             pai = pa_index(pa);
 1879 
 1880             if (!managed_page(pai)) {
 1881                 /*
 1882                  *      Outside range of managed physical memory.
 1883                  *      Just remove the mappings.
 1884                  */
 1885                 pmap_store_pte(cpte, 0);
 1886                 continue;
 1887             }
 1888 
 1889             /* invalidate the PTE */ 
 1890             pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
 1891         }
 1892 
 1893         if (num_found == 0) {
 1894                 /* nothing was changed: we're done */
 1895                 goto update_counts;
 1896         }
 1897 
 1898         /* propagate the invalidates to other CPUs */
 1899 
 1900         PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
 1901 
 1902         for (cpte = spte, vaddr = start_vaddr;
 1903              cpte < epte;
 1904              cpte++, vaddr += PAGE_SIZE_64) {
 1905 
 1906             pa = pte_to_pa(*cpte);
 1907             if (pa == 0)
 1908                 continue;
 1909 
 1910             pai = pa_index(pa);
 1911 
 1912             LOCK_PVH(pai);
 1913 
 1914             pa = pte_to_pa(*cpte);
 1915             if (pa == 0) {
 1916               UNLOCK_PVH(pai);
 1917               continue;
 1918             }
 1919               
 1920             num_removed++;
 1921 
 1922             /*
 1923              *  Get the modify and reference bits, then
 1924              *  nuke the entry in the page table
 1925              */
 1926             /* remember reference and change */
 1927             pmap_phys_attributes[pai] |=
 1928                     (char)(*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
 1929             /* completely invalidate the PTE */
 1930             pmap_store_pte(cpte, 0);
 1931 
 1932             /*
 1933              *  Remove the mapping from the pvlist for
 1934              *  this physical page.
 1935              */
 1936             {
 1937               pv_rooted_entry_t pv_h;
 1938               pv_hashed_entry_t *pprevh;
 1939               ppnum_t ppn = (ppnum_t)pai;
 1940 
 1941                 pv_h = pai_to_pvh(pai);
 1942                 pvh_e = PV_HASHED_ENTRY_NULL;
 1943                 if (pv_h->pmap == PMAP_NULL)
 1944                     panic("pmap_remove_range: null pv_list!");
 1945 
 1946                 if (pv_h->va == vaddr && pv_h->pmap == pmap) { /* rooted or not */
 1947                     /*
  1948                      * Header is the pv_rooted_entry; we can't free that. If there
  1949                      * is a queued entry after this one, remove it from the ppn
  1950                      * queue and from the hash chain, copy it into the rooted
  1951                      * entry, and then free that entry instead.
 1952                      */
 1953 
 1954                   pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
 1955                   if (pv_h != (pv_rooted_entry_t)pvh_e) {  /* any queued after rooted? */
 1956                     CHK_NPVHASH();
 1957                     pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
 1958                     LOCK_PV_HASH(pvhash_idx);
 1959                     remque(&pvh_e->qlink);
 1960                     {
 1961                       pprevh = pvhash(pvhash_idx);
 1962                       if (PV_HASHED_ENTRY_NULL == *pprevh) {
 1963                         panic("pmap_remove_range empty hash removing rooted pv");
 1964                       }
 1965                     }
 1966                     pmap_pvh_unlink(pvh_e);
 1967                     UNLOCK_PV_HASH(pvhash_idx);
 1968                     pv_h->pmap = pvh_e->pmap;
 1969                     pv_h->va = pvh_e->va;   /* dispose of pvh_e */
 1970                   } else {  /* none queued after rooted */
 1971                     pv_h->pmap = PMAP_NULL;
 1972                     pvh_e = PV_HASHED_ENTRY_NULL;
 1973                   }   /* any queued after rooted */
 1974 
 1975                 } else { /* rooted or not */
  1976                   /* Not removing the rooted pv: find it on the hash chain,
  1977                    * remove it from the ppn queue and the hash chain, and free it. */
 1978                   CHK_NPVHASH();
 1979                   pvhash_idx = pvhashidx(pmap,vaddr);
 1980                   LOCK_PV_HASH(pvhash_idx);
 1981                   pprevh = pvhash(pvhash_idx);
 1982                   if (PV_HASHED_ENTRY_NULL == *pprevh) {
 1983                     panic("pmap_remove_range empty hash removing hashed pv");
 1984                     }
 1985                   pvh_e = *pprevh;
 1986                   pmap_pv_hashlist_walks++;
 1987                   pv_cnt = 0;
 1988                   while (PV_HASHED_ENTRY_NULL != pvh_e) {
 1989                         pv_cnt++;
 1990                         if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn) break;
 1991                         pprevh = &pvh_e->nexth;
 1992                         pvh_e = pvh_e->nexth;
 1993                   }
 1994                   pmap_pv_hashlist_cnts += pv_cnt;
 1995                   if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt;
 1996                   if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_remove_range pv not on hash");
 1997                   *pprevh = pvh_e->nexth;
 1998                   remque(&pvh_e->qlink);
 1999                   UNLOCK_PV_HASH(pvhash_idx);
 2000 
 2001                 } /* rooted or not */
 2002 
 2003                 UNLOCK_PVH(pai);
 2004 
 2005                 if (pvh_e != PV_HASHED_ENTRY_NULL) {
 2006                   pvh_e->qlink.next = (queue_entry_t)pvh_eh;
 2007                   pvh_eh = pvh_e;
 2008 
 2009                   if (pvh_et == PV_HASHED_ENTRY_NULL) {
 2010                     pvh_et = pvh_e;
 2011                   }
 2012 
 2013                   pvh_cnt++;
 2014                 }
 2015 
 2016             } /* removing mappings for this phy page */
 2017         } /* for loop */
 2018         
 2019         if (pvh_eh != PV_HASHED_ENTRY_NULL) {
 2020             PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
 2021         }
 2022 
 2023 update_counts:
 2024         /*
 2025          *      Update the counts
 2026          */
 2027 #if TESTING
 2028         if (pmap->stats.resident_count < num_removed)
 2029                 panic("pmap_remove_range: resident_count");
 2030 #endif
 2031         assert(pmap->stats.resident_count >= num_removed);
 2032         OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
 2033 
 2034 #if TESTING
 2035         if (pmap->stats.wired_count < num_unwired)
 2036                 panic("pmap_remove_range: wired_count");
 2037 #endif
 2038         assert(pmap->stats.wired_count >= num_unwired);
 2039         OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
 2040 
 2041         return;
 2042 }
 2043 
 2044 /*
 2045  *      Remove phys addr if mapped in specified map
 2046  *
 2047  */
 2048 void
 2049 pmap_remove_some_phys(
 2050         __unused pmap_t         map,
 2051         __unused ppnum_t         pn)
 2052 {
 2053 
 2054 /* Implement to support working set code */
 2055 
 2056 }
 2057 
 2058 /*
 2059  *      Remove the given range of addresses
 2060  *      from the specified map.
 2061  *
 2062  *      It is assumed that the start and end are properly
 2063  *      rounded to the hardware page size.
 2064  */
 2065 
 2066 
 2067 void
 2068 pmap_remove(
 2069         pmap_t          map,
 2070         addr64_t        s64,
 2071         addr64_t        e64)
 2072 {
 2073         pt_entry_t      *pde;
 2074         pt_entry_t      *spte, *epte;
 2075         addr64_t        l64;
 2076         addr64_t        orig_s64;
 2077         uint64_t        deadline;
 2078 
 2079         pmap_intr_assert();
 2080 
 2081         if (map == PMAP_NULL || s64 == e64)
 2082                 return;
 2083  
 2084         PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
 2085                    (int) map,
 2086                    (int) (s64>>32), (int) s64,
 2087                    (int) (e64>>32), (int) e64);
 2088 
 2089         PMAP_LOCK(map);
 2090 
 2091 #if 0
 2092         /*
 2093          * Check that address range in the kernel does not overlap the stacks.
 2094          * We initialize local static min/max variables once to avoid making
 2095          * 2 function calls for every remove. Note also that these functions
 2096          * both return 0 before kernel stacks have been initialized, and hence
 2097          * the panic is not triggered in this case.
 2098          */
 2099         if (map == kernel_pmap) {
 2100                 static vm_offset_t      kernel_stack_min = 0;
 2101                 static vm_offset_t      kernel_stack_max = 0;
 2102 
 2103                 if (kernel_stack_min == 0) {
 2104                         kernel_stack_min = min_valid_stack_address();
 2105                         kernel_stack_max = max_valid_stack_address();
 2106                 }
 2107                 if  ((kernel_stack_min <= s64 && s64 <  kernel_stack_max) ||
 2108                      (kernel_stack_min <  e64 && e64 <= kernel_stack_max))
 2109                         panic("pmap_remove() attempted in kernel stack");
 2110         }
 2111 #else
 2112 
 2113         /*
 2114          * The values of kernel_stack_min and kernel_stack_max are no longer
 2115          * relevant now that we allocate kernel stacks anywhere in the kernel map,
 2116          * so the old code above no longer applies.  If we wanted to check that
 2117          * we weren't removing a mapping of a page in a kernel stack we'd have to
 2118          * mark the PTE with an unused bit and check that here.
 2119          */
 2120 
 2121 #endif
 2122 
 2123         deadline = rdtsc64() + max_preemption_latency_tsc;
 2124 
 2125         orig_s64 = s64;
 2126 
 2127         while (s64 < e64) {
 2128             l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1);
 2129             if (l64 > e64)
 2130                 l64 = e64;
 2131             pde = pmap_pde(map, s64);
 2132 
 2133             if (pde && (*pde & INTEL_PTE_VALID)) {
 2134                 spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1)));
 2135                 spte = &spte[ptenum(s64)];
 2136                 epte = &spte[intel_btop(l64-s64)];
 2137 
 2138                 pmap_remove_range(map, s64, spte, epte);
 2139             }
 2140             s64 = l64;
 2141             pde++;
 2142 
 2143             if (s64 < e64 && rdtsc64() >= deadline) {
  2144                     PMAP_UNLOCK(map)
  2145                     PMAP_LOCK(map)
  2146 
  2147                     deadline = rdtsc64() + max_preemption_latency_tsc;
 2148             }
 2149 
 2150         }
 2151 
 2152         PMAP_UNLOCK(map);
 2153 
 2154         PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
 2155                    (int) map, 0, 0, 0, 0);
 2156 
 2157 }
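
/*
 * Illustrative sketch (not part of the original source): pmap_remove()
 * assumes its start and end are already rounded to the hardware page size,
 * so a caller holding an arbitrary range would round it first.  The helper
 * name below is hypothetical.
 */
#if 0   /* example only */
static void
pmap_remove_rounded(pmap_t map, addr64_t start, addr64_t end)
{
        addr64_t s64 = start & ~(PAGE_SIZE_64 - 1);                     /* round down */
        addr64_t e64 = (end + PAGE_SIZE_64 - 1) & ~(PAGE_SIZE_64 - 1);  /* round up */

        pmap_remove(map, s64, e64);
}
#endif  /* example only */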
 2158 
 2159 /*
 2160  *      Routine:        pmap_page_protect
 2161  *
 2162  *      Function:
 2163  *              Lower the permission for all mappings to a given
 2164  *              page.
 2165  */
 2166 void
 2167 pmap_page_protect(
 2168         ppnum_t         pn,
 2169         vm_prot_t       prot)
 2170 {
 2171         pv_hashed_entry_t               pvh_eh = PV_HASHED_ENTRY_NULL;
 2172         pv_hashed_entry_t               pvh_et = PV_HASHED_ENTRY_NULL;
 2173         pv_hashed_entry_t       nexth;
 2174         int                     pvh_cnt = 0;
 2175         pv_rooted_entry_t               pv_h;
 2176         pv_rooted_entry_t               pv_e;
 2177         pv_hashed_entry_t       pvh_e;
 2178         pt_entry_t              *pte;
 2179         int                     pai;
 2180         register pmap_t         pmap;
 2181         boolean_t               remove;
 2182         int                     pvhash_idx;
 2183 
 2184         pmap_intr_assert();
 2185         assert(pn != vm_page_fictitious_addr);
 2186         if (pn == vm_page_guard_addr)
 2187                 return;
 2188 
 2189         pai = ppn_to_pai(pn);
 2190 
 2191         if (!managed_page(pai)) {
 2192             /*
 2193              *  Not a managed page.
 2194              */
 2195             return;
 2196         }
 2197 
 2198         PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
 2199                    (int) pn, (int) prot, 0, 0, 0);
 2200 
 2201         /*
 2202          * Determine the new protection.
 2203          */
 2204         switch (prot) {
 2205             case VM_PROT_READ:
 2206             case VM_PROT_READ|VM_PROT_EXECUTE:
 2207                 remove = FALSE;
 2208                 break;
 2209             case VM_PROT_ALL:
 2210                 return; /* nothing to do */
 2211             default:
 2212                 remove = TRUE;
 2213                 break;
 2214         }
 2215 
 2216         pv_h = pai_to_pvh(pai);
 2217 
 2218         LOCK_PVH(pai);
 2219 
 2220 
 2221         /*
 2222          * Walk down PV list, changing or removing all mappings.
 2223          */
 2224         if (pv_h->pmap != PMAP_NULL) {
 2225 
 2226             pv_e = pv_h;
 2227             pvh_e = (pv_hashed_entry_t)pv_e; /* cheat */
 2228 
 2229             do {
 2230                 register vm_map_offset_t vaddr;
 2231                 pmap = pv_e->pmap;
 2232 
 2233                 vaddr = pv_e->va;
 2234                 pte = pmap_pte(pmap, vaddr);
 2235                 
 2236                 if (0 == pte) {
 2237                         panic("pmap_page_protect: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx, prot: %d kernel_pmap: %p", pmap, pn, vaddr, prot, kernel_pmap);
 2238                 }
 2239 
 2240                 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);  /* if there is one */
 2241 
 2242                 /*
 2243                  * Remove the mapping if new protection is NONE
 2244                  * or if write-protecting a kernel mapping.
 2245                  */
 2246                 if (remove || pmap == kernel_pmap) {
 2247                     /*
 2248                      * Remove the mapping, collecting any modify bits.
 2249                      */
 2250                     pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
 2251 
 2252                     PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 2253 
 2254                     pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
 2255 
 2256                     pmap_store_pte(pte, 0);
 2257 
 2258 #if TESTING
 2259                     if (pmap->stats.resident_count < 1)
 2260                         panic("pmap_page_protect: resident_count");
 2261 #endif
 2262                     assert(pmap->stats.resident_count >= 1);
 2263                     OSAddAtomic(-1,  &pmap->stats.resident_count);
 2264 
 2265                     /*
 2266                      * Deal with the pv_rooted_entry.
 2267                      */
 2268 
 2269                     if (pv_e == pv_h) {
 2270                         /*
 2271                          * Fix up head later.
 2272                          */
 2273                         pv_h->pmap = PMAP_NULL;
 2274                     }
 2275                     else {
 2276                         /*
 2277                          * Delete this entry.
 2278                          */
 2279                       CHK_NPVHASH();
 2280                       pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
 2281                       LOCK_PV_HASH(pvhash_idx);
 2282                       remque(&pvh_e->qlink);
 2283                       pmap_pvh_unlink(pvh_e);
 2284                       UNLOCK_PV_HASH(pvhash_idx);
 2285 
 2286                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
 2287                         pvh_eh = pvh_e;
 2288 
 2289                         if (pvh_et == PV_HASHED_ENTRY_NULL)
 2290                             pvh_et = pvh_e;
 2291                         pvh_cnt++;
 2292                     }
 2293                 } else {
 2294                     /*
 2295                      * Write-protect.
 2296                      */
 2297                     pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WRITE));
 2298                     PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 2299                 }
 2300 
 2301                 pvh_e = nexth;
 2302             } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
 2303 
 2304 
 2305             /*
 2306              * If pv_head mapping was removed, fix it up.
 2307              */
 2308 
 2309             if (pv_h->pmap == PMAP_NULL) {
 2310               pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
 2311 
 2312               if (pvh_e != (pv_hashed_entry_t)pv_h) {
 2313                 CHK_NPVHASH();
 2314                 pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
 2315                 LOCK_PV_HASH(pvhash_idx);
 2316                 remque(&pvh_e->qlink);
 2317                 pmap_pvh_unlink(pvh_e);
 2318                 UNLOCK_PV_HASH(pvhash_idx);
 2319                   pv_h->pmap = pvh_e->pmap;
 2320                   pv_h->va = pvh_e->va;
 2321                   pvh_e->qlink.next = (queue_entry_t)pvh_eh;
 2322                     pvh_eh = pvh_e;
 2323 
 2324                     if (pvh_et == PV_HASHED_ENTRY_NULL)
 2325                         pvh_et = pvh_e;
 2326                     pvh_cnt++;
 2327                 }
 2328             }
 2329         }
 2330         if (pvh_eh != PV_HASHED_ENTRY_NULL) {
 2331             PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
 2332         }
 2333 
 2334         UNLOCK_PVH(pai);
 2335 
 2336         PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
 2337                    0, 0, 0, 0, 0);
 2338 
 2339 }
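
/*
 * Illustrative sketch (not part of the original source): per the protection
 * switch above, VM_PROT_READ (with or without execute) write-protects every
 * existing mapping of the page, while any protection short of that other
 * than VM_PROT_ALL removes the mappings entirely.  The function name below
 * is hypothetical.
 */
#if 0   /* example only */
static void
page_protect_example(ppnum_t pn)
{
        pmap_page_protect(pn, VM_PROT_READ);    /* write-protect every mapping */
        pmap_page_protect(pn, VM_PROT_NONE);    /* remove every remaining mapping */
}
#endif  /* example only */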
 2340 
 2341 
 2342 /*
 2343  *      Routine:
 2344  *              pmap_disconnect
 2345  *
 2346  *      Function:
 2347  *              Disconnect all mappings for this page and return reference and change status
 2348  *              in generic format.
 2349  *
 2350  */
 2351 unsigned int pmap_disconnect(
 2352         ppnum_t pa)
 2353 {
 2354         pmap_page_protect(pa, 0);                       /* disconnect the page */
 2355         return (pmap_get_refmod(pa));                   /* return ref/chg status */
 2356 }
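
/*
 * Illustrative sketch (not part of the original source): a caller would
 * typically test the value returned above against the generic reference and
 * change flags; VM_MEM_REFERENCED and VM_MEM_MODIFIED are assumed here to be
 * the format pmap_get_refmod() reports.  The function name is hypothetical.
 */
#if 0   /* example only */
static void
disconnect_example(ppnum_t pn)
{
        unsigned int refmod = pmap_disconnect(pn);

        if (refmod & VM_MEM_MODIFIED) {
                /* the page was dirtied while mapped; it needs cleaning */
        }
        if (refmod & VM_MEM_REFERENCED) {
                /* the page was touched; of interest to page replacement */
        }
}
#endif  /* example only */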
 2357 
 2358 /*
 2359  *      Set the physical protection on the
 2360  *      specified range of this map as requested.
 2361  *      Will not increase permissions.
 2362  */
 2363 void
 2364 pmap_protect(
 2365         pmap_t          map,
 2366         vm_map_offset_t sva,
 2367         vm_map_offset_t eva,
 2368         vm_prot_t       prot)
 2369 {
 2370         register pt_entry_t     *pde;
 2371         register pt_entry_t     *spte, *epte;
 2372         vm_map_offset_t         lva;
 2373         vm_map_offset_t         orig_sva;
 2374         boolean_t       set_NX;
 2375         int             num_found = 0;
 2376 
 2377         pmap_intr_assert();
 2378 
 2379         if (map == PMAP_NULL)
 2380                 return;
 2381 
 2382         if (prot == VM_PROT_NONE) {
 2383                 pmap_remove(map, sva, eva);
 2384                 return;
 2385         }
 2386 
 2387         PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
 2388                    (int) map,
 2389                    (int) (sva>>32), (int) sva,
 2390                    (int) (eva>>32), (int) eva);
 2391 
 2392         if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled )
 2393                 set_NX = FALSE;
 2394         else
 2395                 set_NX = TRUE;
 2396 
 2397         PMAP_LOCK(map);
 2398 
 2399         orig_sva = sva;
 2400         while (sva < eva) {
 2401             lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
 2402             if (lva > eva)
 2403                 lva = eva;
 2404             pde = pmap_pde(map, sva);
 2405             if (pde && (*pde & INTEL_PTE_VALID)) {
 2406                 spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
 2407                 spte = &spte[ptenum(sva)];
 2408                 epte = &spte[intel_btop(lva-sva)];
 2409 
 2410                 while (spte < epte) {
 2411 
 2412                     if (*spte & INTEL_PTE_VALID) {
 2413                       
 2414                         if (prot & VM_PROT_WRITE)
 2415                             pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_WRITE));
 2416                         else
 2417                             pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_WRITE));
 2418 
 2419                         if (set_NX == TRUE)
 2420                             pmap_update_pte(spte, *spte, (*spte | INTEL_PTE_NX));
 2421                         else
 2422                             pmap_update_pte(spte, *spte, (*spte & ~INTEL_PTE_NX));
 2423 
 2424                         num_found++;
 2425                     }
 2426                     spte++;
 2427                 }
 2428             }
 2429             sva = lva;
 2430         }
 2431         if (num_found)
 2432             PMAP_UPDATE_TLBS(map, orig_sva, eva);
 2433 
 2434         PMAP_UNLOCK(map);
 2435 
 2436         PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END,
 2437                    0, 0, 0, 0, 0);
 2438 
 2439 }
 2440 
 2441 /* Map a (possibly) autogenned block */
 2442 void
 2443 pmap_map_block(
 2444         pmap_t          pmap, 
 2445         addr64_t        va,
 2446         ppnum_t         pa,
 2447         uint32_t        size,
 2448         vm_prot_t       prot,
 2449         int             attr,
 2450         __unused unsigned int   flags)
 2451 {
 2452     uint32_t page;
 2453 
 2454     for (page = 0; page < size; page++) {
 2455         pmap_enter(pmap, va, pa, prot, attr, TRUE);
 2456         va += PAGE_SIZE;
 2457         pa++;
 2458     }
 2459 }
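
/*
 * Illustrative sketch (not part of the original source): mapping a block is
 * just a page-by-page pmap_enter() of wired mappings, so the two forms below
 * are equivalent for a two-page block.  The function name is hypothetical.
 */
#if 0   /* example only */
static void
map_block_example(pmap_t pmap, addr64_t va, ppnum_t pa, vm_prot_t prot, int attr)
{
        pmap_map_block(pmap, va, pa, 2, prot, attr, 0);

        /* ... is equivalent to ... */
        pmap_enter(pmap, va, pa, prot, attr, TRUE);
        pmap_enter(pmap, va + PAGE_SIZE, pa + 1, prot, attr, TRUE);
}
#endif  /* example only */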
 2460 
 2461 
 2462 /*
 2463  *      Insert the given physical page (p) at
 2464  *      the specified virtual address (v) in the
 2465  *      target physical map with the protection requested.
 2466  *
 2467  *      If specified, the page will be wired down, meaning
 2468  *      that the related pte cannot be reclaimed.
 2469  *
 2470  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 2471  *      or lose information.  That is, this routine must actually
 2472  *      insert this page into the given map NOW.
 2473  */
 2474 void
 2475 pmap_enter(
 2476         register pmap_t         pmap,
 2477         vm_map_offset_t         vaddr,
 2478         ppnum_t                 pn,
 2479         vm_prot_t               prot,
 2480         unsigned int            flags,
 2481         boolean_t               wired)
 2482 {
 2483         register pt_entry_t     *pte;
 2484         register pv_rooted_entry_t      pv_h;
 2485         register int            pai;
 2486         pv_hashed_entry_t               pvh_e;
 2487         pv_hashed_entry_t               pvh_new;
 2488         pv_hashed_entry_t       *hashp;
 2489         pt_entry_t              template;
 2490         pmap_paddr_t            old_pa;
 2491         pmap_paddr_t             pa = (pmap_paddr_t)i386_ptob(pn);
 2492         boolean_t               need_tlbflush = FALSE;
 2493         boolean_t               set_NX;
 2494         char                    oattr;
 2495         int                     pvhash_idx;
 2496         uint32_t                pv_cnt;
 2497         boolean_t               old_pa_locked;
 2498 
 2499         pmap_intr_assert();
 2500         assert(pn != vm_page_fictitious_addr);
 2501         if (pmap_debug)
 2502                 printf("pmap(%qx, %x)\n", vaddr, pn);
 2503         if (pmap == PMAP_NULL)
 2504                 return;
 2505         if (pn == vm_page_guard_addr)
 2506                 return;
 2507 
 2508         PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
 2509                    (int) pmap,
 2510                    (int) (vaddr>>32), (int) vaddr,
 2511                    (int) pn, prot);
 2512 
 2513         if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled )
 2514                 set_NX = FALSE;
 2515         else
 2516                 set_NX = TRUE;
 2517         
 2518         /*
 2519          *      Must allocate a new pvlist entry while we're unlocked;
 2520          *      zalloc may cause pageout (which will lock the pmap system).
 2521          *      If we determine we need a pvlist entry, we will unlock
  2522          *      and allocate one.  Then we will retry, throwing away
 2523          *      the allocated entry later (if we no longer need it).
 2524          */
 2525 
 2526         pvh_new = PV_HASHED_ENTRY_NULL;
 2527 Retry:
 2528         pvh_e = PV_HASHED_ENTRY_NULL;
 2529 
 2530         PMAP_LOCK(pmap);
 2531 
 2532         /*
 2533          *      Expand pmap to include this pte.  Assume that
 2534          *      pmap is always expanded to include enough hardware
 2535          *      pages to map one VM page.
 2536          */
 2537 
 2538         while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
 2539                 /*
 2540                  *      Must unlock to expand the pmap.
 2541                  */
 2542                 PMAP_UNLOCK(pmap);
 2543                 pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */
 2544                 PMAP_LOCK(pmap);
 2545         }
 2546 
 2547         old_pa = pte_to_pa(*pte);
 2548         pai = pa_index(old_pa);
 2549         old_pa_locked = FALSE;
 2550 
 2551         /*
 2552          * if we have a previous managed page, lock the pv entry now. after
 2553          * we lock it, check to see if someone beat us to the lock and if so
 2554          * drop the lock
 2555          */
 2556 
 2557         if ((0 != old_pa) && managed_page(pai)) {
 2558           LOCK_PVH(pai);
 2559           old_pa_locked = TRUE;
 2560           old_pa = pte_to_pa(*pte);
 2561           if (0 == old_pa) {
 2562             UNLOCK_PVH(pai);  /* some other path beat us to it */
 2563             old_pa_locked = FALSE;
 2564           }
 2565         }
 2566 
 2567 
 2568         /*
 2569          *      Special case if the incoming physical page is already mapped
 2570          *      at this address.
 2571          */
 2572         if (old_pa == pa) {
 2573 
 2574             /*
 2575              *  May be changing its wired attribute or protection
 2576              */
 2577 
 2578             template = pa_to_pte(pa) | INTEL_PTE_VALID;
 2579 
 2580             if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
 2581                 if(!(flags & VM_MEM_GUARDED))
 2582                         template |= INTEL_PTE_PTA;
 2583                 template |= INTEL_PTE_NCACHE;
 2584             }
 2585 
 2586             if (pmap != kernel_pmap)
 2587                 template |= INTEL_PTE_USER;
 2588             if (prot & VM_PROT_WRITE)
 2589                 template |= INTEL_PTE_WRITE;
 2590 
 2591             if (set_NX == TRUE)
 2592                 template |= INTEL_PTE_NX;
 2593 
 2594             if (wired) {
 2595                 template |= INTEL_PTE_WIRED;
 2596                 if (!iswired(*pte))
 2597                     OSAddAtomic(+1,  &pmap->stats.wired_count);
 2598             }
 2599             else {
 2600                 if (iswired(*pte)) {
 2601                     assert(pmap->stats.wired_count >= 1);
 2602                     OSAddAtomic(-1,  &pmap->stats.wired_count);
 2603                 }
 2604             }
 2605 
 2606             /* store modified PTE and preserve RC bits */ 
 2607             pmap_update_pte(pte, *pte, template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
 2608             if (old_pa_locked) {
 2609               UNLOCK_PVH(pai);
 2610               old_pa_locked = FALSE;
 2611             }
 2612             need_tlbflush = TRUE;
 2613             goto Done;
 2614         }
 2615 
 2616         /*
 2617          *      Outline of code from here:
 2618          *         1) If va was mapped, update TLBs, remove the mapping
 2619          *            and remove old pvlist entry.
 2620          *         2) Add pvlist entry for new mapping
 2621          *         3) Enter new mapping.
 2622          *
 2623          *      If the old physical page is not managed step 1) is skipped
 2624          *      (except for updating the TLBs), and the mapping is
 2625          *      overwritten at step 3).  If the new physical page is not
 2626          *      managed, step 2) is skipped.
 2627          */
 2628 
 2629         if (old_pa != (pmap_paddr_t) 0) {
 2630 
 2631             /*
 2632              *  Don't do anything to pages outside valid memory here.
 2633              *  Instead convince the code that enters a new mapping
 2634              *  to overwrite the old one.
 2635              */
 2636 
 2637             /* invalidate the PTE */ 
 2638             pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
 2639             /* propagate invalidate everywhere */
 2640             PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 2641             /* remember reference and change */
 2642             oattr = (char)(*pte & (PHYS_MODIFIED | PHYS_REFERENCED));
 2643             /* completely invalidate the PTE */
 2644             pmap_store_pte(pte, 0);
 2645 
 2646             if (managed_page(pai)) {
 2647 #if TESTING
 2648                 if (pmap->stats.resident_count < 1)
 2649                     panic("pmap_enter: resident_count");
 2650 #endif
 2651                 assert(pmap->stats.resident_count >= 1);
 2652                 OSAddAtomic(-1,  &pmap->stats.resident_count);
 2653 
 2654                 if (iswired(*pte)) {
 2655 
 2656 #if TESTING
 2657                     if (pmap->stats.wired_count < 1)
 2658                         panic("pmap_enter: wired_count");
 2659 #endif
 2660                     assert(pmap->stats.wired_count >= 1);
 2661                     OSAddAtomic(-1,  &pmap->stats.wired_count);
 2662                 }
 2663 
 2664                 pmap_phys_attributes[pai] |= oattr;
 2665                 /*
 2666                  *      Remove the mapping from the pvlist for
 2667                  *      this physical page.
 2668                  *      We'll end up with either a rooted pv or a
 2669                  *      hashed pv
 2670                  */
 2671                 {
 2672 
 2673                     pv_h = pai_to_pvh(pai);
 2674 
 2675                     if (pv_h->pmap == PMAP_NULL) {
 2676                         panic("pmap_enter: null pv_list!");
 2677                     }
 2678 
 2679                     if (pv_h->va == vaddr && pv_h->pmap == pmap) {
 2680                         /*
 2681                          * Header is the pv_rooted_entry.  
 2682                          * If there is a next one, copy it to the
 2683                          * header and free the next one (we cannot
 2684                          * free the header)
 2685                          */
 2686                       pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink);
 2687                       if (pvh_e != (pv_hashed_entry_t)pv_h) {
 2688                         pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
 2689                         LOCK_PV_HASH(pvhash_idx);
 2690                           remque(&pvh_e->qlink);
 2691                           pmap_pvh_unlink(pvh_e);
 2692                           UNLOCK_PV_HASH(pvhash_idx);
 2693                           pv_h->pmap = pvh_e->pmap;
 2694                           pv_h->va = pvh_e->va;
 2695                         }
 2696                       else {
 2697                         pv_h->pmap = PMAP_NULL;
 2698                         pvh_e = PV_HASHED_ENTRY_NULL;
 2699                       }
 2700                     }
 2701                     else {
 2702                       pv_hashed_entry_t *pprevh;
 2703                       ppnum_t old_ppn;
 2704                       /* wasn't the rooted pv - hash, find it, and unlink it */
 2705                       old_ppn = (ppnum_t)pa_index(old_pa);
 2706                       CHK_NPVHASH();
 2707                       pvhash_idx = pvhashidx(pmap,vaddr);
 2708                       LOCK_PV_HASH(pvhash_idx);
 2709                       pprevh = pvhash(pvhash_idx);
 2710 #if PV_DEBUG
 2711                       if (NULL==pprevh)panic("pmap enter 1");
 2712 #endif
 2713                       pvh_e = *pprevh;
 2714                       pmap_pv_hashlist_walks++;
 2715                       pv_cnt = 0;
 2716                       while (PV_HASHED_ENTRY_NULL != pvh_e) {
 2717                         pv_cnt++;
 2718                         if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == old_ppn) break;
 2719                         pprevh = &pvh_e->nexth;
 2720                         pvh_e = pvh_e->nexth;
 2721                       }
 2722                       pmap_pv_hashlist_cnts += pv_cnt;
 2723                       if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt;
 2724                       if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_enter: pv not in hash list");
 2725                       if(NULL==pprevh)panic("pmap enter 2");
 2726                       *pprevh = pvh_e->nexth;
 2727                       remque(&pvh_e->qlink);
 2728                       UNLOCK_PV_HASH(pvhash_idx);
 2729                     }
 2730                 }
 2731             }
 2732             else {
 2733                 /*
 2734                  *      old_pa is not managed.
 2735                  *      Do removal part of accounting.
 2736                  */
 2737 
 2738                 if (iswired(*pte)) {
 2739                     assert(pmap->stats.wired_count >= 1);
 2740                     OSAddAtomic(-1,  &pmap->stats.wired_count);
 2741                 }
 2742             }
 2743         }
 2744 
 2745         /*
  2746          * if we had a previously managed page locked, unlock it now
 2747          */
 2748 
 2749         if (old_pa_locked) {
 2750           UNLOCK_PVH(pai);
 2751           old_pa_locked = FALSE;
 2752         }
 2753 
 2754         pai = pa_index(pa);     /* now working with new incoming phys page */
 2755         if (managed_page(pai)) {
 2756 
 2757             /*
 2758              *  Step 2) Enter the mapping in the PV list for this
 2759              *  physical page.
 2760              */
 2761             pv_h = pai_to_pvh(pai);
 2762 
 2763             LOCK_PVH(pai);
 2764 
 2765             if (pv_h->pmap == PMAP_NULL) {
 2766                 /*
 2767                  *      No mappings yet, use  rooted pv
 2768                  */
 2769                 pv_h->va = vaddr;
 2770                 pv_h->pmap = pmap;
 2771                 queue_init(&pv_h->qlink);
 2772             }
 2773             else {
 2774                 /*
 2775                  *      Add new pv_hashed_entry after header.
 2776                  */
 2777                 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
 2778                   pvh_e = pvh_new;
 2779                   pvh_new = PV_HASHED_ENTRY_NULL;  /* show we used it */
 2780                 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
 2781                   PV_HASHED_ALLOC(pvh_e);
 2782                   if (PV_HASHED_ENTRY_NULL == pvh_e) {
 2783                     /* the pv_e free list is empty.
 2784                      * If we are on the kernel pmap we'll use one of the special private
 2785                      * kernel pv_e's; otherwise we need to unlock everything, zalloc a pv_e,
 2786                      * and restart, bringing the new pv_e in with us.
 2787                      */
 2788                     if (kernel_pmap == pmap) {
 2789                       PV_HASHED_KERN_ALLOC(pvh_e);
 2790                     } else {
 2791                       UNLOCK_PVH(pai);
 2792                       PMAP_UNLOCK(pmap);
 2793                       pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 2794                       goto Retry;
 2795                     }
 2796                   }
 2797                 }
 2798 
 2799                 if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pvh_e exhaustion");
 2800                 pvh_e->va = vaddr;
 2801                 pvh_e->pmap = pmap;
 2802                 pvh_e->ppn = pn;
 2803                 CHK_NPVHASH();
 2804                 pvhash_idx = pvhashidx(pmap,vaddr);
 2805                 LOCK_PV_HASH(pvhash_idx);
 2806                 insque(&pvh_e->qlink, &pv_h->qlink);
 2807                 hashp = pvhash(pvhash_idx);
 2808 #if PV_DEBUG
 2809                 if (NULL == hashp) panic("pmap_enter 4");
 2810 #endif
 2811                 pvh_e->nexth = *hashp;
 2812                 *hashp = pvh_e;
 2813                 UNLOCK_PV_HASH(pvhash_idx);
 2814 
 2815                 /*
 2816                  *      Remember that we used the pvlist entry.
 2817                  */
 2818                 pvh_e = PV_HASHED_ENTRY_NULL;
 2819             }
 2820 
 2821             /*
 2822              * only count the mapping
 2823              * for 'managed memory'
 2824              */
 2825             OSAddAtomic(+1,  &pmap->stats.resident_count);
 2826             if (pmap->stats.resident_count > pmap->stats.resident_max) {
 2827                     pmap->stats.resident_max = pmap->stats.resident_count;
 2828             }
 2829         }
 2830 
 2831         /*
 2832          * Step 3) Enter the mapping.
 2833          *
 2834          *      Build a template to speed up entering -
 2835          *      only the pfn changes.
 2836          */
 2837         template = pa_to_pte(pa) | INTEL_PTE_VALID;
 2838 
 2839         if (flags & VM_MEM_NOT_CACHEABLE) {
 2840                 if(!(flags & VM_MEM_GUARDED))
 2841                         template |= INTEL_PTE_PTA;
 2842                 template |= INTEL_PTE_NCACHE;
 2843         }
 2844 
 2845         if (pmap != kernel_pmap)
 2846                 template |= INTEL_PTE_USER;
 2847         if (prot & VM_PROT_WRITE)
 2848                 template |= INTEL_PTE_WRITE;
 2849 
 2850         if (set_NX == TRUE)
 2851                 template |= INTEL_PTE_NX;
 2852 
 2853         if (wired) {
 2854                 template |= INTEL_PTE_WIRED;
 2855                 OSAddAtomic(+1,  &pmap->stats.wired_count);
 2856         }
 2857         pmap_store_pte(pte, template);
 2858 
 2859         /* if this was a managed page we delayed unlocking the pv until here
 2860          * to prevent pmap_page_protect et al from finding it until the pte
 2861          * has been stored */
 2862 
 2863         if (managed_page(pai)) {
 2864           UNLOCK_PVH(pai);
 2865         }
 2866 
 2867 Done:
 2868         if (need_tlbflush == TRUE)
 2869                 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 2870 
 2871         if (pvh_e != PV_HASHED_ENTRY_NULL) {
 2872                 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
 2873         }
 2874 
 2875         if (pvh_new != PV_HASHED_ENTRY_NULL) {
 2876           PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
 2877         }
 2878 
 2879         PMAP_UNLOCK(pmap);
 2880         PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
 2881 }
 2882 
 2883 /*
 2884  *      Routine:        pmap_change_wiring
 2885  *      Function:       Change the wiring attribute for a map/virtual-address
 2886  *                      pair.
 2887  *      In/out conditions:
 2888  *                      The mapping must already exist in the pmap.
 2889  */
 2890 void
 2891 pmap_change_wiring(
 2892         register pmap_t map,
 2893         vm_map_offset_t vaddr,
 2894         boolean_t       wired)
 2895 {
 2896         register pt_entry_t     *pte;
 2897 
 2898         /*
 2899          *      We must grab the pmap system lock because we may
 2900          *      change a pte_page queue.
 2901          */
 2902         PMAP_LOCK(map);
 2903 
 2904         if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
 2905                 panic("pmap_change_wiring: pte missing");
 2906 
 2907         if (wired && !iswired(*pte)) {
 2908             /*
 2909              *  wiring down mapping
 2910              */
 2911             OSAddAtomic(+1,  &map->stats.wired_count);
 2912             pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
 2913         }
 2914         else if (!wired && iswired(*pte)) {
 2915             /*
 2916              *  unwiring mapping
 2917              */
 2918             assert(map->stats.wired_count >= 1);
 2919             OSAddAtomic(-1,  &map->stats.wired_count);
 2920             pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
 2921         }
 2922 
 2923         PMAP_UNLOCK(map);
 2924 }
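
      /*
       * A minimal usage sketch of the routine above, assuming a mapping for
       * the hypothetical address `va` already exists in the hypothetical
       * pmap `map`, as the in/out conditions require:
       */
      #if 0 /* illustrative sketch */
              pmap_change_wiring(map, va, TRUE);      /* wire: this mapping may no longer fault */
              /* ... work that must not take a fault on this page ... */
              pmap_change_wiring(map, va, FALSE);     /* unwire when done */
      #endif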
 2925 
 2926 
 2927 /*
 2928  *      Routine:        pmap_extract
 2929  *      Function:
 2930  *              Extract the physical page address associated
 2931  *              with the given map/virtual_address pair.
 2932  *     Changed to a shim for backwards compatibility, but it will not
 2933  *     work on 64-bit systems.  Some old drivers that we cannot
 2934  *     change need this.
 2935  */
 2936 
 2937 vm_offset_t
 2938 pmap_extract(
 2939         register pmap_t pmap,
 2940         vm_map_offset_t vaddr)
 2941 {
 2942         ppnum_t ppn;
 2943         vm_offset_t paddr;
 2944 
 2945         paddr = (vm_offset_t)0;
 2946         ppn = pmap_find_phys(pmap, vaddr);
 2947 
 2948         if (ppn) {
 2949                 paddr = ((vm_offset_t)i386_ptob(ppn)) | ((vm_offset_t)vaddr & INTEL_OFFMASK);
 2950         }
 2951         return (paddr);
 2952 }
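
      /*
       * The shim above truncates the physical address to vm_offset_t; below
       * is a minimal sketch of a width-safe variant, assuming the caller can
       * consume a 64-bit physical address (the helper name is hypothetical
       * and not part of this file):
       */
      #if 0 /* illustrative sketch */
      static addr64_t
      pmap_extract64(pmap_t pmap, vm_map_offset_t vaddr)
      {
              ppnum_t ppn = pmap_find_phys(pmap, vaddr);

              if (ppn == 0)
                      return (0);
              /* keep the full page frame number; only the offset comes from vaddr */
              return (((addr64_t)i386_ptob(ppn)) | ((addr64_t)vaddr & INTEL_OFFMASK));
      }
      #endif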
 2953 
 2954 void
 2955 pmap_expand_pml4(
 2956                  pmap_t map,
 2957                  vm_map_offset_t vaddr)
 2958 {
 2959         register vm_page_t      m;
 2960         register pmap_paddr_t   pa;
 2961         uint64_t                i;
 2962         spl_t                   spl;
 2963         ppnum_t                 pn;
 2964         pml4_entry_t            *pml4p;
 2965 
 2966         if (kernel_pmap == map) panic("expand kernel pml4");
 2967 
 2968         spl = splhigh();
 2969         pml4p = pmap64_pml4(map, vaddr);
 2970         splx(spl);
 2971         if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");
 2972 
 2973         /*
 2974          *      Allocate a VM page for the pml4 page
 2975          */
 2976         while ((m = vm_page_grab()) == VM_PAGE_NULL)
 2977                 VM_PAGE_WAIT();
 2978 
 2979         /*
 2980          *      put the page into the pmap's obj list so it
 2981          *      can be found later.
 2982          */
 2983         pn = m->phys_page;
 2984         pa = i386_ptob(pn);
 2985         i = pml4idx(map, vaddr);
 2986 
 2987         /*
 2988          *      Zero the page.
 2989          */
 2990         pmap_zero_page(pn);
 2991 
 2992         vm_page_lockspin_queues();
 2993         vm_page_wire(m);
 2994         vm_page_unlock_queues();
 2995 
 2996         OSAddAtomic(1,  &inuse_ptepages_count);
 2997 
 2998         /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 2999         vm_object_lock(map->pm_obj_pml4);
 3000 
 3001         PMAP_LOCK(map);
 3002         /*
 3003          *      See if someone else expanded us first
 3004          */
 3005         if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
 3006                 PMAP_UNLOCK(map);
 3007                 vm_object_unlock(map->pm_obj_pml4);
 3008 
 3009                 VM_PAGE_FREE(m);
 3010 
 3011                 OSAddAtomic(-1,  &inuse_ptepages_count);
 3012                 return;
 3013         }
 3014 
 3015 #if 0 /* DEBUG */
 3016        if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
 3017                panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
 3018                      map, map->pm_obj_pml4, vaddr, i);
 3019        }
 3020 #endif
 3021         vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
 3022         vm_object_unlock(map->pm_obj_pml4);
 3023 
 3024         /*
 3025          *      Set the PML4 entry for this new PDPT page.
 3026          */
 3027         pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */
 3028 
 3029         pmap_store_pte(pml4p, pa_to_pte(pa)
 3030                                 | INTEL_PTE_VALID
 3031                                 | INTEL_PTE_USER
 3032                                 | INTEL_PTE_WRITE);
 3033 
 3034         PMAP_UNLOCK(map);
 3035 
 3036         return;
 3037 
 3038 }
 3039 
 3040 void
 3041 pmap_expand_pdpt(
 3042                  pmap_t map,
 3043                  vm_map_offset_t vaddr)
 3044 {
 3045         register vm_page_t      m;
 3046         register pmap_paddr_t   pa;
 3047         uint64_t                i;
 3048         spl_t                   spl;
 3049         ppnum_t                 pn;
 3050         pdpt_entry_t            *pdptp;
 3051 
 3052         if (kernel_pmap == map) panic("expand kernel pdpt");
 3053 
 3054         spl = splhigh();
 3055         while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
 3056                 splx(spl);
 3057                 pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */
 3058                 spl = splhigh();
 3059         }
 3060         splx(spl);
 3061 
 3062         /*
 3063          *      Allocate a VM page for the pdpt page
 3064          */
 3065         while ((m = vm_page_grab()) == VM_PAGE_NULL)
 3066                 VM_PAGE_WAIT();
 3067 
 3068         /*
 3069          *      put the page into the pmap's obj list so it
 3070          *      can be found later.
 3071          */
 3072         pn = m->phys_page;
 3073         pa = i386_ptob(pn);
 3074         i = pdptidx(map, vaddr);
 3075 
 3076         /*
 3077          *      Zero the page.
 3078          */
 3079         pmap_zero_page(pn);
 3080 
 3081         vm_page_lockspin_queues();
 3082         vm_page_wire(m);
 3083         vm_page_unlock_queues();
 3084 
 3085         OSAddAtomic(1,  &inuse_ptepages_count);
 3086 
 3087         /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 3088         vm_object_lock(map->pm_obj_pdpt);
 3089 
 3090         PMAP_LOCK(map);
 3091         /*
 3092          *      See if someone else expanded us first
 3093          */
 3094         if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
 3095                 PMAP_UNLOCK(map);
 3096                 vm_object_unlock(map->pm_obj_pdpt);
 3097 
 3098                 VM_PAGE_FREE(m);
 3099 
 3100                 OSAddAtomic(-1,  &inuse_ptepages_count);
 3101                 return;
 3102         }
 3103 
 3104 #if 0 /* DEBUG */
 3105        if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
 3106                panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
 3107                      map, map->pm_obj_pdpt, vaddr, i);
 3108        }
 3109 #endif
 3110         vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
 3111         vm_object_unlock(map->pm_obj_pdpt);
 3112 
 3113         /*
 3114          *      Set the PDPT entry for this new page directory page.
 3115          */
 3116         pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */
 3117 
 3118         pmap_store_pte(pdptp, pa_to_pte(pa)
 3119                                 | INTEL_PTE_VALID
 3120                                 | INTEL_PTE_USER
 3121                                 | INTEL_PTE_WRITE);
 3122 
 3123         PMAP_UNLOCK(map);
 3124 
 3125         return;
 3126 
 3127 }
 3128 
 3129 
 3130 
 3131 /*
 3132  *      Routine:        pmap_expand
 3133  *
 3134  *      Expands a pmap to be able to map the specified virtual address.
 3135  *
 3136  *      Allocates new virtual memory for the P0 or P1 portion of the
 3137  *      pmap, then re-maps the physical pages that were in the old
 3138  *      pmap to be in the new pmap.
 3139  *
 3140  *      Must be called with the pmap system and the pmap unlocked,
 3141  *      since these must be unlocked to use vm_allocate or vm_deallocate.
 3142  *      Thus it must be called in a loop that checks whether the map
 3143  *      has been expanded enough.
 3144  *      (We won't loop forever, since page tables aren't shrunk.)
 3145  */
 3146 void
 3147 pmap_expand(
 3148         pmap_t          map,
 3149         vm_map_offset_t vaddr)
 3150 {
 3151         pt_entry_t              *pdp;
 3152         register vm_page_t      m;
 3153         register pmap_paddr_t   pa;
 3154         uint64_t                 i;
 3155         spl_t                   spl;
 3156         ppnum_t                 pn;
 3157 
 3158         /*
 3159          * if this is not the kernel map (while we are still in compat kernel mode)
 3160          * and we are 64-bit, propagate the expansion upwards
 3161          */
 3162 
 3163         if (cpu_64bit && (map != kernel_pmap)) {
 3164                 spl = splhigh();
 3165                 while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
 3166                         splx(spl);
 3167                         pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */
 3168                         spl = splhigh();
 3169                 }
 3170                 splx(spl);
 3171         }
 3172 
 3173         /*
 3174          *      Allocate a VM page for the pde entries.
 3175          */
 3176         while ((m = vm_page_grab()) == VM_PAGE_NULL)
 3177                 VM_PAGE_WAIT();
 3178 
 3179         /*
 3180          *      put the page into the pmap's obj list so it
 3181          *      can be found later.
 3182          */
 3183         pn = m->phys_page;
 3184         pa = i386_ptob(pn);
 3185         i = pdeidx(map, vaddr);
 3186 
 3187         /*
 3188          *      Zero the page.
 3189          */
 3190         pmap_zero_page(pn);
 3191 
 3192         vm_page_lockspin_queues();
 3193         vm_page_wire(m);
 3194         vm_page_unlock_queues();
 3195 
 3196         OSAddAtomic(1,  &inuse_ptepages_count);
 3197 
 3198         /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
 3199         vm_object_lock(map->pm_obj);
 3200 
 3201         PMAP_LOCK(map);
 3202         /*
 3203          *      See if someone else expanded us first
 3204          */
 3205 
 3206         if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
 3207                 PMAP_UNLOCK(map);
 3208                 vm_object_unlock(map->pm_obj);
 3209 
 3210                 VM_PAGE_FREE(m);
 3211 
 3212                 OSAddAtomic(-1,  &inuse_ptepages_count);
 3213                 return;
 3214         }
 3215 
 3216 #if 0 /* DEBUG */
 3217        if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
 3218                panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
 3219                      map, map->pm_obj, vaddr, i);
 3220        }
 3221 #endif
 3222         vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
 3223         vm_object_unlock(map->pm_obj);
 3224 
 3225         /*
 3226          * refetch while locked 
 3227          */
 3228 
 3229         pdp = pmap_pde(map, vaddr);
 3230 
 3231         /*
 3232          *      Set the page directory entry for this page table.
 3233          */
 3234         pmap_store_pte(pdp, pa_to_pte(pa)
 3235                                 | INTEL_PTE_VALID
 3236                                 | INTEL_PTE_USER
 3237                                 | INTEL_PTE_WRITE);
 3238 
 3239         PMAP_UNLOCK(map);
 3240 
 3241         return;
 3242 }
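
      /*
       * A minimal sketch of the caller-side loop described in the block
       * comment above, assuming hypothetical `map` and `vaddr` and that
       * neither the pmap nor the pmap system lock is held; the same pattern
       * appears later in pmap_cpu_alloc():
       */
      #if 0 /* illustrative sketch */
              while (pmap_pte(map, vaddr) == PT_ENTRY_NULL)
                      pmap_expand(map, vaddr);
      #endif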
 3243 
 3244 
 3245 /*
 3246  * pmap_sync_page_data_phys(ppnum_t pa)
 3247  * 
 3248  * Invalidates all of the instruction cache on a physical page and
 3249  * pushes any dirty data from the data cache for the same physical page.
 3250  * Not required on i386.
 3251  */
 3252 void
 3253 pmap_sync_page_data_phys(__unused ppnum_t pa)
 3254 {
 3255         return;
 3256 }
 3257 
 3258 /*
 3259  * pmap_sync_page_attributes_phys(ppnum_t pa)
 3260  * 
 3261  * Write back and invalidate all cachelines on a physical page.
 3262  */
 3263 void
 3264 pmap_sync_page_attributes_phys(ppnum_t pa)
 3265 {
 3266         cache_flush_page_phys(pa);
 3267 }
 3268 
 3269 
 3270 
 3271 #ifdef CURRENTLY_UNUSED_AND_UNTESTED
 3272 
 3273 int     collect_ref;
 3274 int     collect_unref;
 3275 
 3276 /*
 3277  *      Routine:        pmap_collect
 3278  *      Function:
 3279  *              Garbage collects the physical map system for
 3280  *              pages which are no longer used.
 3281  *              Success need not be guaranteed -- that is, there
 3282  *              may well be pages which are not referenced, but
 3283  *              others may be collected.
 3284  *      Usage:
 3285  *              Called by the pageout daemon when pages are scarce.
 3286  */
 3287 void
 3288 pmap_collect(
 3289         pmap_t          p)
 3290 {
 3291         register pt_entry_t     *pdp, *ptp;
 3292         pt_entry_t              *eptp;
 3293         int                     wired;
 3294 
 3295         if (p == PMAP_NULL)
 3296                 return;
 3297 
 3298         if (p == kernel_pmap)
 3299                 return;
 3300 
 3301         /*
 3302          *      Garbage collect map.
 3303          */
 3304         PMAP_LOCK(p);
 3305 
 3306         for (pdp = (pt_entry_t *)p->dirbase;
 3307              pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
 3308              pdp++)
 3309         {
 3310            if (*pdp & INTEL_PTE_VALID) {
 3311               if(*pdp & INTEL_PTE_REF) {
 3312                 pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
 3313                 collect_ref++;
 3314               } else {
 3315                 collect_unref++;
 3316                 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
 3317                 eptp = ptp + NPTEPG;
 3318 
 3319                 /*
 3320                  * If the pte page has any wired mappings, we cannot
 3321                  * free it.
 3322                  */
 3323                 wired = 0;
 3324                 {
 3325                     register pt_entry_t *ptep;
 3326                     for (ptep = ptp; ptep < eptp; ptep++) {
 3327                         if (iswired(*ptep)) {
 3328                             wired = 1;
 3329                             break;
 3330                         }
 3331                     }
 3332                 }
 3333                 if (!wired) {
 3334                     /*
 3335                      * Remove the virtual addresses mapped by this pte page.
 3336                      */
 3337                     pmap_remove_range(p,
 3338                                 pdetova(pdp - (pt_entry_t *)p->dirbase),
 3339                                 ptp,
 3340                                 eptp);
 3341 
 3342                     /*
 3343                      * Invalidate the page directory pointer.
 3344                      */
 3345                     pmap_store_pte(pdp, 0x0);
 3346                  
 3347                     PMAP_UNLOCK(p);
 3348 
 3349                     /*
 3350                      * And free the pte page itself.
 3351                      */
 3352                     {
 3353                         register vm_page_t m;
 3354 
 3355                         vm_object_lock(p->pm_obj);
 3356 
 3357                         m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
 3358                         if (m == VM_PAGE_NULL)
 3359                             panic("pmap_collect: pte page not in object");
 3360 
 3361                         VM_PAGE_FREE(m);
 3362 
 3363                         OSAddAtomic(-1,  &inuse_ptepages_count);
 3364 
 3365                         vm_object_unlock(p->pm_obj);
 3366                     }
 3367 
 3368                     PMAP_LOCK(p);
 3369                 }
 3370               }
 3371            }
 3372         }
 3373 
 3374         PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
 3375         PMAP_UNLOCK(p);
 3376         return;
 3377 
 3378 }
 3379 #endif
 3380 
 3381 
 3382 void
 3383 pmap_copy_page(ppnum_t src, ppnum_t dst)
 3384 {
 3385   bcopy_phys((addr64_t)i386_ptob(src),
 3386              (addr64_t)i386_ptob(dst),
 3387              PAGE_SIZE);
 3388 }
 3389 
 3390 
 3391 /*
 3392  *      Routine:        pmap_pageable
 3393  *      Function:
 3394  *              Make the specified pages (by pmap, offset)
 3395  *              pageable (or not) as requested.
 3396  *
 3397  *              A page which is not pageable may not take
 3398  *              a fault; therefore, its page table entry
 3399  *              must remain valid for the duration.
 3400  *
 3401  *              This routine is merely advisory; pmap_enter
 3402  *              will specify that these pages are to be wired
 3403  *              down (or not) as appropriate.
 3404  */
 3405 void
 3406 pmap_pageable(
 3407         __unused pmap_t         pmap,
 3408         __unused vm_map_offset_t        start_addr,
 3409         __unused vm_map_offset_t        end_addr,
 3410         __unused boolean_t      pageable)
 3411 {
 3412 #ifdef  lint
 3413         pmap++; start_addr++; end_addr++; pageable++;
 3414 #endif  /* lint */
 3415 }
 3416 
 3417 /*
 3418  *      Clear specified attribute bits.
 3419  */
 3420 void
 3421 phys_attribute_clear(
 3422         ppnum_t         pn,
 3423         int             bits)
 3424 {
 3425         pv_rooted_entry_t               pv_h;
 3426         register pv_hashed_entry_t      pv_e;
 3427         register pt_entry_t     *pte;
 3428         int                     pai;
 3429         register pmap_t         pmap;
 3430 
 3431         pmap_intr_assert();
 3432         assert(pn != vm_page_fictitious_addr);
 3433         if (pn == vm_page_guard_addr)
 3434                 return;
 3435 
 3436         pai = ppn_to_pai(pn);
 3437 
 3438         if (!managed_page(pai)) {
 3439             /*
 3440              *  Not a managed page.
 3441              */
 3442             return;
 3443         }
 3444 
 3445 
 3446         PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
 3447                    (int) pn, bits, 0, 0, 0);
 3448 
 3449         pv_h = pai_to_pvh(pai);
 3450 
 3451         LOCK_PVH(pai);
 3452 
 3453         /*
 3454          * Walk down the PV list, clearing all modify or reference bits.
 3455          * We do not have to lock the pv_list because we have
 3456          * the entire pmap system locked.
 3457          */
 3458         if (pv_h->pmap != PMAP_NULL) {
 3459             /*
 3460              * There are some mappings.
 3461              */
 3462 
 3463           pv_e = (pv_hashed_entry_t)pv_h;
 3464 
 3465           do {
 3466                 pmap = pv_e->pmap;
 3467 
 3468                 {
 3469                     vm_map_offset_t va;
 3470 
 3471                     va = pv_e->va;
 3472 
 3473                     /*
 3474                      * Clear modify and/or reference bits.
 3475                      */
 3476 
 3477                     pte = pmap_pte(pmap, va);
 3478                     pmap_update_pte(pte, *pte, (*pte & ~bits));
 3479                     /* Ensure all processors using this translation
 3480                      * invalidate this TLB entry. The invalidation *must* follow
 3481                      * the PTE update, to ensure that the TLB shadow of the
 3482                      * 'D' bit (in particular) is synchronized with the
 3483                      * updated PTE.
 3484                      */
 3485                     PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
 3486                 }
 3487 
 3488                 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
 3489 
 3490           } while (pv_e != (pv_hashed_entry_t)pv_h);
 3491         }
 3492         pmap_phys_attributes[pai] &= ~bits;
 3493 
 3494         UNLOCK_PVH(pai);
 3495 
 3496         PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
 3497                    0, 0, 0, 0, 0);
 3498 
 3499 }
 3500 
 3501 /*
 3502  *      Check specified attribute bits.
 3503  */
 3504 int
 3505 phys_attribute_test(
 3506         ppnum_t         pn,
 3507         int             bits)
 3508 {
 3509         pv_rooted_entry_t               pv_h;
 3510         register pv_hashed_entry_t      pv_e;
 3511         register pt_entry_t     *pte;
 3512         int                     pai;
 3513         register pmap_t         pmap;
 3514         int                     attributes = 0;
 3515 
 3516         pmap_intr_assert();
 3517         assert(pn != vm_page_fictitious_addr);
 3518         if (pn == vm_page_guard_addr)
 3519                 return 0;
 3520 
 3521         pai = ppn_to_pai(pn);
 3522 
 3523         if (!managed_page(pai)) {
 3524             /*
 3525              *  Not a managed page.
 3526              */
 3527             return (0);
 3528         }
 3529 
 3530         /*
 3531          * Super fast check...  if the bits are already collected
 3532          * there is no need to take any locks;
 3533          * if not set, we need to recheck after taking
 3534          * the lock in case they got pulled in while
 3535          * we were waiting for the lock.
 3536          */
 3537         if ( (pmap_phys_attributes[pai] & bits) == bits)
 3538             return (bits);
 3539 
 3540         pv_h = pai_to_pvh(pai);
 3541 
 3542         LOCK_PVH(pai);
 3543 
 3544         attributes = pmap_phys_attributes[pai] & bits;
 3545 
 3546 
 3547         /*
 3548          * Walk down the PV list, checking the mappings until we
 3549          * reach the end or we've found the attributes we've asked for.
 3550          * We do not have to lock the pv_list because we have
 3551          * the entire pmap system locked.
 3552          */
 3553         if (pv_h->pmap != PMAP_NULL) {
 3554             /*
 3555              * There are some mappings.
 3556              */
 3557           pv_e = (pv_hashed_entry_t)pv_h;
 3558           if (attributes != bits) do {
 3559 
 3560                 pmap = pv_e->pmap;
 3561 
 3562                 {
 3563                     vm_map_offset_t va;
 3564 
 3565                     va = pv_e->va;
 3566                     /*
 3567                      * first make sure any processor actively
 3568                      * using this pmap flushes its TLB state
 3569                      */
 3570                     PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
 3571 
 3572                     /*
 3573                      * pick up modify and/or reference bits from this mapping
 3574                      */
 3575                     pte = pmap_pte(pmap, va);
 3576                     attributes |= (int)(*pte & bits);
 3577 
 3578                 }
 3579 
 3580                 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
 3581 
 3582             } while ((attributes != bits) && (pv_e != (pv_hashed_entry_t)pv_h));
 3583         }
 3584 
 3585         UNLOCK_PVH(pai);
 3586         return (attributes);
 3587 }
 3588 
 3589 /*
 3590  *      Set specified attribute bits.
 3591  */
 3592 void
 3593 phys_attribute_set(
 3594         ppnum_t         pn,
 3595         int             bits)
 3596 {
 3597         int             pai;
 3598 
 3599         pmap_intr_assert();
 3600         assert(pn != vm_page_fictitious_addr);
 3601         if (pn == vm_page_guard_addr)
 3602                 return;
 3603 
 3604         pai = ppn_to_pai(pn);
 3605 
 3606         if (!managed_page(pai)) {
 3607             /*
 3608              *  Not a managed page.
 3609              */
 3610             return;
 3611         }
 3612 
 3613         LOCK_PVH(pai);
 3614 
 3615         pmap_phys_attributes[pai] |= bits;
 3616 
 3617         UNLOCK_PVH(pai);
 3618 }
 3619 
 3620 /*
 3621  *      Set the modify bit on the specified physical page.
 3622  */
 3623 
 3624 void pmap_set_modify(
 3625                      ppnum_t pn)
 3626 {
 3627         phys_attribute_set(pn, PHYS_MODIFIED);
 3628 }
 3629 
 3630 /*
 3631  *      Clear the modify bits on the specified physical page.
 3632  */
 3633 
 3634 void
 3635 pmap_clear_modify(
 3636                   ppnum_t pn)
 3637 {
 3638         phys_attribute_clear(pn, PHYS_MODIFIED);
 3639 }
 3640 
 3641 /*
 3642  *      pmap_is_modified:
 3643  *
 3644  *      Return whether or not the specified physical page is modified
 3645  *      by any physical maps.
 3646  */
 3647 
 3648 boolean_t
 3649 pmap_is_modified(
 3650                  ppnum_t pn)
 3651 {
 3652         if (phys_attribute_test(pn, PHYS_MODIFIED))
 3653                 return TRUE;
 3654 
 3655         return FALSE;
 3656 }
 3657 
 3658 /*
 3659  *      pmap_clear_reference:
 3660  *
 3661  *      Clear the reference bit on the specified physical page.
 3662  */
 3663 
 3664 void
 3665 pmap_clear_reference(
 3666                      ppnum_t pn)
 3667 {
 3668         phys_attribute_clear(pn, PHYS_REFERENCED);
 3669 }
 3670 
 3671 void
 3672 pmap_set_reference(ppnum_t pn)
 3673 {
 3674         phys_attribute_set(pn, PHYS_REFERENCED);
 3675 }
 3676 
 3677 /*
 3678  *      pmap_is_referenced:
 3679  *
 3680  *      Return whether or not the specified physical page is referenced
 3681  *      by any physical maps.
 3682  */
 3683 
 3684 boolean_t
 3685 pmap_is_referenced(
 3686                    ppnum_t pn)
 3687 {
 3688         if (phys_attribute_test(pn, PHYS_REFERENCED))
 3689                 return TRUE;
 3690 
 3691         return FALSE;
 3692 }
 3693 
 3694 /*
 3695  * pmap_get_refmod(phys)
 3696  *  returns the referenced and modified bits of the specified
 3697  *  physical page.
 3698  */
 3699 unsigned int
 3700 pmap_get_refmod(ppnum_t pa)
 3701 {
 3702         int     refmod;
 3703         unsigned int retval = 0;
 3704 
 3705         refmod = phys_attribute_test(pa, PHYS_MODIFIED | PHYS_REFERENCED);
 3706 
 3707         if (refmod & PHYS_MODIFIED)
 3708                 retval |= VM_MEM_MODIFIED;
 3709         if (refmod & PHYS_REFERENCED)
 3710                 retval |= VM_MEM_REFERENCED;
 3711 
 3712         return (retval);
 3713 }
 3714 
 3715 /*
 3716  * pmap_clear_refmod(phys, mask)
 3717  *  clears the referenced and modified bits as specified by the mask
 3718  *  of the specified physical page.
 3719  */
 3720 void
 3721 pmap_clear_refmod(ppnum_t pa, unsigned int mask)
 3722 {
 3723         unsigned int  x86Mask;
 3724 
 3725         x86Mask = (   ((mask &   VM_MEM_MODIFIED)?   PHYS_MODIFIED : 0)
 3726                     | ((mask & VM_MEM_REFERENCED)? PHYS_REFERENCED : 0));
 3727         phys_attribute_clear(pa, x86Mask);
 3728 }
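
      /*
       * A minimal sketch of a query-then-clear round trip using the two
       * wrappers above, assuming a hypothetical managed physical page
       * number `pn`:
       */
      #if 0 /* illustrative sketch */
              unsigned int refmod = pmap_get_refmod(pn);

              if (refmod & VM_MEM_MODIFIED) {
                      /* ... the page is dirty; schedule it for cleaning ... */
              }
              /* reset both bits so a later scan observes only new activity */
              pmap_clear_refmod(pn, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
      #endif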
 3729 
 3730 void 
 3731 invalidate_icache(__unused vm_offset_t  addr,
 3732                   __unused unsigned     cnt,
 3733                   __unused int          phys)
 3734 {
 3735         return;
 3736 }
 3737 void 
 3738 flush_dcache(__unused vm_offset_t       addr,
 3739              __unused unsigned          count,
 3740              __unused int               phys)
 3741 {
 3742         return;
 3743 }
 3744 
 3745 #if CONFIG_DTRACE
 3746 /*
 3747  * Constrain DTrace copyin/copyout actions
 3748  */
 3749 extern kern_return_t dtrace_copyio_preflight(addr64_t);
 3750 extern kern_return_t dtrace_copyio_postflight(addr64_t);
 3751 
 3752 kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
 3753 {
 3754         thread_t thread = current_thread();
 3755 
 3756         if (current_map() == kernel_map)
 3757                 return KERN_FAILURE;
 3758         else if (thread->machine.specFlags & CopyIOActive)
 3759                 return KERN_FAILURE;
 3760         else
 3761                 return KERN_SUCCESS;
 3762 }
 3763  
 3764 kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
 3765 {
 3766         return KERN_SUCCESS;
 3767 }
 3768 #endif /* CONFIG_DTRACE */
 3769 
 3770 #if     MACH_KDB
 3771 
 3772 /* show phys page mappings and attributes */
 3773 
 3774 extern void     db_show_page(pmap_paddr_t pa);
 3775 
 3776 #if 0
 3777 void
 3778 db_show_page(pmap_paddr_t pa)
 3779 {
 3780         pv_entry_t      pv_h;
 3781         int             pai;
 3782         char            attr;
 3783         
 3784         pai = pa_index(pa);
 3785         pv_h = pai_to_pvh(pai);
 3786 
 3787         attr = pmap_phys_attributes[pai];
 3788         printf("phys page %llx ", pa);
 3789         if (attr & PHYS_MODIFIED)
 3790                 printf("modified, ");
 3791         if (attr & PHYS_REFERENCED)
 3792                 printf("referenced, ");
 3793         if (pv_h->pmap || pv_h->next)
 3794                 printf(" mapped at\n");
 3795         else
 3796                 printf(" not mapped\n");
 3797         for (; pv_h; pv_h = pv_h->next)
 3798                 if (pv_h->pmap)
 3799                         printf("%llx in pmap %p\n", pv_h->va, pv_h->pmap);
 3800 }
 3801 #endif
 3802 
 3803 #endif /* MACH_KDB */
 3804 
 3805 #if     MACH_KDB
 3806 #if 0
 3807 void db_kvtophys(vm_offset_t);
 3808 void db_show_vaddrs(pt_entry_t  *);
 3809 
 3810 /*
 3811  *      print out the results of kvtophys(arg)
 3812  */
 3813 void
 3814 db_kvtophys(
 3815         vm_offset_t     vaddr)
 3816 {
 3817         db_printf("0x%qx", kvtophys(vaddr));
 3818 }
 3819 
 3820 /*
 3821  *      Walk the page tables.
 3822  */
 3823 void
 3824 db_show_vaddrs(
 3825         pt_entry_t      *dirbase)
 3826 {
 3827         pt_entry_t      *ptep, *pdep, tmp;
 3828         unsigned int    x, y, pdecnt, ptecnt;
 3829 
 3830         if (dirbase == 0) {
 3831                 dirbase = kernel_pmap->dirbase;
 3832         }
 3833         if (dirbase == 0) {
 3834                 db_printf("need a dirbase...\n");
 3835                 return;
 3836         }
 3837         dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK);
 3838 
 3839         db_printf("dirbase: 0x%x\n", dirbase);
 3840 
 3841         pdecnt = ptecnt = 0;
 3842         pdep = &dirbase[0];
 3843         for (y = 0; y < NPDEPG; y++, pdep++) {
 3844                 if (((tmp = *pdep) & INTEL_PTE_VALID) == 0) {
 3845                         continue;
 3846                 }
 3847                 pdecnt++;
 3848                 ptep = (pt_entry_t *) ((unsigned long)(*pdep) & ~INTEL_OFFMASK);
 3849                 db_printf("dir[%4d]: 0x%x\n", y, *pdep);
 3850                 for (x = 0; x < NPTEPG; x++, ptep++) {
 3851                         if (((tmp = *ptep) & INTEL_PTE_VALID) == 0) {
 3852                                 continue;
 3853                         }
 3854                         ptecnt++;
 3855                         db_printf("   tab[%4d]: 0x%x, va=0x%x, pa=0x%x\n",
 3856                                 x,
 3857                                 *ptep,
 3858                                 (y << 22) | (x << 12),
 3859                                 *ptep & ~INTEL_OFFMASK);
 3860                 }
 3861         }
 3862 
 3863         db_printf("total: %d tables, %d page table entries.\n", pdecnt, ptecnt);
 3864 
 3865 }
 3866 #endif
 3867 #endif  /* MACH_KDB */
 3868 
 3869 #include <mach_vm_debug.h>
 3870 #if     MACH_VM_DEBUG
 3871 #include <vm/vm_debug.h>
 3872 
 3873 int
 3874 pmap_list_resident_pages(
 3875         __unused pmap_t         pmap,
 3876         __unused vm_offset_t    *listp,
 3877         __unused int            space)
 3878 {
 3879         return 0;
 3880 }
 3881 #endif  /* MACH_VM_DEBUG */
 3882 
 3883 
 3884 
 3885 /* temporary workaround */
 3886 boolean_t
 3887 coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
 3888 {
 3889 #if 0
 3890         pt_entry_t     *ptep;
 3891 
 3892         ptep = pmap_pte(map->pmap, va);
 3893         if (0 == ptep)
 3894                 return FALSE;
 3895         return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
 3896 #else
 3897         return TRUE;
 3898 #endif
 3899 }
 3900 
 3901 
 3902 boolean_t
 3903 phys_page_exists(
 3904                  ppnum_t pn)
 3905 {
 3906         assert(pn != vm_page_fictitious_addr);
 3907 
 3908         if (!pmap_initialized)
 3909                 return (TRUE);
 3910 
 3911         if (pn == vm_page_guard_addr)
 3912                 return FALSE;
 3913 
 3914         if (!managed_page(ppn_to_pai(pn)))
 3915                 return (FALSE);
 3916 
 3917         return TRUE;
 3918 }
 3919 
 3920 void
 3921 mapping_free_prime(void)
 3922 {
 3923         int             i;
 3924         pv_hashed_entry_t      pvh_e;
 3925         pv_hashed_entry_t      pvh_eh;
 3926         pv_hashed_entry_t      pvh_et;
 3927         int             pv_cnt;
 3928 
 3929         pv_cnt = 0;
 3930         pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
 3931         for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
 3932                 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 3933 
 3934                 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
 3935                 pvh_eh = pvh_e;
 3936 
 3937                 if (pvh_et == PV_HASHED_ENTRY_NULL)
 3938                         pvh_et = pvh_e;
 3939                 pv_cnt++;
 3940         }
 3941         PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
 3942 
 3943         pv_cnt = 0;
 3944         pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
 3945         for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
 3946                 pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 3947 
 3948                 pvh_e->qlink.next = (queue_entry_t)pvh_eh;
 3949                 pvh_eh = pvh_e;
 3950 
 3951                 if (pvh_et == PV_HASHED_ENTRY_NULL)
 3952                         pvh_et = pvh_e;
 3953                 pv_cnt++;
 3954         }
 3955         PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
 3956 
 3957 }
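
      /*
       * Note on the pattern above (and in mapping_adjust() below): freshly
       * zalloc'ed entries are chained through qlink.next while the head, the
       * tail and a count are tracked, so that PV_HASHED_FREE_LIST() /
       * PV_HASHED_KERN_FREE_LIST() can splice the whole chain onto the
       * corresponding free list in one operation (as their head/tail/count
       * arguments suggest).
       */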
 3958 
 3959 void
 3960 mapping_adjust(void)
 3961 {
 3962         pv_hashed_entry_t      pvh_e;
 3963         pv_hashed_entry_t      pvh_eh;
 3964         pv_hashed_entry_t      pvh_et;
 3965         int             pv_cnt;
 3966         int             i;
 3967 
 3968         if (mapping_adjust_call == NULL) {
 3969                 thread_call_setup(&mapping_adjust_call_data,
 3970                                   (thread_call_func_t) mapping_adjust,
 3971                                   (thread_call_param_t) NULL);
 3972                 mapping_adjust_call = &mapping_adjust_call_data;
 3973         }
 3974 
 3975         pv_cnt = 0;
 3976         pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
 3977         if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
 3978                 for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
 3979                         pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 3980 
 3981                         pvh_e->qlink.next = (queue_entry_t)pvh_eh;
 3982                         pvh_eh = pvh_e;
 3983 
 3984                         if (pvh_et == PV_HASHED_ENTRY_NULL)
 3985                                 pvh_et = pvh_e;
 3986                         pv_cnt++;
 3987                 }
 3988                 PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
 3989         }
 3990 
 3991         pv_cnt = 0;
 3992         pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
 3993         if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
 3994                 for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
 3995                         pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 3996 
 3997                         pvh_e->qlink.next = (queue_entry_t)pvh_eh;
 3998                         pvh_eh = pvh_e;
 3999 
 4000                         if (pvh_et == PV_HASHED_ENTRY_NULL)
 4001                                 pvh_et = pvh_e;
 4002                         pv_cnt++;
 4003                 }
 4004                 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
 4005         }
 4006         mappingrecurse = 0;
 4007 }
 4008 
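      /*
       * pmap_commpage32_init: for each of `cnt` pages, copy the kernel
       * commpage PTE into the PTE slot for the corresponding user commpage
       * address, adding INTEL_PTE_USER and INTEL_PTE_GLOBAL and clearing
       * INTEL_PTE_WRITE so the aliased mapping is user-visible but read-only.
       */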
 4009 void
 4010 pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
 4011 {
 4012         int i;
 4013         pt_entry_t *opte, *npte;
 4014         pt_entry_t pte;
 4015         spl_t s;
 4016 
 4017         for (i = 0; i < cnt; i++) {
 4018                 s = splhigh();
 4019                 opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
 4020                 if (0 == opte)
 4021                         panic("kernel_commpage");
 4022                 pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
 4023                 pte &= ~INTEL_PTE_WRITE; // ensure read only
 4024                 npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
 4025                 if (0 == npte)
 4026                         panic("user_commpage");
 4027                 pmap_store_pte(npte, pte);
 4028                 splx(s);
 4029                 kernel_commpage += INTEL_PGBYTES;
 4030                 user_commpage += INTEL_PGBYTES;
 4031         }
 4032 }
 4033 
 4034 
 4035 #define PMAP_COMMPAGE64_CNT  (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
 4036 pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];
 4037 
 4038 void
 4039 pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
 4040 {
 4041     int i;
 4042     pt_entry_t *kptep;
 4043 
 4044     PMAP_LOCK(kernel_pmap);
 4045 
 4046     for (i = 0; i < cnt; i++) {
 4047         kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
 4048         if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID)))
 4049             panic("pmap_commpage64_init pte");
 4050         pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
 4051     }
 4052     PMAP_UNLOCK(kernel_pmap);
 4053 }
 4054 
 4055 
 4056 static cpu_pmap_t               cpu_pmap_master;
 4057 
 4058 struct cpu_pmap *
 4059 pmap_cpu_alloc(boolean_t is_boot_cpu)
 4060 {
 4061         int                     ret;
 4062         int                     i;
 4063         cpu_pmap_t              *cp;
 4064         vm_offset_t             address;
 4065         vm_map_address_t        mapaddr;
 4066         vm_map_entry_t          entry;
 4067         pt_entry_t              *pte;
 4068         
 4069         if (is_boot_cpu) {
 4070                 cp = &cpu_pmap_master;
 4071         } else {
 4072                 /*
 4073                  * The per-cpu pmap data structure itself.
 4074                  */
 4075                 ret = kmem_alloc(kernel_map,
 4076                                  (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
 4077                 if (ret != KERN_SUCCESS) {
 4078                         printf("pmap_cpu_alloc() failed ret=%d\n", ret);
 4079                         return NULL;
 4080                 }
 4081                 bzero((void *)cp, sizeof(cpu_pmap_t));
 4082 
 4083                 /*
 4084                  * The temporary windows used for copy/zero - see loose_ends.c
 4085                  */
 4086                 ret = vm_map_find_space(kernel_map,
 4087                     &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
 4088                 if (ret != KERN_SUCCESS) {
 4089                         printf("pmap_cpu_alloc() "
 4090                                 "vm_map_find_space ret=%d\n", ret);
 4091                         pmap_cpu_free(cp);
 4092                         return NULL;
 4093                 }
 4094                 address = (vm_offset_t)mapaddr;
 4095 
 4096                 for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
 4097                   spl_t s;
 4098                         s = splhigh();
 4099                         while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
 4100                                 pmap_expand(kernel_pmap, (vm_map_offset_t)address);
 4101                         * (int *) pte = 0; 
 4102                         cp->mapwindow[i].prv_CADDR = (caddr_t) address;
 4103                         cp->mapwindow[i].prv_CMAP = pte;
 4104                         splx(s);
 4105                 }
 4106                 vm_map_unlock(kernel_map);
 4107         }
 4108 
 4109         cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
 4110         cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
 4111         cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;
 4112 
 4113         return cp;
 4114 }
 4115 
 4116 void
 4117 pmap_cpu_free(struct cpu_pmap *cp)
 4118 {
 4119         if (cp != NULL && cp != &cpu_pmap_master) {
 4120                 kfree((void *) cp, sizeof(cpu_pmap_t));
 4121         }
 4122 }
 4123 
 4124 
 4125 mapwindow_t *
 4126 pmap_get_mapwindow(pt_entry_t pentry)
 4127 {
 4128     mapwindow_t *mp;
 4129     int i;
 4130 
 4131     assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
 4132 
 4133     /*
 4134      * Note: 0th map reserved for pmap_pte()
 4135      */
 4136     for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
 4137             mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];
 4138 
 4139             if (*mp->prv_CMAP == 0) {
 4140                     pmap_store_pte(mp->prv_CMAP, pentry);
 4141 
 4142                     invlpg((uintptr_t)mp->prv_CADDR);
 4143 
 4144                     return (mp);
 4145             }
 4146     }
 4147     panic("pmap_get_mapwindow: no windows available");
 4148 
 4149     return NULL;
 4150 }
 4151 
 4152 
 4153 void
 4154 pmap_put_mapwindow(mapwindow_t *mp)
 4155 {
 4156     pmap_store_pte(mp->prv_CMAP, 0);
 4157 }
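
      /*
       * A minimal sketch of a map-window round trip using the pair of
       * routines above, assuming a hypothetical page-aligned physical
       * address `pa` and that interrupts or preemption are already disabled,
       * as the assertion in pmap_get_mapwindow() requires (the real
       * consumers live in loose_ends.c):
       */
      #if 0 /* illustrative sketch */
              mapwindow_t *mp;

              mp = pmap_get_mapwindow(pa_to_pte(pa) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
              bzero(mp->prv_CADDR, PAGE_SIZE);        /* use the temporary kernel window */
              pmap_put_mapwindow(mp);
      #endif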
 4158 
 4159 void
 4160 pmap_switch(pmap_t tpmap)
 4161 {
 4162         spl_t   s;
 4163 
 4164         s = splhigh();          /* Make sure interrupts are disabled */
 4165 
 4166         set_dirbase(tpmap, current_thread());
 4167 
 4168         splx(s);
 4169 }
 4170 
 4171 
 4172 /*
 4173  * disable no-execute capability on
 4174  * the specified pmap
 4175  */
 4176 void pmap_disable_NX(pmap_t pmap) {
 4177   
 4178         pmap->nx_enabled = 0;
 4179 }
 4180 
 4181 void
 4182 pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size,
 4183                   vm_size_t *alloc_size, int *collectable, int *exhaustable)
 4184 {
 4185         *count      = inuse_ptepages_count;
 4186         *cur_size   = PAGE_SIZE * inuse_ptepages_count;
 4187         *max_size   = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
 4188         *elem_size  = PAGE_SIZE;
 4189         *alloc_size = PAGE_SIZE;
 4190 
 4191         *collectable = 1;
 4192         *exhaustable = 0;
 4193 }
 4194 
 4195 vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
 4196 {
 4197   enum high_fixed_addresses a;
 4198   a = e + HIGH_CPU_END * cpu;
 4199   return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
 4200 }
 4201 
 4202 vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e)
 4203 {
 4204   return pmap_cpu_high_map_vaddr(cpu_number(), e);
 4205 }
 4206 
 4207 vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
 4208 {
 4209   enum high_fixed_addresses a;
 4210   vm_offset_t vaddr;
 4211 
 4212   a = e + HIGH_CPU_END * cpu_number();
 4213   vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
 4214   pmap_store_pte(pte_unique_base + a, pte);
 4215 
 4216   /* TLB flush for this page for this cpu */
 4217   invlpg((uintptr_t)vaddr);
 4218 
 4219   return  vaddr;
 4220 }
 4221 
 4222 static inline void
 4223 pmap_cpuset_NMIPI(cpu_set cpu_mask) {
 4224         unsigned int cpu, cpu_bit;
 4225         uint64_t deadline;
 4226 
 4227         for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
 4228                 if (cpu_mask & cpu_bit)
 4229                         cpu_NMI_interrupt(cpu);
 4230         }
 4231         deadline = mach_absolute_time() + (LockTimeOut);
 4232         while (mach_absolute_time() < deadline)
 4233                 cpu_pause();
 4234 }
 4235 
 4236 /*
 4237  * Called with the pmap locked, we:
 4238  *  - scan through per-cpu data to see which other cpus need to flush
 4239  *  - send an IPI to each non-idle cpu to be flushed
 4240  *  - wait for all to signal back that they are inactive or we see that
 4241  *    they are in an interrupt handler or at a safe point
 4242  *  - flush the local tlb if it is active for this pmap
 4243  *  - return ... the caller will unlock the pmap
 4244  */
 4245 void
 4246 pmap_flush_tlbs(pmap_t  pmap)
 4247 {
 4248         unsigned int    cpu;
 4249         unsigned int    cpu_bit;
 4250         cpu_set         cpus_to_signal;
 4251         unsigned int    my_cpu = cpu_number();
 4252         pmap_paddr_t    pmap_cr3 = pmap->pm_cr3;
 4253         boolean_t       flush_self = FALSE;
 4254         uint64_t        deadline;
 4255 
 4256         assert((processor_avail_count < 2) ||
 4257                (ml_get_interrupts_enabled() && get_preemption_level() != 0));
 4258 
 4259         /*
 4260          * Scan other cpus for matching active or task CR3.
 4261          * For idle cpus (with no active map) we mark them invalid but
 4262          * don't signal -- they'll check as they go busy.
 4263          * Note: for the kernel pmap we look for 64-bit shared address maps.
 4264          */
 4265         cpus_to_signal = 0;
 4266         for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
 4267                 if (!cpu_datap(cpu)->cpu_running)
 4268                         continue;
 4269                 if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) ||
 4270                     (CPU_GET_ACTIVE_CR3(cpu)      == pmap_cr3) ||
 4271                     (pmap->pm_shared) ||
 4272                     ((pmap == kernel_pmap) &&
 4273                      (!CPU_CR3_IS_ACTIVE(cpu) ||
 4274                       cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
 4275                         if (cpu == my_cpu) {
 4276                                 flush_self = TRUE;
 4277                                 continue;
 4278                         }
 4279                         cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
 4280                         __asm__ volatile("mfence");
 4281 
 4282                         if (CPU_CR3_IS_ACTIVE(cpu)) {
 4283                                 cpus_to_signal |= cpu_bit;
 4284                                 i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
 4285                         }
 4286                 }
 4287         }
 4288 
 4289         PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
 4290                    (int) pmap, cpus_to_signal, flush_self, 0, 0);
 4291 
 4292         if (cpus_to_signal) {
 4293                 cpu_set cpus_to_respond = cpus_to_signal;
 4294 
 4295                 deadline = mach_absolute_time() + LockTimeOut;
 4296                 /*
 4297                  * Wait for those other cpus to acknowledge
 4298                  */
 4299                 while (cpus_to_respond != 0) {
 4300                         if (mach_absolute_time() > deadline) {
 4301                                 if (mp_recent_debugger_activity())
 4302                                         continue;
 4303                                 if (!panic_active()) {
 4304                                         pmap_tlb_flush_timeout = TRUE;
 4305                                         pmap_cpuset_NMIPI(cpus_to_respond);
 4306                                 }
 4307                                 panic("pmap_flush_tlbs() timeout: "
 4308                                     "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
 4309                                     pmap, cpus_to_respond);
 4310                         }
 4311 
 4312                         for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
 4313                                 if ((cpus_to_respond & cpu_bit) != 0) {
 4314                                         if (!cpu_datap(cpu)->cpu_running ||
 4315                                             cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
 4316                                             !CPU_CR3_IS_ACTIVE(cpu)) {
 4317                                                 cpus_to_respond &= ~cpu_bit;
 4318                                         }
 4319                                         cpu_pause();
 4320                                 }
 4321                                 if (cpus_to_respond == 0)
 4322                                         break;
 4323                         }
 4324                 }
 4325         }
 4326         /*
 4327          * Flush local tlb if required.
 4328          * We need this flush even if the pmap being changed
 4329          * is the user map... in case we do a copyin/out
 4330          * before returning to user mode.
 4331          */
 4332         if (flush_self)
 4333                 flush_tlb();
 4334 
 4335         if ((pmap == kernel_pmap) && (flush_self != TRUE)) {
 4336                 panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map);
 4337         }
 4338 
 4339         PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
 4340                    (int) pmap, cpus_to_signal, flush_self, 0, 0);
 4341 }
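
      /*
       * A minimal sketch of the initiator-side sequence this routine serves,
       * assuming hypothetical `pmap`, `pte`, `template` and `vaddr`, and
       * assuming PMAP_UPDATE_TLBS() is the range wrapper that ends up in
       * pmap_flush_tlbs() (as its uses in pmap_enter() and
       * phys_attribute_clear() suggest):
       */
      #if 0 /* illustrative sketch */
              PMAP_LOCK(pmap);
              pmap_store_pte(pte, template);                    /* publish the new PTE first  */
              PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); /* then shoot down stale TLBs */
              PMAP_UNLOCK(pmap);
      #endif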
 4342 
 4343 void
 4344 process_pmap_updates(void)
 4345 {
 4346         assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
 4347 
 4348         flush_tlb();
 4349 
 4350         current_cpu_datap()->cpu_tlb_invalid = FALSE;
 4351         __asm__ volatile("mfence");
 4352 }
 4353 
 4354 void
 4355 pmap_update_interrupt(void)
 4356 {
 4357         PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
 4358                    0, 0, 0, 0, 0);
 4359 
 4360         process_pmap_updates();
 4361 
 4362         PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
 4363                    0, 0, 0, 0, 0);
 4364 }
 4365 
 4366 
 4367 unsigned int pmap_cache_attributes(ppnum_t pn) {
 4368 
 4369         if (!managed_page(ppn_to_pai(pn)))
 4370                 return (VM_WIMG_IO);
 4371 
 4372         return (VM_WIMG_COPYBACK);
 4373 }
 4374 
 4375 #ifdef PMAP_DEBUG
 4376 void
 4377 pmap_dump(pmap_t p)
 4378 {
 4379   int i;
 4380 
 4381   kprintf("pmap 0x%x\n",p);
 4382 
 4383   kprintf("  pm_cr3 0x%llx\n",p->pm_cr3);
 4384   kprintf("  pm_pml4 0x%x\n",p->pm_pml4);
 4385   kprintf("  pm_pdpt 0x%x\n",p->pm_pdpt);
 4386 
 4387   kprintf("    pml4[0] 0x%llx\n",*p->pm_pml4);
 4388   for (i=0;i<8;i++)
 4389     kprintf("    pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]);
 4390 }
 4391 
 4392 void pmap_dump_wrap(void)
 4393 {
 4394   pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap);
 4395 }
 4396 
 4397 void
 4398 dump_4GB_pdpt(pmap_t p)
 4399 {
 4400         int             spl;
 4401         pdpt_entry_t    *user_pdptp;
 4402         pdpt_entry_t    *kern_pdptp;
 4403         pdpt_entry_t    *pml4p;
 4404 
 4405         spl = splhigh();
 4406         while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
 4407                 splx(spl);
 4408                 pmap_expand_pml4(p, 0x0);
 4409                 spl = splhigh();
 4410         }
 4411         kern_pdptp = kernel_pmap->pm_pdpt;
 4412         if (kern_pdptp == NULL)
 4413                 panic("kern_pdptp == NULL");
 4414         kprintf("dump_4GB_pdpt(%p)\n"
 4415                 "kern_pdptp=%p (phys=0x%016llx)\n"
 4416                 "\t 0x%08x: 0x%016llx\n"
 4417                 "\t 0x%08x: 0x%016llx\n"
 4418                 "\t 0x%08x: 0x%016llx\n"
 4419                 "\t 0x%08x: 0x%016llx\n"
 4420                 "\t 0x%08x: 0x%016llx\n"
 4421                 "user_pdptp=%p (phys=0x%016llx)\n"
 4422                 "\t 0x%08x: 0x%016llx\n"
 4423                 "\t 0x%08x: 0x%016llx\n"
 4424                 "\t 0x%08x: 0x%016llx\n"
 4425                 "\t 0x%08x: 0x%016llx\n"
 4426                 "\t 0x%08x: 0x%016llx\n",
 4427                 p, kern_pdptp, kvtophys(kern_pdptp),
 4428                 kern_pdptp+0, *(kern_pdptp+0),
 4429                 kern_pdptp+1, *(kern_pdptp+1),
 4430                 kern_pdptp+2, *(kern_pdptp+2),
 4431                 kern_pdptp+3, *(kern_pdptp+3),
 4432                 kern_pdptp+4, *(kern_pdptp+4),
 4433                 user_pdptp, kvtophys(user_pdptp),
 4434                 user_pdptp+0, *(user_pdptp+0),
 4435                 user_pdptp+1, *(user_pdptp+1),
 4436                 user_pdptp+2, *(user_pdptp+2),
 4437                 user_pdptp+3, *(user_pdptp+3),
 4438                 user_pdptp+4, *(user_pdptp+4));
 4439         kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
 4440                 p->pm_cr3, p->pm_hold, p->pm_pml4);
 4441         pml4p = (pdpt_entry_t *)p->pm_hold;
 4442         if (pml4p == NULL)
 4443                 panic("user pml4p == NULL");
 4444         kprintf("\t 0x%08x: 0x%016llx\n"
 4445                 "\t 0x%08x: 0x%016llx\n",
 4446                 pml4p+0, *(pml4p),
 4447                 pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
 4448         kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
 4449                 kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
 4450         pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
 4451         if (pml4p == NULL)
 4452                 panic("kern pml4p == NULL");
 4453         kprintf("\t 0x%08x: 0x%016llx\n"
 4454                 "\t 0x%08x: 0x%016llx\n",
 4455                 pml4p+0, *(pml4p),
 4456                 pml4p+511, *(pml4p+511));
 4457         splx(spl);
 4458 }
 4459 
 4460 void dump_4GB_pdpt_thread(thread_t tp)
 4461 {
 4462         dump_4GB_pdpt(tp->map->pmap);
 4463 }
 4464 
 4465 
 4466 #endif
 4467 
