FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/pmap-v6.c
1 /* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */
2 /*-
3 * Copyright 2011 Semihalf
4 * Copyright 2004 Olivier Houchard.
5 * Copyright 2003 Wasabi Systems, Inc.
6 * All rights reserved.
7 *
8 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project by
21 * Wasabi Systems, Inc.
22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
23 * or promote products derived from this software without specific prior
24 * written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 *
38 * From: FreeBSD: src/sys/arm/arm/pmap.c,v 1.113 2009/07/24 13:50:29
39 */
40
41 /*-
42 * Copyright (c) 2002-2003 Wasabi Systems, Inc.
43 * Copyright (c) 2001 Richard Earnshaw
44 * Copyright (c) 2001-2002 Christopher Gilbert
45 * All rights reserved.
46 *
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. The name of the company nor the name of the author may be used to
53 * endorse or promote products derived from this software without specific
54 * prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
57 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
58 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
59 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
60 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 */
68 /*-
69 * Copyright (c) 1999 The NetBSD Foundation, Inc.
70 * All rights reserved.
71 *
72 * This code is derived from software contributed to The NetBSD Foundation
73 * by Charles M. Hannum.
74 *
75 * Redistribution and use in source and binary forms, with or without
76 * modification, are permitted provided that the following conditions
77 * are met:
78 * 1. Redistributions of source code must retain the above copyright
79 * notice, this list of conditions and the following disclaimer.
80 * 2. Redistributions in binary form must reproduce the above copyright
81 * notice, this list of conditions and the following disclaimer in the
82 * documentation and/or other materials provided with the distribution.
83 *
84 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
85 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
86 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
87 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
88 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
89 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
90 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
91 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
92 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
93 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
94 * POSSIBILITY OF SUCH DAMAGE.
95 */
96
97 /*-
98 * Copyright (c) 1994-1998 Mark Brinicombe.
99 * Copyright (c) 1994 Brini.
100 * All rights reserved.
101 *
102 * This code is derived from software written for Brini by Mark Brinicombe
103 *
104 * Redistribution and use in source and binary forms, with or without
105 * modification, are permitted provided that the following conditions
106 * are met:
107 * 1. Redistributions of source code must retain the above copyright
108 * notice, this list of conditions and the following disclaimer.
109 * 2. Redistributions in binary form must reproduce the above copyright
110 * notice, this list of conditions and the following disclaimer in the
111 * documentation and/or other materials provided with the distribution.
112 * 3. All advertising materials mentioning features or use of this software
113 * must display the following acknowledgement:
114 * This product includes software developed by Mark Brinicombe.
115 * 4. The name of the author may not be used to endorse or promote products
116 * derived from this software without specific prior written permission.
117 *
118 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
119 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
120 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
121 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
122 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
123 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
124 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
125 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
126 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
127 *
128 * RiscBSD kernel project
129 *
130 * pmap.c
131 *
132  * Machine dependent vm stuff
133 *
134 * Created : 20/09/94
135 */
136
137 /*
138 * Special compilation symbols
139 * PMAP_DEBUG - Build in pmap_debug_level code
140 */
141 /* Include header files */
142
143 #include "opt_vm.h"
144 #include "opt_pmap.h"
145
146 #include <sys/cdefs.h>
147 __FBSDID("$FreeBSD: releng/10.0/sys/arm/arm/pmap-v6.c 255724 2013-09-20 04:30:18Z alc $");
148 #include <sys/param.h>
149 #include <sys/systm.h>
150 #include <sys/kernel.h>
151 #include <sys/ktr.h>
152 #include <sys/lock.h>
153 #include <sys/proc.h>
154 #include <sys/malloc.h>
155 #include <sys/msgbuf.h>
156 #include <sys/mutex.h>
157 #include <sys/vmmeter.h>
158 #include <sys/mman.h>
159 #include <sys/rwlock.h>
160 #include <sys/smp.h>
161 #include <sys/sched.h>
162 #include <sys/sysctl.h>
163
164 #include <vm/vm.h>
165 #include <vm/vm_param.h>
166 #include <vm/uma.h>
167 #include <vm/pmap.h>
168 #include <vm/vm_kern.h>
169 #include <vm/vm_object.h>
170 #include <vm/vm_map.h>
171 #include <vm/vm_page.h>
172 #include <vm/vm_pageout.h>
173 #include <vm/vm_extern.h>
174 #include <vm/vm_reserv.h>
175
176 #include <machine/md_var.h>
177 #include <machine/cpu.h>
178 #include <machine/cpufunc.h>
179 #include <machine/pcb.h>
180
181 #ifdef DEBUG
182 extern int last_fault_code;
183 #endif
184
185 #ifdef PMAP_DEBUG
186 #define PDEBUG(_lev_,_stat_) \
187 if (pmap_debug_level >= (_lev_)) \
188 ((_stat_))
189 #define dprintf printf
190
191 int pmap_debug_level = 0;
192 #define PMAP_INLINE
193 #else /* PMAP_DEBUG */
194 #define PDEBUG(_lev_,_stat_) /* Nothing */
195 #define dprintf(x, arg...)
196 #define PMAP_INLINE __inline
197 #endif /* PMAP_DEBUG */
198
199 #ifdef PV_STATS
200 #define PV_STAT(x) do { x ; } while (0)
201 #else
202 #define PV_STAT(x) do { } while (0)
203 #endif
204
205 #define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
206
207 #ifdef ARM_L2_PIPT
208 #define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((pa), (size))
209 #define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((pa), (size))
210 #else
211 #define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((va), (size))
212 #define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((va), (size))
213 #endif
214
215 extern struct pv_addr systempage;
216
217 /*
218 * Internal function prototypes
219 */
220
221 static PMAP_INLINE
222 struct pv_entry *pmap_find_pv(struct md_page *, pmap_t, vm_offset_t);
223 static void pmap_free_pv_chunk(struct pv_chunk *pc);
224 static void pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv);
225 static pv_entry_t pmap_get_pv_entry(pmap_t pmap, boolean_t try);
226 static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
227 static boolean_t pmap_pv_insert_section(pmap_t, vm_offset_t,
228 vm_paddr_t);
229 static struct pv_entry *pmap_remove_pv(struct vm_page *, pmap_t, vm_offset_t);
230 static int pmap_pvh_wired_mappings(struct md_page *, int);
231
232 static void pmap_enter_locked(pmap_t, vm_offset_t, vm_prot_t,
233 vm_page_t, vm_prot_t, boolean_t, int);
234 static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va);
235 static void pmap_alloc_l1(pmap_t);
236 static void pmap_free_l1(pmap_t);
237
238 static void pmap_map_section(pmap_t, vm_offset_t, vm_offset_t,
239 vm_prot_t, boolean_t);
240 static void pmap_promote_section(pmap_t, vm_offset_t);
241 static boolean_t pmap_demote_section(pmap_t, vm_offset_t);
242 static boolean_t pmap_enter_section(pmap_t, vm_offset_t, vm_page_t,
243 vm_prot_t);
244 static void pmap_remove_section(pmap_t, vm_offset_t);
245
246 static int pmap_clearbit(struct vm_page *, u_int);
247
248 static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t);
249 static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t);
250 static void pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int);
251 static vm_offset_t kernel_pt_lookup(vm_paddr_t);
252
253 static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");
254
255 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
256 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
257 vm_offset_t pmap_curmaxkvaddr;
258 vm_paddr_t kernel_l1pa;
259
260 vm_offset_t kernel_vm_end = 0;
261
262 vm_offset_t vm_max_kernel_address;
263
264 struct pmap kernel_pmap_store;
265
266 static pt_entry_t *csrc_pte, *cdst_pte;
267 static vm_offset_t csrcp, cdstp;
268 static struct mtx cmtx;
269
270 static void pmap_init_l1(struct l1_ttable *, pd_entry_t *);
271 /*
272 * These routines are called when the CPU type is identified to set up
273 * the PTE prototypes, cache modes, etc.
274 *
275 * The variables are always here, just in case LKMs need to reference
276 * them (though, they shouldn't).
277 */
278 static void pmap_set_prot(pt_entry_t *pte, vm_prot_t prot, uint8_t user);
279 pt_entry_t pte_l1_s_cache_mode;
280 pt_entry_t pte_l1_s_cache_mode_pt;
281
282 pt_entry_t pte_l2_l_cache_mode;
283 pt_entry_t pte_l2_l_cache_mode_pt;
284
285 pt_entry_t pte_l2_s_cache_mode;
286 pt_entry_t pte_l2_s_cache_mode_pt;
287
288 struct msgbuf *msgbufp = 0;
289
290 /*
291 * Crashdump maps.
292 */
293 static caddr_t crashdumpmap;
294
295 extern void bcopy_page(vm_offset_t, vm_offset_t);
296 extern void bzero_page(vm_offset_t);
297
298 char *_tmppt;
299
300 /*
301 * Metadata for L1 translation tables.
302 */
303 struct l1_ttable {
304 /* Entry on the L1 Table list */
305 SLIST_ENTRY(l1_ttable) l1_link;
306
307 /* Entry on the L1 Least Recently Used list */
308 TAILQ_ENTRY(l1_ttable) l1_lru;
309
310 /* Track how many domains are allocated from this L1 */
311 volatile u_int l1_domain_use_count;
312
313 /*
314 * A free-list of domain numbers for this L1.
315 * We avoid using ffs() and a bitmap to track domains since ffs()
316 * is slow on ARM.
317 */
318 u_int8_t l1_domain_first;
319 u_int8_t l1_domain_free[PMAP_DOMAINS];
320
321 /* Physical address of this L1 page table */
322 vm_paddr_t l1_physaddr;
323
324 /* KVA of this L1 page table */
325 pd_entry_t *l1_kva;
326 };
327
328 /*
329 * Convert a virtual address into its L1 table index. That is, the
330 * index used to locate the L2 descriptor table pointer in an L1 table.
331 * This is basically used to index l1->l1_kva[].
332 *
333 * Each L2 descriptor table represents 1MB of VA space.
334 */
335 #define L1_IDX(va) (((vm_offset_t)(va)) >> L1_S_SHIFT)
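/*
 * For example, with 1MB sections (L1_S_SHIFT == 20):
 *
 *	L1_IDX(0xc0345678) == 0xc03
 *
 * i.e. l1->l1_kva[0xc03], the descriptor covering 0xc0300000 - 0xc03fffff.
 */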
336
337 /*
338 * L1 Page Tables are tracked using a Least Recently Used list.
339 * - New L1s are allocated from the HEAD.
340  *  - Freed L1s are added to the TAIL.
341 * - Recently accessed L1s (where an 'access' is some change to one of
342 * the userland pmaps which owns this L1) are moved to the TAIL.
343 */
344 static TAILQ_HEAD(, l1_ttable) l1_lru_list;
345 /*
346 * A list of all L1 tables
347 */
348 static SLIST_HEAD(, l1_ttable) l1_list;
349 static struct mtx l1_lru_lock;
350
351 /*
352 * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots.
353 *
354  * This is normally 16MB worth of L2 page descriptors for any given pmap.
355 * Reference counts are maintained for L2 descriptors so they can be
356 * freed when empty.
357 */
358 struct l2_dtable {
359 /* The number of L2 page descriptors allocated to this l2_dtable */
360 u_int l2_occupancy;
361
362 /* List of L2 page descriptors */
363 struct l2_bucket {
364 pt_entry_t *l2b_kva; /* KVA of L2 Descriptor Table */
365 vm_paddr_t l2b_phys; /* Physical address of same */
366 u_short l2b_l1idx; /* This L2 table's L1 index */
367 u_short l2b_occupancy; /* How many active descriptors */
368 } l2_bucket[L2_BUCKET_SIZE];
369 };
370
371 /* pmap_kenter_internal flags */
372 #define KENTER_CACHE 0x1
373 #define KENTER_USER 0x2
374
375 /*
376 * Given an L1 table index, calculate the corresponding l2_dtable index
377 * and bucket index within the l2_dtable.
378 */
379 #define L2_IDX(l1idx) (((l1idx) >> L2_BUCKET_LOG2) & \
380 (L2_SIZE - 1))
381 #define L2_BUCKET(l1idx) ((l1idx) & (L2_BUCKET_SIZE - 1))
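/*
 * For example, continuing with l1idx == L1_IDX(0xc0345678) == 0xc03 (3075)
 * and assuming the usual L2_BUCKET_LOG2 == 4 and L2_SIZE == 256:
 *
 *	L2_IDX(0xc03)    == 0xc0	(which l2_dtable)
 *	L2_BUCKET(0xc03) == 0x3		(which bucket within it)
 *
 * so the L2 table for that VA is found via pmap->pm_l2[0xc0]->l2_bucket[0x3].
 */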
382
383 /*
384 * Given a virtual address, this macro returns the
385 * virtual address required to drop into the next L2 bucket.
386 */
387 #define L2_NEXT_BUCKET(va) (((va) & L1_S_FRAME) + L1_S_SIZE)
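/*
 * For example, L2_NEXT_BUCKET(0xc0345678) == 0xc0400000, the first VA of
 * the following 1MB section.
 */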
388
389 /*
390 * We try to map the page tables write-through, if possible. However, not
391 * all CPUs have a write-through cache mode, so on those we have to sync
392 * the cache when we frob page tables.
393 *
394 * We try to evaluate this at compile time, if possible. However, it's
395 * not always possible to do that, hence this run-time var.
396 */
397 int pmap_needs_pte_sync;
398
399 /*
400 * Macro to determine if a mapping might be resident in the
401 * instruction cache and/or TLB
402 */
403 #define PTE_BEEN_EXECD(pte) (L2_S_EXECUTABLE(pte) && L2_S_REFERENCED(pte))
404
405 /*
406 * Macro to determine if a mapping might be resident in the
407 * data cache and/or TLB
408 */
409 #define PTE_BEEN_REFD(pte) (L2_S_REFERENCED(pte))
410
411 #ifndef PMAP_SHPGPERPROC
412 #define PMAP_SHPGPERPROC 200
413 #endif
414
415 #define pmap_is_current(pm) ((pm) == pmap_kernel() || \
416 curproc->p_vmspace->vm_map.pmap == (pm))
417
418 /*
419 * Data for the pv entry allocation mechanism
420 */
421 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
422 static int pv_entry_count, pv_entry_max, pv_entry_high_water;
423 static struct md_page *pv_table;
424 static int shpgperproc = PMAP_SHPGPERPROC;
425
426 struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */
427 int pv_maxchunks; /* How many chunks we have KVA for */
428 vm_offset_t pv_vafree; /* Freelist stored in the PTE */
429
430 static __inline struct pv_chunk *
431 pv_to_chunk(pv_entry_t pv)
432 {
433
434 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
435 }
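/*
 * Each pv_entry lives inside a page-sized struct pv_chunk (see the
 * CTASSERTs below), so masking off the low PAGE_MASK bits of a pv_entry
 * pointer recovers its chunk header.  For example, with 4KB pages a pv
 * entry at 0xc1234abc belongs to the chunk at 0xc1234000.
 */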
436
437 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
438
439 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
440 CTASSERT(_NPCM == 8);
441 CTASSERT(_NPCPV == 252);
442
443 #define PC_FREE0_6 0xfffffffful /* Free values for index 0 through 6 */
444 #define PC_FREE7 0x0ffffffful /* Free values for index 7 */
445
446 static const uint32_t pc_freemask[_NPCM] = {
447 PC_FREE0_6, PC_FREE0_6, PC_FREE0_6,
448 PC_FREE0_6, PC_FREE0_6, PC_FREE0_6,
449 PC_FREE0_6, PC_FREE7
450 };
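/*
 * Sanity check on the free masks: indices 0 through 6 each track 32 entries
 * and index 7 tracks 28, i.e. 7 * 32 + 28 = 252 == _NPCPV, which together
 * with the CTASSERTs above means a fully-free chunk accounts for exactly
 * one page worth of pv entries.
 */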
451
452 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
453
454 /* Superpages utilization enabled = 1 / disabled = 0 */
455 static int sp_enabled = 0;
456 SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN, &sp_enabled, 0,
457 "Are large page mappings enabled?");
458
459 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
460 "Current number of pv entries");
461
462 #ifdef PV_STATS
463 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
464
465 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
466 "Current number of pv entry chunks");
467 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
468 "Current number of pv entry chunks allocated");
469 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
470 "Current number of pv entry chunks frees");
471 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
472 "Number of times tried to get a chunk page but failed.");
473
474 static long pv_entry_frees, pv_entry_allocs;
475 static int pv_entry_spare;
476
477 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
478 "Current number of pv entry frees");
479 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
480 "Current number of pv entry allocs");
481 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
482 "Current number of spare pv entries");
483 #endif
484
485 uma_zone_t l2zone;
486 static uma_zone_t l2table_zone;
487 static vm_offset_t pmap_kernel_l2dtable_kva;
488 static vm_offset_t pmap_kernel_l2ptp_kva;
489 static vm_paddr_t pmap_kernel_l2ptp_phys;
490 static struct rwlock pvh_global_lock;
491
492 int l1_mem_types[] = {
493 ARM_L1S_STRONG_ORD,
494 ARM_L1S_DEVICE_NOSHARE,
495 ARM_L1S_DEVICE_SHARE,
496 ARM_L1S_NRML_NOCACHE,
497 ARM_L1S_NRML_IWT_OWT,
498 ARM_L1S_NRML_IWB_OWB,
499 ARM_L1S_NRML_IWBA_OWBA
500 };
501
502 int l2l_mem_types[] = {
503 ARM_L2L_STRONG_ORD,
504 ARM_L2L_DEVICE_NOSHARE,
505 ARM_L2L_DEVICE_SHARE,
506 ARM_L2L_NRML_NOCACHE,
507 ARM_L2L_NRML_IWT_OWT,
508 ARM_L2L_NRML_IWB_OWB,
509 ARM_L2L_NRML_IWBA_OWBA
510 };
511
512 int l2s_mem_types[] = {
513 ARM_L2S_STRONG_ORD,
514 ARM_L2S_DEVICE_NOSHARE,
515 ARM_L2S_DEVICE_SHARE,
516 ARM_L2S_NRML_NOCACHE,
517 ARM_L2S_NRML_IWT_OWT,
518 ARM_L2S_NRML_IWB_OWB,
519 ARM_L2S_NRML_IWBA_OWBA
520 };
521
522 /*
523 * This list exists for the benefit of pmap_map_chunk(). It keeps track
524 * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can
525 * find them as necessary.
526 *
527 * Note that the data on this list MUST remain valid after initarm() returns,
528  * as pmap_bootstrap() uses it to construct L2 table metadata.
529 */
530 SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list);
531
532 static void
533 pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt)
534 {
535 int i;
536
537 l1->l1_kva = l1pt;
538 l1->l1_domain_use_count = 0;
539 l1->l1_domain_first = 0;
540
541 for (i = 0; i < PMAP_DOMAINS; i++)
542 l1->l1_domain_free[i] = i + 1;
543
544 /*
545 * Copy the kernel's L1 entries to each new L1.
546 */
547 if (l1pt != pmap_kernel()->pm_l1->l1_kva)
548 memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE);
549
550 if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0)
551 panic("pmap_init_l1: can't get PA of L1 at %p", l1pt);
552 SLIST_INSERT_HEAD(&l1_list, l1, l1_link);
553 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
554 }
555
556 static vm_offset_t
557 kernel_pt_lookup(vm_paddr_t pa)
558 {
559 struct pv_addr *pv;
560
561 SLIST_FOREACH(pv, &kernel_pt_list, pv_list) {
562 if (pv->pv_pa == pa)
563 return (pv->pv_va);
564 }
565 return (0);
566 }
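/*
 * kernel_pt_lookup() is only meaningful during bootstrap: pmap_bootstrap()
 * below walks the kernel L1 and uses it to turn the physical address found
 * in each coarse descriptor back into the KVA at which initarm() mapped
 * that L2 table, roughly:
 *
 *	ptep = (pt_entry_t *)kernel_pt_lookup(pde & L1_C_ADDR_MASK);
 */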
567
568 void
569 pmap_pte_init_mmu_v6(void)
570 {
571
572 if (PTE_PAGETABLE >= 3)
573 pmap_needs_pte_sync = 1;
574 pte_l1_s_cache_mode = l1_mem_types[PTE_CACHE];
575 pte_l2_l_cache_mode = l2l_mem_types[PTE_CACHE];
576 pte_l2_s_cache_mode = l2s_mem_types[PTE_CACHE];
577
578 pte_l1_s_cache_mode_pt = l1_mem_types[PTE_PAGETABLE];
579 pte_l2_l_cache_mode_pt = l2l_mem_types[PTE_PAGETABLE];
580 pte_l2_s_cache_mode_pt = l2s_mem_types[PTE_PAGETABLE];
581
582 }
583
584 /*
585 * Allocate an L1 translation table for the specified pmap.
586 * This is called at pmap creation time.
587 */
588 static void
589 pmap_alloc_l1(pmap_t pmap)
590 {
591 struct l1_ttable *l1;
592 u_int8_t domain;
593
594 /*
595 * Remove the L1 at the head of the LRU list
596 */
597 mtx_lock(&l1_lru_lock);
598 l1 = TAILQ_FIRST(&l1_lru_list);
599 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
600
601 /*
602 * Pick the first available domain number, and update
603 * the link to the next number.
604 */
605 domain = l1->l1_domain_first;
606 l1->l1_domain_first = l1->l1_domain_free[domain];
607
608 /*
609 * If there are still free domain numbers in this L1,
610 * put it back on the TAIL of the LRU list.
611 */
612 if (++l1->l1_domain_use_count < PMAP_DOMAINS)
613 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
614
615 mtx_unlock(&l1_lru_lock);
616
617 /*
618 * Fix up the relevant bits in the pmap structure
619 */
620 pmap->pm_l1 = l1;
621 pmap->pm_domain = domain + 1;
622 }
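/*
 * The domain numbers are managed as a tiny free list threaded through
 * l1_domain_free[]: l1_domain_first names the first free domain, and
 * l1_domain_free[d] names the free domain that follows 'd'.  For example,
 * right after pmap_init_l1() the list reads 0 -> 1 -> 2 -> ...; the first
 * pmap_alloc_l1() hands out domain 0 (stored as pm_domain = 1) and advances
 * l1_domain_first to 1, while pmap_free_l1() below simply pushes the
 * released domain back onto the front of the list.
 */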
623
624 /*
625 * Free an L1 translation table.
626 * This is called at pmap destruction time.
627 */
628 static void
629 pmap_free_l1(pmap_t pmap)
630 {
631 struct l1_ttable *l1 = pmap->pm_l1;
632
633 mtx_lock(&l1_lru_lock);
634
635 /*
636 * If this L1 is currently on the LRU list, remove it.
637 */
638 if (l1->l1_domain_use_count < PMAP_DOMAINS)
639 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
640
641 /*
642 * Free up the domain number which was allocated to the pmap
643 */
644 l1->l1_domain_free[pmap->pm_domain - 1] = l1->l1_domain_first;
645 l1->l1_domain_first = pmap->pm_domain - 1;
646 l1->l1_domain_use_count--;
647
648 /*
649 * The L1 now must have at least 1 free domain, so add
650 * it back to the LRU list. If the use count is zero,
651 * put it at the head of the list, otherwise it goes
652 * to the tail.
653 */
654 if (l1->l1_domain_use_count == 0) {
655 TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru);
656 } else
657 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
658
659 mtx_unlock(&l1_lru_lock);
660 }
661
662 /*
663 * Returns a pointer to the L2 bucket associated with the specified pmap
664 * and VA, or NULL if no L2 bucket exists for the address.
665 */
666 static PMAP_INLINE struct l2_bucket *
667 pmap_get_l2_bucket(pmap_t pmap, vm_offset_t va)
668 {
669 struct l2_dtable *l2;
670 struct l2_bucket *l2b;
671 u_short l1idx;
672
673 l1idx = L1_IDX(va);
674
675 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL ||
676 (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL)
677 return (NULL);
678
679 return (l2b);
680 }
681
682 /*
683 * Returns a pointer to the L2 bucket associated with the specified pmap
684 * and VA.
685 *
686 * If no L2 bucket exists, perform the necessary allocations to put an L2
687 * bucket/page table in place.
688 *
689 * Note that if a new L2 bucket/page was allocated, the caller *must*
690 * increment the bucket occupancy counter appropriately *before*
691 * releasing the pmap's lock to ensure no other thread or cpu deallocates
692 * the bucket/page in the meantime.
693 */
694 static struct l2_bucket *
695 pmap_alloc_l2_bucket(pmap_t pmap, vm_offset_t va)
696 {
697 struct l2_dtable *l2;
698 struct l2_bucket *l2b;
699 u_short l1idx;
700
701 l1idx = L1_IDX(va);
702
703 PMAP_ASSERT_LOCKED(pmap);
704 rw_assert(&pvh_global_lock, RA_WLOCKED);
705 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) {
706 /*
707 * No mapping at this address, as there is
708 * no entry in the L1 table.
709 * Need to allocate a new l2_dtable.
710 */
711 PMAP_UNLOCK(pmap);
712 rw_wunlock(&pvh_global_lock);
713 if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) {
714 rw_wlock(&pvh_global_lock);
715 PMAP_LOCK(pmap);
716 return (NULL);
717 }
718 rw_wlock(&pvh_global_lock);
719 PMAP_LOCK(pmap);
720 if (pmap->pm_l2[L2_IDX(l1idx)] != NULL) {
721 /*
722 * Someone already allocated the l2_dtable while
723 * we were doing the same.
724 */
725 uma_zfree(l2table_zone, l2);
726 l2 = pmap->pm_l2[L2_IDX(l1idx)];
727 } else {
728 bzero(l2, sizeof(*l2));
729 /*
730 * Link it into the parent pmap
731 */
732 pmap->pm_l2[L2_IDX(l1idx)] = l2;
733 }
734 }
735
736 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
737
738 /*
739 * Fetch pointer to the L2 page table associated with the address.
740 */
741 if (l2b->l2b_kva == NULL) {
742 pt_entry_t *ptep;
743
744 /*
745 * No L2 page table has been allocated. Chances are, this
746 * is because we just allocated the l2_dtable, above.
747 */
748 PMAP_UNLOCK(pmap);
749 rw_wunlock(&pvh_global_lock);
750 ptep = uma_zalloc(l2zone, M_NOWAIT);
751 rw_wlock(&pvh_global_lock);
752 PMAP_LOCK(pmap);
753 if (l2b->l2b_kva != 0) {
754 /* We lost the race. */
755 uma_zfree(l2zone, ptep);
756 return (l2b);
757 }
758 		if (ptep == NULL) {
759 			/*
760 			 * Oops, no more L2 page tables available at this
761 			 * time. We may need to deallocate the l2_dtable
762 			 * if we allocated a new one above.
763 			 */
764 			if (l2->l2_occupancy == 0) {
765 				pmap->pm_l2[L2_IDX(l1idx)] = NULL;
766 				uma_zfree(l2table_zone, l2);
767 			}
768 			return (NULL);
769 		}
770 		l2b->l2b_phys = vtophys(ptep);
771
772 l2->l2_occupancy++;
773 l2b->l2b_kva = ptep;
774 l2b->l2b_l1idx = l1idx;
775 }
776
777 return (l2b);
778 }
779
780 static PMAP_INLINE void
781 pmap_free_l2_ptp(pt_entry_t *l2)
782 {
783 uma_zfree(l2zone, l2);
784 }
785 /*
786 * One or more mappings in the specified L2 descriptor table have just been
787 * invalidated.
788 *
789 * Garbage collect the metadata and descriptor table itself if necessary.
790 *
791 * The pmap lock must be acquired when this is called (not necessary
792 * for the kernel pmap).
793 */
794 static void
795 pmap_free_l2_bucket(pmap_t pmap, struct l2_bucket *l2b, u_int count)
796 {
797 struct l2_dtable *l2;
798 pd_entry_t *pl1pd, l1pd;
799 pt_entry_t *ptep;
800 u_short l1idx;
801
802
803 /*
804 * Update the bucket's reference count according to how many
805 * PTEs the caller has just invalidated.
806 */
807 l2b->l2b_occupancy -= count;
808
809 /*
810 * Note:
811 *
812 * Level 2 page tables allocated to the kernel pmap are never freed
813 * as that would require checking all Level 1 page tables and
814 * removing any references to the Level 2 page table. See also the
815 * comment elsewhere about never freeing bootstrap L2 descriptors.
816 *
817 * We make do with just invalidating the mapping in the L2 table.
818 *
819 * This isn't really a big deal in practice and, in fact, leads
820 * to a performance win over time as we don't need to continually
821 * alloc/free.
822 */
823 if (l2b->l2b_occupancy > 0 || pmap == pmap_kernel())
824 return;
825
826 /*
827 * There are no more valid mappings in this level 2 page table.
828 * Go ahead and NULL-out the pointer in the bucket, then
829 * free the page table.
830 */
831 l1idx = l2b->l2b_l1idx;
832 ptep = l2b->l2b_kva;
833 l2b->l2b_kva = NULL;
834
835 pl1pd = &pmap->pm_l1->l1_kva[l1idx];
836
837 /*
838 * If the L1 slot matches the pmap's domain
839 * number, then invalidate it.
840 */
841 l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK);
842 if (l1pd == (L1_C_DOM(pmap->pm_domain) | L1_TYPE_C)) {
843 *pl1pd = 0;
844 PTE_SYNC(pl1pd);
845 }
846
847 /*
848 * Release the L2 descriptor table back to the pool cache.
849 */
850 pmap_free_l2_ptp(ptep);
851
852 /*
853 * Update the reference count in the associated l2_dtable
854 */
855 l2 = pmap->pm_l2[L2_IDX(l1idx)];
856 if (--l2->l2_occupancy > 0)
857 return;
858
859 /*
860 * There are no more valid mappings in any of the Level 1
861 * slots managed by this l2_dtable. Go ahead and NULL-out
862 * the pointer in the parent pmap and free the l2_dtable.
863 */
864 pmap->pm_l2[L2_IDX(l1idx)] = NULL;
865 uma_zfree(l2table_zone, l2);
866 }
867
868 /*
869 * Pool cache constructors for L2 descriptor tables, metadata and pmap
870 * structures.
871 */
872 static int
873 pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags)
874 {
875 struct l2_bucket *l2b;
876 pt_entry_t *ptep, pte;
877 vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK;
878
879 /*
880 * The mappings for these page tables were initially made using
881 * pmap_kenter() by the pool subsystem. Therefore, the cache-
882 * mode will not be right for page table mappings. To avoid
883 * polluting the pmap_kenter() code with a special case for
884 * page tables, we simply fix up the cache-mode here if it's not
885 * correct.
886 */
887 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
888 ptep = &l2b->l2b_kva[l2pte_index(va)];
889 pte = *ptep;
890
891 cpu_idcache_wbinv_range(va, PAGE_SIZE);
892 pmap_l2cache_wbinv_range(va, pte & L2_S_FRAME, PAGE_SIZE);
893 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
894 /*
895 * Page tables must have the cache-mode set to
896 * Write-Thru.
897 */
898 *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
899 PTE_SYNC(ptep);
900 cpu_tlb_flushD_SE(va);
901 cpu_cpwait();
902 }
903
904 memset(mem, 0, L2_TABLE_SIZE_REAL);
905 return (0);
906 }
907
908 /*
909 * Modify pte bits for all ptes corresponding to the given physical address.
910 * We use `maskbits' rather than `clearbits' because we're always passing
911 * constants and the latter would require an extra inversion at run-time.
912 */
913 static int
914 pmap_clearbit(struct vm_page *m, u_int maskbits)
915 {
916 struct l2_bucket *l2b;
917 struct pv_entry *pv, *pve, *next_pv;
918 struct md_page *pvh;
919 pd_entry_t *pl1pd;
920 pt_entry_t *ptep, npte, opte;
921 pmap_t pmap;
922 vm_offset_t va;
923 u_int oflags;
924 int count = 0;
925
926 rw_wlock(&pvh_global_lock);
927 if ((m->flags & PG_FICTITIOUS) != 0)
928 goto small_mappings;
929
930 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
931 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
932 va = pv->pv_va;
933 pmap = PV_PMAP(pv);
934 PMAP_LOCK(pmap);
935 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
936 KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO,
937 ("pmap_clearbit: valid section mapping expected"));
938 if ((maskbits & PVF_WRITE) && (pv->pv_flags & PVF_WRITE))
939 (void)pmap_demote_section(pmap, va);
940 else if ((maskbits & PVF_REF) && L1_S_REFERENCED(*pl1pd)) {
941 if (pmap_demote_section(pmap, va)) {
942 if ((pv->pv_flags & PVF_WIRED) == 0) {
943 /*
944 * Remove the mapping to a single page
945 * so that a subsequent access may
946 * repromote. Since the underlying
947 * l2_bucket is fully populated, this
948 * removal never frees an entire
949 * l2_bucket.
950 */
951 va += (VM_PAGE_TO_PHYS(m) &
952 L1_S_OFFSET);
953 l2b = pmap_get_l2_bucket(pmap, va);
954 KASSERT(l2b != NULL,
955 ("pmap_clearbit: no l2 bucket for "
956 "va 0x%#x, pmap 0x%p", va, pmap));
957 ptep = &l2b->l2b_kva[l2pte_index(va)];
958 *ptep = 0;
959 PTE_SYNC(ptep);
960 pmap_free_l2_bucket(pmap, l2b, 1);
961 pve = pmap_remove_pv(m, pmap, va);
962 KASSERT(pve != NULL, ("pmap_clearbit: "
963 "no PV entry for managed mapping"));
964 pmap_free_pv_entry(pmap, pve);
965
966 }
967 }
968 } else if ((maskbits & PVF_MOD) && L1_S_WRITABLE(*pl1pd)) {
969 if (pmap_demote_section(pmap, va)) {
970 if ((pv->pv_flags & PVF_WIRED) == 0) {
971 /*
972 * Write protect the mapping to a
973 * single page so that a subsequent
974 * write access may repromote.
975 */
976 va += (VM_PAGE_TO_PHYS(m) &
977 L1_S_OFFSET);
978 l2b = pmap_get_l2_bucket(pmap, va);
979 KASSERT(l2b != NULL,
980 ("pmap_clearbit: no l2 bucket for "
981 "va 0x%#x, pmap 0x%p", va, pmap));
982 ptep = &l2b->l2b_kva[l2pte_index(va)];
983 if ((*ptep & L2_S_PROTO) != 0) {
984 pve = pmap_find_pv(&m->md,
985 pmap, va);
986 KASSERT(pve != NULL,
987 ("pmap_clearbit: no PV "
988 "entry for managed mapping"));
989 pve->pv_flags &= ~PVF_WRITE;
990 *ptep |= L2_APX;
991 PTE_SYNC(ptep);
992 }
993 }
994 }
995 }
996 PMAP_UNLOCK(pmap);
997 }
998
999 small_mappings:
1000 if (TAILQ_EMPTY(&m->md.pv_list)) {
1001 rw_wunlock(&pvh_global_lock);
1002 return (0);
1003 }
1004
1005 /*
1006 	 * Loop over all current mappings setting/clearing as appropriate
1007 */
1008 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
1009 va = pv->pv_va;
1010 pmap = PV_PMAP(pv);
1011 oflags = pv->pv_flags;
1012 pv->pv_flags &= ~maskbits;
1013
1014 PMAP_LOCK(pmap);
1015
1016 l2b = pmap_get_l2_bucket(pmap, va);
1017 KASSERT(l2b != NULL, ("pmap_clearbit: no l2 bucket for "
1018 "va 0x%#x, pmap 0x%p", va, pmap));
1019
1020 ptep = &l2b->l2b_kva[l2pte_index(va)];
1021 npte = opte = *ptep;
1022
1023 if (maskbits & (PVF_WRITE | PVF_MOD)) {
1024 /* make the pte read only */
1025 npte |= L2_APX;
1026 }
1027
1028 if (maskbits & PVF_REF) {
1029 /*
1030 * Clear referenced flag in PTE so that we
1031 * will take a flag fault the next time the mapping
1032 * is referenced.
1033 */
1034 npte &= ~L2_S_REF;
1035 }
1036
1037 CTR4(KTR_PMAP,"clearbit: pmap:%p bits:%x pte:%x->%x",
1038 pmap, maskbits, opte, npte);
1039 if (npte != opte) {
1040 count++;
1041 *ptep = npte;
1042 PTE_SYNC(ptep);
1043 /* Flush the TLB entry if a current pmap. */
1044 if (PTE_BEEN_EXECD(opte))
1045 cpu_tlb_flushID_SE(pv->pv_va);
1046 else if (PTE_BEEN_REFD(opte))
1047 cpu_tlb_flushD_SE(pv->pv_va);
1048 }
1049
1050 PMAP_UNLOCK(pmap);
1051
1052 }
1053
1054 if (maskbits & PVF_WRITE)
1055 vm_page_aflag_clear(m, PGA_WRITEABLE);
1056 rw_wunlock(&pvh_global_lock);
1057 return (count);
1058 }
1059
1060 /*
1061 * main pv_entry manipulation functions:
1062 * pmap_enter_pv: enter a mapping onto a vm_page list
1063  *   pmap_remove_pv: remove a mapping from a vm_page list
1064 *
1065 * NOTE: pmap_enter_pv expects to lock the pvh itself
1066 * pmap_remove_pv expects the caller to lock the pvh before calling
1067 */
1068
1069 /*
1070 * pmap_enter_pv: enter a mapping onto a vm_page's PV list
1071 *
1072 * => caller should hold the proper lock on pvh_global_lock
1073 * => caller should have pmap locked
1074 * => we will (someday) gain the lock on the vm_page's PV list
1075 * => caller should adjust ptp's wire_count before calling
1076 * => caller should not adjust pmap's wire_count
1077 */
1078 static void
1079 pmap_enter_pv(struct vm_page *m, struct pv_entry *pve, pmap_t pmap,
1080 vm_offset_t va, u_int flags)
1081 {
1082
1083 rw_assert(&pvh_global_lock, RA_WLOCKED);
1084
1085 PMAP_ASSERT_LOCKED(pmap);
1086 pve->pv_va = va;
1087 pve->pv_flags = flags;
1088
1089 TAILQ_INSERT_HEAD(&m->md.pv_list, pve, pv_list);
1090 if (pve->pv_flags & PVF_WIRED)
1091 ++pmap->pm_stats.wired_count;
1092 }
1093
1094 /*
1095 *
1096 * pmap_find_pv: Find a pv entry
1097 *
1098 * => caller should hold lock on vm_page
1099 */
1100 static PMAP_INLINE struct pv_entry *
1101 pmap_find_pv(struct md_page *md, pmap_t pmap, vm_offset_t va)
1102 {
1103 struct pv_entry *pv;
1104
1105 rw_assert(&pvh_global_lock, RA_WLOCKED);
1106 TAILQ_FOREACH(pv, &md->pv_list, pv_list)
1107 if (pmap == PV_PMAP(pv) && va == pv->pv_va)
1108 break;
1109
1110 return (pv);
1111 }
1112
1113 /*
1114 * vector_page_setprot:
1115 *
1116 * Manipulate the protection of the vector page.
1117 */
1118 void
1119 vector_page_setprot(int prot)
1120 {
1121 struct l2_bucket *l2b;
1122 pt_entry_t *ptep;
1123
1124 l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page);
1125
1126 ptep = &l2b->l2b_kva[l2pte_index(vector_page)];
1127 /*
1128 	 * Set the referenced flag.
1129 	 * The vector page should always be allowed
1130 	 * to reside in the TLB.
1131 */
1132 *ptep |= L2_S_REF;
1133
1134 pmap_set_prot(ptep, prot|VM_PROT_EXECUTE, 0);
1135
1136 cpu_tlb_flushD_SE(vector_page);
1137 cpu_cpwait();
1138 }
1139
1140 static void
1141 pmap_set_prot(pt_entry_t *ptep, vm_prot_t prot, uint8_t user)
1142 {
1143
1144 *ptep &= ~(L2_S_PROT_MASK | L2_XN);
1145
1146 if (!(prot & VM_PROT_EXECUTE))
1147 *ptep |= L2_XN;
1148
1149 /* Set defaults first - kernel read access */
1150 *ptep |= L2_APX;
1151 *ptep |= L2_S_PROT_R;
1152 /* Now tune APs as desired */
1153 if (user)
1154 *ptep |= L2_S_PROT_U;
1155
1156 if (prot & VM_PROT_WRITE)
1157 *ptep &= ~(L2_APX);
1158 }
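/*
 * The net effect of pmap_set_prot() above, summarized for a few common
 * cases (derived directly from the code, shown here only as a reference):
 *
 *	kernel r--:	L2_S_PROT_R | L2_APX | L2_XN
 *	kernel rw-:	L2_S_PROT_R | L2_XN		(L2_APX cleared)
 *	user   r-x:	L2_S_PROT_R | L2_S_PROT_U | L2_APX
 *	user   rwx:	L2_S_PROT_R | L2_S_PROT_U	(L2_APX cleared, no L2_XN)
 */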
1159
1160 /*
1161 * pmap_remove_pv: try to remove a mapping from a pv_list
1162 *
1163 * => caller should hold proper lock on pmap_main_lock
1164 * => pmap should be locked
1165 * => caller should hold lock on vm_page [so that attrs can be adjusted]
1166 * => caller should adjust ptp's wire_count and free PTP if needed
1167 * => caller should NOT adjust pmap's wire_count
1168 * => we return the removed pve
1169 */
1170 static struct pv_entry *
1171 pmap_remove_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va)
1172 {
1173 struct pv_entry *pve;
1174
1175 rw_assert(&pvh_global_lock, RA_WLOCKED);
1176 PMAP_ASSERT_LOCKED(pmap);
1177
1178 pve = pmap_find_pv(&m->md, pmap, va); /* find corresponding pve */
1179 if (pve != NULL) {
1180 TAILQ_REMOVE(&m->md.pv_list, pve, pv_list);
1181 if (pve->pv_flags & PVF_WIRED)
1182 --pmap->pm_stats.wired_count;
1183 }
1184 if (TAILQ_EMPTY(&m->md.pv_list))
1185 vm_page_aflag_clear(m, PGA_WRITEABLE);
1186
1187 return(pve); /* return removed pve */
1188 }
1189
1190 /*
1191 *
1192 * pmap_modify_pv: Update pv flags
1193 *
1194 * => caller should hold lock on vm_page [so that attrs can be adjusted]
1195 * => caller should NOT adjust pmap's wire_count
1196 * => we return the old flags
1197 *
1198 * Modify a physical-virtual mapping in the pv table
1199 */
1200 static u_int
1201 pmap_modify_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va,
1202 u_int clr_mask, u_int set_mask)
1203 {
1204 struct pv_entry *npv;
1205 u_int flags, oflags;
1206
1207 PMAP_ASSERT_LOCKED(pmap);
1208 rw_assert(&pvh_global_lock, RA_WLOCKED);
1209 if ((npv = pmap_find_pv(&m->md, pmap, va)) == NULL)
1210 return (0);
1211
1212 /*
1213 * There is at least one VA mapping this page.
1214 */
1215 oflags = npv->pv_flags;
1216 npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask;
1217
1218 if ((flags ^ oflags) & PVF_WIRED) {
1219 if (flags & PVF_WIRED)
1220 ++pmap->pm_stats.wired_count;
1221 else
1222 --pmap->pm_stats.wired_count;
1223 }
1224
1225 return (oflags);
1226 }
1227
1228 /* Function to set the debug level of the pmap code */
1229 #ifdef PMAP_DEBUG
1230 void
1231 pmap_debug(int level)
1232 {
1233 pmap_debug_level = level;
1234 dprintf("pmap_debug: level=%d\n", pmap_debug_level);
1235 }
1236 #endif /* PMAP_DEBUG */
1237
1238 void
1239 pmap_pinit0(struct pmap *pmap)
1240 {
1241 PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap));
1242
1243 bcopy(kernel_pmap, pmap, sizeof(*pmap));
1244 bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx));
1245 PMAP_LOCK_INIT(pmap);
1246 TAILQ_INIT(&pmap->pm_pvchunk);
1247 }
1248
1249 /*
1250 * Initialize a vm_page's machine-dependent fields.
1251 */
1252 void
1253 pmap_page_init(vm_page_t m)
1254 {
1255
1256 TAILQ_INIT(&m->md.pv_list);
1257 m->md.pv_memattr = VM_MEMATTR_DEFAULT;
1258 }
1259
1260 static vm_offset_t
1261 pmap_ptelist_alloc(vm_offset_t *head)
1262 {
1263 pt_entry_t *pte;
1264 vm_offset_t va;
1265
1266 va = *head;
1267 if (va == 0)
1268 return (va); /* Out of memory */
1269 pte = vtopte(va);
1270 *head = *pte;
1271 if ((*head & L2_TYPE_MASK) != L2_TYPE_INV)
1272 panic("%s: va is not L2_TYPE_INV!", __func__);
1273 *pte = 0;
1274 return (va);
1275 }
1276
1277 static void
1278 pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
1279 {
1280 pt_entry_t *pte;
1281
1282 if ((va & L2_TYPE_MASK) != L2_TYPE_INV)
1283 panic("%s: freeing va that is not L2_TYPE INV!", __func__);
1284 pte = vtopte(va);
1285 *pte = *head; /* virtual! L2_TYPE is L2_TYPE_INV though */
1286 *head = va;
1287 }
1288
1289 static void
1290 pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
1291 {
1292 int i;
1293 vm_offset_t va;
1294
1295 *head = 0;
1296 for (i = npages - 1; i >= 0; i--) {
1297 va = (vm_offset_t)base + i * PAGE_SIZE;
1298 pmap_ptelist_free(head, va);
1299 }
1300 }
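/*
 * The three pmap_ptelist_* functions above keep a free list of pv-chunk KVA
 * pages threaded through the (invalid) kernel PTEs themselves: the PTE of
 * each free page stores the VA of the next free page, and *head points at
 * the first one.  A minimal usage sketch -- the variable names and the
 * pmap_qenter() mapping step are only illustrative, not taken from this
 * file:
 *
 *	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
 *	va = pmap_ptelist_alloc(&pv_vafree);	-- grab one page of KVA
 *	... back 'va' with a physical page, e.g. pmap_qenter(va, &m, 1) ...
 *	pmap_ptelist_free(&pv_vafree, va);	-- return the KVA later
 */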
1301
1302 /*
1303 * Initialize the pmap module.
1304 * Called by vm_init, to initialize any structures that the pmap
1305 * system needs to map virtual memory.
1306 */
1307 void
1308 pmap_init(void)
1309 {
1310 vm_size_t s;
1311 int i, pv_npg;
1312
1313 PDEBUG(1, printf("pmap_init: phys_start = %08x\n", PHYSADDR));
1314
1315 l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
1316 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1317 l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL,
1318 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1319
1320 /*
1321 * Are large page mappings supported and enabled?
1322 */
1323 TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled);
1324 if (sp_enabled) {
1325 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
1326 ("pmap_init: can't assign to pagesizes[1]"));
1327 pagesizes[1] = NBPDR;
1328 }
1329
1330 /*
1331 * Calculate the size of the pv head table for superpages.
1332 */
1333 for (i = 0; phys_avail[i + 1]; i += 2);
1334 pv_npg = round_1mpage(phys_avail[(i - 2) + 1]) / NBPDR;
1335
1336 /*
1337 * Allocate memory for the pv head table for superpages.
1338 */
1339 s = (vm_size_t)(pv_npg * sizeof(struct md_page));
1340 s = round_page(s);
1341 pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
1342 M_WAITOK | M_ZERO);
1343 for (i = 0; i < pv_npg; i++)
1344 TAILQ_INIT(&pv_table[i].pv_list);
1345
1346 /*
1347 * Initialize the address space for the pv chunks.
1348 */
1349
1350 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
1351 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
1352 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
1353 pv_entry_max = roundup(pv_entry_max, _NPCPV);
1354 pv_entry_high_water = 9 * (pv_entry_max / 10);
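	/*
	 * For example (hypothetical numbers): with the default shpgperproc
	 * of 200, maxproc == 1000 and cnt.v_page_count == 100000, this gives
	 * pv_entry_max = 200 * 1000 + 100000 = 300000, rounded up to a
	 * multiple of _NPCPV (252), i.e. 300132, and a high-water mark of
	 * 9 * (300132 / 10) = 270117 entries.
	 */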
1355
1356 pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
1357 pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
1358
1359 if (pv_chunkbase == NULL)
1360 panic("pmap_init: not enough kvm for pv chunks");
1361
1362 pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
1363
1364 /*
1365 * Now it is safe to enable pv_table recording.
1366 */
1367 PDEBUG(1, printf("pmap_init: done!\n"));
1368 }
1369
1370 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
1371 "Max number of PV entries");
1372 SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
1373 "Page share factor per proc");
1374
1375 static SYSCTL_NODE(_vm_pmap, OID_AUTO, section, CTLFLAG_RD, 0,
1376 "1MB page mapping counters");
1377
1378 static u_long pmap_section_demotions;
1379 SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, demotions, CTLFLAG_RD,
1380 &pmap_section_demotions, 0, "1MB page demotions");
1381
1382 static u_long pmap_section_mappings;
1383 SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, mappings, CTLFLAG_RD,
1384 &pmap_section_mappings, 0, "1MB page mappings");
1385
1386 static u_long pmap_section_p_failures;
1387 SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, p_failures, CTLFLAG_RD,
1388 &pmap_section_p_failures, 0, "1MB page promotion failures");
1389
1390 static u_long pmap_section_promotions;
1391 SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, promotions, CTLFLAG_RD,
1392 &pmap_section_promotions, 0, "1MB page promotions");
1393
1394 int
1395 pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype, int user)
1396 {
1397 struct l2_dtable *l2;
1398 struct l2_bucket *l2b;
1399 pd_entry_t *pl1pd, l1pd;
1400 pt_entry_t *ptep, pte;
1401 vm_paddr_t pa;
1402 u_int l1idx;
1403 int rv = 0;
1404
1405 l1idx = L1_IDX(va);
1406 rw_wlock(&pvh_global_lock);
1407 PMAP_LOCK(pmap);
1408 /*
1409 * Check and possibly fix-up L1 section mapping
1410 * only when superpage mappings are enabled to speed up.
1411 */
1412 if (sp_enabled) {
1413 pl1pd = &pmap->pm_l1->l1_kva[l1idx];
1414 l1pd = *pl1pd;
1415 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) {
1416 /* Catch an access to the vectors section */
1417 if (l1idx == L1_IDX(vector_page))
1418 goto out;
1419 /*
1420 * Stay away from the kernel mappings.
1421 * None of them should fault from L1 entry.
1422 */
1423 if (pmap == pmap_kernel())
1424 goto out;
1425 /*
1426 * Catch a forbidden userland access
1427 */
1428 if (user && !(l1pd & L1_S_PROT_U))
1429 goto out;
1430 /*
1431 			 * A superpage is always either mapped read only
1432 			 * or it is already marked modified and writable.
1433 			 * Therefore, handle only reference flag faults here
1434 			 * and demote the superpage in case of a write fault.
1435 */
1436 if ((ftype & VM_PROT_WRITE) && !L1_S_WRITABLE(l1pd) &&
1437 L1_S_REFERENCED(l1pd)) {
1438 (void)pmap_demote_section(pmap, va);
1439 goto out;
1440 } else if (!L1_S_REFERENCED(l1pd)) {
1441 /* Mark the page "referenced" */
1442 *pl1pd = l1pd | L1_S_REF;
1443 PTE_SYNC(pl1pd);
1444 goto l1_section_out;
1445 } else
1446 goto out;
1447 }
1448 }
1449 /*
1450 * If there is no l2_dtable for this address, then the process
1451 * has no business accessing it.
1452 *
1453 * Note: This will catch userland processes trying to access
1454 * kernel addresses.
1455 */
1456 l2 = pmap->pm_l2[L2_IDX(l1idx)];
1457 if (l2 == NULL)
1458 goto out;
1459
1460 /*
1461 * Likewise if there is no L2 descriptor table
1462 */
1463 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
1464 if (l2b->l2b_kva == NULL)
1465 goto out;
1466
1467 /*
1468 * Check the PTE itself.
1469 */
1470 ptep = &l2b->l2b_kva[l2pte_index(va)];
1471 pte = *ptep;
1472 if (pte == 0)
1473 goto out;
1474
1475 /*
1476 * Catch a userland access to the vector page mapped at 0x0
1477 */
1478 if (user && !(pte & L2_S_PROT_U))
1479 goto out;
1480 if (va == vector_page)
1481 goto out;
1482
1483 pa = l2pte_pa(pte);
1484 CTR5(KTR_PMAP, "pmap_fault_fix: pmap:%p va:%x pte:0x%x ftype:%x user:%x",
1485 pmap, va, pte, ftype, user);
1486 if ((ftype & VM_PROT_WRITE) && !(L2_S_WRITABLE(pte)) &&
1487 L2_S_REFERENCED(pte)) {
1488 /*
1489 * This looks like a good candidate for "page modified"
1490 * emulation...
1491 */
1492 struct pv_entry *pv;
1493 struct vm_page *m;
1494
1495 /* Extract the physical address of the page */
1496 if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) {
1497 goto out;
1498 }
1499 /* Get the current flags for this page. */
1500
1501 pv = pmap_find_pv(&m->md, pmap, va);
1502 if (pv == NULL) {
1503 goto out;
1504 }
1505
1506 /*
1507 * Do the flags say this page is writable? If not then it
1508 * is a genuine write fault. If yes then the write fault is
1509 * our fault as we did not reflect the write access in the
1510 * PTE. Now we know a write has occurred we can correct this
1511 * and also set the modified bit
1512 */
1513 if ((pv->pv_flags & PVF_WRITE) == 0) {
1514 goto out;
1515 }
1516
1517 vm_page_dirty(m);
1518
1519 /* Re-enable write permissions for the page */
1520 pmap_set_prot(ptep, VM_PROT_WRITE, *ptep & L2_S_PROT_U);
1521 CTR1(KTR_PMAP, "pmap_fault_fix: new pte:0x%x", pte);
1522 PTE_SYNC(ptep);
1523 rv = 1;
1524 } else if (!L2_S_REFERENCED(pte)) {
1525 /*
1526 * This looks like a good candidate for "page referenced"
1527 * emulation.
1528 */
1529 struct pv_entry *pv;
1530 struct vm_page *m;
1531
1532 /* Extract the physical address of the page */
1533 if ((m = PHYS_TO_VM_PAGE(pa)) == NULL)
1534 goto out;
1535 /* Get the current flags for this page. */
1536 pv = pmap_find_pv(&m->md, pmap, va);
1537 if (pv == NULL)
1538 goto out;
1539
1540 vm_page_aflag_set(m, PGA_REFERENCED);
1541
1542 /* Mark the page "referenced" */
1543 *ptep = pte | L2_S_REF;
1544 PTE_SYNC(ptep);
1545 rv = 1;
1546 }
1547
1548 /*
1549 * We know there is a valid mapping here, so simply
1550 * fix up the L1 if necessary.
1551 */
1552 pl1pd = &pmap->pm_l1->l1_kva[l1idx];
1553 l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO;
1554 if (*pl1pd != l1pd) {
1555 *pl1pd = l1pd;
1556 PTE_SYNC(pl1pd);
1557 rv = 1;
1558 }
1559
1560 #ifdef DEBUG
1561 /*
1562 * If 'rv == 0' at this point, it generally indicates that there is a
1563 * stale TLB entry for the faulting address. This happens when two or
1564 * more processes are sharing an L1. Since we don't flush the TLB on
1565 * a context switch between such processes, we can take domain faults
1566 * for mappings which exist at the same VA in both processes. EVEN IF
1567 * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for
1568 * example.
1569 *
1570 * This is extremely likely to happen if pmap_enter() updated the L1
1571 * entry for a recently entered mapping. In this case, the TLB is
1572 * flushed for the new mapping, but there may still be TLB entries for
1573 * other mappings belonging to other processes in the 1MB range
1574 * covered by the L1 entry.
1575 *
1576 * Since 'rv == 0', we know that the L1 already contains the correct
1577 * value, so the fault must be due to a stale TLB entry.
1578 *
1579 * Since we always need to flush the TLB anyway in the case where we
1580 * fixed up the L1, or frobbed the L2 PTE, we effectively deal with
1581 * stale TLB entries dynamically.
1582 *
1583 * However, the above condition can ONLY happen if the current L1 is
1584 * being shared. If it happens when the L1 is unshared, it indicates
1585 * that other parts of the pmap are not doing their job WRT managing
1586 * the TLB.
1587 */
1588 if (rv == 0 && pmap->pm_l1->l1_domain_use_count == 1) {
1589 printf("fixup: pmap %p, va 0x%08x, ftype %d - nothing to do!\n",
1590 pmap, va, ftype);
1591 printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n",
1592 l2, l2b, ptep, pl1pd);
1593 printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n",
1594 pte, l1pd, last_fault_code);
1595 #ifdef DDB
1596 Debugger();
1597 #endif
1598 }
1599 #endif
1600
1601 l1_section_out:
1602 cpu_tlb_flushID_SE(va);
1603 cpu_cpwait();
1604
1605 rv = 1;
1606
1607 out:
1608 rw_wunlock(&pvh_global_lock);
1609 PMAP_UNLOCK(pmap);
1610 return (rv);
1611 }
1612
1613 void
1614 pmap_postinit(void)
1615 {
1616 struct l2_bucket *l2b;
1617 struct l1_ttable *l1;
1618 pd_entry_t *pl1pt;
1619 pt_entry_t *ptep, pte;
1620 vm_offset_t va, eva;
1621 u_int loop, needed;
1622
1623 needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0);
1624 needed -= 1;
1625 l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK);
1626
1627 for (loop = 0; loop < needed; loop++, l1++) {
1628 /* Allocate a L1 page table */
1629 va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0,
1630 0xffffffff, L1_TABLE_SIZE, 0);
1631
1632 if (va == 0)
1633 panic("Cannot allocate L1 KVM");
1634
1635 eva = va + L1_TABLE_SIZE;
1636 pl1pt = (pd_entry_t *)va;
1637
1638 while (va < eva) {
1639 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1640 ptep = &l2b->l2b_kva[l2pte_index(va)];
1641 pte = *ptep;
1642 pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
1643 *ptep = pte;
1644 PTE_SYNC(ptep);
1645 cpu_tlb_flushD_SE(va);
1646
1647 va += PAGE_SIZE;
1648 }
1649 pmap_init_l1(l1, pl1pt);
1650 }
1651 #ifdef DEBUG
1652 printf("pmap_postinit: Allocated %d static L1 descriptor tables\n",
1653 needed);
1654 #endif
1655 }
1656
1657 /*
1658 * This is used to stuff certain critical values into the PCB where they
1659 * can be accessed quickly from cpu_switch() et al.
1660 */
1661 void
1662 pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb)
1663 {
1664 struct l2_bucket *l2b;
1665
1666 pcb->pcb_pagedir = pmap->pm_l1->l1_physaddr;
1667 pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) |
1668 (DOMAIN_CLIENT << (pmap->pm_domain * 2));
1669
1670 if (vector_page < KERNBASE) {
1671 pcb->pcb_pl1vec = &pmap->pm_l1->l1_kva[L1_IDX(vector_page)];
1672 l2b = pmap_get_l2_bucket(pmap, vector_page);
1673 pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO |
1674 L1_C_DOM(pmap->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL);
1675 } else
1676 pcb->pcb_pl1vec = NULL;
1677 }
1678
1679 void
1680 pmap_activate(struct thread *td)
1681 {
1682 pmap_t pmap;
1683 struct pcb *pcb;
1684
1685 pmap = vmspace_pmap(td->td_proc->p_vmspace);
1686 pcb = td->td_pcb;
1687
1688 critical_enter();
1689 pmap_set_pcb_pagedir(pmap, pcb);
1690
1691 if (td == curthread) {
1692 u_int cur_dacr, cur_ttb;
1693
1694 __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb));
1695 __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr));
1696
1697 cur_ttb &= ~(L1_TABLE_SIZE - 1);
1698
1699 if (cur_ttb == (u_int)pcb->pcb_pagedir &&
1700 cur_dacr == pcb->pcb_dacr) {
1701 /*
1702 * No need to switch address spaces.
1703 */
1704 critical_exit();
1705 return;
1706 }
1707
1708
1709 /*
1710 * We MUST, I repeat, MUST fix up the L1 entry corresponding
1711 * to 'vector_page' in the incoming L1 table before switching
1712 * to it otherwise subsequent interrupts/exceptions (including
1713 * domain faults!) will jump into hyperspace.
1714 */
1715 if (pcb->pcb_pl1vec) {
1716 *pcb->pcb_pl1vec = pcb->pcb_l1vec;
1717 }
1718
1719 cpu_domains(pcb->pcb_dacr);
1720 cpu_setttb(pcb->pcb_pagedir);
1721 }
1722 critical_exit();
1723 }
1724
1725 static int
1726 pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va)
1727 {
1728 pd_entry_t *pdep, pde;
1729 pt_entry_t *ptep, pte;
1730 vm_offset_t pa;
1731 int rv = 0;
1732
1733 /*
1734 * Make sure the descriptor itself has the correct cache mode
1735 */
1736 pdep = &kl1[L1_IDX(va)];
1737 pde = *pdep;
1738
1739 if (l1pte_section_p(pde)) {
1740 if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) {
1741 *pdep = (pde & ~L1_S_CACHE_MASK) |
1742 pte_l1_s_cache_mode_pt;
1743 PTE_SYNC(pdep);
1744 rv = 1;
1745 }
1746 } else {
1747 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK);
1748 ptep = (pt_entry_t *)kernel_pt_lookup(pa);
1749 if (ptep == NULL)
1750 panic("pmap_bootstrap: No L2 for L2 @ va %p\n", ptep);
1751
1752 ptep = &ptep[l2pte_index(va)];
1753 pte = *ptep;
1754 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
1755 *ptep = (pte & ~L2_S_CACHE_MASK) |
1756 pte_l2_s_cache_mode_pt;
1757 PTE_SYNC(ptep);
1758 rv = 1;
1759 }
1760 }
1761
1762 return (rv);
1763 }
1764
1765 static void
1766 pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap,
1767 pt_entry_t **ptep)
1768 {
1769 vm_offset_t va = *availp;
1770 struct l2_bucket *l2b;
1771
1772 if (ptep) {
1773 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1774 if (l2b == NULL)
1775 panic("pmap_alloc_specials: no l2b for 0x%x", va);
1776
1777 *ptep = &l2b->l2b_kva[l2pte_index(va)];
1778 }
1779
1780 *vap = va;
1781 *availp = va + (PAGE_SIZE * pages);
1782 }
1783
1784 /*
1785 * Bootstrap the system enough to run with virtual memory.
1786 *
1787 * On the arm this is called after mapping has already been enabled
1788 * and just syncs the pmap module with what has already been done.
1789 * [We can't call it easily with mapping off since the kernel is not
1790 * mapped with PA == VA, hence we would have to relocate every address
1791 * from the linked base (virtual) address "KERNBASE" to the actual
1792 * (physical) address starting relative to 0]
1793 */
1794 #define PMAP_STATIC_L2_SIZE 16
1795
1796 void
1797 pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt)
1798 {
1799 static struct l1_ttable static_l1;
1800 static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE];
1801 struct l1_ttable *l1 = &static_l1;
1802 struct l2_dtable *l2;
1803 struct l2_bucket *l2b;
1804 pd_entry_t pde;
1805 pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va;
1806 pt_entry_t *ptep;
1807 vm_paddr_t pa;
1808 vm_offset_t va;
1809 vm_size_t size;
1810 int l1idx, l2idx, l2next = 0;
1811
1812 PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n",
1813 firstaddr, vm_max_kernel_address));
1814
1815 virtual_avail = firstaddr;
1816 kernel_pmap->pm_l1 = l1;
1817 kernel_l1pa = l1pt->pv_pa;
1818
1819 /*
1820 * Scan the L1 translation table created by initarm() and create
1821 * the required metadata for all valid mappings found in it.
1822 */
1823 for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) {
1824 pde = kernel_l1pt[l1idx];
1825
1826 /*
1827 * We're only interested in Coarse mappings.
1828 * pmap_extract() can deal with section mappings without
1829 * recourse to checking L2 metadata.
1830 */
1831 if ((pde & L1_TYPE_MASK) != L1_TYPE_C)
1832 continue;
1833
1834 /*
1835 * Lookup the KVA of this L2 descriptor table
1836 */
1837 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK);
1838 ptep = (pt_entry_t *)kernel_pt_lookup(pa);
1839
1840 if (ptep == NULL) {
1841 panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx",
1842 (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa);
1843 }
1844
1845 /*
1846 * Fetch the associated L2 metadata structure.
1847 * Allocate a new one if necessary.
1848 */
1849 if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) {
1850 if (l2next == PMAP_STATIC_L2_SIZE)
1851 panic("pmap_bootstrap: out of static L2s");
1852 kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 =
1853 &static_l2[l2next++];
1854 }
1855
1856 /*
1857 * One more L1 slot tracked...
1858 */
1859 l2->l2_occupancy++;
1860
1861 /*
1862 * Fill in the details of the L2 descriptor in the
1863 * appropriate bucket.
1864 */
1865 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
1866 l2b->l2b_kva = ptep;
1867 l2b->l2b_phys = pa;
1868 l2b->l2b_l1idx = l1idx;
1869
1870 /*
1871 * Establish an initial occupancy count for this descriptor
1872 */
1873 for (l2idx = 0;
1874 l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
1875 l2idx++) {
1876 if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) {
1877 l2b->l2b_occupancy++;
1878 }
1879 }
1880
1881 /*
1882 * Make sure the descriptor itself has the correct cache mode.
1883 * If not, fix it, but whine about the problem. Port-meisters
1884 * should consider this a clue to fix up their initarm()
1885 * function. :)
1886 */
1887 if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) {
1888 printf("pmap_bootstrap: WARNING! wrong cache mode for "
1889 "L2 pte @ %p\n", ptep);
1890 }
1891 }
1892
1893
1894 /*
1895 * Ensure the primary (kernel) L1 has the correct cache mode for
1896 * a page table. Bitch if it is not correctly set.
1897 */
1898 for (va = (vm_offset_t)kernel_l1pt;
1899 va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) {
1900 if (pmap_set_pt_cache_mode(kernel_l1pt, va))
1901 printf("pmap_bootstrap: WARNING! wrong cache mode for "
1902 "primary L1 @ 0x%x\n", va);
1903 }
1904
1905 cpu_dcache_wbinv_all();
1906 cpu_l2cache_wbinv_all();
1907 cpu_tlb_flushID();
1908 cpu_cpwait();
1909
1910 PMAP_LOCK_INIT(kernel_pmap);
1911 CPU_FILL(&kernel_pmap->pm_active);
1912 kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL;
1913 TAILQ_INIT(&kernel_pmap->pm_pvchunk);
1914
1915 /*
1916 * Initialize the global pv list lock.
1917 */
1918 rw_init(&pvh_global_lock, "pmap pv global");
1919
1920 /*
1921 * Reserve some special page table entries/VA space for temporary
1922 * mapping of pages.
1923 */
1924
1925 pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte);
1926 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte);
1927 pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte);
1928 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte);
1929 size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) /
1930 L1_S_SIZE;
1931 pmap_alloc_specials(&virtual_avail,
1932 round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE,
1933 &pmap_kernel_l2ptp_kva, NULL);
1934
1935 size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE;
1936 pmap_alloc_specials(&virtual_avail,
1937 round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE,
1938 &pmap_kernel_l2dtable_kva, NULL);
1939
1940 pmap_alloc_specials(&virtual_avail,
1941 1, (vm_offset_t*)&_tmppt, NULL);
1942 pmap_alloc_specials(&virtual_avail,
1943 MAXDUMPPGS, (vm_offset_t *)&crashdumpmap, NULL);
1944 SLIST_INIT(&l1_list);
1945 TAILQ_INIT(&l1_lru_list);
1946 mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF);
1947 pmap_init_l1(l1, kernel_l1pt);
1948 cpu_dcache_wbinv_all();
1949 cpu_l2cache_wbinv_all();
1950
1951 virtual_avail = round_page(virtual_avail);
1952 virtual_end = vm_max_kernel_address;
1953 kernel_vm_end = pmap_curmaxkvaddr;
1954 arm_nocache_startaddr = vm_max_kernel_address;
1955 mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF);
1956
1957 pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb);
1958 }
1959
1960 /***************************************************
1961 * Pmap allocation/deallocation routines.
1962 ***************************************************/
1963
1964 /*
1965 * Release any resources held by the given physical map.
1966 * Called when a pmap initialized by pmap_pinit is being released.
1967 * Should only be called if the map contains no valid mappings.
1968 */
1969 void
1970 pmap_release(pmap_t pmap)
1971 {
1972 struct pcb *pcb;
1973
1974 cpu_idcache_wbinv_all();
1975 cpu_l2cache_wbinv_all();
1976 cpu_tlb_flushID();
1977 cpu_cpwait();
1978 if (vector_page < KERNBASE) {
1979 struct pcb *curpcb = PCPU_GET(curpcb);
1980 pcb = thread0.td_pcb;
1981 if (pmap_is_current(pmap)) {
1982 /*
1983 * Frob the L1 entry corresponding to the vector
1984 * page so that it contains the kernel pmap's domain
1985 * number. This will ensure pmap_remove() does not
1986 * pull the current vector page out from under us.
1987 */
1988 critical_enter();
1989 *pcb->pcb_pl1vec = pcb->pcb_l1vec;
1990 cpu_domains(pcb->pcb_dacr);
1991 cpu_setttb(pcb->pcb_pagedir);
1992 critical_exit();
1993 }
1994 pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE);
1995 /*
1996 * Make sure cpu_switch(), et al, DTRT. This is safe to do
1997 * since this process has no remaining mappings of its own.
1998 */
1999 curpcb->pcb_pl1vec = pcb->pcb_pl1vec;
2000 curpcb->pcb_l1vec = pcb->pcb_l1vec;
2001 curpcb->pcb_dacr = pcb->pcb_dacr;
2002 curpcb->pcb_pagedir = pcb->pcb_pagedir;
2003
2004 }
2005 pmap_free_l1(pmap);
2006
2007 dprintf("pmap_release()\n");
2008 }
2009
2010
2011
2012 /*
2013 * Helper function for pmap_grow_l2_bucket()
2014 */
2015 static __inline int
2016 pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap)
2017 {
2018 struct l2_bucket *l2b;
2019 pt_entry_t *ptep;
2020 vm_paddr_t pa;
2021 struct vm_page *m;
2022
2023 m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
2024 if (m == NULL)
2025 return (1);
2026 pa = VM_PAGE_TO_PHYS(m);
2027
2028 if (pap)
2029 *pap = pa;
2030
2031 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2032
2033 ptep = &l2b->l2b_kva[l2pte_index(va)];
2034 *ptep = L2_S_PROTO | pa | cache_mode | L2_S_REF;
2035 pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 0);
2036 PTE_SYNC(ptep);
2037
2038 return (0);
2039 }
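/*
 * Note that the PTE written above already carries L2_S_REF and kernel
 * read/write permissions, so the freshly grown page-table or l2_dtable
 * backing page can be touched immediately without triggering the
 * referenced/modified-bit emulation handled by pmap_fault_fixup().
 */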
2040
2041 /*
2042 * This is the same as pmap_alloc_l2_bucket(), except that it is only
2043 * used by pmap_growkernel().
2044 */
2045 static __inline struct l2_bucket *
2046 pmap_grow_l2_bucket(pmap_t pmap, vm_offset_t va)
2047 {
2048 struct l2_dtable *l2;
2049 struct l2_bucket *l2b;
2050 struct l1_ttable *l1;
2051 pd_entry_t *pl1pd;
2052 u_short l1idx;
2053 vm_offset_t nva;
2054
2055 l1idx = L1_IDX(va);
2056
2057 if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) {
2058 /*
2059 * No mapping at this address, as there is
2060 * no entry in the L1 table.
2061 * Need to allocate a new l2_dtable.
2062 */
2063 nva = pmap_kernel_l2dtable_kva;
2064 if ((nva & PAGE_MASK) == 0) {
2065 /*
2066 * Need to allocate a backing page
2067 */
2068 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2069 return (NULL);
2070 }
2071
2072 l2 = (struct l2_dtable *)nva;
2073 nva += sizeof(struct l2_dtable);
2074
2075 if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva &
2076 PAGE_MASK)) {
2077 /*
2078 * The new l2_dtable straddles a page boundary.
2079 * Map in another page to cover it.
2080 */
2081 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2082 return (NULL);
2083 }
2084
2085 pmap_kernel_l2dtable_kva = nva;
2086
2087 /*
2088 * Link it into the parent pmap
2089 */
2090 pmap->pm_l2[L2_IDX(l1idx)] = l2;
2091 memset(l2, 0, sizeof(*l2));
2092 }
2093
2094 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2095
2096 /*
2097 * Fetch pointer to the L2 page table associated with the address.
2098 */
2099 if (l2b->l2b_kva == NULL) {
2100 pt_entry_t *ptep;
2101
2102 /*
2103 * No L2 page table has been allocated. Chances are, this
2104 * is because we just allocated the l2_dtable, above.
2105 */
2106 nva = pmap_kernel_l2ptp_kva;
2107 ptep = (pt_entry_t *)nva;
2108 if ((nva & PAGE_MASK) == 0) {
2109 /*
2110 * Need to allocate a backing page
2111 */
2112 if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt,
2113 &pmap_kernel_l2ptp_phys))
2114 return (NULL);
2115 }
2116 memset(ptep, 0, L2_TABLE_SIZE_REAL);
2117 l2->l2_occupancy++;
2118 l2b->l2b_kva = ptep;
2119 l2b->l2b_l1idx = l1idx;
2120 l2b->l2b_phys = pmap_kernel_l2ptp_phys;
2121
2122 pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL;
2123 pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL;
2124 }
2125
2126 /* Distribute new L1 entry to all other L1s */
2127 SLIST_FOREACH(l1, &l1_list, l1_link) {
2128 pl1pd = &l1->l1_kva[L1_IDX(va)];
2129 *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) |
2130 L1_C_PROTO;
2131 PTE_SYNC(pl1pd);
2132 }
2133
2134 return (l2b);
2135 }
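/*
 * A note on the carving above: L2_TABLE_SIZE_REAL is 1KB, so four L2 page
 * tables share each 4KB backing page.  pmap_grow_map() is only called to
 * allocate a new backing page when the KVA cursor crosses a page boundary,
 * i.e. when (nva & PAGE_MASK) == 0.
 */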
2136
2137
2138 /*
2139 * grow the number of kernel page table entries, if needed
2140 */
2141 void
2142 pmap_growkernel(vm_offset_t addr)
2143 {
2144 pmap_t kpmap = pmap_kernel();
2145
2146 if (addr <= pmap_curmaxkvaddr)
2147 return; /* we are OK */
2148
2149 /*
2150 * whoops! we need to add kernel PTPs
2151 */
2152
2153 /* Map 1MB at a time */
2154 for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE)
2155 pmap_grow_l2_bucket(kpmap, pmap_curmaxkvaddr);
2156
2157 /*
2158 	 * Flush out the cache; this is expensive, but growkernel will
2159 	 * happen rarely enough that the cost is acceptable.
2160 */
2161 cpu_dcache_wbinv_all();
2162 cpu_l2cache_wbinv_all();
2163 cpu_tlb_flushD();
2164 cpu_cpwait();
2165 kernel_vm_end = pmap_curmaxkvaddr;
2166 }
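/*
 * Illustrative example (hypothetical addresses): with pmap_curmaxkvaddr at
 * 0xc1000000 and addr == 0xc1280000, the loop above calls
 * pmap_grow_l2_bucket() three times (for 0xc1000000, 0xc1100000 and
 * 0xc1200000) and leaves pmap_curmaxkvaddr at 0xc1300000, since growth
 * always proceeds in whole L1_S_SIZE (1MB) sections.
 */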
2167
2168 /*
2169 * Returns TRUE if the given page is mapped individually or as part of
2170 * a 1MB section. Otherwise, returns FALSE.
2171 */
2172 boolean_t
2173 pmap_page_is_mapped(vm_page_t m)
2174 {
2175 boolean_t rv;
2176
2177 if ((m->oflags & VPO_UNMANAGED) != 0)
2178 return (FALSE);
2179 rw_wlock(&pvh_global_lock);
2180 rv = !TAILQ_EMPTY(&m->md.pv_list) ||
2181 ((m->flags & PG_FICTITIOUS) == 0 &&
2182 !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
2183 rw_wunlock(&pvh_global_lock);
2184 return (rv);
2185 }
2186
2187 /*
2188  * Remove all pages from the specified address space;
2189  * this aids process exit speeds.  Also, this code
2190  * is special cased for the current process only, but
2191 * can have the more generic (and slightly slower)
2192 * mode enabled. This is much faster than pmap_remove
2193 * in the case of running down an entire address space.
2194 */
2195 void
2196 pmap_remove_pages(pmap_t pmap)
2197 {
2198 struct pv_entry *pv;
2199 struct l2_bucket *l2b = NULL;
2200 struct pv_chunk *pc, *npc;
2201 struct md_page *pvh;
2202 pd_entry_t *pl1pd, l1pd;
2203 pt_entry_t *ptep;
2204 vm_page_t m, mt;
2205 vm_offset_t va;
2206 uint32_t inuse, bitmask;
2207 int allfree, bit, field, idx;
2208
2209 rw_wlock(&pvh_global_lock);
2210 PMAP_LOCK(pmap);
2211
2212 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2213 allfree = 1;
2214 for (field = 0; field < _NPCM; field++) {
2215 inuse = ~pc->pc_map[field] & pc_freemask[field];
2216 while (inuse != 0) {
2217 bit = ffs(inuse) - 1;
2218 bitmask = 1ul << bit;
2219 idx = field * sizeof(inuse) * NBBY + bit;
2220 pv = &pc->pc_pventry[idx];
2221 va = pv->pv_va;
2222 inuse &= ~bitmask;
2223 if (pv->pv_flags & PVF_WIRED) {
2224 /* Cannot remove wired pages now. */
2225 allfree = 0;
2226 continue;
2227 }
2228 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
2229 l1pd = *pl1pd;
2230 l2b = pmap_get_l2_bucket(pmap, va);
2231 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) {
2232 pvh = pa_to_pvh(l1pd & L1_S_FRAME);
2233 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
2234 if (TAILQ_EMPTY(&pvh->pv_list)) {
2235 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME);
2236 KASSERT((vm_offset_t)m >= KERNBASE,
2237 ("Trying to access non-existent page "
2238 "va %x l1pd %x", trunc_1mpage(va), l1pd));
2239 for (mt = m; mt < &m[L2_PTE_NUM_TOTAL]; mt++) {
2240 if (TAILQ_EMPTY(&mt->md.pv_list))
2241 vm_page_aflag_clear(mt, PGA_WRITEABLE);
2242 }
2243 }
2244 if (l2b != NULL) {
2245 KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL,
2246 ("pmap_remove_pages: l2_bucket occupancy error"));
2247 pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL);
2248 }
2249 pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL;
2250 *pl1pd = 0;
2251 PTE_SYNC(pl1pd);
2252 } else {
2253 KASSERT(l2b != NULL,
2254 ("No L2 bucket in pmap_remove_pages"));
2255 ptep = &l2b->l2b_kva[l2pte_index(va)];
2256 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep));
2257 KASSERT((vm_offset_t)m >= KERNBASE,
2258 ("Trying to access non-existent page "
2259 "va %x pte %x", va, *ptep));
2260 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2261 if (TAILQ_EMPTY(&m->md.pv_list) &&
2262 (m->flags & PG_FICTITIOUS) == 0) {
2263 pvh = pa_to_pvh(l2pte_pa(*ptep));
2264 if (TAILQ_EMPTY(&pvh->pv_list))
2265 vm_page_aflag_clear(m, PGA_WRITEABLE);
2266 }
2267 *ptep = 0;
2268 PTE_SYNC(ptep);
2269 pmap_free_l2_bucket(pmap, l2b, 1);
2270 pmap->pm_stats.resident_count--;
2271 }
2272
2273 /* Mark free */
2274 PV_STAT(pv_entry_frees++);
2275 PV_STAT(pv_entry_spare++);
2276 pv_entry_count--;
2277 pc->pc_map[field] |= bitmask;
2278 }
2279 }
2280 if (allfree) {
2281 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2282 pmap_free_pv_chunk(pc);
2283 }
2284
2285 }
2286
2287 rw_wunlock(&pvh_global_lock);
2288 cpu_tlb_flushID();
2289 cpu_cpwait();
2290 PMAP_UNLOCK(pmap);
2291 }
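/*
 * A note on the bitmap walk above: each pv_chunk packs its entries into the
 * pc_map[] words, so "idx = field * sizeof(inuse) * NBBY + bit" turns a
 * (word, bit) pair back into an index into pc_pventry[]; with 32-bit words
 * that is simply field * 32 + bit.
 */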
2292
2293
2294 /***************************************************
2295 * Low level mapping routines.....
2296 ***************************************************/
2297
2298 #ifdef ARM_HAVE_SUPERSECTIONS
2299 /* Map a super section into the KVA. */
2300
2301 void
2302 pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags)
2303 {
2304 pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) |
2305 (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL,
2306 VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) |
2307 L1_S_DOM(PMAP_DOMAIN_KERNEL);
2308 struct l1_ttable *l1;
2309 vm_offset_t va0, va_end;
2310
2311 KASSERT(((va | pa) & L1_SUP_OFFSET) == 0,
2312 ("Not a valid super section mapping"));
2313 if (flags & SECTION_CACHE)
2314 pd |= pte_l1_s_cache_mode;
2315 else if (flags & SECTION_PT)
2316 pd |= pte_l1_s_cache_mode_pt;
2317
2318 va0 = va & L1_SUP_FRAME;
2319 va_end = va + L1_SUP_SIZE;
2320 SLIST_FOREACH(l1, &l1_list, l1_link) {
2321 va = va0;
2322 for (; va < va_end; va += L1_S_SIZE) {
2323 l1->l1_kva[L1_IDX(va)] = pd;
2324 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
2325 }
2326 }
2327 }
2328 #endif
2329
2330 /* Map a section into the KVA. */
2331
2332 void
2333 pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags)
2334 {
2335 pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL,
2336 VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | L1_S_REF |
2337 L1_S_DOM(PMAP_DOMAIN_KERNEL);
2338 struct l1_ttable *l1;
2339
2340 KASSERT(((va | pa) & L1_S_OFFSET) == 0,
2341 ("Not a valid section mapping"));
2342 if (flags & SECTION_CACHE)
2343 pd |= pte_l1_s_cache_mode;
2344 else if (flags & SECTION_PT)
2345 pd |= pte_l1_s_cache_mode_pt;
2346
2347 SLIST_FOREACH(l1, &l1_list, l1_link) {
2348 l1->l1_kva[L1_IDX(va)] = pd;
2349 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
2350 }
2351 }
2352
2353 /*
2354 * Make a temporary mapping for a physical address. This is only intended
2355 * to be used for panic dumps.
2356 */
2357 void *
2358 pmap_kenter_temp(vm_paddr_t pa, int i)
2359 {
2360 vm_offset_t va;
2361
2362 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
2363 pmap_kenter(va, pa);
2364 return ((void *)crashdumpmap);
2365 }
2366
2367 /*
2368  * Add a wired page to the kva.
2369  * Note that in order for the mapping to take effect -- you
2370  * should do an invltlb after doing the pmap_kenter...
2371 */
2372 static PMAP_INLINE void
2373 pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags)
2374 {
2375 struct l2_bucket *l2b;
2376 pt_entry_t *ptep;
2377 pt_entry_t opte;
2378
2379 PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n",
2380 (uint32_t) va, (uint32_t) pa));
2381
2382
2383 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2384 if (l2b == NULL)
2385 l2b = pmap_grow_l2_bucket(pmap_kernel(), va);
2386 KASSERT(l2b != NULL, ("No L2 Bucket"));
2387
2388 ptep = &l2b->l2b_kva[l2pte_index(va)];
2389 opte = *ptep;
2390 if (l2pte_valid(opte)) {
2391 cpu_tlb_flushD_SE(va);
2392 cpu_cpwait();
2393 } else {
2394 if (opte == 0)
2395 l2b->l2b_occupancy++;
2396 }
2397
2398 if (flags & KENTER_CACHE) {
2399 *ptep = L2_S_PROTO | pa | pte_l2_s_cache_mode | L2_S_REF;
2400 pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE,
2401 flags & KENTER_USER);
2402 } else {
2403 *ptep = L2_S_PROTO | pa | L2_S_REF;
2404 pmap_set_prot(ptep, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE,
2405 0);
2406 }
2407
2408 PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n",
2409 (uint32_t) ptep, opte, *ptep));
2410 PTE_SYNC(ptep);
2411 cpu_cpwait();
2412 }
2413
2414 void
2415 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
2416 {
2417 pmap_kenter_internal(va, pa, KENTER_CACHE);
2418 }
2419
2420 void
2421 pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa)
2422 {
2423
2424 pmap_kenter_internal(va, pa, 0);
2425 }
2426
2427 void
2428 pmap_kenter_user(vm_offset_t va, vm_paddr_t pa)
2429 {
2430
2431 pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER);
2432 /*
2433 * Call pmap_fault_fixup now, to make sure we'll have no exception
2434 * at the first use of the new address, or bad things will happen,
2435 * as we use one of these addresses in the exception handlers.
2436 */
2437 pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1);
2438 }
2439
2440 vm_paddr_t
2441 pmap_kextract(vm_offset_t va)
2442 {
2443
2444 return (pmap_extract_locked(kernel_pmap, va));
2445 }
2446
2447 /*
2448 * remove a page from the kernel pagetables
2449 */
2450 void
2451 pmap_kremove(vm_offset_t va)
2452 {
2453 struct l2_bucket *l2b;
2454 pt_entry_t *ptep, opte;
2455
2456 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2457 if (!l2b)
2458 return;
2459 KASSERT(l2b != NULL, ("No L2 Bucket"));
2460 ptep = &l2b->l2b_kva[l2pte_index(va)];
2461 opte = *ptep;
2462 if (l2pte_valid(opte)) {
2463 va = va & ~PAGE_MASK;
2464 cpu_tlb_flushD_SE(va);
2465 cpu_cpwait();
2466 *ptep = 0;
2467 PTE_SYNC(ptep);
2468 }
2469 }
2470
2471
2472 /*
2473 * Used to map a range of physical addresses into kernel
2474 * virtual address space.
2475 *
2476 * The value passed in '*virt' is a suggested virtual address for
2477 * the mapping. Architectures which can support a direct-mapped
2478 * physical to virtual region can return the appropriate address
2479 * within that region, leaving '*virt' unchanged. Other
2480 * architectures should map the pages starting at '*virt' and
2481 * update '*virt' with the first usable address after the mapped
2482 * region.
2483 */
2484 vm_offset_t
2485 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
2486 {
2487 vm_offset_t sva = *virt;
2488 vm_offset_t va = sva;
2489
2490 PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, "
2491 "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end,
2492 prot));
2493
2494 while (start < end) {
2495 pmap_kenter(va, start);
2496 va += PAGE_SIZE;
2497 start += PAGE_SIZE;
2498 }
2499 *virt = va;
2500 return (sva);
2501 }
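/*
 * Example usage (a sketch; 'pa_start' and 'va_cursor' are hypothetical):
 *
 *	vm_offset_t va_cursor = virtual_avail;
 *	vm_offset_t mapped;
 *
 *	mapped = pmap_map(&va_cursor, pa_start, pa_start + 3 * PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *
 * On return 'mapped' is the old value of 'va_cursor', and 'va_cursor' has
 * been advanced past the three newly entered pages.
 */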
2502
2503 /*
2504  * Add a list of wired pages to the kva;
2505  * this routine is only used for temporary
2506 * kernel mappings that do not need to have
2507 * page modification or references recorded.
2508 * Note that old mappings are simply written
2509 * over. The page *must* be wired.
2510 */
2511 void
2512 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
2513 {
2514 int i;
2515
2516 for (i = 0; i < count; i++) {
2517 pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]),
2518 KENTER_CACHE);
2519 va += PAGE_SIZE;
2520 }
2521 }
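/*
 * Typical usage (sketch; 'kva_window', 'pages' and 'npages' are
 * hypothetical): temporarily wiring an array of vm_pages into a previously
 * reserved KVA window and tearing the mappings down again:
 *
 *	pmap_qenter(kva_window, pages, npages);
 *	... access the pages through kva_window ...
 *	pmap_qremove(kva_window, npages);
 */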
2522
2523
2524 /*
2525 * this routine jerks page mappings from the
2526 * kernel -- it is meant only for temporary mappings.
2527 */
2528 void
2529 pmap_qremove(vm_offset_t va, int count)
2530 {
2531 int i;
2532
2533 for (i = 0; i < count; i++) {
2534 if (vtophys(va))
2535 pmap_kremove(va);
2536
2537 va += PAGE_SIZE;
2538 }
2539 }
2540
2541
2542 /*
2543 * pmap_object_init_pt preloads the ptes for a given object
2544 * into the specified pmap. This eliminates the blast of soft
2545 * faults on process startup and immediately after an mmap.
2546 */
2547 void
2548 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2549 vm_pindex_t pindex, vm_size_t size)
2550 {
2551
2552 VM_OBJECT_ASSERT_WLOCKED(object);
2553 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2554 ("pmap_object_init_pt: non-device object"));
2555 }
2556
2557
2558 /*
2559 * pmap_is_prefaultable:
2560 *
2561  *	Return whether or not the specified virtual address is eligible
2562 * for prefault.
2563 */
2564 boolean_t
2565 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2566 {
2567 pd_entry_t *pdep;
2568 pt_entry_t *ptep;
2569
2570 if (!pmap_get_pde_pte(pmap, addr, &pdep, &ptep))
2571 return (FALSE);
2572 KASSERT((pdep != NULL && (l1pte_section_p(*pdep) || ptep != NULL)),
2573 ("Valid mapping but no pte ?"));
2574 if (*pdep != 0 && !l1pte_section_p(*pdep))
2575 if (*ptep == 0)
2576 return (TRUE);
2577 return (FALSE);
2578 }
2579
2580 /*
2581 * Fetch pointers to the PDE/PTE for the given pmap/VA pair.
2582 * Returns TRUE if the mapping exists, else FALSE.
2583 *
2584 * NOTE: This function is only used by a couple of arm-specific modules.
2585 * It is not safe to take any pmap locks here, since we could be right
2586 * in the middle of debugging the pmap anyway...
2587 *
2588 * It is possible for this routine to return FALSE even though a valid
2589 * mapping does exist. This is because we don't lock, so the metadata
2590 * state may be inconsistent.
2591 *
2592 * NOTE: We can return a NULL *ptp in the case where the L1 pde is
2593 * a "section" mapping.
2594 */
2595 boolean_t
2596 pmap_get_pde_pte(pmap_t pmap, vm_offset_t va, pd_entry_t **pdp,
2597 pt_entry_t **ptp)
2598 {
2599 struct l2_dtable *l2;
2600 pd_entry_t *pl1pd, l1pd;
2601 pt_entry_t *ptep;
2602 u_short l1idx;
2603
2604 if (pmap->pm_l1 == NULL)
2605 return (FALSE);
2606
2607 l1idx = L1_IDX(va);
2608 *pdp = pl1pd = &pmap->pm_l1->l1_kva[l1idx];
2609 l1pd = *pl1pd;
2610
2611 if (l1pte_section_p(l1pd)) {
2612 *ptp = NULL;
2613 return (TRUE);
2614 }
2615
2616 if (pmap->pm_l2 == NULL)
2617 return (FALSE);
2618
2619 l2 = pmap->pm_l2[L2_IDX(l1idx)];
2620
2621 if (l2 == NULL ||
2622 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
2623 return (FALSE);
2624 }
2625
2626 *ptp = &ptep[l2pte_index(va)];
2627 return (TRUE);
2628 }
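/*
 * Example caller (a sketch of hypothetical debugging code): a NULL *ptp
 * together with a TRUE return distinguishes a section mapping from a
 * small-page mapping, as described above:
 *
 *	pd_entry_t *pdep;
 *	pt_entry_t *ptep;
 *
 *	if (pmap_get_pde_pte(pmap, va, &pdep, &ptep)) {
 *		if (ptep == NULL)
 *			printf("section: pde %#x\n", *pdep);
 *		else
 *			printf("page: pte %#x\n", *ptep);
 *	}
 */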
2629
2630 /*
2631 * Routine: pmap_remove_all
2632 * Function:
2633 * Removes this physical page from
2634 * all physical maps in which it resides.
2635 * Reflects back modify bits to the pager.
2636 *
2637 * Notes:
2638 * Original versions of this routine were very
2639 * inefficient because they iteratively called
2640 * pmap_remove (slow...)
2641 */
2642 void
2643 pmap_remove_all(vm_page_t m)
2644 {
2645 struct md_page *pvh;
2646 pv_entry_t pv;
2647 pmap_t pmap;
2648 pt_entry_t *ptep;
2649 struct l2_bucket *l2b;
2650 boolean_t flush = FALSE;
2651 pmap_t curpmap;
2652 u_int is_exec = 0;
2653
2654 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2655 ("pmap_remove_all: page %p is not managed", m));
2656 rw_wlock(&pvh_global_lock);
2657 if ((m->flags & PG_FICTITIOUS) != 0)
2658 goto small_mappings;
2659 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
2660 while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
2661 pmap = PV_PMAP(pv);
2662 PMAP_LOCK(pmap);
2663 pd_entry_t *pl1pd;
2664 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)];
2665 KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO,
2666 ("pmap_remove_all: valid section mapping expected"));
2667 (void)pmap_demote_section(pmap, pv->pv_va);
2668 PMAP_UNLOCK(pmap);
2669 }
2670 small_mappings:
2671 curpmap = vmspace_pmap(curproc->p_vmspace);
2672 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
2673 pmap = PV_PMAP(pv);
2674 if (flush == FALSE && (pmap == curpmap ||
2675 pmap == pmap_kernel()))
2676 flush = TRUE;
2677
2678 PMAP_LOCK(pmap);
2679 l2b = pmap_get_l2_bucket(pmap, pv->pv_va);
2680 KASSERT(l2b != NULL, ("No l2 bucket"));
2681 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
2682 is_exec |= PTE_BEEN_EXECD(*ptep);
2683 *ptep = 0;
2684 if (pmap_is_current(pmap))
2685 PTE_SYNC(ptep);
2686 pmap_free_l2_bucket(pmap, l2b, 1);
2687 pmap->pm_stats.resident_count--;
2688 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2689 if (pv->pv_flags & PVF_WIRED)
2690 pmap->pm_stats.wired_count--;
2691 pmap_free_pv_entry(pmap, pv);
2692 PMAP_UNLOCK(pmap);
2693 }
2694
2695 if (flush) {
2696 if (is_exec)
2697 cpu_tlb_flushID();
2698 else
2699 cpu_tlb_flushD();
2700 }
2701 vm_page_aflag_clear(m, PGA_WRITEABLE);
2702 rw_wunlock(&pvh_global_lock);
2703 }
2704
2705 int
2706 pmap_change_attr(vm_offset_t sva, vm_size_t len, int mode)
2707 {
2708 vm_offset_t base, offset, tmpva;
2709 vm_size_t size;
2710 struct l2_bucket *l2b;
2711 pt_entry_t *ptep, pte;
2712 vm_offset_t next_bucket;
2713
2714 PMAP_LOCK(kernel_pmap);
2715
2716 base = trunc_page(sva);
2717 offset = sva & PAGE_MASK;
2718 size = roundup(offset + len, PAGE_SIZE);
2719
2720 #ifdef checkit
2721 /*
2722 * Only supported on kernel virtual addresses, including the direct
2723 * map but excluding the recursive map.
2724 */
2725 if (base < DMAP_MIN_ADDRESS) {
2726 PMAP_UNLOCK(kernel_pmap);
2727 return (EINVAL);
2728 }
2729 #endif
2730 for (tmpva = base; tmpva < base + size; ) {
2731 next_bucket = L2_NEXT_BUCKET(tmpva);
2732 if (next_bucket > base + size)
2733 next_bucket = base + size;
2734
2735 l2b = pmap_get_l2_bucket(kernel_pmap, tmpva);
2736 if (l2b == NULL) {
2737 tmpva = next_bucket;
2738 continue;
2739 }
2740
2741 ptep = &l2b->l2b_kva[l2pte_index(tmpva)];
2742
2743 if (*ptep == 0) {
2744 PMAP_UNLOCK(kernel_pmap);
2745 			return (EINVAL);
2746 }
2747
2748 pte = *ptep &~ L2_S_CACHE_MASK;
2749 cpu_idcache_wbinv_range(tmpva, PAGE_SIZE);
2750 pmap_l2cache_wbinv_range(tmpva, pte & L2_S_FRAME, PAGE_SIZE);
2751 *ptep = pte;
2752 cpu_tlb_flushID_SE(tmpva);
2753
2754 dprintf("%s: for va:%x ptep:%x pte:%x\n",
2755 __func__, tmpva, (uint32_t)ptep, pte);
2756 tmpva += PAGE_SIZE;
2757 }
2758
2759 PMAP_UNLOCK(kernel_pmap);
2760
2761 return (0);
2762 }
2763
2764 /*
2765 * Set the physical protection on the
2766 * specified range of this map as requested.
2767 */
2768 void
2769 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
2770 {
2771 struct l2_bucket *l2b;
2772 struct md_page *pvh;
2773 struct pv_entry *pve;
2774 pd_entry_t *pl1pd, l1pd;
2775 pt_entry_t *ptep, pte;
2776 vm_offset_t next_bucket;
2777 u_int is_exec, is_refd;
2778 int flush;
2779
2780 if ((prot & VM_PROT_READ) == 0) {
2781 pmap_remove(pmap, sva, eva);
2782 return;
2783 }
2784
2785 if (prot & VM_PROT_WRITE) {
2786 /*
2787 * If this is a read->write transition, just ignore it and let
2788 * vm_fault() take care of it later.
2789 */
2790 return;
2791 }
2792
2793 rw_wlock(&pvh_global_lock);
2794 PMAP_LOCK(pmap);
2795
2796 /*
2797 	 * OK, at this point, we know we're doing a write-protect operation.
2798 * If the pmap is active, write-back the range.
2799 */
2800
2801 flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1;
2802 is_exec = is_refd = 0;
2803
2804 while (sva < eva) {
2805 next_bucket = L2_NEXT_BUCKET(sva);
2806 /*
2807 * Check for large page.
2808 */
2809 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)];
2810 l1pd = *pl1pd;
2811 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) {
2812 KASSERT(pmap != pmap_kernel(),
2813 ("pmap_protect: trying to modify "
2814 "kernel section protections"));
2815 /*
2816 * Are we protecting the entire large page? If not,
2817 * demote the mapping and fall through.
2818 */
2819 if (sva + L1_S_SIZE == L2_NEXT_BUCKET(sva) &&
2820 eva >= L2_NEXT_BUCKET(sva)) {
2821 l1pd &= ~(L1_S_PROT_MASK | L1_S_XN);
2822 if (!(prot & VM_PROT_EXECUTE))
2823 *pl1pd |= L1_S_XN;
2824 /*
2825 * At this point we are always setting
2826 * write-protect bit.
2827 */
2828 l1pd |= L1_S_APX;
2829 /* All managed superpages are user pages. */
2830 l1pd |= L1_S_PROT_U;
2831 *pl1pd = l1pd;
2832 PTE_SYNC(pl1pd);
2833 pvh = pa_to_pvh(l1pd & L1_S_FRAME);
2834 pve = pmap_find_pv(pvh, pmap,
2835 trunc_1mpage(sva));
2836 pve->pv_flags &= ~PVF_WRITE;
2837 sva = next_bucket;
2838 continue;
2839 } else if (!pmap_demote_section(pmap, sva)) {
2840 /* The large page mapping was destroyed. */
2841 sva = next_bucket;
2842 continue;
2843 }
2844 }
2845 if (next_bucket > eva)
2846 next_bucket = eva;
2847 l2b = pmap_get_l2_bucket(pmap, sva);
2848 if (l2b == NULL) {
2849 sva = next_bucket;
2850 continue;
2851 }
2852
2853 ptep = &l2b->l2b_kva[l2pte_index(sva)];
2854
2855 while (sva < next_bucket) {
2856 if ((pte = *ptep) != 0 && L2_S_WRITABLE(pte)) {
2857 struct vm_page *m;
2858
2859 m = PHYS_TO_VM_PAGE(l2pte_pa(pte));
2860 pmap_set_prot(ptep, prot,
2861 !(pmap == pmap_kernel()));
2862 PTE_SYNC(ptep);
2863
2864 pmap_modify_pv(m, pmap, sva, PVF_WRITE, 0);
2865
2866 if (flush >= 0) {
2867 flush++;
2868 is_exec |= PTE_BEEN_EXECD(pte);
2869 is_refd |= PTE_BEEN_REFD(pte);
2870 } else {
2871 if (PTE_BEEN_EXECD(pte))
2872 cpu_tlb_flushID_SE(sva);
2873 else if (PTE_BEEN_REFD(pte))
2874 cpu_tlb_flushD_SE(sva);
2875 }
2876 }
2877
2878 sva += PAGE_SIZE;
2879 ptep++;
2880 }
2881 }
2882
2883
2884 if (flush) {
2885 if (is_exec)
2886 cpu_tlb_flushID();
2887 else
2888 if (is_refd)
2889 cpu_tlb_flushD();
2890 }
2891 rw_wunlock(&pvh_global_lock);
2892
2893 PMAP_UNLOCK(pmap);
2894 }
2895
2896
2897 /*
2898 * Insert the given physical page (p) at
2899 * the specified virtual address (v) in the
2900 * target physical map with the protection requested.
2901 *
2902 * If specified, the page will be wired down, meaning
2903 * that the related pte can not be reclaimed.
2904 *
2905 * NB: This is the only routine which MAY NOT lazy-evaluate
2906 * or lose information. That is, this routine must actually
2907 * insert this page into the given map NOW.
2908 */
2909
2910 void
2911 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
2912 vm_prot_t prot, boolean_t wired)
2913 {
2914
2915 rw_wlock(&pvh_global_lock);
2916 PMAP_LOCK(pmap);
2917 pmap_enter_locked(pmap, va, access, m, prot, wired, M_WAITOK);
2918 PMAP_UNLOCK(pmap);
2919 rw_wunlock(&pvh_global_lock);
2920 }
2921
2922 /*
2923 * The pvh global and pmap locks must be held.
2924 */
2925 static void
2926 pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
2927 vm_prot_t prot, boolean_t wired, int flags)
2928 {
2929 struct l2_bucket *l2b = NULL;
2930 struct vm_page *om;
2931 struct pv_entry *pve = NULL;
2932 pd_entry_t *pl1pd, l1pd;
2933 pt_entry_t *ptep, npte, opte;
2934 u_int nflags;
2935 u_int is_exec, is_refd;
2936 vm_paddr_t pa;
2937 u_char user;
2938
2939 PMAP_ASSERT_LOCKED(pmap);
2940 rw_assert(&pvh_global_lock, RA_WLOCKED);
2941 if (va == vector_page) {
2942 pa = systempage.pv_pa;
2943 m = NULL;
2944 } else {
2945 KASSERT((m->oflags & VPO_UNMANAGED) != 0 ||
2946 vm_page_xbusied(m) || (flags & M_NOWAIT) != 0,
2947 ("pmap_enter_locked: page %p is not busy", m));
2948 pa = VM_PAGE_TO_PHYS(m);
2949 }
2950
2951 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
2952 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO)
2953 		panic("pmap_enter_locked: attempt to pmap_enter on a 1MB page");
2954
2955 user = 0;
2956 /*
2957 * Make sure userland mappings get the right permissions
2958 */
2959 if (pmap != pmap_kernel() && va != vector_page)
2960 user = 1;
2961
2962 nflags = 0;
2963
2964 if (prot & VM_PROT_WRITE)
2965 nflags |= PVF_WRITE;
2966 if (wired)
2967 nflags |= PVF_WIRED;
2968
2969 PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, "
2970 "prot = %x, wired = %x\n", (uint32_t) pmap, va, (uint32_t) m,
2971 prot, wired));
2972
2973 if (pmap == pmap_kernel()) {
2974 l2b = pmap_get_l2_bucket(pmap, va);
2975 if (l2b == NULL)
2976 l2b = pmap_grow_l2_bucket(pmap, va);
2977 } else {
2978 do_l2b_alloc:
2979 l2b = pmap_alloc_l2_bucket(pmap, va);
2980 if (l2b == NULL) {
2981 if (flags & M_WAITOK) {
2982 PMAP_UNLOCK(pmap);
2983 rw_wunlock(&pvh_global_lock);
2984 VM_WAIT;
2985 rw_wlock(&pvh_global_lock);
2986 PMAP_LOCK(pmap);
2987 goto do_l2b_alloc;
2988 }
2989 return;
2990 }
2991 }
2992
2993 ptep = &l2b->l2b_kva[l2pte_index(va)];
2994
2995 opte = *ptep;
2996 npte = pa;
2997 is_exec = is_refd = 0;
2998
2999 if (opte) {
3000 if (l2pte_pa(opte) == pa) {
3001 /*
3002 * We're changing the attrs of an existing mapping.
3003 */
3004 if (m != NULL)
3005 pmap_modify_pv(m, pmap, va,
3006 PVF_WRITE | PVF_WIRED, nflags);
3007 is_exec |= PTE_BEEN_EXECD(opte);
3008 is_refd |= PTE_BEEN_REFD(opte);
3009 goto validate;
3010 }
3011 if ((om = PHYS_TO_VM_PAGE(l2pte_pa(opte)))) {
3012 /*
3013 * Replacing an existing mapping with a new one.
3014 * It is part of our managed memory so we
3015 * must remove it from the PV list
3016 */
3017 if ((pve = pmap_remove_pv(om, pmap, va))) {
3018 is_exec |= PTE_BEEN_EXECD(opte);
3019 is_refd |= PTE_BEEN_REFD(opte);
3020
3021 if (m && ((m->oflags & VPO_UNMANAGED)))
3022 pmap_free_pv_entry(pmap, pve);
3023 }
3024 }
3025
3026 } else {
3027 /*
3028 * Keep the stats up to date
3029 */
3030 l2b->l2b_occupancy++;
3031 pmap->pm_stats.resident_count++;
3032 }
3033
3034 /*
3035 * Enter on the PV list if part of our managed memory.
3036 */
3037 if ((m && !(m->oflags & VPO_UNMANAGED))) {
3038 if ((!pve) && (pve = pmap_get_pv_entry(pmap, FALSE)) == NULL)
3039 panic("pmap_enter: no pv entries");
3040
3041 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
3042 ("pmap_enter: managed mapping within the clean submap"));
3043 KASSERT(pve != NULL, ("No pv"));
3044 pmap_enter_pv(m, pve, pmap, va, nflags);
3045 }
3046
3047 validate:
3048 /* Make the new PTE valid */
3049 npte |= L2_S_PROTO;
3050 #ifdef SMP
3051 npte |= L2_SHARED;
3052 #endif
3053 /* Set defaults first - kernel read access */
3054 npte |= L2_APX;
3055 npte |= L2_S_PROT_R;
3056 /* Set "referenced" flag */
3057 npte |= L2_S_REF;
3058
3059 /* Now tune APs as desired */
3060 if (user)
3061 npte |= L2_S_PROT_U;
3062 /*
3063 * If this is not a vector_page
3064 * then continue setting mapping parameters
3065 */
3066 if (m != NULL) {
3067 if (prot & (VM_PROT_ALL)) {
3068 if ((m->oflags & VPO_UNMANAGED) == 0)
3069 vm_page_aflag_set(m, PGA_REFERENCED);
3070 } else {
3071 /*
3072 * Need to do page referenced emulation.
3073 */
3074 npte &= ~L2_S_REF;
3075 }
3076
3077 if (prot & VM_PROT_WRITE) {
3078 /*
3079 * Enable write permission if the access type
3080 * indicates write intention. Emulate modified
3081 * bit otherwise.
3082 */
3083 if ((access & VM_PROT_WRITE) != 0)
3084 npte &= ~(L2_APX);
3085
3086 if ((m->oflags & VPO_UNMANAGED) == 0) {
3087 vm_page_aflag_set(m, PGA_WRITEABLE);
3088 /*
3089 * The access type and permissions indicate
3090 * that the page will be written as soon as
3091 * returned from fault service.
3092 * Mark it dirty from the outset.
3093 */
3094 if ((access & VM_PROT_WRITE) != 0)
3095 vm_page_dirty(m);
3096 }
3097 }
3098 if (!(prot & VM_PROT_EXECUTE))
3099 npte |= L2_XN;
3100
3101 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE)
3102 npte |= pte_l2_s_cache_mode;
3103 }
3104
3105 CTR5(KTR_PMAP,"enter: pmap:%p va:%x prot:%x pte:%x->%x",
3106 pmap, va, prot, opte, npte);
3107 /*
3108 * If this is just a wiring change, the two PTEs will be
3109 * identical, so there's no need to update the page table.
3110 */
3111 if (npte != opte) {
3112 boolean_t is_cached = pmap_is_current(pmap);
3113
3114 *ptep = npte;
3115 PTE_SYNC(ptep);
3116 if (is_cached) {
3117 /*
3118 * We only need to frob the cache/tlb if this pmap
3119 * is current
3120 */
3121 if (L1_IDX(va) != L1_IDX(vector_page) &&
3122 l2pte_valid(npte)) {
3123 /*
3124 * This mapping is likely to be accessed as
3125 * soon as we return to userland. Fix up the
3126 * L1 entry to avoid taking another
3127 * page/domain fault.
3128 */
3129 l1pd = l2b->l2b_phys |
3130 L1_C_DOM(pmap->pm_domain) | L1_C_PROTO;
3131 if (*pl1pd != l1pd) {
3132 *pl1pd = l1pd;
3133 PTE_SYNC(pl1pd);
3134 }
3135 }
3136 }
3137
3138 if (is_exec)
3139 cpu_tlb_flushID_SE(va);
3140 else if (is_refd)
3141 cpu_tlb_flushD_SE(va);
3142 }
3143
3144 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
3145 cpu_icache_sync_range(va, PAGE_SIZE);
3146 /*
3147 * If both the l2b_occupancy and the reservation are fully
3148 * populated, then attempt promotion.
3149 */
3150 if ((l2b->l2b_occupancy == L2_PTE_NUM_TOTAL) &&
3151 sp_enabled && (m->flags & PG_FICTITIOUS) == 0 &&
3152 vm_reserv_level_iffullpop(m) == 0)
3153 pmap_promote_section(pmap, va);
3154 }
3155
3156 /*
3157 * Maps a sequence of resident pages belonging to the same object.
3158 * The sequence begins with the given page m_start. This page is
3159 * mapped at the given virtual address start. Each subsequent page is
3160 * mapped at a virtual address that is offset from start by the same
3161 * amount as the page is offset from m_start within the object. The
3162 * last page in the sequence is the page with the largest offset from
3163 * m_start that can be mapped at a virtual address less than the given
3164 * virtual address end. Not every virtual page between start and end
3165 * is mapped; only those for which a resident page exists with the
3166 * corresponding offset from m_start are mapped.
3167 */
3168 void
3169 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
3170 vm_page_t m_start, vm_prot_t prot)
3171 {
3172 vm_offset_t va;
3173 vm_page_t m;
3174 vm_pindex_t diff, psize;
3175 vm_prot_t access;
3176
3177 VM_OBJECT_ASSERT_LOCKED(m_start->object);
3178
3179 psize = atop(end - start);
3180 m = m_start;
3181 access = prot = prot & (VM_PROT_READ | VM_PROT_EXECUTE);
3182 rw_wlock(&pvh_global_lock);
3183 PMAP_LOCK(pmap);
3184 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
3185 va = start + ptoa(diff);
3186 if ((va & L1_S_OFFSET) == 0 && L2_NEXT_BUCKET(va) <= end &&
3187 (VM_PAGE_TO_PHYS(m) & L1_S_OFFSET) == 0 &&
3188 sp_enabled && vm_reserv_level_iffullpop(m) == 0 &&
3189 pmap_enter_section(pmap, va, m, prot))
3190 m = &m[L1_S_SIZE / PAGE_SIZE - 1];
3191 else
3192 pmap_enter_locked(pmap, va, access, m, prot,
3193 FALSE, M_NOWAIT);
3194 m = TAILQ_NEXT(m, listq);
3195 }
3196 PMAP_UNLOCK(pmap);
3197 rw_wunlock(&pvh_global_lock);
3198 }
3199
3200 /*
3201 * this code makes some *MAJOR* assumptions:
3202 * 1. Current pmap & pmap exists.
3203 * 2. Not wired.
3204 * 3. Read access.
3205 * 4. No page table pages.
3206 * but is *MUCH* faster than pmap_enter...
3207 */
3208
3209 void
3210 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
3211 {
3212 vm_prot_t access;
3213
3214 access = prot = prot & (VM_PROT_READ | VM_PROT_EXECUTE);
3215 rw_wlock(&pvh_global_lock);
3216 PMAP_LOCK(pmap);
3217 pmap_enter_locked(pmap, va, access, m, prot, FALSE, M_NOWAIT);
3218 PMAP_UNLOCK(pmap);
3219 rw_wunlock(&pvh_global_lock);
3220 }
3221
3222 /*
3223 * Routine: pmap_change_wiring
3224 * Function: Change the wiring attribute for a map/virtual-address
3225 * pair.
3226 * In/out conditions:
3227 * The mapping must already exist in the pmap.
3228 */
3229 void
3230 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
3231 {
3232 struct l2_bucket *l2b;
3233 struct md_page *pvh;
3234 struct pv_entry *pve;
3235 pd_entry_t *pl1pd, l1pd;
3236 pt_entry_t *ptep, pte;
3237 vm_page_t m;
3238
3239 rw_wlock(&pvh_global_lock);
3240 PMAP_LOCK(pmap);
3241 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
3242 l1pd = *pl1pd;
3243 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) {
3244 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME);
3245 KASSERT((m != NULL) && ((m->oflags & VPO_UNMANAGED) == 0),
3246 ("pmap_change_wiring: unmanaged superpage should not "
3247 "be changed"));
3248 KASSERT(pmap != pmap_kernel(),
3249 ("pmap_change_wiring: managed kernel superpage "
3250 "should not exist"));
3251 pvh = pa_to_pvh(l1pd & L1_S_FRAME);
3252 pve = pmap_find_pv(pvh, pmap, trunc_1mpage(va));
3253 if (!wired != ((pve->pv_flags & PVF_WIRED) == 0)) {
3254 if (!pmap_demote_section(pmap, va))
3255 panic("pmap_change_wiring: demotion failed");
3256 } else
3257 goto out;
3258 }
3259 l2b = pmap_get_l2_bucket(pmap, va);
3260 KASSERT(l2b, ("No l2b bucket in pmap_change_wiring"));
3261 ptep = &l2b->l2b_kva[l2pte_index(va)];
3262 pte = *ptep;
3263 m = PHYS_TO_VM_PAGE(l2pte_pa(pte));
3264 if (m != NULL)
3265 pmap_modify_pv(m, pmap, va, PVF_WIRED,
3266 wired == TRUE ? PVF_WIRED : 0);
3267 out:
3268 rw_wunlock(&pvh_global_lock);
3269 PMAP_UNLOCK(pmap);
3270 }
3271
3272
3273 /*
3274 * Copy the range specified by src_addr/len
3275 * from the source map to the range dst_addr/len
3276 * in the destination map.
3277 *
3278 * This routine is only advisory and need not do anything.
3279 */
3280 void
3281 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
3282 vm_size_t len, vm_offset_t src_addr)
3283 {
3284 }
3285
3286
3287 /*
3288 * Routine: pmap_extract
3289 * Function:
3290 * Extract the physical page address associated
3291 * with the given map/virtual_address pair.
3292 */
3293 vm_paddr_t
3294 pmap_extract(pmap_t pmap, vm_offset_t va)
3295 {
3296 vm_paddr_t pa;
3297
3298 PMAP_LOCK(pmap);
3299 pa = pmap_extract_locked(pmap, va);
3300 PMAP_UNLOCK(pmap);
3301 return (pa);
3302 }
3303
3304 static vm_paddr_t
3305 pmap_extract_locked(pmap_t pmap, vm_offset_t va)
3306 {
3307 struct l2_dtable *l2;
3308 pd_entry_t l1pd;
3309 pt_entry_t *ptep, pte;
3310 vm_paddr_t pa;
3311 u_int l1idx;
3312
3313 if (pmap != kernel_pmap)
3314 PMAP_ASSERT_LOCKED(pmap);
3315 l1idx = L1_IDX(va);
3316 l1pd = pmap->pm_l1->l1_kva[l1idx];
3317 if (l1pte_section_p(l1pd)) {
3318 /*
3319 		 * This should only happen for the kernel pmap.
3320 */
3321 KASSERT(pmap == kernel_pmap, ("unexpected section"));
3322 /* XXX: what to do about the bits > 32 ? */
3323 if (l1pd & L1_S_SUPERSEC)
3324 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET);
3325 else
3326 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
3327 } else {
3328 /*
3329 * Note that we can't rely on the validity of the L1
3330 * descriptor as an indication that a mapping exists.
3331 * We have to look it up in the L2 dtable.
3332 */
3333 l2 = pmap->pm_l2[L2_IDX(l1idx)];
3334 if (l2 == NULL ||
3335 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL)
3336 return (0);
3337 pte = ptep[l2pte_index(va)];
3338 if (pte == 0)
3339 return (0);
3340 switch (pte & L2_TYPE_MASK) {
3341 case L2_TYPE_L:
3342 pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
3343 break;
3344 default:
3345 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
3346 break;
3347 }
3348 }
3349 return (pa);
3350 }
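/*
 * Worked example for the section case above (hypothetical values): with
 * L1_S_FRAME == 0xfff00000 and L1_S_OFFSET == 0x000fffff, a section
 * descriptor whose frame is 0x20000000 and va == 0xc0123456 yield
 * pa = 0x20000000 | 0x00023456 = 0x20023456.
 */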
3351
3352 /*
3353 * Atomically extract and hold the physical page with the given
3354 * pmap and virtual address pair if that mapping permits the given
3355 * protection.
3356 *
3357 */
3358 vm_page_t
3359 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
3360 {
3361 struct l2_dtable *l2;
3362 pd_entry_t l1pd;
3363 pt_entry_t *ptep, pte;
3364 vm_paddr_t pa, paddr;
3365 vm_page_t m = NULL;
3366 u_int l1idx;
3367 l1idx = L1_IDX(va);
3368 paddr = 0;
3369
3370 PMAP_LOCK(pmap);
3371 retry:
3372 l1pd = pmap->pm_l1->l1_kva[l1idx];
3373 if (l1pte_section_p(l1pd)) {
3374 /* XXX: what to do about the bits > 32 ? */
3375 if (l1pd & L1_S_SUPERSEC)
3376 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET);
3377 else
3378 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
3379 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr))
3380 goto retry;
3381 if (L1_S_WRITABLE(l1pd) || (prot & VM_PROT_WRITE) == 0) {
3382 m = PHYS_TO_VM_PAGE(pa);
3383 vm_page_hold(m);
3384 }
3385 } else {
3386 /*
3387 * Note that we can't rely on the validity of the L1
3388 * descriptor as an indication that a mapping exists.
3389 * We have to look it up in the L2 dtable.
3390 */
3391 l2 = pmap->pm_l2[L2_IDX(l1idx)];
3392
3393 if (l2 == NULL ||
3394 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3395 PMAP_UNLOCK(pmap);
3396 return (NULL);
3397 }
3398
3399 ptep = &ptep[l2pte_index(va)];
3400 pte = *ptep;
3401
3402 if (pte == 0) {
3403 PMAP_UNLOCK(pmap);
3404 return (NULL);
3405 } else if ((prot & VM_PROT_WRITE) && (pte & L2_APX)) {
3406 PMAP_UNLOCK(pmap);
3407 return (NULL);
3408 } else {
3409 switch (pte & L2_TYPE_MASK) {
3410 case L2_TYPE_L:
3411 panic("extract and hold section mapping");
3412 break;
3413 default:
3414 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
3415 break;
3416 }
3417 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr))
3418 goto retry;
3419 m = PHYS_TO_VM_PAGE(pa);
3420 vm_page_hold(m);
3421 }
3422
3423 }
3424
3425 PMAP_UNLOCK(pmap);
3426 PA_UNLOCK_COND(paddr);
3427 return (m);
3428 }
3429
3430 /*
3431 * Initialize a preallocated and zeroed pmap structure,
3432 * such as one in a vmspace structure.
3433 */
3434
3435 int
3436 pmap_pinit(pmap_t pmap)
3437 {
3438 PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap));
3439
3440 pmap_alloc_l1(pmap);
3441 bzero(pmap->pm_l2, sizeof(pmap->pm_l2));
3442
3443 CPU_ZERO(&pmap->pm_active);
3444
3445 TAILQ_INIT(&pmap->pm_pvchunk);
3446 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
3447 pmap->pm_stats.resident_count = 1;
3448 if (vector_page < KERNBASE) {
3449 pmap_enter(pmap, vector_page,
3450 VM_PROT_READ, PHYS_TO_VM_PAGE(systempage.pv_pa),
3451 VM_PROT_READ, 1);
3452 }
3453 return (1);
3454 }
3455
3456
3457 /***************************************************
3458 * Superpage management routines.
3459 ***************************************************/
3460
3461 static PMAP_INLINE struct pv_entry *
3462 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
3463 {
3464 pv_entry_t pv;
3465
3466 rw_assert(&pvh_global_lock, RA_WLOCKED);
3467
3468 pv = pmap_find_pv(pvh, pmap, va);
3469 if (pv != NULL)
3470 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
3471
3472 return (pv);
3473 }
3474
3475 static void
3476 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
3477 {
3478 pv_entry_t pv;
3479
3480 pv = pmap_pvh_remove(pvh, pmap, va);
3481 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
3482 pmap_free_pv_entry(pmap, pv);
3483 }
3484
3485 static boolean_t
3486 pmap_pv_insert_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
3487 {
3488 struct md_page *pvh;
3489 pv_entry_t pv;
3490
3491 rw_assert(&pvh_global_lock, RA_WLOCKED);
3492 if (pv_entry_count < pv_entry_high_water &&
3493 (pv = pmap_get_pv_entry(pmap, TRUE)) != NULL) {
3494 pv->pv_va = va;
3495 pvh = pa_to_pvh(pa);
3496 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
3497 return (TRUE);
3498 } else
3499 return (FALSE);
3500 }
3501
3502 /*
3503 * Create the pv entries for each of the pages within a superpage.
3504 */
3505 static void
3506 pmap_pv_demote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
3507 {
3508 struct md_page *pvh;
3509 pv_entry_t pve, pv;
3510 vm_offset_t va_last;
3511 vm_page_t m;
3512
3513 rw_assert(&pvh_global_lock, RA_WLOCKED);
3514 KASSERT((pa & L1_S_OFFSET) == 0,
3515 ("pmap_pv_demote_section: pa is not 1mpage aligned"));
3516
3517 /*
3518 * Transfer the 1mpage's pv entry for this mapping to the first
3519 * page's pv list.
3520 */
3521 pvh = pa_to_pvh(pa);
3522 va = trunc_1mpage(va);
3523 pv = pmap_pvh_remove(pvh, pmap, va);
3524 KASSERT(pv != NULL, ("pmap_pv_demote_section: pv not found"));
3525 m = PHYS_TO_VM_PAGE(pa);
3526 TAILQ_INSERT_HEAD(&m->md.pv_list, pv, pv_list);
3527 /* Instantiate the remaining pv entries. */
3528 va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE;
3529 do {
3530 m++;
3531 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3532 ("pmap_pv_demote_section: page %p is not managed", m));
3533 va += PAGE_SIZE;
3534 pve = pmap_get_pv_entry(pmap, FALSE);
3535 pmap_enter_pv(m, pve, pmap, va, pv->pv_flags);
3536 } while (va < va_last);
3537 }
3538
3539 static void
3540 pmap_pv_promote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa)
3541 {
3542 struct md_page *pvh;
3543 pv_entry_t pv;
3544 vm_offset_t va_last;
3545 vm_page_t m;
3546
3547 rw_assert(&pvh_global_lock, RA_WLOCKED);
3548 KASSERT((pa & L1_S_OFFSET) == 0,
3549 ("pmap_pv_promote_section: pa is not 1mpage aligned"));
3550
3551 /*
3552 * Transfer the first page's pv entry for this mapping to the
3553 * 1mpage's pv list. Aside from avoiding the cost of a call
3554 * to get_pv_entry(), a transfer avoids the possibility that
3555 * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim()
3556 * removes one of the mappings that is being promoted.
3557 */
3558 m = PHYS_TO_VM_PAGE(pa);
3559 va = trunc_1mpage(va);
3560 pv = pmap_pvh_remove(&m->md, pmap, va);
3561 KASSERT(pv != NULL, ("pmap_pv_promote_section: pv not found"));
3562 pvh = pa_to_pvh(pa);
3563 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list);
3564 /* Free the remaining pv entries in the newly mapped section pages */
3565 va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE;
3566 do {
3567 m++;
3568 va += PAGE_SIZE;
3569 /*
3570 		 * We don't care about the flags; the first pv contains sufficient
3571 		 * information for all of the pages, so nothing is really lost.
3572 */
3573 pmap_pvh_free(&m->md, pmap, va);
3574 } while (va < va_last);
3575 }
3576
3577 /*
3578 * Tries to create a 1MB page mapping. Returns TRUE if successful and
3579  * FALSE otherwise.  Fails if (1) the page is unmanaged, kernel pmap or vectors
3580 * page, (2) a mapping already exists at the specified virtual address, or
3581 * (3) a pv entry cannot be allocated without reclaiming another pv entry.
3582 */
3583 static boolean_t
3584 pmap_enter_section(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
3585 {
3586 pd_entry_t *pl1pd;
3587 vm_offset_t pa;
3588 struct l2_bucket *l2b;
3589
3590 rw_assert(&pvh_global_lock, RA_WLOCKED);
3591 PMAP_ASSERT_LOCKED(pmap);
3592
3593 /* Skip kernel, vectors page and unmanaged mappings */
3594 if ((pmap == pmap_kernel()) || (L1_IDX(va) == L1_IDX(vector_page)) ||
3595 ((m->oflags & VPO_UNMANAGED) != 0)) {
3596 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx"
3597 " in pmap %p", va, pmap);
3598 return (FALSE);
3599 }
3600 /*
3601 * Check whether this is a valid section superpage entry or
3602 * there is a l2_bucket associated with that L1 page directory.
3603 */
3604 va = trunc_1mpage(va);
3605 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
3606 l2b = pmap_get_l2_bucket(pmap, va);
3607 if ((*pl1pd & L1_S_PROTO) || (l2b != NULL)) {
3608 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx"
3609 " in pmap %p", va, pmap);
3610 return (FALSE);
3611 }
3612 pa = VM_PAGE_TO_PHYS(m);
3613 /*
3614 * Abort this mapping if its PV entry could not be created.
3615 */
3616 if (!pmap_pv_insert_section(pmap, va, VM_PAGE_TO_PHYS(m))) {
3617 CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx"
3618 " in pmap %p", va, pmap);
3619 return (FALSE);
3620 }
3621 /*
3622 * Increment counters.
3623 */
3624 pmap->pm_stats.resident_count += L2_PTE_NUM_TOTAL;
3625 /*
3626 	 * Regardless of the requested permissions, map the superpage read-only.
3627 */
3628 prot &= ~VM_PROT_WRITE;
3629 /*
3630 * Map the superpage.
3631 */
3632 pmap_map_section(pmap, va, pa, prot, FALSE);
3633
3634 pmap_section_mappings++;
3635 CTR2(KTR_PMAP, "pmap_enter_section: success for va %#lx"
3636 " in pmap %p", va, pmap);
3637 return (TRUE);
3638 }
3639
3640 /*
3641 * pmap_remove_section: do the things to unmap a superpage in a process
3642 */
3643 static void
3644 pmap_remove_section(pmap_t pmap, vm_offset_t sva)
3645 {
3646 struct md_page *pvh;
3647 struct l2_bucket *l2b;
3648 pd_entry_t *pl1pd, l1pd;
3649 vm_offset_t eva, va;
3650 vm_page_t m;
3651
3652 PMAP_ASSERT_LOCKED(pmap);
3653 if ((pmap == pmap_kernel()) || (L1_IDX(sva) == L1_IDX(vector_page)))
3654 return;
3655
3656 KASSERT((sva & L1_S_OFFSET) == 0,
3657 ("pmap_remove_section: sva is not 1mpage aligned"));
3658
3659 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)];
3660 l1pd = *pl1pd;
3661
3662 m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME);
3663 KASSERT((m != NULL && ((m->oflags & VPO_UNMANAGED) == 0)),
3664 ("pmap_remove_section: no corresponding vm_page or "
3665 "page unmanaged"));
3666
3667 pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL;
3668 pvh = pa_to_pvh(l1pd & L1_S_FRAME);
3669 pmap_pvh_free(pvh, pmap, sva);
3670 eva = L2_NEXT_BUCKET(sva);
3671 for (va = sva, m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME);
3672 va < eva; va += PAGE_SIZE, m++) {
3673 /*
3674 * Mark base pages referenced but skip marking them dirty.
3675 		 * If the superpage was writable, then all base pages were
3676 		 * already marked dirty in pmap_fault_fixup() before
3677 		 * promotion.  The reference bit, however, might not have been
3678 		 * set for each base page if the superpage was created at once
3679 		 * rather than as a result of promotion.
3680 */
3681 if (L1_S_REFERENCED(l1pd))
3682 vm_page_aflag_set(m, PGA_REFERENCED);
3683 if (TAILQ_EMPTY(&m->md.pv_list) &&
3684 TAILQ_EMPTY(&pvh->pv_list))
3685 vm_page_aflag_clear(m, PGA_WRITEABLE);
3686 }
3687
3688 l2b = pmap_get_l2_bucket(pmap, sva);
3689 if (l2b != NULL) {
3690 KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL,
3691 ("pmap_remove_section: l2_bucket occupancy error"));
3692 pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL);
3693 /*
3694 * Now invalidate L1 slot as it was not invalidated in
3695 * pmap_free_l2_bucket() due to L1_TYPE mismatch.
3696 */
3697 *pl1pd = 0;
3698 PTE_SYNC(pl1pd);
3699 }
3700 }
3701
3702 /*
3703 * Tries to promote the 256, contiguous 4KB page mappings that are
3704 * within a single l2_bucket to a single 1MB section mapping.
3705 * For promotion to occur, two conditions must be met: (1) the 4KB page
3706 * mappings must map aligned, contiguous physical memory and (2) the 4KB page
3707 * mappings must have identical characteristics.
3708 */
3709 static void
3710 pmap_promote_section(pmap_t pmap, vm_offset_t va)
3711 {
3712 pt_entry_t *firstptep, firstpte, oldpte, pa, *pte;
3713 vm_page_t m, oldm;
3714 vm_offset_t first_va, old_va;
3715 struct l2_bucket *l2b = NULL;
3716 vm_prot_t prot;
3717 struct pv_entry *pve, *first_pve;
3718
3719 PMAP_ASSERT_LOCKED(pmap);
3720
3721 prot = VM_PROT_ALL;
3722 /*
3723 	 * Skip promoting kernel pages.  This is justified by the following:
3724 * 1. Kernel is already mapped using section mappings in each pmap
3725 * 2. Managed mappings within the kernel are not to be promoted anyway
3726 */
3727 if (pmap == pmap_kernel()) {
3728 pmap_section_p_failures++;
3729 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
3730 " in pmap %p", va, pmap);
3731 return;
3732 }
3733 	/* Do not attempt to promote the vectors page */
3734 if (L1_IDX(va) == L1_IDX(vector_page)) {
3735 pmap_section_p_failures++;
3736 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
3737 " in pmap %p", va, pmap);
3738 return;
3739 }
3740 /*
3741 * Examine the first PTE in the specified l2_bucket. Abort if this PTE
3742 * is either invalid, unused, or does not map the first 4KB physical
3743 	 * page within the 1MB page.
3744 */
3745 first_va = trunc_1mpage(va);
3746 l2b = pmap_get_l2_bucket(pmap, first_va);
3747 KASSERT(l2b != NULL, ("pmap_promote_section: trying to promote "
3748 	    "a non-existent l2 bucket"));
3749 firstptep = &l2b->l2b_kva[0];
3750
3751 firstpte = *firstptep;
3752 if ((l2pte_pa(firstpte) & L1_S_OFFSET) != 0) {
3753 pmap_section_p_failures++;
3754 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
3755 " in pmap %p", va, pmap);
3756 return;
3757 }
3758
3759 if ((firstpte & (L2_S_PROTO | L2_S_REF)) != (L2_S_PROTO | L2_S_REF)) {
3760 pmap_section_p_failures++;
3761 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
3762 " in pmap %p", va, pmap);
3763 return;
3764 }
3765 /*
3766 	 * ARM uses the pv_entry to mark a particular mapping WIRED, so don't
3767 	 * promote unmanaged pages, since it is impossible to determine whether
3768 	 * the page is wired or not if there is no corresponding pv_entry.
3769 */
3770 m = PHYS_TO_VM_PAGE(l2pte_pa(firstpte));
3771 if (m && ((m->oflags & VPO_UNMANAGED) != 0)) {
3772 pmap_section_p_failures++;
3773 CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x"
3774 " in pmap %p", va, pmap);
3775 return;
3776 }
3777 first_pve = pmap_find_pv(&m->md, pmap, first_va);
3778 /*
3779 * PTE is modified only on write due to modified bit
3780 * emulation. If the entry is referenced and writable
3781 * then it is modified and we don't clear write enable.
3782 * Otherwise, writing is disabled in PTE anyway and
3783 * we just configure protections for the section mapping
3784 * that is going to be created.
3785 */
3786 if (!L2_S_WRITABLE(firstpte) && (first_pve->pv_flags & PVF_WRITE)) {
3787 first_pve->pv_flags &= ~PVF_WRITE;
3788 prot &= ~VM_PROT_WRITE;
3789 }
3790
3791 if (!L2_S_EXECUTABLE(firstpte))
3792 prot &= ~VM_PROT_EXECUTE;
3793
3794 /*
3795 * Examine each of the other PTEs in the specified l2_bucket.
3796 * Abort if this PTE maps an unexpected 4KB physical page or
3797 * does not have identical characteristics to the first PTE.
3798 */
3799 pa = l2pte_pa(firstpte) + ((L2_PTE_NUM_TOTAL - 1) * PAGE_SIZE);
3800 old_va = L2_NEXT_BUCKET(first_va) - PAGE_SIZE;
3801
3802 for (pte = (firstptep + L2_PTE_NUM_TOTAL - 1); pte > firstptep; pte--) {
3803 oldpte = *pte;
3804 if (l2pte_pa(oldpte) != pa) {
3805 pmap_section_p_failures++;
3806 CTR2(KTR_PMAP, "pmap_promote_section: failure for "
3807 "va %#x in pmap %p", va, pmap);
3808 return;
3809 }
3810 if ((oldpte & L2_S_PROMOTE) != (firstpte & L2_S_PROMOTE)) {
3811 pmap_section_p_failures++;
3812 CTR2(KTR_PMAP, "pmap_promote_section: failure for "
3813 "va %#x in pmap %p", va, pmap);
3814 return;
3815 }
3816 oldm = PHYS_TO_VM_PAGE(l2pte_pa(oldpte));
3817 if (oldm && ((oldm->oflags & VPO_UNMANAGED) != 0)) {
3818 pmap_section_p_failures++;
3819 CTR2(KTR_PMAP, "pmap_promote_section: failure for "
3820 "va %#x in pmap %p", va, pmap);
3821 return;
3822 }
3823
3824 pve = pmap_find_pv(&oldm->md, pmap, old_va);
3825 if (pve == NULL) {
3826 pmap_section_p_failures++;
3827 CTR2(KTR_PMAP, "pmap_promote_section: failure for "
3828 "va %#x old_va %x - no pve", va, old_va);
3829 return;
3830 }
3831
3832 if (!L2_S_WRITABLE(oldpte) && (pve->pv_flags & PVF_WRITE))
3833 pve->pv_flags &= ~PVF_WRITE;
3834
3835 old_va -= PAGE_SIZE;
3836 pa -= PAGE_SIZE;
3837 }
3838 /*
3839 * Promote the pv entries.
3840 */
3841 pmap_pv_promote_section(pmap, first_va, l2pte_pa(firstpte));
3842 /*
3843 * Map the superpage.
3844 */
3845 pmap_map_section(pmap, first_va, l2pte_pa(firstpte), prot, TRUE);
3846 pmap_section_promotions++;
3847 CTR2(KTR_PMAP, "pmap_promote_section: success for va %#x"
3848 " in pmap %p", first_va, pmap);
3849 }
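/*
 * Illustrative sketch (annotation, not part of the original source): the
 * promotion above succeeds only when all 256 PTEs of the l2_bucket map 256
 * consecutive 4KB frames starting at a 1MB-aligned physical address and
 * share identical attributes.  The constants and the helper name below are
 * stand-ins for the kernel macros (PAGE_SIZE, L2_PTE_NUM_TOTAL, L1_S_SIZE).
 */
#if 0
static int
example_promotable(uint32_t first_pa)
{
	const uint32_t page_size = 4096;		/* PAGE_SIZE */
	const uint32_t nptes = 256;			/* L2_PTE_NUM_TOTAL */
	const uint32_t section = nptes * page_size;	/* 1MB, L1_S_SIZE */

	/* The first frame must be 1MB-aligned (no bits within L1_S_OFFSET). */
	return ((first_pa & (section - 1)) == 0);
}
#endif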
3850
3851 /*
3852 * Fills an l2_bucket with mappings to consecutive physical pages.
3853 */
3854 static void
3855 pmap_fill_l2b(struct l2_bucket *l2b, pt_entry_t newpte)
3856 {
3857 pt_entry_t *ptep;
3858 int i;
3859
3860 for (i = 0; i < L2_PTE_NUM_TOTAL; i++) {
3861 ptep = &l2b->l2b_kva[i];
3862 *ptep = newpte;
3863 PTE_SYNC(ptep);
3864
3865 newpte += PAGE_SIZE;
3866 }
3867
3868 l2b->l2b_occupancy = L2_PTE_NUM_TOTAL;
3869 }
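/*
 * Usage note (annotation, not part of the original source):
 * pmap_demote_section() below calls pmap_fill_l2b() with a template PTE
 * built from the section descriptor (pa | L1_S_DEMOTE(l1pd)); the loop
 * above then advances the physical address by PAGE_SIZE for each of the
 * 256 slots, so the bucket ends up covering the same 1MB of physical
 * memory that the section mapping did.
 */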
3870
3871 /*
3872 * Tries to demote a 1MB section mapping. If demotion fails, the
3873 * 1MB section mapping is invalidated.
3874 */
3875 static boolean_t
3876 pmap_demote_section(pmap_t pmap, vm_offset_t va)
3877 {
3878 struct l2_bucket *l2b;
3879 struct pv_entry *l1pdpve;
3880 struct md_page *pvh;
3881 pd_entry_t *pl1pd, l1pd;
3882 pt_entry_t *firstptep, newpte;
3883 vm_offset_t pa;
3884 vm_page_t m;
3885
3886 PMAP_ASSERT_LOCKED(pmap);
3887 /*
3888 * According to the assumptions described in pmap_promote_section,
3889 * the kernel is, and always should be, mapped using 1MB section mappings.
3890 * Moreover, managed kernel pages are never promoted.
3891 */
3892 KASSERT(pmap != pmap_kernel() && L1_IDX(va) != L1_IDX(vector_page),
3893 ("pmap_demote_section: forbidden section mapping"));
3894
3895 va = trunc_1mpage(va);
3896 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
3897 l1pd = *pl1pd;
3898 KASSERT((l1pd & L1_TYPE_MASK) == L1_S_PROTO,
3899 ("pmap_demote_section: not section or invalid section"));
3900
3901 pa = l1pd & L1_S_FRAME;
3902 m = PHYS_TO_VM_PAGE(pa);
3903 KASSERT((m != NULL && (m->oflags & VPO_UNMANAGED) == 0),
3904 ("pmap_demote_section: no vm_page for selected superpage or"
3905 "unmanaged"));
3906
3907 pvh = pa_to_pvh(pa);
3908 l1pdpve = pmap_find_pv(pvh, pmap, va);
3909 KASSERT(l1pdpve != NULL, ("pmap_demote_section: no pv entry for "
3910 "managed page"));
3911
3912 l2b = pmap_get_l2_bucket(pmap, va);
3913 if (l2b == NULL) {
3914 KASSERT((l1pdpve->pv_flags & PVF_WIRED) == 0,
3915 ("pmap_demote_section: No l2_bucket for wired mapping"));
3916 /*
3917 * Invalidate the 1MB section mapping and return
3918 * "failure" if the mapping was never accessed or the
3919 * allocation of the new l2_bucket fails.
3920 */
3921 if (!L1_S_REFERENCED(l1pd) ||
3922 (l2b = pmap_alloc_l2_bucket(pmap, va)) == NULL) {
3923 /* Unmap and invalidate superpage. */
3924 pmap_remove_section(pmap, trunc_1mpage(va));
3925 CTR2(KTR_PMAP, "pmap_demote_section: failure for "
3926 "va %#x in pmap %p", va, pmap);
3927 return (FALSE);
3928 }
3929 }
3930
3931 /*
3932 * Now the corresponding l2_bucket should be available.
3933 * Process it to recreate 256 PTEs, one for each base page
3934 * within the superpage.
3935 */
3936 newpte = pa | L1_S_DEMOTE(l1pd);
3937 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE)
3938 newpte |= pte_l2_s_cache_mode;
3939
3940 /*
3941 * If the l2_bucket is new, initialize it.
3942 */
3943 if (l2b->l2b_occupancy == 0)
3944 pmap_fill_l2b(l2b, newpte);
3945 else {
3946 firstptep = &l2b->l2b_kva[0];
3947 KASSERT(l2pte_pa(*firstptep) == (pa),
3948 ("pmap_demote_section: firstpte and newpte map different "
3949 "physical addresses"));
3950 /*
3951 * If the mapping has changed attributes, update the page table
3952 * entries.
3953 */
3954 if ((*firstptep & L2_S_PROMOTE) != (L1_S_DEMOTE(l1pd)))
3955 pmap_fill_l2b(l2b, newpte);
3956 }
3957 /* Demote PV entry */
3958 pmap_pv_demote_section(pmap, va, pa);
3959
3960 /* Now fix-up L1 */
3961 l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO;
3962 *pl1pd = l1pd;
3963 PTE_SYNC(pl1pd);
3964
3965 pmap_section_demotions++;
3966 CTR2(KTR_PMAP, "pmap_demote_section: success for va %#x"
3967 " in pmap %p", va, pmap);
3968 return (TRUE);
3969 }
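/*
 * Annotation (not part of the original source): the final fix-up above
 * swaps the L1 slot from a section descriptor to a coarse page-table
 * descriptor (L1_C_PROTO) pointing at the l2_bucket's physical address,
 * so subsequent translations walk the newly recreated L2 page table
 * instead of the old 1MB mapping.
 */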
3970
3971 /***************************************************
3972 * page management routines.
3973 ***************************************************/
3974
3975 /*
3976 * We are in a serious low memory condition. Resort to
3977 * drastic measures to free some pages so we can allocate
3978 * another pv entry chunk.
3979 */
3980 static vm_page_t
3981 pmap_pv_reclaim(pmap_t locked_pmap)
3982 {
3983 struct pch newtail;
3984 struct pv_chunk *pc;
3985 struct l2_bucket *l2b = NULL;
3986 pmap_t pmap;
3987 pd_entry_t *pl1pd;
3988 pt_entry_t *ptep;
3989 pv_entry_t pv;
3990 vm_offset_t va;
3991 vm_page_t free, m, m_pc;
3992 uint32_t inuse;
3993 int bit, field, freed, idx;
3994
3995 PMAP_ASSERT_LOCKED(locked_pmap);
3996 pmap = NULL;
3997 free = m_pc = NULL;
3998 TAILQ_INIT(&newtail);
3999 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
4000 free == NULL)) {
4001 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
4002 if (pmap != pc->pc_pmap) {
4003 if (pmap != NULL) {
4004 cpu_tlb_flushID();
4005 cpu_cpwait();
4006 if (pmap != locked_pmap)
4007 PMAP_UNLOCK(pmap);
4008 }
4009 pmap = pc->pc_pmap;
4010 /* Avoid deadlock and lock recursion. */
4011 if (pmap > locked_pmap)
4012 PMAP_LOCK(pmap);
4013 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
4014 pmap = NULL;
4015 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
4016 continue;
4017 }
4018 }
4019
4020 /*
4021 * Destroy every non-wired, 4 KB page mapping in the chunk.
4022 */
4023 freed = 0;
4024 for (field = 0; field < _NPCM; field++) {
4025 for (inuse = ~pc->pc_map[field] & pc_freemask[field];
4026 inuse != 0; inuse &= ~(1UL << bit)) {
4027 bit = ffs(inuse) - 1;
4028 idx = field * sizeof(inuse) * NBBY + bit;
4029 pv = &pc->pc_pventry[idx];
4030 va = pv->pv_va;
4031
4032 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
4033 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO)
4034 continue;
4035 if (pv->pv_flags & PVF_WIRED)
4036 continue;
4037
4038 l2b = pmap_get_l2_bucket(pmap, va);
4039 KASSERT(l2b != NULL, ("No l2 bucket"));
4040 ptep = &l2b->l2b_kva[l2pte_index(va)];
4041 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep));
4042 KASSERT((vm_offset_t)m >= KERNBASE,
4043 ("Trying to access non-existent page "
4044 "va %x pte %x", va, *ptep));
4045 *ptep = 0;
4046 PTE_SYNC(ptep);
4047 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
4048 if (TAILQ_EMPTY(&m->md.pv_list))
4049 vm_page_aflag_clear(m, PGA_WRITEABLE);
4050 pc->pc_map[field] |= 1UL << bit;
4051 freed++;
4052 }
4053 }
4054
4055 if (freed == 0) {
4056 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
4057 continue;
4058 }
4059 /* Every freed mapping is for a 4 KB page. */
4060 pmap->pm_stats.resident_count -= freed;
4061 PV_STAT(pv_entry_frees += freed);
4062 PV_STAT(pv_entry_spare += freed);
4063 pv_entry_count -= freed;
4064 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
4065 for (field = 0; field < _NPCM; field++)
4066 if (pc->pc_map[field] != pc_freemask[field]) {
4067 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
4068 pc_list);
4069 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
4070
4071 /*
4072 * One freed pv entry in locked_pmap is
4073 * sufficient.
4074 */
4075 if (pmap == locked_pmap)
4076 goto out;
4077 break;
4078 }
4079 if (field == _NPCM) {
4080 PV_STAT(pv_entry_spare -= _NPCPV);
4081 PV_STAT(pc_chunk_count--);
4082 PV_STAT(pc_chunk_frees++);
4083 /* Entire chunk is free; return it. */
4084 m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
4085 pmap_qremove((vm_offset_t)pc, 1);
4086 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
4087 break;
4088 }
4089 }
4090 out:
4091 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
4092 if (pmap != NULL) {
4093 cpu_tlb_flushID();
4094 cpu_cpwait();
4095 if (pmap != locked_pmap)
4096 PMAP_UNLOCK(pmap);
4097 }
4098 return (m_pc);
4099 }
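/*
 * Annotation (not part of the original source): note the lock-ordering
 * trick in the loop above.  A pmap whose address is greater than
 * locked_pmap may be locked unconditionally, while any other pmap is only
 * attempted with PMAP_TRYLOCK(); this keeps every thread acquiring pmap
 * locks in a consistent order and so avoids both deadlock and lock
 * recursion, at the cost of occasionally skipping a chunk.
 */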
4100
4101 /*
4102 * free the pv_entry back to the free list
4103 */
4104 static void
4105 pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv)
4106 {
4107 struct pv_chunk *pc;
4108 int bit, field, idx;
4109
4110 rw_assert(&pvh_global_lock, RA_WLOCKED);
4111 PMAP_ASSERT_LOCKED(pmap);
4112 PV_STAT(pv_entry_frees++);
4113 PV_STAT(pv_entry_spare++);
4114 pv_entry_count--;
4115 pc = pv_to_chunk(pv);
4116 idx = pv - &pc->pc_pventry[0];
4117 field = idx / (sizeof(u_long) * NBBY);
4118 bit = idx % (sizeof(u_long) * NBBY);
4119 pc->pc_map[field] |= 1ul << bit;
4120 for (idx = 0; idx < _NPCM; idx++)
4121 if (pc->pc_map[idx] != pc_freemask[idx]) {
4122 /*
4123 * 98% of the time, pc is already at the head of the
4124 * list. If it isn't already, move it to the head.
4125 */
4126 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
4127 pc)) {
4128 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
4129 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
4130 pc_list);
4131 }
4132 return;
4133 }
4134 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
4135 pmap_free_pv_chunk(pc);
4136 }
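/*
 * Sketch (annotation, not part of the original source): the field/bit
 * arithmetic above treats pc_map[] as one long bitmap with
 * sizeof(u_long) * NBBY bits per word, where a set bit means "entry free".
 * For example, assuming 32-bit longs, an entry index of 37 lands in
 * pc_map[1], bit 5:
 *
 *	field = 37 / 32 = 1;
 *	bit   = 37 % 32 = 5;
 *	pc_map[1] |= 1ul << 5;
 */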
4137
4138 static void
4139 pmap_free_pv_chunk(struct pv_chunk *pc)
4140 {
4141 vm_page_t m;
4142
4143 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
4144 PV_STAT(pv_entry_spare -= _NPCPV);
4145 PV_STAT(pc_chunk_count--);
4146 PV_STAT(pc_chunk_frees++);
4147 /* entire chunk is free, return it */
4148 m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
4149 pmap_qremove((vm_offset_t)pc, 1);
4150 vm_page_unwire(m, 0);
4151 vm_page_free(m);
4152 pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
4153
4154 }
4155
4156 static pv_entry_t
4157 pmap_get_pv_entry(pmap_t pmap, boolean_t try)
4158 {
4159 static const struct timeval printinterval = { 60, 0 };
4160 static struct timeval lastprint;
4161 struct pv_chunk *pc;
4162 pv_entry_t pv;
4163 vm_page_t m;
4164 int bit, field, idx;
4165
4166 rw_assert(&pvh_global_lock, RA_WLOCKED);
4167 PMAP_ASSERT_LOCKED(pmap);
4168 PV_STAT(pv_entry_allocs++);
4169 pv_entry_count++;
4170
4171 if (pv_entry_count > pv_entry_high_water)
4172 if (ratecheck(&lastprint, &printinterval))
4173 printf("%s: Approaching the limit on PV entries.\n",
4174 __func__);
4175 retry:
4176 pc = TAILQ_FIRST(&pmap->pm_pvchunk);
4177 if (pc != NULL) {
4178 for (field = 0; field < _NPCM; field++) {
4179 if (pc->pc_map[field]) {
4180 bit = ffs(pc->pc_map[field]) - 1;
4181 break;
4182 }
4183 }
4184 if (field < _NPCM) {
4185 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
4186 pv = &pc->pc_pventry[idx];
4187 pc->pc_map[field] &= ~(1ul << bit);
4188 /* If this was the last item, move it to tail */
4189 for (field = 0; field < _NPCM; field++)
4190 if (pc->pc_map[field] != 0) {
4191 PV_STAT(pv_entry_spare--);
4192 return (pv); /* not full, return */
4193 }
4194 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
4195 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
4196 PV_STAT(pv_entry_spare--);
4197 return (pv);
4198 }
4199 }
4200 /*
4201 * Access to the ptelist "pv_vafree" is synchronized by the pvh
4202 * global lock. If "pv_vafree" is currently non-empty, it will
4203 * remain non-empty until pmap_ptelist_alloc() completes.
4204 */
4205 if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
4206 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
4207 if (try) {
4208 pv_entry_count--;
4209 PV_STAT(pc_chunk_tryfail++);
4210 return (NULL);
4211 }
4212 m = pmap_pv_reclaim(pmap);
4213 if (m == NULL)
4214 goto retry;
4215 }
4216 PV_STAT(pc_chunk_count++);
4217 PV_STAT(pc_chunk_allocs++);
4218 pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
4219 pmap_qenter((vm_offset_t)pc, &m, 1);
4220 pc->pc_pmap = pmap;
4221 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
4222 for (field = 1; field < _NPCM; field++)
4223 pc->pc_map[field] = pc_freemask[field];
4224 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
4225 pv = &pc->pc_pventry[0];
4226 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
4227 PV_STAT(pv_entry_spare += _NPCPV - 1);
4228 return (pv);
4229 }
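/*
 * Annotation (not part of the original source): when a fresh chunk is
 * mapped in above, pc_map[0] has bit 0 cleared because the pv entry being
 * returned is pc_pventry[0]; every other bit stays set (free).  A chunk
 * that still has free entries is kept at the head of pm_pvchunk, while a
 * fully used chunk is moved to the tail, so later allocations find free
 * entries quickly.
 */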
4230
4231 /*
4232 * Remove the given range of addresses from the specified map.
4233 *
4234 * It is assumed that the start and end are properly
4235 * rounded to the page size.
4236 */
4237 #define PMAP_REMOVE_CLEAN_LIST_SIZE 3
4238 void
4239 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
4240 {
4241 struct l2_bucket *l2b;
4242 vm_offset_t next_bucket;
4243 pd_entry_t *pl1pd, l1pd;
4244 pt_entry_t *ptep;
4245 u_int total;
4246 u_int mappings, is_exec, is_refd;
4247 int flushall = 0;
4248
4249
4250 /*
4251 * we lock in the pmap => pv_head direction
4252 */
4253
4254 rw_wlock(&pvh_global_lock);
4255 PMAP_LOCK(pmap);
4256 total = 0;
4257 while (sva < eva) {
4258 /*
4259 * Check for large page.
4260 */
4261 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)];
4262 l1pd = *pl1pd;
4263 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) {
4264 KASSERT((l1pd & L1_S_DOM_MASK) !=
4265 L1_S_DOM(PMAP_DOMAIN_KERNEL), ("pmap_remove: "
4266 "Trying to remove kernel section mapping"));
4267 /*
4268 * Are we removing the entire large page? If not,
4269 * demote the mapping and fall through.
4270 */
4271 if (sva + L1_S_SIZE == L2_NEXT_BUCKET(sva) &&
4272 eva >= L2_NEXT_BUCKET(sva)) {
4273 pmap_remove_section(pmap, sva);
4274 sva = L2_NEXT_BUCKET(sva);
4275 continue;
4276 } else if (!pmap_demote_section(pmap, sva)) {
4277 /* The large page mapping was destroyed. */
4278 sva = L2_NEXT_BUCKET(sva);
4279 continue;
4280 }
4281 }
4282 /*
4283 * Do one L2 bucket's worth at a time.
4284 */
4285 next_bucket = L2_NEXT_BUCKET(sva);
4286 if (next_bucket > eva)
4287 next_bucket = eva;
4288
4289 l2b = pmap_get_l2_bucket(pmap, sva);
4290 if (l2b == NULL) {
4291 sva = next_bucket;
4292 continue;
4293 }
4294
4295 ptep = &l2b->l2b_kva[l2pte_index(sva)];
4296 mappings = 0;
4297
4298 while (sva < next_bucket) {
4299 struct vm_page *m;
4300 pt_entry_t pte;
4301 vm_paddr_t pa;
4302
4303 pte = *ptep;
4304
4305 if (pte == 0) {
4306 /*
4307 * Nothing here, move along
4308 */
4309 sva += PAGE_SIZE;
4310 ptep++;
4311 continue;
4312 }
4313
4314 pmap->pm_stats.resident_count--;
4315 pa = l2pte_pa(pte);
4316 is_exec = 0;
4317 is_refd = 1;
4318
4319 /*
4320 * Update flags. In a number of circumstances,
4321 * we could cluster a lot of these and do a
4322 * number of sequential pages in one go.
4323 */
4324 if ((m = PHYS_TO_VM_PAGE(pa)) != NULL) {
4325 struct pv_entry *pve;
4326
4327 pve = pmap_remove_pv(m, pmap, sva);
4328 if (pve) {
4329 is_exec = PTE_BEEN_EXECD(pte);
4330 is_refd = PTE_BEEN_REFD(pte);
4331 pmap_free_pv_entry(pmap, pve);
4332 }
4333 }
4334
4335 if (pmap_is_current(pmap)) {
4336 total++;
4337 if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) {
4338 if (is_exec)
4339 cpu_tlb_flushID_SE(sva);
4340 else if (is_refd)
4341 cpu_tlb_flushD_SE(sva);
4342 } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE)
4343 flushall = 1;
4344 }
4345 *ptep = 0;
4346 PTE_SYNC(ptep);
4347
4348 sva += PAGE_SIZE;
4349 ptep++;
4350 mappings++;
4351 }
4352
4353 pmap_free_l2_bucket(pmap, l2b, mappings);
4354 }
4355
4356 rw_wunlock(&pvh_global_lock);
4357 if (flushall)
4358 cpu_tlb_flushID();
4359 PMAP_UNLOCK(pmap);
4360 }
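/*
 * Annotation (not part of the original source): PMAP_REMOVE_CLEAN_LIST_SIZE
 * is a small heuristic threshold.  For the first few removed mappings the
 * TLB is invalidated one entry at a time (cpu_tlb_flushID_SE or
 * cpu_tlb_flushD_SE); once the count reaches the threshold it is cheaper
 * to defer and perform a single full cpu_tlb_flushID() after the loop,
 * which is what the "flushall" flag arranges.
 */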
4361
4362 /*
4363 * pmap_zero_page()
4364 *
4365 * Zero a given physical page by mapping it at a page hook point.
4366 * In doing the zero page op, the page we zero is mapped cacheable, since on
4367 * StrongARM accesses to non-cached pages are non-burst, making writing
4368 * _any_ bulk data very slow.
4369 */
4370 static void
4371 pmap_zero_page_gen(vm_page_t m, int off, int size)
4372 {
4373
4374 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
4375 if (!TAILQ_EMPTY(&m->md.pv_list))
4376 panic("pmap_zero_page: page has mappings");
4377
4378 mtx_lock(&cmtx);
4379 /*
4380 * Hook in the page, zero it, invalidate the TLB as needed.
4381 *
4382 * Note the temporary zero-page mapping must be a non-cached page in
4383 * order to work without corruption when write-allocate is enabled.
4384 */
4385 *cdst_pte = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF;
4386 pmap_set_prot(cdst_pte, VM_PROT_WRITE, 0);
4387 PTE_SYNC(cdst_pte);
4388 cpu_tlb_flushD_SE(cdstp);
4389 cpu_cpwait();
4390 if (off || size != PAGE_SIZE)
4391 bzero((void *)(cdstp + off), size);
4392 else
4393 bzero_page(cdstp);
4394
4395 /*
4396 * Although aliasing is not possible when we use the
4397 * cdstp temporary mapping, memory that will later be
4398 * mapped as non-cached or with write-through caches
4399 * might end up being overwritten when wbinv_all is called.
4400 * So make sure the caches are clean after the operation.
4401 */
4402 cpu_idcache_wbinv_range(cdstp, size);
4403 pmap_l2cache_wbinv_range(cdstp, phys, size);
4404
4405 mtx_unlock(&cmtx);
4406 }
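/*
 * Annotation (not part of the original source): cdstp is a dedicated
 * kernel virtual "hook" address with its own PTE (cdst_pte).  Under cmtx
 * the page to be zeroed is temporarily mapped there, written with bzero,
 * and the caches are cleaned for that range, so the operation never needs
 * a permanent kernel mapping of the target page.
 */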
4407
4408 /*
4409 * pmap_zero_page zeros the specified hardware page by mapping
4410 * the page into KVM and using bzero to clear its contents.
4411 */
4412 void
4413 pmap_zero_page(vm_page_t m)
4414 {
4415 pmap_zero_page_gen(m, 0, PAGE_SIZE);
4416 }
4417
4418
4419 /*
4420 * pmap_zero_page_area zeros the specified hardware page by mapping
4421 * the page into KVM and using bzero to clear its contents.
4422 *
4423 * off and size may not cover an area beyond a single hardware page.
4424 */
4425 void
4426 pmap_zero_page_area(vm_page_t m, int off, int size)
4427 {
4428
4429 pmap_zero_page_gen(m, off, size);
4430 }
4431
4432
4433 /*
4434 * pmap_zero_page_idle zeros the specified hardware page by mapping
4435 * the page into KVM and using bzero to clear its contents. This
4436 * is intended to be called from the vm_pagezero process only and
4437 * outside of Giant.
4438 */
4439 void
4440 pmap_zero_page_idle(vm_page_t m)
4441 {
4442
4443 pmap_zero_page(m);
4444 }
4445
4446 /*
4447 * pmap_copy_page copies the specified (machine independent)
4448 * page by mapping the page into virtual memory and using
4449 * bcopy to copy the page, one machine dependent page at a
4450 * time.
4451 */
4452
4453 /*
4454 * pmap_copy_page()
4455 *
4456 * Copy one physical page into another, by mapping the pages into
4457 * hook points. The same comment regarding cacheability as in
4458 * pmap_zero_page also applies here.
4459 */
4460 void
4461 pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst)
4462 {
4463 /*
4464 * Hold the source page's lock for the duration of the copy
4465 * so that no other mappings can be created while we have a
4466 * potentially aliased mapping.
4467 * Map the pages into the page hook points, copy them, and purge
4468 * the cache for the appropriate page. Invalidate the TLB
4469 * as required.
4470 */
4471 mtx_lock(&cmtx);
4472
4473 /* For ARMv6 the use of the System bit is deprecated, and mapping with AP
4474 * bits set to 0x0 makes the page inaccessible. csrc_pte is mapped
4475 * read/write until proper mapping defines are created for ARMv6.
4476 */
4477 *csrc_pte = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF;
4478 pmap_set_prot(csrc_pte, VM_PROT_READ, 0);
4479 PTE_SYNC(csrc_pte);
4480
4481 *cdst_pte = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF;
4482 pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0);
4483 PTE_SYNC(cdst_pte);
4484
4485 cpu_tlb_flushD_SE(csrcp);
4486 cpu_tlb_flushD_SE(cdstp);
4487 cpu_cpwait();
4488
4489 /*
4490 * Although aliasing is not possible when we use the
4491 * cdstp temporary mapping, memory that will later be
4492 * mapped as non-cached or with write-through caches
4493 * might end up being overwritten when wbinv_all is called.
4494 * So make sure the caches are clean after the copy operation.
4495 */
4496 bcopy_page(csrcp, cdstp);
4497
4498 cpu_idcache_wbinv_range(cdstp, PAGE_SIZE);
4499 pmap_l2cache_wbinv_range(cdstp, dst, PAGE_SIZE);
4500
4501 mtx_unlock(&cmtx);
4502 }
4503
4504 int unmapped_buf_allowed = 1;
4505
4506 void
4507 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
4508 vm_offset_t b_offset, int xfersize)
4509 {
4510 vm_page_t a_pg, b_pg;
4511 vm_offset_t a_pg_offset, b_pg_offset;
4512 int cnt;
4513
4514 mtx_lock(&cmtx);
4515 while (xfersize > 0) {
4516 a_pg = ma[a_offset >> PAGE_SHIFT];
4517 a_pg_offset = a_offset & PAGE_MASK;
4518 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
4519 b_pg = mb[b_offset >> PAGE_SHIFT];
4520 b_pg_offset = b_offset & PAGE_MASK;
4521 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
4522 *csrc_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) |
4523 pte_l2_s_cache_mode | L2_S_REF;
4524 pmap_set_prot(csrc_pte, VM_PROT_READ, 0);
4525 PTE_SYNC(csrc_pte);
4526 *cdst_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) |
4527 pte_l2_s_cache_mode | L2_S_REF;
4528 pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0);
4529 PTE_SYNC(cdst_pte);
4530 cpu_tlb_flushD_SE(csrcp);
4531 cpu_tlb_flushD_SE(cdstp);
4532 cpu_cpwait();
4533 bcopy((char *)csrcp + a_pg_offset, (char *)cdstp + b_pg_offset,
4534 cnt);
4535 cpu_idcache_wbinv_range(cdstp + b_pg_offset, cnt);
4536 pmap_l2cache_wbinv_range(cdstp + b_pg_offset,
4537 VM_PAGE_TO_PHYS(b_pg) + b_pg_offset, cnt);
4538 xfersize -= cnt;
4539 a_offset += cnt;
4540 b_offset += cnt;
4541 }
4542 mtx_unlock(&cmtx);
4543 }
4544
4545 void
4546 pmap_copy_page(vm_page_t src, vm_page_t dst)
4547 {
4548
4549 if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size &&
4550 _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst),
4551 (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0)
4552 return;
4553
4554 pmap_copy_page_generic(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
4555 }
4556
4557 /*
4558 * this routine returns true if a physical page resides
4559 * in the given pmap.
4560 */
4561 boolean_t
4562 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
4563 {
4564 struct md_page *pvh;
4565 pv_entry_t pv;
4566 int loops = 0;
4567 boolean_t rv;
4568
4569 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4570 ("pmap_page_exists_quick: page %p is not managed", m));
4571 rv = FALSE;
4572 rw_wlock(&pvh_global_lock);
4573 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
4574 if (PV_PMAP(pv) == pmap) {
4575 rv = TRUE;
4576 break;
4577 }
4578 loops++;
4579 if (loops >= 16)
4580 break;
4581 }
4582 if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
4583 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
4584 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
4585 if (PV_PMAP(pv) == pmap) {
4586 rv = TRUE;
4587 break;
4588 }
4589 loops++;
4590 if (loops >= 16)
4591 break;
4592 }
4593 }
4594 rw_wunlock(&pvh_global_lock);
4595 return (rv);
4596 }
4597
4598 /*
4599 * pmap_page_wired_mappings:
4600 *
4601 * Return the number of managed mappings to the given physical page
4602 * that are wired.
4603 */
4604 int
4605 pmap_page_wired_mappings(vm_page_t m)
4606 {
4607 int count;
4608
4609 count = 0;
4610 if ((m->oflags & VPO_UNMANAGED) != 0)
4611 return (count);
4612 rw_wlock(&pvh_global_lock);
4613 count = pmap_pvh_wired_mappings(&m->md, count);
4614 if ((m->flags & PG_FICTITIOUS) == 0) {
4615 count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)),
4616 count);
4617 }
4618 rw_wunlock(&pvh_global_lock);
4619 return (count);
4620 }
4621
4622 /*
4623 * pmap_pvh_wired_mappings:
4624 *
4625 * Return the updated number "count" of managed mappings that are wired.
4626 */
4627 static int
4628 pmap_pvh_wired_mappings(struct md_page *pvh, int count)
4629 {
4630 pv_entry_t pv;
4631
4632 rw_assert(&pvh_global_lock, RA_WLOCKED);
4633 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
4634 if ((pv->pv_flags & PVF_WIRED) != 0)
4635 count++;
4636 }
4637 return (count);
4638 }
4639
4640 /*
4641 * Returns TRUE if any of the given mappings were referenced and FALSE
4642 * otherwise. Both page and section mappings are supported.
4643 */
4644 static boolean_t
4645 pmap_is_referenced_pvh(struct md_page *pvh)
4646 {
4647 struct l2_bucket *l2b;
4648 pv_entry_t pv;
4649 pd_entry_t *pl1pd;
4650 pt_entry_t *ptep;
4651 pmap_t pmap;
4652 boolean_t rv;
4653
4654 rw_assert(&pvh_global_lock, RA_WLOCKED);
4655 rv = FALSE;
4656 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
4657 pmap = PV_PMAP(pv);
4658 PMAP_LOCK(pmap);
4659 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)];
4660 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO)
4661 rv = L1_S_REFERENCED(*pl1pd);
4662 else {
4663 l2b = pmap_get_l2_bucket(pmap, pv->pv_va);
4664 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
4665 rv = L2_S_REFERENCED(*ptep);
4666 }
4667 PMAP_UNLOCK(pmap);
4668 if (rv)
4669 break;
4670 }
4671 return (rv);
4672 }
4673
4674 /*
4675 * pmap_is_referenced:
4676 *
4677 * Return whether or not the specified physical page was referenced
4678 * in any physical maps.
4679 */
4680 boolean_t
4681 pmap_is_referenced(vm_page_t m)
4682 {
4683 boolean_t rv;
4684
4685 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4686 ("pmap_is_referenced: page %p is not managed", m));
4687 rw_wlock(&pvh_global_lock);
4688 rv = pmap_is_referenced_pvh(&m->md) ||
4689 ((m->flags & PG_FICTITIOUS) == 0 &&
4690 pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
4691 rw_wunlock(&pvh_global_lock);
4692 return (rv);
4693 }
4694
4695 /*
4696 * pmap_ts_referenced:
4697 *
4698 * Return the count of reference bits for a page, clearing all of them.
4699 */
4700 int
4701 pmap_ts_referenced(vm_page_t m)
4702 {
4703
4704 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4705 ("pmap_ts_referenced: page %p is not managed", m));
4706 return (pmap_clearbit(m, PVF_REF));
4707 }
4708
4709 /*
4710 * Returns TRUE if any of the given mappings were used to modify
4711 * physical memory. Otherwise, returns FALSE. Both page and 1MB section
4712 * mappings are supported.
4713 */
4714 static boolean_t
4715 pmap_is_modified_pvh(struct md_page *pvh)
4716 {
4717 pd_entry_t *pl1pd;
4718 struct l2_bucket *l2b;
4719 pv_entry_t pv;
4720 pt_entry_t *ptep;
4721 pmap_t pmap;
4722 boolean_t rv;
4723
4724 rw_assert(&pvh_global_lock, RA_WLOCKED);
4725 rv = FALSE;
4726
4727 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
4728 pmap = PV_PMAP(pv);
4729 PMAP_LOCK(pmap);
4730 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)];
4731 if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO)
4732 rv = L1_S_WRITABLE(*pl1pd);
4733 else {
4734 l2b = pmap_get_l2_bucket(pmap, pv->pv_va);
4735 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
4736 rv = L2_S_WRITABLE(*ptep);
4737 }
4738 PMAP_UNLOCK(pmap);
4739 if (rv)
4740 break;
4741 }
4742
4743 return (rv);
4744 }
4745
4746 boolean_t
4747 pmap_is_modified(vm_page_t m)
4748 {
4749 boolean_t rv;
4750
4751 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4752 ("pmap_is_modified: page %p is not managed", m));
4753 /*
4754 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
4755 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
4756 * is clear, no PTEs can have APX cleared.
4757 */
4758 VM_OBJECT_ASSERT_WLOCKED(m->object);
4759 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
4760 return (FALSE);
4761 rw_wlock(&pvh_global_lock);
4762 rv = pmap_is_modified_pvh(&m->md) ||
4763 ((m->flags & PG_FICTITIOUS) == 0 &&
4764 pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
4765 rw_wunlock(&pvh_global_lock);
4766 return (rv);
4767 }
4768
4769 /*
4770 * Apply the given advice to the specified range of addresses within the
4771 * given pmap. Depending on the advice, clear the referenced and/or
4772 * modified flags in each mapping.
4773 */
4774 void
4775 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
4776 {
4777 struct l2_bucket *l2b;
4778 struct pv_entry *pve;
4779 pd_entry_t *pl1pd, l1pd;
4780 pt_entry_t *ptep, opte, pte;
4781 vm_offset_t next_bucket;
4782 vm_page_t m;
4783
4784 if (advice != MADV_DONTNEED && advice != MADV_FREE)
4785 return;
4786 rw_wlock(&pvh_global_lock);
4787 PMAP_LOCK(pmap);
4788 for (; sva < eva; sva = next_bucket) {
4789 next_bucket = L2_NEXT_BUCKET(sva);
4790 if (next_bucket < sva)
4791 next_bucket = eva;
4792 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)];
4793 l1pd = *pl1pd;
4794 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) {
4795 if (pmap == pmap_kernel())
4796 continue;
4797 if (!pmap_demote_section(pmap, sva)) {
4798 /*
4799 * The large page mapping was destroyed.
4800 */
4801 continue;
4802 }
4803 /*
4804 * Unless the page mappings are wired, remove the
4805 * mapping to a single page so that a subsequent
4806 * access may repromote. Since the underlying
4807 * l2_bucket is fully populated, this removal
4808 * never frees an entire l2_bucket.
4809 */
4810 l2b = pmap_get_l2_bucket(pmap, sva);
4811 KASSERT(l2b != NULL,
4812 ("pmap_advise: no l2 bucket for "
4813 "va 0x%#x, pmap 0x%p", sva, pmap));
4814 ptep = &l2b->l2b_kva[l2pte_index(sva)];
4815 opte = *ptep;
4816 m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep));
4817 KASSERT(m != NULL,
4818 ("pmap_advise: no vm_page for demoted superpage"));
4819 pve = pmap_find_pv(&m->md, pmap, sva);
4820 KASSERT(pve != NULL,
4821 ("pmap_advise: no PV entry for managed mapping"));
4822 if ((pve->pv_flags & PVF_WIRED) == 0) {
4823 pmap_free_l2_bucket(pmap, l2b, 1);
4824 pve = pmap_remove_pv(m, pmap, sva);
4825 pmap_free_pv_entry(pmap, pve);
4826 *ptep = 0;
4827 PTE_SYNC(ptep);
4828 if (pmap_is_current(pmap)) {
4829 if (PTE_BEEN_EXECD(opte))
4830 cpu_tlb_flushID_SE(sva);
4831 else if (PTE_BEEN_REFD(opte))
4832 cpu_tlb_flushD_SE(sva);
4833 }
4834 }
4835 }
4836 if (next_bucket > eva)
4837 next_bucket = eva;
4838 l2b = pmap_get_l2_bucket(pmap, sva);
4839 if (l2b == NULL)
4840 continue;
4841 for (ptep = &l2b->l2b_kva[l2pte_index(sva)];
4842 sva != next_bucket; ptep++, sva += PAGE_SIZE) {
4843 opte = pte = *ptep;
4844 if ((opte & L2_S_PROTO) == 0)
4845 continue;
4846 m = PHYS_TO_VM_PAGE(l2pte_pa(opte));
4847 if (m == NULL || (m->oflags & VPO_UNMANAGED) != 0)
4848 continue;
4849 else if (L2_S_WRITABLE(opte)) {
4850 if (advice == MADV_DONTNEED) {
4851 /*
4852 * Don't need to mark the page
4853 * dirty as it was already marked as
4854 * such in pmap_fault_fixup() or
4855 * pmap_enter_locked().
4856 * Just clear the state.
4857 */
4858 } else
4859 pte |= L2_APX;
4860
4861 pte &= ~L2_S_REF;
4862 *ptep = pte;
4863 PTE_SYNC(ptep);
4864 } else if (L2_S_REFERENCED(opte)) {
4865 pte &= ~L2_S_REF;
4866 *ptep = pte;
4867 PTE_SYNC(ptep);
4868 } else
4869 continue;
4870 if (pmap_is_current(pmap)) {
4871 if (PTE_BEEN_EXECD(opte))
4872 cpu_tlb_flushID_SE(sva);
4873 else if (PTE_BEEN_REFD(opte))
4874 cpu_tlb_flushD_SE(sva);
4875 }
4876 }
4877 }
4878 rw_wunlock(&pvh_global_lock);
4879 PMAP_UNLOCK(pmap);
4880 }
4881
4882 /*
4883 * Clear the modify bits on the specified physical page.
4884 */
4885 void
4886 pmap_clear_modify(vm_page_t m)
4887 {
4888
4889 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4890 ("pmap_clear_modify: page %p is not managed", m));
4891 VM_OBJECT_ASSERT_WLOCKED(m->object);
4892 KASSERT(!vm_page_xbusied(m),
4893 ("pmap_clear_modify: page %p is exclusive busied", m));
4894
4895 /*
4896 * If the page is not PGA_WRITEABLE, then no mappings can be modified.
4897 * If the object containing the page is locked and the page is not
4898 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
4899 */
4900 if ((m->aflags & PGA_WRITEABLE) == 0)
4901 return;
4902 if (pmap_is_modified(m))
4903 pmap_clearbit(m, PVF_MOD);
4904 }
4905
4906
4907 /*
4908 * Clear the write and modified bits in each of the given page's mappings.
4909 */
4910 void
4911 pmap_remove_write(vm_page_t m)
4912 {
4913 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4914 ("pmap_remove_write: page %p is not managed", m));
4915
4916 /*
4917 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
4918 * set by another thread while the object is locked. Thus,
4919 * if PGA_WRITEABLE is clear, no page table entries need updating.
4920 */
4921 VM_OBJECT_ASSERT_WLOCKED(m->object);
4922 if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0)
4923 pmap_clearbit(m, PVF_WRITE);
4924 }
4925
4926
4927 /*
4928 * perform the pmap work for mincore
4929 */
4930 int
4931 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
4932 {
4933 struct l2_bucket *l2b;
4934 pd_entry_t *pl1pd, l1pd;
4935 pt_entry_t *ptep, pte;
4936 vm_paddr_t pa;
4937 vm_page_t m;
4938 int val;
4939 boolean_t managed;
4940
4941 PMAP_LOCK(pmap);
4942 retry:
4943 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(addr)];
4944 l1pd = *pl1pd;
4945 if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) {
4946 pa = (l1pd & L1_S_FRAME);
4947 val = MINCORE_SUPER | MINCORE_INCORE;
4948 if (L1_S_WRITABLE(l1pd))
4949 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4950 managed = FALSE;
4951 m = PHYS_TO_VM_PAGE(pa);
4952 if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0)
4953 managed = TRUE;
4954 if (managed) {
4955 if (L1_S_REFERENCED(l1pd))
4956 val |= MINCORE_REFERENCED |
4957 MINCORE_REFERENCED_OTHER;
4958 }
4959 } else {
4960 l2b = pmap_get_l2_bucket(pmap, addr);
4961 if (l2b == NULL) {
4962 val = 0;
4963 goto out;
4964 }
4965 ptep = &l2b->l2b_kva[l2pte_index(addr)];
4966 pte = *ptep;
4967 if (!l2pte_valid(pte)) {
4968 val = 0;
4969 goto out;
4970 }
4971 val = MINCORE_INCORE;
4972 if (L2_S_WRITABLE(pte))
4973 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4974 managed = FALSE;
4975 pa = l2pte_pa(pte);
4976 m = PHYS_TO_VM_PAGE(pa);
4977 if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0)
4978 managed = TRUE;
4979 if (managed) {
4980 if (L2_S_REFERENCED(pte))
4981 val |= MINCORE_REFERENCED |
4982 MINCORE_REFERENCED_OTHER;
4983 }
4984 }
4985 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
4986 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
4987 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
4988 if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
4989 goto retry;
4990 } else
4991 out:
4992 PA_UNLOCK_COND(*locked_pa);
4993 PMAP_UNLOCK(pmap);
4994 return (val);
4995 }
4996
4997 void
4998 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
4999 {
5000 }
5001
5002 /*
5003 * Increase the starting virtual address of the given mapping if a
5004 * different alignment might result in more superpage mappings.
5005 */
5006 void
5007 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
5008 vm_offset_t *addr, vm_size_t size)
5009 {
5010 }
5011
5012
5013 /*
5014 * Map a set of physical memory pages into the kernel virtual
5015 * address space. Return a pointer to where it is mapped. This
5016 * routine is intended to be used for mapping device memory,
5017 * NOT real memory.
5018 */
5019 void *
5020 pmap_mapdev(vm_offset_t pa, vm_size_t size)
5021 {
5022 vm_offset_t va, tmpva, offset;
5023
5024 offset = pa & PAGE_MASK;
5025 size = roundup(size, PAGE_SIZE);
5026
5027 GIANT_REQUIRED;
5028
5029 va = kva_alloc(size);
5030 if (!va)
5031 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
5032 for (tmpva = va; size > 0;) {
5033 pmap_kenter_internal(tmpva, pa, 0);
5034 size -= PAGE_SIZE;
5035 tmpva += PAGE_SIZE;
5036 pa += PAGE_SIZE;
5037 }
5038
5039 return ((void *)(va + offset));
5040 }
5041
5042 /*
5043 * pmap_map_section:
5044 *
5045 * Create a single section mapping.
5046 */
5047 void
5048 pmap_map_section(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot,
5049 boolean_t ref)
5050 {
5051 pd_entry_t *pl1pd, l1pd;
5052 pd_entry_t fl;
5053
5054 KASSERT(((va | pa) & L1_S_OFFSET) == 0,
5055 ("Not a valid section mapping"));
5056
5057 fl = pte_l1_s_cache_mode;
5058
5059 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
5060 l1pd = L1_S_PROTO | pa | L1_S_PROT(PTE_USER, prot) | fl |
5061 L1_S_DOM(pmap->pm_domain);
5062
5063 /* Mark page referenced if this section is a result of a promotion. */
5064 if (ref == TRUE)
5065 l1pd |= L1_S_REF;
5066 #ifdef SMP
5067 l1pd |= L1_SHARED;
5068 #endif
5069 *pl1pd = l1pd;
5070 PTE_SYNC(pl1pd);
5071 }
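/*
 * Annotation (not part of the original source): a sketch of how the section
 * descriptor above is composed.  L1_S_PROTO marks the L1 entry as a 1MB
 * section, the 1MB-aligned physical address supplies the frame, L1_S_PROT()
 * encodes the access permissions, the cache-mode bits come from
 * pte_l1_s_cache_mode, and L1_S_DOM() selects the pmap's domain; L1_S_REF
 * is OR'ed in only when the section is the result of a promotion.
 */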
5072
5073 /*
5074 * pmap_link_l2pt:
5075 *
5076 * Link the L2 page table specified by l2pv.pv_pa into the L1
5077 * page table at the slot for "va".
5078 */
5079 void
5080 pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv)
5081 {
5082 pd_entry_t *pde = (pd_entry_t *) l1pt, proto;
5083 u_int slot = va >> L1_S_SHIFT;
5084
5085 proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO;
5086
5087 #ifdef VERBOSE_INIT_ARM
5088 printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va);
5089 #endif
5090
5091 pde[slot + 0] = proto | (l2pv->pv_pa + 0x000);
5092 PTE_SYNC(&pde[slot]);
5093
5094 SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list);
5095
5096 }
5097
5098 /*
5099 * pmap_map_entry
5100 *
5101 * Create a single page mapping.
5102 */
5103 void
5104 pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot,
5105 int cache)
5106 {
5107 pd_entry_t *pde = (pd_entry_t *) l1pt;
5108 pt_entry_t fl;
5109 pt_entry_t *ptep;
5110
5111 KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin"));
5112
5113 fl = l2s_mem_types[cache];
5114
5115 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
5116 panic("pmap_map_entry: no L2 table for VA 0x%08x", va);
5117
5118 ptep = (pt_entry_t *)kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK);
5119
5120 if (ptep == NULL)
5121 panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va);
5122
5123 ptep[l2pte_index(va)] = L2_S_PROTO | pa | fl | L2_S_REF;
5124 pmap_set_prot(&ptep[l2pte_index(va)], prot, 0);
5125 PTE_SYNC(&ptep[l2pte_index(va)]);
5126 }
5127
5128 /*
5129 * pmap_map_chunk:
5130 *
5131 * Map a chunk of memory using the most efficient mappings
5132 * possible (section, large page, small page) into the
5133 * provided L1 and L2 tables at the specified virtual address.
5134 */
5135 vm_size_t
5136 pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa,
5137 vm_size_t size, int prot, int type)
5138 {
5139 pd_entry_t *pde = (pd_entry_t *) l1pt;
5140 pt_entry_t *ptep, f1, f2s, f2l;
5141 vm_size_t resid;
5142 int i;
5143
5144 resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
5145
5146 if (l1pt == 0)
5147 panic("pmap_map_chunk: no L1 table provided");
5148
5149 #ifdef VERBOSE_INIT_ARM
5150 printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x "
5151 "prot=0x%x type=%d\n", pa, va, size, resid, prot, type);
5152 #endif
5153
5154 f1 = l1_mem_types[type];
5155 f2l = l2l_mem_types[type];
5156 f2s = l2s_mem_types[type];
5157
5158 size = resid;
5159
5160 while (resid > 0) {
5161 /* See if we can use a section mapping. */
5162 if (L1_S_MAPPABLE_P(va, pa, resid)) {
5163 #ifdef VERBOSE_INIT_ARM
5164 printf("S");
5165 #endif
5166 pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
5167 L1_S_PROT(PTE_KERNEL, prot | VM_PROT_EXECUTE) |
5168 f1 | L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_S_REF;
5169 PTE_SYNC(&pde[va >> L1_S_SHIFT]);
5170 va += L1_S_SIZE;
5171 pa += L1_S_SIZE;
5172 resid -= L1_S_SIZE;
5173 continue;
5174 }
5175
5176 /*
5177 * Ok, we're going to use an L2 table. Make sure
5178 * one is actually in the corresponding L1 slot
5179 * for the current VA.
5180 */
5181 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
5182 panic("pmap_map_chunk: no L2 table for VA 0x%08x", va);
5183
5184 ptep = (pt_entry_t *) kernel_pt_lookup(
5185 pde[L1_IDX(va)] & L1_C_ADDR_MASK);
5186 if (ptep == NULL)
5187 panic("pmap_map_chunk: can't find L2 table for VA "
5188 "0x%08x", va);
5189 /* See if we can use a L2 large page mapping. */
5190 if (L2_L_MAPPABLE_P(va, pa, resid)) {
5191 #ifdef VERBOSE_INIT_ARM
5192 printf("L");
5193 #endif
5194 for (i = 0; i < 16; i++) {
5195 ptep[l2pte_index(va) + i] =
5196 L2_L_PROTO | pa |
5197 L2_L_PROT(PTE_KERNEL, prot) | f2l;
5198 PTE_SYNC(&ptep[l2pte_index(va) + i]);
5199 }
5200 va += L2_L_SIZE;
5201 pa += L2_L_SIZE;
5202 resid -= L2_L_SIZE;
5203 continue;
5204 }
5205
5206 /* Use a small page mapping. */
5207 #ifdef VERBOSE_INIT_ARM
5208 printf("P");
5209 #endif
5210 ptep[l2pte_index(va)] = L2_S_PROTO | pa | f2s | L2_S_REF;
5211 pmap_set_prot(&ptep[l2pte_index(va)], prot, 0);
5212 PTE_SYNC(&ptep[l2pte_index(va)]);
5213 va += PAGE_SIZE;
5214 pa += PAGE_SIZE;
5215 resid -= PAGE_SIZE;
5216 }
5217 #ifdef VERBOSE_INIT_ARM
5218 printf("\n");
5219 #endif
5220 return (size);
5221
5222 }
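/*
 * Annotation (not part of the original source): the mapping-size selection
 * above works largest-first.  A 1MB section is used when both VA and PA are
 * section-aligned and at least 1MB remains; otherwise a 64KB large page
 * (written as 16 identical, consecutive L2 entries, as the ARM large-page
 * format requires) is tried, and finally a plain 4KB small page is used.
 */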
5223
5224 /********************** Static device map routines ***************************/
5225
5226 static const struct pmap_devmap *pmap_devmap_table;
5227
5228 /*
5229 * Register the devmap table. This is provided in case early console
5230 * initialization needs to register mappings created by bootstrap code
5231 * before pmap_devmap_bootstrap() is called.
5232 */
5233 void
5234 pmap_devmap_register(const struct pmap_devmap *table)
5235 {
5236
5237 pmap_devmap_table = table;
5238 }
5239
5240 /*
5241 * Map all of the static regions in the devmap table, and remember
5242 * the devmap table so other parts of the kernel can look up entries
5243 * later.
5244 */
5245 void
5246 pmap_devmap_bootstrap(vm_offset_t l1pt, const struct pmap_devmap *table)
5247 {
5248 int i;
5249
5250 pmap_devmap_table = table;
5251
5252 for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
5253 #ifdef VERBOSE_INIT_ARM
5254 printf("devmap: %08x -> %08x @ %08x\n",
5255 pmap_devmap_table[i].pd_pa,
5256 pmap_devmap_table[i].pd_pa +
5257 pmap_devmap_table[i].pd_size - 1,
5258 pmap_devmap_table[i].pd_va);
5259 #endif
5260 pmap_map_chunk(l1pt, pmap_devmap_table[i].pd_va,
5261 pmap_devmap_table[i].pd_pa,
5262 pmap_devmap_table[i].pd_size,
5263 pmap_devmap_table[i].pd_prot,
5264 pmap_devmap_table[i].pd_cache);
5265 }
5266 }
5267
5268 const struct pmap_devmap *
5269 pmap_devmap_find_pa(vm_paddr_t pa, vm_size_t size)
5270 {
5271 int i;
5272
5273 if (pmap_devmap_table == NULL)
5274 return (NULL);
5275
5276 for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
5277 if (pa >= pmap_devmap_table[i].pd_pa &&
5278 pa + size <= pmap_devmap_table[i].pd_pa +
5279 pmap_devmap_table[i].pd_size)
5280 return (&pmap_devmap_table[i]);
5281 }
5282
5283 return (NULL);
5284 }
5285
5286 const struct pmap_devmap *
5287 pmap_devmap_find_va(vm_offset_t va, vm_size_t size)
5288 {
5289 int i;
5290
5291 if (pmap_devmap_table == NULL)
5292 return (NULL);
5293
5294 for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) {
5295 if (va >= pmap_devmap_table[i].pd_va &&
5296 va + size <= pmap_devmap_table[i].pd_va +
5297 pmap_devmap_table[i].pd_size)
5298 return (&pmap_devmap_table[i]);
5299 }
5300
5301 return (NULL);
5302 }
5303
5304 int
5305 pmap_dmap_iscurrent(pmap_t pmap)
5306 {
5307 return(pmap_is_current(pmap));
5308 }
5309
5310 void
5311 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
5312 {
5313 /*
5314 * Remember the memattr in a field that gets used to set the appropriate
5315 * bits in the PTEs as mappings are established.
5316 */
5317 m->md.pv_memattr = ma;
5318
5319 /*
5320 * It appears that this function can only be called before any mappings
5321 * for the page are established on ARM. If this ever changes, this code
5322 * will need to walk the pv_list and make each of the existing mappings
5323 * uncacheable, being careful to sync caches and PTEs (and maybe
5324 * invalidate TLB?) for any current mapping it modifies.
5325 */
5326 if (TAILQ_FIRST(&m->md.pv_list) != NULL)
5327 panic("Can't change memattr on page with existing mappings");
5328 }