FreeBSD/Linux Kernel Cross Reference
sys/arm/arm/pmap.c
1 /* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */
2 /*-
3 * Copyright 2004 Olivier Houchard.
4 * Copyright 2003 Wasabi Systems, Inc.
5 * All rights reserved.
6 *
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
23 * written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
36 */
37
38 /*-
39 * Copyright (c) 2002-2003 Wasabi Systems, Inc.
40 * Copyright (c) 2001 Richard Earnshaw
41 * Copyright (c) 2001-2002 Christopher Gilbert
42 * All rights reserved.
43 *
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. The name of the company nor the name of the author may be used to
50 * endorse or promote products derived from this software without specific
51 * prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
54 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
56 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
57 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
58 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
59 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 */
65 /*-
66 * Copyright (c) 1999 The NetBSD Foundation, Inc.
67 * All rights reserved.
68 *
69 * This code is derived from software contributed to The NetBSD Foundation
70 * by Charles M. Hannum.
71 *
72 * Redistribution and use in source and binary forms, with or without
73 * modification, are permitted provided that the following conditions
74 * are met:
75 * 1. Redistributions of source code must retain the above copyright
76 * notice, this list of conditions and the following disclaimer.
77 * 2. Redistributions in binary form must reproduce the above copyright
78 * notice, this list of conditions and the following disclaimer in the
79 * documentation and/or other materials provided with the distribution.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
82 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
83 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
84 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
85 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
86 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
87 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
88 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
89 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
90 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
91 * POSSIBILITY OF SUCH DAMAGE.
92 */
93
94 /*-
95 * Copyright (c) 1994-1998 Mark Brinicombe.
96 * Copyright (c) 1994 Brini.
97 * All rights reserved.
98 *
99 * This code is derived from software written for Brini by Mark Brinicombe
100 *
101 * Redistribution and use in source and binary forms, with or without
102 * modification, are permitted provided that the following conditions
103 * are met:
104 * 1. Redistributions of source code must retain the above copyright
105 * notice, this list of conditions and the following disclaimer.
106 * 2. Redistributions in binary form must reproduce the above copyright
107 * notice, this list of conditions and the following disclaimer in the
108 * documentation and/or other materials provided with the distribution.
109 * 3. All advertising materials mentioning features or use of this software
110 * must display the following acknowledgement:
111 * This product includes software developed by Mark Brinicombe.
112 * 4. The name of the author may not be used to endorse or promote products
113 * derived from this software without specific prior written permission.
114 *
115 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
116 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
117 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
118 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
119 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
120 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
121 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
122 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
123 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
124 *
125 * RiscBSD kernel project
126 *
127 * pmap.c
128 *
129 * Machine dependent vm stuff
130 *
131 * Created : 20/09/94
132 */
133
134 /*
135 * Special compilation symbols
136 * PMAP_DEBUG - Build in pmap_debug_level code
137 *
138 * Note that pmap_mapdev() and pmap_unmapdev() are implemented in arm/devmap.c
139 */
140 /* Include header files */
141
142 #include "opt_vm.h"
143
144 #include <sys/cdefs.h>
145 __FBSDID("$FreeBSD: releng/10.2/sys/arm/arm/pmap.c 283931 2015-06-02 21:36:45Z imp $");
146 #include <sys/param.h>
147 #include <sys/systm.h>
148 #include <sys/kernel.h>
149 #include <sys/ktr.h>
150 #include <sys/lock.h>
151 #include <sys/proc.h>
152 #include <sys/malloc.h>
153 #include <sys/msgbuf.h>
154 #include <sys/mutex.h>
155 #include <sys/vmmeter.h>
156 #include <sys/mman.h>
157 #include <sys/rwlock.h>
158 #include <sys/smp.h>
159 #include <sys/sched.h>
160
161 #include <vm/vm.h>
162 #include <vm/vm_param.h>
163 #include <vm/uma.h>
164 #include <vm/pmap.h>
165 #include <vm/vm_kern.h>
166 #include <vm/vm_object.h>
167 #include <vm/vm_map.h>
168 #include <vm/vm_page.h>
169 #include <vm/vm_pageout.h>
170 #include <vm/vm_phys.h>
171 #include <vm/vm_extern.h>
172
173 #include <machine/md_var.h>
174 #include <machine/cpu.h>
175 #include <machine/cpufunc.h>
176 #include <machine/pcb.h>
177
178 #ifdef PMAP_DEBUG
179 #define PDEBUG(_lev_,_stat_) \
180 if (pmap_debug_level >= (_lev_)) \
181 ((_stat_))
182 #define dprintf printf
183
184 int pmap_debug_level = 0;
185 #define PMAP_INLINE
186 #else /* PMAP_DEBUG */
187 #define PDEBUG(_lev_,_stat_) /* Nothing */
188 #define dprintf(x, arg...)
189 #define PMAP_INLINE __inline
190 #endif /* PMAP_DEBUG */
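/*
 * A minimal usage sketch (only meaningful in a PMAP_DEBUG build): the
 * statement argument is evaluated only when pmap_debug_level is at least
 * the requested level, e.g.
 *
 *	PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap));
 */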
191
192 extern struct pv_addr systempage;
193
194 extern int last_fault_code;
195
196 /*
197 * Internal function prototypes
198 */
199 static void pmap_free_pv_entry (pv_entry_t);
200 static pv_entry_t pmap_get_pv_entry(void);
201
202 static int pmap_enter_locked(pmap_t, vm_offset_t, vm_page_t,
203 vm_prot_t, u_int);
204 static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va);
205 static void pmap_fix_cache(struct vm_page *, pmap_t, vm_offset_t);
206 static void pmap_alloc_l1(pmap_t);
207 static void pmap_free_l1(pmap_t);
208
209 static int pmap_clearbit(struct vm_page *, u_int);
210
211 static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t);
212 static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t);
213 static void pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int);
214 static vm_offset_t kernel_pt_lookup(vm_paddr_t);
215
216 static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");
217
218 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
219 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
220 vm_offset_t pmap_curmaxkvaddr;
221 vm_paddr_t kernel_l1pa;
222
223 vm_offset_t kernel_vm_end = 0;
224
225 vm_offset_t vm_max_kernel_address;
226
227 struct pmap kernel_pmap_store;
228
229 static pt_entry_t *csrc_pte, *cdst_pte;
230 static vm_offset_t csrcp, cdstp;
231 static struct mtx cmtx;
232
233 static void pmap_init_l1(struct l1_ttable *, pd_entry_t *);
234 /*
235 * These routines are called when the CPU type is identified to set up
236 * the PTE prototypes, cache modes, etc.
237 *
238 * The variables are always here, just in case LKMs need to reference
239 * them (though, they shouldn't).
240 */
241
242 pt_entry_t pte_l1_s_cache_mode;
243 pt_entry_t pte_l1_s_cache_mode_pt;
244 pt_entry_t pte_l1_s_cache_mask;
245
246 pt_entry_t pte_l2_l_cache_mode;
247 pt_entry_t pte_l2_l_cache_mode_pt;
248 pt_entry_t pte_l2_l_cache_mask;
249
250 pt_entry_t pte_l2_s_cache_mode;
251 pt_entry_t pte_l2_s_cache_mode_pt;
252 pt_entry_t pte_l2_s_cache_mask;
253
254 pt_entry_t pte_l2_s_prot_u;
255 pt_entry_t pte_l2_s_prot_w;
256 pt_entry_t pte_l2_s_prot_mask;
257
258 pt_entry_t pte_l1_s_proto;
259 pt_entry_t pte_l1_c_proto;
260 pt_entry_t pte_l2_s_proto;
261
262 void (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t);
263 void (*pmap_copy_page_offs_func)(vm_paddr_t a_phys,
264 vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs,
265 int cnt);
266 void (*pmap_zero_page_func)(vm_paddr_t, int, int);
267
268 struct msgbuf *msgbufp = 0;
269
270 /*
271 * Crashdump maps.
272 */
273 static caddr_t crashdumpmap;
274
275 extern void bcopy_page(vm_offset_t, vm_offset_t);
276 extern void bzero_page(vm_offset_t);
277
278 extern vm_offset_t alloc_firstaddr;
279
280 char *_tmppt;
281
282 /*
283 * Metadata for L1 translation tables.
284 */
285 struct l1_ttable {
286 /* Entry on the L1 Table list */
287 SLIST_ENTRY(l1_ttable) l1_link;
288
289 /* Entry on the L1 Least Recently Used list */
290 TAILQ_ENTRY(l1_ttable) l1_lru;
291
292 /* Track how many domains are allocated from this L1 */
293 volatile u_int l1_domain_use_count;
294
295 /*
296 * A free-list of domain numbers for this L1.
297 * We avoid using ffs() and a bitmap to track domains since ffs()
298 * is slow on ARM.
299 */
300 u_int8_t l1_domain_first;
301 u_int8_t l1_domain_free[PMAP_DOMAINS];
302
303 /* Physical address of this L1 page table */
304 vm_paddr_t l1_physaddr;
305
306 /* KVA of this L1 page table */
307 pd_entry_t *l1_kva;
308 };
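/*
 * For illustration, the domain free-list above behaves like a small stack:
 * pmap_init_l1() below sets l1_domain_first to 0 and l1_domain_free[i] to
 * i + 1, so pmap_alloc_l1() pops a domain with
 *
 *	domain = l1->l1_domain_first;
 *	l1->l1_domain_first = l1->l1_domain_free[domain];
 *
 * and pmap_free_l1() pushes domain 'd' back with
 *
 *	l1->l1_domain_free[d] = l1->l1_domain_first;
 *	l1->l1_domain_first = d;
 */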
309
310 /*
311 * Convert a virtual address into its L1 table index. That is, the
312 * index used to locate the L2 descriptor table pointer in an L1 table.
313 * This is basically used to index l1->l1_kva[].
314 *
315 * Each L2 descriptor table represents 1MB of VA space.
316 */
317 #define L1_IDX(va) (((vm_offset_t)(va)) >> L1_S_SHIFT)
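/*
 * For example, assuming the usual 1MB ARM section size (L1_S_SHIFT == 20),
 * L1_IDX(0xC0123456) == 0xC01, i.e. the L1 slot whose L2 descriptor table
 * covers the megabyte 0xC0100000 - 0xC01FFFFF.
 */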
318
319 /*
320 * L1 Page Tables are tracked using a Least Recently Used list.
321 * - New L1s are allocated from the HEAD.
322 * - Freed L1s are added to the TAIL.
323 * - Recently accessed L1s (where an 'access' is some change to one of
324 * the userland pmaps which owns this L1) are moved to the TAIL.
325 */
326 static TAILQ_HEAD(, l1_ttable) l1_lru_list;
327 /*
328 * A list of all L1 tables
329 */
330 static SLIST_HEAD(, l1_ttable) l1_list;
331 static struct mtx l1_lru_lock;
332
333 /*
334 * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots.
335 *
336 * This is normally 16MB worth of L2 page descriptors for any given pmap.
337 * Reference counts are maintained for L2 descriptors so they can be
338 * freed when empty.
339 */
340 struct l2_dtable {
341 /* The number of L2 page descriptors allocated to this l2_dtable */
342 u_int l2_occupancy;
343
344 /* List of L2 page descriptors */
345 struct l2_bucket {
346 pt_entry_t *l2b_kva; /* KVA of L2 Descriptor Table */
347 vm_paddr_t l2b_phys; /* Physical address of same */
348 u_short l2b_l1idx; /* This L2 table's L1 index */
349 u_short l2b_occupancy; /* How many active descriptors */
350 } l2_bucket[L2_BUCKET_SIZE];
351 };
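/*
 * Each pmap carries an array of pointers to these structures (pm_l2[]),
 * which is populated lazily by pmap_alloc_l2_bucket() below.  A lookup,
 * as done in pmap_get_l2_bucket(), is then a two-step indirection:
 *
 *	l2  = pm->pm_l2[L2_IDX(l1idx)];
 *	l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
 */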
352
353 /* pmap_kenter_internal flags */
354 #define KENTER_CACHE 0x1
355 #define KENTER_USER 0x2
356
357 /*
358 * Given an L1 table index, calculate the corresponding l2_dtable index
359 * and bucket index within the l2_dtable.
360 */
361 #define L2_IDX(l1idx) (((l1idx) >> L2_BUCKET_LOG2) & \
362 (L2_SIZE - 1))
363 #define L2_BUCKET(l1idx) ((l1idx) & (L2_BUCKET_SIZE - 1))
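/*
 * Continuing the example above, and assuming the usual L2_BUCKET_LOG2 of 4
 * (16 buckets per l2_dtable): for l1idx 0xC01, L2_IDX() yields 0xC0 and
 * L2_BUCKET() yields 1, so each l2_dtable spans 16 consecutive L1 slots,
 * i.e. 16MB of VA space.
 */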
364
365 /*
366 * Given a virtual address, this macro returns the
367 * virtual address required to drop into the next L2 bucket.
368 */
369 #define L2_NEXT_BUCKET(va) (((va) & L1_S_FRAME) + L1_S_SIZE)
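/*
 * For example, L2_NEXT_BUCKET(0xC0123456) == 0xC0200000: the start of the
 * next 1MB section and therefore of the next l2_bucket.
 */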
370
371 /*
372 * We try to map the page tables write-through, if possible. However, not
373 * all CPUs have a write-through cache mode, so on those we have to sync
374 * the cache when we frob page tables.
375 *
376 * We try to evaluate this at compile time, if possible. However, it's
377 * not always possible to do that, hence this run-time var.
378 */
379 int pmap_needs_pte_sync;
380
381 /*
382 * Macro to determine if a mapping might be resident in the
383 * instruction cache and/or TLB
384 */
385 #define PV_BEEN_EXECD(f) (((f) & (PVF_REF | PVF_EXEC)) == (PVF_REF | PVF_EXEC))
386
387 /*
388 * Macro to determine if a mapping might be resident in the
389 * data cache and/or TLB
390 */
391 #define PV_BEEN_REFD(f) (((f) & PVF_REF) != 0)
392
393 #ifndef PMAP_SHPGPERPROC
394 #define PMAP_SHPGPERPROC 200
395 #endif
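/*
 * PMAP_SHPGPERPROC only provides the default; pmap_init() below sizes the
 * PV entry pool from it, roughly:
 *
 *	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
 *
 * and the default can be overridden with the vm.pmap.shpgperproc tunable.
 */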
396
397 #define pmap_is_current(pm) ((pm) == pmap_kernel() || \
398 curproc->p_vmspace->vm_map.pmap == (pm))
399 static uma_zone_t pvzone = NULL;
400 uma_zone_t l2zone;
401 static uma_zone_t l2table_zone;
402 static vm_offset_t pmap_kernel_l2dtable_kva;
403 static vm_offset_t pmap_kernel_l2ptp_kva;
404 static vm_paddr_t pmap_kernel_l2ptp_phys;
405 static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
406 static struct rwlock pvh_global_lock;
407
408 void pmap_copy_page_offs_generic(vm_paddr_t a_phys, vm_offset_t a_offs,
409 vm_paddr_t b_phys, vm_offset_t b_offs, int cnt);
410 #if ARM_MMU_XSCALE == 1
411 void pmap_copy_page_offs_xscale(vm_paddr_t a_phys, vm_offset_t a_offs,
412 vm_paddr_t b_phys, vm_offset_t b_offs, int cnt);
413 #endif
414
415 /*
416 * This list exists for the benefit of pmap_map_chunk(). It keeps track
417 * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can
418 * find them as necessary.
419 *
420 * Note that the data on this list MUST remain valid after initarm() returns,
421 * as pmap_bootstrap() uses it to construct L2 table metadata.
422 */
423 SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list);
424
425 static void
426 pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt)
427 {
428 int i;
429
430 l1->l1_kva = l1pt;
431 l1->l1_domain_use_count = 0;
432 l1->l1_domain_first = 0;
433
434 for (i = 0; i < PMAP_DOMAINS; i++)
435 l1->l1_domain_free[i] = i + 1;
436
437 /*
438 * Copy the kernel's L1 entries to each new L1.
439 */
440 if (l1pt != pmap_kernel()->pm_l1->l1_kva)
441 memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE);
442
443 if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0)
444 panic("pmap_init_l1: can't get PA of L1 at %p", l1pt);
445 SLIST_INSERT_HEAD(&l1_list, l1, l1_link);
446 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
447 }
448
449 static vm_offset_t
450 kernel_pt_lookup(vm_paddr_t pa)
451 {
452 struct pv_addr *pv;
453
454 SLIST_FOREACH(pv, &kernel_pt_list, pv_list) {
455 if (pv->pv_pa == pa)
456 return (pv->pv_va);
457 }
458 return (0);
459 }
460
461 #if ARM_MMU_GENERIC != 0
462 void
463 pmap_pte_init_generic(void)
464 {
465
466 pte_l1_s_cache_mode = L1_S_B|L1_S_C;
467 pte_l1_s_cache_mask = L1_S_CACHE_MASK_generic;
468
469 pte_l2_l_cache_mode = L2_B|L2_C;
470 pte_l2_l_cache_mask = L2_L_CACHE_MASK_generic;
471
472 pte_l2_s_cache_mode = L2_B|L2_C;
473 pte_l2_s_cache_mask = L2_S_CACHE_MASK_generic;
474
475 /*
476 * If we have a write-through cache, set B and C. If
477 * we have a write-back cache, then we assume setting
478 * only C will make those pages write-through.
479 */
480 if (cpufuncs.cf_dcache_wb_range == (void *) cpufunc_nullop) {
481 pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
482 pte_l2_l_cache_mode_pt = L2_B|L2_C;
483 pte_l2_s_cache_mode_pt = L2_B|L2_C;
484 } else {
485 pte_l1_s_cache_mode_pt = L1_S_C;
486 pte_l2_l_cache_mode_pt = L2_C;
487 pte_l2_s_cache_mode_pt = L2_C;
488 }
489
490 pte_l2_s_prot_u = L2_S_PROT_U_generic;
491 pte_l2_s_prot_w = L2_S_PROT_W_generic;
492 pte_l2_s_prot_mask = L2_S_PROT_MASK_generic;
493
494 pte_l1_s_proto = L1_S_PROTO_generic;
495 pte_l1_c_proto = L1_C_PROTO_generic;
496 pte_l2_s_proto = L2_S_PROTO_generic;
497
498 pmap_copy_page_func = pmap_copy_page_generic;
499 pmap_copy_page_offs_func = pmap_copy_page_offs_generic;
500 pmap_zero_page_func = pmap_zero_page_generic;
501 }
502
503 #if defined(CPU_ARM9) && defined(ARM9_CACHE_WRITE_THROUGH)
504 void
505 pmap_pte_init_arm9(void)
506 {
507
508 /*
509 * ARM9 is compatible with generic, but we want to use
510 * write-through caching for now.
511 */
512 pmap_pte_init_generic();
513
514 pte_l1_s_cache_mode = L1_S_C;
515 pte_l2_l_cache_mode = L2_C;
516 pte_l2_s_cache_mode = L2_C;
517
518 pte_l1_s_cache_mode_pt = L1_S_C;
519 pte_l2_l_cache_mode_pt = L2_C;
520 pte_l2_s_cache_mode_pt = L2_C;
521 }
522 #endif /* CPU_ARM9 */
523 #endif /* ARM_MMU_GENERIC != 0 */
524
525 #if defined(CPU_ARM10)
526 void
527 pmap_pte_init_arm10(void)
528 {
529
530 /*
531 * ARM10 is compatible with generic, but we want to use
532 * write-through caching for now.
533 */
534 pmap_pte_init_generic();
535
536 pte_l1_s_cache_mode = L1_S_B | L1_S_C;
537 pte_l2_l_cache_mode = L2_B | L2_C;
538 pte_l2_s_cache_mode = L2_B | L2_C;
539
540 pte_l1_s_cache_mode_pt = L1_S_C;
541 pte_l2_l_cache_mode_pt = L2_C;
542 pte_l2_s_cache_mode_pt = L2_C;
543
544 }
545 #endif /* CPU_ARM10 */
546
547 #if ARM_MMU_XSCALE == 1
548 #if (ARM_NMMUS > 1) || defined (CPU_XSCALE_CORE3)
549 static u_int xscale_use_minidata;
550 #endif
551
552 void
553 pmap_pte_init_xscale(void)
554 {
555 uint32_t auxctl;
556 int write_through = 0;
557
558 pte_l1_s_cache_mode = L1_S_B|L1_S_C|L1_S_XSCALE_P;
559 pte_l1_s_cache_mask = L1_S_CACHE_MASK_xscale;
560
561 pte_l2_l_cache_mode = L2_B|L2_C;
562 pte_l2_l_cache_mask = L2_L_CACHE_MASK_xscale;
563
564 pte_l2_s_cache_mode = L2_B|L2_C;
565 pte_l2_s_cache_mask = L2_S_CACHE_MASK_xscale;
566
567 pte_l1_s_cache_mode_pt = L1_S_C;
568 pte_l2_l_cache_mode_pt = L2_C;
569 pte_l2_s_cache_mode_pt = L2_C;
570 #ifdef XSCALE_CACHE_READ_WRITE_ALLOCATE
571 /*
572 * The XScale core has an enhanced mode where writes that
573 * miss the cache cause a cache line to be allocated. This
574 * is significantly faster than the traditional, write-through
575 * behavior of this case.
576 */
577 pte_l1_s_cache_mode |= L1_S_XSCALE_TEX(TEX_XSCALE_X);
578 pte_l2_l_cache_mode |= L2_XSCALE_L_TEX(TEX_XSCALE_X);
579 pte_l2_s_cache_mode |= L2_XSCALE_T_TEX(TEX_XSCALE_X);
580 #endif /* XSCALE_CACHE_READ_WRITE_ALLOCATE */
581 #ifdef XSCALE_CACHE_WRITE_THROUGH
582 /*
583 * Some versions of the XScale core have various bugs in
584 * their cache units, the work-around for which is to run
585 * the cache in write-through mode. Unfortunately, this
586 * has a major (negative) impact on performance. So, we
587 * go ahead and run fast-and-loose, in the hopes that we
588 * don't line up the planets in a way that will trip the
589 * bugs.
590 *
591 * However, we give you the option to be slow-but-correct.
592 */
593 write_through = 1;
594 #elif defined(XSCALE_CACHE_WRITE_BACK)
595 /* force write back cache mode */
596 write_through = 0;
597 #elif defined(CPU_XSCALE_PXA2X0)
598 /*
599 * Intel PXA2[15]0 processors are known to have a bug in
600 * write-back cache on revision 4 and earlier (stepping
601 * A[01] and B[012]). Fixed for C0 and later.
602 */
603 {
604 uint32_t id, type;
605
606 id = cpufunc_id();
607 type = id & ~(CPU_ID_XSCALE_COREREV_MASK|CPU_ID_REVISION_MASK);
608
609 if (type == CPU_ID_PXA250 || type == CPU_ID_PXA210) {
610 if ((id & CPU_ID_REVISION_MASK) < 5) {
611 /* write through for stepping A0-1 and B0-2 */
612 write_through = 1;
613 }
614 }
615 }
616 #endif /* XSCALE_CACHE_WRITE_THROUGH */
617
618 if (write_through) {
619 pte_l1_s_cache_mode = L1_S_C;
620 pte_l2_l_cache_mode = L2_C;
621 pte_l2_s_cache_mode = L2_C;
622 }
623
624 #if (ARM_NMMUS > 1)
625 xscale_use_minidata = 1;
626 #endif
627
628 pte_l2_s_prot_u = L2_S_PROT_U_xscale;
629 pte_l2_s_prot_w = L2_S_PROT_W_xscale;
630 pte_l2_s_prot_mask = L2_S_PROT_MASK_xscale;
631
632 pte_l1_s_proto = L1_S_PROTO_xscale;
633 pte_l1_c_proto = L1_C_PROTO_xscale;
634 pte_l2_s_proto = L2_S_PROTO_xscale;
635
636 #ifdef CPU_XSCALE_CORE3
637 pmap_copy_page_func = pmap_copy_page_generic;
638 pmap_copy_page_offs_func = pmap_copy_page_offs_generic;
639 pmap_zero_page_func = pmap_zero_page_generic;
640 xscale_use_minidata = 0;
641 /* Make sure it is L2-cacheable */
642 pte_l1_s_cache_mode |= L1_S_XSCALE_TEX(TEX_XSCALE_T);
643 pte_l1_s_cache_mode_pt = pte_l1_s_cache_mode &~ L1_S_XSCALE_P;
644 pte_l2_l_cache_mode |= L2_XSCALE_L_TEX(TEX_XSCALE_T) ;
645 pte_l2_l_cache_mode_pt = pte_l1_s_cache_mode;
646 pte_l2_s_cache_mode |= L2_XSCALE_T_TEX(TEX_XSCALE_T);
647 pte_l2_s_cache_mode_pt = pte_l2_s_cache_mode;
648
649 #else
650 pmap_copy_page_func = pmap_copy_page_xscale;
651 pmap_copy_page_offs_func = pmap_copy_page_offs_xscale;
652 pmap_zero_page_func = pmap_zero_page_xscale;
653 #endif
654
655 /*
656 * Disable ECC protection of page table access, for now.
657 */
658 __asm __volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (auxctl));
659 auxctl &= ~XSCALE_AUXCTL_P;
660 __asm __volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (auxctl));
661 }
662
663 /*
664 * xscale_setup_minidata:
665 *
666 * Set up the mini-data cache clean area. We require the
667 * caller to allocate the right amount of physically and
668 * virtually contiguous space.
669 */
670 extern vm_offset_t xscale_minidata_clean_addr;
671 extern vm_size_t xscale_minidata_clean_size; /* already initialized */
672 void
673 xscale_setup_minidata(vm_offset_t l1pt, vm_offset_t va, vm_paddr_t pa)
674 {
675 pd_entry_t *pde = (pd_entry_t *) l1pt;
676 pt_entry_t *pte;
677 vm_size_t size;
678 uint32_t auxctl;
679
680 xscale_minidata_clean_addr = va;
681
682 /* Round it to page size. */
683 size = (xscale_minidata_clean_size + L2_S_OFFSET) & L2_S_FRAME;
684
685 for (; size != 0;
686 va += L2_S_SIZE, pa += L2_S_SIZE, size -= L2_S_SIZE) {
687 pte = (pt_entry_t *) kernel_pt_lookup(
688 pde[L1_IDX(va)] & L1_C_ADDR_MASK);
689 if (pte == NULL)
690 panic("xscale_setup_minidata: can't find L2 table for "
691 "VA 0x%08x", (u_int32_t) va);
692 pte[l2pte_index(va)] =
693 L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, VM_PROT_READ) |
694 L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);
695 }
696
697 /*
698 * Configure the mini-data cache for write-back with
699 * read/write-allocate.
700 *
701 * NOTE: In order to reconfigure the mini-data cache, we must
702 * make sure it contains no valid data! In order to do that,
703 * we must issue a global data cache invalidate command!
704 *
705 * WE ASSUME WE ARE RUNNING UN-CACHED WHEN THIS ROUTINE IS CALLED!
706 * THIS IS VERY IMPORTANT!
707 */
708
709 /* Invalidate data and mini-data. */
710 __asm __volatile("mcr p15, 0, %0, c7, c6, 0" : : "r" (0));
711 __asm __volatile("mrc p15, 0, %0, c1, c0, 1" : "=r" (auxctl));
712 auxctl = (auxctl & ~XSCALE_AUXCTL_MD_MASK) | XSCALE_AUXCTL_MD_WB_RWA;
713 __asm __volatile("mcr p15, 0, %0, c1, c0, 1" : : "r" (auxctl));
714 }
715 #endif
716
717 /*
718 * Allocate an L1 translation table for the specified pmap.
719 * This is called at pmap creation time.
720 */
721 static void
722 pmap_alloc_l1(pmap_t pm)
723 {
724 struct l1_ttable *l1;
725 u_int8_t domain;
726
727 /*
728 * Remove the L1 at the head of the LRU list
729 */
730 mtx_lock(&l1_lru_lock);
731 l1 = TAILQ_FIRST(&l1_lru_list);
732 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
733
734 /*
735 * Pick the first available domain number, and update
736 * the link to the next number.
737 */
738 domain = l1->l1_domain_first;
739 l1->l1_domain_first = l1->l1_domain_free[domain];
740
741 /*
742 * If there are still free domain numbers in this L1,
743 * put it back on the TAIL of the LRU list.
744 */
745 if (++l1->l1_domain_use_count < PMAP_DOMAINS)
746 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
747
748 mtx_unlock(&l1_lru_lock);
749
750 /*
751 * Fix up the relevant bits in the pmap structure
752 */
753 pm->pm_l1 = l1;
754 pm->pm_domain = domain + 1;
755 }
756
757 /*
758 * Free an L1 translation table.
759 * This is called at pmap destruction time.
760 */
761 static void
762 pmap_free_l1(pmap_t pm)
763 {
764 struct l1_ttable *l1 = pm->pm_l1;
765
766 mtx_lock(&l1_lru_lock);
767
768 /*
769 * If this L1 is currently on the LRU list, remove it.
770 */
771 if (l1->l1_domain_use_count < PMAP_DOMAINS)
772 TAILQ_REMOVE(&l1_lru_list, l1, l1_lru);
773
774 /*
775 * Free up the domain number which was allocated to the pmap
776 */
777 l1->l1_domain_free[pm->pm_domain - 1] = l1->l1_domain_first;
778 l1->l1_domain_first = pm->pm_domain - 1;
779 l1->l1_domain_use_count--;
780
781 /*
782 * The L1 now must have at least 1 free domain, so add
783 * it back to the LRU list. If the use count is zero,
784 * put it at the head of the list, otherwise it goes
785 * to the tail.
786 */
787 if (l1->l1_domain_use_count == 0) {
788 TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru);
789 } else
790 TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru);
791
792 mtx_unlock(&l1_lru_lock);
793 }
794
795 /*
796 * Returns a pointer to the L2 bucket associated with the specified pmap
797 * and VA, or NULL if no L2 bucket exists for the address.
798 */
799 static PMAP_INLINE struct l2_bucket *
800 pmap_get_l2_bucket(pmap_t pm, vm_offset_t va)
801 {
802 struct l2_dtable *l2;
803 struct l2_bucket *l2b;
804 u_short l1idx;
805
806 l1idx = L1_IDX(va);
807
808 if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL ||
809 (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL)
810 return (NULL);
811
812 return (l2b);
813 }
814
815 /*
816 * Returns a pointer to the L2 bucket associated with the specified pmap
817 * and VA.
818 *
819 * If no L2 bucket exists, perform the necessary allocations to put an L2
820 * bucket/page table in place.
821 *
822 * Note that if a new L2 bucket/page was allocated, the caller *must*
823 * increment the bucket occupancy counter appropriately *before*
824 * releasing the pmap's lock to ensure no other thread or cpu deallocates
825 * the bucket/page in the meantime.
826 */
827 static struct l2_bucket *
828 pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
829 {
830 struct l2_dtable *l2;
831 struct l2_bucket *l2b;
832 u_short l1idx;
833
834 l1idx = L1_IDX(va);
835
836 PMAP_ASSERT_LOCKED(pm);
837 rw_assert(&pvh_global_lock, RA_WLOCKED);
838 if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
839 /*
840 * No mapping at this address, as there is
841 * no entry in the L1 table.
842 * Need to allocate a new l2_dtable.
843 */
844 PMAP_UNLOCK(pm);
845 rw_wunlock(&pvh_global_lock);
846 if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) {
847 rw_wlock(&pvh_global_lock);
848 PMAP_LOCK(pm);
849 return (NULL);
850 }
851 rw_wlock(&pvh_global_lock);
852 PMAP_LOCK(pm);
853 if (pm->pm_l2[L2_IDX(l1idx)] != NULL) {
854 /*
855 * Someone already allocated the l2_dtable while
856 * we were doing the same.
857 */
858 uma_zfree(l2table_zone, l2);
859 l2 = pm->pm_l2[L2_IDX(l1idx)];
860 } else {
861 bzero(l2, sizeof(*l2));
862 /*
863 * Link it into the parent pmap
864 */
865 pm->pm_l2[L2_IDX(l1idx)] = l2;
866 }
867 }
868
869 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
870
871 /*
872 * Fetch pointer to the L2 page table associated with the address.
873 */
874 if (l2b->l2b_kva == NULL) {
875 pt_entry_t *ptep;
876
877 /*
878 * No L2 page table has been allocated. Chances are, this
879 * is because we just allocated the l2_dtable, above.
880 */
881 PMAP_UNLOCK(pm);
882 rw_wunlock(&pvh_global_lock);
883 ptep = uma_zalloc(l2zone, M_NOWAIT);
884 rw_wlock(&pvh_global_lock);
885 PMAP_LOCK(pm);
886 if (l2b->l2b_kva != 0) {
887 /* We lost the race. */
888 uma_zfree(l2zone, ptep);
889 return (l2b);
890 }
891 l2b->l2b_phys = vtophys(ptep);
892 if (ptep == NULL) {
893 /*
894 * Oops, no more L2 page tables available at this
895 * time. We may need to deallocate the l2_dtable
896 * if we allocated a new one above.
897 */
898 if (l2->l2_occupancy == 0) {
899 pm->pm_l2[L2_IDX(l1idx)] = NULL;
900 uma_zfree(l2table_zone, l2);
901 }
902 return (NULL);
903 }
904
905 l2->l2_occupancy++;
906 l2b->l2b_kva = ptep;
907 l2b->l2b_l1idx = l1idx;
908 }
909
910 return (l2b);
911 }
912
913 static PMAP_INLINE void
914 #ifndef PMAP_INCLUDE_PTE_SYNC
915 pmap_free_l2_ptp(pt_entry_t *l2)
916 #else
917 pmap_free_l2_ptp(boolean_t need_sync, pt_entry_t *l2)
918 #endif
919 {
920 #ifdef PMAP_INCLUDE_PTE_SYNC
921 /*
922 * Note: With a write-back cache, we may need to sync this
923 * L2 table before re-using it.
924 * This is because it may have belonged to a non-current
925 * pmap, in which case the cache syncs would have been
926 * skipped when the pages were being unmapped. If the
927 * L2 table were then to be immediately re-allocated to
928 * the *current* pmap, it may well contain stale mappings
929 * which have not yet been cleared by a cache write-back
930 * and so would still be visible to the mmu.
931 */
932 if (need_sync)
933 PTE_SYNC_RANGE(l2, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
934 #endif
935 uma_zfree(l2zone, l2);
936 }
937 /*
938 * One or more mappings in the specified L2 descriptor table have just been
939 * invalidated.
940 *
941 * Garbage collect the metadata and descriptor table itself if necessary.
942 *
943 * The pmap lock must be acquired when this is called (not necessary
944 * for the kernel pmap).
945 */
946 static void
947 pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l2b, u_int count)
948 {
949 struct l2_dtable *l2;
950 pd_entry_t *pl1pd, l1pd;
951 pt_entry_t *ptep;
952 u_short l1idx;
953
954
955 /*
956 * Update the bucket's reference count according to how many
957 * PTEs the caller has just invalidated.
958 */
959 l2b->l2b_occupancy -= count;
960
961 /*
962 * Note:
963 *
964 * Level 2 page tables allocated to the kernel pmap are never freed
965 * as that would require checking all Level 1 page tables and
966 * removing any references to the Level 2 page table. See also the
967 * comment elsewhere about never freeing bootstrap L2 descriptors.
968 *
969 * We make do with just invalidating the mapping in the L2 table.
970 *
971 * This isn't really a big deal in practice and, in fact, leads
972 * to a performance win over time as we don't need to continually
973 * alloc/free.
974 */
975 if (l2b->l2b_occupancy > 0 || pm == pmap_kernel())
976 return;
977
978 /*
979 * There are no more valid mappings in this level 2 page table.
980 * Go ahead and NULL-out the pointer in the bucket, then
981 * free the page table.
982 */
983 l1idx = l2b->l2b_l1idx;
984 ptep = l2b->l2b_kva;
985 l2b->l2b_kva = NULL;
986
987 pl1pd = &pm->pm_l1->l1_kva[l1idx];
988
989 /*
990 * If the L1 slot matches the pmap's domain
991 * number, then invalidate it.
992 */
993 l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK);
994 if (l1pd == (L1_C_DOM(pm->pm_domain) | L1_TYPE_C)) {
995 *pl1pd = 0;
996 PTE_SYNC(pl1pd);
997 }
998
999 /*
1000 * Release the L2 descriptor table back to the pool cache.
1001 */
1002 #ifndef PMAP_INCLUDE_PTE_SYNC
1003 pmap_free_l2_ptp(ptep);
1004 #else
1005 pmap_free_l2_ptp(!pmap_is_current(pm), ptep);
1006 #endif
1007
1008 /*
1009 * Update the reference count in the associated l2_dtable
1010 */
1011 l2 = pm->pm_l2[L2_IDX(l1idx)];
1012 if (--l2->l2_occupancy > 0)
1013 return;
1014
1015 /*
1016 * There are no more valid mappings in any of the Level 1
1017 * slots managed by this l2_dtable. Go ahead and NULL-out
1018 * the pointer in the parent pmap and free the l2_dtable.
1019 */
1020 pm->pm_l2[L2_IDX(l1idx)] = NULL;
1021 uma_zfree(l2table_zone, l2);
1022 }
1023
1024 /*
1025 * Pool cache constructors for L2 descriptor tables, metadata and pmap
1026 * structures.
1027 */
1028 static int
1029 pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags)
1030 {
1031 #ifndef PMAP_INCLUDE_PTE_SYNC
1032 struct l2_bucket *l2b;
1033 pt_entry_t *ptep, pte;
1034
1035 vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK;
1036
1037 /*
1038 * The mappings for these page tables were initially made using
1039 * pmap_kenter() by the pool subsystem. Therefore, the cache-
1040 * mode will not be right for page table mappings. To avoid
1041 * polluting the pmap_kenter() code with a special case for
1042 * page tables, we simply fix up the cache-mode here if it's not
1043 * correct.
1044 */
1045 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
1046 ptep = &l2b->l2b_kva[l2pte_index(va)];
1047 pte = *ptep;
1048
1049 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
1050 /*
1051 * Page tables must have the cache-mode set to
1052 * Write-Thru.
1053 */
1054 *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
1055 PTE_SYNC(ptep);
1056 cpu_tlb_flushD_SE(va);
1057 cpu_cpwait();
1058 }
1059 #endif
1060 memset(mem, 0, L2_TABLE_SIZE_REAL);
1061 PTE_SYNC_RANGE(mem, L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
1062 return (0);
1063 }
1064
1065 /*
1066 * A bunch of routines to conditionally flush the caches/TLB depending
1067 * on whether the specified pmap actually needs to be flushed at any
1068 * given time.
1069 */
1070 static PMAP_INLINE void
1071 pmap_tlb_flushID_SE(pmap_t pm, vm_offset_t va)
1072 {
1073
1074 if (pmap_is_current(pm))
1075 cpu_tlb_flushID_SE(va);
1076 }
1077
1078 static PMAP_INLINE void
1079 pmap_tlb_flushD_SE(pmap_t pm, vm_offset_t va)
1080 {
1081
1082 if (pmap_is_current(pm))
1083 cpu_tlb_flushD_SE(va);
1084 }
1085
1086 static PMAP_INLINE void
1087 pmap_tlb_flushID(pmap_t pm)
1088 {
1089
1090 if (pmap_is_current(pm))
1091 cpu_tlb_flushID();
1092 }
1093 static PMAP_INLINE void
1094 pmap_tlb_flushD(pmap_t pm)
1095 {
1096
1097 if (pmap_is_current(pm))
1098 cpu_tlb_flushD();
1099 }
1100
1101 static int
1102 pmap_has_valid_mapping(pmap_t pm, vm_offset_t va)
1103 {
1104 pd_entry_t *pde;
1105 pt_entry_t *ptep;
1106
1107 if (pmap_get_pde_pte(pm, va, &pde, &ptep) &&
1108 ptep && ((*ptep & L2_TYPE_MASK) != L2_TYPE_INV))
1109 return (1);
1110
1111 return (0);
1112 }
1113
1114 static PMAP_INLINE void
1115 pmap_idcache_wbinv_range(pmap_t pm, vm_offset_t va, vm_size_t len)
1116 {
1117 vm_size_t rest;
1118
1119 CTR4(KTR_PMAP, "pmap_idcache_wbinv_range: pmap %p is_kernel %d va 0x%08x"
1120 " len 0x%x ", pm, pm == pmap_kernel(), va, len);
1121
1122 if (pmap_is_current(pm) || pm == pmap_kernel()) {
1123 rest = MIN(PAGE_SIZE - (va & PAGE_MASK), len);
1124 while (len > 0) {
1125 if (pmap_has_valid_mapping(pm, va)) {
1126 cpu_idcache_wbinv_range(va, rest);
1127 cpu_l2cache_wbinv_range(va, rest);
1128 }
1129 len -= rest;
1130 va += rest;
1131 rest = MIN(PAGE_SIZE, len);
1132 }
1133 }
1134 }
1135
1136 static PMAP_INLINE void
1137 pmap_dcache_wb_range(pmap_t pm, vm_offset_t va, vm_size_t len, boolean_t do_inv,
1138 boolean_t rd_only)
1139 {
1140 vm_size_t rest;
1141
1142 CTR4(KTR_PMAP, "pmap_dcache_wb_range: pmap %p is_kernel %d va 0x%08x "
1143 "len 0x%x ", pm, pm == pmap_kernel(), va, len);
1144 CTR2(KTR_PMAP, " do_inv %d rd_only %d", do_inv, rd_only);
1145
1146 if (pmap_is_current(pm)) {
1147 rest = MIN(PAGE_SIZE - (va & PAGE_MASK), len);
1148 while (len > 0) {
1149 if (pmap_has_valid_mapping(pm, va)) {
1150 if (do_inv && rd_only) {
1151 cpu_dcache_inv_range(va, rest);
1152 cpu_l2cache_inv_range(va, rest);
1153 } else if (do_inv) {
1154 cpu_dcache_wbinv_range(va, rest);
1155 cpu_l2cache_wbinv_range(va, rest);
1156 } else if (!rd_only) {
1157 cpu_dcache_wb_range(va, rest);
1158 cpu_l2cache_wb_range(va, rest);
1159 }
1160 }
1161 len -= rest;
1162 va += rest;
1163
1164 rest = MIN(PAGE_SIZE, len);
1165 }
1166 }
1167 }
1168
1169 static PMAP_INLINE void
1170 pmap_idcache_wbinv_all(pmap_t pm)
1171 {
1172
1173 if (pmap_is_current(pm)) {
1174 cpu_idcache_wbinv_all();
1175 cpu_l2cache_wbinv_all();
1176 }
1177 }
1178
1179 #ifdef notyet
1180 static PMAP_INLINE void
1181 pmap_dcache_wbinv_all(pmap_t pm)
1182 {
1183
1184 if (pmap_is_current(pm)) {
1185 cpu_dcache_wbinv_all();
1186 cpu_l2cache_wbinv_all();
1187 }
1188 }
1189 #endif
1190
1191 /*
1192 * PTE_SYNC_CURRENT:
1193 *
1194 * Make sure the pte is written out to RAM.
1195 * We need to do this in any of the following cases:
1196 * - We're dealing with the kernel pmap.
1197 * - There is no pmap active in the cache/tlb.
1198 * - The specified pmap is 'active' in the cache/tlb.
1199 */
1200 #ifdef PMAP_INCLUDE_PTE_SYNC
1201 #define PTE_SYNC_CURRENT(pm, ptep) \
1202 do { \
1203 if (PMAP_NEEDS_PTE_SYNC && \
1204 pmap_is_current(pm)) \
1205 PTE_SYNC(ptep); \
1206 } while (/*CONSTCOND*/0)
1207 #else
1208 #define PTE_SYNC_CURRENT(pm, ptep) /* nothing */
1209 #endif
1210
1211 /*
1212 * cacheable == -1 means we must make the entry uncacheable; 1 means
1213 * make it cacheable.
1214 */
1215 static __inline void
1216 pmap_set_cache_entry(pv_entry_t pv, pmap_t pm, vm_offset_t va, int cacheable)
1217 {
1218 struct l2_bucket *l2b;
1219 pt_entry_t *ptep, pte;
1220
1221 l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va);
1222 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
1223
1224 if (cacheable == 1) {
1225 pte = (*ptep & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode;
1226 if (l2pte_valid(pte)) {
1227 if (PV_BEEN_EXECD(pv->pv_flags)) {
1228 pmap_tlb_flushID_SE(pv->pv_pmap, pv->pv_va);
1229 } else if (PV_BEEN_REFD(pv->pv_flags)) {
1230 pmap_tlb_flushD_SE(pv->pv_pmap, pv->pv_va);
1231 }
1232 }
1233 } else {
1234 pte = *ptep &~ L2_S_CACHE_MASK;
1235 if ((va != pv->pv_va || pm != pv->pv_pmap) &&
1236 l2pte_valid(pte)) {
1237 if (PV_BEEN_EXECD(pv->pv_flags)) {
1238 pmap_idcache_wbinv_range(pv->pv_pmap,
1239 pv->pv_va, PAGE_SIZE);
1240 pmap_tlb_flushID_SE(pv->pv_pmap, pv->pv_va);
1241 } else if (PV_BEEN_REFD(pv->pv_flags)) {
1242 pmap_dcache_wb_range(pv->pv_pmap,
1243 pv->pv_va, PAGE_SIZE, TRUE,
1244 (pv->pv_flags & PVF_WRITE) == 0);
1245 pmap_tlb_flushD_SE(pv->pv_pmap,
1246 pv->pv_va);
1247 }
1248 }
1249 }
1250 *ptep = pte;
1251 PTE_SYNC_CURRENT(pv->pv_pmap, ptep);
1252 }
1253
1254 static void
1255 pmap_fix_cache(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1256 {
1257 int pmwc = 0;
1258 int writable = 0, kwritable = 0, uwritable = 0;
1259 int entries = 0, kentries = 0, uentries = 0;
1260 struct pv_entry *pv;
1261
1262 rw_assert(&pvh_global_lock, RA_WLOCKED);
1263
1264 /* The cache gets written back/invalidated on context switch.
1265 * Therefore, if a user page is mapped more than once in the same
1266 * pmap or also by the kernel map, and at least one of the mappings
1267 * is writable, then the cache entry must be set write-through.
1268 */
1269
1270 TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1271 /* generate a count of the pv_entry uses */
1272 if (pv->pv_flags & PVF_WRITE) {
1273 if (pv->pv_pmap == pmap_kernel())
1274 kwritable++;
1275 else if (pv->pv_pmap == pm)
1276 uwritable++;
1277 writable++;
1278 }
1279 if (pv->pv_pmap == pmap_kernel())
1280 kentries++;
1281 else {
1282 if (pv->pv_pmap == pm)
1283 uentries++;
1284 entries++;
1285 }
1286 }
1287 /*
1288 * check if the user duplicate mapping has
1289 * been removed.
1290 */
1291 if ((pm != pmap_kernel()) && (((uentries > 1) && uwritable) ||
1292 (uwritable > 1)))
1293 pmwc = 1;
1294
1295 TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1296 /* check for user uncacheable conditions - order is important */
1297 if (pm != pmap_kernel() &&
1298 (pv->pv_pmap == pm || pv->pv_pmap == pmap_kernel())) {
1299
1300 if ((uentries > 1 && uwritable) || uwritable > 1) {
1301
1302 /* user duplicate mapping */
1303 if (pv->pv_pmap != pmap_kernel())
1304 pv->pv_flags |= PVF_MWC;
1305
1306 if (!(pv->pv_flags & PVF_NC)) {
1307 pv->pv_flags |= PVF_NC;
1308 pmap_set_cache_entry(pv, pm, va, -1);
1309 }
1310 continue;
1311 } else /* no longer a duplicate user */
1312 pv->pv_flags &= ~PVF_MWC;
1313 }
1314
1315 /*
1316 * check for kernel uncacheable conditions
1317 * kernel writable or kernel readable with writable user entry
1318 */
1319 if ((kwritable && (entries || kentries > 1)) ||
1320 (kwritable > 1) ||
1321 ((kwritable != writable) && kentries &&
1322 (pv->pv_pmap == pmap_kernel() ||
1323 (pv->pv_flags & PVF_WRITE) ||
1324 (pv->pv_flags & PVF_MWC)))) {
1325
1326 if (!(pv->pv_flags & PVF_NC)) {
1327 pv->pv_flags |= PVF_NC;
1328 pmap_set_cache_entry(pv, pm, va, -1);
1329 }
1330 continue;
1331 }
1332
1333 /* kernel and user are cacheable */
1334 if ((pm == pmap_kernel()) && !(pv->pv_flags & PVF_MWC) &&
1335 (pv->pv_flags & PVF_NC)) {
1336
1337 pv->pv_flags &= ~PVF_NC;
1338 if (pg->md.pv_memattr != VM_MEMATTR_UNCACHEABLE)
1339 pmap_set_cache_entry(pv, pm, va, 1);
1340 continue;
1341 }
1342 /* user is no longer sharable and writable */
1343 if (pm != pmap_kernel() &&
1344 (pv->pv_pmap == pm || pv->pv_pmap == pmap_kernel()) &&
1345 !pmwc && (pv->pv_flags & PVF_NC)) {
1346
1347 pv->pv_flags &= ~(PVF_NC | PVF_MWC);
1348 if (pg->md.pv_memattr != VM_MEMATTR_UNCACHEABLE)
1349 pmap_set_cache_entry(pv, pm, va, 1);
1350 }
1351 }
1352
1353 if ((kwritable == 0) && (writable == 0)) {
1354 pg->md.pvh_attrs &= ~PVF_MOD;
1355 vm_page_aflag_clear(pg, PGA_WRITEABLE);
1356 return;
1357 }
1358 }
1359
1360 /*
1361 * Modify pte bits for all ptes corresponding to the given physical address.
1362 * We use `maskbits' rather than `clearbits' because we're always passing
1363 * constants and the latter would require an extra inversion at run-time.
1364 */
1365 static int
1366 pmap_clearbit(struct vm_page *pg, u_int maskbits)
1367 {
1368 struct l2_bucket *l2b;
1369 struct pv_entry *pv;
1370 pt_entry_t *ptep, npte, opte;
1371 pmap_t pm;
1372 vm_offset_t va;
1373 u_int oflags;
1374 int count = 0;
1375
1376 rw_wlock(&pvh_global_lock);
1377
1378 if (maskbits & PVF_WRITE)
1379 maskbits |= PVF_MOD;
1380 /*
1381 * Clear saved attributes (modify, reference)
1382 */
1383 pg->md.pvh_attrs &= ~(maskbits & (PVF_MOD | PVF_REF));
1384
1385 if (TAILQ_EMPTY(&pg->md.pv_list)) {
1386 rw_wunlock(&pvh_global_lock);
1387 return (0);
1388 }
1389
1390 /*
1391 * Loop over all current mappings setting/clearing as appropriate
1392 */
1393 TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list) {
1394 va = pv->pv_va;
1395 pm = pv->pv_pmap;
1396 oflags = pv->pv_flags;
1397
1398 if (!(oflags & maskbits)) {
1399 if ((maskbits & PVF_WRITE) && (pv->pv_flags & PVF_NC)) {
1400 if (pg->md.pv_memattr !=
1401 VM_MEMATTR_UNCACHEABLE) {
1402 PMAP_LOCK(pm);
1403 l2b = pmap_get_l2_bucket(pm, va);
1404 ptep = &l2b->l2b_kva[l2pte_index(va)];
1405 *ptep |= pte_l2_s_cache_mode;
1406 PTE_SYNC(ptep);
1407 PMAP_UNLOCK(pm);
1408 }
1409 pv->pv_flags &= ~(PVF_NC | PVF_MWC);
1410 }
1411 continue;
1412 }
1413 pv->pv_flags &= ~maskbits;
1414
1415 PMAP_LOCK(pm);
1416
1417 l2b = pmap_get_l2_bucket(pm, va);
1418
1419 ptep = &l2b->l2b_kva[l2pte_index(va)];
1420 npte = opte = *ptep;
1421
1422 if (maskbits & (PVF_WRITE|PVF_MOD)) {
1423 if ((pv->pv_flags & PVF_NC)) {
1424 /*
1425 * Entry is not cacheable:
1426 *
1427 * Don't turn caching on again if this is a
1428 * modified emulation. This would be
1429 * inconsistent with the settings created by
1430 * pmap_fix_cache(). Otherwise, it's safe
1431 * to re-enable caching.
1432 *
1433 * There's no need to call pmap_fix_cache()
1434 * here: all pages are losing their write
1435 * permission.
1436 */
1437 if (maskbits & PVF_WRITE) {
1438 if (pg->md.pv_memattr !=
1439 VM_MEMATTR_UNCACHEABLE)
1440 npte |= pte_l2_s_cache_mode;
1441 pv->pv_flags &= ~(PVF_NC | PVF_MWC);
1442 }
1443 } else
1444 if (opte & L2_S_PROT_W) {
1445 vm_page_dirty(pg);
1446 /*
1447 * Entry is writable/cacheable: check if the pmap
1448 * is current; if it is, flush the entry, otherwise
1449 * it won't be in the cache.
1450 */
1451 if (PV_BEEN_EXECD(oflags))
1452 pmap_idcache_wbinv_range(pm, pv->pv_va,
1453 PAGE_SIZE);
1454 else
1455 if (PV_BEEN_REFD(oflags))
1456 pmap_dcache_wb_range(pm, pv->pv_va,
1457 PAGE_SIZE,
1458 (maskbits & PVF_REF) ? TRUE : FALSE,
1459 FALSE);
1460 }
1461
1462 /* make the pte read only */
1463 npte &= ~L2_S_PROT_W;
1464 }
1465
1466 if (maskbits & PVF_REF) {
1467 if ((pv->pv_flags & PVF_NC) == 0 &&
1468 (maskbits & (PVF_WRITE|PVF_MOD)) == 0) {
1469 /*
1470 * Check npte here; we may have already
1471 * done the wbinv above, and the validity
1472 * of the PTE is the same for opte and
1473 * npte.
1474 */
1475 if (npte & L2_S_PROT_W) {
1476 if (PV_BEEN_EXECD(oflags))
1477 pmap_idcache_wbinv_range(pm,
1478 pv->pv_va, PAGE_SIZE);
1479 else
1480 if (PV_BEEN_REFD(oflags))
1481 pmap_dcache_wb_range(pm,
1482 pv->pv_va, PAGE_SIZE,
1483 TRUE, FALSE);
1484 } else
1485 if ((npte & L2_TYPE_MASK) != L2_TYPE_INV) {
1486 /* XXXJRT need idcache_inv_range */
1487 if (PV_BEEN_EXECD(oflags))
1488 pmap_idcache_wbinv_range(pm,
1489 pv->pv_va, PAGE_SIZE);
1490 else
1491 if (PV_BEEN_REFD(oflags))
1492 pmap_dcache_wb_range(pm,
1493 pv->pv_va, PAGE_SIZE,
1494 TRUE, TRUE);
1495 }
1496 }
1497
1498 /*
1499 * Make the PTE invalid so that we will take a
1500 * page fault the next time the mapping is
1501 * referenced.
1502 */
1503 npte &= ~L2_TYPE_MASK;
1504 npte |= L2_TYPE_INV;
1505 }
1506
1507 if (npte != opte) {
1508 count++;
1509 *ptep = npte;
1510 PTE_SYNC(ptep);
1511 /* Flush the TLB entry if a current pmap. */
1512 if (PV_BEEN_EXECD(oflags))
1513 pmap_tlb_flushID_SE(pm, pv->pv_va);
1514 else
1515 if (PV_BEEN_REFD(oflags))
1516 pmap_tlb_flushD_SE(pm, pv->pv_va);
1517 }
1518
1519 PMAP_UNLOCK(pm);
1520
1521 }
1522
1523 if (maskbits & PVF_WRITE)
1524 vm_page_aflag_clear(pg, PGA_WRITEABLE);
1525 rw_wunlock(&pvh_global_lock);
1526 return (count);
1527 }
1528
1529 /*
1530 * main pv_entry manipulation functions:
1531 * pmap_enter_pv: enter a mapping onto a vm_page list
1532 * pmap_remove_pv: remove a mapping from a vm_page list
1533 *
1534 * NOTE: pmap_enter_pv expects to lock the pvh itself
1535 * pmap_remove_pv expects the caller to lock the pvh before calling
1536 */
1537
1538 /*
1539 * pmap_enter_pv: enter a mapping onto a vm_page's PV list
1540 *
1541 * => caller should hold the proper lock on pvh_global_lock
1542 * => caller should have pmap locked
1543 * => we will (someday) gain the lock on the vm_page's PV list
1544 * => caller should adjust ptp's wire_count before calling
1545 * => caller should not adjust pmap's wire_count
1546 */
1547 static void
1548 pmap_enter_pv(struct vm_page *pg, struct pv_entry *pve, pmap_t pm,
1549 vm_offset_t va, u_int flags)
1550 {
1551
1552 rw_assert(&pvh_global_lock, RA_WLOCKED);
1553 PMAP_ASSERT_LOCKED(pm);
1554 if (pg->md.pv_kva != 0) {
1555 pve->pv_pmap = kernel_pmap;
1556 pve->pv_va = pg->md.pv_kva;
1557 pve->pv_flags = PVF_WRITE | PVF_UNMAN;
1558 if (pm != kernel_pmap)
1559 PMAP_LOCK(kernel_pmap);
1560 TAILQ_INSERT_HEAD(&pg->md.pv_list, pve, pv_list);
1561 TAILQ_INSERT_HEAD(&kernel_pmap->pm_pvlist, pve, pv_plist);
1562 if (pm != kernel_pmap)
1563 PMAP_UNLOCK(kernel_pmap);
1564 pg->md.pv_kva = 0;
1565 if ((pve = pmap_get_pv_entry()) == NULL)
1566 panic("pmap_enter_pv: no pv entries");
1567 }
1568 pve->pv_pmap = pm;
1569 pve->pv_va = va;
1570 pve->pv_flags = flags;
1571 TAILQ_INSERT_HEAD(&pg->md.pv_list, pve, pv_list);
1572 TAILQ_INSERT_HEAD(&pm->pm_pvlist, pve, pv_plist);
1573 pg->md.pvh_attrs |= flags & (PVF_REF | PVF_MOD);
1574 if (pve->pv_flags & PVF_WIRED)
1575 ++pm->pm_stats.wired_count;
1576 vm_page_aflag_set(pg, PGA_REFERENCED);
1577 }
1578
1579 /*
1580 *
1581 * pmap_find_pv: Find a pv entry
1582 *
1583 * => caller should hold lock on vm_page
1584 */
1585 static PMAP_INLINE struct pv_entry *
1586 pmap_find_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1587 {
1588 struct pv_entry *pv;
1589
1590 rw_assert(&pvh_global_lock, RA_WLOCKED);
1591 TAILQ_FOREACH(pv, &pg->md.pv_list, pv_list)
1592 if (pm == pv->pv_pmap && va == pv->pv_va)
1593 break;
1594 return (pv);
1595 }
1596
1597 /*
1598 * vector_page_setprot:
1599 *
1600 * Manipulate the protection of the vector page.
1601 */
1602 void
1603 vector_page_setprot(int prot)
1604 {
1605 struct l2_bucket *l2b;
1606 pt_entry_t *ptep;
1607
1608 l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page);
1609
1610 ptep = &l2b->l2b_kva[l2pte_index(vector_page)];
1611
1612 *ptep = (*ptep & ~L1_S_PROT_MASK) | L2_S_PROT(PTE_KERNEL, prot);
1613 PTE_SYNC(ptep);
1614 cpu_tlb_flushD_SE(vector_page);
1615 cpu_cpwait();
1616 }
1617
1618 /*
1619 * pmap_remove_pv: try to remove a mapping from a pv_list
1620 *
1621 * => caller should hold proper lock on pmap_main_lock
1622 * => pmap should be locked
1623 * => caller should hold lock on vm_page [so that attrs can be adjusted]
1624 * => caller should adjust ptp's wire_count and free PTP if needed
1625 * => caller should NOT adjust pmap's wire_count
1626 * => we return the removed pve
1627 */
1628
1629 static void
1630 pmap_nuke_pv(struct vm_page *pg, pmap_t pm, struct pv_entry *pve)
1631 {
1632
1633 struct pv_entry *pv;
1634 rw_assert(&pvh_global_lock, RA_WLOCKED);
1635 PMAP_ASSERT_LOCKED(pm);
1636 TAILQ_REMOVE(&pg->md.pv_list, pve, pv_list);
1637 TAILQ_REMOVE(&pm->pm_pvlist, pve, pv_plist);
1638 if (pve->pv_flags & PVF_WIRED)
1639 --pm->pm_stats.wired_count;
1640 if (pg->md.pvh_attrs & PVF_MOD)
1641 vm_page_dirty(pg);
1642 if (TAILQ_FIRST(&pg->md.pv_list) == NULL)
1643 pg->md.pvh_attrs &= ~PVF_REF;
1644 else
1645 vm_page_aflag_set(pg, PGA_REFERENCED);
1646 if ((pve->pv_flags & PVF_NC) && ((pm == pmap_kernel()) ||
1647 (pve->pv_flags & PVF_WRITE) || !(pve->pv_flags & PVF_MWC)))
1648 pmap_fix_cache(pg, pm, 0);
1649 else if (pve->pv_flags & PVF_WRITE) {
1650 TAILQ_FOREACH(pve, &pg->md.pv_list, pv_list)
1651 if (pve->pv_flags & PVF_WRITE)
1652 break;
1653 if (!pve) {
1654 pg->md.pvh_attrs &= ~PVF_MOD;
1655 vm_page_aflag_clear(pg, PGA_WRITEABLE);
1656 }
1657 }
1658 pv = TAILQ_FIRST(&pg->md.pv_list);
1659 if (pv != NULL && (pv->pv_flags & PVF_UNMAN) &&
1660 TAILQ_NEXT(pv, pv_list) == NULL) {
1661 pm = kernel_pmap;
1662 pg->md.pv_kva = pv->pv_va;
1663 /* a recursive pmap_nuke_pv */
1664 TAILQ_REMOVE(&pg->md.pv_list, pv, pv_list);
1665 TAILQ_REMOVE(&pm->pm_pvlist, pv, pv_plist);
1666 if (pv->pv_flags & PVF_WIRED)
1667 --pm->pm_stats.wired_count;
1668 pg->md.pvh_attrs &= ~PVF_REF;
1669 pg->md.pvh_attrs &= ~PVF_MOD;
1670 vm_page_aflag_clear(pg, PGA_WRITEABLE);
1671 pmap_free_pv_entry(pv);
1672 }
1673 }
1674
1675 static struct pv_entry *
1676 pmap_remove_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va)
1677 {
1678 struct pv_entry *pve;
1679
1680 rw_assert(&pvh_global_lock, RA_WLOCKED);
1681 pve = TAILQ_FIRST(&pg->md.pv_list);
1682
1683 while (pve) {
1684 if (pve->pv_pmap == pm && pve->pv_va == va) { /* match? */
1685 pmap_nuke_pv(pg, pm, pve);
1686 break;
1687 }
1688 pve = TAILQ_NEXT(pve, pv_list);
1689 }
1690
1691 if (pve == NULL && pg->md.pv_kva == va)
1692 pg->md.pv_kva = 0;
1693
1694 return(pve); /* return removed pve */
1695 }
1696 /*
1697 *
1698 * pmap_modify_pv: Update pv flags
1699 *
1700 * => caller should hold lock on vm_page [so that attrs can be adjusted]
1701 * => caller should NOT adjust pmap's wire_count
1702 * => we return the old flags
1703 *
1704 * Modify a physical-virtual mapping in the pv table
1705 */
1706 static u_int
1707 pmap_modify_pv(struct vm_page *pg, pmap_t pm, vm_offset_t va,
1708 u_int clr_mask, u_int set_mask)
1709 {
1710 struct pv_entry *npv;
1711 u_int flags, oflags;
1712
1713 PMAP_ASSERT_LOCKED(pm);
1714 rw_assert(&pvh_global_lock, RA_WLOCKED);
1715 if ((npv = pmap_find_pv(pg, pm, va)) == NULL)
1716 return (0);
1717
1718 /*
1719 * There is at least one VA mapping this page.
1720 */
1721
1722 if (clr_mask & (PVF_REF | PVF_MOD))
1723 pg->md.pvh_attrs |= set_mask & (PVF_REF | PVF_MOD);
1724
1725 oflags = npv->pv_flags;
1726 npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask;
1727
1728 if ((flags ^ oflags) & PVF_WIRED) {
1729 if (flags & PVF_WIRED)
1730 ++pm->pm_stats.wired_count;
1731 else
1732 --pm->pm_stats.wired_count;
1733 }
1734
1735 if ((flags ^ oflags) & PVF_WRITE)
1736 pmap_fix_cache(pg, pm, 0);
1737
1738 return (oflags);
1739 }
1740
1741 /* Function to set the debug level of the pmap code */
1742 #ifdef PMAP_DEBUG
1743 void
1744 pmap_debug(int level)
1745 {
1746 pmap_debug_level = level;
1747 dprintf("pmap_debug: level=%d\n", pmap_debug_level);
1748 }
1749 #endif /* PMAP_DEBUG */
1750
1751 void
1752 pmap_pinit0(struct pmap *pmap)
1753 {
1754 PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap));
1755
1756 bcopy(kernel_pmap, pmap, sizeof(*pmap));
1757 bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx));
1758 PMAP_LOCK_INIT(pmap);
1759 }
1760
1761 /*
1762 * Initialize a vm_page's machine-dependent fields.
1763 */
1764 void
1765 pmap_page_init(vm_page_t m)
1766 {
1767
1768 TAILQ_INIT(&m->md.pv_list);
1769 m->md.pv_memattr = VM_MEMATTR_DEFAULT;
1770 }
1771
1772 /*
1773 * Initialize the pmap module.
1774 * Called by vm_init, to initialize any structures that the pmap
1775 * system needs to map virtual memory.
1776 */
1777 void
1778 pmap_init(void)
1779 {
1780 int shpgperproc = PMAP_SHPGPERPROC;
1781
1782 l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
1783 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1784 l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL,
1785 NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1786
1787 /*
1788 * Initialize the PV entry allocator.
1789 */
1790 pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
1791 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
1792 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
1793 pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
1794 uma_zone_reserve_kva(pvzone, pv_entry_max);
1795 pv_entry_high_water = 9 * (pv_entry_max / 10);
1796
1797 /*
1798 * Now it is safe to enable pv_table recording.
1799 */
1800 PDEBUG(1, printf("pmap_init: done!\n"));
1801 }
1802
1803 int
1804 pmap_fault_fixup(pmap_t pm, vm_offset_t va, vm_prot_t ftype, int user)
1805 {
1806 struct l2_dtable *l2;
1807 struct l2_bucket *l2b;
1808 pd_entry_t *pl1pd, l1pd;
1809 pt_entry_t *ptep, pte;
1810 vm_paddr_t pa;
1811 u_int l1idx;
1812 int rv = 0;
1813
1814 l1idx = L1_IDX(va);
1815 rw_wlock(&pvh_global_lock);
1816 PMAP_LOCK(pm);
1817
1818 /*
1819 * If there is no l2_dtable for this address, then the process
1820 * has no business accessing it.
1821 *
1822 * Note: This will catch userland processes trying to access
1823 * kernel addresses.
1824 */
1825 l2 = pm->pm_l2[L2_IDX(l1idx)];
1826 if (l2 == NULL)
1827 goto out;
1828
1829 /*
1830 * Likewise if there is no L2 descriptor table
1831 */
1832 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
1833 if (l2b->l2b_kva == NULL)
1834 goto out;
1835
1836 /*
1837 * Check the PTE itself.
1838 */
1839 ptep = &l2b->l2b_kva[l2pte_index(va)];
1840 pte = *ptep;
1841 if (pte == 0)
1842 goto out;
1843
1844 /*
1845 * Catch a userland access to the vector page mapped at 0x0
1846 */
1847 if (user && (pte & L2_S_PROT_U) == 0)
1848 goto out;
1849 if (va == vector_page)
1850 goto out;
1851
1852 pa = l2pte_pa(pte);
1853
1854 if ((ftype & VM_PROT_WRITE) && (pte & L2_S_PROT_W) == 0) {
1855 /*
1856 * This looks like a good candidate for "page modified"
1857 * emulation...
1858 */
1859 struct pv_entry *pv;
1860 struct vm_page *pg;
1861
1862 /* Extract the physical address of the page */
1863 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL) {
1864 goto out;
1865 }
1866 /* Get the current flags for this page. */
1867
1868 pv = pmap_find_pv(pg, pm, va);
1869 if (pv == NULL) {
1870 goto out;
1871 }
1872
1873 /*
1874 * Do the flags say this page is writable? If not then it
1875 * is a genuine write fault. If yes then the write fault is
1876 * our fault as we did not reflect the write access in the
1877 * PTE. Now we know a write has occurred we can correct this
1878 * and also set the modified bit
1879 */
1880 if ((pv->pv_flags & PVF_WRITE) == 0) {
1881 goto out;
1882 }
1883
1884 pg->md.pvh_attrs |= PVF_REF | PVF_MOD;
1885 vm_page_dirty(pg);
1886 pv->pv_flags |= PVF_REF | PVF_MOD;
1887
1888 /*
1889 * Re-enable write permissions for the page. No need to call
1890 * pmap_fix_cache(), since this is just a
1891 * modified-emulation fault, and the PVF_WRITE bit isn't
1892 * changing. We've already set the cacheable bits based on
1893 * the assumption that we can write to this page.
1894 */
1895 *ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO | L2_S_PROT_W;
1896 PTE_SYNC(ptep);
1897 rv = 1;
1898 } else
1899 if ((pte & L2_TYPE_MASK) == L2_TYPE_INV) {
1900 /*
1901 * This looks like a good candidate for "page referenced"
1902 * emulation.
1903 */
1904 struct pv_entry *pv;
1905 struct vm_page *pg;
1906
1907 /* Extract the physical address of the page */
1908 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
1909 goto out;
1910 /* Get the current flags for this page. */
1911
1912 pv = pmap_find_pv(pg, pm, va);
1913 if (pv == NULL)
1914 goto out;
1915
1916 pg->md.pvh_attrs |= PVF_REF;
1917 pv->pv_flags |= PVF_REF;
1918
1919
1920 *ptep = (pte & ~L2_TYPE_MASK) | L2_S_PROTO;
1921 PTE_SYNC(ptep);
1922 rv = 1;
1923 }
1924
1925 /*
1926 * We know there is a valid mapping here, so simply
1927 * fix up the L1 if necessary.
1928 */
1929 pl1pd = &pm->pm_l1->l1_kva[l1idx];
1930 l1pd = l2b->l2b_phys | L1_C_DOM(pm->pm_domain) | L1_C_PROTO;
1931 if (*pl1pd != l1pd) {
1932 *pl1pd = l1pd;
1933 PTE_SYNC(pl1pd);
1934 rv = 1;
1935 }
1936
1937 #ifdef DEBUG
1938 /*
1939 * If 'rv == 0' at this point, it generally indicates that there is a
1940 * stale TLB entry for the faulting address. This happens when two or
1941 * more processes are sharing an L1. Since we don't flush the TLB on
1942 * a context switch between such processes, we can take domain faults
1943 * for mappings which exist at the same VA in both processes. EVEN IF
1944 * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for
1945 * example.
1946 *
1947 * This is extremely likely to happen if pmap_enter() updated the L1
1948 * entry for a recently entered mapping. In this case, the TLB is
1949 * flushed for the new mapping, but there may still be TLB entries for
1950 * other mappings belonging to other processes in the 1MB range
1951 * covered by the L1 entry.
1952 *
1953 * Since 'rv == 0', we know that the L1 already contains the correct
1954 * value, so the fault must be due to a stale TLB entry.
1955 *
1956 * Since we always need to flush the TLB anyway in the case where we
1957 * fixed up the L1, or frobbed the L2 PTE, we effectively deal with
1958 * stale TLB entries dynamically.
1959 *
1960 * However, the above condition can ONLY happen if the current L1 is
1961 * being shared. If it happens when the L1 is unshared, it indicates
1962 * that other parts of the pmap are not doing their job WRT managing
1963 * the TLB.
1964 */
1965 if (rv == 0 && pm->pm_l1->l1_domain_use_count == 1) {
1966 printf("fixup: pm %p, va 0x%lx, ftype %d - nothing to do!\n",
1967 pm, (u_long)va, ftype);
1968 printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n",
1969 l2, l2b, ptep, pl1pd);
1970 printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n",
1971 pte, l1pd, last_fault_code);
1972 #ifdef DDB
1973 Debugger();
1974 #endif
1975 }
1976 #endif
1977
1978 cpu_tlb_flushID_SE(va);
1979 cpu_cpwait();
1980
1981 rv = 1;
1982
1983 out:
1984 rw_wunlock(&pvh_global_lock);
1985 PMAP_UNLOCK(pm);
1986 return (rv);
1987 }
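/*
 * Editor's summary (illustrative, not part of the original source):
 * referenced/modified emulation works by installing deliberately "weak"
 * PTEs and letting pmap_fault_fixup() upgrade them on first use:
 *
 *	unreferenced page:  the PTE is left as L2_TYPE_INV; the first access
 *	    faults here, PVF_REF is recorded and the PTE is made valid.
 *	unmodified page:    the PTE is left without L2_S_PROT_W; the first
 *	    write faults here, PVF_MOD is recorded, vm_page_dirty() is
 *	    called and write permission is added to the PTE.
 *
 * pmap_enter_locked() below is what chooses those initial PTE forms.
 */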
1988
1989 void
1990 pmap_postinit(void)
1991 {
1992 struct l2_bucket *l2b;
1993 struct l1_ttable *l1;
1994 pd_entry_t *pl1pt;
1995 pt_entry_t *ptep, pte;
1996 vm_offset_t va, eva;
1997 u_int loop, needed;
1998
1999 needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0);
2000 needed -= 1;
2001 l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK);
2002
2003 for (loop = 0; loop < needed; loop++, l1++) {
2004 /* Allocate a L1 page table */
2005 va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0,
2006 0xffffffff, L1_TABLE_SIZE, 0);
2007
2008 if (va == 0)
2009 panic("Cannot allocate L1 KVM");
2010
2011 eva = va + L1_TABLE_SIZE;
2012 pl1pt = (pd_entry_t *)va;
2013
2014 while (va < eva) {
2015 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2016 ptep = &l2b->l2b_kva[l2pte_index(va)];
2017 pte = *ptep;
2018 pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
2019 *ptep = pte;
2020 PTE_SYNC(ptep);
2021 cpu_tlb_flushD_SE(va);
2022
2023 va += PAGE_SIZE;
2024 }
2025 pmap_init_l1(l1, pl1pt);
2026 }
2027
2028
2029 #ifdef DEBUG
2030 printf("pmap_postinit: Allocated %d static L1 descriptor tables\n",
2031 needed);
2032 #endif
2033 }
2034
2035 /*
2036 * This is used to stuff certain critical values into the PCB where they
2037 * can be accessed quickly from cpu_switch() et al.
2038 */
2039 void
2040 pmap_set_pcb_pagedir(pmap_t pm, struct pcb *pcb)
2041 {
2042 struct l2_bucket *l2b;
2043
2044 pcb->pcb_pagedir = pm->pm_l1->l1_physaddr;
2045 pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) |
2046 (DOMAIN_CLIENT << (pm->pm_domain * 2));
2047
2048 if (vector_page < KERNBASE) {
2049 pcb->pcb_pl1vec = &pm->pm_l1->l1_kva[L1_IDX(vector_page)];
2050 l2b = pmap_get_l2_bucket(pm, vector_page);
2051 pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO |
2052 L1_C_DOM(pm->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL);
2053 } else
2054 pcb->pcb_pl1vec = NULL;
2055 }
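/*
 * Illustrative worked example (editor's note; assumes DOMAIN_CLIENT == 1
 * and PMAP_DOMAIN_KERNEL == 0, values not restated in this file): for a
 * user pmap that owns ARM domain 1, the DACR value cached above becomes
 *
 *	pcb_dacr = (1 << (0 * 2)) | (1 << (1 * 2)) = 0x00000005
 *
 * i.e. "client" access for domains 0 and 1 and "no access" for all other
 * domains, so page-table permission bits are honoured for both the
 * kernel's and this pmap's mappings.
 */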
2056
2057 void
2058 pmap_activate(struct thread *td)
2059 {
2060 pmap_t pm;
2061 struct pcb *pcb;
2062
2063 pm = vmspace_pmap(td->td_proc->p_vmspace);
2064 pcb = td->td_pcb;
2065
2066 critical_enter();
2067 pmap_set_pcb_pagedir(pm, pcb);
2068
2069 if (td == curthread) {
2070 u_int cur_dacr, cur_ttb;
2071
2072 __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb));
2073 __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr));
2074
2075 cur_ttb &= ~(L1_TABLE_SIZE - 1);
2076
2077 if (cur_ttb == (u_int)pcb->pcb_pagedir &&
2078 cur_dacr == pcb->pcb_dacr) {
2079 /*
2080 * No need to switch address spaces.
2081 */
2082 critical_exit();
2083 return;
2084 }
2085
2086
2087 /*
2088 * We MUST, I repeat, MUST fix up the L1 entry corresponding
2089 * to 'vector_page' in the incoming L1 table before switching
2090 * to it otherwise subsequent interrupts/exceptions (including
2091 * domain faults!) will jump into hyperspace.
2092 */
2093 if (pcb->pcb_pl1vec) {
2094
2095 *pcb->pcb_pl1vec = pcb->pcb_l1vec;
2096 /*
2097 * Don't need to PTE_SYNC() at this point since
2098 * cpu_setttb() is about to flush both the cache
2099 * and the TLB.
2100 */
2101 }
2102
2103 cpu_domains(pcb->pcb_dacr);
2104 cpu_setttb(pcb->pcb_pagedir);
2105 }
2106 critical_exit();
2107 }
2108
2109 static int
2110 pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va)
2111 {
2112 pd_entry_t *pdep, pde;
2113 pt_entry_t *ptep, pte;
2114 vm_offset_t pa;
2115 int rv = 0;
2116
2117 /*
2118 * Make sure the descriptor itself has the correct cache mode
2119 */
2120 pdep = &kl1[L1_IDX(va)];
2121 pde = *pdep;
2122
2123 if (l1pte_section_p(pde)) {
2124 if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) {
2125 *pdep = (pde & ~L1_S_CACHE_MASK) |
2126 pte_l1_s_cache_mode_pt;
2127 PTE_SYNC(pdep);
2128 cpu_dcache_wbinv_range((vm_offset_t)pdep,
2129 sizeof(*pdep));
2130 cpu_l2cache_wbinv_range((vm_offset_t)pdep,
2131 sizeof(*pdep));
2132 rv = 1;
2133 }
2134 } else {
2135 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK);
2136 ptep = (pt_entry_t *)kernel_pt_lookup(pa);
2137 if (ptep == NULL)
2138 			panic("pmap_set_pt_cache_mode: No L2 for va 0x%x", va);
2139
2140 ptep = &ptep[l2pte_index(va)];
2141 pte = *ptep;
2142 if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) {
2143 *ptep = (pte & ~L2_S_CACHE_MASK) |
2144 pte_l2_s_cache_mode_pt;
2145 PTE_SYNC(ptep);
2146 cpu_dcache_wbinv_range((vm_offset_t)ptep,
2147 sizeof(*ptep));
2148 cpu_l2cache_wbinv_range((vm_offset_t)ptep,
2149 sizeof(*ptep));
2150 rv = 1;
2151 }
2152 }
2153
2154 return (rv);
2155 }
2156
2157 static void
2158 pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap,
2159 pt_entry_t **ptep)
2160 {
2161 vm_offset_t va = *availp;
2162 struct l2_bucket *l2b;
2163
2164 if (ptep) {
2165 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2166 if (l2b == NULL)
2167 panic("pmap_alloc_specials: no l2b for 0x%x", va);
2168
2169 *ptep = &l2b->l2b_kva[l2pte_index(va)];
2170 }
2171
2172 *vap = va;
2173 *availp = va + (PAGE_SIZE * pages);
2174 }
2175
2176 /*
2177 * Bootstrap the system enough to run with virtual memory.
2178 *
2179 * On the arm this is called after mapping has already been enabled
2180 * and just syncs the pmap module with what has already been done.
2181 * [We can't call it easily with mapping off since the kernel is not
2182 * mapped with PA == VA, hence we would have to relocate every address
2183 * from the linked base (virtual) address "KERNBASE" to the actual
2184 * (physical) address starting relative to 0]
2185 */
2186 #define PMAP_STATIC_L2_SIZE 16
2187 void
2188 pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt)
2189 {
2190 static struct l1_ttable static_l1;
2191 static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE];
2192 struct l1_ttable *l1 = &static_l1;
2193 struct l2_dtable *l2;
2194 struct l2_bucket *l2b;
2195 pd_entry_t pde;
2196 pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va;
2197 pt_entry_t *ptep;
2198 vm_paddr_t pa;
2199 vm_offset_t va;
2200 vm_size_t size;
2201 int l1idx, l2idx, l2next = 0;
2202
2203 PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n",
2204 firstaddr, vm_max_kernel_address));
2205
2206 virtual_avail = firstaddr;
2207 kernel_pmap->pm_l1 = l1;
2208 kernel_l1pa = l1pt->pv_pa;
2209
2210 /*
2211 * Scan the L1 translation table created by initarm() and create
2212 * the required metadata for all valid mappings found in it.
2213 */
2214 for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) {
2215 pde = kernel_l1pt[l1idx];
2216
2217 /*
2218 * We're only interested in Coarse mappings.
2219 * pmap_extract() can deal with section mappings without
2220 * recourse to checking L2 metadata.
2221 */
2222 if ((pde & L1_TYPE_MASK) != L1_TYPE_C)
2223 continue;
2224
2225 /*
2226 * Lookup the KVA of this L2 descriptor table
2227 */
2228 pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK);
2229 ptep = (pt_entry_t *)kernel_pt_lookup(pa);
2230
2231 if (ptep == NULL) {
2232 panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx",
2233 (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa);
2234 }
2235
2236 /*
2237 * Fetch the associated L2 metadata structure.
2238 * Allocate a new one if necessary.
2239 */
2240 if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) {
2241 if (l2next == PMAP_STATIC_L2_SIZE)
2242 panic("pmap_bootstrap: out of static L2s");
2243 kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 =
2244 &static_l2[l2next++];
2245 }
2246
2247 /*
2248 * One more L1 slot tracked...
2249 */
2250 l2->l2_occupancy++;
2251
2252 /*
2253 * Fill in the details of the L2 descriptor in the
2254 * appropriate bucket.
2255 */
2256 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2257 l2b->l2b_kva = ptep;
2258 l2b->l2b_phys = pa;
2259 l2b->l2b_l1idx = l1idx;
2260
2261 /*
2262 * Establish an initial occupancy count for this descriptor
2263 */
2264 for (l2idx = 0;
2265 l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t));
2266 l2idx++) {
2267 if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) {
2268 l2b->l2b_occupancy++;
2269 }
2270 }
2271
2272 /*
2273 * Make sure the descriptor itself has the correct cache mode.
2274 * If not, fix it, but whine about the problem. Port-meisters
2275 * should consider this a clue to fix up their initarm()
2276 * function. :)
2277 */
2278 if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) {
2279 printf("pmap_bootstrap: WARNING! wrong cache mode for "
2280 "L2 pte @ %p\n", ptep);
2281 }
2282 }
2283
2284
2285 /*
2286 * Ensure the primary (kernel) L1 has the correct cache mode for
2287 * a page table. Bitch if it is not correctly set.
2288 */
2289 for (va = (vm_offset_t)kernel_l1pt;
2290 va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) {
2291 if (pmap_set_pt_cache_mode(kernel_l1pt, va))
2292 printf("pmap_bootstrap: WARNING! wrong cache mode for "
2293 "primary L1 @ 0x%x\n", va);
2294 }
2295
2296 cpu_dcache_wbinv_all();
2297 cpu_l2cache_wbinv_all();
2298 cpu_tlb_flushID();
2299 cpu_cpwait();
2300
2301 PMAP_LOCK_INIT(kernel_pmap);
2302 CPU_FILL(&kernel_pmap->pm_active);
2303 kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL;
2304 TAILQ_INIT(&kernel_pmap->pm_pvlist);
2305
2306 /*
2307 * Initialize the global pv list lock.
2308 */
2309 rw_init_flags(&pvh_global_lock, "pmap pv global", RW_RECURSE);
2310
2311 /*
2312 * Reserve some special page table entries/VA space for temporary
2313 * mapping of pages.
2314 */
2315 #define SYSMAP(c, p, v, n) \
2316 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
2317
2318 pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte);
2319 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte);
2320 pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte);
2321 pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte);
2322 size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) /
2323 L1_S_SIZE;
2324 pmap_alloc_specials(&virtual_avail,
2325 round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE,
2326 &pmap_kernel_l2ptp_kva, NULL);
2327
2328 size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE;
2329 pmap_alloc_specials(&virtual_avail,
2330 round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE,
2331 &pmap_kernel_l2dtable_kva, NULL);
2332
2333 pmap_alloc_specials(&virtual_avail,
2334 1, (vm_offset_t*)&_tmppt, NULL);
2335 pmap_alloc_specials(&virtual_avail,
2336 MAXDUMPPGS, (vm_offset_t *)&crashdumpmap, NULL);
2337 SLIST_INIT(&l1_list);
2338 TAILQ_INIT(&l1_lru_list);
2339 mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF);
2340 pmap_init_l1(l1, kernel_l1pt);
2341 cpu_dcache_wbinv_all();
2342 cpu_l2cache_wbinv_all();
2343
2344 virtual_avail = round_page(virtual_avail);
2345 virtual_end = vm_max_kernel_address;
2346 kernel_vm_end = pmap_curmaxkvaddr;
2347 mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF);
2348
2349 pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb);
2350 }
2351
2352 /***************************************************
2353 * Pmap allocation/deallocation routines.
2354 ***************************************************/
2355
2356 /*
2357 * Release any resources held by the given physical map.
2358 * Called when a pmap initialized by pmap_pinit is being released.
2359 * Should only be called if the map contains no valid mappings.
2360 */
2361 void
2362 pmap_release(pmap_t pmap)
2363 {
2364 struct pcb *pcb;
2365
2366 pmap_idcache_wbinv_all(pmap);
2367 cpu_l2cache_wbinv_all();
2368 pmap_tlb_flushID(pmap);
2369 cpu_cpwait();
2370 if (vector_page < KERNBASE) {
2371 struct pcb *curpcb = PCPU_GET(curpcb);
2372 pcb = thread0.td_pcb;
2373 if (pmap_is_current(pmap)) {
2374 /*
2375 * Frob the L1 entry corresponding to the vector
2376 * page so that it contains the kernel pmap's domain
2377 * number. This will ensure pmap_remove() does not
2378 * pull the current vector page out from under us.
2379 */
2380 critical_enter();
2381 *pcb->pcb_pl1vec = pcb->pcb_l1vec;
2382 cpu_domains(pcb->pcb_dacr);
2383 cpu_setttb(pcb->pcb_pagedir);
2384 critical_exit();
2385 }
2386 pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE);
2387 /*
2388 * Make sure cpu_switch(), et al, DTRT. This is safe to do
2389 * since this process has no remaining mappings of its own.
2390 */
2391 curpcb->pcb_pl1vec = pcb->pcb_pl1vec;
2392 curpcb->pcb_l1vec = pcb->pcb_l1vec;
2393 curpcb->pcb_dacr = pcb->pcb_dacr;
2394 curpcb->pcb_pagedir = pcb->pcb_pagedir;
2395
2396 }
2397 pmap_free_l1(pmap);
2398
2399 dprintf("pmap_release()\n");
2400 }
2401
2402
2403
2404 /*
2405 * Helper function for pmap_grow_l2_bucket()
2406 */
2407 static __inline int
2408 pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap)
2409 {
2410 struct l2_bucket *l2b;
2411 pt_entry_t *ptep;
2412 vm_paddr_t pa;
2413 struct vm_page *pg;
2414
2415 pg = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
2416 if (pg == NULL)
2417 return (1);
2418 pa = VM_PAGE_TO_PHYS(pg);
2419
2420 if (pap)
2421 *pap = pa;
2422
2423 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2424
2425 ptep = &l2b->l2b_kva[l2pte_index(va)];
2426 *ptep = L2_S_PROTO | pa | cache_mode |
2427 L2_S_PROT(PTE_KERNEL, VM_PROT_READ | VM_PROT_WRITE);
2428 PTE_SYNC(ptep);
2429 return (0);
2430 }
2431
2432 /*
2433 * This is the same as pmap_alloc_l2_bucket(), except that it is only
2434 * used by pmap_growkernel().
2435 */
2436 static __inline struct l2_bucket *
2437 pmap_grow_l2_bucket(pmap_t pm, vm_offset_t va)
2438 {
2439 struct l2_dtable *l2;
2440 struct l2_bucket *l2b;
2441 struct l1_ttable *l1;
2442 pd_entry_t *pl1pd;
2443 u_short l1idx;
2444 vm_offset_t nva;
2445
2446 l1idx = L1_IDX(va);
2447
2448 if ((l2 = pm->pm_l2[L2_IDX(l1idx)]) == NULL) {
2449 /*
2450 * No mapping at this address, as there is
2451 * no entry in the L1 table.
2452 * Need to allocate a new l2_dtable.
2453 */
2454 nva = pmap_kernel_l2dtable_kva;
2455 if ((nva & PAGE_MASK) == 0) {
2456 /*
2457 * Need to allocate a backing page
2458 */
2459 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2460 return (NULL);
2461 }
2462
2463 l2 = (struct l2_dtable *)nva;
2464 nva += sizeof(struct l2_dtable);
2465
2466 if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva &
2467 PAGE_MASK)) {
2468 /*
2469 * The new l2_dtable straddles a page boundary.
2470 * Map in another page to cover it.
2471 */
2472 if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL))
2473 return (NULL);
2474 }
2475
2476 pmap_kernel_l2dtable_kva = nva;
2477
2478 /*
2479 * Link it into the parent pmap
2480 */
2481 pm->pm_l2[L2_IDX(l1idx)] = l2;
2482 memset(l2, 0, sizeof(*l2));
2483 }
2484
2485 l2b = &l2->l2_bucket[L2_BUCKET(l1idx)];
2486
2487 /*
2488 * Fetch pointer to the L2 page table associated with the address.
2489 */
2490 if (l2b->l2b_kva == NULL) {
2491 pt_entry_t *ptep;
2492
2493 /*
2494 * No L2 page table has been allocated. Chances are, this
2495 * is because we just allocated the l2_dtable, above.
2496 */
2497 nva = pmap_kernel_l2ptp_kva;
2498 ptep = (pt_entry_t *)nva;
2499 if ((nva & PAGE_MASK) == 0) {
2500 /*
2501 * Need to allocate a backing page
2502 */
2503 if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt,
2504 &pmap_kernel_l2ptp_phys))
2505 return (NULL);
2506 PTE_SYNC_RANGE(ptep, PAGE_SIZE / sizeof(pt_entry_t));
2507 }
2508 memset(ptep, 0, L2_TABLE_SIZE_REAL);
2509 l2->l2_occupancy++;
2510 l2b->l2b_kva = ptep;
2511 l2b->l2b_l1idx = l1idx;
2512 l2b->l2b_phys = pmap_kernel_l2ptp_phys;
2513
2514 pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL;
2515 pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL;
2516 }
2517
2518 /* Distribute new L1 entry to all other L1s */
2519 SLIST_FOREACH(l1, &l1_list, l1_link) {
2520 pl1pd = &l1->l1_kva[L1_IDX(va)];
2521 *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) |
2522 L1_C_PROTO;
2523 PTE_SYNC(pl1pd);
2524 }
2525
2526 return (l2b);
2527 }
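/*
 * Editor's note on the page-straddle check above (illustrative): after
 * advancing 'nva' by sizeof(struct l2_dtable), the new offset within the
 * page is compared against the offset the structure started at.  If the
 * new offset is smaller, the structure wrapped onto the next page, e.g.
 * (with a purely hypothetical structure size of 0x100 bytes)
 *
 *	old offset 0xf80 + 0x100 -> new offset 0x080 < 0xf80
 *
 * so a second backing page must be mapped before the l2_dtable is used.
 */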
2528
2529
2530 /*
2531 * grow the number of kernel page table entries, if needed
2532 */
2533 void
2534 pmap_growkernel(vm_offset_t addr)
2535 {
2536 pmap_t kpm = pmap_kernel();
2537
2538 if (addr <= pmap_curmaxkvaddr)
2539 return; /* we are OK */
2540
2541 /*
2542 * whoops! we need to add kernel PTPs
2543 */
2544
2545 /* Map 1MB at a time */
2546 for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE)
2547 pmap_grow_l2_bucket(kpm, pmap_curmaxkvaddr);
2548
2549 /*
2550 * flush out the cache, expensive but growkernel will happen so
2551 * rarely
2552 */
2553 cpu_dcache_wbinv_all();
2554 cpu_l2cache_wbinv_all();
2555 cpu_tlb_flushD();
2556 cpu_cpwait();
2557 kernel_vm_end = pmap_curmaxkvaddr;
2558 }
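/*
 * Editor's note (illustrative): growth is quantized to 1MB L1 sections.
 * For example, a request for a single page beyond pmap_curmaxkvaddr still
 * advances pmap_curmaxkvaddr by a full L1_S_SIZE, because each call to
 * pmap_grow_l2_bucket() provides the L2 page table covering one L1 slot:
 *
 *	pmap_growkernel(kernel_vm_end + PAGE_SIZE);
 *	    -> one pmap_grow_l2_bucket() call; the map grows by a section
 */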
2559
2560
2561 /*
2562  * Remove all pages from the specified address space;
2563  * this aids process exit speeds. Also, this code
2564  * is special-cased for the current process only, but
2565 * can have the more generic (and slightly slower)
2566 * mode enabled. This is much faster than pmap_remove
2567 * in the case of running down an entire address space.
2568 */
2569 void
2570 pmap_remove_pages(pmap_t pmap)
2571 {
2572 struct pv_entry *pv, *npv;
2573 struct l2_bucket *l2b = NULL;
2574 vm_page_t m;
2575 pt_entry_t *pt;
2576
2577 rw_wlock(&pvh_global_lock);
2578 PMAP_LOCK(pmap);
2579 cpu_idcache_wbinv_all();
2580 cpu_l2cache_wbinv_all();
2581 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
2582 if (pv->pv_flags & PVF_WIRED || pv->pv_flags & PVF_UNMAN) {
2583 /* Cannot remove wired or unmanaged pages now. */
2584 npv = TAILQ_NEXT(pv, pv_plist);
2585 continue;
2586 }
2587 pmap->pm_stats.resident_count--;
2588 l2b = pmap_get_l2_bucket(pmap, pv->pv_va);
2589 KASSERT(l2b != NULL, ("No L2 bucket in pmap_remove_pages"));
2590 pt = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
2591 m = PHYS_TO_VM_PAGE(*pt & L2_ADDR_MASK);
2592 KASSERT((vm_offset_t)m >= KERNBASE, ("Trying to access non-existent page va %x pte %x", pv->pv_va, *pt));
2593 *pt = 0;
2594 PTE_SYNC(pt);
2595 npv = TAILQ_NEXT(pv, pv_plist);
2596 pmap_nuke_pv(m, pmap, pv);
2597 if (TAILQ_EMPTY(&m->md.pv_list))
2598 vm_page_aflag_clear(m, PGA_WRITEABLE);
2599 pmap_free_pv_entry(pv);
2600 pmap_free_l2_bucket(pmap, l2b, 1);
2601 }
2602 rw_wunlock(&pvh_global_lock);
2603 cpu_tlb_flushID();
2604 cpu_cpwait();
2605 PMAP_UNLOCK(pmap);
2606 }
2607
2608
2609 /***************************************************
2610 * Low level mapping routines.....
2611 ***************************************************/
2612
2613 #ifdef ARM_HAVE_SUPERSECTIONS
2614 /* Map a super section into the KVA. */
2615
2616 void
2617 pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags)
2618 {
2619 pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) |
2620 (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL,
2621 VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL);
2622 struct l1_ttable *l1;
2623 vm_offset_t va0, va_end;
2624
2625 KASSERT(((va | pa) & L1_SUP_OFFSET) == 0,
2626 ("Not a valid super section mapping"));
2627 if (flags & SECTION_CACHE)
2628 pd |= pte_l1_s_cache_mode;
2629 else if (flags & SECTION_PT)
2630 pd |= pte_l1_s_cache_mode_pt;
2631 va0 = va & L1_SUP_FRAME;
2632 va_end = va + L1_SUP_SIZE;
2633 SLIST_FOREACH(l1, &l1_list, l1_link) {
2634 va = va0;
2635 for (; va < va_end; va += L1_S_SIZE) {
2636 l1->l1_kva[L1_IDX(va)] = pd;
2637 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
2638 }
2639 }
2640 }
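/*
 * Editor's note (illustrative): the (((pa >> 32) & 0xf) << 20) term above
 * places physical address bits [35:32] into descriptor bits [23:20], which
 * is where the ARM supersection descriptor carries the extended base
 * address (consult the ARM ARM for the authoritative encoding).  Note also
 * that the descriptor is written into all sixteen 1MB L1 slots spanned by
 * the 16MB supersection, and into every L1 table on l1_list, so all
 * address spaces see the kernel mapping.
 */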
2641 #endif
2642
2643 /* Map a section into the KVA. */
2644
2645 void
2646 pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags)
2647 {
2648 pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL,
2649 VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL);
2650 struct l1_ttable *l1;
2651
2652 KASSERT(((va | pa) & L1_S_OFFSET) == 0,
2653 ("Not a valid section mapping"));
2654 if (flags & SECTION_CACHE)
2655 pd |= pte_l1_s_cache_mode;
2656 else if (flags & SECTION_PT)
2657 pd |= pte_l1_s_cache_mode_pt;
2658 SLIST_FOREACH(l1, &l1_list, l1_link) {
2659 l1->l1_kva[L1_IDX(va)] = pd;
2660 PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
2661 }
2662 }
2663
2664 /*
2665 * Make a temporary mapping for a physical address. This is only intended
2666 * to be used for panic dumps.
2667 */
2668 void *
2669 pmap_kenter_temporary(vm_paddr_t pa, int i)
2670 {
2671 vm_offset_t va;
2672
2673 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
2674 pmap_kenter(va, pa);
2675 return ((void *)crashdumpmap);
2676 }
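/*
 * Illustrative usage sketch (editor's example): the page is mapped at
 * crashdumpmap + i * PAGE_SIZE, while the returned pointer is always the
 * base of crashdumpmap, so dump code indexes from the base, e.g.
 *
 *	void *base;
 *
 *	base = pmap_kenter_temporary(pa, i);
 *	memcpy(buf, (char *)base + i * PAGE_SIZE, PAGE_SIZE);
 */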
2677
2678 /*
2679 * add a wired page to the kva
2680  * note that in order for the mapping to take effect -- you
2681  * should do an invltlb after doing the pmap_kenter...
2682 */
2683 static PMAP_INLINE void
2684 pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags)
2685 {
2686 struct l2_bucket *l2b;
2687 pt_entry_t *pte;
2688 pt_entry_t opte;
2689 struct pv_entry *pve;
2690 vm_page_t m;
2691
2692 PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n",
2693 (uint32_t) va, (uint32_t) pa));
2694
2695
2696 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2697 if (l2b == NULL)
2698 l2b = pmap_grow_l2_bucket(pmap_kernel(), va);
2699 KASSERT(l2b != NULL, ("No L2 Bucket"));
2700 pte = &l2b->l2b_kva[l2pte_index(va)];
2701 opte = *pte;
2702 PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n",
2703 (uint32_t) pte, opte, *pte));
2704 if (l2pte_valid(opte)) {
2705 pmap_kremove(va);
2706 } else {
2707 if (opte == 0)
2708 l2b->l2b_occupancy++;
2709 }
2710 *pte = L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL,
2711 VM_PROT_READ | VM_PROT_WRITE);
2712 if (flags & KENTER_CACHE)
2713 *pte |= pte_l2_s_cache_mode;
2714 if (flags & KENTER_USER)
2715 *pte |= L2_S_PROT_U;
2716 PTE_SYNC(pte);
2717
2718 /*
2719 * A kernel mapping may not be the page's only mapping, so create a PV
2720 * entry to ensure proper caching.
2721 *
2722 * The existence test for the pvzone is used to delay the recording of
2723 * kernel mappings until the VM system is fully initialized.
2724 *
2725 * This expects the physical memory to have a vm_page_array entry.
2726 */
2727 if (pvzone != NULL && (m = vm_phys_paddr_to_vm_page(pa)) != NULL) {
2728 rw_wlock(&pvh_global_lock);
2729 if (!TAILQ_EMPTY(&m->md.pv_list) || m->md.pv_kva != 0) {
2730 if ((pve = pmap_get_pv_entry()) == NULL)
2731 panic("pmap_kenter_internal: no pv entries");
2732 PMAP_LOCK(pmap_kernel());
2733 pmap_enter_pv(m, pve, pmap_kernel(), va,
2734 PVF_WRITE | PVF_UNMAN);
2735 pmap_fix_cache(m, pmap_kernel(), va);
2736 PMAP_UNLOCK(pmap_kernel());
2737 } else {
2738 m->md.pv_kva = va;
2739 }
2740 rw_wunlock(&pvh_global_lock);
2741 }
2742 }
2743
2744 void
2745 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
2746 {
2747 pmap_kenter_internal(va, pa, KENTER_CACHE);
2748 }
2749
2750 void
2751 pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa)
2752 {
2753
2754 pmap_kenter_internal(va, pa, 0);
2755 }
2756
2757 void
2758 pmap_kenter_device(vm_offset_t va, vm_paddr_t pa)
2759 {
2760
2761 /*
2762 * XXX - Need a way for kenter_internal to handle PTE_DEVICE mapping as
2763 * a potentially different thing than PTE_NOCACHE.
2764 */
2765 pmap_kenter_internal(va, pa, 0);
2766 }
2767
2768 void
2769 pmap_kenter_user(vm_offset_t va, vm_paddr_t pa)
2770 {
2771
2772 pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER);
2773 /*
2774 * Call pmap_fault_fixup now, to make sure we'll have no exception
2775 * at the first use of the new address, or bad things will happen,
2776 * as we use one of these addresses in the exception handlers.
2777 */
2778 pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1);
2779 }
2780
2781 vm_paddr_t
2782 pmap_kextract(vm_offset_t va)
2783 {
2784
2785 return (pmap_extract_locked(kernel_pmap, va));
2786 }
2787
2788 /*
2789 * remove a page from the kernel pagetables
2790 */
2791 void
2792 pmap_kremove(vm_offset_t va)
2793 {
2794 struct l2_bucket *l2b;
2795 pt_entry_t *pte, opte;
2796 struct pv_entry *pve;
2797 vm_page_t m;
2798 vm_offset_t pa;
2799
2800 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
2801 if (!l2b)
2802 return;
2803 KASSERT(l2b != NULL, ("No L2 Bucket"));
2804 pte = &l2b->l2b_kva[l2pte_index(va)];
2805 opte = *pte;
2806 if (l2pte_valid(opte)) {
2807 		/* pa = vtophys(va) taken from pmap_extract() */
2808 switch (opte & L2_TYPE_MASK) {
2809 case L2_TYPE_L:
2810 pa = (opte & L2_L_FRAME) | (va & L2_L_OFFSET);
2811 break;
2812 default:
2813 pa = (opte & L2_S_FRAME) | (va & L2_S_OFFSET);
2814 break;
2815 }
2816 /* note: should never have to remove an allocation
2817 * before the pvzone is initialized.
2818 */
2819 rw_wlock(&pvh_global_lock);
2820 PMAP_LOCK(pmap_kernel());
2821 if (pvzone != NULL && (m = vm_phys_paddr_to_vm_page(pa)) &&
2822 (pve = pmap_remove_pv(m, pmap_kernel(), va)))
2823 pmap_free_pv_entry(pve);
2824 PMAP_UNLOCK(pmap_kernel());
2825 rw_wunlock(&pvh_global_lock);
2826 va = va & ~PAGE_MASK;
2827 cpu_dcache_wbinv_range(va, PAGE_SIZE);
2828 cpu_l2cache_wbinv_range(va, PAGE_SIZE);
2829 cpu_tlb_flushD_SE(va);
2830 cpu_cpwait();
2831 *pte = 0;
2832 }
2833 }
2834
2835
2836 /*
2837 * Used to map a range of physical addresses into kernel
2838 * virtual address space.
2839 *
2840 * The value passed in '*virt' is a suggested virtual address for
2841 * the mapping. Architectures which can support a direct-mapped
2842 * physical to virtual region can return the appropriate address
2843 * within that region, leaving '*virt' unchanged. Other
2844 * architectures should map the pages starting at '*virt' and
2845 * update '*virt' with the first usable address after the mapped
2846 * region.
2847 */
2848 vm_offset_t
2849 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
2850 {
2851 vm_offset_t sva = *virt;
2852 vm_offset_t va = sva;
2853
2854 PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, "
2855 "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end,
2856 prot));
2857
2858 while (start < end) {
2859 pmap_kenter(va, start);
2860 va += PAGE_SIZE;
2861 start += PAGE_SIZE;
2862 }
2863 *virt = va;
2864 return (sva);
2865 }
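/*
 * Illustrative usage sketch (editor's example, hypothetical variables):
 * on arm there is no direct map, so this always enters the pages at *virt
 * and advances *virt past the region:
 *
 *	vm_offset_t va_cursor = virtual_avail;
 *	vm_offset_t kva;
 *
 *	kva = pmap_map(&va_cursor, buf_pa, buf_pa + 4 * PAGE_SIZE,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *	    kva is the old va_cursor; va_cursor now points past the 4 pages
 */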
2866
2867 static void
2868 pmap_wb_page(vm_page_t m)
2869 {
2870 struct pv_entry *pv;
2871
2872 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
2873 pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, FALSE,
2874 (pv->pv_flags & PVF_WRITE) == 0);
2875 }
2876
2877 static void
2878 pmap_inv_page(vm_page_t m)
2879 {
2880 struct pv_entry *pv;
2881
2882 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
2883 pmap_dcache_wb_range(pv->pv_pmap, pv->pv_va, PAGE_SIZE, TRUE, TRUE);
2884 }
2885 /*
2886  * Add a list of wired pages to the kva.
2887  * This routine is only used for temporary
2888 * kernel mappings that do not need to have
2889 * page modification or references recorded.
2890 * Note that old mappings are simply written
2891 * over. The page *must* be wired.
2892 */
2893 void
2894 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
2895 {
2896 int i;
2897
2898 for (i = 0; i < count; i++) {
2899 pmap_wb_page(m[i]);
2900 pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]),
2901 KENTER_CACHE);
2902 va += PAGE_SIZE;
2903 }
2904 }
2905
2906
2907 /*
2908 * this routine jerks page mappings from the
2909 * kernel -- it is meant only for temporary mappings.
2910 */
2911 void
2912 pmap_qremove(vm_offset_t va, int count)
2913 {
2914 vm_paddr_t pa;
2915 int i;
2916
2917 for (i = 0; i < count; i++) {
2918 pa = vtophys(va);
2919 if (pa) {
2920 pmap_inv_page(PHYS_TO_VM_PAGE(pa));
2921 pmap_kremove(va);
2922 }
2923 va += PAGE_SIZE;
2924 }
2925 }
2926
2927
2928 /*
2929 * pmap_object_init_pt preloads the ptes for a given object
2930 * into the specified pmap. This eliminates the blast of soft
2931 * faults on process startup and immediately after an mmap.
2932 */
2933 void
2934 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2935 vm_pindex_t pindex, vm_size_t size)
2936 {
2937
2938 VM_OBJECT_ASSERT_WLOCKED(object);
2939 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2940 ("pmap_object_init_pt: non-device object"));
2941 }
2942
2943
2944 /*
2945 * pmap_is_prefaultable:
2946 *
2947  *	Return whether or not the specified virtual address is eligible
2948 * for prefault.
2949 */
2950 boolean_t
2951 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2952 {
2953 pd_entry_t *pde;
2954 pt_entry_t *pte;
2955
2956 if (!pmap_get_pde_pte(pmap, addr, &pde, &pte))
2957 return (FALSE);
2958 KASSERT(pte != NULL, ("Valid mapping but no pte ?"));
2959 if (*pte == 0)
2960 return (TRUE);
2961 return (FALSE);
2962 }
2963
2964 /*
2965 * Fetch pointers to the PDE/PTE for the given pmap/VA pair.
2966 * Returns TRUE if the mapping exists, else FALSE.
2967 *
2968 * NOTE: This function is only used by a couple of arm-specific modules.
2969 * It is not safe to take any pmap locks here, since we could be right
2970 * in the middle of debugging the pmap anyway...
2971 *
2972 * It is possible for this routine to return FALSE even though a valid
2973 * mapping does exist. This is because we don't lock, so the metadata
2974 * state may be inconsistent.
2975 *
2976 * NOTE: We can return a NULL *ptp in the case where the L1 pde is
2977 * a "section" mapping.
2978 */
2979 boolean_t
2980 pmap_get_pde_pte(pmap_t pm, vm_offset_t va, pd_entry_t **pdp, pt_entry_t **ptp)
2981 {
2982 struct l2_dtable *l2;
2983 pd_entry_t *pl1pd, l1pd;
2984 pt_entry_t *ptep;
2985 u_short l1idx;
2986
2987 if (pm->pm_l1 == NULL)
2988 return (FALSE);
2989
2990 l1idx = L1_IDX(va);
2991 *pdp = pl1pd = &pm->pm_l1->l1_kva[l1idx];
2992 l1pd = *pl1pd;
2993
2994 if (l1pte_section_p(l1pd)) {
2995 *ptp = NULL;
2996 return (TRUE);
2997 }
2998
2999 if (pm->pm_l2 == NULL)
3000 return (FALSE);
3001
3002 l2 = pm->pm_l2[L2_IDX(l1idx)];
3003
3004 if (l2 == NULL ||
3005 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3006 return (FALSE);
3007 }
3008
3009 *ptp = &ptep[l2pte_index(va)];
3010 return (TRUE);
3011 }
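/*
 * Illustrative usage sketch (editor's example, not part of the original):
 *
 *	pd_entry_t *pde;
 *	pt_entry_t *pte;
 *
 *	if (pmap_get_pde_pte(pm, va, &pde, &pte)) {
 *		if (pte == NULL)
 *			printf("va covered by an L1 section mapping\n");
 *		else
 *			printf("L2 descriptor is %#x\n", *pte);
 *	}
 *
 * As the comment above warns, a FALSE return does not prove the mapping
 * is absent, since no locks are taken here.
 */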
3012
3013 /*
3014 * Routine: pmap_remove_all
3015 * Function:
3016 * Removes this physical page from
3017 * all physical maps in which it resides.
3018 * Reflects back modify bits to the pager.
3019 *
3020 * Notes:
3021 * Original versions of this routine were very
3022 * inefficient because they iteratively called
3023 * pmap_remove (slow...)
3024 */
3025 void
3026 pmap_remove_all(vm_page_t m)
3027 {
3028 pv_entry_t pv;
3029 pt_entry_t *ptep;
3030 struct l2_bucket *l2b;
3031 boolean_t flush = FALSE;
3032 pmap_t curpm;
3033 int flags = 0;
3034
3035 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3036 ("pmap_remove_all: page %p is not managed", m));
3037 if (TAILQ_EMPTY(&m->md.pv_list))
3038 return;
3039 rw_wlock(&pvh_global_lock);
3040 pmap_remove_write(m);
3041 curpm = vmspace_pmap(curproc->p_vmspace);
3042 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
3043 if (flush == FALSE && (pv->pv_pmap == curpm ||
3044 pv->pv_pmap == pmap_kernel()))
3045 flush = TRUE;
3046
3047 PMAP_LOCK(pv->pv_pmap);
3048 /*
3049 * Cached contents were written-back in pmap_remove_write(),
3050 * but we still have to invalidate the cache entry to make
3051 * sure stale data are not retrieved when another page will be
3052 * mapped under this virtual address.
3053 */
3054 if (pmap_is_current(pv->pv_pmap)) {
3055 cpu_dcache_inv_range(pv->pv_va, PAGE_SIZE);
3056 if (pmap_has_valid_mapping(pv->pv_pmap, pv->pv_va))
3057 cpu_l2cache_inv_range(pv->pv_va, PAGE_SIZE);
3058 }
3059
3060 if (pv->pv_flags & PVF_UNMAN) {
3061 /* remove the pv entry, but do not remove the mapping
3062 * and remember this is a kernel mapped page
3063 */
3064 m->md.pv_kva = pv->pv_va;
3065 } else {
3066 /* remove the mapping and pv entry */
3067 l2b = pmap_get_l2_bucket(pv->pv_pmap, pv->pv_va);
3068 KASSERT(l2b != NULL, ("No l2 bucket"));
3069 ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)];
3070 *ptep = 0;
3071 PTE_SYNC_CURRENT(pv->pv_pmap, ptep);
3072 pmap_free_l2_bucket(pv->pv_pmap, l2b, 1);
3073 pv->pv_pmap->pm_stats.resident_count--;
3074 flags |= pv->pv_flags;
3075 }
3076 pmap_nuke_pv(m, pv->pv_pmap, pv);
3077 PMAP_UNLOCK(pv->pv_pmap);
3078 pmap_free_pv_entry(pv);
3079 }
3080
3081 if (flush) {
3082 if (PV_BEEN_EXECD(flags))
3083 pmap_tlb_flushID(curpm);
3084 else
3085 pmap_tlb_flushD(curpm);
3086 }
3087 vm_page_aflag_clear(m, PGA_WRITEABLE);
3088 rw_wunlock(&pvh_global_lock);
3089 }
3090
3091
3092 /*
3093 * Set the physical protection on the
3094 * specified range of this map as requested.
3095 */
3096 void
3097 pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
3098 {
3099 struct l2_bucket *l2b;
3100 pt_entry_t *ptep, pte;
3101 vm_offset_t next_bucket;
3102 u_int flags;
3103 int flush;
3104
3105 CTR4(KTR_PMAP, "pmap_protect: pmap %p sva 0x%08x eva 0x%08x prot %x",
3106 pm, sva, eva, prot);
3107
3108 if ((prot & VM_PROT_READ) == 0) {
3109 pmap_remove(pm, sva, eva);
3110 return;
3111 }
3112
3113 if (prot & VM_PROT_WRITE) {
3114 /*
3115 * If this is a read->write transition, just ignore it and let
3116 * vm_fault() take care of it later.
3117 */
3118 return;
3119 }
3120
3121 rw_wlock(&pvh_global_lock);
3122 PMAP_LOCK(pm);
3123
3124 /*
3125 * OK, at this point, we know we're doing write-protect operation.
3126 * If the pmap is active, write-back the range.
3127 */
3128 pmap_dcache_wb_range(pm, sva, eva - sva, FALSE, FALSE);
3129
3130 flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1;
3131 flags = 0;
3132
3133 while (sva < eva) {
3134 next_bucket = L2_NEXT_BUCKET(sva);
3135 if (next_bucket > eva)
3136 next_bucket = eva;
3137
3138 l2b = pmap_get_l2_bucket(pm, sva);
3139 if (l2b == NULL) {
3140 sva = next_bucket;
3141 continue;
3142 }
3143
3144 ptep = &l2b->l2b_kva[l2pte_index(sva)];
3145
3146 while (sva < next_bucket) {
3147 if ((pte = *ptep) != 0 && (pte & L2_S_PROT_W) != 0) {
3148 struct vm_page *pg;
3149 u_int f;
3150
3151 pg = PHYS_TO_VM_PAGE(l2pte_pa(pte));
3152 pte &= ~L2_S_PROT_W;
3153 *ptep = pte;
3154 PTE_SYNC(ptep);
3155
3156 if (!(pg->oflags & VPO_UNMANAGED)) {
3157 f = pmap_modify_pv(pg, pm, sva,
3158 PVF_WRITE, 0);
3159 if (f & PVF_WRITE)
3160 vm_page_dirty(pg);
3161 } else
3162 f = 0;
3163
3164 if (flush >= 0) {
3165 flush++;
3166 flags |= f;
3167 } else
3168 if (PV_BEEN_EXECD(f))
3169 pmap_tlb_flushID_SE(pm, sva);
3170 else
3171 if (PV_BEEN_REFD(f))
3172 pmap_tlb_flushD_SE(pm, sva);
3173 }
3174
3175 sva += PAGE_SIZE;
3176 ptep++;
3177 }
3178 }
3179
3180
3181 if (flush) {
3182 if (PV_BEEN_EXECD(flags))
3183 pmap_tlb_flushID(pm);
3184 else
3185 if (PV_BEEN_REFD(flags))
3186 pmap_tlb_flushD(pm);
3187 }
3188 rw_wunlock(&pvh_global_lock);
3189
3190 PMAP_UNLOCK(pm);
3191 }
3192
3193
3194 /*
3195 * Insert the given physical page (p) at
3196 * the specified virtual address (v) in the
3197 * target physical map with the protection requested.
3198 *
3199 * If specified, the page will be wired down, meaning
3200 * that the related pte can not be reclaimed.
3201 *
3202 * NB: This is the only routine which MAY NOT lazy-evaluate
3203 * or lose information. That is, this routine must actually
3204 * insert this page into the given map NOW.
3205 */
3206
3207 int
3208 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
3209 u_int flags, int8_t psind __unused)
3210 {
3211 int rv;
3212
3213 rw_wlock(&pvh_global_lock);
3214 PMAP_LOCK(pmap);
3215 rv = pmap_enter_locked(pmap, va, m, prot, flags);
3216 rw_wunlock(&pvh_global_lock);
3217 PMAP_UNLOCK(pmap);
3218 return (rv);
3219 }
3220
3221 /*
3222 * The pvh global and pmap locks must be held.
3223 */
3224 static int
3225 pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
3226 u_int flags)
3227 {
3228 struct l2_bucket *l2b = NULL;
3229 struct vm_page *opg;
3230 struct pv_entry *pve = NULL;
3231 pt_entry_t *ptep, npte, opte;
3232 u_int nflags;
3233 u_int oflags;
3234 vm_paddr_t pa;
3235
3236 PMAP_ASSERT_LOCKED(pmap);
3237 rw_assert(&pvh_global_lock, RA_WLOCKED);
3238 if (va == vector_page) {
3239 pa = systempage.pv_pa;
3240 m = NULL;
3241 } else {
3242 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
3243 VM_OBJECT_ASSERT_LOCKED(m->object);
3244 pa = VM_PAGE_TO_PHYS(m);
3245 }
3246 nflags = 0;
3247 if (prot & VM_PROT_WRITE)
3248 nflags |= PVF_WRITE;
3249 if (prot & VM_PROT_EXECUTE)
3250 nflags |= PVF_EXEC;
3251 if ((flags & PMAP_ENTER_WIRED) != 0)
3252 nflags |= PVF_WIRED;
3253 PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, prot = %x, "
3254 "flags = %x\n", (uint32_t) pmap, va, (uint32_t) m, prot, flags));
3255
3256 if (pmap == pmap_kernel()) {
3257 l2b = pmap_get_l2_bucket(pmap, va);
3258 if (l2b == NULL)
3259 l2b = pmap_grow_l2_bucket(pmap, va);
3260 } else {
3261 do_l2b_alloc:
3262 l2b = pmap_alloc_l2_bucket(pmap, va);
3263 if (l2b == NULL) {
3264 if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
3265 PMAP_UNLOCK(pmap);
3266 rw_wunlock(&pvh_global_lock);
3267 VM_WAIT;
3268 rw_wlock(&pvh_global_lock);
3269 PMAP_LOCK(pmap);
3270 goto do_l2b_alloc;
3271 }
3272 return (KERN_RESOURCE_SHORTAGE);
3273 }
3274 }
3275
3276 ptep = &l2b->l2b_kva[l2pte_index(va)];
3277
3278 opte = *ptep;
3279 npte = pa;
3280 oflags = 0;
3281 if (opte) {
3282 /*
3283 * There is already a mapping at this address.
3284 * If the physical address is different, lookup the
3285 * vm_page.
3286 */
3287 if (l2pte_pa(opte) != pa)
3288 opg = PHYS_TO_VM_PAGE(l2pte_pa(opte));
3289 else
3290 opg = m;
3291 } else
3292 opg = NULL;
3293
3294 if ((prot & (VM_PROT_ALL)) ||
3295 (!m || m->md.pvh_attrs & PVF_REF)) {
3296 /*
3297 * - The access type indicates that we don't need
3298 * to do referenced emulation.
3299 * OR
3300 * - The physical page has already been referenced
3301 * so no need to re-do referenced emulation here.
3302 */
3303 npte |= L2_S_PROTO;
3304
3305 nflags |= PVF_REF;
3306
3307 if (m && ((prot & VM_PROT_WRITE) != 0 ||
3308 (m->md.pvh_attrs & PVF_MOD))) {
3309 /*
3310 * This is a writable mapping, and the
3311 * page's mod state indicates it has
3312 * already been modified. Make it
3313 * writable from the outset.
3314 */
3315 nflags |= PVF_MOD;
3316 if (!(m->md.pvh_attrs & PVF_MOD))
3317 vm_page_dirty(m);
3318 }
3319 if (m && opte)
3320 vm_page_aflag_set(m, PGA_REFERENCED);
3321 } else {
3322 /*
3323 * Need to do page referenced emulation.
3324 */
3325 npte |= L2_TYPE_INV;
3326 }
3327
3328 if (prot & VM_PROT_WRITE) {
3329 npte |= L2_S_PROT_W;
3330 if (m != NULL &&
3331 (m->oflags & VPO_UNMANAGED) == 0)
3332 vm_page_aflag_set(m, PGA_WRITEABLE);
3333 }
3334 if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE)
3335 npte |= pte_l2_s_cache_mode;
3336 if (m && m == opg) {
3337 /*
3338 * We're changing the attrs of an existing mapping.
3339 */
3340 oflags = pmap_modify_pv(m, pmap, va,
3341 PVF_WRITE | PVF_EXEC | PVF_WIRED |
3342 PVF_MOD | PVF_REF, nflags);
3343
3344 /*
3345 * We may need to flush the cache if we're
3346 * doing rw-ro...
3347 */
3348 if (pmap_is_current(pmap) &&
3349 (oflags & PVF_NC) == 0 &&
3350 (opte & L2_S_PROT_W) != 0 &&
3351 (prot & VM_PROT_WRITE) == 0 &&
3352 (opte & L2_TYPE_MASK) != L2_TYPE_INV) {
3353 cpu_dcache_wb_range(va, PAGE_SIZE);
3354 cpu_l2cache_wb_range(va, PAGE_SIZE);
3355 }
3356 } else {
3357 /*
3358 * New mapping, or changing the backing page
3359 * of an existing mapping.
3360 */
3361 if (opg) {
3362 /*
3363 * Replacing an existing mapping with a new one.
3364 * It is part of our managed memory so we
3365 * must remove it from the PV list
3366 */
3367 if ((pve = pmap_remove_pv(opg, pmap, va))) {
3368
3369 /* note for patch: the oflags/invalidation was moved
3370 * because PG_FICTITIOUS pages could free the pve
3371 */
3372 oflags = pve->pv_flags;
3373 /*
3374 * If the old mapping was valid (ref/mod
3375 * emulation creates 'invalid' mappings
3376 * initially) then make sure to frob
3377 * the cache.
3378 */
3379 if ((oflags & PVF_NC) == 0 && l2pte_valid(opte)) {
3380 if (PV_BEEN_EXECD(oflags)) {
3381 pmap_idcache_wbinv_range(pmap, va,
3382 PAGE_SIZE);
3383 } else
3384 if (PV_BEEN_REFD(oflags)) {
3385 pmap_dcache_wb_range(pmap, va,
3386 PAGE_SIZE, TRUE,
3387 (oflags & PVF_WRITE) == 0);
3388 }
3389 }
3390
3391 /* free/allocate a pv_entry for UNMANAGED pages if
3392 * this physical page is not/is already mapped.
3393 */
3394
3395 if (m && (m->oflags & VPO_UNMANAGED) &&
3396 !m->md.pv_kva &&
3397 TAILQ_EMPTY(&m->md.pv_list)) {
3398 pmap_free_pv_entry(pve);
3399 pve = NULL;
3400 }
3401 } else if (m &&
3402 (!(m->oflags & VPO_UNMANAGED) || m->md.pv_kva ||
3403 !TAILQ_EMPTY(&m->md.pv_list)))
3404 pve = pmap_get_pv_entry();
3405 } else if (m &&
3406 (!(m->oflags & VPO_UNMANAGED) || m->md.pv_kva ||
3407 !TAILQ_EMPTY(&m->md.pv_list)))
3408 pve = pmap_get_pv_entry();
3409
3410 if (m) {
3411 if ((m->oflags & VPO_UNMANAGED)) {
3412 if (!TAILQ_EMPTY(&m->md.pv_list) ||
3413 m->md.pv_kva) {
3414 KASSERT(pve != NULL, ("No pv"));
3415 nflags |= PVF_UNMAN;
3416 pmap_enter_pv(m, pve, pmap, va, nflags);
3417 } else
3418 m->md.pv_kva = va;
3419 } else {
3420 KASSERT(va < kmi.clean_sva ||
3421 va >= kmi.clean_eva,
3422 ("pmap_enter: managed mapping within the clean submap"));
3423 KASSERT(pve != NULL, ("No pv"));
3424 pmap_enter_pv(m, pve, pmap, va, nflags);
3425 }
3426 }
3427 }
3428 /*
3429 * Make sure userland mappings get the right permissions
3430 */
3431 if (pmap != pmap_kernel() && va != vector_page) {
3432 npte |= L2_S_PROT_U;
3433 }
3434
3435 /*
3436 * Keep the stats up to date
3437 */
3438 if (opte == 0) {
3439 l2b->l2b_occupancy++;
3440 pmap->pm_stats.resident_count++;
3441 }
3442
3443 /*
3444 * If this is just a wiring change, the two PTEs will be
3445 * identical, so there's no need to update the page table.
3446 */
3447 if (npte != opte) {
3448 boolean_t is_cached = pmap_is_current(pmap);
3449
3450 *ptep = npte;
3451 if (is_cached) {
3452 /*
3453 * We only need to frob the cache/tlb if this pmap
3454 * is current
3455 */
3456 PTE_SYNC(ptep);
3457 if (L1_IDX(va) != L1_IDX(vector_page) &&
3458 l2pte_valid(npte)) {
3459 /*
3460 * This mapping is likely to be accessed as
3461 * soon as we return to userland. Fix up the
3462 * L1 entry to avoid taking another
3463 * page/domain fault.
3464 */
3465 pd_entry_t *pl1pd, l1pd;
3466
3467 pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)];
3468 l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) |
3469 L1_C_PROTO;
3470 if (*pl1pd != l1pd) {
3471 *pl1pd = l1pd;
3472 PTE_SYNC(pl1pd);
3473 }
3474 }
3475 }
3476
3477 if (PV_BEEN_EXECD(oflags))
3478 pmap_tlb_flushID_SE(pmap, va);
3479 else if (PV_BEEN_REFD(oflags))
3480 pmap_tlb_flushD_SE(pmap, va);
3481
3482
3483 if (m)
3484 pmap_fix_cache(m, pmap, va);
3485 }
3486 return (KERN_SUCCESS);
3487 }
3488
3489 /*
3490 * Maps a sequence of resident pages belonging to the same object.
3491 * The sequence begins with the given page m_start. This page is
3492 * mapped at the given virtual address start. Each subsequent page is
3493 * mapped at a virtual address that is offset from start by the same
3494 * amount as the page is offset from m_start within the object. The
3495 * last page in the sequence is the page with the largest offset from
3496 * m_start that can be mapped at a virtual address less than the given
3497 * virtual address end. Not every virtual page between start and end
3498 * is mapped; only those for which a resident page exists with the
3499 * corresponding offset from m_start are mapped.
3500 */
3501 void
3502 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
3503 vm_page_t m_start, vm_prot_t prot)
3504 {
3505 vm_page_t m;
3506 vm_pindex_t diff, psize;
3507
3508 VM_OBJECT_ASSERT_LOCKED(m_start->object);
3509
3510 psize = atop(end - start);
3511 m = m_start;
3512 rw_wlock(&pvh_global_lock);
3513 PMAP_LOCK(pmap);
3514 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
3515 pmap_enter_locked(pmap, start + ptoa(diff), m, prot &
3516 (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP);
3517 m = TAILQ_NEXT(m, listq);
3518 }
3519 rw_wunlock(&pvh_global_lock);
3520 PMAP_UNLOCK(pmap);
3521 }
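/*
 * Editor's worked example (illustrative): with m_start->pindex == 10,
 * start == 0x20000000 and end == 0x20004000 (psize == 4 pages), a resident
 * page with pindex 12 is entered at start + ptoa(12 - 10) == 0x20002000,
 * while a page with pindex 14 or higher falls outside [start, end) and is
 * skipped.  Mappings are created with PMAP_ENTER_NOSLEEP and without
 * write permission; write access, if needed, is granted later on a fault.
 */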
3522
3523 /*
3524 * this code makes some *MAJOR* assumptions:
3525 * 1. Current pmap & pmap exists.
3526 * 2. Not wired.
3527 * 3. Read access.
3528 * 4. No page table pages.
3529 * but is *MUCH* faster than pmap_enter...
3530 */
3531
3532 void
3533 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
3534 {
3535
3536 rw_wlock(&pvh_global_lock);
3537 PMAP_LOCK(pmap);
3538 pmap_enter_locked(pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE),
3539 PMAP_ENTER_NOSLEEP);
3540 rw_wunlock(&pvh_global_lock);
3541 PMAP_UNLOCK(pmap);
3542 }
3543
3544 /*
3545 * Clear the wired attribute from the mappings for the specified range of
3546 * addresses in the given pmap. Every valid mapping within that range
3547 * must have the wired attribute set. In contrast, invalid mappings
3548 * cannot have the wired attribute set, so they are ignored.
3549 *
3550 * XXX Wired mappings of unmanaged pages cannot be counted by this pmap
3551 * implementation.
3552 */
3553 void
3554 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
3555 {
3556 struct l2_bucket *l2b;
3557 pt_entry_t *ptep, pte;
3558 pv_entry_t pv;
3559 vm_offset_t next_bucket;
3560 vm_page_t m;
3561
3562 rw_wlock(&pvh_global_lock);
3563 PMAP_LOCK(pmap);
3564 while (sva < eva) {
3565 next_bucket = L2_NEXT_BUCKET(sva);
3566 if (next_bucket > eva)
3567 next_bucket = eva;
3568 l2b = pmap_get_l2_bucket(pmap, sva);
3569 if (l2b == NULL) {
3570 sva = next_bucket;
3571 continue;
3572 }
3573 for (ptep = &l2b->l2b_kva[l2pte_index(sva)]; sva < next_bucket;
3574 sva += PAGE_SIZE, ptep++) {
3575 if ((pte = *ptep) == 0 ||
3576 (m = PHYS_TO_VM_PAGE(l2pte_pa(pte))) == NULL ||
3577 (m->oflags & VPO_UNMANAGED) != 0)
3578 continue;
3579 pv = pmap_find_pv(m, pmap, sva);
3580 if ((pv->pv_flags & PVF_WIRED) == 0)
3581 panic("pmap_unwire: pv %p isn't wired", pv);
3582 pv->pv_flags &= ~PVF_WIRED;
3583 pmap->pm_stats.wired_count--;
3584 }
3585 }
3586 rw_wunlock(&pvh_global_lock);
3587 PMAP_UNLOCK(pmap);
3588 }
3589
3590
3591 /*
3592 * Copy the range specified by src_addr/len
3593 * from the source map to the range dst_addr/len
3594 * in the destination map.
3595 *
3596 * This routine is only advisory and need not do anything.
3597 */
3598 void
3599 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
3600 vm_size_t len, vm_offset_t src_addr)
3601 {
3602 }
3603
3604
3605 /*
3606 * Routine: pmap_extract
3607 * Function:
3608 * Extract the physical page address associated
3609 * with the given map/virtual_address pair.
3610 */
3611 vm_paddr_t
3612 pmap_extract(pmap_t pmap, vm_offset_t va)
3613 {
3614 vm_paddr_t pa;
3615
3616 PMAP_LOCK(pmap);
3617 pa = pmap_extract_locked(pmap, va);
3618 PMAP_UNLOCK(pmap);
3619 return (pa);
3620 }
3621
3622 static vm_paddr_t
3623 pmap_extract_locked(pmap_t pmap, vm_offset_t va)
3624 {
3625 struct l2_dtable *l2;
3626 pd_entry_t l1pd;
3627 pt_entry_t *ptep, pte;
3628 vm_paddr_t pa;
3629 u_int l1idx;
3630
3631 if (pmap != kernel_pmap)
3632 PMAP_ASSERT_LOCKED(pmap);
3633 l1idx = L1_IDX(va);
3634 l1pd = pmap->pm_l1->l1_kva[l1idx];
3635 if (l1pte_section_p(l1pd)) {
3636 /*
3637 * These should only happen for the kernel pmap.
3638 */
3639 KASSERT(pmap == kernel_pmap, ("unexpected section"));
3640 /* XXX: what to do about the bits > 32 ? */
3641 if (l1pd & L1_S_SUPERSEC)
3642 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET);
3643 else
3644 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
3645 } else {
3646 /*
3647 * Note that we can't rely on the validity of the L1
3648 * descriptor as an indication that a mapping exists.
3649 * We have to look it up in the L2 dtable.
3650 */
3651 l2 = pmap->pm_l2[L2_IDX(l1idx)];
3652 if (l2 == NULL ||
3653 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL)
3654 return (0);
3655 pte = ptep[l2pte_index(va)];
3656 if (pte == 0)
3657 return (0);
3658 switch (pte & L2_TYPE_MASK) {
3659 case L2_TYPE_L:
3660 pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
3661 break;
3662 default:
3663 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
3664 break;
3665 }
3666 }
3667 return (pa);
3668 }
3669
3670 /*
3671 * Atomically extract and hold the physical page with the given
3672 * pmap and virtual address pair if that mapping permits the given
3673 * protection.
3674 *
3675 */
3676 vm_page_t
3677 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
3678 {
3679 struct l2_dtable *l2;
3680 pd_entry_t l1pd;
3681 pt_entry_t *ptep, pte;
3682 vm_paddr_t pa, paddr;
3683 vm_page_t m = NULL;
3684 u_int l1idx;
3685 l1idx = L1_IDX(va);
3686 paddr = 0;
3687
3688 PMAP_LOCK(pmap);
3689 retry:
3690 l1pd = pmap->pm_l1->l1_kva[l1idx];
3691 if (l1pte_section_p(l1pd)) {
3692 /*
3693 * These should only happen for pmap_kernel()
3694 */
3695 KASSERT(pmap == pmap_kernel(), ("huh"));
3696 /* XXX: what to do about the bits > 32 ? */
3697 if (l1pd & L1_S_SUPERSEC)
3698 pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET);
3699 else
3700 pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET);
3701 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr))
3702 goto retry;
3703 if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) {
3704 m = PHYS_TO_VM_PAGE(pa);
3705 vm_page_hold(m);
3706 }
3707
3708 } else {
3709 /*
3710 * Note that we can't rely on the validity of the L1
3711 * descriptor as an indication that a mapping exists.
3712 * We have to look it up in the L2 dtable.
3713 */
3714 l2 = pmap->pm_l2[L2_IDX(l1idx)];
3715
3716 if (l2 == NULL ||
3717 (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) {
3718 PMAP_UNLOCK(pmap);
3719 return (NULL);
3720 }
3721
3722 ptep = &ptep[l2pte_index(va)];
3723 pte = *ptep;
3724
3725 if (pte == 0) {
3726 PMAP_UNLOCK(pmap);
3727 return (NULL);
3728 }
3729 if (pte & L2_S_PROT_W || (prot & VM_PROT_WRITE) == 0) {
3730 switch (pte & L2_TYPE_MASK) {
3731 case L2_TYPE_L:
3732 pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET);
3733 break;
3734
3735 default:
3736 pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET);
3737 break;
3738 }
3739 if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr))
3740 goto retry;
3741 m = PHYS_TO_VM_PAGE(pa);
3742 vm_page_hold(m);
3743 }
3744 }
3745
3746 PMAP_UNLOCK(pmap);
3747 PA_UNLOCK_COND(paddr);
3748 return (m);
3749 }
3750
3751 /*
3752 * Initialize a preallocated and zeroed pmap structure,
3753 * such as one in a vmspace structure.
3754 */
3755
3756 int
3757 pmap_pinit(pmap_t pmap)
3758 {
3759 PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap));
3760
3761 pmap_alloc_l1(pmap);
3762 bzero(pmap->pm_l2, sizeof(pmap->pm_l2));
3763
3764 CPU_ZERO(&pmap->pm_active);
3765
3766 TAILQ_INIT(&pmap->pm_pvlist);
3767 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
3768 pmap->pm_stats.resident_count = 1;
3769 if (vector_page < KERNBASE) {
3770 pmap_enter(pmap, vector_page, PHYS_TO_VM_PAGE(systempage.pv_pa),
3771 VM_PROT_READ, PMAP_ENTER_WIRED | VM_PROT_READ, 0);
3772 }
3773 return (1);
3774 }
3775
3776
3777 /***************************************************
3778 * page management routines.
3779 ***************************************************/
3780
3781
3782 static void
3783 pmap_free_pv_entry(pv_entry_t pv)
3784 {
3785 pv_entry_count--;
3786 uma_zfree(pvzone, pv);
3787 }
3788
3789
3790 /*
3791 * get a new pv_entry, allocating a block from the system
3792 * when needed.
3793 * the memory allocation is performed bypassing the malloc code
3794 * because of the possibility of allocations at interrupt time.
3795 */
3796 static pv_entry_t
3797 pmap_get_pv_entry(void)
3798 {
3799 pv_entry_t ret_value;
3800
3801 pv_entry_count++;
3802 if (pv_entry_count > pv_entry_high_water)
3803 pagedaemon_wakeup();
3804 ret_value = uma_zalloc(pvzone, M_NOWAIT);
3805 return ret_value;
3806 }
3807
3808 /*
3809 * Remove the given range of addresses from the specified map.
3810 *
3811 * It is assumed that the start and end are properly
3812 * rounded to the page size.
3813 */
3814 #define PMAP_REMOVE_CLEAN_LIST_SIZE 3
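/*
 * Editor's note (illustrative): PMAP_REMOVE_CLEAN_LIST_SIZE caps the number
 * of per-page cache clean/invalidate operations a single pmap_remove() will
 * perform.  Once that many pages have been handled individually, the code
 * below falls back to a full I/D-cache and L2 write-back-invalidate and
 * defers to one full TLB flush (flushall) on the way out, which is cheaper
 * than continuing page by page for large ranges.
 */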
3815 void
3816 pmap_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
3817 {
3818 struct l2_bucket *l2b;
3819 vm_offset_t next_bucket;
3820 pt_entry_t *ptep;
3821 u_int total;
3822 u_int mappings, is_exec, is_refd;
3823 int flushall = 0;
3824
3825
3826 /*
3827 * we lock in the pmap => pv_head direction
3828 */
3829
3830 rw_wlock(&pvh_global_lock);
3831 PMAP_LOCK(pm);
3832 total = 0;
3833 while (sva < eva) {
3834 /*
3835 * Do one L2 bucket's worth at a time.
3836 */
3837 next_bucket = L2_NEXT_BUCKET(sva);
3838 if (next_bucket > eva)
3839 next_bucket = eva;
3840
3841 l2b = pmap_get_l2_bucket(pm, sva);
3842 if (l2b == NULL) {
3843 sva = next_bucket;
3844 continue;
3845 }
3846
3847 ptep = &l2b->l2b_kva[l2pte_index(sva)];
3848 mappings = 0;
3849
3850 while (sva < next_bucket) {
3851 struct vm_page *pg;
3852 pt_entry_t pte;
3853 vm_paddr_t pa;
3854
3855 pte = *ptep;
3856
3857 if (pte == 0) {
3858 /*
3859 * Nothing here, move along
3860 */
3861 sva += PAGE_SIZE;
3862 ptep++;
3863 continue;
3864 }
3865
3866 pm->pm_stats.resident_count--;
3867 pa = l2pte_pa(pte);
3868 is_exec = 0;
3869 is_refd = 1;
3870
3871 /*
3872 * Update flags. In a number of circumstances,
3873 * we could cluster a lot of these and do a
3874 * number of sequential pages in one go.
3875 */
3876 if ((pg = PHYS_TO_VM_PAGE(pa)) != NULL) {
3877 struct pv_entry *pve;
3878
3879 pve = pmap_remove_pv(pg, pm, sva);
3880 if (pve) {
3881 is_exec = PV_BEEN_EXECD(pve->pv_flags);
3882 is_refd = PV_BEEN_REFD(pve->pv_flags);
3883 pmap_free_pv_entry(pve);
3884 }
3885 }
3886
3887 if (l2pte_valid(pte) && pmap_is_current(pm)) {
3888 if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) {
3889 total++;
3890 if (is_exec) {
3891 cpu_idcache_wbinv_range(sva,
3892 PAGE_SIZE);
3893 cpu_l2cache_wbinv_range(sva,
3894 PAGE_SIZE);
3895 cpu_tlb_flushID_SE(sva);
3896 } else if (is_refd) {
3897 cpu_dcache_wbinv_range(sva,
3898 PAGE_SIZE);
3899 cpu_l2cache_wbinv_range(sva,
3900 PAGE_SIZE);
3901 cpu_tlb_flushD_SE(sva);
3902 }
3903 } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE) {
3904 				/* flushall will also only get set for
3905 				 * a current pmap
3906 */
3907 cpu_idcache_wbinv_all();
3908 cpu_l2cache_wbinv_all();
3909 flushall = 1;
3910 total++;
3911 }
3912 }
3913 *ptep = 0;
3914 PTE_SYNC(ptep);
3915
3916 sva += PAGE_SIZE;
3917 ptep++;
3918 mappings++;
3919 }
3920
3921 pmap_free_l2_bucket(pm, l2b, mappings);
3922 }
3923
3924 rw_wunlock(&pvh_global_lock);
3925 if (flushall)
3926 cpu_tlb_flushID();
3927 PMAP_UNLOCK(pm);
3928 }
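/*
 * Illustrative sketch only (hypothetical helper, not part of the original
 * pmap.c): pmap_remove() assumes its bounds are already rounded to the page
 * size, so a caller unmapping an arbitrary byte range would typically
 * truncate the start down and round the end up before calling it.
 */
static void
example_unmap_range(pmap_t pm, vm_offset_t start, vm_size_t len)
{
	vm_offset_t sva, eva;

	sva = trunc_page(start);		/* round start down to a page boundary */
	eva = round_page(start + len);		/* round end up to a page boundary */
	pmap_remove(pm, sva, eva);
}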
3929
3930 /*
3931 * pmap_zero_page()
3932 *
3933 * Zero a given physical page by mapping it at a page hook point.
3934 * In doing the zero page op, the page we zero is mapped cachable, as with
3935  * In doing the zero page op, the page we zero is mapped cacheable, since on
3936  * StrongARM accesses to non-cached pages are non-burst, making writing
3937  * _any_ bulk data very slow.
3938 #if ARM_MMU_GENERIC != 0 || defined(CPU_XSCALE_CORE3)
3939 void
3940 pmap_zero_page_generic(vm_paddr_t phys, int off, int size)
3941 {
3942
3943 if (_arm_bzero && size >= _min_bzero_size &&
3944 _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0)
3945 return;
3946
3947 mtx_lock(&cmtx);
3948 /*
3949 * Hook in the page, zero it, invalidate the TLB as needed.
3950 *
3951 * Note the temporary zero-page mapping must be a non-cached page in
3952 * order to work without corruption when write-allocate is enabled.
3953 */
3954 *cdst_pte = L2_S_PROTO | phys | L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE);
3955 PTE_SYNC(cdst_pte);
3956 cpu_tlb_flushD_SE(cdstp);
3957 cpu_cpwait();
3958 if (off || size != PAGE_SIZE)
3959 bzero((void *)(cdstp + off), size);
3960 else
3961 bzero_page(cdstp);
3962
3963 mtx_unlock(&cmtx);
3964 }
3965 #endif /* ARM_MMU_GENERIC != 0 || CPU_XSCALE_CORE3 */
3966
3967 #if ARM_MMU_XSCALE == 1
3968 void
3969 pmap_zero_page_xscale(vm_paddr_t phys, int off, int size)
3970 {
3971
3972 if (_arm_bzero && size >= _min_bzero_size &&
3973 _arm_bzero((void *)(phys + off), size, IS_PHYSICAL) == 0)
3974 return;
3975
3976 mtx_lock(&cmtx);
3977 /*
3978 * Hook in the page, zero it, and purge the cache for that
3979 * zeroed page. Invalidate the TLB as needed.
3980 */
3981 *cdst_pte = L2_S_PROTO | phys |
3982 L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
3983 L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); /* mini-data */
3984 PTE_SYNC(cdst_pte);
3985 cpu_tlb_flushD_SE(cdstp);
3986 cpu_cpwait();
3987 if (off || size != PAGE_SIZE)
3988 bzero((void *)(cdstp + off), size);
3989 else
3990 bzero_page(cdstp);
3991 mtx_unlock(&cmtx);
3992 xscale_cache_clean_minidata();
3993 }
3994
3995 /*
3996 * Change the PTEs for the specified kernel mappings such that they
3997 * will use the mini data cache instead of the main data cache.
3998 */
3999 void
4000 pmap_use_minicache(vm_offset_t va, vm_size_t size)
4001 {
4002 struct l2_bucket *l2b;
4003 pt_entry_t *ptep, *sptep, pte;
4004 vm_offset_t next_bucket, eva;
4005
4006 #if (ARM_NMMUS > 1) || defined(CPU_XSCALE_CORE3)
4007 if (xscale_use_minidata == 0)
4008 return;
4009 #endif
4010
4011 eva = va + size;
4012
4013 while (va < eva) {
4014 next_bucket = L2_NEXT_BUCKET(va);
4015 if (next_bucket > eva)
4016 next_bucket = eva;
4017
4018 l2b = pmap_get_l2_bucket(pmap_kernel(), va);
4019
4020 sptep = ptep = &l2b->l2b_kva[l2pte_index(va)];
4021
4022 while (va < next_bucket) {
4023 pte = *ptep;
4024 if (!l2pte_minidata(pte)) {
4025 cpu_dcache_wbinv_range(va, PAGE_SIZE);
4026 cpu_tlb_flushD_SE(va);
4027 *ptep = pte & ~L2_B;
4028 }
4029 ptep++;
4030 va += PAGE_SIZE;
4031 }
4032 PTE_SYNC_RANGE(sptep, (u_int)(ptep - sptep));
4033 }
4034 cpu_cpwait();
4035 }
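/*
 * Illustrative sketch only (hypothetical caller, not part of the original
 * pmap.c): redirecting a kernel buffer to the XScale mini data cache. The
 * buffer address and length are assumptions; the range is page-rounded here
 * because the loop above rewrites whole PTEs.
 */
static void
example_use_minicache(vm_offset_t buf_va, vm_size_t buf_len)
{
	vm_offset_t sva, eva;

	sva = trunc_page(buf_va);
	eva = round_page(buf_va + buf_len);
	pmap_use_minicache(sva, eva - sva);
}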
4036 #endif /* ARM_MMU_XSCALE == 1 */
4037
4038 /*
4039 * pmap_zero_page zeros the specified hardware page by mapping
4040 * the page into KVM and using bzero to clear its contents.
4041 */
4042 void
4043 pmap_zero_page(vm_page_t m)
4044 {
4045 pmap_zero_page_func(VM_PAGE_TO_PHYS(m), 0, PAGE_SIZE);
4046 }
4047
4048
4049 /*
4050 * pmap_zero_page_area zeros the specified hardware page by mapping
4051 * the page into KVM and using bzero to clear its contents.
4052 *
4053 * off and size may not cover an area beyond a single hardware page.
4054 */
4055 void
4056 pmap_zero_page_area(vm_page_t m, int off, int size)
4057 {
4058
4059 pmap_zero_page_func(VM_PAGE_TO_PHYS(m), off, size);
4060 }
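/*
 * Illustrative sketch only (hypothetical caller, not part of the original
 * pmap.c): the off/size pair handed to pmap_zero_page_area() must stay
 * within one hardware page, so a caller clearing the tail of a page bounds
 * the request accordingly.
 */
static void
example_clear_page_tail(vm_page_t m, int off)
{

	KASSERT(off >= 0 && off < PAGE_SIZE,
	    ("example_clear_page_tail: bad offset %d", off));
	pmap_zero_page_area(m, off, PAGE_SIZE - off);
}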
4061
4062
4063 /*
4064 * pmap_zero_page_idle zeros the specified hardware page by mapping
4065 * the page into KVM and using bzero to clear its contents. This
4066 * is intended to be called from the vm_pagezero process only and
4067 * outside of Giant.
4068 */
4069 void
4070 pmap_zero_page_idle(vm_page_t m)
4071 {
4072
4073 pmap_zero_page(m);
4074 }
4075
4076 #if 0
4077 /*
4078 * pmap_clean_page()
4079 *
4080 * This is a local function used to work out the best strategy to clean
4081 * a single page referenced by its entry in the PV table. It should be used by
4082  * pmap_copy_page, pmap_zero_page and maybe some others later on.
4083 *
4084 * Its policy is effectively:
4085 * o If there are no mappings, we don't bother doing anything with the cache.
4086 * o If there is one mapping, we clean just that page.
4087 * o If there are multiple mappings, we clean the entire cache.
4088 *
4089 * So that some functions can be further optimised, it returns 0 if it didn't
4090 * clean the entire cache, or 1 if it did.
4091 *
4092 * XXX One bug in this routine is that if the pv_entry has a single page
4093 * mapped at 0x00000000 a whole cache clean will be performed rather than
4094  * just the 1 page. This should not occur in everyday use, and if it does it
4095  * will merely result in a less efficient clean for the page.
4096 *
4097 * We don't yet use this function but may want to.
4098 */
4099 static int
4100 pmap_clean_page(struct pv_entry *pv, boolean_t is_src)
4101 {
4102 pmap_t pm, pm_to_clean = NULL;
4103 struct pv_entry *npv;
4104 u_int cache_needs_cleaning = 0;
4105 u_int flags = 0;
4106 vm_offset_t page_to_clean = 0;
4107
4108 if (pv == NULL) {
4109 /* nothing mapped in so nothing to flush */
4110 return (0);
4111 }
4112
4113 /*
4114 * Since we flush the cache each time we change to a different
4115 * user vmspace, we only need to flush the page if it is in the
4116 * current pmap.
4117 */
4118 if (curthread)
4119 pm = vmspace_pmap(curproc->p_vmspace);
4120 else
4121 pm = pmap_kernel();
4122
4123 for (npv = pv; npv; npv = TAILQ_NEXT(npv, pv_list)) {
4124 if (npv->pv_pmap == pmap_kernel() || npv->pv_pmap == pm) {
4125 flags |= npv->pv_flags;
4126 /*
4127 * The page is mapped non-cacheable in
4128 * this map. No need to flush the cache.
4129 */
4130 if (npv->pv_flags & PVF_NC) {
4131 #ifdef DIAGNOSTIC
4132 if (cache_needs_cleaning)
4133 panic("pmap_clean_page: "
4134 "cache inconsistency");
4135 #endif
4136 break;
4137 } else if (is_src && (npv->pv_flags & PVF_WRITE) == 0)
4138 continue;
4139 if (cache_needs_cleaning) {
4140 page_to_clean = 0;
4141 break;
4142 } else {
4143 page_to_clean = npv->pv_va;
4144 pm_to_clean = npv->pv_pmap;
4145 }
4146 cache_needs_cleaning = 1;
4147 }
4148 }
4149 if (page_to_clean) {
4150 if (PV_BEEN_EXECD(flags))
4151 pmap_idcache_wbinv_range(pm_to_clean, page_to_clean,
4152 PAGE_SIZE);
4153 else
4154 pmap_dcache_wb_range(pm_to_clean, page_to_clean,
4155 PAGE_SIZE, !is_src, (flags & PVF_WRITE) == 0);
4156 } else if (cache_needs_cleaning) {
4157 if (PV_BEEN_EXECD(flags))
4158 pmap_idcache_wbinv_all(pm);
4159 else
4160 pmap_dcache_wbinv_all(pm);
4161 return (1);
4162 }
4163 return (0);
4164 }
4165 #endif
4166
4167 /*
4168 * pmap_copy_page copies the specified (machine independent)
4169 * page by mapping the page into virtual memory and using
4170 * bcopy to copy the page, one machine dependent page at a
4171 * time.
4172 */
4173
4174 /*
4175 * pmap_copy_page()
4176 *
4177 * Copy one physical page into another, by mapping the pages into
4178  * hook points. The same comment regarding cacheability as in
4179 * pmap_zero_page also applies here.
4180 */
4181 #if ARM_MMU_GENERIC != 0 || defined (CPU_XSCALE_CORE3)
4182 void
4183 pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst)
4184 {
4185 #if 0
4186 struct vm_page *src_pg = PHYS_TO_VM_PAGE(src);
4187 #endif
4188
4189 /*
4190 * Clean the source page. Hold the source page's lock for
4191 * the duration of the copy so that no other mappings can
4192 * be created while we have a potentially aliased mapping.
4193 */
4194 #if 0
4195 /*
4196 * XXX: Not needed while we call cpu_dcache_wbinv_all() in
4197 * pmap_copy_page().
4198 */
4199 (void) pmap_clean_page(TAILQ_FIRST(&src_pg->md.pv_list), TRUE);
4200 #endif
4201 /*
4202 * Map the pages into the page hook points, copy them, and purge
4203 * the cache for the appropriate page. Invalidate the TLB
4204 * as required.
4205 */
4206 mtx_lock(&cmtx);
4207 *csrc_pte = L2_S_PROTO | src |
4208 L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | pte_l2_s_cache_mode;
4209 PTE_SYNC(csrc_pte);
4210 *cdst_pte = L2_S_PROTO | dst |
4211 L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
4212 PTE_SYNC(cdst_pte);
4213 cpu_tlb_flushD_SE(csrcp);
4214 cpu_tlb_flushD_SE(cdstp);
4215 cpu_cpwait();
4216 bcopy_page(csrcp, cdstp);
4217 mtx_unlock(&cmtx);
4218 cpu_dcache_inv_range(csrcp, PAGE_SIZE);
4219 cpu_dcache_wbinv_range(cdstp, PAGE_SIZE);
4220 cpu_l2cache_inv_range(csrcp, PAGE_SIZE);
4221 cpu_l2cache_wbinv_range(cdstp, PAGE_SIZE);
4222 }
4223
4224 void
4225 pmap_copy_page_offs_generic(vm_paddr_t a_phys, vm_offset_t a_offs,
4226 vm_paddr_t b_phys, vm_offset_t b_offs, int cnt)
4227 {
4228
4229 mtx_lock(&cmtx);
4230 *csrc_pte = L2_S_PROTO | a_phys |
4231 L2_S_PROT(PTE_KERNEL, VM_PROT_READ) | pte_l2_s_cache_mode;
4232 PTE_SYNC(csrc_pte);
4233 *cdst_pte = L2_S_PROTO | b_phys |
4234 L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) | pte_l2_s_cache_mode;
4235 PTE_SYNC(cdst_pte);
4236 cpu_tlb_flushD_SE(csrcp);
4237 cpu_tlb_flushD_SE(cdstp);
4238 cpu_cpwait();
4239 bcopy((char *)csrcp + a_offs, (char *)cdstp + b_offs, cnt);
4240 mtx_unlock(&cmtx);
4241 cpu_dcache_inv_range(csrcp + a_offs, cnt);
4242 cpu_dcache_wbinv_range(cdstp + b_offs, cnt);
4243 cpu_l2cache_inv_range(csrcp + a_offs, cnt);
4244 cpu_l2cache_wbinv_range(cdstp + b_offs, cnt);
4245 }
4246 #endif /* ARM_MMU_GENERIC != 0 || CPU_XSCALE_CORE3 */
4247
4248 #if ARM_MMU_XSCALE == 1
4249 void
4250 pmap_copy_page_xscale(vm_paddr_t src, vm_paddr_t dst)
4251 {
4252 #if 0
4253 /* XXX: Only needed for pmap_clean_page(), which is commented out. */
4254 struct vm_page *src_pg = PHYS_TO_VM_PAGE(src);
4255 #endif
4256
4257 /*
4258 * Clean the source page. Hold the source page's lock for
4259 * the duration of the copy so that no other mappings can
4260 * be created while we have a potentially aliased mapping.
4261 */
4262 #if 0
4263 /*
4264 * XXX: Not needed while we call cpu_dcache_wbinv_all() in
4265 * pmap_copy_page().
4266 */
4267 (void) pmap_clean_page(TAILQ_FIRST(&src_pg->md.pv_list), TRUE);
4268 #endif
4269 /*
4270 * Map the pages into the page hook points, copy them, and purge
4271 * the cache for the appropriate page. Invalidate the TLB
4272 * as required.
4273 */
4274 mtx_lock(&cmtx);
4275 *csrc_pte = L2_S_PROTO | src |
4276 L2_S_PROT(PTE_KERNEL, VM_PROT_READ) |
4277 L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); /* mini-data */
4278 PTE_SYNC(csrc_pte);
4279 *cdst_pte = L2_S_PROTO | dst |
4280 L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
4281 L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X); /* mini-data */
4282 PTE_SYNC(cdst_pte);
4283 cpu_tlb_flushD_SE(csrcp);
4284 cpu_tlb_flushD_SE(cdstp);
4285 cpu_cpwait();
4286 bcopy_page(csrcp, cdstp);
4287 mtx_unlock(&cmtx);
4288 xscale_cache_clean_minidata();
4289 }
4290
4291 void
4292 pmap_copy_page_offs_xscale(vm_paddr_t a_phys, vm_offset_t a_offs,
4293 vm_paddr_t b_phys, vm_offset_t b_offs, int cnt)
4294 {
4295
4296 mtx_lock(&cmtx);
4297 *csrc_pte = L2_S_PROTO | a_phys |
4298 L2_S_PROT(PTE_KERNEL, VM_PROT_READ) |
4299 L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);
4300 PTE_SYNC(csrc_pte);
4301 *cdst_pte = L2_S_PROTO | b_phys |
4302 L2_S_PROT(PTE_KERNEL, VM_PROT_WRITE) |
4303 L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X);
4304 PTE_SYNC(cdst_pte);
4305 cpu_tlb_flushD_SE(csrcp);
4306 cpu_tlb_flushD_SE(cdstp);
4307 cpu_cpwait();
4308 bcopy((char *)csrcp + a_offs, (char *)cdstp + b_offs, cnt);
4309 mtx_unlock(&cmtx);
4310 xscale_cache_clean_minidata();
4311 }
4312 #endif /* ARM_MMU_XSCALE == 1 */
4313
4314 void
4315 pmap_copy_page(vm_page_t src, vm_page_t dst)
4316 {
4317
4318 cpu_dcache_wbinv_all();
4319 cpu_l2cache_wbinv_all();
4320 if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size &&
4321 _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst),
4322 (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0)
4323 return;
4324 pmap_copy_page_func(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
4325 }
4326
4327 /*
4328 * We have code to do unmapped I/O. However, it isn't quite right and
4329 * causes un-page-aligned I/O to devices to fail (most notably newfs
4330 * or fsck). We give up a little performance to not allow unmapped I/O
4331  * or fsck). We give up a little performance and disallow unmapped I/O
4332  * in order to gain stability.
4333 int unmapped_buf_allowed = 0;
4334
4335 void
4336 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
4337 vm_offset_t b_offset, int xfersize)
4338 {
4339 vm_page_t a_pg, b_pg;
4340 vm_offset_t a_pg_offset, b_pg_offset;
4341 int cnt;
4342
4343 cpu_dcache_wbinv_all();
4344 cpu_l2cache_wbinv_all();
4345 while (xfersize > 0) {
4346 a_pg = ma[a_offset >> PAGE_SHIFT];
4347 a_pg_offset = a_offset & PAGE_MASK;
4348 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
4349 b_pg = mb[b_offset >> PAGE_SHIFT];
4350 b_pg_offset = b_offset & PAGE_MASK;
4351 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
4352 pmap_copy_page_offs_func(VM_PAGE_TO_PHYS(a_pg), a_pg_offset,
4353 VM_PAGE_TO_PHYS(b_pg), b_pg_offset, cnt);
4354 xfersize -= cnt;
4355 a_offset += cnt;
4356 b_offset += cnt;
4357 }
4358 }
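/*
 * Illustrative sketch only (not part of the original pmap.c): the
 * per-iteration chunk size in pmap_copy_pages() above is bounded by the
 * bytes remaining in both the current source page and the current
 * destination page, isolated here as a standalone helper for clarity.
 */
static int
example_copy_chunk_size(vm_offset_t a_offset, vm_offset_t b_offset, int xfersize)
{
	int cnt;

	cnt = min(xfersize, PAGE_SIZE - (a_offset & PAGE_MASK));
	cnt = min(cnt, PAGE_SIZE - (b_offset & PAGE_MASK));
	return (cnt);
}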
4359
4360 /*
4361 * this routine returns true if a physical page resides
4362 * in the given pmap.
4363 */
4364 boolean_t
4365 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
4366 {
4367 pv_entry_t pv;
4368 int loops = 0;
4369 boolean_t rv;
4370
4371 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4372 ("pmap_page_exists_quick: page %p is not managed", m));
4373 rv = FALSE;
4374 rw_wlock(&pvh_global_lock);
4375 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
4376 if (pv->pv_pmap == pmap) {
4377 rv = TRUE;
4378 break;
4379 }
4380 loops++;
4381 if (loops >= 16)
4382 break;
4383 }
4384 rw_wunlock(&pvh_global_lock);
4385 return (rv);
4386 }
4387
4388 /*
4389 * pmap_page_wired_mappings:
4390 *
4391 * Return the number of managed mappings to the given physical page
4392 * that are wired.
4393 */
4394 int
4395 pmap_page_wired_mappings(vm_page_t m)
4396 {
4397 pv_entry_t pv;
4398 int count;
4399
4400 count = 0;
4401 if ((m->oflags & VPO_UNMANAGED) != 0)
4402 return (count);
4403 rw_wlock(&pvh_global_lock);
4404 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list)
4405 if ((pv->pv_flags & PVF_WIRED) != 0)
4406 count++;
4407 rw_wunlock(&pvh_global_lock);
4408 return (count);
4409 }
4410
4411 /*
4412 * This function is advisory.
4413 */
4414 void
4415 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
4416 {
4417 }
4418
4419 /*
4420 * pmap_ts_referenced:
4421 *
4422 * Return the count of reference bits for a page, clearing all of them.
4423 */
4424 int
4425 pmap_ts_referenced(vm_page_t m)
4426 {
4427
4428 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4429 ("pmap_ts_referenced: page %p is not managed", m));
4430 return (pmap_clearbit(m, PVF_REF));
4431 }
4432
4433
4434 boolean_t
4435 pmap_is_modified(vm_page_t m)
4436 {
4437
4438 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4439 ("pmap_is_modified: page %p is not managed", m));
4440 if (m->md.pvh_attrs & PVF_MOD)
4441 return (TRUE);
4442
4443 return(FALSE);
4444 }
4445
4446
4447 /*
4448 * Clear the modify bits on the specified physical page.
4449 */
4450 void
4451 pmap_clear_modify(vm_page_t m)
4452 {
4453
4454 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4455 ("pmap_clear_modify: page %p is not managed", m));
4456 VM_OBJECT_ASSERT_WLOCKED(m->object);
4457 KASSERT(!vm_page_xbusied(m),
4458 ("pmap_clear_modify: page %p is exclusive busied", m));
4459
4460 /*
4461 * If the page is not PGA_WRITEABLE, then no mappings can be modified.
4462 * If the object containing the page is locked and the page is not
4463 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
4464 */
4465 if ((m->aflags & PGA_WRITEABLE) == 0)
4466 return;
4467 if (m->md.pvh_attrs & PVF_MOD)
4468 pmap_clearbit(m, PVF_MOD);
4469 }
4470
4471
4472 /*
4473 * pmap_is_referenced:
4474 *
4475 * Return whether or not the specified physical page was referenced
4476 * in any physical maps.
4477 */
4478 boolean_t
4479 pmap_is_referenced(vm_page_t m)
4480 {
4481
4482 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4483 ("pmap_is_referenced: page %p is not managed", m));
4484 return ((m->md.pvh_attrs & PVF_REF) != 0);
4485 }
4486
4487
4488 /*
4489 * Clear the write and modified bits in each of the given page's mappings.
4490 */
4491 void
4492 pmap_remove_write(vm_page_t m)
4493 {
4494
4495 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4496 ("pmap_remove_write: page %p is not managed", m));
4497
4498 /*
4499 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
4500 * set by another thread while the object is locked. Thus,
4501 * if PGA_WRITEABLE is clear, no page table entries need updating.
4502 */
4503 VM_OBJECT_ASSERT_WLOCKED(m->object);
4504 if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0)
4505 pmap_clearbit(m, PVF_WRITE);
4506 }
4507
4508
4509 /*
4510 * perform the pmap work for mincore
4511 */
4512 int
4513 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
4514 {
4515 struct l2_bucket *l2b;
4516 pt_entry_t *ptep, pte;
4517 vm_paddr_t pa;
4518 vm_page_t m;
4519 int val;
4520 boolean_t managed;
4521
4522 PMAP_LOCK(pmap);
4523 retry:
4524 l2b = pmap_get_l2_bucket(pmap, addr);
4525 if (l2b == NULL) {
4526 val = 0;
4527 goto out;
4528 }
4529 ptep = &l2b->l2b_kva[l2pte_index(addr)];
4530 pte = *ptep;
4531 if (!l2pte_valid(pte)) {
4532 val = 0;
4533 goto out;
4534 }
4535 val = MINCORE_INCORE;
4536 if (pte & L2_S_PROT_W)
4537 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4538 managed = false;
4539 pa = l2pte_pa(pte);
4540 m = PHYS_TO_VM_PAGE(pa);
4541 if (m != NULL && !(m->oflags & VPO_UNMANAGED))
4542 managed = true;
4543 if (managed) {
4544 /*
4545 * The ARM pmap tries to maintain a per-mapping
4546 * reference bit. The trouble is that it's kept in
4547 * the PV entry, not the PTE, so it's costly to access
4548 * here. You would need to acquire the pvh global
4549 * lock, call pmap_find_pv(), and introduce a custom
4550 * version of vm_page_pa_tryrelock() that releases and
4551 * reacquires the pvh global lock. In the end, I
4552 * doubt it's worthwhile. This may falsely report
4553 * the given address as referenced.
4554 */
4555 if ((m->md.pvh_attrs & PVF_REF) != 0)
4556 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
4557 }
4558 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
4559 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
4560 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
4561 if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
4562 goto retry;
4563 } else
4564 out:
4565 PA_UNLOCK_COND(*locked_pa);
4566 PMAP_UNLOCK(pmap);
4567 return (val);
4568 }
4569
4570
4571 void
4572 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
4573 {
4574 }
4575
4576
4577 /*
4578 * Increase the starting virtual address of the given mapping if a
4579 * different alignment might result in more superpage mappings.
4580 */
4581 void
4582 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
4583 vm_offset_t *addr, vm_size_t size)
4584 {
4585 }
4586
4587 #define BOOTSTRAP_DEBUG
4588
4589 /*
4590 * pmap_map_section:
4591 *
4592 * Create a single section mapping.
4593 */
4594 void
4595 pmap_map_section(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa,
4596 int prot, int cache)
4597 {
4598 pd_entry_t *pde = (pd_entry_t *) l1pt;
4599 pd_entry_t fl;
4600
4601 KASSERT(((va | pa) & L1_S_OFFSET) == 0, ("ouin2"));
4602
4603 switch (cache) {
4604 case PTE_NOCACHE:
4605 default:
4606 fl = 0;
4607 break;
4608
4609 case PTE_CACHE:
4610 fl = pte_l1_s_cache_mode;
4611 break;
4612
4613 case PTE_PAGETABLE:
4614 fl = pte_l1_s_cache_mode_pt;
4615 break;
4616 }
4617
4618 pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
4619 L1_S_PROT(PTE_KERNEL, prot) | fl | L1_S_DOM(PMAP_DOMAIN_KERNEL);
4620 PTE_SYNC(&pde[va >> L1_S_SHIFT]);
4621
4622 }
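/*
 * Illustrative sketch only (hypothetical bootstrap code, not part of the
 * original pmap.c): both the virtual and physical addresses must be
 * section-aligned or the KASSERT above fires; device registers are normally
 * mapped with PTE_NOCACHE. The addresses below are made-up placeholders.
 */
static void
example_map_device_section(vm_offset_t l1pt)
{
	vm_offset_t dev_va = 0xf0000000;	/* hypothetical, section aligned */
	vm_offset_t dev_pa = 0x40000000;	/* hypothetical, section aligned */

	pmap_map_section(l1pt, dev_va, dev_pa,
	    VM_PROT_READ | VM_PROT_WRITE, PTE_NOCACHE);
}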
4623
4624 /*
4625 * pmap_link_l2pt:
4626 *
4627 * Link the L2 page table specified by l2pv.pv_pa into the L1
4628 * page table at the slot for "va".
4629 */
4630 void
4631 pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv)
4632 {
4633 pd_entry_t *pde = (pd_entry_t *) l1pt, proto;
4634 u_int slot = va >> L1_S_SHIFT;
4635
4636 proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO;
4637
4638 #ifdef VERBOSE_INIT_ARM
4639 printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va);
4640 #endif
4641
4642 pde[slot + 0] = proto | (l2pv->pv_pa + 0x000);
4643
4644 PTE_SYNC(&pde[slot]);
4645
4646 SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list);
4647
4648
4649 }
4650
4651 /*
4652 * pmap_map_entry
4653 *
4654 * Create a single page mapping.
4655 */
4656 void
4657 pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot,
4658 int cache)
4659 {
4660 pd_entry_t *pde = (pd_entry_t *) l1pt;
4661 pt_entry_t fl;
4662 pt_entry_t *pte;
4663
4664 KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin"));
4665
4666 switch (cache) {
4667 case PTE_NOCACHE:
4668 default:
4669 fl = 0;
4670 break;
4671
4672 case PTE_CACHE:
4673 fl = pte_l2_s_cache_mode;
4674 break;
4675
4676 case PTE_PAGETABLE:
4677 fl = pte_l2_s_cache_mode_pt;
4678 break;
4679 }
4680
4681 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
4682 panic("pmap_map_entry: no L2 table for VA 0x%08x", va);
4683
4684 pte = (pt_entry_t *) kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK);
4685
4686 if (pte == NULL)
4687 panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va);
4688
4689 pte[l2pte_index(va)] =
4690 L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | fl;
4691 PTE_SYNC(&pte[l2pte_index(va)]);
4692 }
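/*
 * Illustrative sketch only (hypothetical bootstrap code, not part of the
 * original pmap.c): pmap_map_entry() panics unless a coarse L2 table already
 * covers the VA, so bootstrap code first links one in with pmap_link_l2pt()
 * and then installs the single-page mapping.
 */
static void
example_map_one_page(vm_offset_t l1pt, struct pv_addr *l2pt, vm_offset_t va,
    vm_offset_t pa)
{

	pmap_link_l2pt(l1pt, va, l2pt);		/* hook the L2 table into the L1 */
	pmap_map_entry(l1pt, va, pa, VM_PROT_READ | VM_PROT_WRITE, PTE_CACHE);
}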
4693
4694 /*
4695 * pmap_map_chunk:
4696 *
4697 * Map a chunk of memory using the most efficient mappings
4698  * possible (section, large page, small page) into the
4699 * provided L1 and L2 tables at the specified virtual address.
4700 */
4701 vm_size_t
4702 pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa,
4703 vm_size_t size, int prot, int cache)
4704 {
4705 pd_entry_t *pde = (pd_entry_t *) l1pt;
4706 pt_entry_t *pte, f1, f2s, f2l;
4707 vm_size_t resid;
4708 int i;
4709
4710 resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
4711
4712 if (l1pt == 0)
4713 panic("pmap_map_chunk: no L1 table provided");
4714
4715 #ifdef VERBOSE_INIT_ARM
4716 printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x "
4717 "prot=0x%x cache=%d\n", pa, va, size, resid, prot, cache);
4718 #endif
4719
4720 switch (cache) {
4721 case PTE_NOCACHE:
4722 default:
4723 f1 = 0;
4724 f2l = 0;
4725 f2s = 0;
4726 break;
4727
4728 case PTE_CACHE:
4729 f1 = pte_l1_s_cache_mode;
4730 f2l = pte_l2_l_cache_mode;
4731 f2s = pte_l2_s_cache_mode;
4732 break;
4733
4734 case PTE_PAGETABLE:
4735 f1 = pte_l1_s_cache_mode_pt;
4736 f2l = pte_l2_l_cache_mode_pt;
4737 f2s = pte_l2_s_cache_mode_pt;
4738 break;
4739 }
4740
4741 size = resid;
4742
4743 while (resid > 0) {
4744 /* See if we can use a section mapping. */
4745 if (L1_S_MAPPABLE_P(va, pa, resid)) {
4746 #ifdef VERBOSE_INIT_ARM
4747 printf("S");
4748 #endif
4749 pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa |
4750 L1_S_PROT(PTE_KERNEL, prot) | f1 |
4751 L1_S_DOM(PMAP_DOMAIN_KERNEL);
4752 PTE_SYNC(&pde[va >> L1_S_SHIFT]);
4753 va += L1_S_SIZE;
4754 pa += L1_S_SIZE;
4755 resid -= L1_S_SIZE;
4756 continue;
4757 }
4758
4759 /*
4760 * Ok, we're going to use an L2 table. Make sure
4761 * one is actually in the corresponding L1 slot
4762 * for the current VA.
4763 */
4764 if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C)
4765 panic("pmap_map_chunk: no L2 table for VA 0x%08x", va);
4766
4767 pte = (pt_entry_t *) kernel_pt_lookup(
4768 pde[L1_IDX(va)] & L1_C_ADDR_MASK);
4769 if (pte == NULL)
4770                         panic("pmap_map_chunk: can't find L2 table for VA"
4771                             " 0x%08x", va);
4772 /* See if we can use a L2 large page mapping. */
4773 if (L2_L_MAPPABLE_P(va, pa, resid)) {
4774 #ifdef VERBOSE_INIT_ARM
4775 printf("L");
4776 #endif
4777 for (i = 0; i < 16; i++) {
4778 pte[l2pte_index(va) + i] =
4779 L2_L_PROTO | pa |
4780 L2_L_PROT(PTE_KERNEL, prot) | f2l;
4781 PTE_SYNC(&pte[l2pte_index(va) + i]);
4782 }
4783 va += L2_L_SIZE;
4784 pa += L2_L_SIZE;
4785 resid -= L2_L_SIZE;
4786 continue;
4787 }
4788
4789 /* Use a small page mapping. */
4790 #ifdef VERBOSE_INIT_ARM
4791 printf("P");
4792 #endif
4793 pte[l2pte_index(va)] =
4794 L2_S_PROTO | pa | L2_S_PROT(PTE_KERNEL, prot) | f2s;
4795 PTE_SYNC(&pte[l2pte_index(va)]);
4796 va += PAGE_SIZE;
4797 pa += PAGE_SIZE;
4798 resid -= PAGE_SIZE;
4799 }
4800 #ifdef VERBOSE_INIT_ARM
4801 printf("\n");
4802 #endif
4803 return (size);
4804
4805 }
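/*
 * Illustrative sketch only (hypothetical bootstrap code, not part of the
 * original pmap.c): pmap_map_chunk() rounds the size up to a whole page and
 * returns the rounded amount, so a caller can advance its VA/PA cursors by
 * the return value when laying out consecutive regions.
 */
static vm_offset_t
example_map_region(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa,
    vm_size_t size)
{
	vm_size_t mapped;

	mapped = pmap_map_chunk(l1pt, va, pa, size,
	    VM_PROT_READ | VM_PROT_WRITE, PTE_CACHE);
	return (va + mapped);		/* first VA after the new mapping */
}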
4806
4807 void
4808 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
4809 {
4810 /*
4811 * Remember the memattr in a field that gets used to set the appropriate
4812 * bits in the PTEs as mappings are established.
4813 */
4814 m->md.pv_memattr = ma;
4815
4816 /*
4817 * It appears that this function can only be called before any mappings
4818 * for the page are established on ARM. If this ever changes, this code
4819 * will need to walk the pv_list and make each of the existing mappings
4820 * uncacheable, being careful to sync caches and PTEs (and maybe
4821 * invalidate TLB?) for any current mapping it modifies.
4822 */
4823 if (m->md.pv_kva != 0 || TAILQ_FIRST(&m->md.pv_list) != NULL)
4824 panic("Can't change memattr on page with existing mappings");
4825 }
4826
4827