FreeBSD/Linux Kernel Cross Reference
sys/riscv/riscv/pmap.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-4-Clause
    3  *
    4  * Copyright (c) 1991 Regents of the University of California.
    5  * All rights reserved.
    6  * Copyright (c) 1994 John S. Dyson
    7  * All rights reserved.
    8  * Copyright (c) 1994 David Greenman
    9  * All rights reserved.
   10  * Copyright (c) 2003 Peter Wemm
   11  * All rights reserved.
   12  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
   13  * All rights reserved.
   14  * Copyright (c) 2014 Andrew Turner
   15  * All rights reserved.
   16  * Copyright (c) 2014 The FreeBSD Foundation
   17  * All rights reserved.
   18  * Copyright (c) 2015-2018 Ruslan Bukin <br@bsdpad.com>
   19  * All rights reserved.
   20  *
   21  * This code is derived from software contributed to Berkeley by
   22  * the Systems Programming Group of the University of Utah Computer
   23  * Science Department and William Jolitz of UUNET Technologies Inc.
   24  *
   25  * Portions of this software were developed by Andrew Turner under
   26  * sponsorship from The FreeBSD Foundation.
   27  *
   28  * Portions of this software were developed by SRI International and the
   29  * University of Cambridge Computer Laboratory under DARPA/AFRL contract
   30  * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
   31  *
   32  * Portions of this software were developed by the University of Cambridge
   33  * Computer Laboratory as part of the CTSRD Project, with support from the
   34  * UK Higher Education Innovation Fund (HEIF).
   35  *
   36  * Redistribution and use in source and binary forms, with or without
   37  * modification, are permitted provided that the following conditions
   38  * are met:
   39  * 1. Redistributions of source code must retain the above copyright
   40  *    notice, this list of conditions and the following disclaimer.
   41  * 2. Redistributions in binary form must reproduce the above copyright
   42  *    notice, this list of conditions and the following disclaimer in the
   43  *    documentation and/or other materials provided with the distribution.
   44  * 3. All advertising materials mentioning features or use of this software
   45  *    must display the following acknowledgement:
   46  *      This product includes software developed by the University of
   47  *      California, Berkeley and its contributors.
   48  * 4. Neither the name of the University nor the names of its contributors
   49  *    may be used to endorse or promote products derived from this software
   50  *    without specific prior written permission.
   51  *
   52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   62  * SUCH DAMAGE.
   63  *
   64  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   65  */
   66 /*-
   67  * Copyright (c) 2003 Networks Associates Technology, Inc.
   68  * All rights reserved.
   69  *
   70  * This software was developed for the FreeBSD Project by Jake Burkholder,
   71  * Safeport Network Services, and Network Associates Laboratories, the
   72  * Security Research Division of Network Associates, Inc. under
   73  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   74  * CHATS research program.
   75  *
   76  * Redistribution and use in source and binary forms, with or without
   77  * modification, are permitted provided that the following conditions
   78  * are met:
   79  * 1. Redistributions of source code must retain the above copyright
   80  *    notice, this list of conditions and the following disclaimer.
   81  * 2. Redistributions in binary form must reproduce the above copyright
   82  *    notice, this list of conditions and the following disclaimer in the
   83  *    documentation and/or other materials provided with the distribution.
   84  *
   85  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   86  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   87  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   88  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   89  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   90  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   91  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   92  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   93  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   94  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   95  * SUCH DAMAGE.
   96  */
   97 
   98 #include <sys/cdefs.h>
   99 __FBSDID("$FreeBSD$");
  100 
  101 /*
  102  *      Manages physical address maps.
  103  *
  104  *      Since the information managed by this module is
  105  *      also stored by the logical address mapping module,
  106  *      this module may throw away valid virtual-to-physical
  107  *      mappings at almost any time.  However, invalidations
  108  *      of virtual-to-physical mappings must be done as
  109  *      requested.
  110  *
  111  *      In order to cope with hardware architectures which
  112  *      make virtual-to-physical map invalidates expensive,
   113  *      this module may delay invalidation or protection-reduction
  114  *      operations until such time as they are actually
  115  *      necessary.  This module is given full information as
  116  *      to which processors are currently using which maps,
  117  *      and to when physical maps must be made correct.
  118  */
  119 
  120 #include <sys/param.h>
  121 #include <sys/systm.h>
  122 #include <sys/bitstring.h>
  123 #include <sys/bus.h>
  124 #include <sys/cpuset.h>
  125 #include <sys/kernel.h>
  126 #include <sys/ktr.h>
  127 #include <sys/lock.h>
  128 #include <sys/malloc.h>
  129 #include <sys/mman.h>
  130 #include <sys/msgbuf.h>
  131 #include <sys/mutex.h>
  132 #include <sys/proc.h>
  133 #include <sys/rwlock.h>
  134 #include <sys/sx.h>
  135 #include <sys/vmem.h>
  136 #include <sys/vmmeter.h>
  137 #include <sys/sched.h>
  138 #include <sys/sysctl.h>
  139 #include <sys/smp.h>
  140 
  141 #include <vm/vm.h>
  142 #include <vm/vm_param.h>
  143 #include <vm/vm_kern.h>
  144 #include <vm/vm_page.h>
  145 #include <vm/vm_map.h>
  146 #include <vm/vm_object.h>
  147 #include <vm/vm_extern.h>
  148 #include <vm/vm_pageout.h>
  149 #include <vm/vm_pager.h>
  150 #include <vm/vm_phys.h>
  151 #include <vm/vm_radix.h>
  152 #include <vm/vm_reserv.h>
  153 #include <vm/uma.h>
  154 
  155 #include <machine/machdep.h>
  156 #include <machine/md_var.h>
  157 #include <machine/pcb.h>
  158 #include <machine/sbi.h>
  159 
  160 #define NUL1E           (Ln_ENTRIES * Ln_ENTRIES)
  161 #define NUL2E           (Ln_ENTRIES * NUL1E)
  162 
  163 #if !defined(DIAGNOSTIC)
  164 #ifdef __GNUC_GNU_INLINE__
  165 #define PMAP_INLINE     __attribute__((__gnu_inline__)) inline
  166 #else
  167 #define PMAP_INLINE     extern inline
  168 #endif
  169 #else
  170 #define PMAP_INLINE
  171 #endif
  172 
  173 #ifdef PV_STATS
  174 #define PV_STAT(x)      do { x ; } while (0)
  175 #else
  176 #define PV_STAT(x)      do { } while (0)
  177 #endif
  178 
  179 #define pmap_l2_pindex(v)       ((v) >> L2_SHIFT)
  180 #define pa_to_pvh(pa)           (&pv_table[pa_index(pa)])
  181 
  182 #define NPV_LIST_LOCKS  MAXCPU
  183 
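       /*
        * PV list locking: each managed page's PV list is protected by one
        * of NPV_LIST_LOCKS rwlocks, selected by hashing the 2MB-region
        * index of the page's physical address.  Callers that visit many
        * pages carry a pointer to the lock they currently hold; the
        * CHANGE_* macros below switch to the correct lock when the target
        * page hashes to a different one.  A typical caller looks roughly
        * like this (a sketch, not a required interface):
        *
        *      struct rwlock *lock = NULL;
        *
        *      CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
        *      ... manipulate m's PV list ...
        *      RELEASE_PV_LIST_LOCK(&lock);
        */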
  184 #define PHYS_TO_PV_LIST_LOCK(pa)        \
  185                         (&pv_list_locks[pmap_l2_pindex(pa) % NPV_LIST_LOCKS])
  186 
  187 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  do {    \
  188         struct rwlock **_lockp = (lockp);               \
  189         struct rwlock *_new_lock;                       \
  190                                                         \
  191         _new_lock = PHYS_TO_PV_LIST_LOCK(pa);           \
  192         if (_new_lock != *_lockp) {                     \
  193                 if (*_lockp != NULL)                    \
  194                         rw_wunlock(*_lockp);            \
  195                 *_lockp = _new_lock;                    \
  196                 rw_wlock(*_lockp);                      \
  197         }                                               \
  198 } while (0)
  199 
  200 #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)        \
  201                         CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
  202 
  203 #define RELEASE_PV_LIST_LOCK(lockp)             do {    \
  204         struct rwlock **_lockp = (lockp);               \
  205                                                         \
  206         if (*_lockp != NULL) {                          \
  207                 rw_wunlock(*_lockp);                    \
  208                 *_lockp = NULL;                         \
  209         }                                               \
  210 } while (0)
  211 
  212 #define VM_PAGE_TO_PV_LIST_LOCK(m)      \
  213                         PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
  214 
  215 /* The list of all the user pmaps */
  216 LIST_HEAD(pmaplist, pmap);
   217 static struct pmaplist allpmaps = LIST_HEAD_INITIALIZER(allpmaps);
  218 
  219 struct pmap kernel_pmap_store;
  220 
  221 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  222 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  223 vm_offset_t kernel_vm_end = 0;
  224 
  225 vm_paddr_t dmap_phys_base;      /* The start of the dmap region */
  226 vm_paddr_t dmap_phys_max;       /* The limit of the dmap region */
  227 vm_offset_t dmap_max_addr;      /* The virtual address limit of the dmap */
  228 
  229 /* This code assumes all L1 DMAP entries will be used */
  230 CTASSERT((DMAP_MIN_ADDRESS  & ~L1_OFFSET) == DMAP_MIN_ADDRESS);
  231 CTASSERT((DMAP_MAX_ADDRESS  & ~L1_OFFSET) == DMAP_MAX_ADDRESS);
  232 
  233 static struct rwlock_padalign pvh_global_lock;
  234 static struct mtx_padalign allpmaps_lock;
  235 
  236 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0,
  237     "VM/pmap parameters");
  238 
  239 static int superpages_enabled = 1;
  240 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
  241     CTLFLAG_RDTUN, &superpages_enabled, 0,
  242     "Enable support for transparent superpages");
  243 
  244 static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
  245     "2MB page mapping counters");
  246 
  247 static u_long pmap_l2_demotions;
  248 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
  249     &pmap_l2_demotions, 0,
  250     "2MB page demotions");
  251 
  252 static u_long pmap_l2_mappings;
  253 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD,
  254     &pmap_l2_mappings, 0,
  255     "2MB page mappings");
  256 
  257 static u_long pmap_l2_p_failures;
  258 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
  259     &pmap_l2_p_failures, 0,
  260     "2MB page promotion failures");
  261 
  262 static u_long pmap_l2_promotions;
  263 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
  264     &pmap_l2_promotions, 0,
  265     "2MB page promotions");
  266 
  267 /*
  268  * Data for the pv entry allocation mechanism
  269  */
  270 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
  271 static struct mtx pv_chunks_mutex;
  272 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
  273 static struct md_page *pv_table;
  274 static struct md_page pv_dummy;
  275 
  276 extern cpuset_t all_harts;
  277 
  278 /*
  279  * Internal flags for pmap_enter()'s helper functions.
  280  */
  281 #define PMAP_ENTER_NORECLAIM    0x1000000       /* Don't reclaim PV entries. */
  282 #define PMAP_ENTER_NOREPLACE    0x2000000       /* Don't replace mappings. */
  283 
  284 static void     free_pv_chunk(struct pv_chunk *pc);
  285 static void     free_pv_entry(pmap_t pmap, pv_entry_t pv);
  286 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
  287 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
  288 static void     pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
  289 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
  290                     vm_offset_t va);
  291 static bool     pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va);
  292 static bool     pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2,
  293                     vm_offset_t va, struct rwlock **lockp);
  294 static int      pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
  295                     u_int flags, vm_page_t m, struct rwlock **lockp);
  296 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
  297     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
  298 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
  299     pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
  300 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
  301     vm_page_t m, struct rwlock **lockp);
  302 
  303 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
  304                 struct rwlock **lockp);
  305 
  306 static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
  307     struct spglist *free);
  308 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
  309 
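       /*
        * All loads and stores of page table entries go through 64-bit
        * atomics so that other harts and the hardware page-table walker
        * never observe a partially written PTE.
        */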
  310 #define pmap_clear(pte)                 pmap_store(pte, 0)
  311 #define pmap_clear_bits(pte, bits)      atomic_clear_64(pte, bits)
  312 #define pmap_load_store(pte, entry)     atomic_swap_64(pte, entry)
  313 #define pmap_load_clear(pte)            pmap_load_store(pte, 0)
  314 #define pmap_load(pte)                  atomic_load_64(pte)
  315 #define pmap_store(pte, entry)          atomic_store_64(pte, entry)
  316 #define pmap_store_bits(pte, bits)      atomic_set_64(pte, bits)
  317 
  318 /********************/
  319 /* Inline functions */
  320 /********************/
  321 
  322 static __inline void
  323 pagecopy(void *s, void *d)
  324 {
  325 
  326         memcpy(d, s, PAGE_SIZE);
  327 }
  328 
  329 static __inline void
  330 pagezero(void *p)
  331 {
  332 
  333         bzero(p, PAGE_SIZE);
  334 }
  335 
  336 #define pmap_l1_index(va)       (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
  337 #define pmap_l2_index(va)       (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
  338 #define pmap_l3_index(va)       (((va) >> L3_SHIFT) & Ln_ADDR_MASK)
  339 
  340 #define PTE_TO_PHYS(pte)        ((pte >> PTE_PPN0_S) * PAGE_SIZE)
  341 
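       /*
        * Page table walking helpers.  PTE_TO_PHYS() above converts the
        * physical page number stored in an entry back into a byte address.
        * pmap_l1() returns a pointer to the L1 (top-level) entry for a
        * virtual address, while pmap_l1_to_l2() and pmap_l2_to_l3() descend
        * one level via the direct map.  pmap_l2() and pmap_l3() return NULL
        * when an upper-level entry is invalid or is itself a leaf
        * (superpage) mapping.
        */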
  342 static __inline pd_entry_t *
  343 pmap_l1(pmap_t pmap, vm_offset_t va)
  344 {
  345 
  346         return (&pmap->pm_l1[pmap_l1_index(va)]);
  347 }
  348 
  349 static __inline pd_entry_t *
  350 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
  351 {
  352         vm_paddr_t phys;
  353         pd_entry_t *l2;
  354 
  355         phys = PTE_TO_PHYS(pmap_load(l1));
  356         l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
  357 
  358         return (&l2[pmap_l2_index(va)]);
  359 }
  360 
  361 static __inline pd_entry_t *
  362 pmap_l2(pmap_t pmap, vm_offset_t va)
  363 {
  364         pd_entry_t *l1;
  365 
  366         l1 = pmap_l1(pmap, va);
  367         if ((pmap_load(l1) & PTE_V) == 0)
  368                 return (NULL);
  369         if ((pmap_load(l1) & PTE_RX) != 0)
  370                 return (NULL);
  371 
  372         return (pmap_l1_to_l2(l1, va));
  373 }
  374 
  375 static __inline pt_entry_t *
  376 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
  377 {
  378         vm_paddr_t phys;
  379         pt_entry_t *l3;
  380 
  381         phys = PTE_TO_PHYS(pmap_load(l2));
   382         l3 = (pt_entry_t *)PHYS_TO_DMAP(phys);
  383 
  384         return (&l3[pmap_l3_index(va)]);
  385 }
  386 
  387 static __inline pt_entry_t *
  388 pmap_l3(pmap_t pmap, vm_offset_t va)
  389 {
  390         pd_entry_t *l2;
  391 
  392         l2 = pmap_l2(pmap, va);
  393         if (l2 == NULL)
  394                 return (NULL);
  395         if ((pmap_load(l2) & PTE_V) == 0)
  396                 return (NULL);
  397         if ((pmap_load(l2) & PTE_RX) != 0)
  398                 return (NULL);
  399 
  400         return (pmap_l2_to_l3(l2, va));
  401 }
  402 
  403 static __inline void
  404 pmap_resident_count_inc(pmap_t pmap, int count)
  405 {
  406 
  407         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  408         pmap->pm_stats.resident_count += count;
  409 }
  410 
  411 static __inline void
  412 pmap_resident_count_dec(pmap_t pmap, int count)
  413 {
  414 
  415         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  416         KASSERT(pmap->pm_stats.resident_count >= count,
  417             ("pmap %p resident count underflow %ld %d", pmap,
  418             pmap->pm_stats.resident_count, count));
  419         pmap->pm_stats.resident_count -= count;
  420 }
  421 
  422 static void
  423 pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index,
  424     pt_entry_t entry)
  425 {
  426         struct pmap *user_pmap;
  427         pd_entry_t *l1;
  428 
  429         /* Distribute new kernel L1 entry to all the user pmaps */
  430         if (pmap != kernel_pmap)
  431                 return;
  432 
  433         mtx_lock(&allpmaps_lock);
  434         LIST_FOREACH(user_pmap, &allpmaps, pm_list) {
  435                 l1 = &user_pmap->pm_l1[l1index];
  436                 pmap_store(l1, entry);
  437         }
  438         mtx_unlock(&allpmaps_lock);
  439 }
  440 
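       /*
        * Bootstrap helper: return the early L2 table (at init_pt_va, set up
        * by locore) that maps "va", along with the corresponding L1 and L2
        * slot indices.  Only usable during early boot, before the regular
        * page tables have been built.
        */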
  441 static pt_entry_t *
  442 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
  443     u_int *l2_slot)
  444 {
  445         pt_entry_t *l2;
  446         pd_entry_t *l1;
  447 
  448         l1 = (pd_entry_t *)l1pt;
  449         *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
  450 
   451         /* Check that locore used a table (non-leaf) L1 mapping */
  452         KASSERT((l1[*l1_slot] & PTE_RX) == 0,
  453                 ("Invalid bootstrap L1 table"));
  454 
  455         /* Find the address of the L2 table */
  456         l2 = (pt_entry_t *)init_pt_va;
  457         *l2_slot = pmap_l2_index(va);
  458 
  459         return (l2);
  460 }
  461 
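       /*
        * Bootstrap helper: translate a kernel virtual address to a physical
        * address using the early page tables, which are expected to map the
        * kernel with L2 superpages.
        */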
  462 static vm_paddr_t
  463 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
  464 {
  465         u_int l1_slot, l2_slot;
  466         pt_entry_t *l2;
  467         vm_paddr_t ret;
  468 
  469         l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
  470 
   471         /* Check that locore used L2 superpages */
  472         KASSERT((l2[l2_slot] & PTE_RX) != 0,
  473                 ("Invalid bootstrap L2 table"));
  474 
  475         /* L2 is superpages */
  476         ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT;
  477         ret += (va & L2_OFFSET);
  478 
  479         return (ret);
  480 }
  481 
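       /*
        * Build the direct map (DMAP): map the physical range
        * [min_pa, max_pa) at DMAP_MIN_ADDRESS using L1 superpages, and
        * record the resulting limits in dmap_phys_max and dmap_max_addr.
        */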
  482 static void
  483 pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
  484 {
  485         vm_offset_t va;
  486         vm_paddr_t pa;
  487         pd_entry_t *l1;
  488         u_int l1_slot;
  489         pt_entry_t entry;
  490         pn_t pn;
  491 
  492         pa = dmap_phys_base = min_pa & ~L1_OFFSET;
  493         va = DMAP_MIN_ADDRESS;
  494         l1 = (pd_entry_t *)kern_l1;
  495         l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
  496 
  497         for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
  498             pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
  499                 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
  500 
  501                 /* superpages */
  502                 pn = (pa / PAGE_SIZE);
  503                 entry = PTE_KERN;
  504                 entry |= (pn << PTE_PPN0_S);
  505                 pmap_store(&l1[l1_slot], entry);
  506         }
  507 
  508         /* Set the upper limit of the DMAP region */
  509         dmap_phys_max = pa;
  510         dmap_max_addr = va;
  511 
  512         sfence_vma();
  513 }
  514 
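       /*
        * Install freshly allocated L3 page tables, carved from "l3_start",
        * into the L2 entries covering [va, VM_MAX_KERNEL_ADDRESS).  Returns
        * the first unused address past the new tables.  Used at boot to
        * back the early devmap region.
        */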
  515 static vm_offset_t
  516 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
  517 {
  518         vm_offset_t l3pt;
  519         pt_entry_t entry;
  520         pd_entry_t *l2;
  521         vm_paddr_t pa;
  522         u_int l2_slot;
  523         pn_t pn;
  524 
  525         KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
  526 
  527         l2 = pmap_l2(kernel_pmap, va);
  528         l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
  529         l2_slot = pmap_l2_index(va);
  530         l3pt = l3_start;
  531 
  532         for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
  533                 KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
  534 
  535                 pa = pmap_early_vtophys(l1pt, l3pt);
  536                 pn = (pa / PAGE_SIZE);
  537                 entry = (PTE_V);
  538                 entry |= (pn << PTE_PPN0_S);
  539                 pmap_store(&l2[l2_slot], entry);
  540                 l3pt += PAGE_SIZE;
  541         }
  542 
  543 
   544         /* Clean the newly allocated L3 page tables */
  545         memset((void *)l3_start, 0, l3pt - l3_start);
  546 
  547         return (l3pt);
  548 }
  549 
  550 /*
  551  *      Bootstrap the system enough to run with virtual memory.
  552  */
  553 void
  554 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
  555 {
  556         u_int l1_slot, l2_slot, avail_slot, map_slot;
  557         vm_offset_t freemempos;
  558         vm_offset_t dpcpu, msgbufpv;
  559         vm_paddr_t end, max_pa, min_pa, pa, start;
  560         int i;
  561 
  562         printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
  563         printf("%lx\n", l1pt);
  564         printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
  565 
  566         /* Set this early so we can use the pagetable walking functions */
  567         kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
  568         PMAP_LOCK_INIT(kernel_pmap);
  569 
  570         rw_init(&pvh_global_lock, "pmap pv global");
  571 
  572         CPU_FILL(&kernel_pmap->pm_active);
  573 
  574         /* Assume the address we were loaded to is a valid physical address. */
  575         min_pa = max_pa = kernstart;
  576 
  577         /*
  578          * Find the minimum physical address. physmap is sorted,
  579          * but may contain empty ranges.
  580          */
  581         for (i = 0; i < physmap_idx * 2; i += 2) {
  582                 if (physmap[i] == physmap[i + 1])
  583                         continue;
  584                 if (physmap[i] <= min_pa)
  585                         min_pa = physmap[i];
  586                 if (physmap[i + 1] > max_pa)
  587                         max_pa = physmap[i + 1];
  588         }
  589         printf("physmap_idx %lx\n", physmap_idx);
  590         printf("min_pa %lx\n", min_pa);
  591         printf("max_pa %lx\n", max_pa);
  592 
  593         /* Create a direct map region early so we can use it for pa -> va */
  594         pmap_bootstrap_dmap(l1pt, min_pa, max_pa);
  595 
  596         /*
  597          * Read the page table to find out what is already mapped.
  598          * This assumes we have mapped a block of memory from KERNBASE
  599          * using a single L1 entry.
  600          */
  601         (void)pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
  602 
  603         /* Sanity check the index, KERNBASE should be the first VA */
  604         KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
  605 
  606         freemempos = roundup2(KERNBASE + kernlen, PAGE_SIZE);
  607 
  608         /* Create the l3 tables for the early devmap */
  609         freemempos = pmap_bootstrap_l3(l1pt,
  610             VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
  611 
  612         sfence_vma();
  613 
  614 #define alloc_pages(var, np)                                            \
  615         (var) = freemempos;                                             \
   616         freemempos += ((np) * PAGE_SIZE);                               \
  617         memset((char *)(var), 0, ((np) * PAGE_SIZE));
  618 
  619         /* Allocate dynamic per-cpu area. */
  620         alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
  621         dpcpu_init((void *)dpcpu, 0);
  622 
  623         /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
  624         alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
  625         msgbufp = (void *)msgbufpv;
  626 
  627         virtual_avail = roundup2(freemempos, L2_SIZE);
  628         virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
  629         kernel_vm_end = virtual_avail;
  630         
  631         pa = pmap_early_vtophys(l1pt, freemempos);
  632 
  633         /* Initialize phys_avail and dump_avail. */
  634         for (avail_slot = map_slot = physmem = 0; map_slot < physmap_idx * 2;
  635             map_slot += 2) {
  636                 start = physmap[map_slot];
  637                 end = physmap[map_slot + 1];
  638 
  639                 if (start == end)
  640                         continue;
  641                 dump_avail[map_slot] = start;
  642                 dump_avail[map_slot + 1] = end;
  643                 realmem += atop((vm_offset_t)(end - start));
  644 
  645                 if (start >= kernstart && end <= pa)
  646                         continue;
  647 
  648                 if (start < kernstart && end > kernstart)
  649                         end = kernstart;
  650                 else if (start < pa && end > pa)
  651                         start = pa;
  652                 phys_avail[avail_slot] = start;
  653                 phys_avail[avail_slot + 1] = end;
  654                 physmem += (end - start) >> PAGE_SHIFT;
  655                 avail_slot += 2;
  656 
  657                 if (end != physmap[map_slot + 1] && end > pa) {
  658                         phys_avail[avail_slot] = pa;
  659                         phys_avail[avail_slot + 1] = physmap[map_slot + 1];
  660                         physmem += (physmap[map_slot + 1] - pa) >> PAGE_SHIFT;
  661                         avail_slot += 2;
  662                 }
  663         }
  664         phys_avail[avail_slot] = 0;
  665         phys_avail[avail_slot + 1] = 0;
  666 
  667         /*
  668          * Maxmem isn't the "maximum memory", it's one larger than the
  669          * highest page of the physical address space.  It should be
  670          * called something like "Maxphyspage".
  671          */
  672         Maxmem = atop(phys_avail[avail_slot - 1]);
  673 }
  674 
  675 /*
  676  *      Initialize a vm_page's machine-dependent fields.
  677  */
  678 void
  679 pmap_page_init(vm_page_t m)
  680 {
  681 
  682         TAILQ_INIT(&m->md.pv_list);
  683         m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
  684 }
  685 
  686 /*
  687  *      Initialize the pmap module.
  688  *      Called by vm_init, to initialize any structures that the pmap
  689  *      system needs to map virtual memory.
  690  */
  691 void
  692 pmap_init(void)
  693 {
  694         vm_size_t s;
  695         int i, pv_npg;
  696 
  697         /*
  698          * Initialize the pv chunk and pmap list mutexes.
  699          */
  700         mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
  701         mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_DEF);
  702 
  703         /*
  704          * Initialize the pool of pv list locks.
  705          */
  706         for (i = 0; i < NPV_LIST_LOCKS; i++)
  707                 rw_init(&pv_list_locks[i], "pmap pv list");
  708 
  709         /*
  710          * Calculate the size of the pv head table for superpages.
  711          */
  712         pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);
  713 
  714         /*
  715          * Allocate memory for the pv head table for superpages.
  716          */
  717         s = (vm_size_t)(pv_npg * sizeof(struct md_page));
  718         s = round_page(s);
  719         pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
  720         for (i = 0; i < pv_npg; i++)
  721                 TAILQ_INIT(&pv_table[i].pv_list);
  722         TAILQ_INIT(&pv_dummy.pv_list);
  723 
  724         if (superpages_enabled)
  725                 pagesizes[1] = L2_SIZE;
  726 }
  727 
  728 #ifdef SMP
  729 /*
  730  * For SMP, these functions have to use IPIs for coherence.
  731  *
  732  * In general, the calling thread uses a plain fence to order the
   733  * writes to the page tables before making an SBI call that executes
   734  * sfence_vma() on the remote CPUs.
  735  */
  736 static void
  737 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  738 {
  739         cpuset_t mask;
  740 
  741         sched_pin();
  742         mask = pmap->pm_active;
  743         CPU_CLR(PCPU_GET(hart), &mask);
  744         fence();
  745         if (!CPU_EMPTY(&mask) && smp_started)
  746                 sbi_remote_sfence_vma(mask.__bits, va, 1);
  747         sfence_vma_page(va);
  748         sched_unpin();
  749 }
  750 
  751 static void
  752 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  753 {
  754         cpuset_t mask;
  755 
  756         sched_pin();
  757         mask = pmap->pm_active;
  758         CPU_CLR(PCPU_GET(hart), &mask);
  759         fence();
  760         if (!CPU_EMPTY(&mask) && smp_started)
  761                 sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
  762 
  763         /*
  764          * Might consider a loop of sfence_vma_page() for a small
  765          * number of pages in the future.
  766          */
  767         sfence_vma();
  768         sched_unpin();
  769 }
  770 
  771 static void
  772 pmap_invalidate_all(pmap_t pmap)
  773 {
  774         cpuset_t mask;
  775 
  776         sched_pin();
  777         mask = pmap->pm_active;
  778         CPU_CLR(PCPU_GET(hart), &mask);
  779 
  780         /*
  781          * XXX: The SBI doc doesn't detail how to specify x0 as the
  782          * address to perform a global fence.  BBL currently treats
  783          * all sfence_vma requests as global however.
  784          */
  785         fence();
  786         if (!CPU_EMPTY(&mask) && smp_started)
  787                 sbi_remote_sfence_vma(mask.__bits, 0, 0);
  788         sfence_vma();
  789         sched_unpin();
  790 }
  791 #else
  792 /*
  793  * Normal, non-SMP, invalidation functions.
  794  * We inline these within pmap.c for speed.
  795  */
  796 static __inline void
  797 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  798 {
  799 
  800         sfence_vma_page(va);
  801 }
  802 
  803 static __inline void
  804 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  805 {
  806 
  807         /*
  808          * Might consider a loop of sfence_vma_page() for a small
  809          * number of pages in the future.
  810          */
  811         sfence_vma();
  812 }
  813 
  814 static __inline void
  815 pmap_invalidate_all(pmap_t pmap)
  816 {
  817 
  818         sfence_vma();
  819 }
  820 #endif
  821 
  822 /*
  823  *      Routine:        pmap_extract
  824  *      Function:
  825  *              Extract the physical page address associated
  826  *              with the given map/virtual_address pair.
  827  */
  828 vm_paddr_t 
  829 pmap_extract(pmap_t pmap, vm_offset_t va)
  830 {
  831         pd_entry_t *l2p, l2;
  832         pt_entry_t *l3p, l3;
  833         vm_paddr_t pa;
  834 
  835         pa = 0;
  836         PMAP_LOCK(pmap);
  837         /*
   838          * Start with the L2 table.  We are unable to allocate
   839          * pages in the L1 table.
  840          */
  841         l2p = pmap_l2(pmap, va);
  842         if (l2p != NULL) {
  843                 l2 = pmap_load(l2p);
  844                 if ((l2 & PTE_RX) == 0) {
  845                         l3p = pmap_l2_to_l3(l2p, va);
  846                         if (l3p != NULL) {
  847                                 l3 = pmap_load(l3p);
  848                                 pa = PTE_TO_PHYS(l3);
  849                                 pa |= (va & L3_OFFSET);
  850                         }
  851                 } else {
  852                         /* L2 is superpages */
  853                         pa = (l2 >> PTE_PPN1_S) << L2_SHIFT;
  854                         pa |= (va & L2_OFFSET);
  855                 }
  856         }
  857         PMAP_UNLOCK(pmap);
  858         return (pa);
  859 }
  860 
  861 /*
  862  *      Routine:        pmap_extract_and_hold
  863  *      Function:
  864  *              Atomically extract and hold the physical page
  865  *              with the given pmap and virtual address pair
  866  *              if that mapping permits the given protection.
  867  */
  868 vm_page_t
  869 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
  870 {
  871         pt_entry_t *l3p, l3;
  872         vm_paddr_t phys;
  873         vm_paddr_t pa;
  874         vm_page_t m;
  875 
  876         pa = 0;
  877         m = NULL;
  878         PMAP_LOCK(pmap);
  879 retry:
  880         l3p = pmap_l3(pmap, va);
  881         if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
  882                 if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) {
  883                         phys = PTE_TO_PHYS(l3);
  884                         if (vm_page_pa_tryrelock(pmap, phys, &pa))
  885                                 goto retry;
  886                         m = PHYS_TO_VM_PAGE(phys);
  887                         vm_page_hold(m);
  888                 }
  889         }
  890         PA_UNLOCK_COND(pa);
  891         PMAP_UNLOCK(pmap);
  892         return (m);
  893 }
  894 
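       /*
        *      Routine:        pmap_kextract
        *      Function:
        *              Extract the physical address backing a kernel
        *              virtual address.  Handles direct map addresses as
        *              well as page table mappings, including L2 superpage
        *              mappings.
        */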
  895 vm_paddr_t
  896 pmap_kextract(vm_offset_t va)
  897 {
  898         pd_entry_t *l2;
  899         pt_entry_t *l3;
  900         vm_paddr_t pa;
  901 
  902         if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
  903                 pa = DMAP_TO_PHYS(va);
  904         } else {
  905                 l2 = pmap_l2(kernel_pmap, va);
  906                 if (l2 == NULL)
  907                         panic("pmap_kextract: No l2");
  908                 if ((pmap_load(l2) & PTE_RX) != 0) {
  909                         /* superpages */
  910                         pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT;
  911                         pa |= (va & L2_OFFSET);
  912                         return (pa);
  913                 }
  914 
  915                 l3 = pmap_l2_to_l3(l2, va);
  916                 if (l3 == NULL)
  917                         panic("pmap_kextract: No l3...");
  918                 pa = PTE_TO_PHYS(pmap_load(l3));
  919                 pa |= (va & PAGE_MASK);
  920         }
  921         return (pa);
  922 }
  923 
  924 /***************************************************
  925  * Low level mapping routines.....
  926  ***************************************************/
  927 
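       /*
        * Map a physically contiguous range of device memory into the kernel
        * at "sva".  The physical address, virtual address, and size must
        * all be page aligned; the mappings are created with the standard
        * kernel (PTE_KERN) attributes.
        */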
  928 void
  929 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
  930 {
  931         pt_entry_t entry;
  932         pt_entry_t *l3;
  933         vm_offset_t va;
  934         pn_t pn;
  935 
  936         KASSERT((pa & L3_OFFSET) == 0,
  937            ("pmap_kenter_device: Invalid physical address"));
  938         KASSERT((sva & L3_OFFSET) == 0,
  939            ("pmap_kenter_device: Invalid virtual address"));
  940         KASSERT((size & PAGE_MASK) == 0,
  941             ("pmap_kenter_device: Mapping is not page-sized"));
  942 
  943         va = sva;
  944         while (size != 0) {
  945                 l3 = pmap_l3(kernel_pmap, va);
  946                 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
  947 
  948                 pn = (pa / PAGE_SIZE);
  949                 entry = PTE_KERN;
  950                 entry |= (pn << PTE_PPN0_S);
  951                 pmap_store(l3, entry);
  952 
  953                 va += PAGE_SIZE;
  954                 pa += PAGE_SIZE;
  955                 size -= PAGE_SIZE;
  956         }
  957         pmap_invalidate_range(kernel_pmap, sva, va);
  958 }
  959 
  960 /*
  961  * Remove a page from the kernel pagetables.
  962  * Note: not SMP coherent.
  963  */
  964 PMAP_INLINE void
  965 pmap_kremove(vm_offset_t va)
  966 {
  967         pt_entry_t *l3;
  968 
  969         l3 = pmap_l3(kernel_pmap, va);
  970         KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
  971 
  972         pmap_clear(l3);
  973         sfence_vma();
  974 }
  975 
  976 void
  977 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
  978 {
  979         pt_entry_t *l3;
  980         vm_offset_t va;
  981 
  982         KASSERT((sva & L3_OFFSET) == 0,
  983            ("pmap_kremove_device: Invalid virtual address"));
  984         KASSERT((size & PAGE_MASK) == 0,
  985             ("pmap_kremove_device: Mapping is not page-sized"));
  986 
  987         va = sva;
  988         while (size != 0) {
  989                 l3 = pmap_l3(kernel_pmap, va);
  990                 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
  991                 pmap_clear(l3);
  992 
  993                 va += PAGE_SIZE;
  994                 size -= PAGE_SIZE;
  995         }
  996 
  997         pmap_invalidate_range(kernel_pmap, sva, va);
  998 }
  999 
 1000 /*
 1001  *      Used to map a range of physical addresses into kernel
 1002  *      virtual address space.
 1003  *
 1004  *      The value passed in '*virt' is a suggested virtual address for
 1005  *      the mapping. Architectures which can support a direct-mapped
 1006  *      physical to virtual region can return the appropriate address
 1007  *      within that region, leaving '*virt' unchanged. Other
 1008  *      architectures should map the pages starting at '*virt' and
 1009  *      update '*virt' with the first usable address after the mapped
 1010  *      region.
 1011  */
 1012 vm_offset_t
 1013 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 1014 {
 1015 
 1016         return PHYS_TO_DMAP(start);
 1017 }
 1018 
 1019 
 1020 /*
  1021  * Add a list of wired pages to the kva.
  1022  * This routine is only used for temporary
 1023  * kernel mappings that do not need to have
 1024  * page modification or references recorded.
 1025  * Note that old mappings are simply written
 1026  * over.  The page *must* be wired.
 1027  * Note: SMP coherent.  Uses a ranged shootdown IPI.
 1028  */
 1029 void
 1030 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 1031 {
 1032         pt_entry_t *l3, pa;
 1033         vm_offset_t va;
 1034         vm_page_t m;
 1035         pt_entry_t entry;
 1036         pn_t pn;
 1037         int i;
 1038 
 1039         va = sva;
 1040         for (i = 0; i < count; i++) {
 1041                 m = ma[i];
 1042                 pa = VM_PAGE_TO_PHYS(m);
 1043                 pn = (pa / PAGE_SIZE);
 1044                 l3 = pmap_l3(kernel_pmap, va);
 1045 
 1046                 entry = PTE_KERN;
 1047                 entry |= (pn << PTE_PPN0_S);
 1048                 pmap_store(l3, entry);
 1049 
 1050                 va += L3_SIZE;
 1051         }
 1052         pmap_invalidate_range(kernel_pmap, sva, va);
 1053 }
 1054 
 1055 /*
 1056  * This routine tears out page mappings from the
 1057  * kernel -- it is meant only for temporary mappings.
 1058  * Note: SMP coherent.  Uses a ranged shootdown IPI.
 1059  */
 1060 void
 1061 pmap_qremove(vm_offset_t sva, int count)
 1062 {
 1063         pt_entry_t *l3;
 1064         vm_offset_t va;
 1065 
 1066         KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
 1067 
 1068         for (va = sva; count-- > 0; va += PAGE_SIZE) {
 1069                 l3 = pmap_l3(kernel_pmap, va);
  1070                 KASSERT(l3 != NULL, ("pmap_qremove: Invalid address"));
 1071                 pmap_clear(l3);
 1072         }
 1073         pmap_invalidate_range(kernel_pmap, sva, va);
 1074 }
 1075 
 1076 bool
 1077 pmap_ps_enabled(pmap_t pmap __unused)
 1078 {
 1079 
 1080         return (superpages_enabled);
 1081 }
 1082 
 1083 /***************************************************
 1084  * Page table page management routines.....
 1085  ***************************************************/
 1086 /*
 1087  * Schedule the specified unused page table page to be freed.  Specifically,
 1088  * add the page to the specified list of pages that will be released to the
 1089  * physical memory manager after the TLB has been updated.
 1090  */
 1091 static __inline void
 1092 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
 1093     boolean_t set_PG_ZERO)
 1094 {
 1095 
 1096         if (set_PG_ZERO)
 1097                 m->flags |= PG_ZERO;
 1098         else
 1099                 m->flags &= ~PG_ZERO;
 1100         SLIST_INSERT_HEAD(free, m, plinks.s.ss);
 1101 }
 1102 
 1103 /*
 1104  * Inserts the specified page table page into the specified pmap's collection
 1105  * of idle page table pages.  Each of a pmap's page table pages is responsible
 1106  * for mapping a distinct range of virtual addresses.  The pmap's collection is
 1107  * ordered by this virtual address range.
 1108  *
 1109  * If "promoted" is false, then the page table page "ml3" must be zero filled.
 1110  */
 1111 static __inline int
 1112 pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3, bool promoted)
 1113 {
 1114 
 1115         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1116         ml3->valid = promoted ? VM_PAGE_BITS_ALL : 0;
 1117         return (vm_radix_insert(&pmap->pm_root, ml3));
 1118 }
 1119 
 1120 /*
 1121  * Removes the page table page mapping the specified virtual address from the
 1122  * specified pmap's collection of idle page table pages, and returns it.
 1123  * Otherwise, returns NULL if there is no page table page corresponding to the
 1124  * specified virtual address.
 1125  */
 1126 static __inline vm_page_t
 1127 pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
 1128 {
 1129 
 1130         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1131         return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
 1132 }
 1133         
 1134 /*
 1135  * Decrements a page table page's wire count, which is used to record the
 1136  * number of valid page table entries within the page.  If the wire count
 1137  * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 1138  * page table page was unmapped and FALSE otherwise.
 1139  */
 1140 static inline boolean_t
 1141 pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 1142 {
 1143 
 1144         --m->wire_count;
 1145         if (m->wire_count == 0) {
 1146                 _pmap_unwire_ptp(pmap, va, m, free);
 1147                 return (TRUE);
 1148         } else {
 1149                 return (FALSE);
 1150         }
 1151 }
 1152 
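       /*
        * Release the page table page "m" whose wire count has dropped to
        * zero: clear the L1 or L2 entry that points to it, drop a reference
        * on the parent page table page if there is one, invalidate the TLB,
        * and queue "m" on "free" for release after the TLB shootdown.
        */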
 1153 static void
 1154 _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 1155 {
 1156         vm_paddr_t phys;
 1157 
 1158         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1159         if (m->pindex >= NUL1E) {
 1160                 pd_entry_t *l1;
 1161                 l1 = pmap_l1(pmap, va);
 1162                 pmap_clear(l1);
 1163                 pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
 1164         } else {
 1165                 pd_entry_t *l2;
 1166                 l2 = pmap_l2(pmap, va);
 1167                 pmap_clear(l2);
 1168         }
 1169         pmap_resident_count_dec(pmap, 1);
 1170         if (m->pindex < NUL1E) {
 1171                 pd_entry_t *l1;
 1172                 vm_page_t pdpg;
 1173 
 1174                 l1 = pmap_l1(pmap, va);
 1175                 phys = PTE_TO_PHYS(pmap_load(l1));
 1176                 pdpg = PHYS_TO_VM_PAGE(phys);
 1177                 pmap_unwire_ptp(pmap, va, pdpg, free);
 1178         }
 1179         pmap_invalidate_page(pmap, va);
 1180 
 1181         vm_wire_sub(1);
 1182 
 1183         /* 
 1184          * Put page on a list so that it is released after
 1185          * *ALL* TLB shootdown is done
 1186          */
 1187         pmap_add_delayed_free_list(m, free, TRUE);
 1188 }
 1189 
 1190 /*
 1191  * After removing a page table entry, this routine is used to
 1192  * conditionally free the page, and manage the hold/wire counts.
 1193  */
 1194 static int
 1195 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
 1196     struct spglist *free)
 1197 {
 1198         vm_page_t mpte;
 1199 
 1200         if (va >= VM_MAXUSER_ADDRESS)
 1201                 return (0);
 1202         KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
 1203         mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(ptepde));
 1204         return (pmap_unwire_ptp(pmap, va, mpte, free));
 1205 }
 1206 
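       /*
        * Initialize the pmap for process 0.  It shares the kernel's L1 page
        * table rather than allocating its own and is activated on the boot
        * hart via pmap_activate_boot().
        */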
 1207 void
 1208 pmap_pinit0(pmap_t pmap)
 1209 {
 1210 
 1211         PMAP_LOCK_INIT(pmap);
 1212         bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 1213         pmap->pm_l1 = kernel_pmap->pm_l1;
 1214         pmap->pm_satp = SATP_MODE_SV39 | (vtophys(pmap->pm_l1) >> PAGE_SHIFT);
 1215         CPU_ZERO(&pmap->pm_active);
 1216         pmap_activate_boot(pmap);
 1217 }
 1218 
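       /*
        * Initialize a user pmap: allocate and zero an L1 page, copy in the
        * kernel page table entries, and link the pmap onto "allpmaps" so
        * that later kernel L1 updates are propagated to it by
        * pmap_distribute_l1().
        */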
 1219 int
 1220 pmap_pinit(pmap_t pmap)
 1221 {
 1222         vm_paddr_t l1phys;
 1223         vm_page_t l1pt;
 1224 
 1225         /*
 1226          * allocate the l1 page
 1227          */
 1228         while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
 1229             VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 1230                 vm_wait(NULL);
 1231 
 1232         l1phys = VM_PAGE_TO_PHYS(l1pt);
 1233         pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
 1234         pmap->pm_satp = SATP_MODE_SV39 | (l1phys >> PAGE_SHIFT);
 1235 
 1236         if ((l1pt->flags & PG_ZERO) == 0)
 1237                 pagezero(pmap->pm_l1);
 1238 
 1239         bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 1240 
 1241         CPU_ZERO(&pmap->pm_active);
 1242 
 1243         /* Install kernel pagetables */
 1244         memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE);
 1245 
 1246         /* Add to the list of all user pmaps */
 1247         mtx_lock(&allpmaps_lock);
 1248         LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 1249         mtx_unlock(&allpmaps_lock);
 1250 
 1251         vm_radix_init(&pmap->pm_root);
 1252 
 1253         return (1);
 1254 }
 1255 
 1256 /*
 1257  * This routine is called if the desired page table page does not exist.
 1258  *
 1259  * If page table page allocation fails, this routine may sleep before
 1260  * returning NULL.  It sleeps only if a lock pointer was given.
 1261  *
 1262  * Note: If a page allocation fails at page table level two or three,
 1263  * one or two pages may be held during the wait, only to be released
 1264  * afterwards.  This conservative approach is easily argued to avoid
 1265  * race conditions.
 1266  */
 1267 static vm_page_t
 1268 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 1269 {
 1270         vm_page_t m, /*pdppg, */pdpg;
 1271         pt_entry_t entry;
 1272         vm_paddr_t phys;
 1273         pn_t pn;
 1274 
 1275         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1276 
 1277         /*
 1278          * Allocate a page table page.
 1279          */
 1280         if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 1281             VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 1282                 if (lockp != NULL) {
 1283                         RELEASE_PV_LIST_LOCK(lockp);
 1284                         PMAP_UNLOCK(pmap);
 1285                         rw_runlock(&pvh_global_lock);
 1286                         vm_wait(NULL);
 1287                         rw_rlock(&pvh_global_lock);
 1288                         PMAP_LOCK(pmap);
 1289                 }
 1290 
 1291                 /*
 1292                  * Indicate the need to retry.  While waiting, the page table
 1293                  * page may have been allocated.
 1294                  */
 1295                 return (NULL);
 1296         }
 1297 
 1298         if ((m->flags & PG_ZERO) == 0)
 1299                 pmap_zero_page(m);
 1300 
 1301         /*
 1302          * Map the pagetable page into the process address space, if
 1303          * it isn't already there.
 1304          */
 1305 
 1306         if (ptepindex >= NUL1E) {
 1307                 pd_entry_t *l1;
 1308                 vm_pindex_t l1index;
 1309 
 1310                 l1index = ptepindex - NUL1E;
 1311                 l1 = &pmap->pm_l1[l1index];
 1312 
 1313                 pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
 1314                 entry = (PTE_V);
 1315                 entry |= (pn << PTE_PPN0_S);
 1316                 pmap_store(l1, entry);
 1317                 pmap_distribute_l1(pmap, l1index, entry);
 1318         } else {
 1319                 vm_pindex_t l1index;
 1320                 pd_entry_t *l1, *l2;
 1321 
 1322                 l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
 1323                 l1 = &pmap->pm_l1[l1index];
 1324                 if (pmap_load(l1) == 0) {
 1325                         /* recurse for allocating page dir */
 1326                         if (_pmap_alloc_l3(pmap, NUL1E + l1index,
 1327                             lockp) == NULL) {
 1328                                 vm_page_unwire_noq(m);
 1329                                 vm_page_free_zero(m);
 1330                                 return (NULL);
 1331                         }
 1332                 } else {
 1333                         phys = PTE_TO_PHYS(pmap_load(l1));
 1334                         pdpg = PHYS_TO_VM_PAGE(phys);
 1335                         pdpg->wire_count++;
 1336                 }
 1337 
 1338                 phys = PTE_TO_PHYS(pmap_load(l1));
 1339                 l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
 1340                 l2 = &l2[ptepindex & Ln_ADDR_MASK];
 1341 
 1342                 pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
 1343                 entry = (PTE_V);
 1344                 entry |= (pn << PTE_PPN0_S);
 1345                 pmap_store(l2, entry);
 1346         }
 1347 
 1348         pmap_resident_count_inc(pmap, 1);
 1349 
 1350         return (m);
 1351 }
 1352 
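       /*
        * Return the page table page backing the L2 table that maps "va",
        * allocating it (and, if needed, the L1 entry above it) when it is
        * not already present.
        */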
 1353 static vm_page_t
 1354 pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 1355 {
 1356         pd_entry_t *l1;
 1357         vm_page_t l2pg;
 1358         vm_pindex_t l2pindex;
 1359 
 1360 retry:
 1361         l1 = pmap_l1(pmap, va);
 1362         if (l1 != NULL && (pmap_load(l1) & PTE_RWX) == 0) {
 1363                 /* Add a reference to the L2 page. */
 1364                 l2pg = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l1)));
 1365                 l2pg->wire_count++;
 1366         } else {
 1367                 /* Allocate a L2 page. */
 1368                 l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT;
  1369                 l2pg = _pmap_alloc_l3(pmap, NUL1E + l2pindex, lockp);
 1370                 if (l2pg == NULL && lockp != NULL)
 1371                         goto retry;
 1372         }
 1373         return (l2pg);
 1374 }
 1375 
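       /*
        * Return the page table page backing the L3 (leaf) table that maps
        * "va", calling _pmap_alloc_l3() to allocate it if necessary.
        */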
 1376 static vm_page_t
 1377 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 1378 {
 1379         vm_pindex_t ptepindex;
 1380         pd_entry_t *l2;
 1381         vm_paddr_t phys;
 1382         vm_page_t m;
 1383 
 1384         /*
 1385          * Calculate pagetable page index
 1386          */
 1387         ptepindex = pmap_l2_pindex(va);
 1388 retry:
 1389         /*
 1390          * Get the page directory entry
 1391          */
 1392         l2 = pmap_l2(pmap, va);
 1393 
 1394         /*
 1395          * If the page table page is mapped, we just increment the
 1396          * hold count, and activate it.
 1397          */
 1398         if (l2 != NULL && pmap_load(l2) != 0) {
 1399                 phys = PTE_TO_PHYS(pmap_load(l2));
 1400                 m = PHYS_TO_VM_PAGE(phys);
 1401                 m->wire_count++;
 1402         } else {
 1403                 /*
 1404                  * Here if the pte page isn't mapped, or if it has been
 1405                  * deallocated.
 1406                  */
 1407                 m = _pmap_alloc_l3(pmap, ptepindex, lockp);
 1408                 if (m == NULL && lockp != NULL)
 1409                         goto retry;
 1410         }
 1411         return (m);
 1412 }
 1413 
 1414 
 1415 /***************************************************
 1416  * Pmap allocation/deallocation routines.
 1417  ***************************************************/
 1418 
 1419 /*
 1420  * Release any resources held by the given physical map.
 1421  * Called when a pmap initialized by pmap_pinit is being released.
 1422  * Should only be called if the map contains no valid mappings.
 1423  */
 1424 void
 1425 pmap_release(pmap_t pmap)
 1426 {
 1427         vm_page_t m;
 1428 
 1429         KASSERT(pmap->pm_stats.resident_count == 0,
 1430             ("pmap_release: pmap resident count %ld != 0",
 1431             pmap->pm_stats.resident_count));
 1432         KASSERT(CPU_EMPTY(&pmap->pm_active),
 1433             ("releasing active pmap %p", pmap));
 1434 
 1435         mtx_lock(&allpmaps_lock);
 1436         LIST_REMOVE(pmap, pm_list);
 1437         mtx_unlock(&allpmaps_lock);
 1438 
 1439         m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
 1440         vm_page_unwire_noq(m);
 1441         vm_page_free(m);
 1442 }
 1443 
 1444 #if 0
 1445 static int
 1446 kvm_size(SYSCTL_HANDLER_ARGS)
 1447 {
 1448         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
 1449 
 1450         return sysctl_handle_long(oidp, &ksize, 0, req);
 1451 }
 1452 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
 1453     0, 0, kvm_size, "LU", "Size of KVM");
 1454 
 1455 static int
 1456 kvm_free(SYSCTL_HANDLER_ARGS)
 1457 {
 1458         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1459 
 1460         return sysctl_handle_long(oidp, &kfree, 0, req);
 1461 }
 1462 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
 1463     0, 0, kvm_free, "LU", "Amount of KVM free");
 1464 #endif /* 0 */
 1465 
 1466 /*
 1467  * grow the number of kernel page table entries, if needed
 1468  */
 1469 void
 1470 pmap_growkernel(vm_offset_t addr)
 1471 {
 1472         vm_paddr_t paddr;
 1473         vm_page_t nkpg;
 1474         pd_entry_t *l1, *l2;
 1475         pt_entry_t entry;
 1476         pn_t pn;
 1477 
 1478         mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 1479 
 1480         addr = roundup2(addr, L2_SIZE);
 1481         if (addr - 1 >= vm_map_max(kernel_map))
 1482                 addr = vm_map_max(kernel_map);
 1483         while (kernel_vm_end < addr) {
 1484                 l1 = pmap_l1(kernel_pmap, kernel_vm_end);
 1485                 if (pmap_load(l1) == 0) {
 1486                         /* We need a new PDP entry */
 1487                         nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
 1488                             VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 1489                             VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 1490                         if (nkpg == NULL)
 1491                                 panic("pmap_growkernel: no memory to grow kernel");
 1492                         if ((nkpg->flags & PG_ZERO) == 0)
 1493                                 pmap_zero_page(nkpg);
 1494                         paddr = VM_PAGE_TO_PHYS(nkpg);
 1495 
 1496                         pn = (paddr / PAGE_SIZE);
 1497                         entry = (PTE_V);
 1498                         entry |= (pn << PTE_PPN0_S);
 1499                         pmap_store(l1, entry);
 1500                         pmap_distribute_l1(kernel_pmap,
 1501                             pmap_l1_index(kernel_vm_end), entry);
 1502                         continue; /* try again */
 1503                 }
 1504                 l2 = pmap_l1_to_l2(l1, kernel_vm_end);
 1505                 if ((pmap_load(l2) & PTE_V) != 0 &&
 1506                     (pmap_load(l2) & PTE_RWX) == 0) {
 1507                         kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
 1508                         if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
 1509                                 kernel_vm_end = vm_map_max(kernel_map);
 1510                                 break;
 1511                         }
 1512                         continue;
 1513                 }
 1514 
 1515                 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
 1516                     VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 1517                     VM_ALLOC_ZERO);
 1518                 if (nkpg == NULL)
 1519                         panic("pmap_growkernel: no memory to grow kernel");
 1520                 if ((nkpg->flags & PG_ZERO) == 0) {
 1521                         pmap_zero_page(nkpg);
 1522                 }
 1523                 paddr = VM_PAGE_TO_PHYS(nkpg);
 1524 
 1525                 pn = (paddr / PAGE_SIZE);
 1526                 entry = (PTE_V);
 1527                 entry |= (pn << PTE_PPN0_S);
 1528                 pmap_store(l2, entry);
 1529 
 1530                 pmap_invalidate_page(kernel_pmap, kernel_vm_end);
 1531 
 1532                 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
 1533                 if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
 1534                         kernel_vm_end = vm_map_max(kernel_map);
 1535                         break;
 1536                 }
 1537         }
 1538 }
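
/*
 * Editorial illustration (not part of pmap.c): a minimal, self-contained
 * sketch of the PTE construction used by pmap_growkernel() above.  A page
 * table page's physical address becomes a non-leaf entry by storing its
 * page number in the PPN field and setting only the valid bit.  The
 * constants (4KB pages, PPN field starting at bit 10, V at bit 0) follow
 * the RISC-V Sv39 layout; all ex_-prefixed names are local to the sketch.
 */
#if 0
#include <stdint.h>

#define EX_PAGE_SIZE    4096ul
#define EX_PTE_V        (1ul << 0)      /* valid bit */
#define EX_PTE_PPN0_S   10              /* PPN field starts at bit 10 */

/* Build a non-leaf (pointer) PTE for a page table page at 'paddr'. */
static uint64_t
ex_make_table_pte(uint64_t paddr)
{
        uint64_t pn = paddr / EX_PAGE_SIZE;

        return (EX_PTE_V | (pn << EX_PTE_PPN0_S));
}

/* Recover the page-aligned physical address the PTE points at. */
static uint64_t
ex_pte_to_phys(uint64_t pte)
{
        return ((pte >> EX_PTE_PPN0_S) * EX_PAGE_SIZE);
}
#endif /* 0 -- editorial example */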
 1539 
 1540 
 1541 /***************************************************
 1542  * page management routines.
 1543  ***************************************************/
 1544 
 1545 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 1546 CTASSERT(_NPCM == 3);
 1547 CTASSERT(_NPCPV == 168);
 1548 
 1549 static __inline struct pv_chunk *
 1550 pv_to_chunk(pv_entry_t pv)
 1551 {
 1552 
 1553         return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 1554 }
 1555 
 1556 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 1557 
 1558 #define PC_FREE0        0xfffffffffffffffful
 1559 #define PC_FREE1        0xfffffffffffffffful
 1560 #define PC_FREE2        0x000000fffffffffful
 1561 
 1562 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
 1563 
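/*
 * Editorial illustration (not part of pmap.c): the free masks above encode
 * one bit per pv entry in a chunk.  A chunk occupies a single 4KB page and
 * holds _NPCPV == 168 entries, so the bitmap needs 64 + 64 + 40 set bits,
 * which is exactly what PC_FREE0..PC_FREE2 provide.  The hosted-C check
 * below only verifies that arithmetic; everything it defines is local.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define EX_NPCPV 168                    /* matches the CTASSERT above */

static int
ex_popcount64(uint64_t x)
{
        int n;

        for (n = 0; x != 0; x &= x - 1)
                n++;
        return (n);
}

int
main(void)
{
        const uint64_t freemask[3] = {
                0xfffffffffffffffful,   /* PC_FREE0 */
                0xfffffffffffffffful,   /* PC_FREE1 */
                0x000000fffffffffful,   /* PC_FREE2: only the low 40 bits */
        };
        int i, total = 0;

        for (i = 0; i < 3; i++)
                total += ex_popcount64(freemask[i]);
        printf("free bits = %d (expect %d)\n", total, EX_NPCPV);
        return (total == EX_NPCPV ? 0 : 1);
}
#endif /* 0 -- editorial example */
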
 1564 #if 0
 1565 #ifdef PV_STATS
 1566 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 1567 
 1568 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 1569         "Current number of pv entry chunks");
 1570 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 1571         "Current number of pv entry chunks allocated");
 1572 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 1573         "Number of pv entry chunks freed");
 1574 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 1575         "Number of times tried to get a chunk page but failed.");
 1576 
 1577 static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
 1578 static int pv_entry_spare;
 1579 
 1580 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 1581         "Current number of pv entry frees");
 1582 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 1583         "Current number of pv entry allocs");
 1584 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 1585         "Current number of pv entries");
 1586 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 1587         "Current number of spare pv entries");
 1588 #endif
 1589 #endif /* 0 */
 1590 
 1591 /*
 1592  * We are in a serious low memory condition.  Resort to
 1593  * drastic measures to free some pages so we can allocate
 1594  * another pv entry chunk.
 1595  *
 1596  * Returns NULL if PV entries were reclaimed from the specified pmap.
 1597  *
 1598  * We do not, however, unmap 2mpages because subsequent accesses will
 1599  * allocate per-page pv entries until repromotion occurs, thereby
 1600  * exacerbating the shortage of free pv entries.
 1601  */
 1602 static vm_page_t
 1603 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
 1604 {
 1605 
 1606         panic("RISCVTODO: reclaim_pv_chunk");
 1607 }
 1608 
 1609 /*
 1610  * free the pv_entry back to the free list
 1611  */
 1612 static void
 1613 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 1614 {
 1615         struct pv_chunk *pc;
 1616         int idx, field, bit;
 1617 
 1618         rw_assert(&pvh_global_lock, RA_LOCKED);
 1619         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1620         PV_STAT(atomic_add_long(&pv_entry_frees, 1));
 1621         PV_STAT(atomic_add_int(&pv_entry_spare, 1));
 1622         PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
 1623         pc = pv_to_chunk(pv);
 1624         idx = pv - &pc->pc_pventry[0];
 1625         field = idx / 64;
 1626         bit = idx % 64;
 1627         pc->pc_map[field] |= 1ul << bit;
 1628         if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
 1629             pc->pc_map[2] != PC_FREE2) {
 1630                 /* 98% of the time, pc is already at the head of the list. */
 1631                 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
 1632                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 1633                         TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 1634                 }
 1635                 return;
 1636         }
 1637         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 1638         free_pv_chunk(pc);
 1639 }
 1640 
 1641 static void
 1642 free_pv_chunk(struct pv_chunk *pc)
 1643 {
 1644         vm_page_t m;
 1645 
 1646         mtx_lock(&pv_chunks_mutex);
 1647         TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 1648         mtx_unlock(&pv_chunks_mutex);
 1649         PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
 1650         PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
 1651         PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
 1652         /* entire chunk is free, return it */
 1653         m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 1654         dump_drop_page(m->phys_addr);
 1655         vm_page_unwire_noq(m);
 1656         vm_page_free(m);
 1657 }
 1658 
 1659 /*
 1660  * Returns a new PV entry, allocating a new PV chunk from the system when
 1661  * needed.  If this PV chunk allocation fails and a PV list lock pointer was
 1662  * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
 1663  * returned.
 1664  *
 1665  * The given PV list lock may be released.
 1666  */
 1667 static pv_entry_t
 1668 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
 1669 {
 1670         int bit, field;
 1671         pv_entry_t pv;
 1672         struct pv_chunk *pc;
 1673         vm_page_t m;
 1674 
 1675         rw_assert(&pvh_global_lock, RA_LOCKED);
 1676         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1677         PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
 1678 retry:
 1679         pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 1680         if (pc != NULL) {
 1681                 for (field = 0; field < _NPCM; field++) {
 1682                         if (pc->pc_map[field]) {
 1683                                 bit = ffsl(pc->pc_map[field]) - 1;
 1684                                 break;
 1685                         }
 1686                 }
 1687                 if (field < _NPCM) {
 1688                         pv = &pc->pc_pventry[field * 64 + bit];
 1689                         pc->pc_map[field] &= ~(1ul << bit);
 1690                         /* If this was the last item, move it to tail */
 1691                         if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
 1692                             pc->pc_map[2] == 0) {
 1693                                 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 1694                                 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
 1695                                     pc_list);
 1696                         }
 1697                         PV_STAT(atomic_add_long(&pv_entry_count, 1));
 1698                         PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
 1699                         return (pv);
 1700                 }
 1701         }
 1702         /* No free items, allocate another chunk */
 1703         m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 1704             VM_ALLOC_WIRED);
 1705         if (m == NULL) {
 1706                 if (lockp == NULL) {
 1707                         PV_STAT(pc_chunk_tryfail++);
 1708                         return (NULL);
 1709                 }
 1710                 m = reclaim_pv_chunk(pmap, lockp);
 1711                 if (m == NULL)
 1712                         goto retry;
 1713         }
 1714         PV_STAT(atomic_add_int(&pc_chunk_count, 1));
 1715         PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 1716         dump_add_page(m->phys_addr);
 1717         pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 1718         pc->pc_pmap = pmap;
 1719         pc->pc_map[0] = PC_FREE0 & ~1ul;        /* preallocated bit 0 */
 1720         pc->pc_map[1] = PC_FREE1;
 1721         pc->pc_map[2] = PC_FREE2;
 1722         mtx_lock(&pv_chunks_mutex);
 1723         TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 1724         mtx_unlock(&pv_chunks_mutex);
 1725         pv = &pc->pc_pventry[0];
 1726         TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 1727         PV_STAT(atomic_add_long(&pv_entry_count, 1));
 1728         PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
 1729         return (pv);
 1730 }
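
/*
 * Editorial illustration (not part of pmap.c): the chunk bitmap protocol
 * shared by get_pv_entry() and free_pv_entry(), reduced to a hosted-C
 * sketch.  A set bit means "slot free": allocation clears the first set
 * bit it finds, free sets the bit again.  ex_ffsl() mimics ffsl(), i.e.
 * it returns the 1-based index of the least significant set bit, or 0.
 */
#if 0
#include <stdint.h>

#define EX_NFIELDS 3                    /* like _NPCM above */

static int
ex_ffsl(uint64_t x)
{
        int i;

        if (x == 0)
                return (0);
        for (i = 1; (x & 1) == 0; i++)
                x >>= 1;
        return (i);
}

/* Take a free slot; returns its index in [0, 168) or -1 if the chunk is full. */
static int
ex_chunk_alloc(uint64_t map[EX_NFIELDS])
{
        int bit, field;

        for (field = 0; field < EX_NFIELDS; field++) {
                if (map[field] != 0) {
                        bit = ex_ffsl(map[field]) - 1;
                        map[field] &= ~(1ul << bit);
                        return (field * 64 + bit);
                }
        }
        return (-1);
}

/* Mark slot 'idx' free again, exactly as free_pv_entry() does above. */
static void
ex_chunk_free(uint64_t map[EX_NFIELDS], int idx)
{
        map[idx / 64] |= 1ul << (idx % 64);
}
#endif /* 0 -- editorial example */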
 1731 
 1732 /*
 1733  * Ensure that the number of spare PV entries in the specified pmap meets or
 1734  * exceeds the given count, "needed".
 1735  *
 1736  * The given PV list lock may be released.
 1737  */
 1738 static void
 1739 reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
 1740 {
 1741         struct pch new_tail;
 1742         struct pv_chunk *pc;
 1743         vm_page_t m;
 1744         int avail, free;
 1745         bool reclaimed;
 1746 
 1747         rw_assert(&pvh_global_lock, RA_LOCKED);
 1748         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1749         KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
 1750 
 1751         /*
 1752          * Newly allocated PV chunks must be stored in a private list until
 1753          * the required number of PV chunks have been allocated.  Otherwise,
 1754          * reclaim_pv_chunk() could recycle one of these chunks.  In
 1755          * contrast, these chunks must be added to the pmap upon allocation.
 1756          */
 1757         TAILQ_INIT(&new_tail);
 1758 retry:
 1759         avail = 0;
 1760         TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
 1761                 bit_count((bitstr_t *)pc->pc_map, 0,
 1762                     sizeof(pc->pc_map) * NBBY, &free);
 1763                 if (free == 0)
 1764                         break;
 1765                 avail += free;
 1766                 if (avail >= needed)
 1767                         break;
 1768         }
 1769         for (reclaimed = false; avail < needed; avail += _NPCPV) {
 1770                 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 1771                     VM_ALLOC_WIRED);
 1772                 if (m == NULL) {
 1773                         m = reclaim_pv_chunk(pmap, lockp);
 1774                         if (m == NULL)
 1775                                 goto retry;
 1776                         reclaimed = true;
 1777                 }
 1778                 /* XXX PV STATS */
 1779 #if 0
 1780                 dump_add_page(m->phys_addr);
 1781 #endif
 1782                 pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 1783                 pc->pc_pmap = pmap;
 1784                 pc->pc_map[0] = PC_FREE0;
 1785                 pc->pc_map[1] = PC_FREE1;
 1786                 pc->pc_map[2] = PC_FREE2;
 1787                 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 1788                 TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 1789 
 1790                 /*
 1791                  * The reclaim might have freed a chunk from the current pmap.
 1792                  * If that chunk contained available entries, we need to
 1793                  * re-count the number of available entries.
 1794                  */
 1795                 if (reclaimed)
 1796                         goto retry;
 1797         }
 1798         if (!TAILQ_EMPTY(&new_tail)) {
 1799                 mtx_lock(&pv_chunks_mutex);
 1800                 TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
 1801                 mtx_unlock(&pv_chunks_mutex);
 1802         }
 1803 }
 1804 
 1805 /*
 1806  * First find and then remove the pv entry for the specified pmap and virtual
 1807  * address from the specified pv list.  Returns the pv entry if found and NULL
 1808  * otherwise.  This operation can be performed on pv lists for either 4KB or
 1809  * 2MB page mappings.
 1810  */
 1811 static __inline pv_entry_t
 1812 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 1813 {
 1814         pv_entry_t pv;
 1815 
 1816         rw_assert(&pvh_global_lock, RA_LOCKED);
 1817         TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 1818                 if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 1819                         TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 1820                         pvh->pv_gen++;
 1821                         break;
 1822                 }
 1823         }
 1824         return (pv);
 1825 }
 1826 
 1827 /*
 1828  * First find and then destroy the pv entry for the specified pmap and virtual
 1829  * address.  This operation can be performed on pv lists for either 4KB or 2MB
 1830  * page mappings.
 1831  */
 1832 static void
 1833 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 1834 {
 1835         pv_entry_t pv;
 1836 
 1837         pv = pmap_pvh_remove(pvh, pmap, va);
 1838 
 1839         KASSERT(pv != NULL, ("pmap_pvh_free: pv not found for %#lx", va));
 1840         free_pv_entry(pmap, pv);
 1841 }
 1842 
 1843 /*
 1844  * Conditionally create the PV entry for a 4KB page mapping if the required
 1845  * memory can be allocated without resorting to reclamation.
 1846  */
 1847 static boolean_t
 1848 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
 1849     struct rwlock **lockp)
 1850 {
 1851         pv_entry_t pv;
 1852 
 1853         rw_assert(&pvh_global_lock, RA_LOCKED);
 1854         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1855         /* Pass NULL instead of the lock pointer to disable reclamation. */
 1856         if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
 1857                 pv->pv_va = va;
 1858                 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 1859                 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 1860                 m->md.pv_gen++;
 1861                 return (TRUE);
 1862         } else
 1863                 return (FALSE);
 1864 }
 1865 
 1866 /*
 1867  * After demotion from a 2MB page mapping to 512 4KB page mappings,
 1868  * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
 1869  * entries for each of the 4KB page mappings.
 1870  */
 1871 static void __unused
 1872 pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
 1873     struct rwlock **lockp)
 1874 {
 1875         struct md_page *pvh;
 1876         struct pv_chunk *pc;
 1877         pv_entry_t pv;
 1878         vm_page_t m;
 1879         vm_offset_t va_last;
 1880         int bit, field;
 1881 
 1882         rw_assert(&pvh_global_lock, RA_LOCKED);
 1883         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1884         CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 1885 
 1886         /*
 1887          * Transfer the 2mpage's pv entry for this mapping to the first
 1888          * page's pv list.  Once this transfer begins, the pv list lock
 1889          * must not be released until the last pv entry is reinstantiated.
 1890          */
 1891         pvh = pa_to_pvh(pa);
 1892         va &= ~L2_OFFSET;
 1893         pv = pmap_pvh_remove(pvh, pmap, va);
 1894         KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
 1895         m = PHYS_TO_VM_PAGE(pa);
 1896         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 1897         m->md.pv_gen++;
 1898         /* Instantiate the remaining 511 pv entries. */
 1899         va_last = va + L2_SIZE - PAGE_SIZE;
 1900         for (;;) {
 1901                 pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 1902                 KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
 1903                     pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare"));
 1904                 for (field = 0; field < _NPCM; field++) {
 1905                         while (pc->pc_map[field] != 0) {
 1906                                 bit = ffsl(pc->pc_map[field]) - 1;
 1907                                 pc->pc_map[field] &= ~(1ul << bit);
 1908                                 pv = &pc->pc_pventry[field * 64 + bit];
 1909                                 va += PAGE_SIZE;
 1910                                 pv->pv_va = va;
 1911                                 m++;
 1912                                 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 1913                             ("pmap_pv_demote_l2: page %p is not managed", m));
 1914                                 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 1915                                 m->md.pv_gen++;
 1916                                 if (va == va_last)
 1917                                         goto out;
 1918                         }
 1919                 }
 1920                 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 1921                 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 1922         }
 1923 out:
 1924         if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
 1925                 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 1926                 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 1927         }
 1928         /* XXX PV stats */
 1929 }
 1930 
 1931 #if VM_NRESERVLEVEL > 0
 1932 static void
 1933 pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
 1934     struct rwlock **lockp)
 1935 {
 1936         struct md_page *pvh;
 1937         pv_entry_t pv;
 1938         vm_page_t m;
 1939         vm_offset_t va_last;
 1940 
 1941         rw_assert(&pvh_global_lock, RA_LOCKED);
 1942         KASSERT((va & L2_OFFSET) == 0,
 1943             ("pmap_pv_promote_l2: misaligned va %#lx", va));
 1944 
 1945         CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 1946 
 1947         m = PHYS_TO_VM_PAGE(pa);
 1948         pv = pmap_pvh_remove(&m->md, pmap, va);
 1949         KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv for %#lx not found", va));
 1950         pvh = pa_to_pvh(pa);
 1951         TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 1952         pvh->pv_gen++;
 1953 
 1954         va_last = va + L2_SIZE - PAGE_SIZE;
 1955         do {
 1956                 m++;
 1957                 va += PAGE_SIZE;
 1958                 pmap_pvh_free(&m->md, pmap, va);
 1959         } while (va < va_last);
 1960 }
 1961 #endif /* VM_NRESERVLEVEL > 0 */
 1962 
 1963 /*
 1964  * Create the PV entry for a 2MB page mapping.  Always returns true unless the
 1965  * flag PMAP_ENTER_NORECLAIM is specified.  If that flag is specified, returns
 1966  * false if the PV entry cannot be allocated without resorting to reclamation.
 1967  */
 1968 static bool
 1969 pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags,
 1970     struct rwlock **lockp)
 1971 {
 1972         struct md_page *pvh;
 1973         pv_entry_t pv;
 1974         vm_paddr_t pa;
 1975 
 1976         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1977         /* Pass NULL instead of the lock pointer to disable reclamation. */
 1978         if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ?
 1979             NULL : lockp)) == NULL)
 1980                 return (false);
 1981         pv->pv_va = va;
 1982         pa = PTE_TO_PHYS(l2e);
 1983         CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 1984         pvh = pa_to_pvh(pa);
 1985         TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 1986         pvh->pv_gen++;
 1987         return (true);
 1988 }
 1989 
 1990 static void
 1991 pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
 1992 {
 1993         pt_entry_t newl2, oldl2;
 1994         vm_page_t ml3;
 1995         vm_paddr_t ml3pa;
 1996 
 1997         KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va));
 1998         KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap));
 1999         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2000 
 2001         ml3 = pmap_remove_pt_page(pmap, va);
 2002         if (ml3 == NULL)
 2003                 panic("pmap_remove_kernel_l2: Missing pt page");
 2004 
 2005         ml3pa = VM_PAGE_TO_PHYS(ml3);
 2006         newl2 = ml3pa | PTE_V;
 2007 
 2008         /*
 2009          * If this page table page was unmapped by a promotion, then it
 2010          * contains valid mappings.  Zero it to invalidate those mappings.
 2011          */
 2012         if (ml3->valid != 0)
 2013                 pagezero((void *)PHYS_TO_DMAP(ml3pa));
 2014 
 2015         /*
 2016          * Demote the mapping.
 2017          */
 2018         oldl2 = pmap_load_store(l2, newl2);
 2019         KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx",
 2020             __func__, l2, oldl2));
 2021 }
 2022 
 2023 /*
 2024  * pmap_remove_l2: Remove a level 2 (2MB) superpage mapping.
 2025  */
 2026 static int
 2027 pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
 2028     pd_entry_t l1e, struct spglist *free, struct rwlock **lockp)
 2029 {
 2030         struct md_page *pvh;
 2031         pt_entry_t oldl2;
 2032         vm_offset_t eva, va;
 2033         vm_page_t m, ml3;
 2034 
 2035         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2036         KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned"));
 2037         oldl2 = pmap_load_clear(l2);
 2038         KASSERT((oldl2 & PTE_RWX) != 0,
 2039             ("pmap_remove_l2: L2e %lx is not a superpage mapping", oldl2));
 2040 
 2041         /*
 2042          * The sfence.vma documentation states that it is sufficient to specify
 2043          * a single address within a superpage mapping.  However, since we do
 2044          * not perform any invalidation upon promotion, TLBs may still be
 2045          * caching 4KB mappings within the superpage, so we must invalidate the
 2046          * entire range.
 2047          */
 2048         pmap_invalidate_range(pmap, sva, sva + L2_SIZE);
 2049         if ((oldl2 & PTE_SW_WIRED) != 0)
 2050                 pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE;
 2051         pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE);
 2052         if ((oldl2 & PTE_SW_MANAGED) != 0) {
 2053                 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(oldl2));
 2054                 pvh = pa_to_pvh(PTE_TO_PHYS(oldl2));
 2055                 pmap_pvh_free(pvh, pmap, sva);
 2056                 eva = sva + L2_SIZE;
 2057                 for (va = sva, m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(oldl2));
 2058                     va < eva; va += PAGE_SIZE, m++) {
 2059                         if ((oldl2 & PTE_D) != 0)
 2060                                 vm_page_dirty(m);
 2061                         if ((oldl2 & PTE_A) != 0)
 2062                                 vm_page_aflag_set(m, PGA_REFERENCED);
 2063                         if (TAILQ_EMPTY(&m->md.pv_list) &&
 2064                             TAILQ_EMPTY(&pvh->pv_list))
 2065                                 vm_page_aflag_clear(m, PGA_WRITEABLE);
 2066                 }
 2067         }
 2068         if (pmap == kernel_pmap) {
 2069                 pmap_remove_kernel_l2(pmap, l2, sva);
 2070         } else {
 2071                 ml3 = pmap_remove_pt_page(pmap, sva);
 2072                 if (ml3 != NULL) {
 2073                         KASSERT(ml3->valid == VM_PAGE_BITS_ALL,
 2074                             ("pmap_remove_l2: l3 page not promoted"));
 2075                         pmap_resident_count_dec(pmap, 1);
 2076                         KASSERT(ml3->wire_count == Ln_ENTRIES,
 2077                             ("pmap_remove_l2: l3 page wire count error"));
 2078                         ml3->wire_count = 1;
 2079                         vm_page_unwire_noq(ml3);
 2080                         pmap_add_delayed_free_list(ml3, free, FALSE);
 2081                 }
 2082         }
 2083         return (pmap_unuse_pt(pmap, sva, l1e, free));
 2084 }
 2085 
 2086 /*
 2087  * pmap_remove_l3: Remove a single 4KB page mapping from a process.
 2088  */
 2089 static int
 2090 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 
 2091     pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
 2092 {
 2093         struct md_page *pvh;
 2094         pt_entry_t old_l3;
 2095         vm_paddr_t phys;
 2096         vm_page_t m;
 2097 
 2098         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2099         old_l3 = pmap_load_clear(l3);
 2100         pmap_invalidate_page(pmap, va);
 2101         if (old_l3 & PTE_SW_WIRED)
 2102                 pmap->pm_stats.wired_count -= 1;
 2103         pmap_resident_count_dec(pmap, 1);
 2104         if (old_l3 & PTE_SW_MANAGED) {
 2105                 phys = PTE_TO_PHYS(old_l3);
 2106                 m = PHYS_TO_VM_PAGE(phys);
 2107                 if ((old_l3 & PTE_D) != 0)
 2108                         vm_page_dirty(m);
 2109                 if (old_l3 & PTE_A)
 2110                         vm_page_aflag_set(m, PGA_REFERENCED);
 2111                 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 2112                 pmap_pvh_free(&m->md, pmap, va);
 2113                 if (TAILQ_EMPTY(&m->md.pv_list) &&
 2114                     (m->flags & PG_FICTITIOUS) == 0) {
 2115                         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2116                         if (TAILQ_EMPTY(&pvh->pv_list))
 2117                                 vm_page_aflag_clear(m, PGA_WRITEABLE);
 2118                 }
 2119         }
 2120 
 2121         return (pmap_unuse_pt(pmap, va, l2e, free));
 2122 }
 2123 
 2124 /*
 2125  *      Remove the given range of addresses from the specified map.
 2126  *
 2127  *      It is assumed that the start and end are properly
 2128  *      rounded to the page size.
 2129  */
 2130 void
 2131 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 2132 {
 2133         struct spglist free;
 2134         struct rwlock *lock;
 2135         vm_offset_t va, va_next;
 2136         pd_entry_t *l1, *l2, l2e;
 2137         pt_entry_t *l3;
 2138 
 2139         /*
 2140          * Perform an unsynchronized read.  This is, however, safe.
 2141          */
 2142         if (pmap->pm_stats.resident_count == 0)
 2143                 return;
 2144 
 2145         SLIST_INIT(&free);
 2146 
 2147         rw_rlock(&pvh_global_lock);
 2148         PMAP_LOCK(pmap);
 2149 
 2150         lock = NULL;
 2151         for (; sva < eva; sva = va_next) {
 2152                 if (pmap->pm_stats.resident_count == 0)
 2153                         break;
 2154 
 2155                 l1 = pmap_l1(pmap, sva);
 2156                 if (pmap_load(l1) == 0) {
 2157                         va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 2158                         if (va_next < sva)
 2159                                 va_next = eva;
 2160                         continue;
 2161                 }
 2162 
 2163                 /*
 2164                  * Calculate index for next page table.
 2165                  */
 2166                 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 2167                 if (va_next < sva)
 2168                         va_next = eva;
 2169 
 2170                 l2 = pmap_l1_to_l2(l1, sva);
 2171                 if (l2 == NULL)
 2172                         continue;
 2173                 if ((l2e = pmap_load(l2)) == 0)
 2174                         continue;
 2175                 if ((l2e & PTE_RWX) != 0) {
 2176                         if (sva + L2_SIZE == va_next && eva >= va_next) {
 2177                                 (void)pmap_remove_l2(pmap, l2, sva,
 2178                                     pmap_load(l1), &free, &lock);
 2179                                 continue;
 2180                         } else if (!pmap_demote_l2_locked(pmap, l2, sva,
 2181                             &lock)) {
 2182                                 /*
 2183                                  * The large page mapping was destroyed.
 2184                                  */
 2185                                 continue;
 2186                         }
 2187                         l2e = pmap_load(l2);
 2188                 }
 2189 
 2190                 /*
 2191                  * Limit our scan to either the end of the va represented
 2192                  * by the current page table page, or to the end of the
 2193                  * range being removed.
 2194                  */
 2195                 if (va_next > eva)
 2196                         va_next = eva;
 2197 
 2198                 va = va_next;
 2199                 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 2200                     sva += L3_SIZE) {
 2201                         if (pmap_load(l3) == 0) {
 2202                                 if (va != va_next) {
 2203                                         pmap_invalidate_range(pmap, va, sva);
 2204                                         va = va_next;
 2205                                 }
 2206                                 continue;
 2207                         }
 2208                         if (va == va_next)
 2209                                 va = sva;
 2210                         if (pmap_remove_l3(pmap, l3, sva, l2e, &free, &lock)) {
 2211                                 sva += L3_SIZE;
 2212                                 break;
 2213                         }
 2214                 }
 2215                 if (va != va_next)
 2216                         pmap_invalidate_range(pmap, va, sva);
 2217         }
 2218         if (lock != NULL)
 2219                 rw_wunlock(lock);
 2220         rw_runlock(&pvh_global_lock);
 2221         PMAP_UNLOCK(pmap);
 2222         vm_page_free_pages_toq(&free, false);
 2223 }
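
/*
 * Editorial illustration (not part of pmap.c): the boundary-advance step
 * used by the removal loop above (and again in pmap_protect()).  The next
 * 2MB boundary is computed by rounding up; if that rounds past the end of
 * the address space and wraps, the value is clamped to 'eva' so the loop
 * still terminates.  The constants are local stand-ins for L2_SIZE and
 * L2_OFFSET.
 */
#if 0
#include <stdint.h>

#define EX_L2_SIZE      (2ul * 1024 * 1024)     /* 2MB */
#define EX_L2_OFFSET    (EX_L2_SIZE - 1)

static uint64_t
ex_next_l2_boundary(uint64_t sva, uint64_t eva)
{
        uint64_t va_next = (sva + EX_L2_SIZE) & ~EX_L2_OFFSET;

        return (va_next < sva ? eva : va_next);
}
#endif /* 0 -- editorial example */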
 2224 
 2225 /*
 2226  *      Routine:        pmap_remove_all
 2227  *      Function:
 2228  *              Removes this physical page from
 2229  *              all physical maps in which it resides.
 2230  *              Reflects back modify bits to the pager.
 2231  *
 2232  *      Notes:
 2233  *              Original versions of this routine were very
 2234  *              inefficient because they iteratively called
 2235  *              pmap_remove (slow...)
 2236  */
 2237 
 2238 void
 2239 pmap_remove_all(vm_page_t m)
 2240 {
 2241         struct spglist free;
 2242         struct md_page *pvh;
 2243         pmap_t pmap;
 2244         pt_entry_t *l3, l3e;
 2245         pd_entry_t *l2, l2e;
 2246         pv_entry_t pv;
 2247         vm_offset_t va;
 2248 
 2249         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2250             ("pmap_remove_all: page %p is not managed", m));
 2251         SLIST_INIT(&free);
 2252         pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 2253             pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2254 
 2255         rw_wlock(&pvh_global_lock);
 2256         while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 2257                 pmap = PV_PMAP(pv);
 2258                 PMAP_LOCK(pmap);
 2259                 va = pv->pv_va;
 2260                 l2 = pmap_l2(pmap, va);
 2261                 (void)pmap_demote_l2(pmap, l2, va);
 2262                 PMAP_UNLOCK(pmap);
 2263         }
 2264         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2265                 pmap = PV_PMAP(pv);
 2266                 PMAP_LOCK(pmap);
 2267                 pmap_resident_count_dec(pmap, 1);
 2268                 l2 = pmap_l2(pmap, pv->pv_va);
 2269                 KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
 2270                 l2e = pmap_load(l2);
 2271 
 2272                 KASSERT((l2e & PTE_RX) == 0,
 2273                     ("pmap_remove_all: found a superpage in %p's pv list", m));
 2274 
 2275                 l3 = pmap_l2_to_l3(l2, pv->pv_va);
 2276                 l3e = pmap_load_clear(l3);
 2277                 pmap_invalidate_page(pmap, pv->pv_va);
 2278                 if (l3e & PTE_SW_WIRED)
 2279                         pmap->pm_stats.wired_count--;
 2280                 if ((l3e & PTE_A) != 0)
 2281                         vm_page_aflag_set(m, PGA_REFERENCED);
 2282 
 2283                 /*
 2284                  * Update the vm_page_t clean and reference bits.
 2285                  */
 2286                 if ((l3e & PTE_D) != 0)
 2287                         vm_page_dirty(m);
 2288                 pmap_unuse_pt(pmap, pv->pv_va, pmap_load(l2), &free);
 2289                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 2290                 m->md.pv_gen++;
 2291                 free_pv_entry(pmap, pv);
 2292                 PMAP_UNLOCK(pmap);
 2293         }
 2294         vm_page_aflag_clear(m, PGA_WRITEABLE);
 2295         rw_wunlock(&pvh_global_lock);
 2296         vm_page_free_pages_toq(&free, false);
 2297 }
 2298 
 2299 /*
 2300  *      Set the physical protection on the
 2301  *      specified range of this map as requested.
 2302  */
 2303 void
 2304 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 2305 {
 2306         pd_entry_t *l1, *l2, l2e;
 2307         pt_entry_t *l3, l3e, mask;
 2308         vm_page_t m;
 2309         vm_paddr_t pa;
 2310         vm_offset_t va, va_next;
 2311         bool anychanged, pv_lists_locked;
 2312 
 2313         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 2314                 pmap_remove(pmap, sva, eva);
 2315                 return;
 2316         }
 2317 
 2318         if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
 2319             (VM_PROT_WRITE | VM_PROT_EXECUTE))
 2320                 return;
 2321 
 2322         anychanged = false;
 2323         pv_lists_locked = false;
 2324         mask = 0;
 2325         if ((prot & VM_PROT_WRITE) == 0)
 2326                 mask |= PTE_W | PTE_D;
 2327         if ((prot & VM_PROT_EXECUTE) == 0)
 2328                 mask |= PTE_X;
 2329 resume:
 2330         PMAP_LOCK(pmap);
 2331         for (; sva < eva; sva = va_next) {
 2332                 l1 = pmap_l1(pmap, sva);
 2333                 if (pmap_load(l1) == 0) {
 2334                         va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 2335                         if (va_next < sva)
 2336                                 va_next = eva;
 2337                         continue;
 2338                 }
 2339 
 2340                 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 2341                 if (va_next < sva)
 2342                         va_next = eva;
 2343 
 2344                 l2 = pmap_l1_to_l2(l1, sva);
 2345                 if (l2 == NULL || (l2e = pmap_load(l2)) == 0)
 2346                         continue;
 2347                 if ((l2e & PTE_RWX) != 0) {
 2348                         if (sva + L2_SIZE == va_next && eva >= va_next) {
 2349 retryl2:
 2350                                 if ((l2e & (PTE_SW_MANAGED | PTE_D)) ==
 2351                                     (PTE_SW_MANAGED | PTE_D)) {
 2352                                         pa = PTE_TO_PHYS(l2e);
 2353                                         for (va = sva, m = PHYS_TO_VM_PAGE(pa);
 2354                                             va < va_next; m++, va += PAGE_SIZE)
 2355                                                 vm_page_dirty(m);
 2356                                 }
 2357                                 if (!atomic_fcmpset_long(l2, &l2e, l2e & ~mask))
 2358                                         goto retryl2;
 2359                                 anychanged = true;
 2360                         } else {
 2361                                 if (!pv_lists_locked) {
 2362                                         pv_lists_locked = true;
 2363                                         if (!rw_try_rlock(&pvh_global_lock)) {
 2364                                                 if (anychanged)
 2365                                                         pmap_invalidate_all(
 2366                                                             pmap);
 2367                                                 PMAP_UNLOCK(pmap);
 2368                                                 rw_rlock(&pvh_global_lock);
 2369                                                 goto resume;
 2370                                         }
 2371                                 }
 2372                                 if (!pmap_demote_l2(pmap, l2, sva)) {
 2373                                         /*
 2374                                          * The large page mapping was destroyed.
 2375                                          */
 2376                                         continue;
 2377                                 }
 2378                         }
 2379                 }
 2380 
 2381                 if (va_next > eva)
 2382                         va_next = eva;
 2383 
 2384                 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 2385                     sva += L3_SIZE) {
 2386                         l3e = pmap_load(l3);
 2387 retryl3:
 2388                         if ((l3e & PTE_V) == 0)
 2389                                 continue;
 2390                         if ((prot & VM_PROT_WRITE) == 0 &&
 2391                             (l3e & (PTE_SW_MANAGED | PTE_D)) ==
 2392                             (PTE_SW_MANAGED | PTE_D)) {
 2393                                 m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(l3e));
 2394                                 vm_page_dirty(m);
 2395                         }
 2396                         if (!atomic_fcmpset_long(l3, &l3e, l3e & ~mask))
 2397                                 goto retryl3;
 2398                         anychanged = true;
 2399                 }
 2400         }
 2401         if (anychanged)
 2402                 pmap_invalidate_all(pmap);
 2403         if (pv_lists_locked)
 2404                 rw_runlock(&pvh_global_lock);
 2405         PMAP_UNLOCK(pmap);
 2406 }
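
/*
 * Editorial illustration (not part of pmap.c): the permission mask that
 * pmap_protect() strips from each entry, shown in isolation.  Removing
 * write access clears both W and the dirty bit D; removing execute access
 * clears X.  Bit positions follow the RISC-V Sv39 PTE layout; the
 * VM_PROT-style values are local stand-ins, not the kernel's definitions.
 */
#if 0
#include <stdint.h>

#define EX_PTE_W        (1ul << 2)
#define EX_PTE_X        (1ul << 3)
#define EX_PTE_D        (1ul << 7)

#define EX_PROT_WRITE   0x2
#define EX_PROT_EXECUTE 0x4

static uint64_t
ex_protect_pte(uint64_t pte, int prot)
{
        uint64_t mask = 0;

        if ((prot & EX_PROT_WRITE) == 0)
                mask |= EX_PTE_W | EX_PTE_D;
        if ((prot & EX_PROT_EXECUTE) == 0)
                mask |= EX_PTE_X;
        return (pte & ~mask);
}
#endif /* 0 -- editorial example */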
 2407 
 2408 int
 2409 pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype)
 2410 {
 2411         pd_entry_t *l2, l2e;
 2412         pt_entry_t bits, *pte, oldpte;
 2413         int rv;
 2414 
 2415         rv = 0;
 2416         PMAP_LOCK(pmap);
 2417         l2 = pmap_l2(pmap, va);
 2418         if (l2 == NULL || ((l2e = pmap_load(l2)) & PTE_V) == 0)
 2419                 goto done;
 2420         if ((l2e & PTE_RWX) == 0) {
 2421                 pte = pmap_l2_to_l3(l2, va);
 2422                 if (pte == NULL || ((oldpte = pmap_load(pte)) & PTE_V) == 0)
 2423                         goto done;
 2424         } else {
 2425                 pte = l2;
 2426                 oldpte = l2e;
 2427         }
 2428 
 2429         if ((pmap != kernel_pmap && (oldpte & PTE_U) == 0) ||
 2430             (ftype == VM_PROT_WRITE && (oldpte & PTE_W) == 0) ||
 2431             (ftype == VM_PROT_EXECUTE && (oldpte & PTE_X) == 0) ||
 2432             (ftype == VM_PROT_READ && (oldpte & PTE_R) == 0))
 2433                 goto done;
 2434 
 2435         bits = PTE_A;
 2436         if (ftype == VM_PROT_WRITE)
 2437                 bits |= PTE_D;
 2438 
 2439         /*
 2440          * Spurious faults can occur if the implementation caches invalid
 2441          * entries in the TLB, or if simultaneous accesses on multiple CPUs
 2442          * race with each other.
 2443          */
 2444         if ((oldpte & bits) != bits)
 2445                 pmap_store_bits(pte, bits);
 2446         sfence_vma();
 2447         rv = 1;
 2448 done:
 2449         PMAP_UNLOCK(pmap);
 2450         return (rv);
 2451 }
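
/*
 * Editorial illustration (not part of pmap.c): the accessed/dirty rule
 * that pmap_fault_fixup() applies above.  Any legitimate access earns
 * PTE_A, and a write additionally earns PTE_D; the page table is only
 * written when a needed bit is missing, which keeps spurious faults
 * cheap.  Bit positions follow Sv39; the fault-type values are local
 * stand-ins for VM_PROT_* access types.
 */
#if 0
#include <stdint.h>

#define EX_PTE_A        (1ul << 6)
#define EX_PTE_D        (1ul << 7)

#define EX_FAULT_READ   1
#define EX_FAULT_WRITE  2

static uint64_t
ex_fixup_ad_bits(uint64_t pte, int fault_type)
{
        uint64_t bits = EX_PTE_A;

        if (fault_type == EX_FAULT_WRITE)
                bits |= EX_PTE_D;
        if ((pte & bits) != bits)
                pte |= bits;            /* only touch the PTE when needed */
        return (pte);
}
#endif /* 0 -- editorial example */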
 2452 
 2453 static bool
 2454 pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va)
 2455 {
 2456         struct rwlock *lock;
 2457         bool rv;
 2458 
 2459         lock = NULL;
 2460         rv = pmap_demote_l2_locked(pmap, l2, va, &lock);
 2461         if (lock != NULL)
 2462                 rw_wunlock(lock);
 2463         return (rv);
 2464 }
 2465 
 2466 /*
 2467  * Tries to demote a 2MB page mapping.  If demotion fails, the 2MB page
 2468  * mapping is invalidated.
 2469  */
 2470 static bool
 2471 pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
 2472     struct rwlock **lockp)
 2473 {
 2474         struct spglist free;
 2475         vm_page_t mpte;
 2476         pd_entry_t newl2, oldl2;
 2477         pt_entry_t *firstl3, newl3;
 2478         vm_paddr_t mptepa;
 2479         int i;
 2480 
 2481         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2482 
 2483         oldl2 = pmap_load(l2);
 2484         KASSERT((oldl2 & PTE_RWX) != 0,
 2485             ("pmap_demote_l2_locked: oldl2 is not a leaf entry"));
 2486         if ((oldl2 & PTE_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) ==
 2487             NULL) {
 2488                 if ((oldl2 & PTE_A) == 0 || (mpte = vm_page_alloc(NULL,
 2489                     pmap_l2_pindex(va), (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT :
 2490                     VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) ==
 2491                     NULL) {
 2492                         SLIST_INIT(&free);
 2493                         (void)pmap_remove_l2(pmap, l2, va & ~L2_OFFSET,
 2494                             pmap_load(pmap_l1(pmap, va)), &free, lockp);
 2495                         vm_page_free_pages_toq(&free, true);
 2496                         CTR2(KTR_PMAP, "pmap_demote_l2_locked: "
 2497                             "failure for va %#lx in pmap %p", va, pmap);
 2498                         return (false);
 2499                 }
 2500                 if (va < VM_MAXUSER_ADDRESS) {
 2501                         mpte->wire_count = Ln_ENTRIES;
 2502                         pmap_resident_count_inc(pmap, 1);
 2503                 }
 2504         }
 2505         mptepa = VM_PAGE_TO_PHYS(mpte);
 2506         firstl3 = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
 2507         newl2 = ((mptepa / PAGE_SIZE) << PTE_PPN0_S) | PTE_V;
 2508         KASSERT((oldl2 & PTE_A) != 0,
 2509             ("pmap_demote_l2_locked: oldl2 is missing PTE_A"));
 2510         KASSERT((oldl2 & (PTE_D | PTE_W)) != PTE_W,
 2511             ("pmap_demote_l2_locked: oldl2 is missing PTE_D"));
 2512         newl3 = oldl2;
 2513 
 2514         /*
 2515          * If the page table page is not leftover from an earlier promotion,
 2516          * initialize it.
 2517          */
 2518         if (mpte->valid == 0) {
 2519                 for (i = 0; i < Ln_ENTRIES; i++)
 2520                         pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S));
 2521         }
 2522         KASSERT(PTE_TO_PHYS(pmap_load(firstl3)) == PTE_TO_PHYS(newl3),
 2523             ("pmap_demote_l2_locked: firstl3 and newl3 map different physical "
 2524             "addresses"));
 2525 
 2526         /*
 2527          * If the mapping has changed attributes, update the page table
 2528          * entries.
 2529          */
 2530         if ((pmap_load(firstl3) & PTE_PROMOTE) != (newl3 & PTE_PROMOTE))
 2531                 for (i = 0; i < Ln_ENTRIES; i++)
 2532                         pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S));
 2533 
 2534         /*
 2535          * The spare PV entries must be reserved prior to demoting the
 2536          * mapping, that is, prior to changing the L2 entry.  Otherwise, the
 2537          * state of the L2 entry and the PV lists will be inconsistent, which
 2538          * can result in reclaim_pv_chunk() attempting to remove a PV entry from
 2539          * the wrong PV list and pmap_pv_demote_l2() failing to find the
 2540          * expected PV entry for the 2MB page mapping that is being demoted.
 2541          */
 2542         if ((oldl2 & PTE_SW_MANAGED) != 0)
 2543                 reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
 2544 
 2545         /*
 2546          * Demote the mapping.
 2547          */
 2548         pmap_store(l2, newl2);
 2549 
 2550         /*
 2551          * Demote the PV entry.
 2552          */
 2553         if ((oldl2 & PTE_SW_MANAGED) != 0)
 2554                 pmap_pv_demote_l2(pmap, va, PTE_TO_PHYS(oldl2), lockp);
 2555 
 2556         atomic_add_long(&pmap_l2_demotions, 1);
 2557         CTR2(KTR_PMAP, "pmap_demote_l2_locked: success for va %#lx in pmap %p",
 2558             va, pmap);
 2559         return (true);
 2560 }
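
/*
 * Editorial illustration (not part of pmap.c): the fill loop used by the
 * demotion above.  The PPN field starts at bit 10 and counts 4KB pages,
 * so adding (i << PTE_PPN0_S) to a copy of the 2MB leaf entry produces an
 * L3 entry for the i-th constituent 4KB page while leaving the permission
 * and software bits untouched.  The ex_ names and the 512-entry table size
 * are assumptions local to this sketch.
 */
#if 0
#include <stdint.h>

#define EX_LN_ENTRIES   512
#define EX_PTE_PPN0_S   10

static void
ex_fill_l3_from_l2(uint64_t *firstl3, uint64_t l2e)
{
        int i;

        for (i = 0; i < EX_LN_ENTRIES; i++)
                firstl3[i] = l2e + ((uint64_t)i << EX_PTE_PPN0_S);
}
#endif /* 0 -- editorial example */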
 2561 
 2562 #if VM_NRESERVLEVEL > 0
 2563 static void
 2564 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
 2565     struct rwlock **lockp)
 2566 {
 2567         pt_entry_t *firstl3, *l3;
 2568         vm_paddr_t pa;
 2569         vm_page_t ml3;
 2570 
 2571         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2572 
 2573         va &= ~L2_OFFSET;
 2574         KASSERT((pmap_load(l2) & PTE_RWX) == 0,
 2575             ("pmap_promote_l2: invalid l2 entry %p", l2));
 2576 
 2577         firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2)));
 2578         pa = PTE_TO_PHYS(pmap_load(firstl3));
 2579         if ((pa & L2_OFFSET) != 0) {
 2580                 CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
 2581                     va, pmap);
 2582                 atomic_add_long(&pmap_l2_p_failures, 1);
 2583                 return;
 2584         }
 2585 
 2586         pa += PAGE_SIZE;
 2587         for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) {
 2588                 if (PTE_TO_PHYS(pmap_load(l3)) != pa) {
 2589                         CTR2(KTR_PMAP,
 2590                             "pmap_promote_l2: failure for va %#lx pmap %p",
 2591                             va, pmap);
 2592                         atomic_add_long(&pmap_l2_p_failures, 1);
 2593                         return;
 2594                 }
 2595                 if ((pmap_load(l3) & PTE_PROMOTE) !=
 2596                     (pmap_load(firstl3) & PTE_PROMOTE)) {
 2597                         CTR2(KTR_PMAP,
 2598                             "pmap_promote_l2: failure for va %#lx pmap %p",
 2599                             va, pmap);
 2600                         atomic_add_long(&pmap_l2_p_failures, 1);
 2601                         return;
 2602                 }
 2603                 pa += PAGE_SIZE;
 2604         }
 2605 
 2606         ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
 2607         KASSERT(ml3->pindex == pmap_l2_pindex(va),
 2608             ("pmap_promote_l2: page table page's pindex is wrong"));
 2609         if (pmap_insert_pt_page(pmap, ml3, true)) {
 2610                 CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p",
 2611                     va, pmap);
 2612                 atomic_add_long(&pmap_l2_p_failures, 1);
 2613                 return;
 2614         }
 2615 
 2616         if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0)
 2617                 pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)),
 2618                     lockp);
 2619 
 2620         pmap_store(l2, pmap_load(firstl3));
 2621 
 2622         atomic_add_long(&pmap_l2_promotions, 1);
 2623         CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
 2624             pmap);
 2625 }
 2626 #endif
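
/*
 * Editorial illustration (not part of pmap.c): the eligibility test that
 * pmap_promote_l2() performs above, as a standalone predicate.  A run of
 * 512 L3 entries may be replaced by one 2MB leaf only if the first page
 * is 2MB-aligned physically, the pages are physically contiguous, and the
 * attribute bits that matter for promotion agree.  'attr_mask' stands in
 * for PTE_PROMOTE; all ex_ names are assumptions local to the sketch.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

#define EX_LN_ENTRIES   512
#define EX_PAGE_SIZE    4096ul
#define EX_L2_SIZE      (EX_LN_ENTRIES * EX_PAGE_SIZE)  /* 2MB */
#define EX_PTE_PPN0_S   10

#define ex_pte_to_phys(pte)     (((pte) >> EX_PTE_PPN0_S) * EX_PAGE_SIZE)

static bool
ex_promotable(const uint64_t *firstl3, uint64_t attr_mask)
{
        uint64_t pa = ex_pte_to_phys(firstl3[0]);
        int i;

        if ((pa & (EX_L2_SIZE - 1)) != 0)
                return (false);         /* first page not 2MB aligned */
        for (i = 1; i < EX_LN_ENTRIES; i++) {
                if (ex_pte_to_phys(firstl3[i]) != pa + (uint64_t)i * EX_PAGE_SIZE)
                        return (false); /* not physically contiguous */
                if ((firstl3[i] & attr_mask) != (firstl3[0] & attr_mask))
                        return (false); /* attributes differ */
        }
        return (true);
}
#endif /* 0 -- editorial example */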
 2627 
 2628 /*
 2629  *      Insert the given physical page (p) at
 2630  *      the specified virtual address (v) in the
 2631  *      target physical map with the protection requested.
 2632  *
 2633  *      If specified, the page will be wired down, meaning
 2634  *      that the related pte can not be reclaimed.
 2635  *
 2636  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 2637  *      or lose information.  That is, this routine must actually
 2638  *      insert this page into the given map NOW.
 2639  */
 2640 int
 2641 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 2642     u_int flags, int8_t psind)
 2643 {
 2644         struct rwlock *lock;
 2645         pd_entry_t *l1, *l2, l2e;
 2646         pt_entry_t new_l3, orig_l3;
 2647         pt_entry_t *l3;
 2648         pv_entry_t pv;
 2649         vm_paddr_t opa, pa, l2_pa, l3_pa;
 2650         vm_page_t mpte, om, l2_m, l3_m;
 2651         pt_entry_t entry;
 2652         pn_t l2_pn, l3_pn, pn;
 2653         int rv;
 2654         bool nosleep;
 2655 
 2656         va = trunc_page(va);
 2657         if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 2658                 VM_OBJECT_ASSERT_LOCKED(m->object);
 2659         pa = VM_PAGE_TO_PHYS(m);
 2660         pn = (pa / PAGE_SIZE);
 2661 
 2662         new_l3 = PTE_V | PTE_R | PTE_A;
 2663         if (prot & VM_PROT_EXECUTE)
 2664                 new_l3 |= PTE_X;
 2665         if (flags & VM_PROT_WRITE)
 2666                 new_l3 |= PTE_D;
 2667         if (prot & VM_PROT_WRITE)
 2668                 new_l3 |= PTE_W;
 2669         if (va < VM_MAX_USER_ADDRESS)
 2670                 new_l3 |= PTE_U;
 2671 
 2672         new_l3 |= (pn << PTE_PPN0_S);
 2673         if ((flags & PMAP_ENTER_WIRED) != 0)
 2674                 new_l3 |= PTE_SW_WIRED;
 2675 
 2676         /*
 2677          * Set modified bit gratuitously for writeable mappings if
 2678          * the page is unmanaged. We do not want to take a fault
 2679          * to do the dirty bit accounting for these mappings.
 2680          */
 2681         if ((m->oflags & VPO_UNMANAGED) != 0) {
 2682                 if (prot & VM_PROT_WRITE)
 2683                         new_l3 |= PTE_D;
 2684         } else
 2685                 new_l3 |= PTE_SW_MANAGED;
 2686 
 2687         CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
 2688 
 2689         lock = NULL;
 2690         mpte = NULL;
 2691         rw_rlock(&pvh_global_lock);
 2692         PMAP_LOCK(pmap);
 2693         if (psind == 1) {
 2694                 /* Assert the required virtual and physical alignment. */
 2695                 KASSERT((va & L2_OFFSET) == 0,
 2696                     ("pmap_enter: va %#lx unaligned", va));
 2697                 KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
 2698                 rv = pmap_enter_l2(pmap, va, new_l3, flags, m, &lock);
 2699                 goto out;
 2700         }
 2701 
 2702         l2 = pmap_l2(pmap, va);
 2703         if (l2 != NULL && ((l2e = pmap_load(l2)) & PTE_V) != 0 &&
 2704             ((l2e & PTE_RWX) == 0 || pmap_demote_l2_locked(pmap, l2,
 2705             va, &lock))) {
 2706                 l3 = pmap_l2_to_l3(l2, va);
 2707                 if (va < VM_MAXUSER_ADDRESS) {
 2708                         mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
 2709                         mpte->wire_count++;
 2710                 }
 2711         } else if (va < VM_MAXUSER_ADDRESS) {
 2712                 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
 2713                 mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
 2714                 if (mpte == NULL && nosleep) {
 2715                         CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
 2716                         if (lock != NULL)
 2717                                 rw_wunlock(lock);
 2718                         rw_runlock(&pvh_global_lock);
 2719                         PMAP_UNLOCK(pmap);
 2720                         return (KERN_RESOURCE_SHORTAGE);
 2721                 }
 2722                 l3 = pmap_l3(pmap, va);
 2723         } else {
 2724                 l3 = pmap_l3(pmap, va);
 2725                 /* TODO: This is not optimal, but should mostly work */
 2726                 if (l3 == NULL) {
 2727                         if (l2 == NULL) {
 2728                                 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 2729                                     VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 2730                                     VM_ALLOC_ZERO);
 2731                                 if (l2_m == NULL)
 2732                                         panic("pmap_enter: l2 pte_m == NULL");
 2733                                 if ((l2_m->flags & PG_ZERO) == 0)
 2734                                         pmap_zero_page(l2_m);
 2735 
 2736                                 l2_pa = VM_PAGE_TO_PHYS(l2_m);
 2737                                 l2_pn = (l2_pa / PAGE_SIZE);
 2738 
 2739                                 l1 = pmap_l1(pmap, va);
 2740                                 entry = (PTE_V);
 2741                                 entry |= (l2_pn << PTE_PPN0_S);
 2742                                 pmap_store(l1, entry);
 2743                                 pmap_distribute_l1(pmap, pmap_l1_index(va), entry);
 2744                                 l2 = pmap_l1_to_l2(l1, va);
 2745                         }
 2746 
 2747                         l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 2748                             VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 2749                         if (l3_m == NULL)
 2750                                 panic("pmap_enter: l3 pte_m == NULL");
 2751                         if ((l3_m->flags & PG_ZERO) == 0)
 2752                                 pmap_zero_page(l3_m);
 2753 
 2754                         l3_pa = VM_PAGE_TO_PHYS(l3_m);
 2755                         l3_pn = (l3_pa / PAGE_SIZE);
 2756                         entry = (PTE_V);
 2757                         entry |= (l3_pn << PTE_PPN0_S);
 2758                         pmap_store(l2, entry);
 2759                         l3 = pmap_l2_to_l3(l2, va);
 2760                 }
 2761                 pmap_invalidate_page(pmap, va);
 2762         }
 2763 
 2764         orig_l3 = pmap_load(l3);
 2765         opa = PTE_TO_PHYS(orig_l3);
 2766         pv = NULL;
 2767 
 2768         /*
 2769          * Is the specified virtual address already mapped?
 2770          */
 2771         if ((orig_l3 & PTE_V) != 0) {
 2772                 /*
 2773                  * Wiring change, just update stats. We don't worry about
 2774                  * wiring PT pages as they remain resident as long as there
 2775                  * are valid mappings in them. Hence, if a user page is wired,
 2776                  * the PT page will be also.
 2777                  */
 2778                 if ((flags & PMAP_ENTER_WIRED) != 0 &&
 2779                     (orig_l3 & PTE_SW_WIRED) == 0)
 2780                         pmap->pm_stats.wired_count++;
 2781                 else if ((flags & PMAP_ENTER_WIRED) == 0 &&
 2782                     (orig_l3 & PTE_SW_WIRED) != 0)
 2783                         pmap->pm_stats.wired_count--;
 2784 
 2785                 /*
 2786                  * Remove the extra PT page reference.
 2787                  */
 2788                 if (mpte != NULL) {
 2789                         mpte->wire_count--;
 2790                         KASSERT(mpte->wire_count > 0,
 2791                             ("pmap_enter: missing reference to page table page,"
 2792                              " va: 0x%lx", va));
 2793                 }
 2794 
 2795                 /*
 2796                  * Has the physical page changed?
 2797                  */
 2798                 if (opa == pa) {
 2799                         /*
 2800                          * No, might be a protection or wiring change.
 2801                          */
 2802                         if ((orig_l3 & PTE_SW_MANAGED) != 0 &&
 2803                             (new_l3 & PTE_W) != 0)
 2804                                 vm_page_aflag_set(m, PGA_WRITEABLE);
 2805                         goto validate;
 2806                 }
 2807 
 2808                 /*
 2809                  * The physical page has changed.  Temporarily invalidate
 2810                  * the mapping.  This ensures that all threads sharing the
 2811                  * pmap keep a consistent view of the mapping, which is
 2812                  * necessary for the correct handling of COW faults.  It
 2813                  * also permits reuse of the old mapping's PV entry,
 2814                  * avoiding an allocation.
 2815                  *
 2816                  * For consistency, handle unmanaged mappings the same way.
 2817                  */
 2818                 orig_l3 = pmap_load_clear(l3);
 2819                 KASSERT(PTE_TO_PHYS(orig_l3) == opa,
 2820                     ("pmap_enter: unexpected pa update for %#lx", va));
 2821                 if ((orig_l3 & PTE_SW_MANAGED) != 0) {
 2822                         om = PHYS_TO_VM_PAGE(opa);
 2823 
 2824                         /*
 2825                          * The pmap lock is sufficient to synchronize with
 2826                          * concurrent calls to pmap_page_test_mappings() and
 2827                          * pmap_ts_referenced().
 2828                          */
 2829                         if ((orig_l3 & PTE_D) != 0)
 2830                                 vm_page_dirty(om);
 2831                         if ((orig_l3 & PTE_A) != 0)
 2832                                 vm_page_aflag_set(om, PGA_REFERENCED);
 2833                         CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
 2834                         pv = pmap_pvh_remove(&om->md, pmap, va);
 2835                         KASSERT(pv != NULL,
 2836                             ("pmap_enter: no PV entry for %#lx", va));
 2837                         if ((new_l3 & PTE_SW_MANAGED) == 0)
 2838                                 free_pv_entry(pmap, pv);
 2839                         if ((om->aflags & PGA_WRITEABLE) != 0 &&
 2840                             TAILQ_EMPTY(&om->md.pv_list) &&
 2841                             ((om->flags & PG_FICTITIOUS) != 0 ||
 2842                             TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
 2843                                 vm_page_aflag_clear(om, PGA_WRITEABLE);
 2844                 }
 2845                 pmap_invalidate_page(pmap, va);
 2846                 orig_l3 = 0;
 2847         } else {
 2848                 /*
 2849                  * Increment the counters.
 2850                  */
 2851                 if ((new_l3 & PTE_SW_WIRED) != 0)
 2852                         pmap->pm_stats.wired_count++;
 2853                 pmap_resident_count_inc(pmap, 1);
 2854         }
 2855         /*
 2856          * Enter on the PV list if part of our managed memory.
 2857          */
 2858         if ((new_l3 & PTE_SW_MANAGED) != 0) {
 2859                 if (pv == NULL) {
 2860                         pv = get_pv_entry(pmap, &lock);
 2861                         pv->pv_va = va;
 2862                 }
 2863                 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
 2864                 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 2865                 m->md.pv_gen++;
 2866                 if ((new_l3 & PTE_W) != 0)
 2867                         vm_page_aflag_set(m, PGA_WRITEABLE);
 2868         }
 2869 
 2870 validate:
 2871         /*
 2872          * Sync the i-cache on all harts before updating the PTE
 2873          * if the new PTE is executable.
 2874          */
 2875         if (prot & VM_PROT_EXECUTE)
 2876                 pmap_sync_icache(pmap, va, PAGE_SIZE);
 2877 
 2878         /*
 2879          * Update the L3 entry.
 2880          */
 2881         if (orig_l3 != 0) {
 2882                 orig_l3 = pmap_load_store(l3, new_l3);
 2883                 pmap_invalidate_page(pmap, va);
 2884                 KASSERT(PTE_TO_PHYS(orig_l3) == pa,
 2885                     ("pmap_enter: invalid update"));
 2886                 if ((orig_l3 & (PTE_D | PTE_SW_MANAGED)) ==
 2887                     (PTE_D | PTE_SW_MANAGED))
 2888                         vm_page_dirty(m);
 2889         } else {
 2890                 pmap_store(l3, new_l3);
 2891         }
 2892 
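               /*
                * Editorial note: a valid-to-valid update above goes through
                * pmap_load_store() plus a TLB invalidation because a stale
                * translation may be cached, whereas a previously invalid
                * slot can take a plain pmap_store(), since no TLB entry can
                * exist for it.  The promotion attempt below (compiled in
                * when VM_NRESERVLEVEL > 0) fires only if superpages are
                * enabled, the L3 page's wire count shows all Ln_ENTRIES
                * (512) slots in use, and the reservation backing "m" is
                * fully populated.
                */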
 2893 #if VM_NRESERVLEVEL > 0
 2894         if (mpte != NULL && mpte->wire_count == Ln_ENTRIES &&
 2895             pmap_ps_enabled(pmap) &&
 2896             (m->flags & PG_FICTITIOUS) == 0 &&
 2897             vm_reserv_level_iffullpop(m) == 0)
 2898                 pmap_promote_l2(pmap, l2, va, &lock);
 2899 #endif
 2900 
 2901         rv = KERN_SUCCESS;
 2902 out:
 2903         if (lock != NULL)
 2904                 rw_wunlock(lock);
 2905         rw_runlock(&pvh_global_lock);
 2906         PMAP_UNLOCK(pmap);
 2907         return (rv);
 2908 }
 2909 
 2910 /*
 2911  * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true
 2912  * if successful.  Returns false if (1) a page table page cannot be allocated
 2913  * without sleeping, (2) a mapping already exists at the specified virtual
 2914  * address, or (3) a PV entry cannot be allocated without reclaiming another
 2915  * PV entry.
 2916  */
 2917 static bool
 2918 pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 2919     struct rwlock **lockp)
 2920 {
 2921         pd_entry_t new_l2;
 2922         pn_t pn;
 2923 
 2924         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2925 
 2926         pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE;
 2927         new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V);
 2928         if ((m->oflags & VPO_UNMANAGED) == 0)
 2929                 new_l2 |= PTE_SW_MANAGED;
 2930         if ((prot & VM_PROT_EXECUTE) != 0)
 2931                 new_l2 |= PTE_X;
 2932         if (va < VM_MAXUSER_ADDRESS)
 2933                 new_l2 |= PTE_U;
 2934         return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
 2935             PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) ==
 2936             KERN_SUCCESS);
 2937 }
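      /*
       * Editorial sketch (not upstream text): the 2MB leaf built above packs
       * the physical page number the same way a 4KB leaf does.  For a
       * 2MB-aligned physical address such as 0x80400000:
       *
       *      pn     = 0x80400000 / PAGE_SIZE;              (0x80400)
       *      new_l2 = (pn << PTE_PPN0_S) | PTE_R | PTE_V;
       *
       * Because the address is 2MB-aligned, the low nine PPN bits are zero,
       * which is what Sv39 requires of a megapage leaf installed at L2.
       */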
 2938 
 2939 /*
 2940  * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if
 2941  * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE
 2942  * otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and
 2943  * a mapping already exists at the specified virtual address.  Returns
 2944  * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table
 2945  * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if
 2946  * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed.
 2947  *
 2948  * The parameter "m" is only used when creating a managed, writeable mapping.
 2949  */
 2950 static int
 2951 pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
 2952     vm_page_t m, struct rwlock **lockp)
 2953 {
 2954         struct spglist free;
 2955         pd_entry_t *l2, *l3, oldl2;
 2956         vm_offset_t sva;
 2957         vm_page_t l2pg, mt;
 2958 
 2959         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2960 
 2961         if ((l2pg = pmap_alloc_l2(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
 2962             NULL : lockp)) == NULL) {
 2963                 CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p",
 2964                     va, pmap);
 2965                 return (KERN_RESOURCE_SHORTAGE);
 2966         }
 2967 
 2968         l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg));
 2969         l2 = &l2[pmap_l2_index(va)];
 2970         if ((oldl2 = pmap_load(l2)) != 0) {
 2971                 KASSERT(l2pg->wire_count > 1,
 2972                     ("pmap_enter_l2: l2pg's wire count is too low"));
 2973                 if ((flags & PMAP_ENTER_NOREPLACE) != 0) {
 2974                         l2pg->wire_count--;
 2975                         CTR2(KTR_PMAP,
 2976                             "pmap_enter_l2: failure for va %#lx in pmap %p",
 2977                             va, pmap);
 2978                         return (KERN_FAILURE);
 2979                 }
 2980                 SLIST_INIT(&free);
 2981                 if ((oldl2 & PTE_RWX) != 0)
 2982                         (void)pmap_remove_l2(pmap, l2, va,
 2983                             pmap_load(pmap_l1(pmap, va)), &free, lockp);
 2984                 else
 2985                         for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) {
 2986                                 l3 = pmap_l2_to_l3(l2, sva);
 2987                                 if ((pmap_load(l3) & PTE_V) != 0 &&
 2988                                     pmap_remove_l3(pmap, l3, sva, oldl2, &free,
 2989                                     lockp) != 0)
 2990                                         break;
 2991                         }
 2992                 vm_page_free_pages_toq(&free, true);
 2993                 if (va >= VM_MAXUSER_ADDRESS) {
 2994                         /*
 2995                          * Both pmap_remove_l2() and pmap_remove_l3() will
 2996                          * leave the kernel page table page zero filled.
 2997                          */
 2998                         mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2)));
 2999                         if (pmap_insert_pt_page(pmap, mt, false))
 3000                                 panic("pmap_enter_l2: trie insert failed");
 3001                 } else
 3002                         KASSERT(pmap_load(l2) == 0,
 3003                             ("pmap_enter_l2: non-zero L2 entry %p", l2));
 3004         }
 3005 
 3006         if ((new_l2 & PTE_SW_MANAGED) != 0) {
 3007                 /*
 3008                  * Abort this mapping if its PV entry could not be created.
 3009                  */
 3010                 if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) {
 3011                         SLIST_INIT(&free);
 3012                         if (pmap_unwire_ptp(pmap, va, l2pg, &free)) {
 3013                                 /*
 3014                                  * Although "va" is not mapped, paging-structure
 3015                                  * caches could nonetheless have entries that
 3016                                  * refer to the freed page table pages.
 3017                                  * Invalidate those entries.
 3018                                  */
 3019                                 pmap_invalidate_page(pmap, va);
 3020                                 vm_page_free_pages_toq(&free, true);
 3021                         }
 3022                         CTR2(KTR_PMAP,
 3023                             "pmap_enter_l2: failure for va %#lx in pmap %p",
 3024                             va, pmap);
 3025                         return (KERN_RESOURCE_SHORTAGE);
 3026                 }
 3027                 if ((new_l2 & PTE_W) != 0)
 3028                         for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
 3029                                 vm_page_aflag_set(mt, PGA_WRITEABLE);
 3030         }
 3031 
 3032         /*
 3033          * Increment counters.
 3034          */
 3035         if ((new_l2 & PTE_SW_WIRED) != 0)
 3036                 pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE;
 3037         pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE;
 3038 
 3039         /*
 3040          * Map the superpage.
 3041          */
 3042         pmap_store(l2, new_l2);
 3043 
 3044         atomic_add_long(&pmap_l2_mappings, 1);
 3045         CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p",
 3046             va, pmap);
 3047 
 3048         return (KERN_SUCCESS);
 3049 }
 3050 
 3051 /*
 3052  * Maps a sequence of resident pages belonging to the same object.
 3053  * The sequence begins with the given page m_start.  This page is
 3054  * mapped at the given virtual address start.  Each subsequent page is
 3055  * mapped at a virtual address that is offset from start by the same
 3056  * amount as the page is offset from m_start within the object.  The
 3057  * last page in the sequence is the page with the largest offset from
 3058  * m_start that can be mapped at a virtual address less than the given
 3059  * virtual address end.  Not every virtual page between start and end
 3060  * is mapped; only those for which a resident page exists with the
 3061  * corresponding offset from m_start are mapped.
 3062  */
 3063 void
 3064 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
 3065     vm_page_t m_start, vm_prot_t prot)
 3066 {
 3067         struct rwlock *lock;
 3068         vm_offset_t va;
 3069         vm_page_t m, mpte;
 3070         vm_pindex_t diff, psize;
 3071 
 3072         VM_OBJECT_ASSERT_LOCKED(m_start->object);
 3073 
 3074         psize = atop(end - start);
 3075         mpte = NULL;
 3076         m = m_start;
 3077         lock = NULL;
 3078         rw_rlock(&pvh_global_lock);
 3079         PMAP_LOCK(pmap);
 3080         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 3081                 va = start + ptoa(diff);
 3082                 if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end &&
 3083                     m->psind == 1 && pmap_ps_enabled(pmap) &&
 3084                     pmap_enter_2mpage(pmap, va, m, prot, &lock))
 3085                         m = &m[L2_SIZE / PAGE_SIZE - 1];
 3086                 else
 3087                         mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte,
 3088                             &lock);
 3089                 m = TAILQ_NEXT(m, listq);
 3090         }
 3091         if (lock != NULL)
 3092                 rw_wunlock(lock);
 3093         rw_runlock(&pvh_global_lock);
 3094         PMAP_UNLOCK(pmap);
 3095 }
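      /*
       * Editorial note: when the candidate address above is 2MB-aligned, the
       * run fits before "end", superpages are enabled, and m->psind says the
       * page starts a fully populated superpage-sized run, a single 2MB
       * mapping is installed and the loop skips ahead by
       * L2_SIZE / PAGE_SIZE - 1 (511) pages before taking TAILQ_NEXT();
       * otherwise it falls back to 4KB mappings via
       * pmap_enter_quick_locked().
       */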
 3096 
 3097 /*
  3098  * This code makes some *MAJOR* assumptions:
  3099  * 1. The current pmap and the given pmap exist.
  3100  * 2. The mapping is not wired.
  3101  * 3. Read access only.
  3102  * 4. No page table pages.
  3103  * It is, however, *MUCH* faster than pmap_enter...
 3104  */
 3105 
 3106 void
 3107 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 3108 {
 3109         struct rwlock *lock;
 3110 
 3111         lock = NULL;
 3112         rw_rlock(&pvh_global_lock);
 3113         PMAP_LOCK(pmap);
 3114         (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
 3115         if (lock != NULL)
 3116                 rw_wunlock(lock);
 3117         rw_runlock(&pvh_global_lock);
 3118         PMAP_UNLOCK(pmap);
 3119 }
 3120 
 3121 static vm_page_t
 3122 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 3123     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 3124 {
 3125         struct spglist free;
 3126         vm_paddr_t phys;
 3127         pd_entry_t *l2;
 3128         pt_entry_t *l3, newl3;
 3129 
 3130         KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 3131             (m->oflags & VPO_UNMANAGED) != 0,
 3132             ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 3133         rw_assert(&pvh_global_lock, RA_LOCKED);
 3134         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 3135 
 3136         CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
 3137         /*
 3138          * In the case that a page table page is not
 3139          * resident, we are creating it here.
 3140          */
 3141         if (va < VM_MAXUSER_ADDRESS) {
 3142                 vm_pindex_t l2pindex;
 3143 
 3144                 /*
 3145                  * Calculate pagetable page index
 3146                  */
 3147                 l2pindex = pmap_l2_pindex(va);
 3148                 if (mpte && (mpte->pindex == l2pindex)) {
 3149                         mpte->wire_count++;
 3150                 } else {
 3151                         /*
 3152                          * Get the l2 entry
 3153                          */
 3154                         l2 = pmap_l2(pmap, va);
 3155 
 3156                         /*
 3157                          * If the page table page is mapped, we just increment
 3158                          * the hold count, and activate it.  Otherwise, we
 3159                          * attempt to allocate a page table page.  If this
 3160                          * attempt fails, we don't retry.  Instead, we give up.
 3161                          */
 3162                         if (l2 != NULL && pmap_load(l2) != 0) {
 3163                                 phys = PTE_TO_PHYS(pmap_load(l2));
 3164                                 mpte = PHYS_TO_VM_PAGE(phys);
 3165                                 mpte->wire_count++;
 3166                         } else {
 3167                                 /*
 3168                                  * Pass NULL instead of the PV list lock
 3169                                  * pointer, because we don't intend to sleep.
 3170                                  */
 3171                                 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
 3172                                 if (mpte == NULL)
 3173                                         return (mpte);
 3174                         }
 3175                 }
 3176                 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
 3177                 l3 = &l3[pmap_l3_index(va)];
 3178         } else {
 3179                 mpte = NULL;
 3180                 l3 = pmap_l3(kernel_pmap, va);
 3181         }
 3182         if (l3 == NULL)
 3183                 panic("pmap_enter_quick_locked: No l3");
 3184         if (pmap_load(l3) != 0) {
 3185                 if (mpte != NULL) {
 3186                         mpte->wire_count--;
 3187                         mpte = NULL;
 3188                 }
 3189                 return (mpte);
 3190         }
 3191 
 3192         /*
 3193          * Enter on the PV list if part of our managed memory.
 3194          */
 3195         if ((m->oflags & VPO_UNMANAGED) == 0 &&
 3196             !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
 3197                 if (mpte != NULL) {
 3198                         SLIST_INIT(&free);
 3199                         if (pmap_unwire_ptp(pmap, va, mpte, &free)) {
 3200                                 pmap_invalidate_page(pmap, va);
 3201                                 vm_page_free_pages_toq(&free, false);
 3202                         }
 3203                         mpte = NULL;
 3204                 }
 3205                 return (mpte);
 3206         }
 3207 
 3208         /*
 3209          * Increment counters
 3210          */
 3211         pmap_resident_count_inc(pmap, 1);
 3212 
 3213         newl3 = ((VM_PAGE_TO_PHYS(m) / PAGE_SIZE) << PTE_PPN0_S) |
 3214             PTE_V | PTE_R;
 3215         if ((prot & VM_PROT_EXECUTE) != 0)
 3216                 newl3 |= PTE_X;
 3217         if ((m->oflags & VPO_UNMANAGED) == 0)
 3218                 newl3 |= PTE_SW_MANAGED;
 3219         if (va < VM_MAX_USER_ADDRESS)
 3220                 newl3 |= PTE_U;
 3221 
 3222         /*
 3223          * Sync the i-cache on all harts before updating the PTE
 3224          * if the new PTE is executable.
 3225          */
 3226         if (prot & VM_PROT_EXECUTE)
 3227                 pmap_sync_icache(pmap, va, PAGE_SIZE);
 3228 
 3229         pmap_store(l3, newl3);
 3230 
 3231         pmap_invalidate_page(pmap, va);
 3232         return (mpte);
 3233 }
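      /*
       * Editorial note: the quick path above never sets PTE_W, so it only
       * creates read (and possibly execute) mappings and never needs to mark
       * the page PGA_WRITEABLE or track PTE_D; a later write access is
       * expected to fault and be handled through the full pmap_enter() path.
       */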
 3234 
 3235 /*
 3236  * This code maps large physical mmap regions into the
 3237  * processor address space.  Note that some shortcuts
 3238  * are taken, but the code works.
 3239  */
 3240 void
 3241 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
 3242     vm_pindex_t pindex, vm_size_t size)
 3243 {
 3244 
 3245         VM_OBJECT_ASSERT_WLOCKED(object);
 3246         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 3247             ("pmap_object_init_pt: non-device object"));
 3248 }
 3249 
 3250 /*
 3251  *      Clear the wired attribute from the mappings for the specified range of
 3252  *      addresses in the given pmap.  Every valid mapping within that range
 3253  *      must have the wired attribute set.  In contrast, invalid mappings
 3254  *      cannot have the wired attribute set, so they are ignored.
 3255  *
 3256  *      The wired attribute of the page table entry is not a hardware feature,
 3257  *      so there is no need to invalidate any TLB entries.
 3258  */
 3259 void
 3260 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 3261 {
 3262         vm_offset_t va_next;
 3263         pd_entry_t *l1, *l2, l2e;
 3264         pt_entry_t *l3, l3e;
 3265         bool pv_lists_locked;
 3266 
 3267         pv_lists_locked = false;
 3268 retry:
 3269         PMAP_LOCK(pmap);
 3270         for (; sva < eva; sva = va_next) {
 3271                 l1 = pmap_l1(pmap, sva);
 3272                 if (pmap_load(l1) == 0) {
 3273                         va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 3274                         if (va_next < sva)
 3275                                 va_next = eva;
 3276                         continue;
 3277                 }
 3278 
 3279                 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 3280                 if (va_next < sva)
 3281                         va_next = eva;
 3282 
 3283                 l2 = pmap_l1_to_l2(l1, sva);
 3284                 if ((l2e = pmap_load(l2)) == 0)
 3285                         continue;
 3286                 if ((l2e & PTE_RWX) != 0) {
 3287                         if (sva + L2_SIZE == va_next && eva >= va_next) {
 3288                                 if ((l2e & PTE_SW_WIRED) == 0)
 3289                                         panic("pmap_unwire: l2 %#jx is missing "
 3290                                             "PTE_SW_WIRED", (uintmax_t)l2e);
  3291                                 pmap_clear_bits(l2, PTE_SW_WIRED);
                                       pmap->pm_stats.wired_count -=
                                           L2_SIZE / PAGE_SIZE;
  3292                                 continue;
 3293                         } else {
 3294                                 if (!pv_lists_locked) {
 3295                                         pv_lists_locked = true;
 3296                                         if (!rw_try_rlock(&pvh_global_lock)) {
 3297                                                 PMAP_UNLOCK(pmap);
 3298                                                 rw_rlock(&pvh_global_lock);
 3299                                                 /* Repeat sva. */
 3300                                                 goto retry;
 3301                                         }
 3302                                 }
 3303                                 if (!pmap_demote_l2(pmap, l2, sva))
 3304                                         panic("pmap_unwire: demotion failed");
 3305                         }
 3306                 }
 3307 
 3308                 if (va_next > eva)
 3309                         va_next = eva;
 3310                 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 3311                     sva += L3_SIZE) {
 3312                         if ((l3e = pmap_load(l3)) == 0)
 3313                                 continue;
 3314                         if ((l3e & PTE_SW_WIRED) == 0)
 3315                                 panic("pmap_unwire: l3 %#jx is missing "
 3316                                     "PTE_SW_WIRED", (uintmax_t)l3e);
 3317 
 3318                         /*
  3319                          * PTE_SW_WIRED must be cleared atomically.  Although the
  3320                          * pmap lock synchronizes access to it, another hart could
  3321                          * be setting PTE_D and/or PTE_A concurrently.
 3322                          */
 3323                         pmap_clear_bits(l3, PTE_SW_WIRED);
 3324                         pmap->pm_stats.wired_count--;
 3325                 }
 3326         }
 3327         if (pv_lists_locked)
 3328                 rw_runlock(&pvh_global_lock);
 3329         PMAP_UNLOCK(pmap);
 3330 }
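      /*
       * Editorial note: when only part of a wired 2MB mapping lies inside the
       * range being unwired, the mapping is first demoted to 4KB pages so
       * that PTE_SW_WIRED can be cleared on just the affected L3 entries; a
       * 2MB mapping wholly contained in the range keeps its superpage form
       * and has the wired bit cleared at the L2 level.
       */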
 3331 
 3332 /*
 3333  *      Copy the range specified by src_addr/len
 3334  *      from the source map to the range dst_addr/len
 3335  *      in the destination map.
 3336  *
 3337  *      This routine is only advisory and need not do anything.
 3338  */
 3339 
 3340 void
 3341 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 3342     vm_offset_t src_addr)
 3343 {
 3344 
 3345 }
 3346 
 3347 /*
  3348  *      pmap_zero_page zeros the specified hardware page.  The page is
  3349  *      reached through the direct map, so no temporary mapping is needed.
 3350  */
 3351 void
 3352 pmap_zero_page(vm_page_t m)
 3353 {
 3354         vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 3355 
 3356         pagezero((void *)va);
 3357 }
 3358 
 3359 /*
  3360  *      pmap_zero_page_area zeros the specified portion of a hardware page,
  3361  *      again reaching it through the direct map.
 3362  *
 3363  *      off and size may not cover an area beyond a single hardware page.
 3364  */
 3365 void
 3366 pmap_zero_page_area(vm_page_t m, int off, int size)
 3367 {
 3368         vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 3369 
 3370         if (off == 0 && size == PAGE_SIZE)
 3371                 pagezero((void *)va);
 3372         else
 3373                 bzero((char *)va + off, size);
 3374 }
 3375 
 3376 /*
 3377  *      pmap_copy_page copies the specified (machine independent)
  3378  *      pmap_copy_page copies the specified (machine independent)
  3379  *      page.  Both the source and the destination are reached
  3380  *      through the direct map, and the copy is done one machine
  3381  *      dependent page at a time by pagecopy().
 3382 void
 3383 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
 3384 {
 3385         vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
 3386         vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
 3387 
 3388         pagecopy((void *)src, (void *)dst);
 3389 }
 3390 
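      /*
       * Editorial note (assumption): setting unmapped_buf_allowed below tells
       * the buffer cache that I/O buffers need not be mapped into KVA, since
       * pmap_copy_pages() can reach the pages through the direct map.
       */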
 3391 int unmapped_buf_allowed = 1;
 3392 
 3393 void
 3394 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
 3395     vm_offset_t b_offset, int xfersize)
 3396 {
 3397         void *a_cp, *b_cp;
 3398         vm_page_t m_a, m_b;
 3399         vm_paddr_t p_a, p_b;
 3400         vm_offset_t a_pg_offset, b_pg_offset;
 3401         int cnt;
 3402 
 3403         while (xfersize > 0) {
 3404                 a_pg_offset = a_offset & PAGE_MASK;
 3405                 m_a = ma[a_offset >> PAGE_SHIFT];
 3406                 p_a = m_a->phys_addr;
 3407                 b_pg_offset = b_offset & PAGE_MASK;
 3408                 m_b = mb[b_offset >> PAGE_SHIFT];
 3409                 p_b = m_b->phys_addr;
 3410                 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 3411                 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 3412                 if (__predict_false(!PHYS_IN_DMAP(p_a))) {
 3413                         panic("!DMAP a %lx", p_a);
 3414                 } else {
 3415                         a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
 3416                 }
 3417                 if (__predict_false(!PHYS_IN_DMAP(p_b))) {
 3418                         panic("!DMAP b %lx", p_b);
 3419                 } else {
 3420                         b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
 3421                 }
 3422                 bcopy(a_cp, b_cp, cnt);
 3423                 a_offset += cnt;
 3424                 b_offset += cnt;
 3425                 xfersize -= cnt;
 3426         }
 3427 }
 3428 
 3429 vm_offset_t
 3430 pmap_quick_enter_page(vm_page_t m)
 3431 {
 3432 
 3433         return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
 3434 }
 3435 
 3436 void
 3437 pmap_quick_remove_page(vm_offset_t addr)
 3438 {
 3439 }
 3440 
 3441 /*
 3442  * Returns true if the pmap's pv is one of the first
 3443  * 16 pvs linked to from this page.  This count may
 3444  * be changed upwards or downwards in the future; it
 3445  * is only necessary that true be returned for a small
 3446  * subset of pmaps for proper page aging.
 3447  */
 3448 boolean_t
 3449 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 3450 {
 3451         struct md_page *pvh;
 3452         struct rwlock *lock;
 3453         pv_entry_t pv;
 3454         int loops = 0;
 3455         boolean_t rv;
 3456 
 3457         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3458             ("pmap_page_exists_quick: page %p is not managed", m));
 3459         rv = FALSE;
 3460         rw_rlock(&pvh_global_lock);
 3461         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3462         rw_rlock(lock);
 3463         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 3464                 if (PV_PMAP(pv) == pmap) {
 3465                         rv = TRUE;
 3466                         break;
 3467                 }
 3468                 loops++;
 3469                 if (loops >= 16)
 3470                         break;
 3471         }
 3472         if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
 3473                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 3474                 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 3475                         if (PV_PMAP(pv) == pmap) {
 3476                                 rv = TRUE;
 3477                                 break;
 3478                         }
 3479                         loops++;
 3480                         if (loops >= 16)
 3481                                 break;
 3482                 }
 3483         }
 3484         rw_runlock(lock);
 3485         rw_runlock(&pvh_global_lock);
 3486         return (rv);
 3487 }
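      /*
       * Editorial note: the page-inspection functions below share a retry
       * protocol: each PV entry's pmap is taken with PMAP_TRYLOCK() while the
       * PV list lock is held; if that fails, the PV list lock is dropped, the
       * pmap lock is acquired the slow way, and the md/pvh generation counters
       * are rechecked to detect PV list changes made in the window, restarting
       * the scan if any occurred.
       */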
 3488 
 3489 /*
 3490  *      pmap_page_wired_mappings:
 3491  *
 3492  *      Return the number of managed mappings to the given physical page
 3493  *      that are wired.
 3494  */
 3495 int
 3496 pmap_page_wired_mappings(vm_page_t m)
 3497 {
 3498         struct md_page *pvh;
 3499         struct rwlock *lock;
 3500         pmap_t pmap;
 3501         pd_entry_t *l2;
 3502         pt_entry_t *l3;
 3503         pv_entry_t pv;
 3504         int count, md_gen, pvh_gen;
 3505 
 3506         if ((m->oflags & VPO_UNMANAGED) != 0)
 3507                 return (0);
 3508         rw_rlock(&pvh_global_lock);
 3509         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3510         rw_rlock(lock);
 3511 restart:
 3512         count = 0;
 3513         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 3514                 pmap = PV_PMAP(pv);
 3515                 if (!PMAP_TRYLOCK(pmap)) {
 3516                         md_gen = m->md.pv_gen;
 3517                         rw_runlock(lock);
 3518                         PMAP_LOCK(pmap);
 3519                         rw_rlock(lock);
 3520                         if (md_gen != m->md.pv_gen) {
 3521                                 PMAP_UNLOCK(pmap);
 3522                                 goto restart;
 3523                         }
 3524                 }
 3525                 l3 = pmap_l3(pmap, pv->pv_va);
 3526                 if ((pmap_load(l3) & PTE_SW_WIRED) != 0)
 3527                         count++;
 3528                 PMAP_UNLOCK(pmap);
 3529         }
 3530         if ((m->flags & PG_FICTITIOUS) == 0) {
 3531                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 3532                 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 3533                         pmap = PV_PMAP(pv);
 3534                         if (!PMAP_TRYLOCK(pmap)) {
 3535                                 md_gen = m->md.pv_gen;
 3536                                 pvh_gen = pvh->pv_gen;
 3537                                 rw_runlock(lock);
 3538                                 PMAP_LOCK(pmap);
 3539                                 rw_rlock(lock);
 3540                                 if (md_gen != m->md.pv_gen ||
 3541                                     pvh_gen != pvh->pv_gen) {
 3542                                         PMAP_UNLOCK(pmap);
 3543                                         goto restart;
 3544                                 }
 3545                         }
 3546                         l2 = pmap_l2(pmap, pv->pv_va);
 3547                         if ((pmap_load(l2) & PTE_SW_WIRED) != 0)
 3548                                 count++;
 3549                         PMAP_UNLOCK(pmap);
 3550                 }
 3551         }
 3552         rw_runlock(lock);
 3553         rw_runlock(&pvh_global_lock);
 3554         return (count);
 3555 }
 3556 
 3557 /*
 3558  * Returns true if the given page is mapped individually or as part of
 3559  * a 2mpage.  Otherwise, returns false.
 3560  */
 3561 bool
 3562 pmap_page_is_mapped(vm_page_t m)
 3563 {
 3564         struct rwlock *lock;
 3565         bool rv;
 3566 
 3567         if ((m->oflags & VPO_UNMANAGED) != 0)
 3568                 return (false);
 3569         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3570         rw_rlock(lock);
 3571         rv = !TAILQ_EMPTY(&m->md.pv_list) ||
 3572             ((m->flags & PG_FICTITIOUS) == 0 &&
 3573             !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
 3574         rw_runlock(lock);
 3575         return (rv);
 3576 }
 3577 
 3578 static void
 3579 pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv,
 3580     struct spglist *free, bool superpage)
 3581 {
 3582         struct md_page *pvh;
 3583         vm_page_t mpte, mt;
 3584 
 3585         if (superpage) {
 3586                 pmap_resident_count_dec(pmap, Ln_ENTRIES);
 3587                 pvh = pa_to_pvh(m->phys_addr);
 3588                 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 3589                 pvh->pv_gen++;
 3590                 if (TAILQ_EMPTY(&pvh->pv_list)) {
 3591                         for (mt = m; mt < &m[Ln_ENTRIES]; mt++)
 3592                                 if (TAILQ_EMPTY(&mt->md.pv_list) &&
 3593                                     (mt->aflags & PGA_WRITEABLE) != 0)
 3594                                         vm_page_aflag_clear(mt, PGA_WRITEABLE);
 3595                 }
 3596                 mpte = pmap_remove_pt_page(pmap, pv->pv_va);
 3597                 if (mpte != NULL) {
  3598                         KASSERT(mpte->valid == VM_PAGE_BITS_ALL,
 3599                             ("pmap_remove_pages: l3 page not promoted"));
 3600                         pmap_resident_count_dec(pmap, 1);
 3601                         KASSERT(mpte->wire_count == Ln_ENTRIES,
 3602                             ("pmap_remove_pages: pte page wire count error"));
 3603                         mpte->wire_count = 0;
 3604                         pmap_add_delayed_free_list(mpte, free, FALSE);
 3605                 }
 3606         } else {
 3607                 pmap_resident_count_dec(pmap, 1);
 3608                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 3609                 m->md.pv_gen++;
 3610                 if (TAILQ_EMPTY(&m->md.pv_list) &&
 3611                     (m->aflags & PGA_WRITEABLE) != 0) {
 3612                         pvh = pa_to_pvh(m->phys_addr);
 3613                         if (TAILQ_EMPTY(&pvh->pv_list))
 3614                                 vm_page_aflag_clear(m, PGA_WRITEABLE);
 3615                 }
 3616         }
 3617 }
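      /*
       * Editorial note: once the last PV entry for a page is removed above
       * (and, in the 4KB case, none remain for its containing 2MB page
       * either), PGA_WRITEABLE is cleared so the rest of the VM system knows
       * that no remaining mapping can dirty the page.
       */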
 3618 
 3619 /*
 3620  * Destroy all managed, non-wired mappings in the given user-space
 3621  * pmap.  This pmap cannot be active on any processor besides the
 3622  * caller.
 3623  *
 3624  * This function cannot be applied to the kernel pmap.  Moreover, it
 3625  * is not intended for general use.  It is only to be used during
 3626  * process termination.  Consequently, it can be implemented in ways
 3627  * that make it faster than pmap_remove().  First, it can more quickly
 3628  * destroy mappings by iterating over the pmap's collection of PV
 3629  * entries, rather than searching the page table.  Second, it doesn't
 3630  * have to test and clear the page table entries atomically, because
 3631  * no processor is currently accessing the user address space.  In
 3632  * particular, a page table entry's dirty bit won't change state once
 3633  * this function starts.
 3634  */
 3635 void
 3636 pmap_remove_pages(pmap_t pmap)
 3637 {
 3638         struct spglist free;
 3639         pd_entry_t ptepde;
 3640         pt_entry_t *pte, tpte;
 3641         vm_page_t m, mt;
 3642         pv_entry_t pv;
 3643         struct pv_chunk *pc, *npc;
 3644         struct rwlock *lock;
 3645         int64_t bit;
 3646         uint64_t inuse, bitmask;
 3647         int allfree, field, freed, idx;
 3648         bool superpage;
 3649 
 3650         lock = NULL;
 3651 
 3652         SLIST_INIT(&free);
 3653         rw_rlock(&pvh_global_lock);
 3654         PMAP_LOCK(pmap);
 3655         TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 3656                 allfree = 1;
 3657                 freed = 0;
 3658                 for (field = 0; field < _NPCM; field++) {
 3659                         inuse = ~pc->pc_map[field] & pc_freemask[field];
 3660                         while (inuse != 0) {
 3661                                 bit = ffsl(inuse) - 1;
 3662                                 bitmask = 1UL << bit;
 3663                                 idx = field * 64 + bit;
 3664                                 pv = &pc->pc_pventry[idx];
 3665                                 inuse &= ~bitmask;
 3666 
 3667                                 pte = pmap_l1(pmap, pv->pv_va);
 3668                                 ptepde = pmap_load(pte);
 3669                                 pte = pmap_l1_to_l2(pte, pv->pv_va);
 3670                                 tpte = pmap_load(pte);
 3671                                 if ((tpte & PTE_RWX) != 0) {
 3672                                         superpage = true;
 3673                                 } else {
 3674                                         ptepde = tpte;
 3675                                         pte = pmap_l2_to_l3(pte, pv->pv_va);
 3676                                         tpte = pmap_load(pte);
 3677                                         superpage = false;
 3678                                 }
 3679 
 3680                                 /*
 3681                                  * We cannot remove wired pages from a
 3682                                  * process' mapping at this time.
 3683                                  */
 3684                                 if (tpte & PTE_SW_WIRED) {
 3685                                         allfree = 0;
 3686                                         continue;
 3687                                 }
 3688 
 3689                                 m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(tpte));
 3690                                 KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 3691                                     m < &vm_page_array[vm_page_array_size],
 3692                                     ("pmap_remove_pages: bad pte %#jx",
 3693                                     (uintmax_t)tpte));
 3694 
 3695                                 pmap_clear(pte);
 3696 
 3697                                 /*
 3698                                  * Update the vm_page_t clean/reference bits.
 3699                                  */
 3700                                 if ((tpte & (PTE_D | PTE_W)) ==
 3701                                     (PTE_D | PTE_W)) {
 3702                                         if (superpage)
 3703                                                 for (mt = m;
 3704                                                     mt < &m[Ln_ENTRIES]; mt++)
 3705                                                         vm_page_dirty(mt);
 3706                                         else
 3707                                                 vm_page_dirty(m);
 3708                                 }
 3709 
 3710                                 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 3711 
 3712                                 /* Mark free */
 3713                                 pc->pc_map[field] |= bitmask;
 3714 
 3715                                 pmap_remove_pages_pv(pmap, m, pv, &free,
 3716                                     superpage);
 3717                                 pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free);
 3718                                 freed++;
 3719                         }
 3720                 }
 3721                 PV_STAT(atomic_add_long(&pv_entry_frees, freed));
 3722                 PV_STAT(atomic_add_int(&pv_entry_spare, freed));
 3723                 PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
 3724                 if (allfree) {
 3725                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 3726                         free_pv_chunk(pc);
 3727                 }
 3728         }
 3729         if (lock != NULL)
 3730                 rw_wunlock(lock);
 3731         pmap_invalidate_all(pmap);
 3732         rw_runlock(&pvh_global_lock);
 3733         PMAP_UNLOCK(pmap);
 3734         vm_page_free_pages_toq(&free, false);
 3735 }
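      /*
       * Editorial sketch: the chunk scan above inverts pc_map[] against
       * pc_freemask[] so that set bits mark in-use PV entries:
       *
       *      inuse = ~pc->pc_map[field] & pc_freemask[field];
       *      bit   = ffsl(inuse) - 1;        lowest set bit
       *      idx   = field * 64 + bit;       index into pc_pventry[]
       *
       * Setting the bit back in pc_map[] marks that entry free again.
       */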
 3736 
 3737 static bool
 3738 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
 3739 {
 3740         struct md_page *pvh;
 3741         struct rwlock *lock;
 3742         pd_entry_t *l2;
 3743         pt_entry_t *l3, mask;
 3744         pv_entry_t pv;
 3745         pmap_t pmap;
 3746         int md_gen, pvh_gen;
 3747         bool rv;
 3748 
 3749         mask = 0;
 3750         if (modified)
 3751                 mask |= PTE_D;
 3752         if (accessed)
 3753                 mask |= PTE_A;
 3754 
 3755         rv = FALSE;
 3756         rw_rlock(&pvh_global_lock);
 3757         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3758         rw_rlock(lock);
 3759 restart:
 3760         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 3761                 pmap = PV_PMAP(pv);
 3762                 if (!PMAP_TRYLOCK(pmap)) {
 3763                         md_gen = m->md.pv_gen;
 3764                         rw_runlock(lock);
 3765                         PMAP_LOCK(pmap);
 3766                         rw_rlock(lock);
 3767                         if (md_gen != m->md.pv_gen) {
 3768                                 PMAP_UNLOCK(pmap);
 3769                                 goto restart;
 3770                         }
 3771                 }
 3772                 l3 = pmap_l3(pmap, pv->pv_va);
 3773                 rv = (pmap_load(l3) & mask) == mask;
 3774                 PMAP_UNLOCK(pmap);
 3775                 if (rv)
 3776                         goto out;
 3777         }
 3778         if ((m->flags & PG_FICTITIOUS) == 0) {
 3779                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 3780                 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 3781                         pmap = PV_PMAP(pv);
 3782                         if (!PMAP_TRYLOCK(pmap)) {
 3783                                 md_gen = m->md.pv_gen;
 3784                                 pvh_gen = pvh->pv_gen;
 3785                                 rw_runlock(lock);
 3786                                 PMAP_LOCK(pmap);
 3787                                 rw_rlock(lock);
 3788                                 if (md_gen != m->md.pv_gen ||
 3789                                     pvh_gen != pvh->pv_gen) {
 3790                                         PMAP_UNLOCK(pmap);
 3791                                         goto restart;
 3792                                 }
 3793                         }
 3794                         l2 = pmap_l2(pmap, pv->pv_va);
 3795                         rv = (pmap_load(l2) & mask) == mask;
 3796                         PMAP_UNLOCK(pmap);
 3797                         if (rv)
 3798                                 goto out;
 3799                 }
 3800         }
 3801 out:
 3802         rw_runlock(lock);
 3803         rw_runlock(&pvh_global_lock);
 3804         return (rv);
 3805 }
 3806 
 3807 /*
 3808  *      pmap_is_modified:
 3809  *
 3810  *      Return whether or not the specified physical page was modified
 3811  *      in any physical maps.
 3812  */
 3813 boolean_t
 3814 pmap_is_modified(vm_page_t m)
 3815 {
 3816 
 3817         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3818             ("pmap_is_modified: page %p is not managed", m));
 3819 
 3820         /*
 3821          * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 3822          * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
  3823          * is clear, no PTEs can have PTE_D set.
 3824          */
 3825         VM_OBJECT_ASSERT_WLOCKED(m->object);
 3826         if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 3827                 return (FALSE);
 3828         return (pmap_page_test_mappings(m, FALSE, TRUE));
 3829 }
 3830 
 3831 /*
 3832  *      pmap_is_prefaultable:
 3833  *
 3834  *      Return whether or not the specified virtual address is eligible
 3835  *      for prefault.
 3836  */
 3837 boolean_t
 3838 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 3839 {
 3840         pt_entry_t *l3;
 3841         boolean_t rv;
 3842 
 3843         rv = FALSE;
 3844         PMAP_LOCK(pmap);
 3845         l3 = pmap_l3(pmap, addr);
 3846         if (l3 != NULL && pmap_load(l3) != 0) {
 3847                 rv = TRUE;
 3848         }
 3849         PMAP_UNLOCK(pmap);
 3850         return (rv);
 3851 }
 3852 
 3853 /*
 3854  *      pmap_is_referenced:
 3855  *
 3856  *      Return whether or not the specified physical page was referenced
 3857  *      in any physical maps.
 3858  */
 3859 boolean_t
 3860 pmap_is_referenced(vm_page_t m)
 3861 {
 3862 
 3863         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3864             ("pmap_is_referenced: page %p is not managed", m));
 3865         return (pmap_page_test_mappings(m, TRUE, FALSE));
 3866 }
 3867 
 3868 /*
 3869  * Clear the write and modified bits in each of the given page's mappings.
 3870  */
 3871 void
 3872 pmap_remove_write(vm_page_t m)
 3873 {
 3874         struct md_page *pvh;
 3875         struct rwlock *lock;
 3876         pmap_t pmap;
 3877         pd_entry_t *l2;
 3878         pt_entry_t *l3, oldl3, newl3;
 3879         pv_entry_t next_pv, pv;
 3880         vm_offset_t va;
 3881         int md_gen, pvh_gen;
 3882 
 3883         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3884             ("pmap_remove_write: page %p is not managed", m));
 3885 
 3886         /*
 3887          * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 3888          * set by another thread while the object is locked.  Thus,
 3889          * if PGA_WRITEABLE is clear, no page table entries need updating.
 3890          */
 3891         VM_OBJECT_ASSERT_WLOCKED(m->object);
 3892         if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 3893                 return;
 3894         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3895         pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 3896             pa_to_pvh(VM_PAGE_TO_PHYS(m));
 3897         rw_rlock(&pvh_global_lock);
 3898 retry_pv_loop:
 3899         rw_wlock(lock);
 3900         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 3901                 pmap = PV_PMAP(pv);
 3902                 if (!PMAP_TRYLOCK(pmap)) {
 3903                         pvh_gen = pvh->pv_gen;
 3904                         rw_wunlock(lock);
 3905                         PMAP_LOCK(pmap);
 3906                         rw_wlock(lock);
 3907                         if (pvh_gen != pvh->pv_gen) {
 3908                                 PMAP_UNLOCK(pmap);
 3909                                 rw_wunlock(lock);
 3910                                 goto retry_pv_loop;
 3911                         }
 3912                 }
 3913                 va = pv->pv_va;
 3914                 l2 = pmap_l2(pmap, va);
 3915                 if ((pmap_load(l2) & PTE_W) != 0)
 3916                         (void)pmap_demote_l2_locked(pmap, l2, va, &lock);
 3917                 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
 3918                     ("inconsistent pv lock %p %p for page %p",
 3919                     lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
 3920                 PMAP_UNLOCK(pmap);
 3921         }
 3922         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 3923                 pmap = PV_PMAP(pv);
 3924                 if (!PMAP_TRYLOCK(pmap)) {
 3925                         pvh_gen = pvh->pv_gen;
 3926                         md_gen = m->md.pv_gen;
 3927                         rw_wunlock(lock);
 3928                         PMAP_LOCK(pmap);
 3929                         rw_wlock(lock);
 3930                         if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 3931                                 PMAP_UNLOCK(pmap);
 3932                                 rw_wunlock(lock);
 3933                                 goto retry_pv_loop;
 3934                         }
 3935                 }
 3936                 l3 = pmap_l3(pmap, pv->pv_va);
 3937                 oldl3 = pmap_load(l3);
 3938 retry:
 3939                 if ((oldl3 & PTE_W) != 0) {
 3940                         newl3 = oldl3 & ~(PTE_D | PTE_W);
 3941                         if (!atomic_fcmpset_long(l3, &oldl3, newl3))
 3942                                 goto retry;
 3943                         if ((oldl3 & PTE_D) != 0)
 3944                                 vm_page_dirty(m);
 3945                         pmap_invalidate_page(pmap, pv->pv_va);
 3946                 }
 3947                 PMAP_UNLOCK(pmap);
 3948         }
 3949         rw_wunlock(lock);
 3950         vm_page_aflag_clear(m, PGA_WRITEABLE);
 3951         rw_runlock(&pvh_global_lock);
 3952 }
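      /*
       * Editorial note: the write-protect loop above clears PTE_W and PTE_D
       * together with atomic_fcmpset_long(); if another hart updates the PTE
       * in the window, the compare fails, oldl3 is refreshed with the current
       * value, and the update is retried before the page is dirtied and the
       * TLB entry invalidated.
       */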
 3953 
 3954 /*
 3955  *      pmap_ts_referenced:
 3956  *
 3957  *      Return a count of reference bits for a page, clearing those bits.
 3958  *      It is not necessary for every reference bit to be cleared, but it
 3959  *      is necessary that 0 only be returned when there are truly no
 3960  *      reference bits set.
 3961  *
 3962  *      As an optimization, update the page's dirty field if a modified bit is
 3963  *      found while counting reference bits.  This opportunistic update can be
 3964  *      performed at low cost and can eliminate the need for some future calls
 3965  *      to pmap_is_modified().  However, since this function stops after
 3966  *      finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
 3967  *      dirty pages.  Those dirty pages will only be detected by a future call
 3968  *      to pmap_is_modified().
 3969  */
 3970 int
 3971 pmap_ts_referenced(vm_page_t m)
 3972 {
 3973         struct spglist free;
 3974         struct md_page *pvh;
 3975         struct rwlock *lock;
 3976         pv_entry_t pv, pvf;
 3977         pmap_t pmap;
 3978         pd_entry_t *l2, l2e;
 3979         pt_entry_t *l3, l3e;
 3980         vm_paddr_t pa;
 3981         vm_offset_t va;
 3982         int cleared, md_gen, not_cleared, pvh_gen;
 3983 
 3984         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3985             ("pmap_ts_referenced: page %p is not managed", m));
 3986         SLIST_INIT(&free);
 3987         cleared = 0;
 3988         pa = VM_PAGE_TO_PHYS(m);
 3989         pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
 3990 
 3991         lock = PHYS_TO_PV_LIST_LOCK(pa);
 3992         rw_rlock(&pvh_global_lock);
 3993         rw_wlock(lock);
 3994 retry:
 3995         not_cleared = 0;
 3996         if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
 3997                 goto small_mappings;
 3998         pv = pvf;
 3999         do {
 4000                 pmap = PV_PMAP(pv);
 4001                 if (!PMAP_TRYLOCK(pmap)) {
 4002                         pvh_gen = pvh->pv_gen;
 4003                         rw_wunlock(lock);
 4004                         PMAP_LOCK(pmap);
 4005                         rw_wlock(lock);
 4006                         if (pvh_gen != pvh->pv_gen) {
 4007                                 PMAP_UNLOCK(pmap);
 4008                                 goto retry;
 4009                         }
 4010                 }
 4011                 va = pv->pv_va;
 4012                 l2 = pmap_l2(pmap, va);
 4013                 l2e = pmap_load(l2);
 4014                 if ((l2e & (PTE_W | PTE_D)) == (PTE_W | PTE_D)) {
 4015                         /*
 4016                          * Although l2e is mapping a 2MB page, because
 4017                          * this function is called at a 4KB page granularity,
 4018                          * we only update the 4KB page under test.
 4019                          */
 4020                         vm_page_dirty(m);
 4021                 }
 4022                 if ((l2e & PTE_A) != 0) {
 4023                         /*
 4024                          * Since this reference bit is shared by 512 4KB
 4025                          * pages, it should not be cleared every time it is
 4026                          * tested.  Apply a simple "hash" function on the
 4027                          * physical page number, the virtual superpage number,
 4028                          * and the pmap address to select one 4KB page out of
 4029                          * the 512 on which testing the reference bit will
 4030                          * result in clearing that reference bit.  This
 4031                          * function is designed to avoid the selection of the
 4032                          * same 4KB page for every 2MB page mapping.
 4033                          *
 4034                          * On demotion, a mapping that hasn't been referenced
 4035                          * is simply destroyed.  To avoid the possibility of a
 4036                          * subsequent page fault on a demoted wired mapping,
 4037                          * always leave its reference bit set.  Moreover,
 4038                          * since the superpage is wired, the current state of
 4039                          * its reference bit won't affect page replacement.
 4040                          */
 4041                         if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
 4042                             (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
 4043                             (l2e & PTE_SW_WIRED) == 0) {
 4044                                 pmap_clear_bits(l2, PTE_A);
 4045                                 pmap_invalidate_page(pmap, va);
 4046                                 cleared++;
 4047                         } else
 4048                                 not_cleared++;
 4049                 }
 4050                 PMAP_UNLOCK(pmap);
 4051                 /* Rotate the PV list if it has more than one entry. */
 4052                 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
 4053                         TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 4054                         TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 4055                         pvh->pv_gen++;
 4056                 }
 4057                 if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
 4058                         goto out;
 4059         } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
 4060 small_mappings:
 4061         if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
 4062                 goto out;
 4063         pv = pvf;
 4064         do {
 4065                 pmap = PV_PMAP(pv);
 4066                 if (!PMAP_TRYLOCK(pmap)) {
 4067                         pvh_gen = pvh->pv_gen;
 4068                         md_gen = m->md.pv_gen;
 4069                         rw_wunlock(lock);
 4070                         PMAP_LOCK(pmap);
 4071                         rw_wlock(lock);
 4072                         if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 4073                                 PMAP_UNLOCK(pmap);
 4074                                 goto retry;
 4075                         }
 4076                 }
 4077                 l2 = pmap_l2(pmap, pv->pv_va);
 4078 
 4079                 KASSERT((pmap_load(l2) & PTE_RX) == 0,
 4080                     ("pmap_ts_referenced: found an invalid l2 table"));
 4081 
 4082                 l3 = pmap_l2_to_l3(l2, pv->pv_va);
 4083                 l3e = pmap_load(l3);
 4084                 if ((l3e & PTE_D) != 0)
 4085                         vm_page_dirty(m);
 4086                 if ((l3e & PTE_A) != 0) {
 4087                         if ((l3e & PTE_SW_WIRED) == 0) {
 4088                                 /*
 4089                                  * Wired pages cannot be paged out so
 4090                                  * doing accessed bit emulation for
 4091                                  * them is wasted effort. We do the
 4092                                  * hard work for unwired pages only.
 4093                                  */
 4094                                 pmap_clear_bits(l3, PTE_A);
 4095                                 pmap_invalidate_page(pmap, pv->pv_va);
 4096                                 cleared++;
 4097                         } else
 4098                                 not_cleared++;
 4099                 }
 4100                 PMAP_UNLOCK(pmap);
 4101                 /* Rotate the PV list if it has more than one entry. */
 4102                 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
 4103                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 4104                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 4105                         m->md.pv_gen++;
 4106                 }
 4107         } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
 4108             not_cleared < PMAP_TS_REFERENCED_MAX);
 4109 out:
 4110         rw_wunlock(lock);
 4111         rw_runlock(&pvh_global_lock);
 4112         vm_page_free_pages_toq(&free, false);
 4113         return (cleared + not_cleared);
 4114 }
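
/*
 * Note on pmap_ts_referenced() above: each call stops once
 * PMAP_TS_REFERENCED_MAX accessed mappings have been processed and
 * rotates the PV lists so that the next call starts with mappings that
 * were not examined this time.  This bounds the work done per call while
 * still sampling every mapping of the page over successive calls.
 */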
 4115 
 4116 /*
 4117  *      Apply the given advice to the specified range of addresses within the
 4118  *      given pmap.  Depending on the advice, clear the referenced and/or
 4119  *      modified flags in each mapping and set the mapped page's dirty field.
       *      In this version of the riscv pmap the function below is a no-op.
 4120  */
 4121 void
 4122 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
 4123 {
 4124 }
 4125 
 4126 /*
 4127  *      Clear the modify bits on the specified physical page.
 4128  */
 4129 void
 4130 pmap_clear_modify(vm_page_t m)
 4131 {
 4132         struct md_page *pvh;
 4133         struct rwlock *lock;
 4134         pmap_t pmap;
 4135         pv_entry_t next_pv, pv;
 4136         pd_entry_t *l2, oldl2;
 4137         pt_entry_t *l3;
 4138         vm_offset_t va;
 4139         int md_gen, pvh_gen;
 4140 
 4141         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 4142             ("pmap_clear_modify: page %p is not managed", m));
 4143         VM_OBJECT_ASSERT_WLOCKED(m->object);
 4144         KASSERT(!vm_page_xbusied(m),
 4145             ("pmap_clear_modify: page %p is exclusive busied", m));
 4146 
 4147         /*
 4148          * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set.
 4149          * If the object containing the page is locked and the page is not
 4150          * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
 4151          */
 4152         if ((m->aflags & PGA_WRITEABLE) == 0)
 4153                 return;
 4154         pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 4155             pa_to_pvh(VM_PAGE_TO_PHYS(m));
 4156         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 4157         rw_rlock(&pvh_global_lock);
 4158         rw_wlock(lock);
 4159 restart:
 4160         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 4161                 pmap = PV_PMAP(pv);
 4162                 if (!PMAP_TRYLOCK(pmap)) {
 4163                         pvh_gen = pvh->pv_gen;
 4164                         rw_wunlock(lock);
 4165                         PMAP_LOCK(pmap);
 4166                         rw_wlock(lock);
 4167                         if (pvh_gen != pvh->pv_gen) {
 4168                                 PMAP_UNLOCK(pmap);
 4169                                 goto restart;
 4170                         }
 4171                 }
 4172                 va = pv->pv_va;
 4173                 l2 = pmap_l2(pmap, va);
 4174                 oldl2 = pmap_load(l2);
 4175                 /* If oldl2 has PTE_W set, then it also has PTE_D set. */
 4176                 if ((oldl2 & PTE_W) != 0 &&
 4177                     pmap_demote_l2_locked(pmap, l2, va, &lock) &&
 4178                     (oldl2 & PTE_SW_WIRED) == 0) {
 4179                         /*
 4180                          * Write protect the mapping to a single page so that
 4181                          * a subsequent write access may repromote.
 4182                          */
 4183                         va += VM_PAGE_TO_PHYS(m) - PTE_TO_PHYS(oldl2);
 4184                         l3 = pmap_l2_to_l3(l2, va);
 4185                         pmap_clear_bits(l3, PTE_D | PTE_W);
 4186                         vm_page_dirty(m);
 4187                         pmap_invalidate_page(pmap, va);
 4188                 }
 4189                 PMAP_UNLOCK(pmap);
 4190         }
 4191         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 4192                 pmap = PV_PMAP(pv);
 4193                 if (!PMAP_TRYLOCK(pmap)) {
 4194                         md_gen = m->md.pv_gen;
 4195                         pvh_gen = pvh->pv_gen;
 4196                         rw_wunlock(lock);
 4197                         PMAP_LOCK(pmap);
 4198                         rw_wlock(lock);
 4199                         if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 4200                                 PMAP_UNLOCK(pmap);
 4201                                 goto restart;
 4202                         }
 4203                 }
 4204                 l2 = pmap_l2(pmap, pv->pv_va);
 4205                 KASSERT((pmap_load(l2) & PTE_RWX) == 0,
 4206                     ("pmap_clear_modify: found a 2mpage in page %p's pv list",
 4207                     m));
 4208                 l3 = pmap_l2_to_l3(l2, pv->pv_va);
 4209                 if ((pmap_load(l3) & (PTE_D | PTE_W)) == (PTE_D | PTE_W)) {
 4210                         pmap_clear_bits(l3, PTE_D | PTE_W);
 4211                         pmap_invalidate_page(pmap, pv->pv_va);
 4212                 }
 4213                 PMAP_UNLOCK(pmap);
 4214         }
 4215         rw_wunlock(lock);
 4216         rw_runlock(&pvh_global_lock);
 4217 }
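
/*
 * Note on pmap_clear_modify() above: writable 2MB mappings are demoted
 * and then write-protected at the 4KB level, while existing 4KB mappings
 * simply have PTE_D and PTE_W cleared.  A later write access therefore
 * faults, which allows the dirty bit to be set again and, in the demoted
 * case, the mapping to be repromoted.
 */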
 4218 
 4219 void *
 4220 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 4221 {
 4222 
 4223         return ((void *)PHYS_TO_DMAP(pa));
 4224 }
 4225 
 4226 void
 4227 pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
 4228 {
 4229 }
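
/*
 * Note on pmap_mapbios()/pmap_unmapbios() above: all physical addresses
 * passed to these routines are expected to be covered by the direct map,
 * so mapping reduces to a PHYS_TO_DMAP() translation and unmapping
 * requires no work.
 */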
 4230 
 4231 /*
 4232  * Sets the memory attribute for the specified page.
 4233  */
 4234 void
 4235 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 4236 {
 4237 
 4238         m->md.pv_memattr = ma;
 4239 }
 4240 
 4241 /*
 4242  * Perform the pmap work for mincore(2).
 4243  */
 4244 int
 4245 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
 4246 {
 4247         pt_entry_t *l2, *l3, tpte;
 4248         vm_paddr_t pa;
 4249         int val;
 4250         bool managed;
 4251 
 4252         PMAP_LOCK(pmap);
 4253 retry:
 4254         managed = false;
 4255         val = 0;
 4256 
 4257         l2 = pmap_l2(pmap, addr);
 4258         if (l2 != NULL && ((tpte = pmap_load(l2)) & PTE_V) != 0) {
 4259                 if ((tpte & PTE_RWX) != 0) {
 4260                         pa = PTE_TO_PHYS(tpte) | (addr & L2_OFFSET);
 4261                         val = MINCORE_INCORE | MINCORE_SUPER;
 4262                 } else {
 4263                         l3 = pmap_l2_to_l3(l2, addr);
 4264                         tpte = pmap_load(l3);
 4265                         if ((tpte & PTE_V) == 0)
 4266                                 goto done;
 4267                         pa = PTE_TO_PHYS(tpte) | (addr & L3_OFFSET);
 4268                         val = MINCORE_INCORE;
 4269                 }
 4270 
 4271                 if ((tpte & PTE_D) != 0)
 4272                         val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 4273                 if ((tpte & PTE_A) != 0)
 4274                         val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 4275                 managed = (tpte & PTE_SW_MANAGED) == PTE_SW_MANAGED;
 4276         }
 4277 
 4278 done:
 4279         if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 4280             (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
 4281                 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
 4282                 if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
 4283                         goto retry;
 4284         } else
 4285                 PA_UNLOCK_COND(*locked_pa);
 4286         PMAP_UNLOCK(pmap);
 4287         return (val);
 4288 }
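
/*
 * Illustrative sketch (not part of the original source): how the
 * MINCORE_* flags assembled above typically reach userland through the
 * mincore(2) system call.  The helper below is hypothetical and assumes
 * only standard FreeBSD userland headers.
 *
 *	#include <sys/mman.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	static int
 *	describe_page(const void *addr)
 *	{
 *		uintptr_t pg;
 *		char vec;
 *
 *		// Align to a page boundary before querying residency.
 *		pg = (uintptr_t)addr & ~((uintptr_t)getpagesize() - 1);
 *		if (mincore((void *)pg, 1, &vec) != 0)
 *			return (-1);
 *		if ((vec & MINCORE_INCORE) == 0)
 *			printf("%p: not resident\n", addr);
 *		else if ((vec & MINCORE_SUPER) != 0)
 *			printf("%p: resident (superpage)\n", addr);
 *		else
 *			printf("%p: resident (base page)\n", addr);
 *		return (0);
 *	}
 */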
 4289 
 4290 void
 4291 pmap_activate_sw(struct thread *td)
 4292 {
 4293         pmap_t oldpmap, pmap;
 4294         u_int hart;
 4295 
 4296         oldpmap = PCPU_GET(curpmap);
 4297         pmap = vmspace_pmap(td->td_proc->p_vmspace);
 4298         if (pmap == oldpmap)
 4299                 return;
 4300         load_satp(pmap->pm_satp);
 4301 
 4302         hart = PCPU_GET(hart);
 4303 #ifdef SMP
 4304         CPU_SET_ATOMIC(hart, &pmap->pm_active);
 4305         CPU_CLR_ATOMIC(hart, &oldpmap->pm_active);
 4306 #else
 4307         CPU_SET(hart, &pmap->pm_active);
 4308         CPU_CLR(hart, &oldpmap->pm_active);
 4309 #endif
 4310         PCPU_SET(curpmap, pmap);
 4311 
 4312         sfence_vma();
 4313 }
 4314 
 4315 void
 4316 pmap_activate(struct thread *td)
 4317 {
 4318 
 4319         critical_enter();
 4320         pmap_activate_sw(td);
 4321         critical_exit();
 4322 }
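
/*
 * Note on pmap_activate() above: the critical section prevents the thread
 * from migrating to another hart between loading the new satp value and
 * updating curpmap and the pm_active sets, so the per-CPU state always
 * matches the translation tables the current hart is actually using.
 */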
 4323 
 4324 void
 4325 pmap_activate_boot(pmap_t pmap)
 4326 {
 4327         u_int hart;
 4328 
 4329         hart = PCPU_GET(hart);
 4330 #ifdef SMP
 4331         CPU_SET_ATOMIC(hart, &pmap->pm_active);
 4332 #else
 4333         CPU_SET(hart, &pmap->pm_active);
 4334 #endif
 4335         PCPU_SET(curpmap, pmap);
 4336 }
 4337 
 4338 void
 4339 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
 4340 {
 4341         cpuset_t mask;
 4342 
 4343         /*
 4344          * From the RISC-V User-Level ISA V2.2:
 4345          *
 4346          * "To make a store to instruction memory visible to all
 4347          * RISC-V harts, the writing hart has to execute a data FENCE
 4348          * before requesting that all remote RISC-V harts execute a
 4349          * FENCE.I."
 4350          */
 4351         sched_pin();
 4352         mask = all_harts;
 4353         CPU_CLR(PCPU_GET(hart), &mask);
 4354         fence();
 4355         if (!CPU_EMPTY(&mask) && smp_started)
 4356                 sbi_remote_fence_i(mask.__bits);
 4357         sched_unpin();
 4358 }
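
/*
 * Note on pmap_sync_icache() above: following the quoted ISA requirement,
 * the routine pins to the current hart, executes a local data FENCE, and
 * then asks the SBI firmware to execute FENCE.I on the remaining harts.
 */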
 4359 
 4360 /*
 4361  *      Increase the starting virtual address of the given mapping if a
 4362  *      different alignment might result in more superpage mappings.
 4363  */
 4364 void
 4365 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
 4366     vm_offset_t *addr, vm_size_t size)
 4367 {
 4368         vm_offset_t superpage_offset;
 4369 
 4370         if (size < L2_SIZE)
 4371                 return;
 4372         if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 4373                 offset += ptoa(object->pg_color);
 4374         superpage_offset = offset & L2_OFFSET;
 4375         if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
 4376             (*addr & L2_OFFSET) == superpage_offset)
 4377                 return;
 4378         if ((*addr & L2_OFFSET) < superpage_offset)
 4379                 *addr = (*addr & ~L2_OFFSET) + superpage_offset;
 4380         else
 4381                 *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
 4382 }
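
/*
 * Worked example for pmap_align_superpage() above (illustrative, assuming
 * the usual riscv values L2_SIZE == 2MB and L2_OFFSET == L2_SIZE - 1): if
 * the object offset yields superpage_offset == 0x123000 and the caller
 * proposes *addr == 0x40000000, then (*addr & L2_OFFSET) == 0, which is
 * less than superpage_offset, so *addr is advanced to 0x40123000.  The
 * virtual address and the object offset then occupy the same position
 * within a 2MB frame, which is what later allows L2 superpage mappings.
 * The early return is taken when, after the initial partial superpage,
 * less than one full L2_SIZE remains, since realignment could not enable
 * any superpage mapping in that case.
 */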
 4383 
 4384 /**
 4385  * Get the kernel virtual addresses of a set of physical pages. If there are
 4386  * physical addresses not covered by the DMAP, perform a transient mapping
 4387  * that will be removed when calling pmap_unmap_io_transient.
 4388  *
 4389  * \param page        The pages for which the caller wishes to obtain
 4390  *                    virtual addresses in the kernel memory map.
 4391  * \param vaddr       On return, contains the kernel virtual memory
 4392  *                    addresses of the pages passed in the page parameter.
 4393  * \param count       Number of pages passed in.
 4394  * \param can_fault   TRUE if the thread using the mapped pages can take
 4395  *                    page faults, FALSE otherwise.
 4396  *
 4397  * \returns TRUE if the caller must call pmap_unmap_io_transient when
 4398  *          finished or FALSE otherwise.
 4399  *
 4400  */
 4401 boolean_t
 4402 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
 4403     boolean_t can_fault)
 4404 {
 4405         vm_paddr_t paddr;
 4406         boolean_t needs_mapping;
 4407         int error, i;
 4408 
 4409         /*
 4410          * Allocate any KVA space that we need; this is done in a separate
 4411          * loop to prevent calling vmem_alloc() while pinned.
 4412          */
 4413         needs_mapping = FALSE;
 4414         for (i = 0; i < count; i++) {
 4415                 paddr = VM_PAGE_TO_PHYS(page[i]);
 4416                 if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) {
 4417                         error = vmem_alloc(kernel_arena, PAGE_SIZE,
 4418                             M_BESTFIT | M_WAITOK, &vaddr[i]);
 4419                         KASSERT(error == 0, ("vmem_alloc failed: %d", error));
 4420                         needs_mapping = TRUE;
 4421                 } else {
 4422                         vaddr[i] = PHYS_TO_DMAP(paddr);
 4423                 }
 4424         }
 4425 
 4426         /* Exit early if everything is covered by the DMAP */
 4427         if (!needs_mapping)
 4428                 return (FALSE);
 4429 
 4430         if (!can_fault)
 4431                 sched_pin();
 4432         for (i = 0; i < count; i++) {
 4433                 paddr = VM_PAGE_TO_PHYS(page[i]);
 4434                 if (paddr >= DMAP_MAX_PHYSADDR) {
 4435                         panic(
 4436                            "pmap_map_io_transient: TODO: Map out of DMAP data");
 4437                 }
 4438         }
 4439 
 4440         return (needs_mapping);
 4441 }
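
/*
 * Illustrative calling pattern (not part of the original source) for the
 * transient mapping routines; "m" is a hypothetical array of two pages:
 *
 *	vm_offset_t va[2];
 *	boolean_t mapped;
 *
 *	mapped = pmap_map_io_transient(m, va, 2, FALSE);
 *	... access the pages through va[0] and va[1]; when a transient
 *	... mapping was created with can_fault == FALSE, the thread is
 *	... pinned here and must not sleep ...
 *	if (mapped)
 *		pmap_unmap_io_transient(m, va, 2, FALSE);
 */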
 4442 
 4443 void
 4444 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
 4445     boolean_t can_fault)
 4446 {
 4447         vm_paddr_t paddr;
 4448         int i;
 4449 
 4450         if (!can_fault)
 4451                 sched_unpin();
 4452         for (i = 0; i < count; i++) {
 4453                 paddr = VM_PAGE_TO_PHYS(page[i]);
 4454                 if (paddr >= DMAP_MAX_PHYSADDR) {
 4455                         panic("RISCVTODO: pmap_unmap_io_transient: Unmap data");
 4456                 }
 4457         }
 4458 }
 4459 
 4460 boolean_t
 4461 pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
 4462 {
 4463 
 4464         return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_BACK);
 4465 }
 4466 
 4467 bool
 4468 pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2,
 4469     pt_entry_t **l3)
 4470 {
 4471         pd_entry_t *l1p, *l2p;
 4472 
 4473         /* Get l1 directory entry. */
 4474         l1p = pmap_l1(pmap, va);
 4475         *l1 = l1p;
 4476 
 4477         if (l1p == NULL || (pmap_load(l1p) & PTE_V) == 0)
 4478                 return (false);
 4479 
 4480         if ((pmap_load(l1p) & PTE_RX) != 0) {
 4481                 *l2 = NULL;
 4482                 *l3 = NULL;
 4483                 return (true);
 4484         }
 4485 
 4486         /* Get l2 directory entry. */
 4487         l2p = pmap_l1_to_l2(l1p, va);
 4488         *l2 = l2p;
 4489 
 4490         if (l2p == NULL || (pmap_load(l2p) & PTE_V) == 0)
 4491                 return (false);
 4492 
 4493         if ((pmap_load(l2p) & PTE_RX) != 0) {
 4494                 *l3 = NULL;
 4495                 return (true);
 4496         }
 4497 
 4498         /* Get l3 page table entry. */
 4499         *l3 = pmap_l2_to_l3(l2p, va);
 4500 
 4501         return (true);
 4502 }
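
/*
 * Illustrative sketch (not part of the original source) of interpreting
 * the results of pmap_get_tables(); a NULL *l2 or *l3 together with a
 * "true" return means the walk stopped at a leaf (superpage) entry:
 *
 *	pd_entry_t *l1, *l2;
 *	pt_entry_t *l3;
 *
 *	if (pmap_get_tables(kernel_pmap, va, &l1, &l2, &l3)) {
 *		if (l3 != NULL)
 *			printf("L3 PTE: %#lx\n", pmap_load(l3));
 *		else if (l2 != NULL)
 *			printf("L2 leaf: %#lx\n", pmap_load(l2));
 *		else
 *			printf("L1 leaf: %#lx\n", pmap_load(l1));
 *	}
 */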
