The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/arm64/arm64/pmap.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  * Copyright (c) 2003 Peter Wemm
    9  * All rights reserved.
   10  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
   11  * All rights reserved.
   12  * Copyright (c) 2014 Andrew Turner
   13  * All rights reserved.
   14  * Copyright (c) 2014-2016 The FreeBSD Foundation
   15  * All rights reserved.
   16  *
   17  * This code is derived from software contributed to Berkeley by
   18  * the Systems Programming Group of the University of Utah Computer
   19  * Science Department and William Jolitz of UUNET Technologies Inc.
   20  *
   21  * This software was developed by Andrew Turner under sponsorship from
   22  * the FreeBSD Foundation.
   23  *
   24  * Redistribution and use in source and binary forms, with or without
   25  * modification, are permitted provided that the following conditions
   26  * are met:
   27  * 1. Redistributions of source code must retain the above copyright
   28  *    notice, this list of conditions and the following disclaimer.
   29  * 2. Redistributions in binary form must reproduce the above copyright
   30  *    notice, this list of conditions and the following disclaimer in the
   31  *    documentation and/or other materials provided with the distribution.
   32  * 3. All advertising materials mentioning features or use of this software
   33  *    must display the following acknowledgement:
   34  *      This product includes software developed by the University of
   35  *      California, Berkeley and its contributors.
   36  * 4. Neither the name of the University nor the names of its contributors
   37  *    may be used to endorse or promote products derived from this software
   38  *    without specific prior written permission.
   39  *
   40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   50  * SUCH DAMAGE.
   51  *
   52  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
   53  */
   54 /*-
   55  * Copyright (c) 2003 Networks Associates Technology, Inc.
   56  * All rights reserved.
   57  *
   58  * This software was developed for the FreeBSD Project by Jake Burkholder,
   59  * Safeport Network Services, and Network Associates Laboratories, the
   60  * Security Research Division of Network Associates, Inc. under
   61  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
   62  * CHATS research program.
   63  *
   64  * Redistribution and use in source and binary forms, with or without
   65  * modification, are permitted provided that the following conditions
   66  * are met:
   67  * 1. Redistributions of source code must retain the above copyright
   68  *    notice, this list of conditions and the following disclaimer.
   69  * 2. Redistributions in binary form must reproduce the above copyright
   70  *    notice, this list of conditions and the following disclaimer in the
   71  *    documentation and/or other materials provided with the distribution.
   72  *
   73  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   74  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   75  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   76  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   77  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   78  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   79  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   80  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   81  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   82  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   83  * SUCH DAMAGE.
   84  */
   85 
   86 #include <sys/cdefs.h>
   87 __FBSDID("$FreeBSD: releng/11.2/sys/arm64/arm64/pmap.c 325238 2017-10-31 15:06:26Z markj $");
   88 
   89 /*
   90  *      Manages physical address maps.
   91  *
   92  *      Since the information managed by this module is
   93  *      also stored by the logical address mapping module,
   94  *      this module may throw away valid virtual-to-physical
   95  *      mappings at almost any time.  However, invalidations
   96  *      of virtual-to-physical mappings must be done as
   97  *      requested.
   98  *
   99  *      In order to cope with hardware architectures which
  100  *      make virtual-to-physical map invalidates expensive,
  101  *      this module may delay invalidate or reduced protection
  102  *      operations until such time as they are actually
  103  *      necessary.  This module is given full information as
  104  *      to which processors are currently using which maps,
  105  *      and to when physical maps must be made correct.
  106  */
  107 
  108 #include "opt_vm.h"
  109 
  110 #include <sys/param.h>
  111 #include <sys/bitstring.h>
  112 #include <sys/bus.h>
  113 #include <sys/systm.h>
  114 #include <sys/kernel.h>
  115 #include <sys/ktr.h>
  116 #include <sys/lock.h>
  117 #include <sys/malloc.h>
  118 #include <sys/mman.h>
  119 #include <sys/msgbuf.h>
  120 #include <sys/mutex.h>
  121 #include <sys/proc.h>
  122 #include <sys/rwlock.h>
  123 #include <sys/sx.h>
  124 #include <sys/vmem.h>
  125 #include <sys/vmmeter.h>
  126 #include <sys/sched.h>
  127 #include <sys/sysctl.h>
  128 #include <sys/_unrhdr.h>
  129 #include <sys/smp.h>
  130 
  131 #include <vm/vm.h>
  132 #include <vm/vm_param.h>
  133 #include <vm/vm_kern.h>
  134 #include <vm/vm_page.h>
  135 #include <vm/vm_map.h>
  136 #include <vm/vm_object.h>
  137 #include <vm/vm_extern.h>
  138 #include <vm/vm_pageout.h>
  139 #include <vm/vm_pager.h>
  140 #include <vm/vm_phys.h>
  141 #include <vm/vm_radix.h>
  142 #include <vm/vm_reserv.h>
  143 #include <vm/uma.h>
  144 
  145 #include <machine/machdep.h>
  146 #include <machine/md_var.h>
  147 #include <machine/pcb.h>
  148 
  149 #define NL0PG           (PAGE_SIZE/(sizeof (pd_entry_t)))
  150 #define NL1PG           (PAGE_SIZE/(sizeof (pd_entry_t)))
  151 #define NL2PG           (PAGE_SIZE/(sizeof (pd_entry_t)))
  152 #define NL3PG           (PAGE_SIZE/(sizeof (pt_entry_t)))
  153 
  154 #define NUL0E           L0_ENTRIES
  155 #define NUL1E           (NUL0E * NL1PG)
  156 #define NUL2E           (NUL1E * NL2PG)
  157 
  158 #if !defined(DIAGNOSTIC)
  159 #ifdef __GNUC_GNU_INLINE__
  160 #define PMAP_INLINE     __attribute__((__gnu_inline__)) inline
  161 #else
  162 #define PMAP_INLINE     extern inline
  163 #endif
  164 #else
  165 #define PMAP_INLINE
  166 #endif
  167 
  168 /*
  169  * These are configured by the mair_el1 register. This is set up in locore.S
  170  */
  171 #define DEVICE_MEMORY   0
  172 #define UNCACHED_MEMORY 1
  173 #define CACHED_MEMORY   2
  174 
  175 
  176 #ifdef PV_STATS
  177 #define PV_STAT(x)      do { x ; } while (0)
  178 #else
  179 #define PV_STAT(x)      do { } while (0)
  180 #endif
  181 
  182 #define pmap_l2_pindex(v)       ((v) >> L2_SHIFT)
  183 #define pa_to_pvh(pa)           (&pv_table[pmap_l2_pindex(pa)])
  184 
  185 #define NPV_LIST_LOCKS  MAXCPU
  186 
  187 #define PHYS_TO_PV_LIST_LOCK(pa)        \
  188                         (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
  189 
  190 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  do {    \
  191         struct rwlock **_lockp = (lockp);               \
  192         struct rwlock *_new_lock;                       \
  193                                                         \
  194         _new_lock = PHYS_TO_PV_LIST_LOCK(pa);           \
  195         if (_new_lock != *_lockp) {                     \
  196                 if (*_lockp != NULL)                    \
  197                         rw_wunlock(*_lockp);            \
  198                 *_lockp = _new_lock;                    \
  199                 rw_wlock(*_lockp);                      \
  200         }                                               \
  201 } while (0)
  202 
  203 #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)        \
  204                         CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
  205 
  206 #define RELEASE_PV_LIST_LOCK(lockp)             do {    \
  207         struct rwlock **_lockp = (lockp);               \
  208                                                         \
  209         if (*_lockp != NULL) {                          \
  210                 rw_wunlock(*_lockp);                    \
  211                 *_lockp = NULL;                         \
  212         }                                               \
  213 } while (0)
  214 
  215 #define VM_PAGE_TO_PV_LIST_LOCK(m)      \
  216                         PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
  217 
  218 struct pmap kernel_pmap_store;
  219 
  220 vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
  221 vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
  222 vm_offset_t kernel_vm_end = 0;
  223 
  224 struct msgbuf *msgbufp = NULL;
  225 
  226 /*
  227  * Data for the pv entry allocation mechanism.
  228  * Updates to pv_invl_gen are protected by the pv_list_locks[]
  229  * elements, but reads are not.
  230  */
  231 static struct md_page *pv_table;
  232 static struct md_page pv_dummy;
  233 
  234 vm_paddr_t dmap_phys_base;      /* The start of the dmap region */
  235 vm_paddr_t dmap_phys_max;       /* The limit of the dmap region */
  236 vm_offset_t dmap_max_addr;      /* The virtual address limit of the dmap */
  237 
  238 /* This code assumes all L1 DMAP entries will be used */
  239 CTASSERT((DMAP_MIN_ADDRESS  & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
  240 CTASSERT((DMAP_MAX_ADDRESS  & ~L0_OFFSET) == DMAP_MAX_ADDRESS);
  241 
  242 #define DMAP_TABLES     ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
  243 extern pt_entry_t pagetable_dmap[];
  244 
  245 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
  246 
  247 static int superpages_enabled = 0;
  248 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
  249     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
  250     "Are large page mappings enabled?");
  251 
  252 /*
  253  * Data for the pv entry allocation mechanism
  254  */
  255 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
  256 static struct mtx pv_chunks_mutex;
  257 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
  258 
  259 static void     free_pv_chunk(struct pv_chunk *pc);
  260 static void     free_pv_entry(pmap_t pmap, pv_entry_t pv);
  261 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
  262 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
  263 static void     pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
  264 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
  265                     vm_offset_t va);
  266 
  267 static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode);
  268 static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
  269 static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
  270 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
  271     vm_offset_t va, struct rwlock **lockp);
  272 static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
  273 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
  274     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
  275 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
  276     pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
  277 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
  278     vm_page_t m, struct rwlock **lockp);
  279 
  280 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
  281                 struct rwlock **lockp);
  282 
  283 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
  284     struct spglist *free);
  285 static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
  286 
  287 /*
  288  * These load the old table data and store the new value.
  289  * They need to be atomic as the System MMU may write to the table at
  290  * the same time as the CPU.
  291  */
  292 #define pmap_load_store(table, entry) atomic_swap_64(table, entry)
  293 #define pmap_set(table, mask) atomic_set_64(table, mask)
  294 #define pmap_load_clear(table) atomic_swap_64(table, 0)
  295 #define pmap_load(table) (*table)
  296 
  297 /********************/
  298 /* Inline functions */
  299 /********************/
  300 
  301 static __inline void
  302 pagecopy(void *s, void *d)
  303 {
  304 
  305         memcpy(d, s, PAGE_SIZE);
  306 }
  307 
  308 #define pmap_l0_index(va)       (((va) >> L0_SHIFT) & L0_ADDR_MASK)
  309 #define pmap_l1_index(va)       (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
  310 #define pmap_l2_index(va)       (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
  311 #define pmap_l3_index(va)       (((va) >> L3_SHIFT) & Ln_ADDR_MASK)
  312 
  313 static __inline pd_entry_t *
  314 pmap_l0(pmap_t pmap, vm_offset_t va)
  315 {
  316 
  317         return (&pmap->pm_l0[pmap_l0_index(va)]);
  318 }
  319 
  320 static __inline pd_entry_t *
  321 pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
  322 {
  323         pd_entry_t *l1;
  324 
  325         l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
  326         return (&l1[pmap_l1_index(va)]);
  327 }
  328 
  329 static __inline pd_entry_t *
  330 pmap_l1(pmap_t pmap, vm_offset_t va)
  331 {
  332         pd_entry_t *l0;
  333 
  334         l0 = pmap_l0(pmap, va);
  335         if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
  336                 return (NULL);
  337 
  338         return (pmap_l0_to_l1(l0, va));
  339 }
  340 
  341 static __inline pd_entry_t *
  342 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
  343 {
  344         pd_entry_t *l2;
  345 
  346         l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
  347         return (&l2[pmap_l2_index(va)]);
  348 }
  349 
  350 static __inline pd_entry_t *
  351 pmap_l2(pmap_t pmap, vm_offset_t va)
  352 {
  353         pd_entry_t *l1;
  354 
  355         l1 = pmap_l1(pmap, va);
  356         if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
  357                 return (NULL);
  358 
  359         return (pmap_l1_to_l2(l1, va));
  360 }
  361 
  362 static __inline pt_entry_t *
  363 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
  364 {
  365         pt_entry_t *l3;
  366 
  367         l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
  368         return (&l3[pmap_l3_index(va)]);
  369 }
  370 
  371 /*
  372  * Returns the lowest valid pde for a given virtual address.
  373  * The next level may or may not point to a valid page or block.
  374  */
  375 static __inline pd_entry_t *
  376 pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
  377 {
  378         pd_entry_t *l0, *l1, *l2, desc;
  379 
  380         l0 = pmap_l0(pmap, va);
  381         desc = pmap_load(l0) & ATTR_DESCR_MASK;
  382         if (desc != L0_TABLE) {
  383                 *level = -1;
  384                 return (NULL);
  385         }
  386 
  387         l1 = pmap_l0_to_l1(l0, va);
  388         desc = pmap_load(l1) & ATTR_DESCR_MASK;
  389         if (desc != L1_TABLE) {
  390                 *level = 0;
  391                 return (l0);
  392         }
  393 
  394         l2 = pmap_l1_to_l2(l1, va);
  395         desc = pmap_load(l2) & ATTR_DESCR_MASK;
  396         if (desc != L2_TABLE) {
  397                 *level = 1;
  398                 return (l1);
  399         }
  400 
  401         *level = 2;
  402         return (l2);
  403 }
  404 
  405 /*
  406  * Returns the lowest valid pte block or table entry for a given virtual
  407  * address. If there are no valid entries return NULL and set the level to
  408  * the first invalid level.
  409  */
  410 static __inline pt_entry_t *
  411 pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
  412 {
  413         pd_entry_t *l1, *l2, desc;
  414         pt_entry_t *l3;
  415 
  416         l1 = pmap_l1(pmap, va);
  417         if (l1 == NULL) {
  418                 *level = 0;
  419                 return (NULL);
  420         }
  421         desc = pmap_load(l1) & ATTR_DESCR_MASK;
  422         if (desc == L1_BLOCK) {
  423                 *level = 1;
  424                 return (l1);
  425         }
  426 
  427         if (desc != L1_TABLE) {
  428                 *level = 1;
  429                 return (NULL);
  430         }
  431 
  432         l2 = pmap_l1_to_l2(l1, va);
  433         desc = pmap_load(l2) & ATTR_DESCR_MASK;
  434         if (desc == L2_BLOCK) {
  435                 *level = 2;
  436                 return (l2);
  437         }
  438 
  439         if (desc != L2_TABLE) {
  440                 *level = 2;
  441                 return (NULL);
  442         }
  443 
  444         *level = 3;
  445         l3 = pmap_l2_to_l3(l2, va);
  446         if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
  447                 return (NULL);
  448 
  449         return (l3);
  450 }
  451 
  452 static inline bool
  453 pmap_superpages_enabled(void)
  454 {
  455 
  456         return (superpages_enabled != 0);
  457 }
  458 
  459 bool
  460 pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
  461     pd_entry_t **l2, pt_entry_t **l3)
  462 {
  463         pd_entry_t *l0p, *l1p, *l2p;
  464 
  465         if (pmap->pm_l0 == NULL)
  466                 return (false);
  467 
  468         l0p = pmap_l0(pmap, va);
  469         *l0 = l0p;
  470 
  471         if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
  472                 return (false);
  473 
  474         l1p = pmap_l0_to_l1(l0p, va);
  475         *l1 = l1p;
  476 
  477         if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
  478                 *l2 = NULL;
  479                 *l3 = NULL;
  480                 return (true);
  481         }
  482 
  483         if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
  484                 return (false);
  485 
  486         l2p = pmap_l1_to_l2(l1p, va);
  487         *l2 = l2p;
  488 
  489         if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
  490                 *l3 = NULL;
  491                 return (true);
  492         }
  493 
  494         *l3 = pmap_l2_to_l3(l2p, va);
  495 
  496         return (true);
  497 }
  498 
  499 static __inline int
  500 pmap_is_current(pmap_t pmap)
  501 {
  502 
  503         return ((pmap == pmap_kernel()) ||
  504             (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
  505 }
  506 
  507 static __inline int
  508 pmap_l3_valid(pt_entry_t l3)
  509 {
  510 
  511         return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
  512 }
  513 
  514 
  515 /* Is a level 1 or 2entry a valid block and cacheable */
  516 CTASSERT(L1_BLOCK == L2_BLOCK);
  517 static __inline int
  518 pmap_pte_valid_cacheable(pt_entry_t pte)
  519 {
  520 
  521         return (((pte & ATTR_DESCR_MASK) == L1_BLOCK) &&
  522             ((pte & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
  523 }
  524 
  525 static __inline int
  526 pmap_l3_valid_cacheable(pt_entry_t l3)
  527 {
  528 
  529         return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
  530             ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
  531 }
  532 
  533 #define PTE_SYNC(pte)   cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
  534 
  535 /*
  536  * Checks if the page is dirty. We currently lack proper tracking of this on
  537  * arm64 so for now assume is a page mapped as rw was accessed it is.
  538  */
  539 static inline int
  540 pmap_page_dirty(pt_entry_t pte)
  541 {
  542 
  543         return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
  544             (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
  545 }
  546 
  547 static __inline void
  548 pmap_resident_count_inc(pmap_t pmap, int count)
  549 {
  550 
  551         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  552         pmap->pm_stats.resident_count += count;
  553 }
  554 
  555 static __inline void
  556 pmap_resident_count_dec(pmap_t pmap, int count)
  557 {
  558 
  559         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  560         KASSERT(pmap->pm_stats.resident_count >= count,
  561             ("pmap %p resident count underflow %ld %d", pmap,
  562             pmap->pm_stats.resident_count, count));
  563         pmap->pm_stats.resident_count -= count;
  564 }
  565 
  566 static pt_entry_t *
  567 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
  568     u_int *l2_slot)
  569 {
  570         pt_entry_t *l2;
  571         pd_entry_t *l1;
  572 
  573         l1 = (pd_entry_t *)l1pt;
  574         *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
  575 
  576         /* Check locore has used a table L1 map */
  577         KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
  578            ("Invalid bootstrap L1 table"));
  579         /* Find the address of the L2 table */
  580         l2 = (pt_entry_t *)init_pt_va;
  581         *l2_slot = pmap_l2_index(va);
  582 
  583         return (l2);
  584 }
  585 
  586 static vm_paddr_t
  587 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
  588 {
  589         u_int l1_slot, l2_slot;
  590         pt_entry_t *l2;
  591 
  592         l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
  593 
  594         return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
  595 }
  596 
  597 static void
  598 pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
  599 {
  600         vm_offset_t va;
  601         vm_paddr_t pa;
  602         u_int l1_slot;
  603 
  604         pa = dmap_phys_base = min_pa & ~L1_OFFSET;
  605         va = DMAP_MIN_ADDRESS;
  606         for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
  607             pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
  608                 l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
  609 
  610                 pmap_load_store(&pagetable_dmap[l1_slot],
  611                     (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN |
  612                     ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
  613         }
  614 
  615         /* Set the upper limit of the DMAP region */
  616         dmap_phys_max = pa;
  617         dmap_max_addr = va;
  618 
  619         cpu_dcache_wb_range((vm_offset_t)pagetable_dmap,
  620             PAGE_SIZE * DMAP_TABLES);
  621         cpu_tlb_flushID();
  622 }
  623 
  624 static vm_offset_t
  625 pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
  626 {
  627         vm_offset_t l2pt;
  628         vm_paddr_t pa;
  629         pd_entry_t *l1;
  630         u_int l1_slot;
  631 
  632         KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));
  633 
  634         l1 = (pd_entry_t *)l1pt;
  635         l1_slot = pmap_l1_index(va);
  636         l2pt = l2_start;
  637 
  638         for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
  639                 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
  640 
  641                 pa = pmap_early_vtophys(l1pt, l2pt);
  642                 pmap_load_store(&l1[l1_slot],
  643                     (pa & ~Ln_TABLE_MASK) | L1_TABLE);
  644                 l2pt += PAGE_SIZE;
  645         }
  646 
  647         /* Clean the L2 page table */
  648         memset((void *)l2_start, 0, l2pt - l2_start);
  649         cpu_dcache_wb_range(l2_start, l2pt - l2_start);
  650 
  651         /* Flush the l1 table to ram */
  652         cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
  653 
  654         return l2pt;
  655 }
  656 
  657 static vm_offset_t
  658 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
  659 {
  660         vm_offset_t l2pt, l3pt;
  661         vm_paddr_t pa;
  662         pd_entry_t *l2;
  663         u_int l2_slot;
  664 
  665         KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
  666 
  667         l2 = pmap_l2(kernel_pmap, va);
  668         l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
  669         l2pt = (vm_offset_t)l2;
  670         l2_slot = pmap_l2_index(va);
  671         l3pt = l3_start;
  672 
  673         for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
  674                 KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
  675 
  676                 pa = pmap_early_vtophys(l1pt, l3pt);
  677                 pmap_load_store(&l2[l2_slot],
  678                     (pa & ~Ln_TABLE_MASK) | L2_TABLE);
  679                 l3pt += PAGE_SIZE;
  680         }
  681 
  682         /* Clean the L2 page table */
  683         memset((void *)l3_start, 0, l3pt - l3_start);
  684         cpu_dcache_wb_range(l3_start, l3pt - l3_start);
  685 
  686         cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
  687 
  688         return l3pt;
  689 }
  690 
  691 /*
  692  *      Bootstrap the system enough to run with virtual memory.
  693  */
  694 void
  695 pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
  696     vm_size_t kernlen)
  697 {
  698         u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
  699         uint64_t kern_delta;
  700         pt_entry_t *l2;
  701         vm_offset_t va, freemempos;
  702         vm_offset_t dpcpu, msgbufpv;
  703         vm_paddr_t pa, max_pa, min_pa;
  704         int i;
  705 
  706         kern_delta = KERNBASE - kernstart;
  707         physmem = 0;
  708 
  709         printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
  710         printf("%lx\n", l1pt);
  711         printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
  712 
  713         /* Set this early so we can use the pagetable walking functions */
  714         kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
  715         PMAP_LOCK_INIT(kernel_pmap);
  716 
  717         /* Assume the address we were loaded to is a valid physical address */
  718         min_pa = max_pa = KERNBASE - kern_delta;
  719 
  720         /*
  721          * Find the minimum physical address. physmap is sorted,
  722          * but may contain empty ranges.
  723          */
  724         for (i = 0; i < (physmap_idx * 2); i += 2) {
  725                 if (physmap[i] == physmap[i + 1])
  726                         continue;
  727                 if (physmap[i] <= min_pa)
  728                         min_pa = physmap[i];
  729                 if (physmap[i + 1] > max_pa)
  730                         max_pa = physmap[i + 1];
  731         }
  732 
  733         /* Create a direct map region early so we can use it for pa -> va */
  734         pmap_bootstrap_dmap(l1pt, min_pa, max_pa);
  735 
  736         va = KERNBASE;
  737         pa = KERNBASE - kern_delta;
  738 
  739         /*
  740          * Start to initialise phys_avail by copying from physmap
  741          * up to the physical address KERNBASE points at.
  742          */
  743         map_slot = avail_slot = 0;
  744         for (; map_slot < (physmap_idx * 2) &&
  745             avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) {
  746                 if (physmap[map_slot] == physmap[map_slot + 1])
  747                         continue;
  748 
  749                 if (physmap[map_slot] <= pa &&
  750                     physmap[map_slot + 1] > pa)
  751                         break;
  752 
  753                 phys_avail[avail_slot] = physmap[map_slot];
  754                 phys_avail[avail_slot + 1] = physmap[map_slot + 1];
  755                 physmem += (phys_avail[avail_slot + 1] -
  756                     phys_avail[avail_slot]) >> PAGE_SHIFT;
  757                 avail_slot += 2;
  758         }
  759 
  760         /* Add the memory before the kernel */
  761         if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) {
  762                 phys_avail[avail_slot] = physmap[map_slot];
  763                 phys_avail[avail_slot + 1] = pa;
  764                 physmem += (phys_avail[avail_slot + 1] -
  765                     phys_avail[avail_slot]) >> PAGE_SHIFT;
  766                 avail_slot += 2;
  767         }
  768         used_map_slot = map_slot;
  769 
  770         /*
  771          * Read the page table to find out what is already mapped.
  772          * This assumes we have mapped a block of memory from KERNBASE
  773          * using a single L1 entry.
  774          */
  775         l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
  776 
  777         /* Sanity check the index, KERNBASE should be the first VA */
  778         KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
  779 
  780         /* Find how many pages we have mapped */
  781         for (; l2_slot < Ln_ENTRIES; l2_slot++) {
  782                 if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
  783                         break;
  784 
  785                 /* Check locore used L2 blocks */
  786                 KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
  787                     ("Invalid bootstrap L2 table"));
  788                 KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
  789                     ("Incorrect PA in L2 table"));
  790 
  791                 va += L2_SIZE;
  792                 pa += L2_SIZE;
  793         }
  794 
  795         va = roundup2(va, L1_SIZE);
  796 
  797         freemempos = KERNBASE + kernlen;
  798         freemempos = roundup2(freemempos, PAGE_SIZE);
  799         /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
  800         freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
  801         /* And the l3 tables for the early devmap */
  802         freemempos = pmap_bootstrap_l3(l1pt,
  803             VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
  804 
  805         cpu_tlb_flushID();
  806 
  807 #define alloc_pages(var, np)                                            \
  808         (var) = freemempos;                                             \
  809         freemempos += (np * PAGE_SIZE);                                 \
  810         memset((char *)(var), 0, ((np) * PAGE_SIZE));
  811 
  812         /* Allocate dynamic per-cpu area. */
  813         alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
  814         dpcpu_init((void *)dpcpu, 0);
  815 
  816         /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
  817         alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
  818         msgbufp = (void *)msgbufpv;
  819 
  820         virtual_avail = roundup2(freemempos, L1_SIZE);
  821         virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
  822         kernel_vm_end = virtual_avail;
  823 
  824         pa = pmap_early_vtophys(l1pt, freemempos);
  825 
  826         /* Finish initialising physmap */
  827         map_slot = used_map_slot;
  828         for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
  829             map_slot < (physmap_idx * 2); map_slot += 2) {
  830                 if (physmap[map_slot] == physmap[map_slot + 1])
  831                         continue;
  832 
  833                 /* Have we used the current range? */
  834                 if (physmap[map_slot + 1] <= pa)
  835                         continue;
  836 
  837                 /* Do we need to split the entry? */
  838                 if (physmap[map_slot] < pa) {
  839                         phys_avail[avail_slot] = pa;
  840                         phys_avail[avail_slot + 1] = physmap[map_slot + 1];
  841                 } else {
  842                         phys_avail[avail_slot] = physmap[map_slot];
  843                         phys_avail[avail_slot + 1] = physmap[map_slot + 1];
  844                 }
  845                 physmem += (phys_avail[avail_slot + 1] -
  846                     phys_avail[avail_slot]) >> PAGE_SHIFT;
  847 
  848                 avail_slot += 2;
  849         }
  850         phys_avail[avail_slot] = 0;
  851         phys_avail[avail_slot + 1] = 0;
  852 
  853         /*
  854          * Maxmem isn't the "maximum memory", it's one larger than the
  855          * highest page of the physical address space.  It should be
  856          * called something like "Maxphyspage".
  857          */
  858         Maxmem = atop(phys_avail[avail_slot - 1]);
  859 
  860         cpu_tlb_flushID();
  861 }
  862 
  863 /*
  864  *      Initialize a vm_page's machine-dependent fields.
  865  */
  866 void
  867 pmap_page_init(vm_page_t m)
  868 {
  869 
  870         TAILQ_INIT(&m->md.pv_list);
  871         m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
  872 }
  873 
  874 /*
  875  *      Initialize the pmap module.
  876  *      Called by vm_init, to initialize any structures that the pmap
  877  *      system needs to map virtual memory.
  878  */
  879 void
  880 pmap_init(void)
  881 {
  882         vm_size_t s;
  883         int i, pv_npg;
  884 
  885         /*
  886          * Are large page mappings enabled?
  887          */
  888         TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
  889 
  890         /*
  891          * Initialize the pv chunk list mutex.
  892          */
  893         mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
  894 
  895         /*
  896          * Initialize the pool of pv list locks.
  897          */
  898         for (i = 0; i < NPV_LIST_LOCKS; i++)
  899                 rw_init(&pv_list_locks[i], "pmap pv list");
  900 
  901         /*
  902          * Calculate the size of the pv head table for superpages.
  903          */
  904         pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);
  905 
  906         /*
  907          * Allocate memory for the pv head table for superpages.
  908          */
  909         s = (vm_size_t)(pv_npg * sizeof(struct md_page));
  910         s = round_page(s);
  911         pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
  912             M_WAITOK | M_ZERO);
  913         for (i = 0; i < pv_npg; i++)
  914                 TAILQ_INIT(&pv_table[i].pv_list);
  915         TAILQ_INIT(&pv_dummy.pv_list);
  916 }
  917 
  918 static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
  919     "2MB page mapping counters");
  920 
  921 static u_long pmap_l2_demotions;
  922 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
  923     &pmap_l2_demotions, 0, "2MB page demotions");
  924 
  925 static u_long pmap_l2_p_failures;
  926 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
  927     &pmap_l2_p_failures, 0, "2MB page promotion failures");
  928 
  929 static u_long pmap_l2_promotions;
  930 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
  931     &pmap_l2_promotions, 0, "2MB page promotions");
  932 
  933 /*
  934  * Invalidate a single TLB entry.
  935  */
  936 PMAP_INLINE void
  937 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
  938 {
  939 
  940         sched_pin();
  941         __asm __volatile(
  942             "dsb  ishst         \n"
  943             "tlbi vaae1is, %0   \n"
  944             "dsb  ish           \n"
  945             "isb                \n"
  946             : : "r"(va >> PAGE_SHIFT));
  947         sched_unpin();
  948 }
  949 
  950 PMAP_INLINE void
  951 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
  952 {
  953         vm_offset_t addr;
  954 
  955         sched_pin();
  956         dsb(ishst);
  957         for (addr = sva; addr < eva; addr += PAGE_SIZE) {
  958                 __asm __volatile(
  959                     "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
  960         }
  961         __asm __volatile(
  962             "dsb  ish   \n"
  963             "isb        \n");
  964         sched_unpin();
  965 }
  966 
  967 PMAP_INLINE void
  968 pmap_invalidate_all(pmap_t pmap)
  969 {
  970 
  971         sched_pin();
  972         __asm __volatile(
  973             "dsb  ishst         \n"
  974             "tlbi vmalle1is     \n"
  975             "dsb  ish           \n"
  976             "isb                \n");
  977         sched_unpin();
  978 }
  979 
  980 /*
  981  *      Routine:        pmap_extract
  982  *      Function:
  983  *              Extract the physical page address associated
  984  *              with the given map/virtual_address pair.
  985  */
  986 vm_paddr_t
  987 pmap_extract(pmap_t pmap, vm_offset_t va)
  988 {
  989         pt_entry_t *pte, tpte;
  990         vm_paddr_t pa;
  991         int lvl;
  992 
  993         pa = 0;
  994         PMAP_LOCK(pmap);
  995         /*
  996          * Find the block or page map for this virtual address. pmap_pte
  997          * will return either a valid block/page entry, or NULL.
  998          */
  999         pte = pmap_pte(pmap, va, &lvl);
 1000         if (pte != NULL) {
 1001                 tpte = pmap_load(pte);
 1002                 pa = tpte & ~ATTR_MASK;
 1003                 switch(lvl) {
 1004                 case 1:
 1005                         KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
 1006                             ("pmap_extract: Invalid L1 pte found: %lx",
 1007                             tpte & ATTR_DESCR_MASK));
 1008                         pa |= (va & L1_OFFSET);
 1009                         break;
 1010                 case 2:
 1011                         KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
 1012                             ("pmap_extract: Invalid L2 pte found: %lx",
 1013                             tpte & ATTR_DESCR_MASK));
 1014                         pa |= (va & L2_OFFSET);
 1015                         break;
 1016                 case 3:
 1017                         KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
 1018                             ("pmap_extract: Invalid L3 pte found: %lx",
 1019                             tpte & ATTR_DESCR_MASK));
 1020                         pa |= (va & L3_OFFSET);
 1021                         break;
 1022                 }
 1023         }
 1024         PMAP_UNLOCK(pmap);
 1025         return (pa);
 1026 }
 1027 
 1028 /*
 1029  *      Routine:        pmap_extract_and_hold
 1030  *      Function:
 1031  *              Atomically extract and hold the physical page
 1032  *              with the given pmap and virtual address pair
 1033  *              if that mapping permits the given protection.
 1034  */
 1035 vm_page_t
 1036 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 1037 {
 1038         pt_entry_t *pte, tpte;
 1039         vm_offset_t off;
 1040         vm_paddr_t pa;
 1041         vm_page_t m;
 1042         int lvl;
 1043 
 1044         pa = 0;
 1045         m = NULL;
 1046         PMAP_LOCK(pmap);
 1047 retry:
 1048         pte = pmap_pte(pmap, va, &lvl);
 1049         if (pte != NULL) {
 1050                 tpte = pmap_load(pte);
 1051 
 1052                 KASSERT(lvl > 0 && lvl <= 3,
 1053                     ("pmap_extract_and_hold: Invalid level %d", lvl));
 1054                 CTASSERT(L1_BLOCK == L2_BLOCK);
 1055                 KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
 1056                     (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
 1057                     ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
 1058                      tpte & ATTR_DESCR_MASK));
 1059                 if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
 1060                     ((prot & VM_PROT_WRITE) == 0)) {
 1061                         switch(lvl) {
 1062                         case 1:
 1063                                 off = va & L1_OFFSET;
 1064                                 break;
 1065                         case 2:
 1066                                 off = va & L2_OFFSET;
 1067                                 break;
 1068                         case 3:
 1069                         default:
 1070                                 off = 0;
 1071                         }
 1072                         if (vm_page_pa_tryrelock(pmap,
 1073                             (tpte & ~ATTR_MASK) | off, &pa))
 1074                                 goto retry;
 1075                         m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
 1076                         vm_page_hold(m);
 1077                 }
 1078         }
 1079         PA_UNLOCK_COND(pa);
 1080         PMAP_UNLOCK(pmap);
 1081         return (m);
 1082 }
 1083 
 1084 vm_paddr_t
 1085 pmap_kextract(vm_offset_t va)
 1086 {
 1087         pt_entry_t *pte, tpte;
 1088         vm_paddr_t pa;
 1089         int lvl;
 1090 
 1091         if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
 1092                 pa = DMAP_TO_PHYS(va);
 1093         } else {
 1094                 pa = 0;
 1095                 pte = pmap_pte(kernel_pmap, va, &lvl);
 1096                 if (pte != NULL) {
 1097                         tpte = pmap_load(pte);
 1098                         pa = tpte & ~ATTR_MASK;
 1099                         switch(lvl) {
 1100                         case 1:
 1101                                 KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
 1102                                     ("pmap_kextract: Invalid L1 pte found: %lx",
 1103                                     tpte & ATTR_DESCR_MASK));
 1104                                 pa |= (va & L1_OFFSET);
 1105                                 break;
 1106                         case 2:
 1107                                 KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
 1108                                     ("pmap_kextract: Invalid L2 pte found: %lx",
 1109                                     tpte & ATTR_DESCR_MASK));
 1110                                 pa |= (va & L2_OFFSET);
 1111                                 break;
 1112                         case 3:
 1113                                 KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
 1114                                     ("pmap_kextract: Invalid L3 pte found: %lx",
 1115                                     tpte & ATTR_DESCR_MASK));
 1116                                 pa |= (va & L3_OFFSET);
 1117                                 break;
 1118                         }
 1119                 }
 1120         }
 1121         return (pa);
 1122 }
 1123 
 1124 /***************************************************
 1125  * Low level mapping routines.....
 1126  ***************************************************/
 1127 
 1128 static void
 1129 pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
 1130 {
 1131         pd_entry_t *pde;
 1132         pt_entry_t *pte, attr;
 1133         vm_offset_t va;
 1134         int lvl;
 1135 
 1136         KASSERT((pa & L3_OFFSET) == 0,
 1137            ("pmap_kenter: Invalid physical address"));
 1138         KASSERT((sva & L3_OFFSET) == 0,
 1139            ("pmap_kenter: Invalid virtual address"));
 1140         KASSERT((size & PAGE_MASK) == 0,
 1141             ("pmap_kenter: Mapping is not page-sized"));
 1142 
 1143         attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE;
 1144         if (mode == DEVICE_MEMORY)
 1145                 attr |= ATTR_XN;
 1146 
 1147         va = sva;
 1148         while (size != 0) {
 1149                 pde = pmap_pde(kernel_pmap, va, &lvl);
 1150                 KASSERT(pde != NULL,
 1151                     ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
 1152                 KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));
 1153 
 1154                 pte = pmap_l2_to_l3(pde, va);
 1155                 pmap_load_store(pte, (pa & ~L3_OFFSET) | attr);
 1156                 PTE_SYNC(pte);
 1157 
 1158                 va += PAGE_SIZE;
 1159                 pa += PAGE_SIZE;
 1160                 size -= PAGE_SIZE;
 1161         }
 1162         pmap_invalidate_range(kernel_pmap, sva, va);
 1163 }
 1164 
 1165 void
 1166 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
 1167 {
 1168 
 1169         pmap_kenter(sva, size, pa, DEVICE_MEMORY);
 1170 }
 1171 
 1172 /*
 1173  * Remove a page from the kernel pagetables.
 1174  */
 1175 PMAP_INLINE void
 1176 pmap_kremove(vm_offset_t va)
 1177 {
 1178         pt_entry_t *pte;
 1179         int lvl;
 1180 
 1181         pte = pmap_pte(kernel_pmap, va, &lvl);
 1182         KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
 1183         KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));
 1184 
 1185         if (pmap_l3_valid_cacheable(pmap_load(pte)))
 1186                 cpu_dcache_wb_range(va, L3_SIZE);
 1187         pmap_load_clear(pte);
 1188         PTE_SYNC(pte);
 1189         pmap_invalidate_page(kernel_pmap, va);
 1190 }
 1191 
 1192 void
 1193 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
 1194 {
 1195         pt_entry_t *pte;
 1196         vm_offset_t va;
 1197         int lvl;
 1198 
 1199         KASSERT((sva & L3_OFFSET) == 0,
 1200            ("pmap_kremove_device: Invalid virtual address"));
 1201         KASSERT((size & PAGE_MASK) == 0,
 1202             ("pmap_kremove_device: Mapping is not page-sized"));
 1203 
 1204         va = sva;
 1205         while (size != 0) {
 1206                 pte = pmap_pte(kernel_pmap, va, &lvl);
 1207                 KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
 1208                 KASSERT(lvl == 3,
 1209                     ("Invalid device pagetable level: %d != 3", lvl));
 1210                 pmap_load_clear(pte);
 1211                 PTE_SYNC(pte);
 1212 
 1213                 va += PAGE_SIZE;
 1214                 size -= PAGE_SIZE;
 1215         }
 1216         pmap_invalidate_range(kernel_pmap, sva, va);
 1217 }
 1218 
 1219 /*
 1220  *      Used to map a range of physical addresses into kernel
 1221  *      virtual address space.
 1222  *
 1223  *      The value passed in '*virt' is a suggested virtual address for
 1224  *      the mapping. Architectures which can support a direct-mapped
 1225  *      physical to virtual region can return the appropriate address
 1226  *      within that region, leaving '*virt' unchanged. Other
 1227  *      architectures should map the pages starting at '*virt' and
 1228  *      update '*virt' with the first usable address after the mapped
 1229  *      region.
 1230  */
 1231 vm_offset_t
 1232 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 1233 {
 1234         return PHYS_TO_DMAP(start);
 1235 }
 1236 
 1237 
 1238 /*
 1239  * Add a list of wired pages to the kva
 1240  * this routine is only used for temporary
 1241  * kernel mappings that do not need to have
 1242  * page modification or references recorded.
 1243  * Note that old mappings are simply written
 1244  * over.  The page *must* be wired.
 1245  * Note: SMP coherent.  Uses a ranged shootdown IPI.
 1246  */
 1247 void
 1248 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 1249 {
 1250         pd_entry_t *pde;
 1251         pt_entry_t *pte, pa;
 1252         vm_offset_t va;
 1253         vm_page_t m;
 1254         int i, lvl;
 1255 
 1256         va = sva;
 1257         for (i = 0; i < count; i++) {
 1258                 pde = pmap_pde(kernel_pmap, va, &lvl);
 1259                 KASSERT(pde != NULL,
 1260                     ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
 1261                 KASSERT(lvl == 2,
 1262                     ("pmap_qenter: Invalid level %d", lvl));
 1263 
 1264                 m = ma[i];
 1265                 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
 1266                     ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
 1267                 if (m->md.pv_memattr == DEVICE_MEMORY)
 1268                         pa |= ATTR_XN;
 1269                 pte = pmap_l2_to_l3(pde, va);
 1270                 pmap_load_store(pte, pa);
 1271                 PTE_SYNC(pte);
 1272 
 1273                 va += L3_SIZE;
 1274         }
 1275         pmap_invalidate_range(kernel_pmap, sva, va);
 1276 }
 1277 
 1278 /*
 1279  * This routine tears out page mappings from the
 1280  * kernel -- it is meant only for temporary mappings.
 1281  */
 1282 void
 1283 pmap_qremove(vm_offset_t sva, int count)
 1284 {
 1285         pt_entry_t *pte;
 1286         vm_offset_t va;
 1287         int lvl;
 1288 
 1289         KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
 1290 
 1291         va = sva;
 1292         while (count-- > 0) {
 1293                 pte = pmap_pte(kernel_pmap, va, &lvl);
 1294                 KASSERT(lvl == 3,
 1295                     ("Invalid device pagetable level: %d != 3", lvl));
 1296                 if (pte != NULL) {
 1297                         if (pmap_l3_valid_cacheable(pmap_load(pte)))
 1298                                 cpu_dcache_wb_range(va, L3_SIZE);
 1299                         pmap_load_clear(pte);
 1300                         PTE_SYNC(pte);
 1301                 }
 1302 
 1303                 va += PAGE_SIZE;
 1304         }
 1305         pmap_invalidate_range(kernel_pmap, sva, va);
 1306 }
 1307 
 1308 /***************************************************
 1309  * Page table page management routines.....
 1310  ***************************************************/
 1311 static __inline void
 1312 pmap_free_zero_pages(struct spglist *free)
 1313 {
 1314         vm_page_t m;
 1315 
 1316         while ((m = SLIST_FIRST(free)) != NULL) {
 1317                 SLIST_REMOVE_HEAD(free, plinks.s.ss);
 1318                 /* Preserve the page's PG_ZERO setting. */
 1319                 vm_page_free_toq(m);
 1320         }
 1321 }
 1322 
 1323 /*
 1324  * Schedule the specified unused page table page to be freed.  Specifically,
 1325  * add the page to the specified list of pages that will be released to the
 1326  * physical memory manager after the TLB has been updated.
 1327  */
 1328 static __inline void
 1329 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
 1330     boolean_t set_PG_ZERO)
 1331 {
 1332 
 1333         if (set_PG_ZERO)
 1334                 m->flags |= PG_ZERO;
 1335         else
 1336                 m->flags &= ~PG_ZERO;
 1337         SLIST_INSERT_HEAD(free, m, plinks.s.ss);
 1338 }
 1339 
 1340 /*
 1341  * Decrements a page table page's wire count, which is used to record the
 1342  * number of valid page table entries within the page.  If the wire count
 1343  * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 1344  * page table page was unmapped and FALSE otherwise.
 1345  */
 1346 static inline boolean_t
 1347 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 1348 {
 1349 
 1350         --m->wire_count;
 1351         if (m->wire_count == 0) {
 1352                 _pmap_unwire_l3(pmap, va, m, free);
 1353                 return (TRUE);
 1354         } else
 1355                 return (FALSE);
 1356 }
 1357 
 1358 static void
 1359 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
 1360 {
 1361 
 1362         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1363         /*
 1364          * unmap the page table page
 1365          */
 1366         if (m->pindex >= (NUL2E + NUL1E)) {
 1367                 /* l1 page */
 1368                 pd_entry_t *l0;
 1369 
 1370                 l0 = pmap_l0(pmap, va);
 1371                 pmap_load_clear(l0);
 1372                 PTE_SYNC(l0);
 1373         } else if (m->pindex >= NUL2E) {
 1374                 /* l2 page */
 1375                 pd_entry_t *l1;
 1376 
 1377                 l1 = pmap_l1(pmap, va);
 1378                 pmap_load_clear(l1);
 1379                 PTE_SYNC(l1);
 1380         } else {
 1381                 /* l3 page */
 1382                 pd_entry_t *l2;
 1383 
 1384                 l2 = pmap_l2(pmap, va);
 1385                 pmap_load_clear(l2);
 1386                 PTE_SYNC(l2);
 1387         }
 1388         pmap_resident_count_dec(pmap, 1);
 1389         if (m->pindex < NUL2E) {
 1390                 /* We just released an l3, unhold the matching l2 */
 1391                 pd_entry_t *l1, tl1;
 1392                 vm_page_t l2pg;
 1393 
 1394                 l1 = pmap_l1(pmap, va);
 1395                 tl1 = pmap_load(l1);
 1396                 l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
 1397                 pmap_unwire_l3(pmap, va, l2pg, free);
 1398         } else if (m->pindex < (NUL2E + NUL1E)) {
 1399                 /* We just released an l2, unhold the matching l1 */
 1400                 pd_entry_t *l0, tl0;
 1401                 vm_page_t l1pg;
 1402 
 1403                 l0 = pmap_l0(pmap, va);
 1404                 tl0 = pmap_load(l0);
 1405                 l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
 1406                 pmap_unwire_l3(pmap, va, l1pg, free);
 1407         }
 1408         pmap_invalidate_page(pmap, va);
 1409 
 1410         /*
 1411          * This is a release store so that the ordinary store unmapping
 1412          * the page table page is globally performed before TLB shoot-
 1413          * down is begun.
 1414          */
 1415         atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
 1416 
 1417         /*
 1418          * Put page on a list so that it is released after
 1419          * *ALL* TLB shootdown is done
 1420          */
 1421         pmap_add_delayed_free_list(m, free, TRUE);
 1422 }
 1423 
 1424 /*
 1425  * After removing an l3 entry, this routine is used to
 1426  * conditionally free the page, and manage the hold/wire counts.
 1427  */
 1428 static int
 1429 pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
 1430     struct spglist *free)
 1431 {
 1432         vm_page_t mpte;
 1433 
 1434         if (va >= VM_MAXUSER_ADDRESS)
 1435                 return (0);
 1436         KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
 1437         mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
 1438         return (pmap_unwire_l3(pmap, va, mpte, free));
 1439 }
 1440 
 1441 void
 1442 pmap_pinit0(pmap_t pmap)
 1443 {
 1444 
 1445         PMAP_LOCK_INIT(pmap);
 1446         bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 1447         pmap->pm_l0 = kernel_pmap->pm_l0;
 1448         pmap->pm_root.rt_root = 0;
 1449 }
 1450 
 1451 int
 1452 pmap_pinit(pmap_t pmap)
 1453 {
 1454         vm_paddr_t l0phys;
 1455         vm_page_t l0pt;
 1456 
 1457         /*
 1458          * allocate the l0 page
 1459          */
 1460         while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 1461             VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
 1462                 VM_WAIT;
 1463 
 1464         l0phys = VM_PAGE_TO_PHYS(l0pt);
 1465         pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);
 1466 
 1467         if ((l0pt->flags & PG_ZERO) == 0)
 1468                 pagezero(pmap->pm_l0);
 1469 
 1470         pmap->pm_root.rt_root = 0;
 1471         bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 1472 
 1473         return (1);
 1474 }
 1475 
 1476 /*
 1477  * This routine is called if the desired page table page does not exist.
 1478  *
 1479  * If page table page allocation fails, this routine may sleep before
 1480  * returning NULL.  It sleeps only if a lock pointer was given.
 1481  *
 1482  * Note: If a page allocation fails at page table level two or three,
 1483  * one or two pages may be held during the wait, only to be released
 1484  * afterwards.  This conservative approach is easily argued to avoid
 1485  * race conditions.
 1486  */
 1487 static vm_page_t
 1488 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
 1489 {
 1490         vm_page_t m, l1pg, l2pg;
 1491 
 1492         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1493 
 1494         /*
 1495          * Allocate a page table page.
 1496          */
 1497         if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
 1498             VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
 1499                 if (lockp != NULL) {
 1500                         RELEASE_PV_LIST_LOCK(lockp);
 1501                         PMAP_UNLOCK(pmap);
 1502                         VM_WAIT;
 1503                         PMAP_LOCK(pmap);
 1504                 }
 1505 
 1506                 /*
 1507                  * Indicate the need to retry.  While waiting, the page table
 1508                  * page may have been allocated.
 1509                  */
 1510                 return (NULL);
 1511         }
 1512         if ((m->flags & PG_ZERO) == 0)
 1513                 pmap_zero_page(m);
 1514 
 1515         /*
 1516          * Map the pagetable page into the process address space, if
 1517          * it isn't already there.
 1518          */
 1519 
 1520         if (ptepindex >= (NUL2E + NUL1E)) {
 1521                 pd_entry_t *l0;
 1522                 vm_pindex_t l0index;
 1523 
 1524                 l0index = ptepindex - (NUL2E + NUL1E);
 1525                 l0 = &pmap->pm_l0[l0index];
 1526                 pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
 1527                 PTE_SYNC(l0);
 1528         } else if (ptepindex >= NUL2E) {
 1529                 vm_pindex_t l0index, l1index;
 1530                 pd_entry_t *l0, *l1;
 1531                 pd_entry_t tl0;
 1532 
 1533                 l1index = ptepindex - NUL2E;
 1534                 l0index = l1index >> L0_ENTRIES_SHIFT;
 1535 
 1536                 l0 = &pmap->pm_l0[l0index];
 1537                 tl0 = pmap_load(l0);
 1538                 if (tl0 == 0) {
 1539                         /* recurse for allocating page dir */
 1540                         if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
 1541                             lockp) == NULL) {
 1542                                 --m->wire_count;
 1543                                 /* XXX: release mem barrier? */
 1544                                 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 1545                                 vm_page_free_zero(m);
 1546                                 return (NULL);
 1547                         }
 1548                 } else {
 1549                         l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
 1550                         l1pg->wire_count++;
 1551                 }
 1552 
 1553                 l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
 1554                 l1 = &l1[ptepindex & Ln_ADDR_MASK];
 1555                 pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
 1556                 PTE_SYNC(l1);
 1557         } else {
 1558                 vm_pindex_t l0index, l1index;
 1559                 pd_entry_t *l0, *l1, *l2;
 1560                 pd_entry_t tl0, tl1;
 1561 
 1562                 l1index = ptepindex >> Ln_ENTRIES_SHIFT;
 1563                 l0index = l1index >> L0_ENTRIES_SHIFT;
 1564 
 1565                 l0 = &pmap->pm_l0[l0index];
 1566                 tl0 = pmap_load(l0);
 1567                 if (tl0 == 0) {
 1568                         /* recurse for allocating page dir */
 1569                         if (_pmap_alloc_l3(pmap, NUL2E + l1index,
 1570                             lockp) == NULL) {
 1571                                 --m->wire_count;
 1572                                 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 1573                                 vm_page_free_zero(m);
 1574                                 return (NULL);
 1575                         }
 1576                         tl0 = pmap_load(l0);
 1577                         l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
 1578                         l1 = &l1[l1index & Ln_ADDR_MASK];
 1579                 } else {
 1580                         l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
 1581                         l1 = &l1[l1index & Ln_ADDR_MASK];
 1582                         tl1 = pmap_load(l1);
 1583                         if (tl1 == 0) {
 1584                                 /* recurse for allocating page dir */
 1585                                 if (_pmap_alloc_l3(pmap, NUL2E + l1index,
 1586                                     lockp) == NULL) {
 1587                                         --m->wire_count;
 1588                                         /* XXX: release mem barrier? */
 1589                                         atomic_subtract_int(
 1590                                             &vm_cnt.v_wire_count, 1);
 1591                                         vm_page_free_zero(m);
 1592                                         return (NULL);
 1593                                 }
 1594                         } else {
 1595                                 l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
 1596                                 l2pg->wire_count++;
 1597                         }
 1598                 }
 1599 
 1600                 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
 1601                 l2 = &l2[ptepindex & Ln_ADDR_MASK];
 1602                 pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
 1603                 PTE_SYNC(l2);
 1604         }
 1605 
 1606         pmap_resident_count_inc(pmap, 1);
 1607 
 1608         return (m);
 1609 }
 1610 
 1611 static vm_page_t
 1612 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
 1613 {
 1614         vm_pindex_t ptepindex;
 1615         pd_entry_t *pde, tpde;
 1616 #ifdef INVARIANTS
 1617         pt_entry_t *pte;
 1618 #endif
 1619         vm_page_t m;
 1620         int lvl;
 1621 
 1622         /*
 1623          * Calculate pagetable page index
 1624          */
 1625         ptepindex = pmap_l2_pindex(va);
 1626 retry:
 1627         /*
 1628          * Get the page directory entry
 1629          */
 1630         pde = pmap_pde(pmap, va, &lvl);
 1631 
 1632         /*
 1633          * If the page table page is mapped, we just increment the hold count,
 1634          * and activate it. If we get a level 2 pde it will point to a level 3
 1635          * table.
 1636          */
 1637         switch (lvl) {
 1638         case -1:
 1639                 break;
 1640         case 0:
 1641 #ifdef INVARIANTS
 1642                 pte = pmap_l0_to_l1(pde, va);
 1643                 KASSERT(pmap_load(pte) == 0,
 1644                     ("pmap_alloc_l3: TODO: l0 superpages"));
 1645 #endif
 1646                 break;
 1647         case 1:
 1648 #ifdef INVARIANTS
 1649                 pte = pmap_l1_to_l2(pde, va);
 1650                 KASSERT(pmap_load(pte) == 0,
 1651                     ("pmap_alloc_l3: TODO: l1 superpages"));
 1652 #endif
 1653                 break;
 1654         case 2:
 1655                 tpde = pmap_load(pde);
 1656                 if (tpde != 0) {
 1657                         m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
 1658                         m->wire_count++;
 1659                         return (m);
 1660                 }
 1661                 break;
 1662         default:
 1663                 panic("pmap_alloc_l3: Invalid level %d", lvl);
 1664         }
 1665 
 1666         /*
 1667          * Here if the pte page isn't mapped, or if it has been deallocated.
 1668          */
 1669         m = _pmap_alloc_l3(pmap, ptepindex, lockp);
 1670         if (m == NULL && lockp != NULL)
 1671                 goto retry;
 1672 
 1673         return (m);
 1674 }
 1675 
 1676 
 1677 /***************************************************
 1678  * Pmap allocation/deallocation routines.
 1679  ***************************************************/
 1680 
 1681 /*
 1682  * Release any resources held by the given physical map.
 1683  * Called when a pmap initialized by pmap_pinit is being released.
 1684  * Should only be called if the map contains no valid mappings.
 1685  */
 1686 void
 1687 pmap_release(pmap_t pmap)
 1688 {
 1689         vm_page_t m;
 1690 
 1691         KASSERT(pmap->pm_stats.resident_count == 0,
 1692             ("pmap_release: pmap resident count %ld != 0",
 1693             pmap->pm_stats.resident_count));
 1694         KASSERT(vm_radix_is_empty(&pmap->pm_root),
 1695             ("pmap_release: pmap has reserved page table page(s)"));
 1696 
 1697         m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));
 1698 
 1699         m->wire_count--;
 1700         atomic_subtract_int(&vm_cnt.v_wire_count, 1);
 1701         vm_page_free_zero(m);
 1702 }
 1703 
 1704 static int
 1705 kvm_size(SYSCTL_HANDLER_ARGS)
 1706 {
 1707         unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
 1708 
 1709         return sysctl_handle_long(oidp, &ksize, 0, req);
 1710 }
 1711 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
 1712     0, 0, kvm_size, "LU", "Size of KVM");
 1713 
 1714 static int
 1715 kvm_free(SYSCTL_HANDLER_ARGS)
 1716 {
 1717         unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 1718 
 1719         return sysctl_handle_long(oidp, &kfree, 0, req);
 1720 }
 1721 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
 1722     0, 0, kvm_free, "LU", "Amount of KVM free");
 1723 
 1724 /*
 1725  * grow the number of kernel page table entries, if needed
 1726  */
 1727 void
 1728 pmap_growkernel(vm_offset_t addr)
 1729 {
 1730         vm_paddr_t paddr;
 1731         vm_page_t nkpg;
 1732         pd_entry_t *l0, *l1, *l2;
 1733 
 1734         mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 1735 
 1736         addr = roundup2(addr, L2_SIZE);
 1737         if (addr - 1 >= kernel_map->max_offset)
 1738                 addr = kernel_map->max_offset;
 1739         while (kernel_vm_end < addr) {
 1740                 l0 = pmap_l0(kernel_pmap, kernel_vm_end);
 1741                 KASSERT(pmap_load(l0) != 0,
 1742                     ("pmap_growkernel: No level 0 kernel entry"));
 1743 
 1744                 l1 = pmap_l0_to_l1(l0, kernel_vm_end);
 1745                 if (pmap_load(l1) == 0) {
 1746                         /* We need a new PDP entry */
 1747                         nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
 1748                             VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
 1749                             VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 1750                         if (nkpg == NULL)
 1751                                 panic("pmap_growkernel: no memory to grow kernel");
 1752                         if ((nkpg->flags & PG_ZERO) == 0)
 1753                                 pmap_zero_page(nkpg);
 1754                         paddr = VM_PAGE_TO_PHYS(nkpg);
 1755                         pmap_load_store(l1, paddr | L1_TABLE);
 1756                         PTE_SYNC(l1);
 1757                         continue; /* try again */
 1758                 }
 1759                 l2 = pmap_l1_to_l2(l1, kernel_vm_end);
 1760                 if ((pmap_load(l2) & ATTR_AF) != 0) {
 1761                         kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
 1762                         if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 1763                                 kernel_vm_end = kernel_map->max_offset;
 1764                                 break;
 1765                         }
 1766                         continue;
 1767                 }
 1768 
 1769                 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
 1770                     VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 1771                     VM_ALLOC_ZERO);
 1772                 if (nkpg == NULL)
 1773                         panic("pmap_growkernel: no memory to grow kernel");
 1774                 if ((nkpg->flags & PG_ZERO) == 0)
 1775                         pmap_zero_page(nkpg);
 1776                 paddr = VM_PAGE_TO_PHYS(nkpg);
 1777                 pmap_load_store(l2, paddr | L2_TABLE);
 1778                 PTE_SYNC(l2);
 1779                 pmap_invalidate_page(kernel_pmap, kernel_vm_end);
 1780 
 1781                 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
 1782                 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 1783                         kernel_vm_end = kernel_map->max_offset;
 1784                         break;
 1785                 }
 1786         }
 1787 }
 1788 
 1789 
 1790 /***************************************************
 1791  * page management routines.
 1792  ***************************************************/
 1793 
 1794 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
 1795 CTASSERT(_NPCM == 3);
 1796 CTASSERT(_NPCPV == 168);
 1797 
 1798 static __inline struct pv_chunk *
 1799 pv_to_chunk(pv_entry_t pv)
 1800 {
 1801 
 1802         return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 1803 }
 1804 
 1805 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
 1806 
 1807 #define PC_FREE0        0xfffffffffffffffful
 1808 #define PC_FREE1        0xfffffffffffffffful
 1809 #define PC_FREE2        0x000000fffffffffful
 1810 
 1811 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
 1812 
 1813 #if 0
 1814 #ifdef PV_STATS
 1815 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
 1816 
 1817 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
 1818         "Current number of pv entry chunks");
 1819 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
 1820         "Current number of pv entry chunks allocated");
 1821 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
 1822         "Current number of pv entry chunks frees");
 1823 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
 1824         "Number of times tried to get a chunk page but failed.");
 1825 
 1826 static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
 1827 static int pv_entry_spare;
 1828 
 1829 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
 1830         "Current number of pv entry frees");
 1831 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
 1832         "Current number of pv entry allocs");
 1833 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
 1834         "Current number of pv entries");
 1835 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
 1836         "Current number of spare pv entries");
 1837 #endif
 1838 #endif /* 0 */
 1839 
 1840 /*
 1841  * We are in a serious low memory condition.  Resort to
 1842  * drastic measures to free some pages so we can allocate
 1843  * another pv entry chunk.
 1844  *
 1845  * Returns NULL if PV entries were reclaimed from the specified pmap.
 1846  *
 1847  * We do not, however, unmap 2mpages because subsequent accesses will
 1848  * allocate per-page pv entries until repromotion occurs, thereby
 1849  * exacerbating the shortage of free pv entries.
 1850  */
 1851 static vm_page_t
 1852 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
 1853 {
 1854         struct pch new_tail;
 1855         struct pv_chunk *pc;
 1856         struct md_page *pvh;
 1857         pd_entry_t *pde;
 1858         pmap_t pmap;
 1859         pt_entry_t *pte, tpte;
 1860         pv_entry_t pv;
 1861         vm_offset_t va;
 1862         vm_page_t m, m_pc;
 1863         struct spglist free;
 1864         uint64_t inuse;
 1865         int bit, field, freed, lvl;
 1866 
 1867         PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
 1868         KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
 1869         pmap = NULL;
 1870         m_pc = NULL;
 1871         SLIST_INIT(&free);
 1872         TAILQ_INIT(&new_tail);
 1873         mtx_lock(&pv_chunks_mutex);
 1874         while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) {
 1875                 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 1876                 mtx_unlock(&pv_chunks_mutex);
 1877                 if (pmap != pc->pc_pmap) {
 1878                         if (pmap != NULL && pmap != locked_pmap)
 1879                                 PMAP_UNLOCK(pmap);
 1880                         pmap = pc->pc_pmap;
 1881                         /* Avoid deadlock and lock recursion. */
 1882                         if (pmap > locked_pmap) {
 1883                                 RELEASE_PV_LIST_LOCK(lockp);
 1884                                 PMAP_LOCK(pmap);
 1885                         } else if (pmap != locked_pmap &&
 1886                             !PMAP_TRYLOCK(pmap)) {
 1887                                 pmap = NULL;
 1888                                 TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 1889                                 mtx_lock(&pv_chunks_mutex);
 1890                                 continue;
 1891                         }
 1892                 }
 1893 
 1894                 /*
 1895                  * Destroy every non-wired, 4 KB page mapping in the chunk.
 1896                  */
 1897                 freed = 0;
 1898                 for (field = 0; field < _NPCM; field++) {
 1899                         for (inuse = ~pc->pc_map[field] & pc_freemask[field];
 1900                             inuse != 0; inuse &= ~(1UL << bit)) {
 1901                                 bit = ffsl(inuse) - 1;
 1902                                 pv = &pc->pc_pventry[field * 64 + bit];
 1903                                 va = pv->pv_va;
 1904                                 pde = pmap_pde(pmap, va, &lvl);
 1905                                 if (lvl != 2)
 1906                                         continue;
 1907                                 pte = pmap_l2_to_l3(pde, va);
 1908                                 tpte = pmap_load(pte);
 1909                                 if ((tpte & ATTR_SW_WIRED) != 0)
 1910                                         continue;
 1911                                 tpte = pmap_load_clear(pte);
 1912                                 PTE_SYNC(pte);
 1913                                 pmap_invalidate_page(pmap, va);
 1914                                 m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
 1915                                 if (pmap_page_dirty(tpte))
 1916                                         vm_page_dirty(m);
 1917                                 if ((tpte & ATTR_AF) != 0)
 1918                                         vm_page_aflag_set(m, PGA_REFERENCED);
 1919                                 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 1920                                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 1921                                 m->md.pv_gen++;
 1922                                 if (TAILQ_EMPTY(&m->md.pv_list) &&
 1923                                     (m->flags & PG_FICTITIOUS) == 0) {
 1924                                         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 1925                                         if (TAILQ_EMPTY(&pvh->pv_list)) {
 1926                                                 vm_page_aflag_clear(m,
 1927                                                     PGA_WRITEABLE);
 1928                                         }
 1929                                 }
 1930                                 pc->pc_map[field] |= 1UL << bit;
 1931                                 pmap_unuse_l3(pmap, va, pmap_load(pde), &free);
 1932                                 freed++;
 1933                         }
 1934                 }
 1935                 if (freed == 0) {
 1936                         TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 1937                         mtx_lock(&pv_chunks_mutex);
 1938                         continue;
 1939                 }
 1940                 /* Every freed mapping is for a 4 KB page. */
 1941                 pmap_resident_count_dec(pmap, freed);
 1942                 PV_STAT(atomic_add_long(&pv_entry_frees, freed));
 1943                 PV_STAT(atomic_add_int(&pv_entry_spare, freed));
 1944                 PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
 1945                 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 1946                 if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
 1947                     pc->pc_map[2] == PC_FREE2) {
 1948                         PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
 1949                         PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
 1950                         PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
 1951                         /* Entire chunk is free; return it. */
 1952                         m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 1953                         dump_drop_page(m_pc->phys_addr);
 1954                         mtx_lock(&pv_chunks_mutex);
 1955                         break;
 1956                 }
 1957                 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 1958                 TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 1959                 mtx_lock(&pv_chunks_mutex);
 1960                 /* One freed pv entry in locked_pmap is sufficient. */
 1961                 if (pmap == locked_pmap)
 1962                         break;
 1963         }
 1964         TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
 1965         mtx_unlock(&pv_chunks_mutex);
 1966         if (pmap != NULL && pmap != locked_pmap)
 1967                 PMAP_UNLOCK(pmap);
 1968         if (m_pc == NULL && !SLIST_EMPTY(&free)) {
 1969                 m_pc = SLIST_FIRST(&free);
 1970                 SLIST_REMOVE_HEAD(&free, plinks.s.ss);
 1971                 /* Recycle a freed page table page. */
 1972                 m_pc->wire_count = 1;
 1973                 atomic_add_int(&vm_cnt.v_wire_count, 1);
 1974         }
 1975         pmap_free_zero_pages(&free);
 1976         return (m_pc);
 1977 }
 1978 
 1979 /*
 1980  * free the pv_entry back to the free list
 1981  */
 1982 static void
 1983 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 1984 {
 1985         struct pv_chunk *pc;
 1986         int idx, field, bit;
 1987 
 1988         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1989         PV_STAT(atomic_add_long(&pv_entry_frees, 1));
 1990         PV_STAT(atomic_add_int(&pv_entry_spare, 1));
 1991         PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
 1992         pc = pv_to_chunk(pv);
 1993         idx = pv - &pc->pc_pventry[0];
 1994         field = idx / 64;
 1995         bit = idx % 64;
 1996         pc->pc_map[field] |= 1ul << bit;
 1997         if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
 1998             pc->pc_map[2] != PC_FREE2) {
 1999                 /* 98% of the time, pc is already at the head of the list. */
 2000                 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
 2001                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2002                         TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 2003                 }
 2004                 return;
 2005         }
 2006         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2007         free_pv_chunk(pc);
 2008 }
 2009 
 2010 static void
 2011 free_pv_chunk(struct pv_chunk *pc)
 2012 {
 2013         vm_page_t m;
 2014 
 2015         mtx_lock(&pv_chunks_mutex);
 2016         TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
 2017         mtx_unlock(&pv_chunks_mutex);
 2018         PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
 2019         PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
 2020         PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
 2021         /* entire chunk is free, return it */
 2022         m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 2023         dump_drop_page(m->phys_addr);
 2024         vm_page_unwire(m, PQ_NONE);
 2025         vm_page_free(m);
 2026 }
 2027 
 2028 /*
 2029  * Returns a new PV entry, allocating a new PV chunk from the system when
 2030  * needed.  If this PV chunk allocation fails and a PV list lock pointer was
 2031  * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
 2032  * returned.
 2033  *
 2034  * The given PV list lock may be released.
 2035  */
 2036 static pv_entry_t
 2037 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
 2038 {
 2039         int bit, field;
 2040         pv_entry_t pv;
 2041         struct pv_chunk *pc;
 2042         vm_page_t m;
 2043 
 2044         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2045         PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
 2046 retry:
 2047         pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 2048         if (pc != NULL) {
 2049                 for (field = 0; field < _NPCM; field++) {
 2050                         if (pc->pc_map[field]) {
 2051                                 bit = ffsl(pc->pc_map[field]) - 1;
 2052                                 break;
 2053                         }
 2054                 }
 2055                 if (field < _NPCM) {
 2056                         pv = &pc->pc_pventry[field * 64 + bit];
 2057                         pc->pc_map[field] &= ~(1ul << bit);
 2058                         /* If this was the last item, move it to tail */
 2059                         if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
 2060                             pc->pc_map[2] == 0) {
 2061                                 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2062                                 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
 2063                                     pc_list);
 2064                         }
 2065                         PV_STAT(atomic_add_long(&pv_entry_count, 1));
 2066                         PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
 2067                         return (pv);
 2068                 }
 2069         }
 2070         /* No free items, allocate another chunk */
 2071         m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 2072             VM_ALLOC_WIRED);
 2073         if (m == NULL) {
 2074                 if (lockp == NULL) {
 2075                         PV_STAT(pc_chunk_tryfail++);
 2076                         return (NULL);
 2077                 }
 2078                 m = reclaim_pv_chunk(pmap, lockp);
 2079                 if (m == NULL)
 2080                         goto retry;
 2081         }
 2082         PV_STAT(atomic_add_int(&pc_chunk_count, 1));
 2083         PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 2084         dump_add_page(m->phys_addr);
 2085         pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 2086         pc->pc_pmap = pmap;
 2087         pc->pc_map[0] = PC_FREE0 & ~1ul;        /* preallocated bit 0 */
 2088         pc->pc_map[1] = PC_FREE1;
 2089         pc->pc_map[2] = PC_FREE2;
 2090         mtx_lock(&pv_chunks_mutex);
 2091         TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
 2092         mtx_unlock(&pv_chunks_mutex);
 2093         pv = &pc->pc_pventry[0];
 2094         TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 2095         PV_STAT(atomic_add_long(&pv_entry_count, 1));
 2096         PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
 2097         return (pv);
 2098 }
 2099 
 2100 /*
 2101  * Ensure that the number of spare PV entries in the specified pmap meets or
 2102  * exceeds the given count, "needed".
 2103  *
 2104  * The given PV list lock may be released.
 2105  */
 2106 static void
 2107 reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
 2108 {
 2109         struct pch new_tail;
 2110         struct pv_chunk *pc;
 2111         int avail, free;
 2112         vm_page_t m;
 2113 
 2114         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2115         KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
 2116 
 2117         /*
 2118          * Newly allocated PV chunks must be stored in a private list until
 2119          * the required number of PV chunks have been allocated.  Otherwise,
 2120          * reclaim_pv_chunk() could recycle one of these chunks.  In
 2121          * contrast, these chunks must be added to the pmap upon allocation.
 2122          */
 2123         TAILQ_INIT(&new_tail);
 2124 retry:
 2125         avail = 0;
 2126         TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
 2127                 bit_count((bitstr_t *)pc->pc_map, 0,
 2128                     sizeof(pc->pc_map) * NBBY, &free);
 2129                 if (free == 0)
 2130                         break;
 2131                 avail += free;
 2132                 if (avail >= needed)
 2133                         break;
 2134         }
 2135         for (; avail < needed; avail += _NPCPV) {
 2136                 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
 2137                     VM_ALLOC_WIRED);
 2138                 if (m == NULL) {
 2139                         m = reclaim_pv_chunk(pmap, lockp);
 2140                         if (m == NULL)
 2141                                 goto retry;
 2142                 }
 2143                 PV_STAT(atomic_add_int(&pc_chunk_count, 1));
 2144                 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 2145                 dump_add_page(m->phys_addr);
 2146                 pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 2147                 pc->pc_pmap = pmap;
 2148                 pc->pc_map[0] = PC_FREE0;
 2149                 pc->pc_map[1] = PC_FREE1;
 2150                 pc->pc_map[2] = PC_FREE2;
 2151                 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 2152                 TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
 2153                 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
 2154         }
 2155         if (!TAILQ_EMPTY(&new_tail)) {
 2156                 mtx_lock(&pv_chunks_mutex);
 2157                 TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
 2158                 mtx_unlock(&pv_chunks_mutex);
 2159         }
 2160 }
 2161 
 2162 /*
 2163  * First find and then remove the pv entry for the specified pmap and virtual
 2164  * address from the specified pv list.  Returns the pv entry if found and NULL
 2165  * otherwise.  This operation can be performed on pv lists for either 4KB or
 2166  * 2MB page mappings.
 2167  */
 2168 static __inline pv_entry_t
 2169 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 2170 {
 2171         pv_entry_t pv;
 2172 
 2173         TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 2174                 if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
 2175                         TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 2176                         pvh->pv_gen++;
 2177                         break;
 2178                 }
 2179         }
 2180         return (pv);
 2181 }
 2182 
 2183 /*
 2184  * After demotion from a 2MB page mapping to 512 4KB page mappings,
 2185  * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
 2186  * entries for each of the 4KB page mappings.
 2187  */
 2188 static void
 2189 pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
 2190     struct rwlock **lockp)
 2191 {
 2192         struct md_page *pvh;
 2193         struct pv_chunk *pc;
 2194         pv_entry_t pv;
 2195         vm_offset_t va_last;
 2196         vm_page_t m;
 2197         int bit, field;
 2198 
 2199         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2200         KASSERT((pa & L2_OFFSET) == 0,
 2201             ("pmap_pv_demote_l2: pa is not 2mpage aligned"));
 2202         CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 2203 
 2204         /*
 2205          * Transfer the 2mpage's pv entry for this mapping to the first
 2206          * page's pv list.  Once this transfer begins, the pv list lock
 2207          * must not be released until the last pv entry is reinstantiated.
 2208          */
 2209         pvh = pa_to_pvh(pa);
 2210         va = va & ~L2_OFFSET;
 2211         pv = pmap_pvh_remove(pvh, pmap, va);
 2212         KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
 2213         m = PHYS_TO_VM_PAGE(pa);
 2214         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 2215         m->md.pv_gen++;
 2216         /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */
 2217         PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1));
 2218         va_last = va + L2_SIZE - PAGE_SIZE;
 2219         for (;;) {
 2220                 pc = TAILQ_FIRST(&pmap->pm_pvchunk);
 2221                 KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
 2222                     pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare"));
 2223                 for (field = 0; field < _NPCM; field++) {
 2224                         while (pc->pc_map[field]) {
 2225                                 bit = ffsl(pc->pc_map[field]) - 1;
 2226                                 pc->pc_map[field] &= ~(1ul << bit);
 2227                                 pv = &pc->pc_pventry[field * 64 + bit];
 2228                                 va += PAGE_SIZE;
 2229                                 pv->pv_va = va;
 2230                                 m++;
 2231                                 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2232                             ("pmap_pv_demote_l2: page %p is not managed", m));
 2233                                 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 2234                                 m->md.pv_gen++;
 2235                                 if (va == va_last)
 2236                                         goto out;
 2237                         }
 2238                 }
 2239                 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2240                 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 2241         }
 2242 out:
 2243         if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
 2244                 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 2245                 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
 2246         }
 2247         PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1));
 2248         PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1));
 2249 }
 2250 
 2251 /*
 2252  * First find and then destroy the pv entry for the specified pmap and virtual
 2253  * address.  This operation can be performed on pv lists for either 4KB or 2MB
 2254  * page mappings.
 2255  */
 2256 static void
 2257 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
 2258 {
 2259         pv_entry_t pv;
 2260 
 2261         pv = pmap_pvh_remove(pvh, pmap, va);
 2262         KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
 2263         free_pv_entry(pmap, pv);
 2264 }
 2265 
 2266 /*
 2267  * Conditionally create the PV entry for a 4KB page mapping if the required
 2268  * memory can be allocated without resorting to reclamation.
 2269  */
 2270 static boolean_t
 2271 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
 2272     struct rwlock **lockp)
 2273 {
 2274         pv_entry_t pv;
 2275 
 2276         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2277         /* Pass NULL instead of the lock pointer to disable reclamation. */
 2278         if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
 2279                 pv->pv_va = va;
 2280                 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 2281                 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 2282                 m->md.pv_gen++;
 2283                 return (TRUE);
 2284         } else
 2285                 return (FALSE);
 2286 }
 2287 
 2288 /*
 2289  * pmap_remove_l3: do the things to unmap a page in a process
 2290  */
 2291 static int
 2292 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
 2293     pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
 2294 {
 2295         struct md_page *pvh;
 2296         pt_entry_t old_l3;
 2297         vm_page_t m;
 2298 
 2299         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2300         if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
 2301                 cpu_dcache_wb_range(va, L3_SIZE);
 2302         old_l3 = pmap_load_clear(l3);
 2303         PTE_SYNC(l3);
 2304         pmap_invalidate_page(pmap, va);
 2305         if (old_l3 & ATTR_SW_WIRED)
 2306                 pmap->pm_stats.wired_count -= 1;
 2307         pmap_resident_count_dec(pmap, 1);
 2308         if (old_l3 & ATTR_SW_MANAGED) {
 2309                 m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
 2310                 if (pmap_page_dirty(old_l3))
 2311                         vm_page_dirty(m);
 2312                 if (old_l3 & ATTR_AF)
 2313                         vm_page_aflag_set(m, PGA_REFERENCED);
 2314                 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
 2315                 pmap_pvh_free(&m->md, pmap, va);
 2316                 if (TAILQ_EMPTY(&m->md.pv_list) &&
 2317                     (m->flags & PG_FICTITIOUS) == 0) {
 2318                         pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2319                         if (TAILQ_EMPTY(&pvh->pv_list))
 2320                                 vm_page_aflag_clear(m, PGA_WRITEABLE);
 2321                 }
 2322         }
 2323         return (pmap_unuse_l3(pmap, va, l2e, free));
 2324 }
 2325 
 2326 /*
 2327  *      Remove the given range of addresses from the specified map.
 2328  *
 2329  *      It is assumed that the start and end are properly
 2330  *      rounded to the page size.
 2331  */
 2332 void
 2333 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 2334 {
 2335         struct rwlock *lock;
 2336         vm_offset_t va, va_next;
 2337         pd_entry_t *l0, *l1, *l2;
 2338         pt_entry_t l3_paddr, *l3;
 2339         struct spglist free;
 2340 
 2341         /*
 2342          * Perform an unsynchronized read.  This is, however, safe.
 2343          */
 2344         if (pmap->pm_stats.resident_count == 0)
 2345                 return;
 2346 
 2347         SLIST_INIT(&free);
 2348 
 2349         PMAP_LOCK(pmap);
 2350 
 2351         lock = NULL;
 2352         for (; sva < eva; sva = va_next) {
 2353 
 2354                 if (pmap->pm_stats.resident_count == 0)
 2355                         break;
 2356 
 2357                 l0 = pmap_l0(pmap, sva);
 2358                 if (pmap_load(l0) == 0) {
 2359                         va_next = (sva + L0_SIZE) & ~L0_OFFSET;
 2360                         if (va_next < sva)
 2361                                 va_next = eva;
 2362                         continue;
 2363                 }
 2364 
 2365                 l1 = pmap_l0_to_l1(l0, sva);
 2366                 if (pmap_load(l1) == 0) {
 2367                         va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 2368                         if (va_next < sva)
 2369                                 va_next = eva;
 2370                         continue;
 2371                 }
 2372 
 2373                 /*
 2374                  * Calculate index for next page table.
 2375                  */
 2376                 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 2377                 if (va_next < sva)
 2378                         va_next = eva;
 2379 
 2380                 l2 = pmap_l1_to_l2(l1, sva);
 2381                 if (l2 == NULL)
 2382                         continue;
 2383 
 2384                 l3_paddr = pmap_load(l2);
 2385 
 2386                 if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) {
 2387                         /* TODO: Add pmap_remove_l2 */
 2388                         if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET,
 2389                             &lock) == NULL)
 2390                                 continue;
 2391                         l3_paddr = pmap_load(l2);
 2392                 }
 2393 
 2394                 /*
 2395                  * Weed out invalid mappings.
 2396                  */
 2397                 if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
 2398                         continue;
 2399 
 2400                 /*
 2401                  * Limit our scan to either the end of the va represented
 2402                  * by the current page table page, or to the end of the
 2403                  * range being removed.
 2404                  */
 2405                 if (va_next > eva)
 2406                         va_next = eva;
 2407 
 2408                 va = va_next;
 2409                 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 2410                     sva += L3_SIZE) {
 2411                         if (l3 == NULL)
 2412                                 panic("l3 == NULL");
 2413                         if (pmap_load(l3) == 0) {
 2414                                 if (va != va_next) {
 2415                                         pmap_invalidate_range(pmap, va, sva);
 2416                                         va = va_next;
 2417                                 }
 2418                                 continue;
 2419                         }
 2420                         if (va == va_next)
 2421                                 va = sva;
 2422                         if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
 2423                             &lock)) {
 2424                                 sva += L3_SIZE;
 2425                                 break;
 2426                         }
 2427                 }
 2428                 if (va != va_next)
 2429                         pmap_invalidate_range(pmap, va, sva);
 2430         }
 2431         if (lock != NULL)
 2432                 rw_wunlock(lock);
 2433         PMAP_UNLOCK(pmap);
 2434         pmap_free_zero_pages(&free);
 2435 }
 2436 
 2437 /*
 2438  *      Routine:        pmap_remove_all
 2439  *      Function:
 2440  *              Removes this physical page from
 2441  *              all physical maps in which it resides.
 2442  *              Reflects back modify bits to the pager.
 2443  *
 2444  *      Notes:
 2445  *              Original versions of this routine were very
 2446  *              inefficient because they iteratively called
 2447  *              pmap_remove (slow...)
 2448  */
 2449 
 2450 void
 2451 pmap_remove_all(vm_page_t m)
 2452 {
 2453         struct md_page *pvh;
 2454         pv_entry_t pv;
 2455         pmap_t pmap;
 2456         struct rwlock *lock;
 2457         pd_entry_t *pde, tpde;
 2458         pt_entry_t *pte, tpte;
 2459         vm_offset_t va;
 2460         struct spglist free;
 2461         int lvl, pvh_gen, md_gen;
 2462 
 2463         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2464             ("pmap_remove_all: page %p is not managed", m));
 2465         SLIST_INIT(&free);
 2466         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 2467         pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 2468             pa_to_pvh(VM_PAGE_TO_PHYS(m));
 2469 retry:
 2470         rw_wlock(lock);
 2471         while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
 2472                 pmap = PV_PMAP(pv);
 2473                 if (!PMAP_TRYLOCK(pmap)) {
 2474                         pvh_gen = pvh->pv_gen;
 2475                         rw_wunlock(lock);
 2476                         PMAP_LOCK(pmap);
 2477                         rw_wlock(lock);
 2478                         if (pvh_gen != pvh->pv_gen) {
 2479                                 rw_wunlock(lock);
 2480                                 PMAP_UNLOCK(pmap);
 2481                                 goto retry;
 2482                         }
 2483                 }
 2484                 va = pv->pv_va;
 2485                 pte = pmap_pte(pmap, va, &lvl);
 2486                 KASSERT(pte != NULL,
 2487                     ("pmap_remove_all: no page table entry found"));
 2488                 KASSERT(lvl == 2,
 2489                     ("pmap_remove_all: invalid pte level %d", lvl));
 2490 
 2491                 pmap_demote_l2_locked(pmap, pte, va, &lock);
 2492                 PMAP_UNLOCK(pmap);
 2493         }
 2494         while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 2495                 pmap = PV_PMAP(pv);
 2496                 if (!PMAP_TRYLOCK(pmap)) {
 2497                         pvh_gen = pvh->pv_gen;
 2498                         md_gen = m->md.pv_gen;
 2499                         rw_wunlock(lock);
 2500                         PMAP_LOCK(pmap);
 2501                         rw_wlock(lock);
 2502                         if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 2503                                 rw_wunlock(lock);
 2504                                 PMAP_UNLOCK(pmap);
 2505                                 goto retry;
 2506                         }
 2507                 }
 2508                 pmap_resident_count_dec(pmap, 1);
 2509 
 2510                 pde = pmap_pde(pmap, pv->pv_va, &lvl);
 2511                 KASSERT(pde != NULL,
 2512                     ("pmap_remove_all: no page directory entry found"));
 2513                 KASSERT(lvl == 2,
 2514                     ("pmap_remove_all: invalid pde level %d", lvl));
 2515                 tpde = pmap_load(pde);
 2516 
 2517                 pte = pmap_l2_to_l3(pde, pv->pv_va);
 2518                 tpte = pmap_load(pte);
 2519                 if (pmap_is_current(pmap) &&
 2520                     pmap_l3_valid_cacheable(tpte))
 2521                         cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
 2522                 pmap_load_clear(pte);
 2523                 PTE_SYNC(pte);
 2524                 pmap_invalidate_page(pmap, pv->pv_va);
 2525                 if (tpte & ATTR_SW_WIRED)
 2526                         pmap->pm_stats.wired_count--;
 2527                 if ((tpte & ATTR_AF) != 0)
 2528                         vm_page_aflag_set(m, PGA_REFERENCED);
 2529 
 2530                 /*
 2531                  * Update the vm_page_t clean and reference bits.
 2532                  */
 2533                 if (pmap_page_dirty(tpte))
 2534                         vm_page_dirty(m);
 2535                 pmap_unuse_l3(pmap, pv->pv_va, tpde, &free);
 2536                 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 2537                 m->md.pv_gen++;
 2538                 free_pv_entry(pmap, pv);
 2539                 PMAP_UNLOCK(pmap);
 2540         }
 2541         vm_page_aflag_clear(m, PGA_WRITEABLE);
 2542         rw_wunlock(lock);
 2543         pmap_free_zero_pages(&free);
 2544 }
 2545 
 2546 /*
 2547  *      Set the physical protection on the
 2548  *      specified range of this map as requested.
 2549  */
 2550 void
 2551 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 2552 {
 2553         vm_offset_t va, va_next;
 2554         pd_entry_t *l0, *l1, *l2;
 2555         pt_entry_t *l3p, l3, nbits;
 2556 
 2557         KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
 2558         if (prot == VM_PROT_NONE) {
 2559                 pmap_remove(pmap, sva, eva);
 2560                 return;
 2561         }
 2562 
 2563         if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
 2564             (VM_PROT_WRITE | VM_PROT_EXECUTE))
 2565                 return;
 2566 
 2567         PMAP_LOCK(pmap);
 2568         for (; sva < eva; sva = va_next) {
 2569 
 2570                 l0 = pmap_l0(pmap, sva);
 2571                 if (pmap_load(l0) == 0) {
 2572                         va_next = (sva + L0_SIZE) & ~L0_OFFSET;
 2573                         if (va_next < sva)
 2574                                 va_next = eva;
 2575                         continue;
 2576                 }
 2577 
 2578                 l1 = pmap_l0_to_l1(l0, sva);
 2579                 if (pmap_load(l1) == 0) {
 2580                         va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 2581                         if (va_next < sva)
 2582                                 va_next = eva;
 2583                         continue;
 2584                 }
 2585 
 2586                 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 2587                 if (va_next < sva)
 2588                         va_next = eva;
 2589 
 2590                 l2 = pmap_l1_to_l2(l1, sva);
 2591                 if (pmap_load(l2) == 0)
 2592                         continue;
 2593 
 2594                 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
 2595                         l3p = pmap_demote_l2(pmap, l2, sva);
 2596                         if (l3p == NULL)
 2597                                 continue;
 2598                 }
 2599                 KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
 2600                     ("pmap_protect: Invalid L2 entry after demotion"));
 2601 
 2602                 if (va_next > eva)
 2603                         va_next = eva;
 2604 
 2605                 va = va_next;
 2606                 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
 2607                     sva += L3_SIZE) {
 2608                         l3 = pmap_load(l3p);
 2609                         if (!pmap_l3_valid(l3))
 2610                                 continue;
 2611 
 2612                         nbits = 0;
 2613                         if ((prot & VM_PROT_WRITE) == 0) {
 2614                                 if ((l3 & ATTR_SW_MANAGED) &&
 2615                                     pmap_page_dirty(l3)) {
 2616                                         vm_page_dirty(PHYS_TO_VM_PAGE(l3 &
 2617                                             ~ATTR_MASK));
 2618                                 }
 2619                                 nbits |= ATTR_AP(ATTR_AP_RO);
 2620                         }
 2621                         if ((prot & VM_PROT_EXECUTE) == 0)
 2622                                 nbits |= ATTR_XN;
 2623 
 2624                         pmap_set(l3p, nbits);
 2625                         PTE_SYNC(l3p);
 2626                         /* XXX: Use pmap_invalidate_range */
 2627                         pmap_invalidate_page(pmap, sva);
 2628                 }
 2629         }
 2630         PMAP_UNLOCK(pmap);
 2631 }
 2632 
 2633 /*
 2634  * Inserts the specified page table page into the specified pmap's collection
 2635  * of idle page table pages.  Each of a pmap's page table pages is responsible
 2636  * for mapping a distinct range of virtual addresses.  The pmap's collection is
 2637  * ordered by this virtual address range.
 2638  */
 2639 static __inline int
 2640 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
 2641 {
 2642 
 2643         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2644         return (vm_radix_insert(&pmap->pm_root, mpte));
 2645 }
 2646 
 2647 /*
 2648  * Removes the page table page mapping the specified virtual address from the
 2649  * specified pmap's collection of idle page table pages, and returns it.
 2650  * Otherwise, returns NULL if there is no page table page corresponding to the
 2651  * specified virtual address.
 2652  */
 2653 static __inline vm_page_t
 2654 pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
 2655 {
 2656 
 2657         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2658         return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
 2659 }
 2660 
 2661 /*
 2662  * Performs a break-before-make update of a pmap entry. This is needed when
 2663  * either promoting or demoting pages to ensure the TLB doesn't get into an
 2664  * inconsistent state.
 2665  */
 2666 static void
 2667 pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
 2668     vm_offset_t va, vm_size_t size)
 2669 {
 2670         register_t intr;
 2671 
 2672         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2673 
 2674         /*
 2675          * Ensure we don't get switched out with the page table in an
 2676          * inconsistent state. We also need to ensure no interrupts fire
 2677          * as they may make use of an address we are about to invalidate.
 2678          */
 2679         intr = intr_disable();
 2680         critical_enter();
 2681 
 2682         /* Clear the old mapping */
 2683         pmap_load_clear(pte);
 2684         PTE_SYNC(pte);
 2685         pmap_invalidate_range(pmap, va, va + size);
 2686 
 2687         /* Create the new mapping */
 2688         pmap_load_store(pte, newpte);
 2689         PTE_SYNC(pte);
 2690 
 2691         critical_exit();
 2692         intr_restore(intr);
 2693 }
 2694 
 2695 #if VM_NRESERVLEVEL > 0
 2696 /*
 2697  * After promotion from 512 4KB page mappings to a single 2MB page mapping,
 2698  * replace the many pv entries for the 4KB page mappings by a single pv entry
 2699  * for the 2MB page mapping.
 2700  */
 2701 static void
 2702 pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
 2703     struct rwlock **lockp)
 2704 {
 2705         struct md_page *pvh;
 2706         pv_entry_t pv;
 2707         vm_offset_t va_last;
 2708         vm_page_t m;
 2709 
 2710         KASSERT((pa & L2_OFFSET) == 0,
 2711             ("pmap_pv_promote_l2: pa is not 2mpage aligned"));
 2712         CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
 2713 
 2714         /*
 2715          * Transfer the first page's pv entry for this mapping to the 2mpage's
 2716          * pv list.  Aside from avoiding the cost of a call to get_pv_entry(),
 2717          * a transfer avoids the possibility that get_pv_entry() calls
 2718          * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
 2719          * mappings that is being promoted.
 2720          */
 2721         m = PHYS_TO_VM_PAGE(pa);
 2722         va = va & ~L2_OFFSET;
 2723         pv = pmap_pvh_remove(&m->md, pmap, va);
 2724         KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found"));
 2725         pvh = pa_to_pvh(pa);
 2726         TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 2727         pvh->pv_gen++;
 2728         /* Free the remaining NPTEPG - 1 pv entries. */
 2729         va_last = va + L2_SIZE - PAGE_SIZE;
 2730         do {
 2731                 m++;
 2732                 va += PAGE_SIZE;
 2733                 pmap_pvh_free(&m->md, pmap, va);
 2734         } while (va < va_last);
 2735 }
 2736 
 2737 /*
 2738  * Tries to promote the 512, contiguous 4KB page mappings that are within a
 2739  * single level 2 table entry to a single 2MB page mapping.  For promotion
 2740  * to occur, two conditions must be met: (1) the 4KB page mappings must map
 2741  * aligned, contiguous physical memory and (2) the 4KB page mappings must have
 2742  * identical characteristics.
 2743  */
 2744 static void
 2745 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
 2746     struct rwlock **lockp)
 2747 {
 2748         pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
 2749         vm_page_t mpte;
 2750         vm_offset_t sva;
 2751 
 2752         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 2753 
 2754         sva = va & ~L2_OFFSET;
 2755         firstl3 = pmap_l2_to_l3(l2, sva);
 2756         newl2 = pmap_load(firstl3);
 2757 
 2758         /* Check the alingment is valid */
 2759         if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) {
 2760                 atomic_add_long(&pmap_l2_p_failures, 1);
 2761                 CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
 2762                     " in pmap %p", va, pmap);
 2763                 return;
 2764         }
 2765 
 2766         pa = newl2 + L2_SIZE - PAGE_SIZE;
 2767         for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
 2768                 oldl3 = pmap_load(l3);
 2769                 if (oldl3 != pa) {
 2770                         atomic_add_long(&pmap_l2_p_failures, 1);
 2771                         CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
 2772                             " in pmap %p", va, pmap);
 2773                         return;
 2774                 }
 2775                 pa -= PAGE_SIZE;
 2776         }
 2777 
 2778         /*
 2779          * Save the page table page in its current state until the L2
 2780          * mapping the superpage is demoted by pmap_demote_l2() or
 2781          * destroyed by pmap_remove_l3().
 2782          */
 2783         mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
 2784         KASSERT(mpte >= vm_page_array &&
 2785             mpte < &vm_page_array[vm_page_array_size],
 2786             ("pmap_promote_l2: page table page is out of range"));
 2787         KASSERT(mpte->pindex == pmap_l2_pindex(va),
 2788             ("pmap_promote_l2: page table page's pindex is wrong"));
 2789         if (pmap_insert_pt_page(pmap, mpte)) {
 2790                 atomic_add_long(&pmap_l2_p_failures, 1);
 2791                 CTR2(KTR_PMAP,
 2792                     "pmap_promote_l2: failure for va %#lx in pmap %p", va,
 2793                     pmap);
 2794                 return;
 2795         }
 2796 
 2797         if ((newl2 & ATTR_SW_MANAGED) != 0)
 2798                 pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp);
 2799 
 2800         newl2 &= ~ATTR_DESCR_MASK;
 2801         newl2 |= L2_BLOCK;
 2802 
 2803         pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE);
 2804 
 2805         atomic_add_long(&pmap_l2_promotions, 1);
 2806         CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
 2807                     pmap);
 2808 }
 2809 #endif /* VM_NRESERVLEVEL > 0 */
 2810 
 2811 /*
 2812  *      Insert the given physical page (p) at
 2813  *      the specified virtual address (v) in the
 2814  *      target physical map with the protection requested.
 2815  *
 2816  *      If specified, the page will be wired down, meaning
 2817  *      that the related pte can not be reclaimed.
 2818  *
 2819  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 2820  *      or lose information.  That is, this routine must actually
 2821  *      insert this page into the given map NOW.
 2822  */
 2823 int
 2824 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 2825     u_int flags, int8_t psind __unused)
 2826 {
 2827         struct rwlock *lock;
 2828         pd_entry_t *pde;
 2829         pt_entry_t new_l3, orig_l3;
 2830         pt_entry_t *l2, *l3;
 2831         pv_entry_t pv;
 2832         vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa;
 2833         vm_page_t mpte, om, l1_m, l2_m, l3_m;
 2834         boolean_t nosleep;
 2835         int lvl;
 2836 
 2837         va = trunc_page(va);
 2838         if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 2839                 VM_OBJECT_ASSERT_LOCKED(m->object);
 2840         pa = VM_PAGE_TO_PHYS(m);
 2841         new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
 2842             L3_PAGE);
 2843         if ((prot & VM_PROT_WRITE) == 0)
 2844                 new_l3 |= ATTR_AP(ATTR_AP_RO);
 2845         if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
 2846                 new_l3 |= ATTR_XN;
 2847         if ((flags & PMAP_ENTER_WIRED) != 0)
 2848                 new_l3 |= ATTR_SW_WIRED;
 2849         if ((va >> 63) == 0)
 2850                 new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
 2851 
 2852         CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
 2853 
 2854         mpte = NULL;
 2855 
 2856         lock = NULL;
 2857         PMAP_LOCK(pmap);
 2858 
 2859         pde = pmap_pde(pmap, va, &lvl);
 2860         if (pde != NULL && lvl == 1) {
 2861                 l2 = pmap_l1_to_l2(pde, va);
 2862                 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
 2863                     (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET,
 2864                     &lock)) != NULL) {
 2865                         l3 = &l3[pmap_l3_index(va)];
 2866                         if (va < VM_MAXUSER_ADDRESS) {
 2867                                 mpte = PHYS_TO_VM_PAGE(
 2868                                     pmap_load(l2) & ~ATTR_MASK);
 2869                                 mpte->wire_count++;
 2870                         }
 2871                         goto havel3;
 2872                 }
 2873         }
 2874 
 2875         if (va < VM_MAXUSER_ADDRESS) {
 2876                 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
 2877                 mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
 2878                 if (mpte == NULL && nosleep) {
 2879                         CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
 2880                         if (lock != NULL)
 2881                                 rw_wunlock(lock);
 2882                         PMAP_UNLOCK(pmap);
 2883                         return (KERN_RESOURCE_SHORTAGE);
 2884                 }
 2885                 pde = pmap_pde(pmap, va, &lvl);
 2886                 KASSERT(pde != NULL,
 2887                     ("pmap_enter: Invalid page entry, va: 0x%lx", va));
 2888                 KASSERT(lvl == 2,
 2889                     ("pmap_enter: Invalid level %d", lvl));
 2890 
 2891                 l3 = pmap_l2_to_l3(pde, va);
 2892         } else {
 2893                 /*
 2894                  * If we get a level 2 pde it must point to a level 3 entry
 2895                  * otherwise we will need to create the intermediate tables
 2896                  */
 2897                 if (lvl < 2) {
 2898                         switch(lvl) {
 2899                         default:
 2900                         case -1:
 2901                                 /* Get the l0 pde to update */
 2902                                 pde = pmap_l0(pmap, va);
 2903                                 KASSERT(pde != NULL, ("..."));
 2904 
 2905                                 l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 2906                                     VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 2907                                     VM_ALLOC_ZERO);
 2908                                 if (l1_m == NULL)
 2909                                         panic("pmap_enter: l1 pte_m == NULL");
 2910                                 if ((l1_m->flags & PG_ZERO) == 0)
 2911                                         pmap_zero_page(l1_m);
 2912 
 2913                                 l1_pa = VM_PAGE_TO_PHYS(l1_m);
 2914                                 pmap_load_store(pde, l1_pa | L0_TABLE);
 2915                                 PTE_SYNC(pde);
 2916                                 /* FALLTHROUGH */
 2917                         case 0:
 2918                                 /* Get the l1 pde to update */
 2919                                 pde = pmap_l1_to_l2(pde, va);
 2920                                 KASSERT(pde != NULL, ("..."));
 2921 
 2922                                 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 2923                                     VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 2924                                     VM_ALLOC_ZERO);
 2925                                 if (l2_m == NULL)
 2926                                         panic("pmap_enter: l2 pte_m == NULL");
 2927                                 if ((l2_m->flags & PG_ZERO) == 0)
 2928                                         pmap_zero_page(l2_m);
 2929 
 2930                                 l2_pa = VM_PAGE_TO_PHYS(l2_m);
 2931                                 pmap_load_store(pde, l2_pa | L1_TABLE);
 2932                                 PTE_SYNC(pde);
 2933                                 /* FALLTHROUGH */
 2934                         case 1:
 2935                                 /* Get the l2 pde to update */
 2936                                 pde = pmap_l1_to_l2(pde, va);
 2937 
 2938                                 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 2939                                     VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
 2940                                     VM_ALLOC_ZERO);
 2941                                 if (l3_m == NULL)
 2942                                         panic("pmap_enter: l3 pte_m == NULL");
 2943                                 if ((l3_m->flags & PG_ZERO) == 0)
 2944                                         pmap_zero_page(l3_m);
 2945 
 2946                                 l3_pa = VM_PAGE_TO_PHYS(l3_m);
 2947                                 pmap_load_store(pde, l3_pa | L2_TABLE);
 2948                                 PTE_SYNC(pde);
 2949                                 break;
 2950                         }
 2951                 }
 2952                 l3 = pmap_l2_to_l3(pde, va);
 2953                 pmap_invalidate_page(pmap, va);
 2954         }
 2955 havel3:
 2956 
 2957         om = NULL;
 2958         orig_l3 = pmap_load(l3);
 2959         opa = orig_l3 & ~ATTR_MASK;
 2960 
 2961         /*
 2962          * Is the specified virtual address already mapped?
 2963          */
 2964         if (pmap_l3_valid(orig_l3)) {
 2965                 /*
 2966                  * Wiring change, just update stats. We don't worry about
 2967                  * wiring PT pages as they remain resident as long as there
 2968                  * are valid mappings in them. Hence, if a user page is wired,
 2969                  * the PT page will be also.
 2970                  */
 2971                 if ((flags & PMAP_ENTER_WIRED) != 0 &&
 2972                     (orig_l3 & ATTR_SW_WIRED) == 0)
 2973                         pmap->pm_stats.wired_count++;
 2974                 else if ((flags & PMAP_ENTER_WIRED) == 0 &&
 2975                     (orig_l3 & ATTR_SW_WIRED) != 0)
 2976                         pmap->pm_stats.wired_count--;
 2977 
 2978                 /*
 2979                  * Remove the extra PT page reference.
 2980                  */
 2981                 if (mpte != NULL) {
 2982                         mpte->wire_count--;
 2983                         KASSERT(mpte->wire_count > 0,
 2984                             ("pmap_enter: missing reference to page table page,"
 2985                              " va: 0x%lx", va));
 2986                 }
 2987 
 2988                 /*
 2989                  * Has the physical page changed?
 2990                  */
 2991                 if (opa == pa) {
 2992                         /*
 2993                          * No, might be a protection or wiring change.
 2994                          */
 2995                         if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
 2996                                 new_l3 |= ATTR_SW_MANAGED;
 2997                                 if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
 2998                                     ATTR_AP(ATTR_AP_RW)) {
 2999                                         vm_page_aflag_set(m, PGA_WRITEABLE);
 3000                                 }
 3001                         }
 3002                         goto validate;
 3003                 }
 3004 
 3005                 /* Flush the cache, there might be uncommitted data in it */
 3006                 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
 3007                         cpu_dcache_wb_range(va, L3_SIZE);
 3008         } else {
 3009                 /*
 3010                  * Increment the counters.
 3011                  */
 3012                 if ((new_l3 & ATTR_SW_WIRED) != 0)
 3013                         pmap->pm_stats.wired_count++;
 3014                 pmap_resident_count_inc(pmap, 1);
 3015         }
 3016         /*
 3017          * Enter on the PV list if part of our managed memory.
 3018          */
 3019         if ((m->oflags & VPO_UNMANAGED) == 0) {
 3020                 new_l3 |= ATTR_SW_MANAGED;
 3021                 pv = get_pv_entry(pmap, &lock);
 3022                 pv->pv_va = va;
 3023                 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
 3024                 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 3025                 m->md.pv_gen++;
 3026                 if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
 3027                         vm_page_aflag_set(m, PGA_WRITEABLE);
 3028         }
 3029 
 3030         /*
 3031          * Update the L3 entry.
 3032          */
 3033         if (orig_l3 != 0) {
 3034 validate:
 3035                 orig_l3 = pmap_load(l3);
 3036                 opa = orig_l3 & ~ATTR_MASK;
 3037 
 3038                 if (opa != pa) {
 3039                         pmap_update_entry(pmap, l3, new_l3, va, PAGE_SIZE);
 3040                         if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
 3041                                 om = PHYS_TO_VM_PAGE(opa);
 3042                                 if (pmap_page_dirty(orig_l3))
 3043                                         vm_page_dirty(om);
 3044                                 if ((orig_l3 & ATTR_AF) != 0)
 3045                                         vm_page_aflag_set(om, PGA_REFERENCED);
 3046                                 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
 3047                                 pmap_pvh_free(&om->md, pmap, va);
 3048                                 if ((om->aflags & PGA_WRITEABLE) != 0 &&
 3049                                     TAILQ_EMPTY(&om->md.pv_list) &&
 3050                                     ((om->flags & PG_FICTITIOUS) != 0 ||
 3051                                     TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
 3052                                         vm_page_aflag_clear(om, PGA_WRITEABLE);
 3053                         }
 3054                 } else {
 3055                         pmap_load_store(l3, new_l3);
 3056                         PTE_SYNC(l3);
 3057                         pmap_invalidate_page(pmap, va);
 3058                         if (pmap_page_dirty(orig_l3) &&
 3059                             (orig_l3 & ATTR_SW_MANAGED) != 0)
 3060                                 vm_page_dirty(m);
 3061                 }
 3062         } else {
 3063                 pmap_load_store(l3, new_l3);
 3064         }
 3065 
 3066         PTE_SYNC(l3);
 3067         pmap_invalidate_page(pmap, va);
 3068 
 3069         if (pmap != pmap_kernel()) {
 3070                 if (pmap == &curproc->p_vmspace->vm_pmap &&
 3071                     (prot & VM_PROT_EXECUTE) != 0)
 3072                         cpu_icache_sync_range(va, PAGE_SIZE);
 3073 
 3074 #if VM_NRESERVLEVEL > 0
 3075                 if ((mpte == NULL || mpte->wire_count == NL3PG) &&
 3076                     pmap_superpages_enabled() &&
 3077                     (m->flags & PG_FICTITIOUS) == 0 &&
 3078                     vm_reserv_level_iffullpop(m) == 0) {
 3079                         pmap_promote_l2(pmap, pde, va, &lock);
 3080                 }
 3081 #endif
 3082         }
 3083 
 3084         if (lock != NULL)
 3085                 rw_wunlock(lock);
 3086         PMAP_UNLOCK(pmap);
 3087         return (KERN_SUCCESS);
 3088 }
 3089 
 3090 /*
 3091  * Maps a sequence of resident pages belonging to the same object.
 3092  * The sequence begins with the given page m_start.  This page is
 3093  * mapped at the given virtual address start.  Each subsequent page is
 3094  * mapped at a virtual address that is offset from start by the same
 3095  * amount as the page is offset from m_start within the object.  The
 3096  * last page in the sequence is the page with the largest offset from
 3097  * m_start that can be mapped at a virtual address less than the given
 3098  * virtual address end.  Not every virtual page between start and end
 3099  * is mapped; only those for which a resident page exists with the
 3100  * corresponding offset from m_start are mapped.
 3101  */
 3102 void
 3103 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
 3104     vm_page_t m_start, vm_prot_t prot)
 3105 {
 3106         struct rwlock *lock;
 3107         vm_offset_t va;
 3108         vm_page_t m, mpte;
 3109         vm_pindex_t diff, psize;
 3110 
 3111         VM_OBJECT_ASSERT_LOCKED(m_start->object);
 3112 
 3113         psize = atop(end - start);
 3114         mpte = NULL;
 3115         m = m_start;
 3116         lock = NULL;
 3117         PMAP_LOCK(pmap);
 3118         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 3119                 va = start + ptoa(diff);
 3120                 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
 3121                 m = TAILQ_NEXT(m, listq);
 3122         }
 3123         if (lock != NULL)
 3124                 rw_wunlock(lock);
 3125         PMAP_UNLOCK(pmap);
 3126 }
 3127 
 3128 /*
 3129  * this code makes some *MAJOR* assumptions:
 3130  * 1. Current pmap & pmap exists.
 3131  * 2. Not wired.
 3132  * 3. Read access.
 3133  * 4. No page table pages.
 3134  * but is *MUCH* faster than pmap_enter...
 3135  */
 3136 
 3137 void
 3138 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 3139 {
 3140         struct rwlock *lock;
 3141 
 3142         lock = NULL;
 3143         PMAP_LOCK(pmap);
 3144         (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
 3145         if (lock != NULL)
 3146                 rw_wunlock(lock);
 3147         PMAP_UNLOCK(pmap);
 3148 }
 3149 
 3150 static vm_page_t
 3151 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
 3152     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
 3153 {
 3154         struct spglist free;
 3155         pd_entry_t *pde;
 3156         pt_entry_t *l2, *l3;
 3157         vm_paddr_t pa;
 3158         int lvl;
 3159 
 3160         KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 3161             (m->oflags & VPO_UNMANAGED) != 0,
 3162             ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 3163         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 3164 
 3165         CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
 3166         /*
 3167          * In the case that a page table page is not
 3168          * resident, we are creating it here.
 3169          */
 3170         if (va < VM_MAXUSER_ADDRESS) {
 3171                 vm_pindex_t l2pindex;
 3172 
 3173                 /*
 3174                  * Calculate pagetable page index
 3175                  */
 3176                 l2pindex = pmap_l2_pindex(va);
 3177                 if (mpte && (mpte->pindex == l2pindex)) {
 3178                         mpte->wire_count++;
 3179                 } else {
 3180                         /*
 3181                          * Get the l2 entry
 3182                          */
 3183                         pde = pmap_pde(pmap, va, &lvl);
 3184 
 3185                         /*
 3186                          * If the page table page is mapped, we just increment
 3187                          * the hold count, and activate it.  Otherwise, we
 3188                          * attempt to allocate a page table page.  If this
 3189                          * attempt fails, we don't retry.  Instead, we give up.
 3190                          */
 3191                         if (lvl == 1) {
 3192                                 l2 = pmap_l1_to_l2(pde, va);
 3193                                 if ((pmap_load(l2) & ATTR_DESCR_MASK) ==
 3194                                     L2_BLOCK)
 3195                                         return (NULL);
 3196                         }
 3197                         if (lvl == 2 && pmap_load(pde) != 0) {
 3198                                 mpte =
 3199                                     PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
 3200                                 mpte->wire_count++;
 3201                         } else {
 3202                                 /*
 3203                                  * Pass NULL instead of the PV list lock
 3204                                  * pointer, because we don't intend to sleep.
 3205                                  */
 3206                                 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
 3207                                 if (mpte == NULL)
 3208                                         return (mpte);
 3209                         }
 3210                 }
 3211                 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
 3212                 l3 = &l3[pmap_l3_index(va)];
 3213         } else {
 3214                 mpte = NULL;
 3215                 pde = pmap_pde(kernel_pmap, va, &lvl);
 3216                 KASSERT(pde != NULL,
 3217                     ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
 3218                      va));
 3219                 KASSERT(lvl == 2,
 3220                     ("pmap_enter_quick_locked: Invalid level %d", lvl));
 3221                 l3 = pmap_l2_to_l3(pde, va);
 3222         }
 3223 
 3224         if (pmap_load(l3) != 0) {
 3225                 if (mpte != NULL) {
 3226                         mpte->wire_count--;
 3227                         mpte = NULL;
 3228                 }
 3229                 return (mpte);
 3230         }
 3231 
 3232         /*
 3233          * Enter on the PV list if part of our managed memory.
 3234          */
 3235         if ((m->oflags & VPO_UNMANAGED) == 0 &&
 3236             !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
 3237                 if (mpte != NULL) {
 3238                         SLIST_INIT(&free);
 3239                         if (pmap_unwire_l3(pmap, va, mpte, &free)) {
 3240                                 pmap_invalidate_page(pmap, va);
 3241                                 pmap_free_zero_pages(&free);
 3242                         }
 3243                         mpte = NULL;
 3244                 }
 3245                 return (mpte);
 3246         }
 3247 
 3248         /*
 3249          * Increment counters
 3250          */
 3251         pmap_resident_count_inc(pmap, 1);
 3252 
 3253         pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
 3254             ATTR_AP(ATTR_AP_RO) | L3_PAGE;
 3255         if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
 3256                 pa |= ATTR_XN;
 3257         else if (va < VM_MAXUSER_ADDRESS)
 3258                 pa |= ATTR_PXN;
 3259 
 3260         /*
 3261          * Now validate mapping with RO protection
 3262          */
 3263         if ((m->oflags & VPO_UNMANAGED) == 0)
 3264                 pa |= ATTR_SW_MANAGED;
 3265         pmap_load_store(l3, pa);
 3266         PTE_SYNC(l3);
 3267         pmap_invalidate_page(pmap, va);
 3268         return (mpte);
 3269 }
 3270 
 3271 /*
 3272  * This code maps large physical mmap regions into the
 3273  * processor address space.  Note that some shortcuts
 3274  * are taken, but the code works.
 3275  */
 3276 void
 3277 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
 3278     vm_pindex_t pindex, vm_size_t size)
 3279 {
 3280 
 3281         VM_OBJECT_ASSERT_WLOCKED(object);
 3282         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 3283             ("pmap_object_init_pt: non-device object"));
 3284 }
 3285 
 3286 /*
 3287  *      Clear the wired attribute from the mappings for the specified range of
 3288  *      addresses in the given pmap.  Every valid mapping within that range
 3289  *      must have the wired attribute set.  In contrast, invalid mappings
 3290  *      cannot have the wired attribute set, so they are ignored.
 3291  *
 3292  *      The wired attribute of the page table entry is not a hardware feature,
 3293  *      so there is no need to invalidate any TLB entries.
 3294  */
 3295 void
 3296 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 3297 {
 3298         vm_offset_t va_next;
 3299         pd_entry_t *l0, *l1, *l2;
 3300         pt_entry_t *l3;
 3301 
 3302         PMAP_LOCK(pmap);
 3303         for (; sva < eva; sva = va_next) {
 3304                 l0 = pmap_l0(pmap, sva);
 3305                 if (pmap_load(l0) == 0) {
 3306                         va_next = (sva + L0_SIZE) & ~L0_OFFSET;
 3307                         if (va_next < sva)
 3308                                 va_next = eva;
 3309                         continue;
 3310                 }
 3311 
 3312                 l1 = pmap_l0_to_l1(l0, sva);
 3313                 if (pmap_load(l1) == 0) {
 3314                         va_next = (sva + L1_SIZE) & ~L1_OFFSET;
 3315                         if (va_next < sva)
 3316                                 va_next = eva;
 3317                         continue;
 3318                 }
 3319 
 3320                 va_next = (sva + L2_SIZE) & ~L2_OFFSET;
 3321                 if (va_next < sva)
 3322                         va_next = eva;
 3323 
 3324                 l2 = pmap_l1_to_l2(l1, sva);
 3325                 if (pmap_load(l2) == 0)
 3326                         continue;
 3327 
 3328                 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
 3329                         l3 = pmap_demote_l2(pmap, l2, sva);
 3330                         if (l3 == NULL)
 3331                                 continue;
 3332                 }
 3333                 KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
 3334                     ("pmap_unwire: Invalid l2 entry after demotion"));
 3335 
 3336                 if (va_next > eva)
 3337                         va_next = eva;
 3338                 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
 3339                     sva += L3_SIZE) {
 3340                         if (pmap_load(l3) == 0)
 3341                                 continue;
 3342                         if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
 3343                                 panic("pmap_unwire: l3 %#jx is missing "
 3344                                     "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
 3345 
 3346                         /*
 3347                          * PG_W must be cleared atomically.  Although the pmap
 3348                          * lock synchronizes access to PG_W, another processor
 3349                          * could be setting PG_M and/or PG_A concurrently.
 3350                          */
 3351                         atomic_clear_long(l3, ATTR_SW_WIRED);
 3352                         pmap->pm_stats.wired_count--;
 3353                 }
 3354         }
 3355         PMAP_UNLOCK(pmap);
 3356 }
 3357 
 3358 /*
 3359  *      Copy the range specified by src_addr/len
 3360  *      from the source map to the range dst_addr/len
 3361  *      in the destination map.
 3362  *
 3363  *      This routine is only advisory and need not do anything.
 3364  */
 3365 
 3366 void
 3367 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 3368     vm_offset_t src_addr)
 3369 {
 3370 }
 3371 
 3372 /*
 3373  *      pmap_zero_page zeros the specified hardware page by mapping
 3374  *      the page into KVM and using bzero to clear its contents.
 3375  */
 3376 void
 3377 pmap_zero_page(vm_page_t m)
 3378 {
 3379         vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 3380 
 3381         pagezero((void *)va);
 3382 }
 3383 
 3384 /*
 3385  *      pmap_zero_page_area zeros the specified hardware page by mapping
 3386  *      the page into KVM and using bzero to clear its contents.
 3387  *
 3388  *      off and size may not cover an area beyond a single hardware page.
 3389  */
 3390 void
 3391 pmap_zero_page_area(vm_page_t m, int off, int size)
 3392 {
 3393         vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 3394 
 3395         if (off == 0 && size == PAGE_SIZE)
 3396                 pagezero((void *)va);
 3397         else
 3398                 bzero((char *)va + off, size);
 3399 }
 3400 
 3401 /*
 3402  *      pmap_zero_page_idle zeros the specified hardware page by mapping
 3403  *      the page into KVM and using bzero to clear its contents.  This
 3404  *      is intended to be called from the vm_pagezero process only and
 3405  *      outside of Giant.
 3406  */
 3407 void
 3408 pmap_zero_page_idle(vm_page_t m)
 3409 {
 3410         vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
 3411 
 3412         pagezero((void *)va);
 3413 }
 3414 
 3415 /*
 3416  *      pmap_copy_page copies the specified (machine independent)
 3417  *      page by mapping the page into virtual memory and using
 3418  *      bcopy to copy the page, one machine dependent page at a
 3419  *      time.
 3420  */
 3421 void
 3422 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
 3423 {
 3424         vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
 3425         vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
 3426 
 3427         pagecopy((void *)src, (void *)dst);
 3428 }
 3429 
 3430 int unmapped_buf_allowed = 1;
 3431 
 3432 void
 3433 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
 3434     vm_offset_t b_offset, int xfersize)
 3435 {
 3436         void *a_cp, *b_cp;
 3437         vm_page_t m_a, m_b;
 3438         vm_paddr_t p_a, p_b;
 3439         vm_offset_t a_pg_offset, b_pg_offset;
 3440         int cnt;
 3441 
 3442         while (xfersize > 0) {
 3443                 a_pg_offset = a_offset & PAGE_MASK;
 3444                 m_a = ma[a_offset >> PAGE_SHIFT];
 3445                 p_a = m_a->phys_addr;
 3446                 b_pg_offset = b_offset & PAGE_MASK;
 3447                 m_b = mb[b_offset >> PAGE_SHIFT];
 3448                 p_b = m_b->phys_addr;
 3449                 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 3450                 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 3451                 if (__predict_false(!PHYS_IN_DMAP(p_a))) {
 3452                         panic("!DMAP a %lx", p_a);
 3453                 } else {
 3454                         a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
 3455                 }
 3456                 if (__predict_false(!PHYS_IN_DMAP(p_b))) {
 3457                         panic("!DMAP b %lx", p_b);
 3458                 } else {
 3459                         b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
 3460                 }
 3461                 bcopy(a_cp, b_cp, cnt);
 3462                 a_offset += cnt;
 3463                 b_offset += cnt;
 3464                 xfersize -= cnt;
 3465         }
 3466 }
 3467 
 3468 vm_offset_t
 3469 pmap_quick_enter_page(vm_page_t m)
 3470 {
 3471 
 3472         return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
 3473 }
 3474 
 3475 void
 3476 pmap_quick_remove_page(vm_offset_t addr)
 3477 {
 3478 }
 3479 
 3480 /*
 3481  * Returns true if the pmap's pv is one of the first
 3482  * 16 pvs linked to from this page.  This count may
 3483  * be changed upwards or downwards in the future; it
 3484  * is only necessary that true be returned for a small
 3485  * subset of pmaps for proper page aging.
 3486  */
 3487 boolean_t
 3488 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
 3489 {
 3490         struct md_page *pvh;
 3491         struct rwlock *lock;
 3492         pv_entry_t pv;
 3493         int loops = 0;
 3494         boolean_t rv;
 3495 
 3496         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3497             ("pmap_page_exists_quick: page %p is not managed", m));
 3498         rv = FALSE;
 3499         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3500         rw_rlock(lock);
 3501         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 3502                 if (PV_PMAP(pv) == pmap) {
 3503                         rv = TRUE;
 3504                         break;
 3505                 }
 3506                 loops++;
 3507                 if (loops >= 16)
 3508                         break;
 3509         }
 3510         if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
 3511                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 3512                 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 3513                         if (PV_PMAP(pv) == pmap) {
 3514                                 rv = TRUE;
 3515                                 break;
 3516                         }
 3517                         loops++;
 3518                         if (loops >= 16)
 3519                                 break;
 3520                 }
 3521         }
 3522         rw_runlock(lock);
 3523         return (rv);
 3524 }
 3525 
 3526 /*
 3527  *      pmap_page_wired_mappings:
 3528  *
 3529  *      Return the number of managed mappings to the given physical page
 3530  *      that are wired.
 3531  */
 3532 int
 3533 pmap_page_wired_mappings(vm_page_t m)
 3534 {
 3535         struct rwlock *lock;
 3536         struct md_page *pvh;
 3537         pmap_t pmap;
 3538         pt_entry_t *pte;
 3539         pv_entry_t pv;
 3540         int count, lvl, md_gen, pvh_gen;
 3541 
 3542         if ((m->oflags & VPO_UNMANAGED) != 0)
 3543                 return (0);
 3544         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3545         rw_rlock(lock);
 3546 restart:
 3547         count = 0;
 3548         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 3549                 pmap = PV_PMAP(pv);
 3550                 if (!PMAP_TRYLOCK(pmap)) {
 3551                         md_gen = m->md.pv_gen;
 3552                         rw_runlock(lock);
 3553                         PMAP_LOCK(pmap);
 3554                         rw_rlock(lock);
 3555                         if (md_gen != m->md.pv_gen) {
 3556                                 PMAP_UNLOCK(pmap);
 3557                                 goto restart;
 3558                         }
 3559                 }
 3560                 pte = pmap_pte(pmap, pv->pv_va, &lvl);
 3561                 if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
 3562                         count++;
 3563                 PMAP_UNLOCK(pmap);
 3564         }
 3565         if ((m->flags & PG_FICTITIOUS) == 0) {
 3566                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 3567                 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 3568                         pmap = PV_PMAP(pv);
 3569                         if (!PMAP_TRYLOCK(pmap)) {
 3570                                 md_gen = m->md.pv_gen;
 3571                                 pvh_gen = pvh->pv_gen;
 3572                                 rw_runlock(lock);
 3573                                 PMAP_LOCK(pmap);
 3574                                 rw_rlock(lock);
 3575                                 if (md_gen != m->md.pv_gen ||
 3576                                     pvh_gen != pvh->pv_gen) {
 3577                                         PMAP_UNLOCK(pmap);
 3578                                         goto restart;
 3579                                 }
 3580                         }
 3581                         pte = pmap_pte(pmap, pv->pv_va, &lvl);
 3582                         if (pte != NULL &&
 3583                             (pmap_load(pte) & ATTR_SW_WIRED) != 0)
 3584                                 count++;
 3585                         PMAP_UNLOCK(pmap);
 3586                 }
 3587         }
 3588         rw_runlock(lock);
 3589         return (count);
 3590 }
 3591 
 3592 /*
 3593  * Destroy all managed, non-wired mappings in the given user-space
 3594  * pmap.  This pmap cannot be active on any processor besides the
 3595  * caller.
 3596  *
 3597  * This function cannot be applied to the kernel pmap.  Moreover, it
 3598  * is not intended for general use.  It is only to be used during
 3599  * process termination.  Consequently, it can be implemented in ways
 3600  * that make it faster than pmap_remove().  First, it can more quickly
 3601  * destroy mappings by iterating over the pmap's collection of PV
 3602  * entries, rather than searching the page table.  Second, it doesn't
 3603  * have to test and clear the page table entries atomically, because
 3604  * no processor is currently accessing the user address space.  In
 3605  * particular, a page table entry's dirty bit won't change state once
 3606  * this function starts.
 3607  */
 3608 void
 3609 pmap_remove_pages(pmap_t pmap)
 3610 {
 3611         pd_entry_t *pde;
 3612         pt_entry_t *pte, tpte;
 3613         struct spglist free;
 3614         vm_page_t m, ml3, mt;
 3615         pv_entry_t pv;
 3616         struct md_page *pvh;
 3617         struct pv_chunk *pc, *npc;
 3618         struct rwlock *lock;
 3619         int64_t bit;
 3620         uint64_t inuse, bitmask;
 3621         int allfree, field, freed, idx, lvl;
 3622         vm_paddr_t pa;
 3623 
 3624         lock = NULL;
 3625 
 3626         SLIST_INIT(&free);
 3627         PMAP_LOCK(pmap);
 3628         TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 3629                 allfree = 1;
 3630                 freed = 0;
 3631                 for (field = 0; field < _NPCM; field++) {
 3632                         inuse = ~pc->pc_map[field] & pc_freemask[field];
 3633                         while (inuse != 0) {
 3634                                 bit = ffsl(inuse) - 1;
 3635                                 bitmask = 1UL << bit;
 3636                                 idx = field * 64 + bit;
 3637                                 pv = &pc->pc_pventry[idx];
 3638                                 inuse &= ~bitmask;
 3639 
 3640                                 pde = pmap_pde(pmap, pv->pv_va, &lvl);
 3641                                 KASSERT(pde != NULL,
 3642                                     ("Attempting to remove an unmapped page"));
 3643 
 3644                                 switch(lvl) {
 3645                                 case 1:
 3646                                         pte = pmap_l1_to_l2(pde, pv->pv_va);
 3647                                         tpte = pmap_load(pte); 
 3648                                         KASSERT((tpte & ATTR_DESCR_MASK) ==
 3649                                             L2_BLOCK,
 3650                                             ("Attempting to remove an invalid "
 3651                                             "block: %lx", tpte));
 3652                                         tpte = pmap_load(pte);
 3653                                         break;
 3654                                 case 2:
 3655                                         pte = pmap_l2_to_l3(pde, pv->pv_va);
 3656                                         tpte = pmap_load(pte);
 3657                                         KASSERT((tpte & ATTR_DESCR_MASK) ==
 3658                                             L3_PAGE,
 3659                                             ("Attempting to remove an invalid "
 3660                                              "page: %lx", tpte));
 3661                                         break;
 3662                                 default:
 3663                                         panic(
 3664                                             "Invalid page directory level: %d",
 3665                                             lvl);
 3666                                 }
 3667 
 3668 /*
 3669  * We cannot remove wired pages from a process' mapping at this time
 3670  */
 3671                                 if (tpte & ATTR_SW_WIRED) {
 3672                                         allfree = 0;
 3673                                         continue;
 3674                                 }
 3675 
 3676                                 pa = tpte & ~ATTR_MASK;
 3677 
 3678                                 m = PHYS_TO_VM_PAGE(pa);
 3679                                 KASSERT(m->phys_addr == pa,
 3680                                     ("vm_page_t %p phys_addr mismatch %016jx %016jx",
 3681                                     m, (uintmax_t)m->phys_addr,
 3682                                     (uintmax_t)tpte));
 3683 
 3684                                 KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
 3685                                     m < &vm_page_array[vm_page_array_size],
 3686                                     ("pmap_remove_pages: bad pte %#jx",
 3687                                     (uintmax_t)tpte));
 3688 
 3689                                 if (pmap_is_current(pmap)) {
 3690                                         if (lvl == 2 &&
 3691                                             pmap_l3_valid_cacheable(tpte)) {
 3692                                                 cpu_dcache_wb_range(pv->pv_va,
 3693                                                     L3_SIZE);
 3694                                         } else if (lvl == 1 &&
 3695                                             pmap_pte_valid_cacheable(tpte)) {
 3696                                                 cpu_dcache_wb_range(pv->pv_va,
 3697                                                     L2_SIZE);
 3698                                         }
 3699                                 }
 3700                                 pmap_load_clear(pte);
 3701                                 PTE_SYNC(pte);
 3702                                 pmap_invalidate_page(pmap, pv->pv_va);
 3703 
 3704                                 /*
 3705                                  * Update the vm_page_t clean/reference bits.
 3706                                  */
 3707                                 if ((tpte & ATTR_AP_RW_BIT) ==
 3708                                     ATTR_AP(ATTR_AP_RW)) {
 3709                                         switch (lvl) {
 3710                                         case 1:
 3711                                                 for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
 3712                                                         vm_page_dirty(m);
 3713                                                 break;
 3714                                         case 2:
 3715                                                 vm_page_dirty(m);
 3716                                                 break;
 3717                                         }
 3718                                 }
 3719 
 3720                                 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 3721 
 3722                                 /* Mark free */
 3723                                 pc->pc_map[field] |= bitmask;
 3724                                 switch (lvl) {
 3725                                 case 1:
 3726                                         pmap_resident_count_dec(pmap,
 3727                                             L2_SIZE / PAGE_SIZE);
 3728                                         pvh = pa_to_pvh(tpte & ~ATTR_MASK);
 3729                                         TAILQ_REMOVE(&pvh->pv_list, pv,pv_next);
 3730                                         pvh->pv_gen++;
 3731                                         if (TAILQ_EMPTY(&pvh->pv_list)) {
 3732                                                 for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
 3733                                                         if ((mt->aflags & PGA_WRITEABLE) != 0 &&
 3734                                                             TAILQ_EMPTY(&mt->md.pv_list))
 3735                                                                 vm_page_aflag_clear(mt, PGA_WRITEABLE);
 3736                                         }
 3737                                         ml3 = pmap_remove_pt_page(pmap,
 3738                                             pv->pv_va);
 3739                                         if (ml3 != NULL) {
 3740                                                 pmap_resident_count_dec(pmap,1);
 3741                                                 KASSERT(ml3->wire_count == NL3PG,
 3742                                                     ("pmap_remove_pages: l3 page wire count error"));
 3743                                                 ml3->wire_count = 0;
 3744                                                 pmap_add_delayed_free_list(ml3,
 3745                                                     &free, FALSE);
 3746                                                 atomic_subtract_int(
 3747                                                     &vm_cnt.v_wire_count, 1);
 3748                                         }
 3749                                         break;
 3750                                 case 2:
 3751                                         pmap_resident_count_dec(pmap, 1);
 3752                                         TAILQ_REMOVE(&m->md.pv_list, pv,
 3753                                             pv_next);
 3754                                         m->md.pv_gen++;
 3755                                         if ((m->aflags & PGA_WRITEABLE) != 0 &&
 3756                                             TAILQ_EMPTY(&m->md.pv_list) &&
 3757                                             (m->flags & PG_FICTITIOUS) == 0) {
 3758                                                 pvh = pa_to_pvh(
 3759                                                     VM_PAGE_TO_PHYS(m));
 3760                                                 if (TAILQ_EMPTY(&pvh->pv_list))
 3761                                                         vm_page_aflag_clear(m,
 3762                                                             PGA_WRITEABLE);
 3763                                         }
 3764                                         break;
 3765                                 }
 3766                                 pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde),
 3767                                     &free);
 3768                                 freed++;
 3769                         }
 3770                 }
 3771                 PV_STAT(atomic_add_long(&pv_entry_frees, freed));
 3772                 PV_STAT(atomic_add_int(&pv_entry_spare, freed));
 3773                 PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
 3774                 if (allfree) {
 3775                         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 3776                         free_pv_chunk(pc);
 3777                 }
 3778         }
 3779         pmap_invalidate_all(pmap);
 3780         if (lock != NULL)
 3781                 rw_wunlock(lock);
 3782         PMAP_UNLOCK(pmap);
 3783         pmap_free_zero_pages(&free);
 3784 }
 3785 
 3786 /*
 3787  * This is used to check if a page has been accessed or modified. As we
 3788  * don't have a bit to see if it has been modified we have to assume it
 3789  * has been if the page is read/write.
 3790  */
 3791 static boolean_t
 3792 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
 3793 {
 3794         struct rwlock *lock;
 3795         pv_entry_t pv;
 3796         struct md_page *pvh;
 3797         pt_entry_t *pte, mask, value;
 3798         pmap_t pmap;
 3799         int lvl, md_gen, pvh_gen;
 3800         boolean_t rv;
 3801 
 3802         rv = FALSE;
 3803         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3804         rw_rlock(lock);
 3805 restart:
 3806         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 3807                 pmap = PV_PMAP(pv);
 3808                 if (!PMAP_TRYLOCK(pmap)) {
 3809                         md_gen = m->md.pv_gen;
 3810                         rw_runlock(lock);
 3811                         PMAP_LOCK(pmap);
 3812                         rw_rlock(lock);
 3813                         if (md_gen != m->md.pv_gen) {
 3814                                 PMAP_UNLOCK(pmap);
 3815                                 goto restart;
 3816                         }
 3817                 }
 3818                 pte = pmap_pte(pmap, pv->pv_va, &lvl);
 3819                 KASSERT(lvl == 3,
 3820                     ("pmap_page_test_mappings: Invalid level %d", lvl));
 3821                 mask = 0;
 3822                 value = 0;
 3823                 if (modified) {
 3824                         mask |= ATTR_AP_RW_BIT;
 3825                         value |= ATTR_AP(ATTR_AP_RW);
 3826                 }
 3827                 if (accessed) {
 3828                         mask |= ATTR_AF | ATTR_DESCR_MASK;
 3829                         value |= ATTR_AF | L3_PAGE;
 3830                 }
 3831                 rv = (pmap_load(pte) & mask) == value;
 3832                 PMAP_UNLOCK(pmap);
 3833                 if (rv)
 3834                         goto out;
 3835         }
 3836         if ((m->flags & PG_FICTITIOUS) == 0) {
 3837                 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
 3838                 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
 3839                         pmap = PV_PMAP(pv);
 3840                         if (!PMAP_TRYLOCK(pmap)) {
 3841                                 md_gen = m->md.pv_gen;
 3842                                 pvh_gen = pvh->pv_gen;
 3843                                 rw_runlock(lock);
 3844                                 PMAP_LOCK(pmap);
 3845                                 rw_rlock(lock);
 3846                                 if (md_gen != m->md.pv_gen ||
 3847                                     pvh_gen != pvh->pv_gen) {
 3848                                         PMAP_UNLOCK(pmap);
 3849                                         goto restart;
 3850                                 }
 3851                         }
 3852                         pte = pmap_pte(pmap, pv->pv_va, &lvl);
 3853                         KASSERT(lvl == 2,
 3854                             ("pmap_page_test_mappings: Invalid level %d", lvl));
 3855                         mask = 0;
 3856                         value = 0;
 3857                         if (modified) {
 3858                                 mask |= ATTR_AP_RW_BIT;
 3859                                 value |= ATTR_AP(ATTR_AP_RW);
 3860                         }
 3861                         if (accessed) {
 3862                                 mask |= ATTR_AF | ATTR_DESCR_MASK;
 3863                                 value |= ATTR_AF | L2_BLOCK;
 3864                         }
 3865                         rv = (pmap_load(pte) & mask) == value;
 3866                         PMAP_UNLOCK(pmap);
 3867                         if (rv)
 3868                                 goto out;
 3869                 }
 3870         }
 3871 out:
 3872         rw_runlock(lock);
 3873         return (rv);
 3874 }
 3875 
 3876 /*
 3877  *      pmap_is_modified:
 3878  *
 3879  *      Return whether or not the specified physical page was modified
 3880  *      in any physical maps.
 3881  */
 3882 boolean_t
 3883 pmap_is_modified(vm_page_t m)
 3884 {
 3885 
 3886         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3887             ("pmap_is_modified: page %p is not managed", m));
 3888 
 3889         /*
 3890          * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 3891          * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 3892          * is clear, no PTEs can have PG_M set.
 3893          */
 3894         VM_OBJECT_ASSERT_WLOCKED(m->object);
 3895         if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 3896                 return (FALSE);
 3897         return (pmap_page_test_mappings(m, FALSE, TRUE));
 3898 }
 3899 
 3900 /*
 3901  *      pmap_is_prefaultable:
 3902  *
 3903  *      Return whether or not the specified virtual address is eligible
 3904  *      for prefault.
 3905  */
 3906 boolean_t
 3907 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 3908 {
 3909         pt_entry_t *pte;
 3910         boolean_t rv;
 3911         int lvl;
 3912 
 3913         rv = FALSE;
 3914         PMAP_LOCK(pmap);
 3915         pte = pmap_pte(pmap, addr, &lvl);
 3916         if (pte != NULL && pmap_load(pte) != 0) {
 3917                 rv = TRUE;
 3918         }
 3919         PMAP_UNLOCK(pmap);
 3920         return (rv);
 3921 }
 3922 
 3923 /*
 3924  *      pmap_is_referenced:
 3925  *
 3926  *      Return whether or not the specified physical page was referenced
 3927  *      in any physical maps.
 3928  */
 3929 boolean_t
 3930 pmap_is_referenced(vm_page_t m)
 3931 {
 3932 
 3933         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3934             ("pmap_is_referenced: page %p is not managed", m));
 3935         return (pmap_page_test_mappings(m, TRUE, FALSE));
 3936 }
 3937 
 3938 /*
 3939  * Clear the write and modified bits in each of the given page's mappings.
 3940  */
 3941 void
 3942 pmap_remove_write(vm_page_t m)
 3943 {
 3944         struct md_page *pvh;
 3945         pmap_t pmap;
 3946         struct rwlock *lock;
 3947         pv_entry_t next_pv, pv;
 3948         pt_entry_t oldpte, *pte;
 3949         vm_offset_t va;
 3950         int lvl, md_gen, pvh_gen;
 3951 
 3952         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 3953             ("pmap_remove_write: page %p is not managed", m));
 3954 
 3955         /*
 3956          * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 3957          * set by another thread while the object is locked.  Thus,
 3958          * if PGA_WRITEABLE is clear, no page table entries need updating.
 3959          */
 3960         VM_OBJECT_ASSERT_WLOCKED(m->object);
 3961         if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 3962                 return;
 3963         lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 3964         pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
 3965             pa_to_pvh(VM_PAGE_TO_PHYS(m));
 3966 retry_pv_loop:
 3967         rw_wlock(lock);
 3968         TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
 3969                 pmap = PV_PMAP(pv);
 3970                 if (!PMAP_TRYLOCK(pmap)) {
 3971                         pvh_gen = pvh->pv_gen;
 3972                         rw_wunlock(lock);
 3973                         PMAP_LOCK(pmap);
 3974                         rw_wlock(lock);
 3975                         if (pvh_gen != pvh->pv_gen) {
 3976                                 PMAP_UNLOCK(pmap);
 3977                                 rw_wunlock(lock);
 3978                                 goto retry_pv_loop;
 3979                         }
 3980                 }
 3981                 va = pv->pv_va;
 3982                 pte = pmap_pte(pmap, pv->pv_va, &lvl);
 3983                 if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
 3984                         pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET,
 3985                             &lock);
 3986                 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
 3987                     ("inconsistent pv lock %p %p for page %p",
 3988                     lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
 3989                 PMAP_UNLOCK(pmap);
 3990         }
 3991         TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
 3992                 pmap = PV_PMAP(pv);
 3993                 if (!PMAP_TRYLOCK(pmap)) {
 3994                         pvh_gen = pvh->pv_gen;
 3995                         md_gen = m->md.pv_gen;
 3996                         rw_wunlock(lock);
 3997                         PMAP_LOCK(pmap);
 3998                         rw_wlock(lock);
 3999                         if (pvh_gen != pvh->pv_gen ||
 4000                             md_gen != m->md.pv_gen) {
 4001                                 PMAP_UNLOCK(pmap);
 4002                                 rw_wunlock(lock);
 4003                                 goto retry_pv_loop;
 4004                         }
 4005                 }
 4006                 pte = pmap_pte(pmap, pv->pv_va, &lvl);
 4007 retry:
 4008                 oldpte = pmap_load(pte);
 4009                 if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
 4010                         if (!atomic_cmpset_long(pte, oldpte,
 4011                             oldpte | ATTR_AP(ATTR_AP_RO)))
 4012                                 goto retry;
 4013                         if ((oldpte & ATTR_AF) != 0)
 4014                                 vm_page_dirty(m);
 4015                         pmap_invalidate_page(pmap, pv->pv_va);
 4016                 }
 4017                 PMAP_UNLOCK(pmap);
 4018         }
 4019         rw_wunlock(lock);
 4020         vm_page_aflag_clear(m, PGA_WRITEABLE);
 4021 }
 4022 
 4023 static __inline boolean_t
 4024 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
 4025 {
 4026 
 4027         return (FALSE);
 4028 }
 4029 
 4030 /*
 4031  *      pmap_ts_referenced:
 4032  *
 4033  *      Return a count of reference bits for a page, clearing those bits.
 4034  *      It is not necessary for every reference bit to be cleared, but it
 4035  *      is necessary that 0 only be returned when there are truly no
 4036  *      reference bits set.
 4037  *
 4038  *      As an optimization, update the page's dirty field if a modified bit is
 4039  *      found while counting reference bits.  This opportunistic update can be
 4040  *      performed at low cost and can eliminate the need for some future calls
 4041  *      to pmap_is_modified().  However, since this function stops after
 4042  *      finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
 4043  *      dirty pages.  Those dirty pages will only be detected by a future call
 4044  *      to pmap_is_modified().
 4045  */
 4046 int
 4047 pmap_ts_referenced(vm_page_t m)
 4048 {
 4049         struct md_page *pvh;
 4050         pv_entry_t pv, pvf;
 4051         pmap_t pmap;
 4052         struct rwlock *lock;
 4053         pd_entry_t *pde, tpde;
 4054         pt_entry_t *pte, tpte;
 4055         pt_entry_t *l3;
 4056         vm_offset_t va;
 4057         vm_paddr_t pa;
 4058         int cleared, md_gen, not_cleared, lvl, pvh_gen;
 4059         struct spglist free;
 4060         bool demoted;
 4061 
 4062         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 4063             ("pmap_ts_referenced: page %p is not managed", m));
 4064         SLIST_INIT(&free);
 4065         cleared = 0;
 4066         pa = VM_PAGE_TO_PHYS(m);
 4067         lock = PHYS_TO_PV_LIST_LOCK(pa);
 4068         pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
 4069         rw_wlock(lock);
 4070 retry:
 4071         not_cleared = 0;
 4072         if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
 4073                 goto small_mappings;
 4074         pv = pvf;
 4075         do {
 4076                 if (pvf == NULL)
 4077                         pvf = pv;
 4078                 pmap = PV_PMAP(pv);
 4079                 if (!PMAP_TRYLOCK(pmap)) {
 4080                         pvh_gen = pvh->pv_gen;
 4081                         rw_wunlock(lock);
 4082                         PMAP_LOCK(pmap);
 4083                         rw_wlock(lock);
 4084                         if (pvh_gen != pvh->pv_gen) {
 4085                                 PMAP_UNLOCK(pmap);
 4086                                 goto retry;
 4087                         }
 4088                 }
 4089                 va = pv->pv_va;
 4090                 pde = pmap_pde(pmap, pv->pv_va, &lvl);
 4091                 KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found"));
 4092                 KASSERT(lvl == 1,
 4093                     ("pmap_ts_referenced: invalid pde level %d", lvl));
 4094                 tpde = pmap_load(pde);
 4095                 KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE,
 4096                     ("pmap_ts_referenced: found an invalid l1 table"));
 4097                 pte = pmap_l1_to_l2(pde, pv->pv_va);
 4098                 tpte = pmap_load(pte);
 4099                 if (pmap_page_dirty(tpte)) {
 4100                         /*
 4101                          * Although "tpte" is mapping a 2MB page, because
 4102                          * this function is called at a 4KB page granularity,
 4103                          * we only update the 4KB page under test.
 4104                          */
 4105                         vm_page_dirty(m);
 4106                 }
 4107                 if ((tpte & ATTR_AF) != 0) {
 4108                         /*
 4109                          * Since this reference bit is shared by 512 4KB
 4110                          * pages, it should not be cleared every time it is
 4111                          * tested.  Apply a simple "hash" function on the
 4112                          * physical page number, the virtual superpage number,
 4113                          * and the pmap address to select one 4KB page out of
 4114                          * the 512 on which testing the reference bit will
 4115                          * result in clearing that reference bit.  This
 4116                          * function is designed to avoid the selection of the
 4117                          * same 4KB page for every 2MB page mapping.
 4118                          *
 4119                          * On demotion, a mapping that hasn't been referenced
 4120                          * is simply destroyed.  To avoid the possibility of a
 4121                          * subsequent page fault on a demoted wired mapping,
 4122                          * always leave its reference bit set.  Moreover,
 4123                          * since the superpage is wired, the current state of
 4124                          * its reference bit won't affect page replacement.
 4125                          */
 4126                         if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
 4127                             (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
 4128                             (tpte & ATTR_SW_WIRED) == 0) {
 4129                                 if (safe_to_clear_referenced(pmap, tpte)) {
 4130                                         /*
 4131                                          * TODO: We don't handle the access
 4132                                          * flag at all. We need to be able
 4133                                          * to set it in  the exception handler.
 4134                                          */
 4135                                         panic("ARM64TODO: "
 4136                                             "safe_to_clear_referenced\n");
 4137                                 } else if (pmap_demote_l2_locked(pmap, pte,
 4138                                     pv->pv_va, &lock) != NULL) {
 4139                                         demoted = true;
 4140                                         va += VM_PAGE_TO_PHYS(m) -
 4141                                             (tpte & ~ATTR_MASK);
 4142                                         l3 = pmap_l2_to_l3(pte, va);
 4143                                         pmap_remove_l3(pmap, l3, va,
 4144                                             pmap_load(pte), NULL, &lock);
 4145                                 } else
 4146                                         demoted = true;
 4147 
 4148                                 if (demoted) {
 4149                                         /*
 4150                                          * The superpage mapping was removed
 4151                                          * entirely and therefore 'pv' is no
 4152                                          * longer valid.
 4153                                          */
 4154                                         if (pvf == pv)
 4155                                                 pvf = NULL;
 4156                                         pv = NULL;
 4157                                 }
 4158                                 cleared++;
 4159                                 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
 4160                                     ("inconsistent pv lock %p %p for page %p",
 4161                                     lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
 4162                         } else
 4163                                 not_cleared++;
 4164                 }
 4165                 PMAP_UNLOCK(pmap);
 4166                 /* Rotate the PV list if it has more than one entry. */
 4167                 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
 4168                         TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
 4169                         TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
 4170                         pvh->pv_gen++;
 4171                 }
 4172                 if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
 4173                         goto out;
 4174         } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
 4175 small_mappings:
 4176         if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
 4177                 goto out;
 4178         pv = pvf;
 4179         do {
 4180                 if (pvf == NULL)
 4181                         pvf = pv;
 4182                 pmap = PV_PMAP(pv);
 4183                 if (!PMAP_TRYLOCK(pmap)) {
 4184                         pvh_gen = pvh->pv_gen;
 4185                         md_gen = m->md.pv_gen;
 4186                         rw_wunlock(lock);
 4187                         PMAP_LOCK(pmap);
 4188                         rw_wlock(lock);
 4189                         if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
 4190                                 PMAP_UNLOCK(pmap);
 4191                                 goto retry;
 4192                         }
 4193                 }
 4194                 pde = pmap_pde(pmap, pv->pv_va, &lvl);
 4195                 KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
 4196                 KASSERT(lvl == 2,
 4197                     ("pmap_ts_referenced: invalid pde level %d", lvl));
 4198                 tpde = pmap_load(pde);
 4199                 KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
 4200                     ("pmap_ts_referenced: found an invalid l2 table"));
 4201                 pte = pmap_l2_to_l3(pde, pv->pv_va);
 4202                 tpte = pmap_load(pte);
 4203                 if (pmap_page_dirty(tpte))
 4204                         vm_page_dirty(m);
 4205                 if ((tpte & ATTR_AF) != 0) {
 4206                         if (safe_to_clear_referenced(pmap, tpte)) {
 4207                                 /*
 4208                                  * TODO: We don't handle the access flag
 4209                                  * at all. We need to be able to set it in
 4210                                  * the exception handler.
 4211                                  */
 4212                                 panic("ARM64TODO: safe_to_clear_referenced\n");
 4213                         } else if ((tpte & ATTR_SW_WIRED) == 0) {
 4214                                 /*
 4215                                  * Wired pages cannot be paged out so
 4216                                  * doing accessed bit emulation for
 4217                                  * them is wasted effort. We do the
 4218                                  * hard work for unwired pages only.
 4219                                  */
 4220                                 pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
 4221                                     &free, &lock);
 4222                                 pmap_invalidate_page(pmap, pv->pv_va);
 4223                                 cleared++;
 4224                                 if (pvf == pv)
 4225                                         pvf = NULL;
 4226                                 pv = NULL;
 4227                                 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
 4228                                     ("inconsistent pv lock %p %p for page %p",
 4229                                     lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
 4230                         } else
 4231                                 not_cleared++;
 4232                 }
 4233                 PMAP_UNLOCK(pmap);
 4234                 /* Rotate the PV list if it has more than one entry. */
 4235                 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
 4236                         TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
 4237                         TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 4238                         m->md.pv_gen++;
 4239                 }
 4240         } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
 4241             not_cleared < PMAP_TS_REFERENCED_MAX);
 4242 out:
 4243         rw_wunlock(lock);
 4244         pmap_free_zero_pages(&free);
 4245         return (cleared + not_cleared);
 4246 }
 4247 
 4248 /*
 4249  *      Apply the given advice to the specified range of addresses within the
 4250  *      given pmap.  Depending on the advice, clear the referenced and/or
 4251  *      modified flags in each mapping and set the mapped page's dirty field.
 4252  */
 4253 void
 4254 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
 4255 {
 4256 }
 4257 
 4258 /*
 4259  *      Clear the modify bits on the specified physical page.
 4260  */
 4261 void
 4262 pmap_clear_modify(vm_page_t m)
 4263 {
 4264 
 4265         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 4266             ("pmap_clear_modify: page %p is not managed", m));
 4267         VM_OBJECT_ASSERT_WLOCKED(m->object);
 4268         KASSERT(!vm_page_xbusied(m),
 4269             ("pmap_clear_modify: page %p is exclusive busied", m));
 4270 
 4271         /*
 4272          * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
 4273          * If the object containing the page is locked and the page is not
 4274          * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
 4275          */
 4276         if ((m->aflags & PGA_WRITEABLE) == 0)
 4277                 return;
 4278 
 4279         /* ARM64TODO: We lack support for tracking if a page is modified */
 4280 }
 4281 
 4282 void *
 4283 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
 4284 {
 4285 
 4286         return ((void *)PHYS_TO_DMAP(pa));
 4287 }
 4288 
 4289 void
 4290 pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
 4291 {
 4292 }
 4293 
 4294 /*
 4295  * Sets the memory attribute for the specified page.
 4296  */
 4297 void
 4298 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 4299 {
 4300 
 4301         m->md.pv_memattr = ma;
 4302 
 4303         /*
 4304          * If "m" is a normal page, update its direct mapping.  This update
 4305          * can be relied upon to perform any cache operations that are
 4306          * required for data coherence.
 4307          */
 4308         if ((m->flags & PG_FICTITIOUS) == 0 &&
 4309             pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
 4310             m->md.pv_memattr) != 0)
 4311                 panic("memory attribute change on the direct map failed");
 4312 }
 4313 
 4314 /*
 4315  * Changes the specified virtual address range's memory type to that given by
 4316  * the parameter "mode".  The specified virtual address range must be
 4317  * completely contained within either the direct map or the kernel map.  If
 4318  * the virtual address range is contained within the kernel map, then the
 4319  * memory type for each of the corresponding ranges of the direct map is also
 4320  * changed.  (The corresponding ranges of the direct map are those ranges that
 4321  * map the same physical pages as the specified virtual address range.)  These
 4322  * changes to the direct map are necessary because Intel describes the
 4323  * behavior of their processors as "undefined" if two or more mappings to the
 4324  * same physical page have different memory types.
 4325  *
 4326  * Returns zero if the change completed successfully, and either EINVAL or
 4327  * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
 4328  * of the virtual address range was not mapped, and ENOMEM is returned if
 4329  * there was insufficient memory available to complete the change.  In the
 4330  * latter case, the memory type may have been changed on some part of the
 4331  * virtual address range or the direct map.
 4332  */
 4333 static int
 4334 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 4335 {
 4336         int error;
 4337 
 4338         PMAP_LOCK(kernel_pmap);
 4339         error = pmap_change_attr_locked(va, size, mode);
 4340         PMAP_UNLOCK(kernel_pmap);
 4341         return (error);
 4342 }
 4343 
 4344 static int
 4345 pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
 4346 {
 4347         vm_offset_t base, offset, tmpva;
 4348         pt_entry_t l3, *pte, *newpte;
 4349         int lvl;
 4350 
 4351         PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
 4352         base = trunc_page(va);
 4353         offset = va & PAGE_MASK;
 4354         size = round_page(offset + size);
 4355 
 4356         if (!VIRT_IN_DMAP(base))
 4357                 return (EINVAL);
 4358 
 4359         for (tmpva = base; tmpva < base + size; ) {
 4360                 pte = pmap_pte(kernel_pmap, va, &lvl);
 4361                 if (pte == NULL)
 4362                         return (EINVAL);
 4363 
 4364                 if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) {
 4365                         /*
 4366                          * We already have the correct attribute,
 4367                          * ignore this entry.
 4368                          */
 4369                         switch (lvl) {
 4370                         default:
 4371                                 panic("Invalid DMAP table level: %d\n", lvl);
 4372                         case 1:
 4373                                 tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
 4374                                 break;
 4375                         case 2:
 4376                                 tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
 4377                                 break;
 4378                         case 3:
 4379                                 tmpva += PAGE_SIZE;
 4380                                 break;
 4381                         }
 4382                 } else {
 4383                         /*
 4384                          * Split the entry to an level 3 table, then
 4385                          * set the new attribute.
 4386                          */
 4387                         switch (lvl) {
 4388                         default:
 4389                                 panic("Invalid DMAP table level: %d\n", lvl);
 4390                         case 1:
 4391                                 newpte = pmap_demote_l1(kernel_pmap, pte,
 4392                                     tmpva & ~L1_OFFSET);
 4393                                 if (newpte == NULL)
 4394                                         return (EINVAL);
 4395                                 pte = pmap_l1_to_l2(pte, tmpva);
 4396                         case 2:
 4397                                 newpte = pmap_demote_l2(kernel_pmap, pte,
 4398                                     tmpva & ~L2_OFFSET);
 4399                                 if (newpte == NULL)
 4400                                         return (EINVAL);
 4401                                 pte = pmap_l2_to_l3(pte, tmpva);
 4402                         case 3:
 4403                                 /* Update the entry */
 4404                                 l3 = pmap_load(pte);
 4405                                 l3 &= ~ATTR_IDX_MASK;
 4406                                 l3 |= ATTR_IDX(mode);
 4407                                 if (mode == DEVICE_MEMORY)
 4408                                         l3 |= ATTR_XN;
 4409 
 4410                                 pmap_update_entry(kernel_pmap, pte, l3, tmpva,
 4411                                     PAGE_SIZE);
 4412 
 4413                                 /*
 4414                                  * If moving to a non-cacheable entry flush
 4415                                  * the cache.
 4416                                  */
 4417                                 if (mode == VM_MEMATTR_UNCACHEABLE)
 4418                                         cpu_dcache_wbinv_range(tmpva, L3_SIZE);
 4419 
 4420                                 break;
 4421                         }
 4422                         tmpva += PAGE_SIZE;
 4423                 }
 4424         }
 4425 
 4426         return (0);
 4427 }
 4428 
 4429 /*
 4430  * Create an L2 table to map all addresses within an L1 mapping.
 4431  */
 4432 static pt_entry_t *
 4433 pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
 4434 {
 4435         pt_entry_t *l2, newl2, oldl1;
 4436         vm_offset_t tmpl1;
 4437         vm_paddr_t l2phys, phys;
 4438         vm_page_t ml2;
 4439         int i;
 4440 
 4441         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 4442         oldl1 = pmap_load(l1);
 4443         KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
 4444             ("pmap_demote_l1: Demoting a non-block entry"));
 4445         KASSERT((va & L1_OFFSET) == 0,
 4446             ("pmap_demote_l1: Invalid virtual address %#lx", va));
 4447         KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
 4448             ("pmap_demote_l1: Level 1 table shouldn't be managed"));
 4449 
 4450         tmpl1 = 0;
 4451         if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
 4452                 tmpl1 = kva_alloc(PAGE_SIZE);
 4453                 if (tmpl1 == 0)
 4454                         return (NULL);
 4455         }
 4456 
 4457         if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
 4458             VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
 4459                 CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
 4460                     " in pmap %p", va, pmap);
 4461                 return (NULL);
 4462         }
 4463 
 4464         l2phys = VM_PAGE_TO_PHYS(ml2);
 4465         l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
 4466 
 4467         /* Address the range points at */
 4468         phys = oldl1 & ~ATTR_MASK;
 4469         /* The attributed from the old l1 table to be copied */
 4470         newl2 = oldl1 & ATTR_MASK;
 4471 
 4472         /* Create the new entries */
 4473         for (i = 0; i < Ln_ENTRIES; i++) {
 4474                 l2[i] = newl2 | phys;
 4475                 phys += L2_SIZE;
 4476         }
 4477         cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
 4478         KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
 4479             ("Invalid l2 page (%lx != %lx)", l2[0],
 4480             (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
 4481 
 4482         if (tmpl1 != 0) {
 4483                 pmap_kenter(tmpl1, PAGE_SIZE,
 4484                     DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY);
 4485                 l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
 4486         }
 4487 
 4488         pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
 4489 
 4490         if (tmpl1 != 0) {
 4491                 pmap_kremove(tmpl1);
 4492                 kva_free(tmpl1, PAGE_SIZE);
 4493         }
 4494 
 4495         return (l2);
 4496 }
 4497 
 4498 /*
 4499  * Create an L3 table to map all addresses within an L2 mapping.
 4500  */
 4501 static pt_entry_t *
 4502 pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
 4503     struct rwlock **lockp)
 4504 {
 4505         pt_entry_t *l3, newl3, oldl2;
 4506         vm_offset_t tmpl2;
 4507         vm_paddr_t l3phys, phys;
 4508         vm_page_t ml3;
 4509         int i;
 4510 
 4511         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 4512         l3 = NULL;
 4513         oldl2 = pmap_load(l2);
 4514         KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
 4515             ("pmap_demote_l2: Demoting a non-block entry"));
 4516         KASSERT((va & L2_OFFSET) == 0,
 4517             ("pmap_demote_l2: Invalid virtual address %#lx", va));
 4518 
 4519         tmpl2 = 0;
 4520         if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
 4521                 tmpl2 = kva_alloc(PAGE_SIZE);
 4522                 if (tmpl2 == 0)
 4523                         return (NULL);
 4524         }
 4525 
 4526         if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
 4527                 ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
 4528                     (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
 4529                     VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
 4530                 if (ml3 == NULL) {
 4531                         CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
 4532                             " in pmap %p", va, pmap);
 4533                         goto fail;
 4534                 }
 4535                 if (va < VM_MAXUSER_ADDRESS)
 4536                         pmap_resident_count_inc(pmap, 1);
 4537         }
 4538 
 4539         l3phys = VM_PAGE_TO_PHYS(ml3);
 4540         l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
 4541 
 4542         /* Address the range points at */
 4543         phys = oldl2 & ~ATTR_MASK;
 4544         /* The attributed from the old l2 table to be copied */
 4545         newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE;
 4546 
 4547         /*
 4548          * If the page table page is new, initialize it.
 4549          */
 4550         if (ml3->wire_count == 1) {
 4551                 for (i = 0; i < Ln_ENTRIES; i++) {
 4552                         l3[i] = newl3 | phys;
 4553                         phys += L3_SIZE;
 4554                 }
 4555                 cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE);
 4556         }
 4557         KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE),
 4558             ("Invalid l3 page (%lx != %lx)", l3[0],
 4559             (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE));
 4560 
 4561         /*
 4562          * Map the temporary page so we don't lose access to the l2 table.
 4563          */
 4564         if (tmpl2 != 0) {
 4565                 pmap_kenter(tmpl2, PAGE_SIZE,
 4566                     DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY);
 4567                 l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
 4568         }
 4569 
 4570         /*
 4571          * The spare PV entries must be reserved prior to demoting the
 4572          * mapping, that is, prior to changing the PDE.  Otherwise, the state
 4573          * of the L2 and the PV lists will be inconsistent, which can result
 4574          * in reclaim_pv_chunk() attempting to remove a PV entry from the
 4575          * wrong PV list and pmap_pv_demote_l2() failing to find the expected
 4576          * PV entry for the 2MB page mapping that is being demoted.
 4577          */
 4578         if ((oldl2 & ATTR_SW_MANAGED) != 0)
 4579                 reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
 4580 
 4581         pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);
 4582 
 4583         /*
 4584          * Demote the PV entry.
 4585          */
 4586         if ((oldl2 & ATTR_SW_MANAGED) != 0)
 4587                 pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp);
 4588 
 4589         atomic_add_long(&pmap_l2_demotions, 1);
 4590         CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx"
 4591             " in pmap %p %lx", va, pmap, l3[0]);
 4592 
 4593 fail:
 4594         if (tmpl2 != 0) {
 4595                 pmap_kremove(tmpl2);
 4596                 kva_free(tmpl2, PAGE_SIZE);
 4597         }
 4598 
 4599         return (l3);
 4600 
 4601 }
 4602 
 4603 static pt_entry_t *
 4604 pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
 4605 {
 4606         struct rwlock *lock;
 4607         pt_entry_t *l3;
 4608 
 4609         lock = NULL;
 4610         l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
 4611         if (lock != NULL)
 4612                 rw_wunlock(lock);
 4613         return (l3);
 4614 }
 4615 
 4616 /*
 4617  * perform the pmap work for mincore
 4618  */
 4619 int
 4620 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
 4621 {
 4622         pd_entry_t *l1p, l1;
 4623         pd_entry_t *l2p, l2;
 4624         pt_entry_t *l3p, l3;
 4625         vm_paddr_t pa;
 4626         bool managed;
 4627         int val;
 4628 
 4629         PMAP_LOCK(pmap);
 4630 retry:
 4631         pa = 0;
 4632         val = 0;
 4633         managed = false;
 4634 
 4635         l1p = pmap_l1(pmap, addr);
 4636         if (l1p == NULL) /* No l1 */
 4637                 goto done;
 4638 
 4639         l1 = pmap_load(l1p);
 4640         if ((l1 & ATTR_DESCR_MASK) == L1_INVAL)
 4641                 goto done;
 4642 
 4643         if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
 4644                 pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET);
 4645                 managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
 4646                 val = MINCORE_SUPER | MINCORE_INCORE;
 4647                 if (pmap_page_dirty(l1))
 4648                         val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 4649                 if ((l1 & ATTR_AF) == ATTR_AF)
 4650                         val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 4651                 goto done;
 4652         }
 4653 
 4654         l2p = pmap_l1_to_l2(l1p, addr);
 4655         if (l2p == NULL) /* No l2 */
 4656                 goto done;
 4657 
 4658         l2 = pmap_load(l2p);
 4659         if ((l2 & ATTR_DESCR_MASK) == L2_INVAL)
 4660                 goto done;
 4661 
 4662         if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
 4663                 pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET);
 4664                 managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
 4665                 val = MINCORE_SUPER | MINCORE_INCORE;
 4666                 if (pmap_page_dirty(l2))
 4667                         val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 4668                 if ((l2 & ATTR_AF) == ATTR_AF)
 4669                         val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 4670                 goto done;
 4671         }
 4672 
 4673         l3p = pmap_l2_to_l3(l2p, addr);
 4674         if (l3p == NULL) /* No l3 */
 4675                 goto done;
 4676 
 4677         l3 = pmap_load(l2p);
 4678         if ((l3 & ATTR_DESCR_MASK) == L3_INVAL)
 4679                 goto done;
 4680 
 4681         if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
 4682                 pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
 4683                 managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
 4684                 val = MINCORE_INCORE;
 4685                 if (pmap_page_dirty(l3))
 4686                         val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
 4687                 if ((l3 & ATTR_AF) == ATTR_AF)
 4688                         val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
 4689         }
 4690 
 4691 done:
 4692         if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
 4693             (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
 4694                 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
 4695                 if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
 4696                         goto retry;
 4697         } else
 4698                 PA_UNLOCK_COND(*locked_pa);
 4699         PMAP_UNLOCK(pmap);
 4700 
 4701         return (val);
 4702 }
 4703 
 4704 void
 4705 pmap_activate(struct thread *td)
 4706 {
 4707         pmap_t  pmap;
 4708 
 4709         critical_enter();
 4710         pmap = vmspace_pmap(td->td_proc->p_vmspace);
 4711         td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0);
 4712         __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr));
 4713         pmap_invalidate_all(pmap);
 4714         critical_exit();
 4715 }
 4716 
 4717 void
 4718 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
 4719 {
 4720 
 4721         if (va >= VM_MIN_KERNEL_ADDRESS) {
 4722                 cpu_icache_sync_range(va, sz);
 4723         } else {
 4724                 u_int len, offset;
 4725                 vm_paddr_t pa;
 4726 
 4727                 /* Find the length of data in this page to flush */
 4728                 offset = va & PAGE_MASK;
 4729                 len = imin(PAGE_SIZE - offset, sz);
 4730 
 4731                 while (sz != 0) {
 4732                         /* Extract the physical address & find it in the DMAP */
 4733                         pa = pmap_extract(pmap, va);
 4734                         if (pa != 0)
 4735                                 cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
 4736 
 4737                         /* Move to the next page */
 4738                         sz -= len;
 4739                         va += len;
 4740                         /* Set the length for the next iteration */
 4741                         len = imin(PAGE_SIZE, sz);
 4742                 }
 4743         }
 4744 }
 4745 
 4746 int
 4747 pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
 4748 {
 4749 #ifdef SMP
 4750         uint64_t par;
 4751 #endif
 4752 
 4753         switch (ESR_ELx_EXCEPTION(esr)) {
 4754         case EXCP_DATA_ABORT_L:
 4755         case EXCP_DATA_ABORT:
 4756                 break;
 4757         default:
 4758                 return (KERN_FAILURE);
 4759         }
 4760 
 4761 #ifdef SMP
 4762         PMAP_LOCK(pmap);
 4763         switch (esr & ISS_DATA_DFSC_MASK) {
 4764         case ISS_DATA_DFSC_TF_L0:
 4765         case ISS_DATA_DFSC_TF_L1:
 4766         case ISS_DATA_DFSC_TF_L2:
 4767         case ISS_DATA_DFSC_TF_L3:
 4768                 /* Ask the MMU to check the address */
 4769                 if (pmap == kernel_pmap)
 4770                         par = arm64_address_translate_s1e1r(far);
 4771                 else
 4772                         par = arm64_address_translate_s1e0r(far);
 4773 
 4774                 /*
 4775                  * If the translation was successful the address was invalid
 4776                  * due to a break-before-make sequence. We can unlock and
 4777                  * return success to the trap handler.
 4778                  */
 4779                 if (PAR_SUCCESS(par)) {
 4780                         PMAP_UNLOCK(pmap);
 4781                         return (KERN_SUCCESS);
 4782                 }
 4783                 break;
 4784         default:
 4785                 break;
 4786         }
 4787         PMAP_UNLOCK(pmap);
 4788 #endif
 4789 
 4790         return (KERN_FAILURE);
 4791 }
 4792 
 4793 /*
 4794  *      Increase the starting virtual address of the given mapping if a
 4795  *      different alignment might result in more superpage mappings.
 4796  */
 4797 void
 4798 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
 4799     vm_offset_t *addr, vm_size_t size)
 4800 {
 4801         vm_offset_t superpage_offset;
 4802 
 4803         if (size < L2_SIZE)
 4804                 return;
 4805         if (object != NULL && (object->flags & OBJ_COLORED) != 0)
 4806                 offset += ptoa(object->pg_color);
 4807         superpage_offset = offset & L2_OFFSET;
 4808         if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
 4809             (*addr & L2_OFFSET) == superpage_offset)
 4810                 return;
 4811         if ((*addr & L2_OFFSET) < superpage_offset)
 4812                 *addr = (*addr & ~L2_OFFSET) + superpage_offset;
 4813         else
 4814                 *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
 4815 }
 4816 
 4817 /**
 4818  * Get the kernel virtual address of a set of physical pages. If there are
 4819  * physical addresses not covered by the DMAP perform a transient mapping
 4820  * that will be removed when calling pmap_unmap_io_transient.
 4821  *
 4822  * \param page        The pages the caller wishes to obtain the virtual
 4823  *                    address on the kernel memory map.
 4824  * \param vaddr       On return contains the kernel virtual memory address
 4825  *                    of the pages passed in the page parameter.
 4826  * \param count       Number of pages passed in.
 4827  * \param can_fault   TRUE if the thread using the mapped pages can take
 4828  *                    page faults, FALSE otherwise.
 4829  *
 4830  * \returns TRUE if the caller must call pmap_unmap_io_transient when
 4831  *          finished or FALSE otherwise.
 4832  *
 4833  */
 4834 boolean_t
 4835 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
 4836     boolean_t can_fault)
 4837 {
 4838         vm_paddr_t paddr;
 4839         boolean_t needs_mapping;
 4840         int error, i;
 4841 
 4842         /*
 4843          * Allocate any KVA space that we need, this is done in a separate
 4844          * loop to prevent calling vmem_alloc while pinned.
 4845          */
 4846         needs_mapping = FALSE;
 4847         for (i = 0; i < count; i++) {
 4848                 paddr = VM_PAGE_TO_PHYS(page[i]);
 4849                 if (__predict_false(!PHYS_IN_DMAP(paddr))) {
 4850                         error = vmem_alloc(kernel_arena, PAGE_SIZE,
 4851                             M_BESTFIT | M_WAITOK, &vaddr[i]);
 4852                         KASSERT(error == 0, ("vmem_alloc failed: %d", error));
 4853                         needs_mapping = TRUE;
 4854                 } else {
 4855                         vaddr[i] = PHYS_TO_DMAP(paddr);
 4856                 }
 4857         }
 4858 
 4859         /* Exit early if everything is covered by the DMAP */
 4860         if (!needs_mapping)
 4861                 return (FALSE);
 4862 
 4863         if (!can_fault)
 4864                 sched_pin();
 4865         for (i = 0; i < count; i++) {
 4866                 paddr = VM_PAGE_TO_PHYS(page[i]);
 4867                 if (!PHYS_IN_DMAP(paddr)) {
 4868                         panic(
 4869                            "pmap_map_io_transient: TODO: Map out of DMAP data");
 4870                 }
 4871         }
 4872 
 4873         return (needs_mapping);
 4874 }
 4875 
 4876 void
 4877 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
 4878     boolean_t can_fault)
 4879 {
 4880         vm_paddr_t paddr;
 4881         int i;
 4882 
 4883         if (!can_fault)
 4884                 sched_unpin();
 4885         for (i = 0; i < count; i++) {
 4886                 paddr = VM_PAGE_TO_PHYS(page[i]);
 4887                 if (!PHYS_IN_DMAP(paddr)) {
 4888                         panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
 4889                 }
 4890         }
 4891 }

Cache object: 4c8425d481c6cad38be948c3f11c4d92


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.