FreeBSD/Linux Kernel Cross Reference
sys/powerpc/booke/pmap.c

    1 /*-
    2  * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
    3  * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
   18  * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   19  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
   20  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   22  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   23  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   25  *
   26  * Some hw specific parts of this pmap were derived or influenced
   27  * by NetBSD's ibm4xx pmap module. More generic code is shared with
   28  * a few other pmap modules from the FreeBSD tree.
   29  */
   30 
   31  /*
   32   * VM layout notes:
   33   *
   34   * Kernel and user threads run within one common virtual address space
   35   * defined by AS=0.
   36   *
   37   * Virtual address space layout:
   38   * -----------------------------
   39   * 0x0000_0000 - 0xafff_ffff   : user process
   40   * 0xb000_0000 - 0xbfff_ffff   : pmap_mapdev()-ed area (PCI/PCIE etc.)
   41   * 0xc000_0000 - 0xc0ff_ffff   : kernel reserved
   42   *   0xc000_0000 - data_end    : kernel code+data, env, metadata etc.
   43   * 0xc100_0000 - 0xfeef_ffff   : KVA
   44   *   0xc100_0000 - 0xc100_3fff : reserved for page zero/copy
   45   *   0xc100_4000 - 0xc200_3fff : reserved for ptbl bufs
   46   *   0xc200_4000 - 0xc200_8fff : guard page + kstack0
   47   *   0xc200_9000 - 0xfeef_ffff : actual free KVA space
   48   * 0xfef0_0000 - 0xffff_ffff   : I/O devices region
   49   */
   50 
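       /*
        * Illustration: mappings in this pmap live in a two-level structure,
        * pm_pdir[] -> ptbl -> pte. Assuming the usual Book-E pte.h layout
        * (PDIR_SHIFT = 22, 1024 pdir entries, 1024 ptes per ptbl, 4KB pages),
        * a virtual address splits as
        *
        *   pdir_idx = PDIR_IDX(va);    (va >> 22)
        *   ptbl_idx = PTBL_IDX(va);    ((va >> 12) & 0x3ff)
        *
        * e.g. va 0xc123_4567 -> pdir_idx 0x304, ptbl_idx 0x234, page offset 0x567.
        */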
   51 #include <sys/cdefs.h>
   52 __FBSDID("$FreeBSD: releng/11.0/sys/powerpc/booke/pmap.c 298642 2016-04-26 14:44:49Z pfg $");
   53 
   54 #include "opt_kstack_pages.h"
   55 
   56 #include <sys/param.h>
   57 #include <sys/conf.h>
   58 #include <sys/malloc.h>
   59 #include <sys/ktr.h>
   60 #include <sys/proc.h>
   61 #include <sys/user.h>
   62 #include <sys/queue.h>
   63 #include <sys/systm.h>
   64 #include <sys/kernel.h>
   65 #include <sys/kerneldump.h>
   66 #include <sys/linker.h>
   67 #include <sys/msgbuf.h>
   68 #include <sys/lock.h>
   69 #include <sys/mutex.h>
   70 #include <sys/rwlock.h>
   71 #include <sys/sched.h>
   72 #include <sys/smp.h>
   73 #include <sys/vmmeter.h>
   74 
   75 #include <vm/vm.h>
   76 #include <vm/vm_page.h>
   77 #include <vm/vm_kern.h>
   78 #include <vm/vm_pageout.h>
   79 #include <vm/vm_extern.h>
   80 #include <vm/vm_object.h>
   81 #include <vm/vm_param.h>
   82 #include <vm/vm_map.h>
   83 #include <vm/vm_pager.h>
   84 #include <vm/uma.h>
   85 
   86 #include <machine/cpu.h>
   87 #include <machine/pcb.h>
   88 #include <machine/platform.h>
   89 
   90 #include <machine/tlb.h>
   91 #include <machine/spr.h>
   92 #include <machine/md_var.h>
   93 #include <machine/mmuvar.h>
   94 #include <machine/pmap.h>
   95 #include <machine/pte.h>
   96 
   97 #include "mmu_if.h"
   98 
   99 #define SPARSE_MAPDEV
  100 #ifdef  DEBUG
  101 #define debugf(fmt, args...) printf(fmt, ##args)
  102 #else
  103 #define debugf(fmt, args...)
  104 #endif
  105 
  106 #define TODO                    panic("%s: not implemented", __func__);
  107 
  108 extern unsigned char _etext[];
  109 extern unsigned char _end[];
  110 
  111 extern uint32_t *bootinfo;
  112 
  113 vm_paddr_t kernload;
  114 vm_offset_t kernstart;
  115 vm_size_t kernsize;
  116 
  117 /* Message buffer and tables. */
  118 static vm_offset_t data_start;
  119 static vm_size_t data_end;
  120 
  121 /* Phys/avail memory regions. */
  122 static struct mem_region *availmem_regions;
  123 static int availmem_regions_sz;
  124 static struct mem_region *physmem_regions;
  125 static int physmem_regions_sz;
  126 
  127 /* Reserved KVA space and mutex for mmu_booke_zero_page. */
  128 static vm_offset_t zero_page_va;
  129 static struct mtx zero_page_mutex;
  130 
  131 static struct mtx tlbivax_mutex;
  132 
  133 /*
  134  * Reserved KVA space for mmu_booke_zero_page_idle. This is used
   135  * by the idle thread only, no lock required.
  136  */
  137 static vm_offset_t zero_page_idle_va;
  138 
  139 /* Reserved KVA space and mutex for mmu_booke_copy_page. */
  140 static vm_offset_t copy_page_src_va;
  141 static vm_offset_t copy_page_dst_va;
  142 static struct mtx copy_page_mutex;
  143 
  144 /**************************************************************************/
  145 /* PMAP */
  146 /**************************************************************************/
  147 
  148 static int mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t,
  149     vm_prot_t, u_int flags, int8_t psind);
  150 
  151 unsigned int kptbl_min;         /* Index of the first kernel ptbl. */
  152 unsigned int kernel_ptbls;      /* Number of KVA ptbls. */
  153 
  154 /*
   155  * If a user pmap is processed with mmu_booke_remove and the resident count
  156  * drops to 0, there are no more pages to remove, so we need not continue.
  157  */
  158 #define PMAP_REMOVE_DONE(pmap) \
  159         ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0)
  160 
  161 extern int elf32_nxstack;
  162 
  163 /**************************************************************************/
  164 /* TLB and TID handling */
  165 /**************************************************************************/
  166 
  167 /* Translation ID busy table */
  168 static volatile pmap_t tidbusy[MAXCPU][TID_MAX + 1];
  169 
  170 /*
  171  * TLB0 capabilities (entry, way numbers etc.). These can vary between e500
  172  * core revisions and should be read from h/w registers during early config.
  173  */
  174 uint32_t tlb0_entries;
  175 uint32_t tlb0_ways;
  176 uint32_t tlb0_entries_per_way;
  177 uint32_t tlb1_entries;
  178 
  179 #define TLB0_ENTRIES            (tlb0_entries)
  180 #define TLB0_WAYS               (tlb0_ways)
  181 #define TLB0_ENTRIES_PER_WAY    (tlb0_entries_per_way)
  182 
  183 #define TLB1_ENTRIES (tlb1_entries)
  184 #define TLB1_MAXENTRIES 64
  185 
  186 static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE;
  187 
  188 static tlbtid_t tid_alloc(struct pmap *);
  189 static void tid_flush(tlbtid_t tid);
  190 
  191 static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t);
  192 
  193 static void tlb1_read_entry(tlb_entry_t *, unsigned int);
  194 static void tlb1_write_entry(tlb_entry_t *, unsigned int);
  195 static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *);
  196 static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t);
  197 
  198 static vm_size_t tsize2size(unsigned int);
  199 static unsigned int size2tsize(vm_size_t);
  200 static unsigned int ilog2(unsigned int);
  201 
  202 static void set_mas4_defaults(void);
  203 
  204 static inline void tlb0_flush_entry(vm_offset_t);
  205 static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int);
  206 
  207 /**************************************************************************/
  208 /* Page table management */
  209 /**************************************************************************/
  210 
  211 static struct rwlock_padalign pvh_global_lock;
  212 
  213 /* Data for the pv entry allocation mechanism */
  214 static uma_zone_t pvzone;
  215 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
  216 
  217 #define PV_ENTRY_ZONE_MIN       2048    /* min pv entries in uma zone */
  218 
  219 #ifndef PMAP_SHPGPERPROC
  220 #define PMAP_SHPGPERPROC        200
  221 #endif
  222 
  223 static void ptbl_init(void);
  224 static struct ptbl_buf *ptbl_buf_alloc(void);
  225 static void ptbl_buf_free(struct ptbl_buf *);
  226 static void ptbl_free_pmap_ptbl(pmap_t, pte_t *);
  227 
  228 static pte_t *ptbl_alloc(mmu_t, pmap_t, unsigned int, boolean_t);
  229 static void ptbl_free(mmu_t, pmap_t, unsigned int);
  230 static void ptbl_hold(mmu_t, pmap_t, unsigned int);
  231 static int ptbl_unhold(mmu_t, pmap_t, unsigned int);
  232 
  233 static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t);
  234 static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t);
  235 static int pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, uint32_t, boolean_t);
  236 static int pte_remove(mmu_t, pmap_t, vm_offset_t, uint8_t);
  237 static void kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr,
  238                              vm_offset_t pdir);
  239 
  240 static pv_entry_t pv_alloc(void);
  241 static void pv_free(pv_entry_t);
  242 static void pv_insert(pmap_t, vm_offset_t, vm_page_t);
  243 static void pv_remove(pmap_t, vm_offset_t, vm_page_t);
  244 
  245 static void booke_pmap_init_qpages(void);
  246 
  247 /* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */
  248 #define PTBL_BUFS               (128 * 16)
  249 
  250 struct ptbl_buf {
  251         TAILQ_ENTRY(ptbl_buf) link;     /* list link */
  252         vm_offset_t kva;                /* va of mapping */
  253 };
  254 
  255 /* ptbl free list and a lock used for access synchronization. */
  256 static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist;
  257 static struct mtx ptbl_buf_freelist_lock;
  258 
   260 /* Base address of kva space allocated for ptbl bufs. */
  260 static vm_offset_t ptbl_buf_pool_vabase;
  261 
  262 /* Pointer to ptbl_buf structures. */
  263 static struct ptbl_buf *ptbl_bufs;
  264 
  265 #ifdef SMP
  266 extern tlb_entry_t __boot_tlb1[];
  267 void pmap_bootstrap_ap(volatile uint32_t *);
  268 #endif
  269 
  270 /*
  271  * Kernel MMU interface
  272  */
  273 static void             mmu_booke_clear_modify(mmu_t, vm_page_t);
  274 static void             mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t,
  275     vm_size_t, vm_offset_t);
  276 static void             mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t);
  277 static void             mmu_booke_copy_pages(mmu_t, vm_page_t *,
  278     vm_offset_t, vm_page_t *, vm_offset_t, int);
  279 static int              mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t,
  280     vm_prot_t, u_int flags, int8_t psind);
  281 static void             mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t,
  282     vm_page_t, vm_prot_t);
  283 static void             mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t,
  284     vm_prot_t);
  285 static vm_paddr_t       mmu_booke_extract(mmu_t, pmap_t, vm_offset_t);
  286 static vm_page_t        mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t,
  287     vm_prot_t);
  288 static void             mmu_booke_init(mmu_t);
  289 static boolean_t        mmu_booke_is_modified(mmu_t, vm_page_t);
  290 static boolean_t        mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t);
  291 static boolean_t        mmu_booke_is_referenced(mmu_t, vm_page_t);
  292 static int              mmu_booke_ts_referenced(mmu_t, vm_page_t);
  293 static vm_offset_t      mmu_booke_map(mmu_t, vm_offset_t *, vm_paddr_t, vm_paddr_t,
  294     int);
  295 static int              mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t,
  296     vm_paddr_t *);
  297 static void             mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t,
  298     vm_object_t, vm_pindex_t, vm_size_t);
  299 static boolean_t        mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t);
  300 static void             mmu_booke_page_init(mmu_t, vm_page_t);
  301 static int              mmu_booke_page_wired_mappings(mmu_t, vm_page_t);
  302 static void             mmu_booke_pinit(mmu_t, pmap_t);
  303 static void             mmu_booke_pinit0(mmu_t, pmap_t);
  304 static void             mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t,
  305     vm_prot_t);
  306 static void             mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int);
  307 static void             mmu_booke_qremove(mmu_t, vm_offset_t, int);
  308 static void             mmu_booke_release(mmu_t, pmap_t);
  309 static void             mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
  310 static void             mmu_booke_remove_all(mmu_t, vm_page_t);
  311 static void             mmu_booke_remove_write(mmu_t, vm_page_t);
  312 static void             mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t);
  313 static void             mmu_booke_zero_page(mmu_t, vm_page_t);
  314 static void             mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int);
  315 static void             mmu_booke_zero_page_idle(mmu_t, vm_page_t);
  316 static void             mmu_booke_activate(mmu_t, struct thread *);
  317 static void             mmu_booke_deactivate(mmu_t, struct thread *);
  318 static void             mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t);
  319 static void             *mmu_booke_mapdev(mmu_t, vm_paddr_t, vm_size_t);
  320 static void             *mmu_booke_mapdev_attr(mmu_t, vm_paddr_t, vm_size_t, vm_memattr_t);
  321 static void             mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t);
  322 static vm_paddr_t       mmu_booke_kextract(mmu_t, vm_offset_t);
  323 static void             mmu_booke_kenter(mmu_t, vm_offset_t, vm_paddr_t);
  324 static void             mmu_booke_kenter_attr(mmu_t, vm_offset_t, vm_paddr_t, vm_memattr_t);
  325 static void             mmu_booke_kremove(mmu_t, vm_offset_t);
  326 static boolean_t        mmu_booke_dev_direct_mapped(mmu_t, vm_paddr_t, vm_size_t);
  327 static void             mmu_booke_sync_icache(mmu_t, pmap_t, vm_offset_t,
  328     vm_size_t);
  329 static void             mmu_booke_dumpsys_map(mmu_t, vm_paddr_t pa, size_t,
  330     void **);
  331 static void             mmu_booke_dumpsys_unmap(mmu_t, vm_paddr_t pa, size_t,
  332     void *);
  333 static void             mmu_booke_scan_init(mmu_t);
  334 static vm_offset_t      mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m);
  335 static void             mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr);
  336 static int              mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr,
  337     vm_size_t sz, vm_memattr_t mode);
  338 
  339 static mmu_method_t mmu_booke_methods[] = {
  340         /* pmap dispatcher interface */
  341         MMUMETHOD(mmu_clear_modify,     mmu_booke_clear_modify),
  342         MMUMETHOD(mmu_copy,             mmu_booke_copy),
  343         MMUMETHOD(mmu_copy_page,        mmu_booke_copy_page),
  344         MMUMETHOD(mmu_copy_pages,       mmu_booke_copy_pages),
  345         MMUMETHOD(mmu_enter,            mmu_booke_enter),
  346         MMUMETHOD(mmu_enter_object,     mmu_booke_enter_object),
  347         MMUMETHOD(mmu_enter_quick,      mmu_booke_enter_quick),
  348         MMUMETHOD(mmu_extract,          mmu_booke_extract),
  349         MMUMETHOD(mmu_extract_and_hold, mmu_booke_extract_and_hold),
  350         MMUMETHOD(mmu_init,             mmu_booke_init),
  351         MMUMETHOD(mmu_is_modified,      mmu_booke_is_modified),
  352         MMUMETHOD(mmu_is_prefaultable,  mmu_booke_is_prefaultable),
  353         MMUMETHOD(mmu_is_referenced,    mmu_booke_is_referenced),
  354         MMUMETHOD(mmu_ts_referenced,    mmu_booke_ts_referenced),
  355         MMUMETHOD(mmu_map,              mmu_booke_map),
  356         MMUMETHOD(mmu_mincore,          mmu_booke_mincore),
  357         MMUMETHOD(mmu_object_init_pt,   mmu_booke_object_init_pt),
  358         MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick),
  359         MMUMETHOD(mmu_page_init,        mmu_booke_page_init),
  360         MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings),
  361         MMUMETHOD(mmu_pinit,            mmu_booke_pinit),
  362         MMUMETHOD(mmu_pinit0,           mmu_booke_pinit0),
  363         MMUMETHOD(mmu_protect,          mmu_booke_protect),
  364         MMUMETHOD(mmu_qenter,           mmu_booke_qenter),
  365         MMUMETHOD(mmu_qremove,          mmu_booke_qremove),
  366         MMUMETHOD(mmu_release,          mmu_booke_release),
  367         MMUMETHOD(mmu_remove,           mmu_booke_remove),
  368         MMUMETHOD(mmu_remove_all,       mmu_booke_remove_all),
  369         MMUMETHOD(mmu_remove_write,     mmu_booke_remove_write),
  370         MMUMETHOD(mmu_sync_icache,      mmu_booke_sync_icache),
  371         MMUMETHOD(mmu_unwire,           mmu_booke_unwire),
  372         MMUMETHOD(mmu_zero_page,        mmu_booke_zero_page),
  373         MMUMETHOD(mmu_zero_page_area,   mmu_booke_zero_page_area),
  374         MMUMETHOD(mmu_zero_page_idle,   mmu_booke_zero_page_idle),
  375         MMUMETHOD(mmu_activate,         mmu_booke_activate),
  376         MMUMETHOD(mmu_deactivate,       mmu_booke_deactivate),
  377         MMUMETHOD(mmu_quick_enter_page, mmu_booke_quick_enter_page),
  378         MMUMETHOD(mmu_quick_remove_page, mmu_booke_quick_remove_page),
  379 
  380         /* Internal interfaces */
  381         MMUMETHOD(mmu_bootstrap,        mmu_booke_bootstrap),
  382         MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped),
  383         MMUMETHOD(mmu_mapdev,           mmu_booke_mapdev),
  384         MMUMETHOD(mmu_mapdev_attr,      mmu_booke_mapdev_attr),
  385         MMUMETHOD(mmu_kenter,           mmu_booke_kenter),
  386         MMUMETHOD(mmu_kenter_attr,      mmu_booke_kenter_attr),
  387         MMUMETHOD(mmu_kextract,         mmu_booke_kextract),
  388 /*      MMUMETHOD(mmu_kremove,          mmu_booke_kremove),     */
  389         MMUMETHOD(mmu_unmapdev,         mmu_booke_unmapdev),
  390         MMUMETHOD(mmu_change_attr,      mmu_booke_change_attr),
  391 
  392         /* dumpsys() support */
  393         MMUMETHOD(mmu_dumpsys_map,      mmu_booke_dumpsys_map),
  394         MMUMETHOD(mmu_dumpsys_unmap,    mmu_booke_dumpsys_unmap),
  395         MMUMETHOD(mmu_scan_init,        mmu_booke_scan_init),
  396 
  397         { 0, 0 }
  398 };
  399 
  400 MMU_DEF(booke_mmu, MMU_TYPE_BOOKE, mmu_booke_methods, 0);
  401 
  402 static __inline uint32_t
  403 tlb_calc_wimg(vm_paddr_t pa, vm_memattr_t ma)
  404 {
  405         uint32_t attrib;
  406         int i;
  407 
  408         if (ma != VM_MEMATTR_DEFAULT) {
  409                 switch (ma) {
  410                 case VM_MEMATTR_UNCACHEABLE:
  411                         return (MAS2_I | MAS2_G);
  412                 case VM_MEMATTR_WRITE_COMBINING:
  413                 case VM_MEMATTR_WRITE_BACK:
  414                 case VM_MEMATTR_PREFETCHABLE:
  415                         return (MAS2_I);
  416                 case VM_MEMATTR_WRITE_THROUGH:
  417                         return (MAS2_W | MAS2_M);
  418                 case VM_MEMATTR_CACHEABLE:
  419                         return (MAS2_M);
  420                 }
  421         }
  422 
  423         /*
  424          * Assume the page is cache inhibited and access is guarded unless
  425          * it's in our available memory array.
  426          */
  427         attrib = _TLB_ENTRY_IO;
  428         for (i = 0; i < physmem_regions_sz; i++) {
  429                 if ((pa >= physmem_regions[i].mr_start) &&
  430                     (pa < (physmem_regions[i].mr_start +
  431                      physmem_regions[i].mr_size))) {
  432                         attrib = _TLB_ENTRY_MEM;
  433                         break;
  434                 }
  435         }
  436 
  437         return (attrib);
  438 }
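       /*
        * Example: a physical address inside one of the physmem_regions[] entries
        * (ordinary RAM) gets _TLB_ENTRY_MEM above, while a device window such as
        * a PCI BAR, which is not listed there, falls through to _TLB_ENTRY_IO
        * (cache-inhibited, guarded).
        */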
  439 
  440 static inline void
  441 tlb_miss_lock(void)
  442 {
  443 #ifdef SMP
  444         struct pcpu *pc;
  445 
  446         if (!smp_started)
  447                 return;
  448 
  449         STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
  450                 if (pc != pcpup) {
  451 
  452                         CTR3(KTR_PMAP, "%s: tlb miss LOCK of CPU=%d, "
  453                             "tlb_lock=%p", __func__, pc->pc_cpuid, pc->pc_booke_tlb_lock);
  454 
  455                         KASSERT((pc->pc_cpuid != PCPU_GET(cpuid)),
  456                             ("tlb_miss_lock: tried to lock self"));
  457 
  458                         tlb_lock(pc->pc_booke_tlb_lock);
  459 
  460                         CTR1(KTR_PMAP, "%s: locked", __func__);
  461                 }
  462         }
  463 #endif
  464 }
  465 
  466 static inline void
  467 tlb_miss_unlock(void)
  468 {
  469 #ifdef SMP
  470         struct pcpu *pc;
  471 
  472         if (!smp_started)
  473                 return;
  474 
  475         STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
  476                 if (pc != pcpup) {
  477                         CTR2(KTR_PMAP, "%s: tlb miss UNLOCK of CPU=%d",
  478                             __func__, pc->pc_cpuid);
  479 
  480                         tlb_unlock(pc->pc_booke_tlb_lock);
  481 
  482                         CTR1(KTR_PMAP, "%s: unlocked", __func__);
  483                 }
  484         }
  485 #endif
  486 }
  487 
   488 /* Read the number of entries and ways in TLB0. */
  489 static __inline void
  490 tlb0_get_tlbconf(void)
  491 {
  492         uint32_t tlb0_cfg;
  493 
  494         tlb0_cfg = mfspr(SPR_TLB0CFG);
  495         tlb0_entries = tlb0_cfg & TLBCFG_NENTRY_MASK;
  496         tlb0_ways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT;
  497         tlb0_entries_per_way = tlb0_entries / tlb0_ways;
  498 }
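       /*
        * For example, an e500v2 core typically reports a 512-entry, 4-way
        * set-associative TLB0, giving tlb0_entries_per_way = 128.
        */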
  499 
   500 /* Read the number of entries in TLB1. */
  501 static __inline void
  502 tlb1_get_tlbconf(void)
  503 {
  504         uint32_t tlb1_cfg;
  505 
  506         tlb1_cfg = mfspr(SPR_TLB1CFG);
  507         tlb1_entries = tlb1_cfg & TLBCFG_NENTRY_MASK;
  508 }
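       /* For example, an e500v2 core reports 16 TLB1 entries. */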
  509 
  510 /**************************************************************************/
  511 /* Page table related */
  512 /**************************************************************************/
  513 
  514 /* Initialize pool of kva ptbl buffers. */
  515 static void
  516 ptbl_init(void)
  517 {
  518         int i;
  519 
  520         CTR3(KTR_PMAP, "%s: s (ptbl_bufs = 0x%08x size 0x%08x)", __func__,
  521             (uint32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS);
  522         CTR3(KTR_PMAP, "%s: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)",
  523             __func__, ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE);
  524 
  525         mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF);
  526         TAILQ_INIT(&ptbl_buf_freelist);
  527 
  528         for (i = 0; i < PTBL_BUFS; i++) {
  529                 ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE;
  530                 TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link);
  531         }
  532 }
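       /*
        * Sizing sketch: with PTBL_BUFS = 128 * 16 = 2048 bufs and, assuming
        * PTBL_PAGES == 2 with 4KB pages, 8KB of KVA per buf, the buf pool spans
        * 16MB, matching the 0xc100_4000 - 0xc200_3fff "ptbl bufs" window in the
        * VM layout notes at the top of this file.
        */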
  533 
  534 /* Get a ptbl_buf from the freelist. */
  535 static struct ptbl_buf *
  536 ptbl_buf_alloc(void)
  537 {
  538         struct ptbl_buf *buf;
  539 
  540         mtx_lock(&ptbl_buf_freelist_lock);
  541         buf = TAILQ_FIRST(&ptbl_buf_freelist);
  542         if (buf != NULL)
  543                 TAILQ_REMOVE(&ptbl_buf_freelist, buf, link);
  544         mtx_unlock(&ptbl_buf_freelist_lock);
  545 
  546         CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf);
  547 
  548         return (buf);
  549 }
  550 
   551 /* Return ptbl buf to the free pool. */
  552 static void
  553 ptbl_buf_free(struct ptbl_buf *buf)
  554 {
  555 
  556         CTR2(KTR_PMAP, "%s: buf = %p", __func__, buf);
  557 
  558         mtx_lock(&ptbl_buf_freelist_lock);
  559         TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link);
  560         mtx_unlock(&ptbl_buf_freelist_lock);
  561 }
  562 
  563 /*
   564  * Search the pmap's list of allocated ptbl bufs for the buf that maps the given ptbl and free it.
  565  */
  566 static void
  567 ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl)
  568 {
  569         struct ptbl_buf *pbuf;
  570 
  571         CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);
  572 
  573         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  574 
  575         TAILQ_FOREACH(pbuf, &pmap->pm_ptbl_list, link)
  576                 if (pbuf->kva == (vm_offset_t)ptbl) {
  577                         /* Remove from pmap ptbl buf list. */
  578                         TAILQ_REMOVE(&pmap->pm_ptbl_list, pbuf, link);
  579 
  580                         /* Free corresponding ptbl buf. */
  581                         ptbl_buf_free(pbuf);
  582                         break;
  583                 }
  584 }
  585 
  586 /* Allocate page table. */
  587 static pte_t *
  588 ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
  589 {
  590         vm_page_t mtbl[PTBL_PAGES];
  591         vm_page_t m;
  592         struct ptbl_buf *pbuf;
  593         unsigned int pidx;
  594         pte_t *ptbl;
  595         int i, j;
  596 
  597         CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
  598             (pmap == kernel_pmap), pdir_idx);
  599 
  600         KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
  601             ("ptbl_alloc: invalid pdir_idx"));
  602         KASSERT((pmap->pm_pdir[pdir_idx] == NULL),
  603             ("pte_alloc: valid ptbl entry exists!"));
  604 
  605         pbuf = ptbl_buf_alloc();
  606         if (pbuf == NULL)
  607                 panic("pte_alloc: couldn't alloc kernel virtual memory");
  608                 
  609         ptbl = (pte_t *)pbuf->kva;
  610 
  611         CTR2(KTR_PMAP, "%s: ptbl kva = %p", __func__, ptbl);
  612 
   613         /* Allocate ptbl pages; this may sleep unless nosleep is set. */
  614         for (i = 0; i < PTBL_PAGES; i++) {
  615                 pidx = (PTBL_PAGES * pdir_idx) + i;
  616                 while ((m = vm_page_alloc(NULL, pidx,
  617                     VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
  618                         PMAP_UNLOCK(pmap);
  619                         rw_wunlock(&pvh_global_lock);
  620                         if (nosleep) {
  621                                 ptbl_free_pmap_ptbl(pmap, ptbl);
  622                                 for (j = 0; j < i; j++)
  623                                         vm_page_free(mtbl[j]);
  624                                 atomic_subtract_int(&vm_cnt.v_wire_count, i);
  625                                 return (NULL);
  626                         }
  627                         VM_WAIT;
  628                         rw_wlock(&pvh_global_lock);
  629                         PMAP_LOCK(pmap);
  630                 }
  631                 mtbl[i] = m;
  632         }
  633 
  634         /* Map allocated pages into kernel_pmap. */
  635         mmu_booke_qenter(mmu, (vm_offset_t)ptbl, mtbl, PTBL_PAGES);
  636 
  637         /* Zero whole ptbl. */
  638         bzero((caddr_t)ptbl, PTBL_PAGES * PAGE_SIZE);
  639 
  640         /* Add pbuf to the pmap ptbl bufs list. */
  641         TAILQ_INSERT_TAIL(&pmap->pm_ptbl_list, pbuf, link);
  642 
  643         return (ptbl);
  644 }
  645 
  646 /* Free ptbl pages and invalidate pdir entry. */
  647 static void
  648 ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
  649 {
  650         pte_t *ptbl;
  651         vm_paddr_t pa;
  652         vm_offset_t va;
  653         vm_page_t m;
  654         int i;
  655 
  656         CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
  657             (pmap == kernel_pmap), pdir_idx);
  658 
  659         KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
  660             ("ptbl_free: invalid pdir_idx"));
  661 
  662         ptbl = pmap->pm_pdir[pdir_idx];
  663 
  664         CTR2(KTR_PMAP, "%s: ptbl = %p", __func__, ptbl);
  665 
  666         KASSERT((ptbl != NULL), ("ptbl_free: null ptbl"));
  667 
  668         /*
  669          * Invalidate the pdir entry as soon as possible, so that other CPUs
  670          * don't attempt to look up the page tables we are releasing.
  671          */
  672         mtx_lock_spin(&tlbivax_mutex);
  673         tlb_miss_lock();
  674         
  675         pmap->pm_pdir[pdir_idx] = NULL;
  676 
  677         tlb_miss_unlock();
  678         mtx_unlock_spin(&tlbivax_mutex);
  679 
  680         for (i = 0; i < PTBL_PAGES; i++) {
  681                 va = ((vm_offset_t)ptbl + (i * PAGE_SIZE));
  682                 pa = pte_vatopa(mmu, kernel_pmap, va);
  683                 m = PHYS_TO_VM_PAGE(pa);
  684                 vm_page_free_zero(m);
  685                 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
  686                 mmu_booke_kremove(mmu, va);
  687         }
  688 
  689         ptbl_free_pmap_ptbl(pmap, ptbl);
  690 }
  691 
  692 /*
  693  * Decrement ptbl pages hold count and attempt to free ptbl pages.
  694  * Called when removing pte entry from ptbl.
  695  *
  696  * Return 1 if ptbl pages were freed.
  697  */
  698 static int
  699 ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
  700 {
  701         pte_t *ptbl;
  702         vm_paddr_t pa;
  703         vm_page_t m;
  704         int i;
  705 
  706         CTR4(KTR_PMAP, "%s: pmap = %p su = %d pdir_idx = %d", __func__, pmap,
  707             (pmap == kernel_pmap), pdir_idx);
  708 
  709         KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
  710             ("ptbl_unhold: invalid pdir_idx"));
  711         KASSERT((pmap != kernel_pmap),
  712             ("ptbl_unhold: unholding kernel ptbl!"));
  713 
  714         ptbl = pmap->pm_pdir[pdir_idx];
  715 
  716         //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl);
  717         KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS),
  718             ("ptbl_unhold: non kva ptbl"));
  719 
  720         /* decrement hold count */
  721         for (i = 0; i < PTBL_PAGES; i++) {
  722                 pa = pte_vatopa(mmu, kernel_pmap,
  723                     (vm_offset_t)ptbl + (i * PAGE_SIZE));
  724                 m = PHYS_TO_VM_PAGE(pa);
  725                 m->wire_count--;
  726         }
  727 
  728         /*
   729          * Free ptbl pages if there are no pte entries in this ptbl.
  730          * wire_count has the same value for all ptbl pages, so check the last
  731          * page.
  732          */
  733         if (m->wire_count == 0) {
  734                 ptbl_free(mmu, pmap, pdir_idx);
  735 
  736                 //debugf("ptbl_unhold: e (freed ptbl)\n");
  737                 return (1);
  738         }
  739 
  740         return (0);
  741 }
  742 
  743 /*
  744  * Increment hold count for ptbl pages. This routine is used when a new pte
  745  * entry is being inserted into the ptbl.
  746  */
  747 static void
  748 ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx)
  749 {
  750         vm_paddr_t pa;
  751         pte_t *ptbl;
  752         vm_page_t m;
  753         int i;
  754 
  755         CTR3(KTR_PMAP, "%s: pmap = %p pdir_idx = %d", __func__, pmap,
  756             pdir_idx);
  757 
  758         KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)),
  759             ("ptbl_hold: invalid pdir_idx"));
  760         KASSERT((pmap != kernel_pmap),
  761             ("ptbl_hold: holding kernel ptbl!"));
  762 
  763         ptbl = pmap->pm_pdir[pdir_idx];
  764 
  765         KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl"));
  766 
  767         for (i = 0; i < PTBL_PAGES; i++) {
  768                 pa = pte_vatopa(mmu, kernel_pmap,
  769                     (vm_offset_t)ptbl + (i * PAGE_SIZE));
  770                 m = PHYS_TO_VM_PAGE(pa);
  771                 m->wire_count++;
  772         }
  773 }
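       /*
        * Reference counting summary: every valid pte in a user ptbl contributes
        * one wire_count reference to each of that ptbl's pages (ptbl_hold() on
        * insert, ptbl_unhold() on remove via PTBL_UNHOLD). Once the count drops
        * to zero, ptbl_unhold() calls ptbl_free(), which clears the pdir slot
        * and releases the pages.
        */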
  774 
  775 /* Allocate pv_entry structure. */
  776 pv_entry_t
  777 pv_alloc(void)
  778 {
  779         pv_entry_t pv;
  780 
  781         pv_entry_count++;
  782         if (pv_entry_count > pv_entry_high_water)
  783                 pagedaemon_wakeup();
  784         pv = uma_zalloc(pvzone, M_NOWAIT);
  785 
  786         return (pv);
  787 }
  788 
  789 /* Free pv_entry structure. */
  790 static __inline void
  791 pv_free(pv_entry_t pve)
  792 {
  793 
  794         pv_entry_count--;
  795         uma_zfree(pvzone, pve);
  796 }
  797 
  798 
  799 /* Allocate and initialize pv_entry structure. */
  800 static void
  801 pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m)
  802 {
  803         pv_entry_t pve;
  804 
  805         //int su = (pmap == kernel_pmap);
  806         //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su,
  807         //      (u_int32_t)pmap, va, (u_int32_t)m);
  808 
  809         pve = pv_alloc();
  810         if (pve == NULL)
  811                 panic("pv_insert: no pv entries!");
  812 
  813         pve->pv_pmap = pmap;
  814         pve->pv_va = va;
  815 
  816         /* add to pv_list */
  817         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  818         rw_assert(&pvh_global_lock, RA_WLOCKED);
  819 
  820         TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link);
  821 
  822         //debugf("pv_insert: e\n");
  823 }
  824 
  825 /* Destroy pv entry. */
  826 static void
  827 pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m)
  828 {
  829         pv_entry_t pve;
  830 
  831         //int su = (pmap == kernel_pmap);
  832         //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va);
  833 
  834         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
  835         rw_assert(&pvh_global_lock, RA_WLOCKED);
  836 
  837         /* find pv entry */
  838         TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) {
  839                 if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) {
  840                         /* remove from pv_list */
  841                         TAILQ_REMOVE(&m->md.pv_list, pve, pv_link);
  842                         if (TAILQ_EMPTY(&m->md.pv_list))
  843                                 vm_page_aflag_clear(m, PGA_WRITEABLE);
  844 
  845                         /* free pv entry struct */
  846                         pv_free(pve);
  847                         break;
  848                 }
  849         }
  850 
  851         //debugf("pv_remove: e\n");
  852 }
  853 
  854 /*
   855  * Clean the pte entry and try to free the page table page if requested.
  856  *
  857  * Return 1 if ptbl pages were freed, otherwise return 0.
  858  */
  859 static int
  860 pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, uint8_t flags)
  861 {
  862         unsigned int pdir_idx = PDIR_IDX(va);
  863         unsigned int ptbl_idx = PTBL_IDX(va);
  864         vm_page_t m;
  865         pte_t *ptbl;
  866         pte_t *pte;
  867 
  868         //int su = (pmap == kernel_pmap);
  869         //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n",
  870         //              su, (u_int32_t)pmap, va, flags);
  871 
  872         ptbl = pmap->pm_pdir[pdir_idx];
  873         KASSERT(ptbl, ("pte_remove: null ptbl"));
  874 
  875         pte = &ptbl[ptbl_idx];
  876 
  877         if (pte == NULL || !PTE_ISVALID(pte))
  878                 return (0);
  879 
  880         if (PTE_ISWIRED(pte))
  881                 pmap->pm_stats.wired_count--;
  882 
  883         /* Handle managed entry. */
  884         if (PTE_ISMANAGED(pte)) {
  885                 /* Get vm_page_t for mapped pte. */
  886                 m = PHYS_TO_VM_PAGE(PTE_PA(pte));
  887 
  888                 if (PTE_ISMODIFIED(pte))
  889                         vm_page_dirty(m);
  890 
  891                 if (PTE_ISREFERENCED(pte))
  892                         vm_page_aflag_set(m, PGA_REFERENCED);
  893 
  894                 pv_remove(pmap, va, m);
  895         }
  896 
  897         mtx_lock_spin(&tlbivax_mutex);
  898         tlb_miss_lock();
  899 
  900         tlb0_flush_entry(va);
  901         *pte = 0;
  902 
  903         tlb_miss_unlock();
  904         mtx_unlock_spin(&tlbivax_mutex);
  905 
  906         pmap->pm_stats.resident_count--;
  907 
  908         if (flags & PTBL_UNHOLD) {
  909                 //debugf("pte_remove: e (unhold)\n");
  910                 return (ptbl_unhold(mmu, pmap, pdir_idx));
  911         }
  912 
  913         //debugf("pte_remove: e\n");
  914         return (0);
  915 }
  916 
  917 /*
  918  * Insert PTE for a given page and virtual address.
  919  */
  920 static int
  921 pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, uint32_t flags,
  922     boolean_t nosleep)
  923 {
  924         unsigned int pdir_idx = PDIR_IDX(va);
  925         unsigned int ptbl_idx = PTBL_IDX(va);
  926         pte_t *ptbl, *pte;
  927 
  928         CTR4(KTR_PMAP, "%s: su = %d pmap = %p va = %p", __func__,
  929             pmap == kernel_pmap, pmap, va);
  930 
  931         /* Get the page table pointer. */
  932         ptbl = pmap->pm_pdir[pdir_idx];
  933 
  934         if (ptbl == NULL) {
  935                 /* Allocate page table pages. */
  936                 ptbl = ptbl_alloc(mmu, pmap, pdir_idx, nosleep);
  937                 if (ptbl == NULL) {
  938                         KASSERT(nosleep, ("nosleep and NULL ptbl"));
  939                         return (ENOMEM);
  940                 }
  941         } else {
  942                 /*
   943                  * Check if there is a valid mapping for the requested
   944                  * va; if there is, remove it.
  945                  */
  946                 pte = &pmap->pm_pdir[pdir_idx][ptbl_idx];
  947                 if (PTE_ISVALID(pte)) {
  948                         pte_remove(mmu, pmap, va, PTBL_HOLD);
  949                 } else {
  950                         /*
   951                          * pte is not used; increment the hold count
  952                          * for ptbl pages.
  953                          */
  954                         if (pmap != kernel_pmap)
  955                                 ptbl_hold(mmu, pmap, pdir_idx);
  956                 }
  957         }
  958 
  959         /*
  960          * Insert pv_entry into pv_list for mapped page if part of managed
  961          * memory.
  962          */
  963         if ((m->oflags & VPO_UNMANAGED) == 0) {
  964                 flags |= PTE_MANAGED;
  965 
  966                 /* Create and insert pv entry. */
  967                 pv_insert(pmap, va, m);
  968         }
  969 
  970         pmap->pm_stats.resident_count++;
  971         
  972         mtx_lock_spin(&tlbivax_mutex);
  973         tlb_miss_lock();
  974 
  975         tlb0_flush_entry(va);
  976         if (pmap->pm_pdir[pdir_idx] == NULL) {
  977                 /*
  978                  * If we just allocated a new page table, hook it in
  979                  * the pdir.
  980                  */
  981                 pmap->pm_pdir[pdir_idx] = ptbl;
  982         }
  983         pte = &(pmap->pm_pdir[pdir_idx][ptbl_idx]);
  984         *pte = PTE_RPN_FROM_PA(VM_PAGE_TO_PHYS(m));
  985         *pte |= (PTE_VALID | flags | PTE_PS_4KB); /* 4KB pages only */
  986 
  987         tlb_miss_unlock();
  988         mtx_unlock_spin(&tlbivax_mutex);
  989         return (0);
  990 }
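       /*
        * Usage sketch (flag names as used elsewhere in this file): a writable,
        * memory-coherent 4KB mapping could be entered roughly as
        *
        *   error = pte_enter(mmu, pmap, m, va, PTE_SR | PTE_SW | PTE_M, nosleep);
        *
        * pte_enter() adds PTE_VALID and PTE_PS_4KB itself, hooks a freshly
        * allocated ptbl into pm_pdir[] when needed, and maintains the ptbl hold
        * counts (user pmaps) and pv list entries (managed pages).
        */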
  991 
  992 /* Return the pa for the given pmap/va. */
  993 static vm_paddr_t
  994 pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va)
  995 {
  996         vm_paddr_t pa = 0;
  997         pte_t *pte;
  998 
  999         pte = pte_find(mmu, pmap, va);
 1000         if ((pte != NULL) && PTE_ISVALID(pte))
 1001                 pa = (PTE_PA(pte) | (va & PTE_PA_MASK));
 1002         return (pa);
 1003 }
 1004 
 1005 /* Get a pointer to a PTE in a page table. */
 1006 static pte_t *
 1007 pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va)
 1008 {
 1009         unsigned int pdir_idx = PDIR_IDX(va);
 1010         unsigned int ptbl_idx = PTBL_IDX(va);
 1011 
 1012         KASSERT((pmap != NULL), ("pte_find: invalid pmap"));
 1013 
 1014         if (pmap->pm_pdir[pdir_idx])
 1015                 return (&(pmap->pm_pdir[pdir_idx][ptbl_idx]));
 1016 
 1017         return (NULL);
 1018 }
 1019 
 1020 /* Set up kernel page tables. */
 1021 static void
 1022 kernel_pte_alloc(vm_offset_t data_end, vm_offset_t addr, vm_offset_t pdir)
 1023 {
 1024         int             i;
 1025         vm_offset_t     va;
 1026         pte_t           *pte;
 1027 
 1028         /* Initialize kernel pdir */
 1029         for (i = 0; i < kernel_ptbls; i++)
 1030                 kernel_pmap->pm_pdir[kptbl_min + i] =
 1031                     (pte_t *)(pdir + (i * PAGE_SIZE * PTBL_PAGES));
 1032 
 1033         /*
  1034          * Fill in PTEs covering kernel code and data. They are not required
  1035          * for address translation, as this area is covered by static TLB1
  1036          * entries, but they are needed for pte_vatopa() to work correctly
  1037          * with kernel area addresses.
 1038          */
 1039         for (va = addr; va < data_end; va += PAGE_SIZE) {
 1040                 pte = &(kernel_pmap->pm_pdir[PDIR_IDX(va)][PTBL_IDX(va)]);
 1041                 *pte = PTE_RPN_FROM_PA(kernload + (va - kernstart));
 1042                 *pte |= PTE_M | PTE_SR | PTE_SW | PTE_SX | PTE_WIRED |
 1043                     PTE_VALID | PTE_PS_4KB;
 1044         }
 1045 }
 1046 
 1047 /**************************************************************************/
 1048 /* PMAP related */
 1049 /**************************************************************************/
 1050 
 1051 /*
 1052  * This is called during booke_init, before the system is really initialized.
 1053  */
 1054 static void
 1055 mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
 1056 {
 1057         vm_paddr_t phys_kernelend;
 1058         struct mem_region *mp, *mp1;
 1059         int cnt, i, j;
 1060         vm_paddr_t s, e, sz;
 1061         vm_paddr_t physsz, hwphyssz;
 1062         u_int phys_avail_count;
 1063         vm_size_t kstack0_sz;
 1064         vm_offset_t kernel_pdir, kstack0;
 1065         vm_paddr_t kstack0_phys;
 1066         void *dpcpu;
 1067 
 1068         debugf("mmu_booke_bootstrap: entered\n");
 1069 
 1070         /* Set interesting system properties */
 1071         hw_direct_map = 0;
 1072         elf32_nxstack = 1;
 1073 
 1074         /* Initialize invalidation mutex */
 1075         mtx_init(&tlbivax_mutex, "tlbivax", NULL, MTX_SPIN);
 1076 
 1077         /* Read TLB0 size and associativity. */
 1078         tlb0_get_tlbconf();
 1079 
 1080         /*
 1081          * Align kernel start and end address (kernel image).
 1082          * Note that kernel end does not necessarily relate to kernsize.
 1083          * kernsize is the size of the kernel that is actually mapped.
 1084          */
 1085         kernstart = trunc_page(start);
 1086         data_start = round_page(kernelend);
 1087         data_end = data_start;
 1088 
 1089         /*
 1090          * Addresses of preloaded modules (like file systems) use
 1091          * physical addresses. Make sure we relocate those into
 1092          * virtual addresses.
 1093          */
 1094         preload_addr_relocate = kernstart - kernload;
 1095 
 1096         /* Allocate the dynamic per-cpu area. */
 1097         dpcpu = (void *)data_end;
 1098         data_end += DPCPU_SIZE;
 1099 
 1100         /* Allocate space for the message buffer. */
 1101         msgbufp = (struct msgbuf *)data_end;
 1102         data_end += msgbufsize;
 1103         debugf(" msgbufp at 0x%08x end = 0x%08x\n", (uint32_t)msgbufp,
 1104             data_end);
 1105 
 1106         data_end = round_page(data_end);
 1107 
 1108         /* Allocate space for ptbl_bufs. */
 1109         ptbl_bufs = (struct ptbl_buf *)data_end;
 1110         data_end += sizeof(struct ptbl_buf) * PTBL_BUFS;
 1111         debugf(" ptbl_bufs at 0x%08x end = 0x%08x\n", (uint32_t)ptbl_bufs,
 1112             data_end);
 1113 
 1114         data_end = round_page(data_end);
 1115 
 1116         /* Allocate PTE tables for kernel KVA. */
 1117         kernel_pdir = data_end;
 1118         kernel_ptbls = howmany(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
 1119             PDIR_SIZE);
 1120         data_end += kernel_ptbls * PTBL_PAGES * PAGE_SIZE;
 1121         debugf(" kernel ptbls: %d\n", kernel_ptbls);
 1122         debugf(" kernel pdir at 0x%08x end = 0x%08x\n", kernel_pdir, data_end);
 1123 
 1124         debugf(" data_end: 0x%08x\n", data_end);
 1125         if (data_end - kernstart > kernsize) {
 1126                 kernsize += tlb1_mapin_region(kernstart + kernsize,
 1127                     kernload + kernsize, (data_end - kernstart) - kernsize);
 1128         }
 1129         data_end = kernstart + kernsize;
 1130         debugf(" updated data_end: 0x%08x\n", data_end);
 1131 
 1132         /*
 1133          * Clear the structures - note we can only do it safely after the
 1134          * possible additional TLB1 translations are in place (above) so that
  1135          * the whole range up to the currently calculated 'data_end' is covered.
 1136          */
 1137         dpcpu_init(dpcpu, 0);
  1138         memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_BUFS);
 1139         memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE);
 1140 
 1141         /*******************************************************/
 1142         /* Set the start and end of kva. */
 1143         /*******************************************************/
 1144         virtual_avail = round_page(data_end);
 1145         virtual_end = VM_MAX_KERNEL_ADDRESS;
 1146 
 1147         /* Allocate KVA space for page zero/copy operations. */
 1148         zero_page_va = virtual_avail;
 1149         virtual_avail += PAGE_SIZE;
 1150         zero_page_idle_va = virtual_avail;
 1151         virtual_avail += PAGE_SIZE;
 1152         copy_page_src_va = virtual_avail;
 1153         virtual_avail += PAGE_SIZE;
 1154         copy_page_dst_va = virtual_avail;
 1155         virtual_avail += PAGE_SIZE;
 1156         debugf("zero_page_va = 0x%08x\n", zero_page_va);
 1157         debugf("zero_page_idle_va = 0x%08x\n", zero_page_idle_va);
 1158         debugf("copy_page_src_va = 0x%08x\n", copy_page_src_va);
 1159         debugf("copy_page_dst_va = 0x%08x\n", copy_page_dst_va);
 1160 
 1161         /* Initialize page zero/copy mutexes. */
 1162         mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF);
 1163         mtx_init(&copy_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF);
 1164 
 1165         /* Allocate KVA space for ptbl bufs. */
 1166         ptbl_buf_pool_vabase = virtual_avail;
 1167         virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE;
 1168         debugf("ptbl_buf_pool_vabase = 0x%08x end = 0x%08x\n",
 1169             ptbl_buf_pool_vabase, virtual_avail);
 1170 
 1171         /* Calculate corresponding physical addresses for the kernel region. */
 1172         phys_kernelend = kernload + kernsize;
 1173         debugf("kernel image and allocated data:\n");
 1174         debugf(" kernload    = 0x%09llx\n", (uint64_t)kernload);
 1175         debugf(" kernstart   = 0x%08x\n", kernstart);
 1176         debugf(" kernsize    = 0x%08x\n", kernsize);
 1177 
 1178         if (sizeof(phys_avail) / sizeof(phys_avail[0]) < availmem_regions_sz)
 1179                 panic("mmu_booke_bootstrap: phys_avail too small");
 1180 
 1181         /*
 1182          * Remove kernel physical address range from avail regions list. Page
 1183          * align all regions.  Non-page aligned memory isn't very interesting
 1184          * to us.  Also, sort the entries for ascending addresses.
 1185          */
 1186 
 1187         /* Retrieve phys/avail mem regions */
 1188         mem_regions(&physmem_regions, &physmem_regions_sz,
 1189             &availmem_regions, &availmem_regions_sz);
 1190         sz = 0;
 1191         cnt = availmem_regions_sz;
 1192         debugf("processing avail regions:\n");
 1193         for (mp = availmem_regions; mp->mr_size; mp++) {
 1194                 s = mp->mr_start;
 1195                 e = mp->mr_start + mp->mr_size;
 1196                 debugf(" %09jx-%09jx -> ", (uintmax_t)s, (uintmax_t)e);
 1197                 /* Check whether this region holds all of the kernel. */
 1198                 if (s < kernload && e > phys_kernelend) {
 1199                         availmem_regions[cnt].mr_start = phys_kernelend;
 1200                         availmem_regions[cnt++].mr_size = e - phys_kernelend;
 1201                         e = kernload;
 1202                 }
  1203                 /* Look whether this region starts within the kernel. */
 1204                 if (s >= kernload && s < phys_kernelend) {
 1205                         if (e <= phys_kernelend)
 1206                                 goto empty;
 1207                         s = phys_kernelend;
 1208                 }
 1209                 /* Now look whether this region ends within the kernel. */
 1210                 if (e > kernload && e <= phys_kernelend) {
 1211                         if (s >= kernload)
 1212                                 goto empty;
 1213                         e = kernload;
 1214                 }
 1215                 /* Now page align the start and size of the region. */
 1216                 s = round_page(s);
 1217                 e = trunc_page(e);
 1218                 if (e < s)
 1219                         e = s;
 1220                 sz = e - s;
 1221                 debugf("%09jx-%09jx = %jx\n",
 1222                     (uintmax_t)s, (uintmax_t)e, (uintmax_t)sz);
 1223 
 1224                 /* Check whether some memory is left here. */
 1225                 if (sz == 0) {
 1226                 empty:
 1227                         memmove(mp, mp + 1,
 1228                             (cnt - (mp - availmem_regions)) * sizeof(*mp));
 1229                         cnt--;
 1230                         mp--;
 1231                         continue;
 1232                 }
 1233 
 1234                 /* Do an insertion sort. */
 1235                 for (mp1 = availmem_regions; mp1 < mp; mp1++)
 1236                         if (s < mp1->mr_start)
 1237                                 break;
 1238                 if (mp1 < mp) {
 1239                         memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1);
 1240                         mp1->mr_start = s;
 1241                         mp1->mr_size = sz;
 1242                 } else {
 1243                         mp->mr_start = s;
 1244                         mp->mr_size = sz;
 1245                 }
 1246         }
 1247         availmem_regions_sz = cnt;
 1248 
 1249         /*******************************************************/
 1250         /* Steal physical memory for kernel stack from the end */
 1251         /* of the first avail region                           */
 1252         /*******************************************************/
 1253         kstack0_sz = kstack_pages * PAGE_SIZE;
 1254         kstack0_phys = availmem_regions[0].mr_start +
 1255             availmem_regions[0].mr_size;
 1256         kstack0_phys -= kstack0_sz;
 1257         availmem_regions[0].mr_size -= kstack0_sz;
 1258 
 1259         /*******************************************************/
 1260         /* Fill in phys_avail table, based on availmem_regions */
 1261         /*******************************************************/
 1262         phys_avail_count = 0;
 1263         physsz = 0;
 1264         hwphyssz = 0;
 1265         TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz);
 1266 
 1267         debugf("fill in phys_avail:\n");
 1268         for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) {
 1269 
 1270                 debugf(" region: 0x%jx - 0x%jx (0x%jx)\n",
 1271                     (uintmax_t)availmem_regions[i].mr_start,
 1272                     (uintmax_t)availmem_regions[i].mr_start +
 1273                         availmem_regions[i].mr_size,
 1274                     (uintmax_t)availmem_regions[i].mr_size);
 1275 
 1276                 if (hwphyssz != 0 &&
 1277                     (physsz + availmem_regions[i].mr_size) >= hwphyssz) {
 1278                         debugf(" hw.physmem adjust\n");
 1279                         if (physsz < hwphyssz) {
 1280                                 phys_avail[j] = availmem_regions[i].mr_start;
 1281                                 phys_avail[j + 1] =
 1282                                     availmem_regions[i].mr_start +
 1283                                     hwphyssz - physsz;
 1284                                 physsz = hwphyssz;
 1285                                 phys_avail_count++;
 1286                         }
 1287                         break;
 1288                 }
 1289 
 1290                 phys_avail[j] = availmem_regions[i].mr_start;
 1291                 phys_avail[j + 1] = availmem_regions[i].mr_start +
 1292                     availmem_regions[i].mr_size;
 1293                 phys_avail_count++;
 1294                 physsz += availmem_regions[i].mr_size;
 1295         }
 1296         physmem = btoc(physsz);
 1297 
 1298         /* Calculate the last available physical address. */
 1299         for (i = 0; phys_avail[i + 2] != 0; i += 2)
 1300                 ;
 1301         Maxmem = powerpc_btop(phys_avail[i + 1]);
 1302 
 1303         debugf("Maxmem = 0x%08lx\n", Maxmem);
 1304         debugf("phys_avail_count = %d\n", phys_avail_count);
 1305         debugf("physsz = 0x%09jx physmem = %jd (0x%09jx)\n",
 1306             (uintmax_t)physsz, (uintmax_t)physmem, (uintmax_t)physmem);
 1307 
 1308         /*******************************************************/
 1309         /* Initialize (statically allocated) kernel pmap. */
 1310         /*******************************************************/
 1311         PMAP_LOCK_INIT(kernel_pmap);
 1312         kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE;
 1313 
 1314         debugf("kernel_pmap = 0x%08x\n", (uint32_t)kernel_pmap);
 1315         debugf("kptbl_min = %d, kernel_ptbls = %d\n", kptbl_min, kernel_ptbls);
 1316         debugf("kernel pdir range: 0x%08x - 0x%08x\n",
 1317             kptbl_min * PDIR_SIZE, (kptbl_min + kernel_ptbls) * PDIR_SIZE - 1);
 1318 
 1319         kernel_pte_alloc(data_end, kernstart, kernel_pdir);
 1320         for (i = 0; i < MAXCPU; i++) {
 1321                 kernel_pmap->pm_tid[i] = TID_KERNEL;
 1322                 
 1323                 /* Initialize each CPU's tidbusy entry 0 with kernel_pmap */
 1324                 tidbusy[i][TID_KERNEL] = kernel_pmap;
 1325         }
 1326 
 1327         /* Mark kernel_pmap active on all CPUs */
 1328         CPU_FILL(&kernel_pmap->pm_active);
 1329 
 1330         /*
 1331          * Initialize the global pv list lock.
 1332          */
 1333         rw_init(&pvh_global_lock, "pmap pv global");
 1334 
 1335         /*******************************************************/
 1336         /* Final setup */
 1337         /*******************************************************/
 1338 
 1339         /* Enter kstack0 into kernel map, provide guard page */
 1340         kstack0 = virtual_avail + KSTACK_GUARD_PAGES * PAGE_SIZE;
 1341         thread0.td_kstack = kstack0;
 1342         thread0.td_kstack_pages = kstack_pages;
 1343 
 1344         debugf("kstack_sz = 0x%08x\n", kstack0_sz);
 1345         debugf("kstack0_phys at 0x%09llx - 0x%09llx\n",
 1346             kstack0_phys, kstack0_phys + kstack0_sz);
 1347         debugf("kstack0 at 0x%08x - 0x%08x\n", kstack0, kstack0 + kstack0_sz);
 1348         
 1349         virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE + kstack0_sz;
 1350         for (i = 0; i < kstack_pages; i++) {
 1351                 mmu_booke_kenter(mmu, kstack0, kstack0_phys);
 1352                 kstack0 += PAGE_SIZE;
 1353                 kstack0_phys += PAGE_SIZE;
 1354         }
 1355 
 1356         pmap_bootstrapped = 1;
 1357         
 1358         debugf("virtual_avail = %08x\n", virtual_avail);
 1359         debugf("virtual_end   = %08x\n", virtual_end);
 1360 
 1361         debugf("mmu_booke_bootstrap: exit\n");
 1362 }
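/*
 * Illustration (hosted C, not part of the kernel build): the tail of
 * mmu_booke_bootstrap() above converts availmem_regions[] into phys_avail[]
 * start/end pairs and clamps the running total to the hw.physmem limit
 * (hwphyssz).  The sketch below models only that clamping loop; the structure
 * and function names in it are hypothetical.
 */
#if 0	/* illustrative sketch only; hypothetical names, not kernel code */
#include <stdint.h>
#include <stdio.h>

struct mem_region_sketch {
	uint64_t mr_start;
	uint64_t mr_size;
};

/*
 * Build {start, end} pairs from the available regions, clamping the total
 * to 'hwphyssz' (0 means "no limit").  Returns the number of pairs written.
 */
static int
clamp_phys_avail(const struct mem_region_sketch *avail, int navail,
    uint64_t hwphyssz, uint64_t pairs[][2])
{
	uint64_t physsz = 0;
	int i, count = 0;

	for (i = 0; i < navail; i++) {
		if (hwphyssz != 0 && physsz + avail[i].mr_size >= hwphyssz) {
			if (physsz < hwphyssz) {
				/* Take only enough of this region to hit the cap. */
				pairs[count][0] = avail[i].mr_start;
				pairs[count][1] =
				    avail[i].mr_start + (hwphyssz - physsz);
				physsz = hwphyssz;
				count++;
			}
			break;
		}
		pairs[count][0] = avail[i].mr_start;
		pairs[count][1] = avail[i].mr_start + avail[i].mr_size;
		physsz += avail[i].mr_size;
		count++;
	}
	printf("usable physical memory: %ju bytes\n", (uintmax_t)physsz);
	return (count);
}
#endif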
 1363 
 1364 #ifdef SMP
 1365 void
 1366 tlb1_ap_prep(void)
 1367 {
 1368         tlb_entry_t *e, tmp;
 1369         unsigned int i;
 1370 
 1371         /* Prepare TLB1 image for AP processors */
 1372         e = __boot_tlb1;
 1373         for (i = 0; i < TLB1_ENTRIES; i++) {
 1374                 tlb1_read_entry(&tmp, i);
 1375 
 1376                 if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED))
 1377                         memcpy(e++, &tmp, sizeof(tmp));
 1378         }
 1379 }
 1380 
 1381 void
 1382 pmap_bootstrap_ap(volatile uint32_t *trcp __unused)
 1383 {
 1384         int i;
 1385 
 1386         /*
 1387          * Finish TLB1 configuration: the BSP already set up its TLB1 and we
 1388          * have the snapshot of its contents in the s/w __boot_tlb1[] table
 1389          * created by tlb1_ap_prep(), so use these values directly to
 1390          * (re)program AP's TLB1 hardware.
 1391          *
 1392          * Start at index 1 because index 0 has the kernel map.
 1393          */
 1394         for (i = 1; i < TLB1_ENTRIES; i++) {
 1395                 if (__boot_tlb1[i].mas1 & MAS1_VALID)
 1396                         tlb1_write_entry(&__boot_tlb1[i], i);
 1397         }
 1398 
 1399         set_mas4_defaults();
 1400 }
 1401 #endif
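/*
 * Illustration (hosted C, not part of the kernel build): the SMP block above
 * snapshots the BSP's valid, shared TLB1 entries into __boot_tlb1[] and later
 * replays them on each AP, leaving slot 0 (the kernel map) alone.  The sketch
 * keeps slot positions instead of compacting the snapshot, and every name in
 * it is hypothetical.
 */
#if 0	/* illustrative sketch only; hypothetical names, not kernel code */
#include <stdbool.h>

#define SK_TLB1_ENTRIES	16

struct sk_tlb_entry {
	bool		valid;
	bool		shared;
	unsigned long	virt, phys, size;
};

static struct sk_tlb_entry sk_boot_tlb1[SK_TLB1_ENTRIES];

/* BSP side: record every valid, shared entry. */
static void
sk_snapshot_shared(const struct sk_tlb_entry *hw)
{
	int i;

	for (i = 0; i < SK_TLB1_ENTRIES; i++)
		if (hw[i].valid && hw[i].shared)
			sk_boot_tlb1[i] = hw[i];
}

/* AP side: replay the snapshot, skipping slot 0 which holds the kernel map. */
static void
sk_replay_snapshot(struct sk_tlb_entry *hw)
{
	int i;

	for (i = 1; i < SK_TLB1_ENTRIES; i++)
		if (sk_boot_tlb1[i].valid)
			hw[i] = sk_boot_tlb1[i];
}
#endif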
 1402 
 1403 static void
 1404 booke_pmap_init_qpages(void)
 1405 {
 1406         struct pcpu *pc;
 1407         int i;
 1408 
 1409         CPU_FOREACH(i) {
 1410                 pc = pcpu_find(i);
 1411                 pc->pc_qmap_addr = kva_alloc(PAGE_SIZE);
 1412                 if (pc->pc_qmap_addr == 0)
 1413                         panic("pmap_init_qpages: unable to allocate KVA");
 1414         }
 1415 }
 1416 
 1417 SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, booke_pmap_init_qpages, NULL);
 1418 
 1419 /*
 1420  * Get the physical page address for the given pmap/virtual address.
 1421  */
 1422 static vm_paddr_t
 1423 mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va)
 1424 {
 1425         vm_paddr_t pa;
 1426 
 1427         PMAP_LOCK(pmap);
 1428         pa = pte_vatopa(mmu, pmap, va);
 1429         PMAP_UNLOCK(pmap);
 1430 
 1431         return (pa);
 1432 }
 1433 
 1434 /*
 1435  * Extract the physical page address associated with the given
 1436  * kernel virtual address.
 1437  */
 1438 static vm_paddr_t
 1439 mmu_booke_kextract(mmu_t mmu, vm_offset_t va)
 1440 {
 1441         tlb_entry_t e;
 1442         int i;
 1443 
 1444         /* Check TLB1 mappings */
 1445         for (i = 0; i < TLB1_ENTRIES; i++) {
 1446                 tlb1_read_entry(&e, i);
 1447                 if (!(e.mas1 & MAS1_VALID))
 1448                         continue;
 1449                 if (va >= e.virt && va < e.virt + e.size)
 1450                         return (e.phys + (va - e.virt));
 1451         }
 1452 
 1453         return (pte_vatopa(mmu, kernel_pmap, va));
 1454 }
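/*
 * Illustration (hosted C, not part of the kernel build): mmu_booke_kextract()
 * above first scans the large wired TLB1 mappings and only then falls back to
 * the page tables via pte_vatopa().  The sketch models the range scan over an
 * array of {virt, size, phys} records; all names in it are hypothetical.
 */
#if 0	/* illustrative sketch only; hypothetical names, not kernel code */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct sk_large_mapping {
	bool		valid;
	uintptr_t	virt;
	uint64_t	phys;
	size_t		size;
};

/*
 * Translate 'va' through the large mappings; return true and fill '*pa' when
 * a covering entry exists, false when the caller should consult page tables.
 */
static bool
sk_large_mapping_extract(const struct sk_large_mapping *map, int n,
    uintptr_t va, uint64_t *pa)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!map[i].valid)
			continue;
		if (va >= map[i].virt && va < map[i].virt + map[i].size) {
			*pa = map[i].phys + (va - map[i].virt);
			return (true);
		}
	}
	return (false);
}
#endif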
 1455 
 1456 /*
 1457  * Initialize the pmap module.
 1458  * Called by vm_init, to initialize any structures that the pmap
 1459  * system needs to map virtual memory.
 1460  */
 1461 static void
 1462 mmu_booke_init(mmu_t mmu)
 1463 {
 1464         int shpgperproc = PMAP_SHPGPERPROC;
 1465 
 1466         /*
 1467          * Initialize the address space (zone) for the pv entries.  Set a
 1468          * high water mark so that the system can recover from excessive
 1469          * numbers of pv entries.
 1470          */
 1471         pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
 1472             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
 1473 
 1474         TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 1475         pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count;
 1476 
 1477         TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
 1478         pv_entry_high_water = 9 * (pv_entry_max / 10);
 1479 
 1480         uma_zone_reserve_kva(pvzone, pv_entry_max);
 1481 
 1482         /* Pre-fill pvzone with initial number of pv entries. */
 1483         uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN);
 1484 
 1485         /* Initialize ptbl allocation. */
 1486         ptbl_init();
 1487 }
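/*
 * Illustration (hosted C, not part of the kernel build): mmu_booke_init()
 * above sizes the PV entry zone as shpgperproc * maxproc plus the page count
 * and places the high-water mark at 90% of that.  The numbers below are made
 * up inputs used only to show the arithmetic.
 */
#if 0	/* illustrative sketch only; example values, not kernel code */
#include <stdio.h>

int
main(void)
{
	long shpgperproc = 200;		/* example tunable value */
	long maxproc = 1044;		/* example process limit */
	long page_count = 262144;	/* example: 1 GB of 4 KB pages */

	long pv_entry_max = shpgperproc * maxproc + page_count;
	long pv_entry_high_water = 9 * (pv_entry_max / 10);

	printf("pv_entry_max = %ld, high water = %ld\n",
	    pv_entry_max, pv_entry_high_water);
	return (0);
}
#endif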
 1488 
 1489 /*
 1490  * Map a list of wired pages into kernel virtual address space.  This is
 1491  * intended for temporary mappings which do not need page modification or
 1492  * references recorded.  Existing mappings in the region are overwritten.
 1493  */
 1494 static void
 1495 mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count)
 1496 {
 1497         vm_offset_t va;
 1498 
 1499         va = sva;
 1500         while (count-- > 0) {
 1501                 mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m));
 1502                 va += PAGE_SIZE;
 1503                 m++;
 1504         }
 1505 }
 1506 
 1507 /*
 1508  * Remove page mappings from kernel virtual address space.  Intended for
 1509  * temporary mappings entered by mmu_booke_qenter.
 1510  */
 1511 static void
 1512 mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count)
 1513 {
 1514         vm_offset_t va;
 1515 
 1516         va = sva;
 1517         while (count-- > 0) {
 1518                 mmu_booke_kremove(mmu, va);
 1519                 va += PAGE_SIZE;
 1520         }
 1521 }
 1522 
 1523 /*
 1524  * Map a wired page into kernel virtual address space.
 1525  */
 1526 static void
 1527 mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_paddr_t pa)
 1528 {
 1529 
 1530         mmu_booke_kenter_attr(mmu, va, pa, VM_MEMATTR_DEFAULT);
 1531 }
 1532 
 1533 static void
 1534 mmu_booke_kenter_attr(mmu_t mmu, vm_offset_t va, vm_paddr_t pa, vm_memattr_t ma)
 1535 {
 1536         uint32_t flags;
 1537         pte_t *pte;
 1538 
 1539         KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
 1540             (va <= VM_MAX_KERNEL_ADDRESS)), ("mmu_booke_kenter: invalid va"));
 1541 
 1542         flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
 1543         flags |= tlb_calc_wimg(pa, ma) << PTE_MAS2_SHIFT;
 1544         flags |= PTE_PS_4KB;
 1545 
 1546         pte = pte_find(mmu, kernel_pmap, va);
 1547 
 1548         mtx_lock_spin(&tlbivax_mutex);
 1549         tlb_miss_lock();
 1550         
 1551         if (PTE_ISVALID(pte)) {
 1552         
 1553                 CTR1(KTR_PMAP, "%s: replacing entry!", __func__);
 1554 
 1555                 /* Flush entry from TLB0 */
 1556                 tlb0_flush_entry(va);
 1557         }
 1558 
 1559         *pte = PTE_RPN_FROM_PA(pa) | flags;
 1560 
 1561         //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x "
 1562         //              "pa=0x%08x rpn=0x%08x flags=0x%08x\n",
 1563         //              pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags);
 1564 
 1565         /* Flush the real memory from the instruction cache. */
 1566         if ((flags & (PTE_I | PTE_G)) == 0)
 1567                 __syncicache((void *)va, PAGE_SIZE);
 1568 
 1569         tlb_miss_unlock();
 1570         mtx_unlock_spin(&tlbivax_mutex);
 1571 }
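/*
 * Illustration (hosted C, not part of the kernel build): the kenter path
 * above composes a kernel PTE from the page's physical address plus
 * permission, wired, valid, WIMG and page-size bits.  The bit layout in the
 * sketch is invented purely for illustration; the real PTE_* definitions and
 * field positions live in the machine headers.
 */
#if 0	/* illustrative sketch only; hypothetical bit layout, not kernel code */
#include <stdint.h>
#include <stdio.h>

#define SK_PAGE_SHIFT	12
#define SK_PTE_VALID	0x001u
#define SK_PTE_WIRED	0x002u
#define SK_PTE_SR	0x004u			/* supervisor read */
#define SK_PTE_SW	0x008u			/* supervisor write */
#define SK_PTE_SX	0x010u			/* supervisor execute */
#define SK_PTE_WIMG(w)	(((uint32_t)(w) & 0xfu) << 5)
#define SK_PTE_RPN(pa)	((uint32_t)((pa) >> SK_PAGE_SHIFT) << SK_PAGE_SHIFT)

/* Pack a writable, executable, wired kernel mapping for 'pa'. */
static uint32_t
sk_make_kernel_pte(uint64_t pa, unsigned wimg)
{
	uint32_t flags;

	flags = SK_PTE_SR | SK_PTE_SW | SK_PTE_SX | SK_PTE_WIRED | SK_PTE_VALID;
	flags |= SK_PTE_WIMG(wimg);
	return (SK_PTE_RPN(pa) | flags);
}

int
main(void)
{
	printf("pte = 0x%08x\n", sk_make_kernel_pte(0x2000, 0));
	return (0);
}
#endif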
 1572 
 1573 /*
 1574  * Remove a page from kernel page table.
 1575  */
 1576 static void
 1577 mmu_booke_kremove(mmu_t mmu, vm_offset_t va)
 1578 {
 1579         pte_t *pte;
 1580 
 1581         CTR2(KTR_PMAP,"%s: s (va = 0x%08x)\n", __func__, va);
 1582 
 1583         KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) &&
 1584             (va <= VM_MAX_KERNEL_ADDRESS)),
 1585             ("mmu_booke_kremove: invalid va"));
 1586 
 1587         pte = pte_find(mmu, kernel_pmap, va);
 1588 
 1589         if (!PTE_ISVALID(pte)) {
 1590         
 1591                 CTR1(KTR_PMAP, "%s: invalid pte", __func__);
 1592 
 1593                 return;
 1594         }
 1595 
 1596         mtx_lock_spin(&tlbivax_mutex);
 1597         tlb_miss_lock();
 1598 
 1599         /* Invalidate entry in TLB0, update PTE. */
 1600         tlb0_flush_entry(va);
 1601         *pte = 0;
 1602 
 1603         tlb_miss_unlock();
 1604         mtx_unlock_spin(&tlbivax_mutex);
 1605 }
 1606 
 1607 /*
 1608  * Initialize pmap associated with process 0.
 1609  */
 1610 static void
 1611 mmu_booke_pinit0(mmu_t mmu, pmap_t pmap)
 1612 {
 1613 
 1614         PMAP_LOCK_INIT(pmap);
 1615         mmu_booke_pinit(mmu, pmap);
 1616         PCPU_SET(curpmap, pmap);
 1617 }
 1618 
 1619 /*
 1620  * Initialize a preallocated and zeroed pmap structure,
 1621  * such as one in a vmspace structure.
 1622  */
 1623 static void
 1624 mmu_booke_pinit(mmu_t mmu, pmap_t pmap)
 1625 {
 1626         int i;
 1627 
 1628         CTR4(KTR_PMAP, "%s: pmap = %p, proc %d '%s'", __func__, pmap,
 1629             curthread->td_proc->p_pid, curthread->td_proc->p_comm);
 1630 
 1631         KASSERT((pmap != kernel_pmap), ("pmap_pinit: initializing kernel_pmap"));
 1632 
 1633         for (i = 0; i < MAXCPU; i++)
 1634                 pmap->pm_tid[i] = TID_NONE;
 1635         CPU_ZERO(&pmap->pm_active);
 1636         bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 1637         bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES);
 1638         TAILQ_INIT(&pmap->pm_ptbl_list);
 1639 }
 1640 
 1641 /*
 1642  * Release any resources held by the given physical map.
 1643  * Called when a pmap initialized by mmu_booke_pinit is being released.
 1644  * Should only be called if the map contains no valid mappings.
 1645  */
 1646 static void
 1647 mmu_booke_release(mmu_t mmu, pmap_t pmap)
 1648 {
 1649 
 1650         KASSERT(pmap->pm_stats.resident_count == 0,
 1651             ("pmap_release: pmap resident count %ld != 0",
 1652             pmap->pm_stats.resident_count));
 1653 }
 1654 
 1655 /*
 1656  * Insert the given physical page at the specified virtual address in the
 1657  * target physical map with the protection requested. If specified, the page
 1658  * will be wired down.
 1659  */
 1660 static int
 1661 mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
 1662     vm_prot_t prot, u_int flags, int8_t psind)
 1663 {
 1664         int error;
 1665 
 1666         rw_wlock(&pvh_global_lock);
 1667         PMAP_LOCK(pmap);
 1668         error = mmu_booke_enter_locked(mmu, pmap, va, m, prot, flags, psind);
 1669         rw_wunlock(&pvh_global_lock);
 1670         PMAP_UNLOCK(pmap);
 1671         return (error);
 1672 }
 1673 
 1674 static int
 1675 mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
 1676     vm_prot_t prot, u_int pmap_flags, int8_t psind __unused)
 1677 {
 1678         pte_t *pte;
 1679         vm_paddr_t pa;
 1680         uint32_t flags;
 1681         int error, su, sync;
 1682 
 1683         pa = VM_PAGE_TO_PHYS(m);
 1684         su = (pmap == kernel_pmap);
 1685         sync = 0;
 1686 
 1687         //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x "
 1688         //              "pa=0x%08x prot=0x%08x flags=%#x)\n",
 1689         //              (u_int32_t)pmap, su, pmap->pm_tid,
 1690         //              (u_int32_t)m, va, pa, prot, flags);
 1691 
 1692         if (su) {
 1693                 KASSERT(((va >= virtual_avail) &&
 1694                     (va <= VM_MAX_KERNEL_ADDRESS)),
 1695                     ("mmu_booke_enter_locked: kernel pmap, non kernel va"));
 1696         } else {
 1697                 KASSERT((va <= VM_MAXUSER_ADDRESS),
 1698                     ("mmu_booke_enter_locked: user pmap, non user va"));
 1699         }
 1700         if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 1701                 VM_OBJECT_ASSERT_LOCKED(m->object);
 1702 
 1703         PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 1704 
 1705         /*
 1706          * If there is an existing mapping, and the physical address has not
 1707          * changed, this must be a protection or wiring change.
 1708          */
 1709         if (((pte = pte_find(mmu, pmap, va)) != NULL) &&
 1710             (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) {
 1711             
 1712                 /*
 1713                  * Before actually updating pte->flags we calculate and
 1714                  * prepare its new value in a helper var.
 1715                  */
 1716                 flags = *pte;
 1717                 flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED);
 1718 
 1719                 /* Wiring change, just update stats. */
 1720                 if ((pmap_flags & PMAP_ENTER_WIRED) != 0) {
 1721                         if (!PTE_ISWIRED(pte)) {
 1722                                 flags |= PTE_WIRED;
 1723                                 pmap->pm_stats.wired_count++;
 1724                         }
 1725                 } else {
 1726                         if (PTE_ISWIRED(pte)) {
 1727                                 flags &= ~PTE_WIRED;
 1728                                 pmap->pm_stats.wired_count--;
 1729                         }
 1730                 }
 1731 
 1732                 if (prot & VM_PROT_WRITE) {
 1733                         /* Add write permissions. */
 1734                         flags |= PTE_SW;
 1735                         if (!su)
 1736                                 flags |= PTE_UW;
 1737 
 1738                         if ((flags & PTE_MANAGED) != 0)
 1739                                 vm_page_aflag_set(m, PGA_WRITEABLE);
 1740                 } else {
 1741                         /* Handle modified pages, sense modify status. */
 1742 
 1743                         /*
 1744                          * The PTE_MODIFIED flag could be set by underlying
 1745                          * TLB misses since we last read it (above), possibly
 1746                          * other CPUs could update it so we check in the PTE
 1747                          * directly rather than rely on that saved local flags
 1748                          * copy.
 1749                          */
 1750                         if (PTE_ISMODIFIED(pte))
 1751                                 vm_page_dirty(m);
 1752                 }
 1753 
 1754                 if (prot & VM_PROT_EXECUTE) {
 1755                         flags |= PTE_SX;
 1756                         if (!su)
 1757                                 flags |= PTE_UX;
 1758 
 1759                         /*
 1760                          * Check existing flags for execute permissions: if we
 1761                          * are turning execute permissions on, icache should
 1762                          * be flushed.
 1763                          */
 1764                         if ((*pte & (PTE_UX | PTE_SX)) == 0)
 1765                                 sync++;
 1766                 }
 1767 
 1768                 flags &= ~PTE_REFERENCED;
 1769 
 1770                 /*
 1771                  * The new flags value is all calculated -- only now actually
 1772                  * update the PTE.
 1773                  */
 1774                 mtx_lock_spin(&tlbivax_mutex);
 1775                 tlb_miss_lock();
 1776 
 1777                 tlb0_flush_entry(va);
 1778                 *pte &= ~PTE_FLAGS_MASK;
 1779                 *pte |= flags;
 1780 
 1781                 tlb_miss_unlock();
 1782                 mtx_unlock_spin(&tlbivax_mutex);
 1783 
 1784         } else {
 1785                 /*
 1786                  * If there is an existing mapping, but it's for a different
 1787                  * physical address, pte_enter() will delete the old mapping.
 1788                  */
 1789                 //if ((pte != NULL) && PTE_ISVALID(pte))
 1790                 //      debugf("mmu_booke_enter_locked: replace\n");
 1791                 //else
 1792                 //      debugf("mmu_booke_enter_locked: new\n");
 1793 
 1794                 /* Now set up the flags and install the new mapping. */
 1795                 flags = (PTE_SR | PTE_VALID);
 1796                 flags |= PTE_M;
 1797 
 1798                 if (!su)
 1799                         flags |= PTE_UR;
 1800 
 1801                 if (prot & VM_PROT_WRITE) {
 1802                         flags |= PTE_SW;
 1803                         if (!su)
 1804                                 flags |= PTE_UW;
 1805 
 1806                         if ((m->oflags & VPO_UNMANAGED) == 0)
 1807                                 vm_page_aflag_set(m, PGA_WRITEABLE);
 1808                 }
 1809 
 1810                 if (prot & VM_PROT_EXECUTE) {
 1811                         flags |= PTE_SX;
 1812                         if (!su)
 1813                                 flags |= PTE_UX;
 1814                 }
 1815 
 1816                 /* If it's wired, update the stats. */
 1817                 if ((pmap_flags & PMAP_ENTER_WIRED) != 0)
 1818                         flags |= PTE_WIRED;
 1819 
 1820                 error = pte_enter(mmu, pmap, m, va, flags,
 1821                     (pmap_flags & PMAP_ENTER_NOSLEEP) != 0);
 1822                 if (error != 0)
 1823                         return (KERN_RESOURCE_SHORTAGE);
 1824 
 1825                 if ((pmap_flags & PMAP_ENTER_WIRED) != 0)
 1826                         pmap->pm_stats.wired_count++;
 1827 
 1828                 /* Flush the real memory from the instruction cache. */
 1829                 if (prot & VM_PROT_EXECUTE)
 1830                         sync++;
 1831         }
 1832 
 1833         if (sync && (su || pmap == PCPU_GET(curpmap))) {
 1834                 __syncicache((void *)va, PAGE_SIZE);
 1835                 sync = 0;
 1836         }
 1837 
 1838         return (KERN_SUCCESS);
 1839 }
 1840 
 1841 /*
 1842  * Maps a sequence of resident pages belonging to the same object.
 1843  * The sequence begins with the given page m_start.  This page is
 1844  * mapped at the given virtual address start.  Each subsequent page is
 1845  * mapped at a virtual address that is offset from start by the same
 1846  * amount as the page is offset from m_start within the object.  The
 1847  * last page in the sequence is the page with the largest offset from
 1848  * m_start that can be mapped at a virtual address less than the given
 1849  * virtual address end.  Not every virtual page between start and end
 1850  * is mapped; only those for which a resident page exists with the
 1851  * corresponding offset from m_start are mapped.
 1852  */
 1853 static void
 1854 mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start,
 1855     vm_offset_t end, vm_page_t m_start, vm_prot_t prot)
 1856 {
 1857         vm_page_t m;
 1858         vm_pindex_t diff, psize;
 1859 
 1860         VM_OBJECT_ASSERT_LOCKED(m_start->object);
 1861 
 1862         psize = atop(end - start);
 1863         m = m_start;
 1864         rw_wlock(&pvh_global_lock);
 1865         PMAP_LOCK(pmap);
 1866         while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 1867                 mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m,
 1868                     prot & (VM_PROT_READ | VM_PROT_EXECUTE),
 1869                     PMAP_ENTER_NOSLEEP, 0);
 1870                 m = TAILQ_NEXT(m, listq);
 1871         }
 1872         rw_wunlock(&pvh_global_lock);
 1873         PMAP_UNLOCK(pmap);
 1874 }
 1875 
 1876 static void
 1877 mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
 1878     vm_prot_t prot)
 1879 {
 1880 
 1881         rw_wlock(&pvh_global_lock);
 1882         PMAP_LOCK(pmap);
 1883         mmu_booke_enter_locked(mmu, pmap, va, m,
 1884             prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP,
 1885             0);
 1886         rw_wunlock(&pvh_global_lock);
 1887         PMAP_UNLOCK(pmap);
 1888 }
 1889 
 1890 /*
 1891  * Remove the given range of addresses from the specified map.
 1892  *
 1893  * It is assumed that the start and end are properly rounded to the page size.
 1894  */
 1895 static void
 1896 mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva)
 1897 {
 1898         pte_t *pte;
 1899         uint8_t hold_flag;
 1900 
 1901         int su = (pmap == kernel_pmap);
 1902 
 1903         //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n",
 1904         //              su, (u_int32_t)pmap, pmap->pm_tid, va, endva);
 1905 
 1906         if (su) {
 1907                 KASSERT(((va >= virtual_avail) &&
 1908                     (va <= VM_MAX_KERNEL_ADDRESS)),
 1909                     ("mmu_booke_remove: kernel pmap, non kernel va"));
 1910         } else {
 1911                 KASSERT((va <= VM_MAXUSER_ADDRESS),
 1912                     ("mmu_booke_remove: user pmap, non user va"));
 1913         }
 1914 
 1915         if (PMAP_REMOVE_DONE(pmap)) {
 1916                 //debugf("mmu_booke_remove: e (empty)\n");
 1917                 return;
 1918         }
 1919 
 1920         hold_flag = PTBL_HOLD_FLAG(pmap);
 1921         //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag);
 1922 
 1923         rw_wlock(&pvh_global_lock);
 1924         PMAP_LOCK(pmap);
 1925         for (; va < endva; va += PAGE_SIZE) {
 1926                 pte = pte_find(mmu, pmap, va);
 1927                 if ((pte != NULL) && PTE_ISVALID(pte))
 1928                         pte_remove(mmu, pmap, va, hold_flag);
 1929         }
 1930         PMAP_UNLOCK(pmap);
 1931         rw_wunlock(&pvh_global_lock);
 1932 
 1933         //debugf("mmu_booke_remove: e\n");
 1934 }
 1935 
 1936 /*
 1937  * Remove physical page from all pmaps in which it resides.
 1938  */
 1939 static void
 1940 mmu_booke_remove_all(mmu_t mmu, vm_page_t m)
 1941 {
 1942         pv_entry_t pv, pvn;
 1943         uint8_t hold_flag;
 1944 
 1945         rw_wlock(&pvh_global_lock);
 1946         for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) {
 1947                 pvn = TAILQ_NEXT(pv, pv_link);
 1948 
 1949                 PMAP_LOCK(pv->pv_pmap);
 1950                 hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap);
 1951                 pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag);
 1952                 PMAP_UNLOCK(pv->pv_pmap);
 1953         }
 1954         vm_page_aflag_clear(m, PGA_WRITEABLE);
 1955         rw_wunlock(&pvh_global_lock);
 1956 }
 1957 
 1958 /*
 1959  * Map a range of physical addresses into kernel virtual address space.
 1960  */
 1961 static vm_offset_t
 1962 mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_paddr_t pa_start,
 1963     vm_paddr_t pa_end, int prot)
 1964 {
 1965         vm_offset_t sva = *virt;
 1966         vm_offset_t va = sva;
 1967 
 1968         //debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n",
 1969         //              sva, pa_start, pa_end);
 1970 
 1971         while (pa_start < pa_end) {
 1972                 mmu_booke_kenter(mmu, va, pa_start);
 1973                 va += PAGE_SIZE;
 1974                 pa_start += PAGE_SIZE;
 1975         }
 1976         *virt = va;
 1977 
 1978         //debugf("mmu_booke_map: e (va = 0x%08x)\n", va);
 1979         return (sva);
 1980 }
 1981 
 1982 /*
 1983  * The pmap must be activated before its address space can be accessed in any
 1984  * way.
 1985  */
 1986 static void
 1987 mmu_booke_activate(mmu_t mmu, struct thread *td)
 1988 {
 1989         pmap_t pmap;
 1990         u_int cpuid;
 1991 
 1992         pmap = &td->td_proc->p_vmspace->vm_pmap;
 1993 
 1994         CTR5(KTR_PMAP, "%s: s (td = %p, proc = '%s', id = %d, pmap = 0x%08x)",
 1995             __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
 1996 
 1997         KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!"));
 1998 
 1999         sched_pin();
 2000 
 2001         cpuid = PCPU_GET(cpuid);
 2002         CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
 2003         PCPU_SET(curpmap, pmap);
 2004         
 2005         if (pmap->pm_tid[cpuid] == TID_NONE)
 2006                 tid_alloc(pmap);
 2007 
 2008         /* Load PID0 register with pmap tid value. */
 2009         mtspr(SPR_PID0, pmap->pm_tid[cpuid]);
 2010         __asm __volatile("isync");
 2011 
 2012         mtspr(SPR_DBCR0, td->td_pcb->pcb_cpu.booke.dbcr0);
 2013 
 2014         sched_unpin();
 2015 
 2016         CTR3(KTR_PMAP, "%s: e (tid = %d for '%s')", __func__,
 2017             pmap->pm_tid[PCPU_GET(cpuid)], td->td_proc->p_comm);
 2018 }
 2019 
 2020 /*
 2021  * Deactivate the specified process's address space.
 2022  */
 2023 static void
 2024 mmu_booke_deactivate(mmu_t mmu, struct thread *td)
 2025 {
 2026         pmap_t pmap;
 2027 
 2028         pmap = &td->td_proc->p_vmspace->vm_pmap;
 2029         
 2030         CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x",
 2031             __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
 2032 
 2033         td->td_pcb->pcb_cpu.booke.dbcr0 = mfspr(SPR_DBCR0);
 2034 
 2035         CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmap->pm_active);
 2036         PCPU_SET(curpmap, NULL);
 2037 }
 2038 
 2039 /*
 2040  * Copy the range specified by src_addr/len
 2041  * from the source map to the range dst_addr/len
 2042  * in the destination map.
 2043  *
 2044  * This routine is only advisory and need not do anything.
 2045  */
 2046 static void
 2047 mmu_booke_copy(mmu_t mmu, pmap_t dst_pmap, pmap_t src_pmap,
 2048     vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr)
 2049 {
 2050 
 2051 }
 2052 
 2053 /*
 2054  * Set the physical protection on the specified range of this map as requested.
 2055  */
 2056 static void
 2057 mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
 2058     vm_prot_t prot)
 2059 {
 2060         vm_offset_t va;
 2061         vm_page_t m;
 2062         pte_t *pte;
 2063 
 2064         if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 2065                 mmu_booke_remove(mmu, pmap, sva, eva);
 2066                 return;
 2067         }
 2068 
 2069         if (prot & VM_PROT_WRITE)
 2070                 return;
 2071 
 2072         PMAP_LOCK(pmap);
 2073         for (va = sva; va < eva; va += PAGE_SIZE) {
 2074                 if ((pte = pte_find(mmu, pmap, va)) != NULL) {
 2075                         if (PTE_ISVALID(pte)) {
 2076                                 m = PHYS_TO_VM_PAGE(PTE_PA(pte));
 2077 
 2078                                 mtx_lock_spin(&tlbivax_mutex);
 2079                                 tlb_miss_lock();
 2080 
 2081                                 /* Handle modified pages. */
 2082                                 if (PTE_ISMODIFIED(pte) && PTE_ISMANAGED(pte))
 2083                                         vm_page_dirty(m);
 2084 
 2085                                 tlb0_flush_entry(va);
 2086                                 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);
 2087 
 2088                                 tlb_miss_unlock();
 2089                                 mtx_unlock_spin(&tlbivax_mutex);
 2090                         }
 2091                 }
 2092         }
 2093         PMAP_UNLOCK(pmap);
 2094 }
 2095 
 2096 /*
 2097  * Clear the write and modified bits in each of the given page's mappings.
 2098  */
 2099 static void
 2100 mmu_booke_remove_write(mmu_t mmu, vm_page_t m)
 2101 {
 2102         pv_entry_t pv;
 2103         pte_t *pte;
 2104 
 2105         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2106             ("mmu_booke_remove_write: page %p is not managed", m));
 2107 
 2108         /*
 2109          * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 2110          * set by another thread while the object is locked.  Thus,
 2111          * if PGA_WRITEABLE is clear, no page table entries need updating.
 2112          */
 2113         VM_OBJECT_ASSERT_WLOCKED(m->object);
 2114         if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 2115                 return;
 2116         rw_wlock(&pvh_global_lock);
 2117         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 2118                 PMAP_LOCK(pv->pv_pmap);
 2119                 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) {
 2120                         if (PTE_ISVALID(pte)) {
 2121                                 m = PHYS_TO_VM_PAGE(PTE_PA(pte));
 2122 
 2123                                 mtx_lock_spin(&tlbivax_mutex);
 2124                                 tlb_miss_lock();
 2125 
 2126                                 /* Handle modified pages. */
 2127                                 if (PTE_ISMODIFIED(pte))
 2128                                         vm_page_dirty(m);
 2129 
 2130                                 /* Flush mapping from TLB0. */
 2131                                 *pte &= ~(PTE_UW | PTE_SW | PTE_MODIFIED);
 2132 
 2133                                 tlb_miss_unlock();
 2134                                 mtx_unlock_spin(&tlbivax_mutex);
 2135                         }
 2136                 }
 2137                 PMAP_UNLOCK(pv->pv_pmap);
 2138         }
 2139         vm_page_aflag_clear(m, PGA_WRITEABLE);
 2140         rw_wunlock(&pvh_global_lock);
 2141 }
 2142 
 2143 static void
 2144 mmu_booke_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz)
 2145 {
 2146         pte_t *pte;
 2147         pmap_t pmap;
 2148         vm_page_t m;
 2149         vm_offset_t addr;
 2150         vm_paddr_t pa = 0;
 2151         int active, valid;
 2152  
 2153         va = trunc_page(va);
 2154         sz = round_page(sz);
 2155 
 2156         rw_wlock(&pvh_global_lock);
 2157         pmap = PCPU_GET(curpmap);
 2158         active = (pm == kernel_pmap || pm == pmap) ? 1 : 0;
 2159         while (sz > 0) {
 2160                 PMAP_LOCK(pm);
 2161                 pte = pte_find(mmu, pm, va);
 2162                 valid = (pte != NULL && PTE_ISVALID(pte)) ? 1 : 0;
 2163                 if (valid)
 2164                         pa = PTE_PA(pte);
 2165                 PMAP_UNLOCK(pm);
 2166                 if (valid) {
 2167                         if (!active) {
 2168                                 /* Create a mapping in the active pmap. */
 2169                                 addr = 0;
 2170                                 m = PHYS_TO_VM_PAGE(pa);
 2171                                 PMAP_LOCK(pmap);
 2172                                 pte_enter(mmu, pmap, m, addr,
 2173                                     PTE_SR | PTE_VALID | PTE_UR, FALSE);
 2174                                 __syncicache((void *)addr, PAGE_SIZE);
 2175                                 pte_remove(mmu, pmap, addr, PTBL_UNHOLD);
 2176                                 PMAP_UNLOCK(pmap);
 2177                         } else
 2178                                 __syncicache((void *)va, PAGE_SIZE);
 2179                 }
 2180                 va += PAGE_SIZE;
 2181                 sz -= PAGE_SIZE;
 2182         }
 2183         rw_wunlock(&pvh_global_lock);
 2184 }
 2185 
 2186 /*
 2187  * Atomically extract and hold the physical page with the given
 2188  * pmap and virtual address pair if that mapping permits the given
 2189  * protection.
 2190  */
 2191 static vm_page_t
 2192 mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va,
 2193     vm_prot_t prot)
 2194 {
 2195         pte_t *pte;
 2196         vm_page_t m;
 2197         uint32_t pte_wbit;
 2198         vm_paddr_t pa;
 2199         
 2200         m = NULL;
 2201         pa = 0; 
 2202         PMAP_LOCK(pmap);
 2203 retry:
 2204         pte = pte_find(mmu, pmap, va);
 2205         if ((pte != NULL) && PTE_ISVALID(pte)) {
 2206                 if (pmap == kernel_pmap)
 2207                         pte_wbit = PTE_SW;
 2208                 else
 2209                         pte_wbit = PTE_UW;
 2210 
 2211                 if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) {
 2212                         if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa))
 2213                                 goto retry;
 2214                         m = PHYS_TO_VM_PAGE(PTE_PA(pte));
 2215                         vm_page_hold(m);
 2216                 }
 2217         }
 2218 
 2219         PA_UNLOCK_COND(pa);
 2220         PMAP_UNLOCK(pmap);
 2221         return (m);
 2222 }
 2223 
 2224 /*
 2225  * Initialize a vm_page's machine-dependent fields.
 2226  */
 2227 static void
 2228 mmu_booke_page_init(mmu_t mmu, vm_page_t m)
 2229 {
 2230 
 2231         TAILQ_INIT(&m->md.pv_list);
 2232 }
 2233 
 2234 /*
 2235  * mmu_booke_zero_page_area zeros the specified hardware page by
 2236  * mapping it into virtual memory and using bzero to clear
 2237  * its contents.
 2238  *
 2239  * off and size must reside within a single page.
 2240  */
 2241 static void
 2242 mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size)
 2243 {
 2244         vm_offset_t va;
 2245 
 2246         /* XXX KASSERT off and size are within a single page? */
 2247 
 2248         mtx_lock(&zero_page_mutex);
 2249         va = zero_page_va;
 2250 
 2251         mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m));
 2252         bzero((caddr_t)va + off, size);
 2253         mmu_booke_kremove(mmu, va);
 2254 
 2255         mtx_unlock(&zero_page_mutex);
 2256 }
 2257 
 2258 /*
 2259  * mmu_booke_zero_page zeros the specified hardware page.
 2260  */
 2261 static void
 2262 mmu_booke_zero_page(mmu_t mmu, vm_page_t m)
 2263 {
 2264         vm_offset_t off, va;
 2265 
 2266         mtx_lock(&zero_page_mutex);
 2267         va = zero_page_va;
 2268 
 2269         mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m));
 2270         for (off = 0; off < PAGE_SIZE; off += cacheline_size)
 2271                 __asm __volatile("dcbz 0,%0" :: "r"(va + off));
 2272         mmu_booke_kremove(mmu, va);
 2273 
 2274         mtx_unlock(&zero_page_mutex);
 2275 }
 2276 
 2277 /*
 2278  * mmu_booke_copy_page copies the specified (machine independent) page by
 2279  * mapping the page into virtual memory and using memcpy to copy the page,
 2280  * one machine dependent page at a time.
 2281  */
 2282 static void
 2283 mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm)
 2284 {
 2285         vm_offset_t sva, dva;
 2286 
 2287         sva = copy_page_src_va;
 2288         dva = copy_page_dst_va;
 2289 
 2290         mtx_lock(&copy_page_mutex);
 2291         mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm));
 2292         mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm));
 2293         memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE);
 2294         mmu_booke_kremove(mmu, dva);
 2295         mmu_booke_kremove(mmu, sva);
 2296         mtx_unlock(&copy_page_mutex);
 2297 }
 2298 
 2299 static inline void
 2300 mmu_booke_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset,
 2301     vm_page_t *mb, vm_offset_t b_offset, int xfersize)
 2302 {
 2303         void *a_cp, *b_cp;
 2304         vm_offset_t a_pg_offset, b_pg_offset;
 2305         int cnt;
 2306 
 2307         mtx_lock(&copy_page_mutex);
 2308         while (xfersize > 0) {
 2309                 a_pg_offset = a_offset & PAGE_MASK;
 2310                 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 2311                 mmu_booke_kenter(mmu, copy_page_src_va,
 2312                     VM_PAGE_TO_PHYS(ma[a_offset >> PAGE_SHIFT]));
 2313                 a_cp = (char *)copy_page_src_va + a_pg_offset;
 2314                 b_pg_offset = b_offset & PAGE_MASK;
 2315                 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
 2316                 mmu_booke_kenter(mmu, copy_page_dst_va,
 2317                     VM_PAGE_TO_PHYS(mb[b_offset >> PAGE_SHIFT]));
 2318                 b_cp = (char *)copy_page_dst_va + b_pg_offset;
 2319                 bcopy(a_cp, b_cp, cnt);
 2320                 mmu_booke_kremove(mmu, copy_page_dst_va);
 2321                 mmu_booke_kremove(mmu, copy_page_src_va);
 2322                 a_offset += cnt;
 2323                 b_offset += cnt;
 2324                 xfersize -= cnt;
 2325         }
 2326         mtx_unlock(&copy_page_mutex);
 2327 }
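/*
 * Illustration (hosted C, not part of the kernel build): the loop above copies
 * 'xfersize' bytes between two page arrays at arbitrary byte offsets, clamping
 * every chunk so a single copy never crosses a page boundary on either side.
 * The sketch reproduces only that clamping; its names are hypothetical.
 */
#if 0	/* illustrative sketch only; hypothetical names, not kernel code */
#include <stddef.h>
#include <string.h>

#define SK_PAGE_SIZE	4096
#define SK_PAGE_MASK	(SK_PAGE_SIZE - 1)
#define SK_PAGE_SHIFT	12

static size_t
sk_min(size_t a, size_t b)
{
	return (a < b ? a : b);
}

static void
sk_copy_pages(char (*a)[SK_PAGE_SIZE], size_t a_off,
    char (*b)[SK_PAGE_SIZE], size_t b_off, size_t xfersize)
{
	size_t a_pg_off, b_pg_off, cnt;

	while (xfersize > 0) {
		a_pg_off = a_off & SK_PAGE_MASK;
		cnt = sk_min(xfersize, SK_PAGE_SIZE - a_pg_off);
		b_pg_off = b_off & SK_PAGE_MASK;
		cnt = sk_min(cnt, SK_PAGE_SIZE - b_pg_off);
		/* One bounded copy per iteration, clamped on both sides. */
		memcpy(&b[b_off >> SK_PAGE_SHIFT][b_pg_off],
		    &a[a_off >> SK_PAGE_SHIFT][a_pg_off], cnt);
		a_off += cnt;
		b_off += cnt;
		xfersize -= cnt;
	}
}
#endif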
 2328 
 2329 /*
 2330  * mmu_booke_zero_page_idle zeros the specified hardware page by mapping it
 2331  * into virtual memory and using bzero to clear its contents. This is intended
 2332  * to be called from the vm_pagezero process only and outside of Giant. No
 2333  * lock is required.
 2334  */
 2335 static void
 2336 mmu_booke_zero_page_idle(mmu_t mmu, vm_page_t m)
 2337 {
 2338         vm_offset_t va;
 2339 
 2340         va = zero_page_idle_va;
 2341         mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m));
 2342         bzero((caddr_t)va, PAGE_SIZE);
 2343         mmu_booke_kremove(mmu, va);
 2344 }
 2345 
 2346 static vm_offset_t
 2347 mmu_booke_quick_enter_page(mmu_t mmu, vm_page_t m)
 2348 {
 2349         vm_paddr_t paddr;
 2350         vm_offset_t qaddr;
 2351         uint32_t flags;
 2352         pte_t *pte;
 2353 
 2354         paddr = VM_PAGE_TO_PHYS(m);
 2355 
 2356         flags = PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID;
 2357         flags |= tlb_calc_wimg(paddr, pmap_page_get_memattr(m)) << PTE_MAS2_SHIFT;
 2358         flags |= PTE_PS_4KB;
 2359 
 2360         critical_enter();
 2361         qaddr = PCPU_GET(qmap_addr);
 2362 
 2363         pte = pte_find(mmu, kernel_pmap, qaddr);
 2364 
 2365         KASSERT(*pte == 0, ("mmu_booke_quick_enter_page: PTE busy"));
 2366 
 2367         /* 
 2368          * XXX: tlbivax is broadcast to other cores, but qaddr should
 2369          * not be present in other TLBs.  Is there a better instruction
 2370          * sequence to use? Or just forget it & use mmu_booke_kenter()... 
 2371          */
 2372         __asm __volatile("tlbivax 0, %0" :: "r"(qaddr & MAS2_EPN_MASK));
 2373         __asm __volatile("isync; msync");
 2374 
 2375         *pte = PTE_RPN_FROM_PA(paddr) | flags;
 2376 
 2377         /* Flush the real memory from the instruction cache. */
 2378         if ((flags & (PTE_I | PTE_G)) == 0)
 2379                 __syncicache((void *)qaddr, PAGE_SIZE);
 2380 
 2381         return (qaddr);
 2382 }
 2383 
 2384 static void
 2385 mmu_booke_quick_remove_page(mmu_t mmu, vm_offset_t addr)
 2386 {
 2387         pte_t *pte;
 2388 
 2389         pte = pte_find(mmu, kernel_pmap, addr);
 2390 
 2391         KASSERT(PCPU_GET(qmap_addr) == addr,
 2392             ("mmu_booke_quick_remove_page: invalid address"));
 2393         KASSERT(*pte != 0,
 2394             ("mmu_booke_quick_remove_page: PTE not in use"));
 2395 
 2396         *pte = 0;
 2397         critical_exit();
 2398 }
 2399 
 2400 /*
 2401  * Return whether or not the specified physical page was modified
 2402  * in any of physical maps.
 2403  */
 2404 static boolean_t
 2405 mmu_booke_is_modified(mmu_t mmu, vm_page_t m)
 2406 {
 2407         pte_t *pte;
 2408         pv_entry_t pv;
 2409         boolean_t rv;
 2410 
 2411         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2412             ("mmu_booke_is_modified: page %p is not managed", m));
 2413         rv = FALSE;
 2414 
 2415         /*
 2416          * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
 2417          * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
 2418          * is clear, no PTEs can be modified.
 2419          */
 2420         VM_OBJECT_ASSERT_WLOCKED(m->object);
 2421         if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 2422                 return (rv);
 2423         rw_wlock(&pvh_global_lock);
 2424         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 2425                 PMAP_LOCK(pv->pv_pmap);
 2426                 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL &&
 2427                     PTE_ISVALID(pte)) {
 2428                         if (PTE_ISMODIFIED(pte))
 2429                                 rv = TRUE;
 2430                 }
 2431                 PMAP_UNLOCK(pv->pv_pmap);
 2432                 if (rv)
 2433                         break;
 2434         }
 2435         rw_wunlock(&pvh_global_lock);
 2436         return (rv);
 2437 }
 2438 
 2439 /*
 2440  * Return whether or not the specified virtual address is eligible
 2441  * for prefault.
 2442  */
 2443 static boolean_t
 2444 mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr)
 2445 {
 2446 
 2447         return (FALSE);
 2448 }
 2449 
 2450 /*
 2451  * Return whether or not the specified physical page was referenced
 2452  * in any physical maps.
 2453  */
 2454 static boolean_t
 2455 mmu_booke_is_referenced(mmu_t mmu, vm_page_t m)
 2456 {
 2457         pte_t *pte;
 2458         pv_entry_t pv;
 2459         boolean_t rv;
 2460 
 2461         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2462             ("mmu_booke_is_referenced: page %p is not managed", m));
 2463         rv = FALSE;
 2464         rw_wlock(&pvh_global_lock);
 2465         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 2466                 PMAP_LOCK(pv->pv_pmap);
 2467                 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL &&
 2468                     PTE_ISVALID(pte)) {
 2469                         if (PTE_ISREFERENCED(pte))
 2470                                 rv = TRUE;
 2471                 }
 2472                 PMAP_UNLOCK(pv->pv_pmap);
 2473                 if (rv)
 2474                         break;
 2475         }
 2476         rw_wunlock(&pvh_global_lock);
 2477         return (rv);
 2478 }
 2479 
 2480 /*
 2481  * Clear the modify bits on the specified physical page.
 2482  */
 2483 static void
 2484 mmu_booke_clear_modify(mmu_t mmu, vm_page_t m)
 2485 {
 2486         pte_t *pte;
 2487         pv_entry_t pv;
 2488 
 2489         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2490             ("mmu_booke_clear_modify: page %p is not managed", m));
 2491         VM_OBJECT_ASSERT_WLOCKED(m->object);
 2492         KASSERT(!vm_page_xbusied(m),
 2493             ("mmu_booke_clear_modify: page %p is exclusive busied", m));
 2494 
 2495         /*
 2496          * If the page is not PGA_WRITEABLE, then no PTEs can be modified.
 2497          * If the object containing the page is locked and the page is not
 2498          * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
 2499          */
 2500         if ((m->aflags & PGA_WRITEABLE) == 0)
 2501                 return;
 2502         rw_wlock(&pvh_global_lock);
 2503         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 2504                 PMAP_LOCK(pv->pv_pmap);
 2505                 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL &&
 2506                     PTE_ISVALID(pte)) {
 2507                         mtx_lock_spin(&tlbivax_mutex);
 2508                         tlb_miss_lock();
 2509                         
 2510                         if (*pte & (PTE_SW | PTE_UW | PTE_MODIFIED)) {
 2511                                 tlb0_flush_entry(pv->pv_va);
 2512                                 *pte &= ~(PTE_SW | PTE_UW | PTE_MODIFIED |
 2513                                     PTE_REFERENCED);
 2514                         }
 2515 
 2516                         tlb_miss_unlock();
 2517                         mtx_unlock_spin(&tlbivax_mutex);
 2518                 }
 2519                 PMAP_UNLOCK(pv->pv_pmap);
 2520         }
 2521         rw_wunlock(&pvh_global_lock);
 2522 }
 2523 
 2524 /*
 2525  * Return a count of reference bits for a page, clearing those bits.
 2526  * It is not necessary for every reference bit to be cleared, but it
 2527  * is necessary that 0 only be returned when there are truly no
 2528  * reference bits set.
 2529  *
 2530  * XXX: The exact number of bits to check and clear is a matter that
 2531  * should be tested and standardized at some point in the future for
 2532  * optimal aging of shared pages.
 2533  */
 2534 static int
 2535 mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m)
 2536 {
 2537         pte_t *pte;
 2538         pv_entry_t pv;
 2539         int count;
 2540 
 2541         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2542             ("mmu_booke_ts_referenced: page %p is not managed", m));
 2543         count = 0;
 2544         rw_wlock(&pvh_global_lock);
 2545         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 2546                 PMAP_LOCK(pv->pv_pmap);
 2547                 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL &&
 2548                     PTE_ISVALID(pte)) {
 2549                         if (PTE_ISREFERENCED(pte)) {
 2550                                 mtx_lock_spin(&tlbivax_mutex);
 2551                                 tlb_miss_lock();
 2552 
 2553                                 tlb0_flush_entry(pv->pv_va);
 2554                                 *pte &= ~PTE_REFERENCED;
 2555 
 2556                                 tlb_miss_unlock();
 2557                                 mtx_unlock_spin(&tlbivax_mutex);
 2558 
 2559                                 if (++count > 4) {
 2560                                         PMAP_UNLOCK(pv->pv_pmap);
 2561                                         break;
 2562                                 }
 2563                         }
 2564                 }
 2565                 PMAP_UNLOCK(pv->pv_pmap);
 2566         }
 2567         rw_wunlock(&pvh_global_lock);
 2568         return (count);
 2569 }
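/*
 * Illustration (hosted C, not part of the kernel build): as the comment above
 * mmu_booke_ts_referenced() notes, the harvest stops once more than four
 * reference bits have been cleared, which is enough signal for page aging.
 * The structure and function names in the sketch are hypothetical.
 */
#if 0	/* illustrative sketch only; hypothetical names, not kernel code */
#include <stdbool.h>

struct sk_mapping {
	bool	valid;
	bool	referenced;
};

static int
sk_ts_referenced(struct sk_mapping *maps, int nmaps)
{
	int i, count = 0;

	for (i = 0; i < nmaps; i++) {
		if (!maps[i].valid || !maps[i].referenced)
			continue;
		maps[i].referenced = false;	/* clear as we count */
		if (++count > 4)
			break;
	}
	return (count);
}
#endif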
 2570 
 2571 /*
 2572  * Clear the wired attribute from the mappings for the specified range of
 2573  * addresses in the given pmap.  Every valid mapping within that range must
 2574  * have the wired attribute set.  In contrast, invalid mappings cannot have
 2575  * the wired attribute set, so they are ignored.
 2576  *
 2577  * The wired attribute of the page table entry is not a hardware feature, so
 2578  * there is no need to invalidate any TLB entries.
 2579  */
 2580 static void
 2581 mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 2582 {
 2583         vm_offset_t va;
 2584         pte_t *pte;
 2585 
 2586         PMAP_LOCK(pmap);
 2587         for (va = sva; va < eva; va += PAGE_SIZE) {
 2588                 if ((pte = pte_find(mmu, pmap, va)) != NULL &&
 2589                     PTE_ISVALID(pte)) {
 2590                         if (!PTE_ISWIRED(pte))
 2591                                 panic("mmu_booke_unwire: pte %p isn't wired",
 2592                                     pte);
 2593                         *pte &= ~PTE_WIRED;
 2594                         pmap->pm_stats.wired_count--;
 2595                 }
 2596         }
 2597         PMAP_UNLOCK(pmap);
 2598 
 2599 }
 2600 
 2601 /*
 2602  * Return true if the pmap's pv is one of the first 16 pvs linked to from this
 2603  * page.  This count may be changed upwards or downwards in the future; it is
 2604  * only necessary that true be returned for a small subset of pmaps for proper
 2605  * page aging.
 2606  */
 2607 static boolean_t
 2608 mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m)
 2609 {
 2610         pv_entry_t pv;
 2611         int loops;
 2612         boolean_t rv;
 2613 
 2614         KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 2615             ("mmu_booke_page_exists_quick: page %p is not managed", m));
 2616         loops = 0;
 2617         rv = FALSE;
 2618         rw_wlock(&pvh_global_lock);
 2619         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 2620                 if (pv->pv_pmap == pmap) {
 2621                         rv = TRUE;
 2622                         break;
 2623                 }
 2624                 if (++loops >= 16)
 2625                         break;
 2626         }
 2627         rw_wunlock(&pvh_global_lock);
 2628         return (rv);
 2629 }
 2630 
 2631 /*
 2632  * Return the number of managed mappings to the given physical page that are
 2633  * wired.
 2634  */
 2635 static int
 2636 mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m)
 2637 {
 2638         pv_entry_t pv;
 2639         pte_t *pte;
 2640         int count = 0;
 2641 
 2642         if ((m->oflags & VPO_UNMANAGED) != 0)
 2643                 return (count);
 2644         rw_wlock(&pvh_global_lock);
 2645         TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) {
 2646                 PMAP_LOCK(pv->pv_pmap);
 2647                 if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL)
 2648                         if (PTE_ISVALID(pte) && PTE_ISWIRED(pte))
 2649                                 count++;
 2650                 PMAP_UNLOCK(pv->pv_pmap);
 2651         }
 2652         rw_wunlock(&pvh_global_lock);
 2653         return (count);
 2654 }
 2655 
 2656 static int
 2657 mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
 2658 {
 2659         int i;
 2660         vm_offset_t va;
 2661 
 2662         /*
 2663          * This currently does not work for entries that
 2664          * overlap TLB1 entries.
 2665          */
 2666         for (i = 0; i < TLB1_ENTRIES; i ++) {
 2667                 if (tlb1_iomapped(i, pa, size, &va) == 0)
 2668                         return (0);
 2669         }
 2670 
 2671         return (EFAULT);
 2672 }
 2673 
 2674 void
 2675 mmu_booke_dumpsys_map(mmu_t mmu, vm_paddr_t pa, size_t sz, void **va)
 2676 {
 2677         vm_paddr_t ppa;
 2678         vm_offset_t ofs;
 2679         vm_size_t gran;
 2680 
 2681         /* Minidumps are based on virtual memory addresses. */
 2682         if (do_minidump) {
 2683                 *va = (void *)(vm_offset_t)pa;
 2684                 return;
 2685         }
 2686 
 2687         /* Raw physical memory dumps don't have a virtual address. */
 2688         /* We always map a 256MB page at 256M. */
 2689         gran = 256 * 1024 * 1024;
 2690         ppa = rounddown2(pa, gran);
 2691         ofs = pa - ppa;
 2692         *va = (void *)gran;
 2693         tlb1_set_entry((vm_offset_t)*va, ppa, gran, _TLB_ENTRY_IO);
 2694 
 2695         if (sz > (gran - ofs))
 2696                 tlb1_set_entry((vm_offset_t)*va + gran, ppa + gran, gran,
 2697                     _TLB_ENTRY_IO);
 2698 }
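/*
 * Illustration (hosted C, not part of the kernel build): the non-minidump path
 * above rounds the requested physical address down to a 256 MB granule, maps
 * one window, and maps a second granule only when the range spills past the
 * first.  The example values below are made up.
 */
#if 0	/* illustrative sketch only; example values, not kernel code */
#include <stdint.h>
#include <stdio.h>

#define SK_GRAN			(256u * 1024 * 1024)
#define SK_ROUNDDOWN2(x, a)	((x) & ~((uint64_t)(a) - 1))

int
main(void)
{
	uint64_t pa = 0x1ff00000;		/* example dump start */
	uint64_t sz = 32u * 1024 * 1024;	/* example dump size */

	uint64_t ppa = SK_ROUNDDOWN2(pa, SK_GRAN);
	uint64_t ofs = pa - ppa;

	printf("map granule at PA 0x%jx, offset into it 0x%jx\n",
	    (uintmax_t)ppa, (uintmax_t)ofs);
	if (sz > SK_GRAN - ofs)
		printf("range spills over: also map PA 0x%jx\n",
		    (uintmax_t)(ppa + SK_GRAN));
	return (0);
}
#endif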
 2699 
 2700 void
 2701 mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va)
 2702 {
 2703         vm_paddr_t ppa;
 2704         vm_offset_t ofs;
 2705         vm_size_t gran;
 2706         tlb_entry_t e;
 2707         int i;
 2708 
 2709         /* Minidumps are based on virtual memory addresses. */
 2710         /* Nothing to do... */
 2711         if (do_minidump)
 2712                 return;
 2713 
 2714         for (i = 0; i < TLB1_ENTRIES; i++) {
 2715                 tlb1_read_entry(&e, i);
 2716                 if (!(e.mas1 & MAS1_VALID))
 2717                         break;
 2718         }
 2719 
 2720         /* Raw physical memory dumps don't have a virtual address. */
 2721         i--;
 2722         e.mas1 = 0;
 2723         e.mas2 = 0;
 2724         e.mas3 = 0;
 2725         tlb1_write_entry(&e, i);
 2726 
 2727         gran = 256 * 1024 * 1024;
 2728         ppa = rounddown2(pa, gran);
 2729         ofs = pa - ppa;
 2730         if (sz > (gran - ofs)) {
 2731                 i--;
 2732                 e.mas1 = 0;
 2733                 e.mas2 = 0;
 2734                 e.mas3 = 0;
 2735                 tlb1_write_entry(&e, i);
 2736         }
 2737 }
 2738 
 2739 extern struct dump_pa dump_map[PHYS_AVAIL_SZ + 1];
 2740 
 2741 void
 2742 mmu_booke_scan_init(mmu_t mmu)
 2743 {
 2744         vm_offset_t va;
 2745         pte_t *pte;
 2746         int i;
 2747 
 2748         if (!do_minidump) {
 2749                 /* Initialize phys. segments for dumpsys(). */
 2750                 memset(&dump_map, 0, sizeof(dump_map));
 2751                 mem_regions(&physmem_regions, &physmem_regions_sz, &availmem_regions,
 2752                     &availmem_regions_sz);
 2753                 for (i = 0; i < physmem_regions_sz; i++) {
 2754                         dump_map[i].pa_start = physmem_regions[i].mr_start;
 2755                         dump_map[i].pa_size = physmem_regions[i].mr_size;
 2756                 }
 2757                 return;
 2758         }
 2759 
 2760         /* Virtual segments for minidumps: */
 2761         memset(&dump_map, 0, sizeof(dump_map));
 2762 
 2763         /* 1st: kernel .data and .bss. */
 2764         dump_map[0].pa_start = trunc_page((uintptr_t)_etext);
 2765         dump_map[0].pa_size =
 2766             round_page((uintptr_t)_end) - dump_map[0].pa_start;
 2767 
 2768         /* 2nd: msgbuf and tables (see pmap_bootstrap()). */
 2769         dump_map[1].pa_start = data_start;
 2770         dump_map[1].pa_size = data_end - data_start;
 2771 
 2772         /* 3rd: kernel VM. */
 2773         va = dump_map[1].pa_start + dump_map[1].pa_size;
 2774         /* Find start of next chunk (from va). */
 2775         while (va < virtual_end) {
 2776                 /* Don't dump the buffer cache. */
 2777                 if (va >= kmi.buffer_sva && va < kmi.buffer_eva) {
 2778                         va = kmi.buffer_eva;
 2779                         continue;
 2780                 }
 2781                 pte = pte_find(mmu, kernel_pmap, va);
 2782                 if (pte != NULL && PTE_ISVALID(pte))
 2783                         break;
 2784                 va += PAGE_SIZE;
 2785         }
 2786         if (va < virtual_end) {
 2787                 dump_map[2].pa_start = va;
 2788                 va += PAGE_SIZE;
 2789                 /* Find last page in chunk. */
 2790                 while (va < virtual_end) {
 2791                         /* Don't run into the buffer cache. */
 2792                         if (va == kmi.buffer_sva)
 2793                                 break;
 2794                         pte = pte_find(mmu, kernel_pmap, va);
 2795                         if (pte == NULL || !PTE_ISVALID(pte))
 2796                                 break;
 2797                         va += PAGE_SIZE;
 2798                 }
 2799                 dump_map[2].pa_size = va - dump_map[2].pa_start;
 2800         }
 2801 }
 2802 
 2803 /*
 2804  * Map a set of physical memory pages into the kernel virtual address space.
 2805  * Return a pointer to where it is mapped. This routine is intended to be used
 2806  * for mapping device memory, NOT real memory.
 2807  */
 2808 static void *
 2809 mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
 2810 {
 2811 
 2812         return (mmu_booke_mapdev_attr(mmu, pa, size, VM_MEMATTR_DEFAULT));
 2813 }
 2814 
 2815 static void *
 2816 mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma)
 2817 {
 2818         tlb_entry_t e;
 2819         void *res;
 2820         uintptr_t va, tmpva;
 2821         vm_size_t sz;
 2822         int i;
 2823 
 2824         /*
 2825          * Check if this is premapped in TLB1. Note: this should probably also
 2826          * check whether a sequence of TLB1 entries exists that matches the
 2827          * requirement, but for now only the easy case is checked.
 2828          */
 2829         if (ma == VM_MEMATTR_DEFAULT) {
 2830                 for (i = 0; i < TLB1_ENTRIES; i++) {
 2831                         tlb1_read_entry(&e, i);
 2832                         if (!(e.mas1 & MAS1_VALID))
 2833                                 continue;
 2834                         if (pa >= e.phys &&
 2835                             (pa + size) <= (e.phys + e.size))
 2836                                 return (void *)(e.virt +
 2837                                     (vm_offset_t)(pa - e.phys));
 2838                 }
 2839         }
 2840 
 2841         size = roundup(size, PAGE_SIZE);
 2842 
 2843         /*
 2844          * The device mapping area is between VM_MAXUSER_ADDRESS and
 2845          * VM_MIN_KERNEL_ADDRESS.  This gives 1GB of device addressing.
 2846          */
 2847 #ifdef SPARSE_MAPDEV
 2848         /*
 2849          * With a sparse mapdev, align to the largest starting region.  This
 2850          * could feasibly be optimized for a 'best-fit' alignment, but that
 2851          * calculation could be very costly.
 2852          */
 2853         do {
 2854             tmpva = tlb1_map_base;
 2855             va = roundup(tlb1_map_base, 1 << flsl(size));
 2856         } while (!atomic_cmpset_int(&tlb1_map_base, tmpva, va + size));
 2857 #else
 2858         va = atomic_fetchadd_int(&tlb1_map_base, size);
 2859 #endif
 2860         res = (void *)va;
 2861 
 2862         do {
 2863                 sz = 1 << (ilog2(size) & ~1);
 2864                 if (va % sz != 0) {
 2865                         do {
 2866                                 sz >>= 2;
 2867                         } while (va % sz != 0);
 2868                 }
 2869                 if (bootverbose)
 2870                         printf("Wiring VA=%x to PA=%jx (size=%x)\n",
 2871                             va, (uintmax_t)pa, sz);
 2872                 tlb1_set_entry(va, pa, sz,
 2873                     _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma));
 2874                 size -= sz;
 2875                 pa += sz;
 2876                 va += sz;
 2877         } while (size > 0);
 2878 
 2879         return (res);
 2880 }
 2881 
 2882 /*
 2883  * 'Unmap' a range mapped by mmu_booke_mapdev().
 2884  */
 2885 static void
 2886 mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size)
 2887 {
 2888 #ifdef SUPPORTS_SHRINKING_TLB1
 2889         vm_offset_t base, offset;
 2890 
 2891         /*
 2892          * Unmap only if this is inside kernel virtual space.
 2893          */
 2894         if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) {
 2895                 base = trunc_page(va);
 2896                 offset = va & PAGE_MASK;
 2897                 size = roundup(offset + size, PAGE_SIZE);
 2898                 kva_free(base, size);
 2899         }
 2900 #endif
 2901 }
 2902 
 2903 /*
 2904  * mmu_booke_object_init_pt preloads the ptes for a given object into the
 2905  * specified pmap. This eliminates the blast of soft faults on process startup
 2906  * and immediately after an mmap.
 2907  */
 2908 static void
 2909 mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr,
 2910     vm_object_t object, vm_pindex_t pindex, vm_size_t size)
 2911 {
 2912 
 2913         VM_OBJECT_ASSERT_WLOCKED(object);
 2914         KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 2915             ("mmu_booke_object_init_pt: non-device object"));
 2916 }
 2917 
 2918 /*
 2919  * Perform the pmap work for mincore.
 2920  */
 2921 static int
 2922 mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr,
 2923     vm_paddr_t *locked_pa)
 2924 {
 2925 
 2926         /* XXX: this should be implemented at some point */
 2927         return (0);
 2928 }
 2929 
 2930 static int
 2931 mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz,
 2932     vm_memattr_t mode)
 2933 {
 2934         vm_offset_t va;
 2935         pte_t *pte;
 2936         int i, j;
 2937         tlb_entry_t e;
 2938 
 2939         /* Check TLB1 mappings */
 2940         for (i = 0; i < TLB1_ENTRIES; i++) {
 2941                 tlb1_read_entry(&e, i);
 2942                 if (!(e.mas1 & MAS1_VALID))
 2943                         continue;
 2944                 if (addr >= e.virt && addr < e.virt + e.size)
 2945                         break;
 2946         }
 2947         if (i < TLB1_ENTRIES) {
 2948                 /* Only allow full mappings to be modified for now. */
 2949                 /* Validate the range. */
 2950                 for (j = i, va = addr; va < addr + sz; va += e.size, j++) {
 2951                         tlb1_read_entry(&e, j);
 2952                         if (va != e.virt || (sz - (va - addr) < e.size))
 2953                                 return (EINVAL);
 2954                 }
 2955                 for (va = addr; va < addr + sz; va += e.size, i++) {
 2956                         tlb1_read_entry(&e, i);
 2957                         e.mas2 &= ~MAS2_WIMGE_MASK;
 2958                         e.mas2 |= tlb_calc_wimg(e.phys, mode);
 2959 
 2960                         /*
 2961                          * Write it out to the TLB.  This should really be
 2962                          * re-synchronized with the other cores.
 2963                          */
 2964                         tlb1_write_entry(&e, i);
 2965                 }
 2966                 return (0);
 2967         }
 2968 
 2969         /* Not in TLB1, try through pmap */
 2970         /* First validate the range. */
 2971         for (va = addr; va < addr + sz; va += PAGE_SIZE) {
 2972                 pte = pte_find(mmu, kernel_pmap, va);
 2973                 if (pte == NULL || !PTE_ISVALID(pte))
 2974                         return (EINVAL);
 2975         }
 2976 
 2977         mtx_lock_spin(&tlbivax_mutex);
 2978         tlb_miss_lock();
 2979         for (va = addr; va < addr + sz; va += PAGE_SIZE) {
 2980                 pte = pte_find(mmu, kernel_pmap, va);
 2981                 *pte &= ~(PTE_MAS2_MASK << PTE_MAS2_SHIFT);
 2982                 *pte |= tlb_calc_wimg(PTE_PA(pte), mode) << PTE_MAS2_SHIFT;
 2983                 tlb0_flush_entry(va);
 2984         }
 2985         tlb_miss_unlock();
 2986         mtx_unlock_spin(&tlbivax_mutex);
 2987 
 2988         return (0);
 2989 }
 2990 
 2991 /**************************************************************************/
 2992 /* TID handling */
 2993 /**************************************************************************/
 2994 
 2995 /*
 2996  * Allocate a TID. If necessary, steal one from someone else.
 2997  * The new TID is flushed from the TLB before returning.
 2998  */
 2999 static tlbtid_t
 3000 tid_alloc(pmap_t pmap)
 3001 {
 3002         tlbtid_t tid;
 3003         int thiscpu;
 3004 
 3005         KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap"));
 3006 
 3007         CTR2(KTR_PMAP, "%s: s (pmap = %p)", __func__, pmap);
 3008 
 3009         thiscpu = PCPU_GET(cpuid);
 3010 
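              /*
               * TIDs are handed out round-robin from the per-CPU tid_next
               * counter, wrapping from TID_MAX back to TID_MIN.
               */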
 3011         tid = PCPU_GET(tid_next);
 3012         if (tid > TID_MAX)
 3013                 tid = TID_MIN;
 3014         PCPU_SET(tid_next, tid + 1);
 3015 
 3016         /* If we are stealing a TID, then clear the relevant pmap's field. */
 3017         if (tidbusy[thiscpu][tid] != NULL) {
 3018 
 3019                 CTR2(KTR_PMAP, "%s: warning: stealing tid %d", __func__, tid);
 3020                 
 3021                 tidbusy[thiscpu][tid]->pm_tid[thiscpu] = TID_NONE;
 3022 
 3023                 /* Flush all entries from TLB0 matching this TID. */
 3024                 tid_flush(tid);
 3025         }
 3026 
 3027         tidbusy[thiscpu][tid] = pmap;
 3028         pmap->pm_tid[thiscpu] = tid;
 3029         __asm __volatile("msync; isync");
 3030 
 3031         CTR3(KTR_PMAP, "%s: e (%02d next = %02d)", __func__, tid,
 3032             PCPU_GET(tid_next));
 3033 
 3034         return (tid);
 3035 }
 3036 
 3037 /**************************************************************************/
 3038 /* TLB0 handling */
 3039 /**************************************************************************/
 3040 
 3041 static void
 3042 tlb_print_entry(int i, uint32_t mas1, uint32_t mas2, uint32_t mas3,
 3043     uint32_t mas7)
 3044 {
 3045         int as;
 3046         char desc[3];
 3047         tlbtid_t tid;
 3048         vm_size_t size;
 3049         unsigned int tsize;
 3050 
 3051         desc[2] = '\0';
 3052         if (mas1 & MAS1_VALID)
 3053                 desc[0] = 'V';
 3054         else
 3055                 desc[0] = ' ';
 3056 
 3057         if (mas1 & MAS1_IPROT)
 3058                 desc[1] = 'P';
 3059         else
 3060                 desc[1] = ' ';
 3061 
 3062         as = (mas1 & MAS1_TS_MASK) ? 1 : 0;
 3063         tid = MAS1_GETTID(mas1);
 3064 
 3065         tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
 3066         size = 0;
 3067         if (tsize)
 3068                 size = tsize2size(tsize);
 3069 
 3070         debugf("%3d: (%s) [AS=%d] "
 3071             "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x "
 3072             "mas2(va) = 0x%08x mas3(pa) = 0x%08x mas7 = 0x%08x\n",
 3073             i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7);
 3074 }
 3075 
 3076 /* Convert TLB0 va and way number to tlb0[] table index. */
 3077 static inline unsigned int
 3078 tlb0_tableidx(vm_offset_t va, unsigned int way)
 3079 {
 3080         unsigned int idx;
 3081 
 3082         idx = (way * TLB0_ENTRIES_PER_WAY);
 3083         idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT;
 3084         return (idx);
 3085 }
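      /*
       * Illustrative example, assuming a 4-way TLB0 with 128 entries per way:
       * for va = 0xc1234000 the set index is (va >> PAGE_SHIFT) & 0x7f = 0x34,
       * so way 2 of that set lives at tlb0[2 * 128 + 0x34].
       */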
 3086 
 3087 /*
 3088  * Invalidate TLB0 entry.
 3089  */
 3090 static inline void
 3091 tlb0_flush_entry(vm_offset_t va)
 3092 {
 3093 
 3094         CTR2(KTR_PMAP, "%s: s va=0x%08x", __func__, va);
 3095 
 3096         mtx_assert(&tlbivax_mutex, MA_OWNED);
 3097 
 3098         __asm __volatile("tlbivax 0, %0" :: "r"(va & MAS2_EPN_MASK));
 3099         __asm __volatile("isync; msync");
 3100         __asm __volatile("tlbsync; msync");
 3101 
 3102         CTR1(KTR_PMAP, "%s: e", __func__);
 3103 }
 3104 
 3105 /* Print out contents of the MAS registers for each TLB0 entry */
 3106 void
 3107 tlb0_print_tlbentries(void)
 3108 {
 3109         uint32_t mas0, mas1, mas2, mas3, mas7;
 3110         int entryidx, way, idx;
 3111 
 3112         debugf("TLB0 entries:\n");
 3113         for (way = 0; way < TLB0_WAYS; way ++)
 3114                 for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) {
 3115 
 3116                         mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
 3117                         mtspr(SPR_MAS0, mas0);
 3118                         __asm __volatile("isync");
 3119 
 3120                         mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT;
 3121                         mtspr(SPR_MAS2, mas2);
 3122 
 3123                         __asm __volatile("isync; tlbre");
 3124 
 3125                         mas1 = mfspr(SPR_MAS1);
 3126                         mas2 = mfspr(SPR_MAS2);
 3127                         mas3 = mfspr(SPR_MAS3);
 3128                         mas7 = mfspr(SPR_MAS7);
 3129 
 3130                         idx = tlb0_tableidx(mas2, way);
 3131                         tlb_print_entry(idx, mas1, mas2, mas3, mas7);
 3132                 }
 3133 }
 3134 
 3135 /**************************************************************************/
 3136 /* TLB1 handling */
 3137 /**************************************************************************/
 3138 
 3139 /*
 3140  * TLB1 mapping notes:
 3141  *
 3142  * TLB1[0]      Kernel text and data.
 3143  * TLB1[1-15]   Additional kernel text and data mappings (if required), PCI
 3144  *              windows, and other device mappings.
 3145  */
 3146 
 3147 /*
 3148  * Read an entry from the given TLB1 slot.
 3149  */
 3150 void
 3151 tlb1_read_entry(tlb_entry_t *entry, unsigned int slot)
 3152 {
 3153         uint32_t mas0;
 3154 
 3155         KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__));
 3156 
 3157         mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot);
 3158         mtspr(SPR_MAS0, mas0);
 3159         __asm __volatile("isync; tlbre");
 3160 
 3161         entry->mas1 = mfspr(SPR_MAS1);
 3162         entry->mas2 = mfspr(SPR_MAS2);
 3163         entry->mas3 = mfspr(SPR_MAS3);
 3164 
 3165         switch ((mfpvr() >> 16) & 0xFFFF) {
 3166         case FSL_E500v2:
 3167         case FSL_E500mc:
 3168         case FSL_E5500:
 3169                 entry->mas7 = mfspr(SPR_MAS7);
 3170                 break;
 3171         default:
 3172                 entry->mas7 = 0;
 3173                 break;
 3174         }
 3175 
 3176         entry->virt = entry->mas2 & MAS2_EPN_MASK;
 3177         entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) |
 3178             (entry->mas3 & MAS3_RPN);
 3179         entry->size =
 3180             tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT);
 3181 }
 3182 
 3183 /*
 3184  * Write given entry to TLB1 hardware.
 3185  * Use 32 bit pa, clear 4 high-order bits of RPN (mas7).
 3186  */
 3187 static void
 3188 tlb1_write_entry(tlb_entry_t *e, unsigned int idx)
 3189 {
 3190         uint32_t mas0;
 3191 
 3192         //debugf("tlb1_write_entry: s\n");
 3193 
 3194         /* Select entry */
 3195         mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx);
 3196         //debugf("tlb1_write_entry: mas0 = 0x%08x\n", mas0);
 3197 
 3198         mtspr(SPR_MAS0, mas0);
 3199         __asm __volatile("isync");
 3200         mtspr(SPR_MAS1, e->mas1);
 3201         __asm __volatile("isync");
 3202         mtspr(SPR_MAS2, e->mas2);
 3203         __asm __volatile("isync");
 3204         mtspr(SPR_MAS3, e->mas3);
 3205         __asm __volatile("isync");
 3206         switch ((mfpvr() >> 16) & 0xFFFF) {
 3207         case FSL_E500mc:
 3208         case FSL_E5500:
 3209                 mtspr(SPR_MAS8, 0);
 3210                 __asm __volatile("isync");
 3211                 /* FALLTHROUGH */
 3212         case FSL_E500v2:
 3213                 mtspr(SPR_MAS7, e->mas7);
 3214                 __asm __volatile("isync");
 3215                 break;
 3216         default:
 3217                 break;
 3218         }
 3219 
 3220         __asm __volatile("tlbwe; isync; msync");
 3221 
 3222         //debugf("tlb1_write_entry: e\n");
 3223 }
 3224 
 3225 /*
 3226  * Return the largest uint value log such that 2^log <= num.
 3227  */
 3228 static unsigned int
 3229 ilog2(unsigned int num)
 3230 {
 3231         int lz;
 3232 
 3233         __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num));
 3234         return (31 - lz);
 3235 }
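      /*
       * For example, ilog2(0x00500000) == 22: cntlzw sees 9 leading zero bits,
       * and 31 - 9 == 22.  ilog2(0) is not meaningful here (cntlzw returns 32).
       */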
 3236 
 3237 /*
 3238  * Convert TLB TSIZE value to mapped region size.
 3239  */
 3240 static vm_size_t
 3241 tsize2size(unsigned int tsize)
 3242 {
 3243 
 3244         /*
 3245          * size = 4^tsize KB
 3246          * size = 4^tsize * 2^10 = 2^(2 * tsize + 10)
 3247          */
 3248 
 3249         return ((1 << (2 * tsize)) * 1024);
 3250 }
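      /* Examples: tsize2size(1) == 4 KB, tsize2size(7) == 16 MB (4^7 KB). */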
 3251 
 3252 /*
 3253  * Convert region size (must be power of 4) to TLB TSIZE value.
 3254  */
 3255 static unsigned int
 3256 size2tsize(vm_size_t size)
 3257 {
 3258 
 3259         return (ilog2(size) / 2 - 5);
 3260 }
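      /*
       * Example: size2tsize(16 MB) == ilog2(0x1000000) / 2 - 5 == 7, the
       * inverse of tsize2size(7) above.
       */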
 3261 
 3262 /*
 3263  * Register a permanent kernel mapping in TLB1.
 3264  *
 3265  * Entries are created in the first free (invalid) slot found by scanning
 3266  * TLB1 and are not supposed to be invalidated.
 3267  */
 3268 int
 3269 tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size,
 3270     uint32_t flags)
 3271 {
 3272         tlb_entry_t e;
 3273         uint32_t ts, tid;
 3274         int tsize, index;
 3275 
 3276         for (index = 0; index < TLB1_ENTRIES; index++) {
 3277                 tlb1_read_entry(&e, index);
 3278                 if ((e.mas1 & MAS1_VALID) == 0)
 3279                         break;
 3280                 /* Check if we're just updating the flags, and update them. */
 3281                 if (e.phys == pa && e.virt == va && e.size == size) {
 3282                         e.mas2 = (va & MAS2_EPN_MASK) | flags;
 3283                         tlb1_write_entry(&e, index);
 3284                         return (0);
 3285                 }
 3286         }
 3287         if (index >= TLB1_ENTRIES) {
 3288                 printf("tlb1_set_entry: TLB1 full!\n");
 3289                 return (-1);
 3290         }
 3291 
 3292         /* Convert size to TSIZE */
 3293         tsize = size2tsize(size);
 3294 
 3295         tid = (TID_KERNEL << MAS1_TID_SHIFT) & MAS1_TID_MASK;
 3296         /* XXX TS is hard-coded to 0 for now as we only use a single address space. */
 3297         ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK;
 3298 
 3299         e.phys = pa;
 3300         e.virt = va;
 3301         e.size = size;
 3302         e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid;
 3303         e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK);
 3304         e.mas2 = (va & MAS2_EPN_MASK) | flags;
 3305 
 3306         /* Set supervisor RWX permission bits */
 3307         e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX;
 3308         e.mas7 = (pa >> 32) & MAS7_RPN;
 3309 
 3310         tlb1_write_entry(&e, index);
 3311 
 3312         /*
 3313          * XXX in general TLB1 updates should be propagated between CPUs,
 3314          * since the current design assumes the same TLB1 set-up on all
 3315          * cores.
 3316          */
 3317         return (0);
 3318 }
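      /*
       * For example, tlb1_mapin_region() below maps each RAM chunk with
       * tlb1_set_entry(va, pa, pgsz, _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM),
       * where pgsz is one of the power-of-4 sizes understood by size2tsize().
       */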
 3319 
 3320 /*
 3321  * Map a contiguous RAM region into TLB1 using at most
 3322  * KERNEL_REGION_MAX_TLB_ENTRIES entries.
 3323  *
 3324  * If necessary, round up the last entry size and return the total size
 3325  * used by all allocated entries.
 3326  */
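      /*
       * Illustrative example: a 72 MB region becomes a 64 MB entry plus two
       * 4 MB entries.  If KERNEL_REGION_MAX_TLB_ENTRIES is exhausted before
       * the region is covered, trailing entries of the current size are merged
       * into one entry of the next larger size, which may map more memory than
       * was requested.
       */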
 3327 vm_size_t
 3328 tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size)
 3329 {
 3330         vm_size_t pgs[KERNEL_REGION_MAX_TLB_ENTRIES];
 3331         vm_size_t mapped, pgsz, base, mask;
 3332         int idx, nents;
 3333 
 3334         /* Round up to the next 1M */
 3335         size = roundup2(size, 1 << 20);
 3336 
 3337         mapped = 0;
 3338         idx = 0;
 3339         base = va;
 3340         pgsz = 64*1024*1024;
 3341         while (mapped < size) {
 3342                 while (mapped < size && idx < KERNEL_REGION_MAX_TLB_ENTRIES) {
 3343                         while (pgsz > (size - mapped))
 3344                                 pgsz >>= 2;
 3345                         pgs[idx++] = pgsz;
 3346                         mapped += pgsz;
 3347                 }
 3348 
 3349                 /* We under-map. Correct for this. */
 3350                 if (mapped < size) {
 3351                         while (pgs[idx - 1] == pgsz) {
 3352                                 idx--;
 3353                                 mapped -= pgsz;
 3354                         }
 3355                         /* XXX We may increase beyond our starting point. */
 3356                         pgsz <<= 2;
 3357                         pgs[idx++] = pgsz;
 3358                         mapped += pgsz;
 3359                 }
 3360         }
 3361 
 3362         nents = idx;
 3363         mask = pgs[0] - 1;
 3364         /* Align address to the boundary */
 3365         if (va & mask) {
 3366                 va = (va + mask) & ~mask;
 3367                 pa = (pa + mask) & ~mask;
 3368         }
 3369 
 3370         for (idx = 0; idx < nents; idx++) {
 3371                 pgsz = pgs[idx];
 3372                 debugf("%u: %llx -> %x, size=%x\n", idx, pa, va, pgsz);
 3373                 tlb1_set_entry(va, pa, pgsz,
 3374                     _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM);
 3375                 pa += pgsz;
 3376                 va += pgsz;
 3377         }
 3378 
 3379         mapped = (va - base);
 3380 #ifdef __powerpc64__
 3381         printf("mapped size 0x%016lx (wasted space 0x%016lx)\n",
 3382 #else
 3383         printf("mapped size 0x%08x (wasted space 0x%08x)\n",
 3384 #endif
 3385             mapped, mapped - size);
 3386         return (mapped);
 3387 }
 3388 
 3389 /*
 3390  * TLB1 initialization routine, to be called after the very first
 3391  * assembler level setup done in locore.S.
 3392  */
 3393 void
 3394 tlb1_init()
 3395 {
 3396         uint32_t mas0, mas1, mas2, mas3, mas7;
 3397         uint32_t tsz;
 3398 
 3399         tlb1_get_tlbconf();
 3400 
 3401         mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
 3402         mtspr(SPR_MAS0, mas0);
 3403         __asm __volatile("isync; tlbre");
 3404 
 3405         mas1 = mfspr(SPR_MAS1);
 3406         mas2 = mfspr(SPR_MAS2);
 3407         mas3 = mfspr(SPR_MAS3);
 3408         mas7 = mfspr(SPR_MAS7);
 3409 
 3410         kernload =  ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) |
 3411             (mas3 & MAS3_RPN);
 3412 
 3413         tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
 3414         kernsize += (tsz > 0) ? tsize2size(tsz) : 0;
 3415 
 3416         /* Setup TLB miss defaults */
 3417         set_mas4_defaults();
 3418 }
 3419 
 3420 vm_offset_t 
 3421 pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
 3422 {
 3423         vm_paddr_t pa_base;
 3424         vm_offset_t va, sz;
 3425         int i;
 3426         tlb_entry_t e;
 3427 
 3428         KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!"));
 3429         
 3430         for (i = 0; i < TLB1_ENTRIES; i++) {
 3431                 tlb1_read_entry(&e, i);
 3432                 if (!(e.mas1 & MAS1_VALID))
 3433                         continue;
 3434                 if (pa >= e.phys && (pa + size) <=
 3435                     (e.phys + e.size))
 3436                         return (e.virt + (pa - e.phys));
 3437         }
 3438 
 3439         pa_base = rounddown(pa, PAGE_SIZE);
 3440         size = roundup(size + (pa - pa_base), PAGE_SIZE);
 3441         tlb1_map_base = roundup2(tlb1_map_base, 1 << (ilog2(size) & ~1));
 3442         va = tlb1_map_base + (pa - pa_base);
 3443 
 3444         do {
 3445                 sz = 1 << (ilog2(size) & ~1);
 3446                 tlb1_set_entry(tlb1_map_base, pa_base, sz,
 3447                     _TLB_ENTRY_SHARED | _TLB_ENTRY_IO);
 3448                 size -= sz;
 3449                 pa_base += sz;
 3450                 tlb1_map_base += sz;
 3451         } while (size > 0);
 3452 
 3453         return (va);
 3454 }
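      /*
       * Note: pmap_early_io_map() above is only usable before the VM system is
       * bootstrapped (see the KASSERT); it either reuses a TLB1 entry that
       * already covers the physical range or carves new power-of-4 entries out
       * of tlb1_map_base.
       */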
 3455 
 3456 /*
 3457  * Setup MAS4 defaults.
 3458  * These values are loaded into MAS0-MAS2 on a TLB miss.
 3459  */
 3460 static void
 3461 set_mas4_defaults(void)
 3462 {
 3463         uint32_t mas4;
 3464 
 3465         /* Defaults: TLB0, PID0, TSIZED=4K */
 3466         mas4 = MAS4_TLBSELD0;
 3467         mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK;
 3468 #ifdef SMP
 3469         mas4 |= MAS4_MD;
 3470 #endif
 3471         mtspr(SPR_MAS4, mas4);
 3472         __asm __volatile("isync");
 3473 }
 3474 
 3475 /*
 3476  * Print out contents of the MAS registers for each TLB1 entry
 3477  */
 3478 void
 3479 tlb1_print_tlbentries(void)
 3480 {
 3481         uint32_t mas0, mas1, mas2, mas3, mas7;
 3482         int i;
 3483 
 3484         debugf("TLB1 entries:\n");
 3485         for (i = 0; i < TLB1_ENTRIES; i++) {
 3486 
 3487                 mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i);
 3488                 mtspr(SPR_MAS0, mas0);
 3489 
 3490                 __asm __volatile("isync; tlbre");
 3491 
 3492                 mas1 = mfspr(SPR_MAS1);
 3493                 mas2 = mfspr(SPR_MAS2);
 3494                 mas3 = mfspr(SPR_MAS3);
 3495                 mas7 = mfspr(SPR_MAS7);
 3496 
 3497                 tlb_print_entry(i, mas1, mas2, mas3, mas7);
 3498         }
 3499 }
 3500 
 3501 /*
 3502  * Return 0 if the physical IO range is encompassed by one of the
 3503  * TLB1 entries, otherwise return the related error code.
 3504  */
 3505 static int
 3506 tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va)
 3507 {
 3508         uint32_t prot;
 3509         vm_paddr_t pa_start;
 3510         vm_paddr_t pa_end;
 3511         unsigned int entry_tsize;
 3512         vm_size_t entry_size;
 3513         tlb_entry_t e;
 3514 
 3515         *va = (vm_offset_t)NULL;
 3516 
 3517         tlb1_read_entry(&e, i);
 3518         /* Skip invalid entries */
 3519         if (!(e.mas1 & MAS1_VALID))
 3520                 return (EINVAL);
 3521 
 3522         /*
 3523          * The entry must be cache-inhibited, guarded, and r/w
 3524          * so it can function as an i/o page
 3525          */
 3526         prot = e.mas2 & (MAS2_I | MAS2_G);
 3527         if (prot != (MAS2_I | MAS2_G))
 3528                 return (EPERM);
 3529 
 3530         prot = e.mas3 & (MAS3_SR | MAS3_SW);
 3531         if (prot != (MAS3_SR | MAS3_SW))
 3532                 return (EPERM);
 3533 
 3534         /* The address should be within the entry range. */
 3535         entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT;
 3536         KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize"));
 3537 
 3538         entry_size = tsize2size(entry_tsize);
 3539         pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | 
 3540             (e.mas3 & MAS3_RPN);
 3541         pa_end = pa_start + entry_size;
 3542 
 3543         if ((pa < pa_start) || ((pa + size) > pa_end))
 3544                 return (ERANGE);
 3545 
 3546         /* Return virtual address of this mapping. */
 3547         *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start);
 3548         return (0);
 3549 }
 3550 
 3551 /*
 3552  * Invalidate all TLB0 entries which match the given TID. Note this is
 3553  * dedicated to cases where invalidations should NOT be propagated to other
 3554  * CPUs.
 3555  */
 3556 static void
 3557 tid_flush(tlbtid_t tid)
 3558 {
 3559         register_t msr;
 3560         uint32_t mas0, mas1, mas2;
 3561         int entry, way;
 3562 
 3563 
 3564         /* Don't evict kernel translations */
 3565         if (tid == TID_KERNEL)
 3566                 return;
 3567 
 3568         msr = mfmsr();
 3569         __asm __volatile("wrteei 0");
 3570 
 3571         for (way = 0; way < TLB0_WAYS; way++)
 3572                 for (entry = 0; entry < TLB0_ENTRIES_PER_WAY; entry++) {
 3573 
 3574                         mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way);
 3575                         mtspr(SPR_MAS0, mas0);
 3576                         __asm __volatile("isync");
 3577 
 3578                         mas2 = entry << MAS2_TLB0_ENTRY_IDX_SHIFT;
 3579                         mtspr(SPR_MAS2, mas2);
 3580 
 3581                         __asm __volatile("isync; tlbre");
 3582 
 3583                         mas1 = mfspr(SPR_MAS1);
 3584 
 3585                         if (!(mas1 & MAS1_VALID))
 3586                                 continue;
 3587                         if (((mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT) != tid)
 3588                                 continue;
 3589                         mas1 &= ~MAS1_VALID;
 3590                         mtspr(SPR_MAS1, mas1);
 3591                         __asm __volatile("isync; tlbwe; isync; msync");
 3592                 }
 3593         mtmsr(msr);
 3594 }
