FreeBSD/Linux Kernel Cross Reference
sys/mips/mips/pmap.c
1 /*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
38 * from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
39 * JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
40 */
41
42 /*
43 * Manages physical address maps.
44 *
45 * Since the information managed by this module is
46 * also stored by the logical address mapping module,
47 * this module may throw away valid virtual-to-physical
48 * mappings at almost any time. However, invalidations
49 * of virtual-to-physical mappings must be done as
50 * requested.
51 *
52 * In order to cope with hardware architectures which
53  * make virtual-to-physical map invalidations expensive,
54  * this module may delay invalidation or reduced-protection
55 * operations until such time as they are actually
56 * necessary. This module is given full information as
57 * to which processors are currently using which maps,
58  * and as to when physical maps must be made correct.
59 */
60
61 #include <sys/cdefs.h>
62 __FBSDID("$FreeBSD: releng/10.4/sys/mips/mips/pmap.c 310133 2016-12-16 01:06:35Z jhb $");
63
64 #include "opt_ddb.h"
65 #include "opt_pmap.h"
66
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/lock.h>
70 #include <sys/mman.h>
71 #include <sys/msgbuf.h>
72 #include <sys/mutex.h>
73 #include <sys/pcpu.h>
74 #include <sys/proc.h>
75 #include <sys/rwlock.h>
76 #include <sys/sched.h>
77 #include <sys/smp.h>
78 #include <sys/sysctl.h>
79 #include <sys/vmmeter.h>
80
81 #ifdef DDB
82 #include <ddb/ddb.h>
83 #endif
84
85 #include <vm/vm.h>
86 #include <vm/vm_param.h>
87 #include <vm/vm_kern.h>
88 #include <vm/vm_page.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_extern.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_pager.h>
94 #include <vm/uma.h>
95
96 #include <machine/cache.h>
97 #include <machine/md_var.h>
98 #include <machine/tlb.h>
99
100 #undef PMAP_DEBUG
101
102 #if !defined(DIAGNOSTIC)
103 #define PMAP_INLINE __inline
104 #else
105 #define PMAP_INLINE
106 #endif
107
108 #ifdef PV_STATS
109 #define PV_STAT(x) do { x ; } while (0)
110 #else
111 #define PV_STAT(x) do { } while (0)
112 #endif
113
114 /*
115 * Get PDEs and PTEs for user/kernel address space
116 */
117 #define pmap_seg_index(v) (((v) >> SEGSHIFT) & (NPDEPG - 1))
118 #define pmap_pde_index(v) (((v) >> PDRSHIFT) & (NPDEPG - 1))
119 #define pmap_pte_index(v) (((v) >> PAGE_SHIFT) & (NPTEPG - 1))
120 #define pmap_pde_pindex(v) ((v) >> PDRSHIFT)
121
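/*
 * Illustrative worked example (not part of the original source): given a
 * virtual address 'va', the macros above decompose it into table indices
 * roughly as follows; the concrete shift widths depend on the ABI and
 * page size, so no specific bit positions are assumed here.
 *
 *	pmap_seg_index(va)  -- index into pm_segtab (top level)
 *	pmap_pde_index(va)  -- index into the second-level table (n64 only)
 *	pmap_pte_index(va)  -- index into the leaf page-table page
 *	pmap_pde_pindex(va) -- linear page-table-page index used as a
 *			       vm_page pindex by the allocation code below
 */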
122 #ifdef __mips_n64
123 #define NUPDE (NPDEPG * NPDEPG)
124 #define NUSERPGTBLS (NUPDE + NPDEPG)
125 #else
126 #define NUPDE (NPDEPG)
127 #define NUSERPGTBLS (NUPDE)
128 #endif
129
130 #define is_kernel_pmap(x) ((x) == kernel_pmap)
131
132 struct pmap kernel_pmap_store;
133 pd_entry_t *kernel_segmap;
134
135 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
136 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
137
138 static int nkpt;
139 unsigned pmap_max_asid; /* max ASID supported by the system */
140
141 #define PMAP_ASID_RESERVED 0
142
143 vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
144
145 static void pmap_asid_alloc(pmap_t pmap);
146
147 static struct rwlock_padalign pvh_global_lock;
148
149 /*
150 * Data for the pv entry allocation mechanism
151 */
152 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
153 static int pv_entry_count;
154
155 static void free_pv_chunk(struct pv_chunk *pc);
156 static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
157 static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
158 static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
159 static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
160 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
161 vm_offset_t va);
162 static vm_page_t pmap_alloc_direct_page(unsigned int index, int req);
163 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
164 vm_page_t m, vm_prot_t prot, vm_page_t mpte);
165 static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
166 pd_entry_t pde);
167 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
168 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
169 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
170 vm_offset_t va, vm_page_t m);
171 static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte);
172 static void pmap_invalidate_all(pmap_t pmap);
173 static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
174 static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m);
175
176 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);
177 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags);
178 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
179 static pt_entry_t init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot);
180
181 static void pmap_invalidate_page_action(void *arg);
182 static void pmap_invalidate_range_action(void *arg);
183 static void pmap_update_page_action(void *arg);
184
185 #ifndef __mips_n64
186 /*
187  * This structure is for high memory (memory above 512MB on 32-bit) support.
188 * The highmem area does not have a KSEG0 mapping, and we need a mechanism to
189 * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc.
190 *
191 * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To
192 * access a highmem physical address on a CPU, we map the physical address to
193 * the reserved virtual address for the CPU in the kernel pagetable. This is
194  * done with interrupts disabled (although a spinlock and sched_pin would
195  * be sufficient).
196 */
197 struct local_sysmaps {
198 vm_offset_t base;
199 uint32_t saved_intr;
200 uint16_t valid1, valid2;
201 };
202 static struct local_sysmaps sysmap_lmem[MAXCPU];
203
204 static __inline void
205 pmap_alloc_lmem_map(void)
206 {
207 int i;
208
209 for (i = 0; i < MAXCPU; i++) {
210 sysmap_lmem[i].base = virtual_avail;
211 virtual_avail += PAGE_SIZE * 2;
212 sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
213 }
214 }
215
216 static __inline vm_offset_t
217 pmap_lmem_map1(vm_paddr_t phys)
218 {
219 struct local_sysmaps *sysm;
220 pt_entry_t *pte, npte;
221 vm_offset_t va;
222 uint32_t intr;
223 int cpu;
224
225 intr = intr_disable();
226 cpu = PCPU_GET(cpuid);
227 sysm = &sysmap_lmem[cpu];
228 sysm->saved_intr = intr;
229 va = sysm->base;
230 npte = TLBLO_PA_TO_PFN(phys) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
231 pte = pmap_pte(kernel_pmap, va);
232 *pte = npte;
233 sysm->valid1 = 1;
234 return (va);
235 }
236
237 static __inline vm_offset_t
238 pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
239 {
240 struct local_sysmaps *sysm;
241 pt_entry_t *pte, npte;
242 vm_offset_t va1, va2;
243 uint32_t intr;
244 int cpu;
245
246 intr = intr_disable();
247 cpu = PCPU_GET(cpuid);
248 sysm = &sysmap_lmem[cpu];
249 sysm->saved_intr = intr;
250 va1 = sysm->base;
251 va2 = sysm->base + PAGE_SIZE;
252 npte = TLBLO_PA_TO_PFN(phys1) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
253 pte = pmap_pte(kernel_pmap, va1);
254 *pte = npte;
255 npte = TLBLO_PA_TO_PFN(phys2) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
256 pte = pmap_pte(kernel_pmap, va2);
257 *pte = npte;
258 sysm->valid1 = 1;
259 sysm->valid2 = 1;
260 return (va1);
261 }
262
263 static __inline void
264 pmap_lmem_unmap(void)
265 {
266 struct local_sysmaps *sysm;
267 pt_entry_t *pte;
268 int cpu;
269
270 cpu = PCPU_GET(cpuid);
271 sysm = &sysmap_lmem[cpu];
272 pte = pmap_pte(kernel_pmap, sysm->base);
273 *pte = PTE_G;
274 tlb_invalidate_address(kernel_pmap, sysm->base);
275 sysm->valid1 = 0;
276 if (sysm->valid2) {
277 pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);
278 *pte = PTE_G;
279 tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);
280 sysm->valid2 = 0;
281 }
282 intr_restore(sysm->saved_intr);
283 }
284 #else /* __mips_n64 */
285
286 static __inline void
287 pmap_alloc_lmem_map(void)
288 {
289 }
290
291 static __inline vm_offset_t
292 pmap_lmem_map1(vm_paddr_t phys)
293 {
294
295 return (0);
296 }
297
298 static __inline vm_offset_t
299 pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
300 {
301
302 return (0);
303 }
304
305 static __inline vm_offset_t
306 pmap_lmem_unmap(void)
307 {
308
309 return (0);
310 }
311 #endif /* !__mips_n64 */
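
/*
 * Illustrative sketch (not part of the original source): on 32-bit
 * configurations the helpers above are meant to be used in matched pairs,
 * as the real users (pmap_zero_page(), pmap_copy_page() and friends) do
 * for pages that are not direct-mappable.  The function name below is
 * hypothetical.
 */
#if 0
static void
example_zero_highmem_page(vm_paddr_t pa)
{
	vm_offset_t va;

	va = pmap_lmem_map1(pa);	/* disables interrupts, installs the temporary PTE */
	bzero((void *)va, PAGE_SIZE);	/* operate on the page through the per-CPU VA */
	pmap_lmem_unmap();		/* clears the PTE(s) and restores interrupts */
}
#endif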
312
313 /*
314 * Page table entry lookup routines.
315 */
316 static __inline pd_entry_t *
317 pmap_segmap(pmap_t pmap, vm_offset_t va)
318 {
319
320 return (&pmap->pm_segtab[pmap_seg_index(va)]);
321 }
322
323 #ifdef __mips_n64
324 static __inline pd_entry_t *
325 pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
326 {
327 pd_entry_t *pde;
328
329 pde = (pd_entry_t *)*pdpe;
330 return (&pde[pmap_pde_index(va)]);
331 }
332
333 static __inline pd_entry_t *
334 pmap_pde(pmap_t pmap, vm_offset_t va)
335 {
336 pd_entry_t *pdpe;
337
338 pdpe = pmap_segmap(pmap, va);
339 if (*pdpe == NULL)
340 return (NULL);
341
342 return (pmap_pdpe_to_pde(pdpe, va));
343 }
344 #else
345 static __inline pd_entry_t *
346 pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
347 {
348
349 return (pdpe);
350 }
351
352 static __inline
353 pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va)
354 {
355
356 return (pmap_segmap(pmap, va));
357 }
358 #endif
359
360 static __inline pt_entry_t *
361 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
362 {
363 pt_entry_t *pte;
364
365 pte = (pt_entry_t *)*pde;
366 return (&pte[pmap_pte_index(va)]);
367 }
368
369 pt_entry_t *
370 pmap_pte(pmap_t pmap, vm_offset_t va)
371 {
372 pd_entry_t *pde;
373
374 pde = pmap_pde(pmap, va);
375 if (pde == NULL || *pde == NULL)
376 return (NULL);
377
378 return (pmap_pde_to_pte(pde, va));
379 }
380
381 vm_offset_t
382 pmap_steal_memory(vm_size_t size)
383 {
384 vm_paddr_t bank_size, pa;
385 vm_offset_t va;
386
387 size = round_page(size);
388 bank_size = phys_avail[1] - phys_avail[0];
389 while (size > bank_size) {
390 int i;
391
392 for (i = 0; phys_avail[i + 2]; i += 2) {
393 phys_avail[i] = phys_avail[i + 2];
394 phys_avail[i + 1] = phys_avail[i + 3];
395 }
396 phys_avail[i] = 0;
397 phys_avail[i + 1] = 0;
398 if (!phys_avail[0])
399 panic("pmap_steal_memory: out of memory");
400 bank_size = phys_avail[1] - phys_avail[0];
401 }
402
403 pa = phys_avail[0];
404 phys_avail[0] += size;
405 if (MIPS_DIRECT_MAPPABLE(pa) == 0)
406 panic("Out of memory below 512Meg?");
407 va = MIPS_PHYS_TO_DIRECT(pa);
408 bzero((caddr_t)va, size);
409 return (va);
410 }
411
412 /*
413 * Bootstrap the system enough to run with virtual memory. This
414 * assumes that the phys_avail array has been initialized.
415 */
416 static void
417 pmap_create_kernel_pagetable(void)
418 {
419 int i, j;
420 vm_offset_t ptaddr;
421 pt_entry_t *pte;
422 #ifdef __mips_n64
423 pd_entry_t *pde;
424 vm_offset_t pdaddr;
425 int npt, npde;
426 #endif
427
428 /*
429 * Allocate segment table for the kernel
430 */
431 kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
432
433 /*
434 * Allocate second level page tables for the kernel
435 */
436 #ifdef __mips_n64
437 npde = howmany(NKPT, NPDEPG);
438 pdaddr = pmap_steal_memory(PAGE_SIZE * npde);
439 #endif
440 nkpt = NKPT;
441 ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt);
442
443 /*
444 * The R[4-7]?00 stores only one copy of the Global bit in the
445  * translation lookaside buffer for each 2-page entry. Thus invalid
446  * entries must have the Global bit set so that when the EntryLo and
447  * EntryHi G bits are ANDed together they will produce a global bit to
448  * store in the TLB.
449 */
450 for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++)
451 *pte = PTE_G;
452
453 #ifdef __mips_n64
454 for (i = 0, npt = nkpt; npt > 0; i++) {
455 kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE);
456 pde = (pd_entry_t *)kernel_segmap[i];
457
458 for (j = 0; j < NPDEPG && npt > 0; j++, npt--)
459 pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE);
460 }
461 #else
462 for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++)
463 kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE));
464 #endif
465
466 PMAP_LOCK_INIT(kernel_pmap);
467 kernel_pmap->pm_segtab = kernel_segmap;
468 CPU_FILL(&kernel_pmap->pm_active);
469 TAILQ_INIT(&kernel_pmap->pm_pvchunk);
470 kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
471 kernel_pmap->pm_asid[0].gen = 0;
472 kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE;
473 }
474
475 void
476 pmap_bootstrap(void)
477 {
478 int i;
479 int need_local_mappings = 0;
480
481 /* Sort. */
482 again:
483 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
484 /*
485 * Keep the memory aligned on page boundary.
486 */
487 phys_avail[i] = round_page(phys_avail[i]);
488 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
489
490 if (i < 2)
491 continue;
492 if (phys_avail[i - 2] > phys_avail[i]) {
493 vm_paddr_t ptemp[2];
494
495 ptemp[0] = phys_avail[i + 0];
496 ptemp[1] = phys_avail[i + 1];
497
498 phys_avail[i + 0] = phys_avail[i - 2];
499 phys_avail[i + 1] = phys_avail[i - 1];
500
501 phys_avail[i - 2] = ptemp[0];
502 phys_avail[i - 1] = ptemp[1];
503 goto again;
504 }
505 }
506
507 /*
508  * On 32-bit systems, we may have memory which cannot be mapped
509  * directly. This memory will need a temporary mapping before it
510  * can be accessed.
511 */
512 if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1))
513 need_local_mappings = 1;
514
515 /*
516 * Copy the phys_avail[] array before we start stealing memory from it.
517 */
518 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
519 physmem_desc[i] = phys_avail[i];
520 physmem_desc[i + 1] = phys_avail[i + 1];
521 }
522
523 Maxmem = atop(phys_avail[i - 1]);
524
525 if (bootverbose) {
526 printf("Physical memory chunk(s):\n");
527 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
528 vm_paddr_t size;
529
530 size = phys_avail[i + 1] - phys_avail[i];
531 printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
532 (uintmax_t) phys_avail[i],
533 (uintmax_t) phys_avail[i + 1] - 1,
534 (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
535 }
536 printf("Maxmem is 0x%0jx\n", ptoa((uintmax_t)Maxmem));
537 }
538 /*
539 * Steal the message buffer from the beginning of memory.
540 */
541 msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize);
542 msgbufinit(msgbufp, msgbufsize);
543
544 /*
545 * Steal thread0 kstack.
546 */
547 kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
548
549 virtual_avail = VM_MIN_KERNEL_ADDRESS;
550 virtual_end = VM_MAX_KERNEL_ADDRESS;
551
552 #ifdef SMP
553 /*
554 * Steal some virtual address space to map the pcpu area.
555 */
556 virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
557 pcpup = (struct pcpu *)virtual_avail;
558 virtual_avail += PAGE_SIZE * 2;
559
560 /*
561 * Initialize the wired TLB entry mapping the pcpu region for
562 * the BSP at 'pcpup'. Up until this point we were operating
563 * with the 'pcpup' for the BSP pointing to a virtual address
564 * in KSEG0 so there was no need for a TLB mapping.
565 */
566 mips_pcpu_tlb_init(PCPU_ADDR(0));
567
568 if (bootverbose)
569 printf("pcpu is available at virtual address %p.\n", pcpup);
570 #endif
571
572 if (need_local_mappings)
573 pmap_alloc_lmem_map();
574 pmap_create_kernel_pagetable();
575 pmap_max_asid = VMNUM_PIDS;
576 mips_wr_entryhi(0);
577 mips_wr_pagemask(0);
578
579 /*
580 * Initialize the global pv list lock.
581 */
582 rw_init(&pvh_global_lock, "pmap pv global");
583 }
584
585 /*
586 * Initialize a vm_page's machine-dependent fields.
587 */
588 void
589 pmap_page_init(vm_page_t m)
590 {
591
592 TAILQ_INIT(&m->md.pv_list);
593 m->md.pv_flags = 0;
594 }
595
596 /*
597 * Initialize the pmap module.
598 * Called by vm_init, to initialize any structures that the pmap
599 * system needs to map virtual memory.
600 */
601 void
602 pmap_init(void)
603 {
604 }
605
606 /***************************************************
607 * Low level helper routines.....
608 ***************************************************/
609
610 #ifdef SMP
611 static __inline void
612 pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
613 {
614 int cpuid, cpu, self;
615 cpuset_t active_cpus;
616
617 sched_pin();
618 if (is_kernel_pmap(pmap)) {
619 smp_rendezvous(NULL, fn, NULL, arg);
620 goto out;
621 }
622 /* Force ASID update on inactive CPUs */
623 CPU_FOREACH(cpu) {
624 if (!CPU_ISSET(cpu, &pmap->pm_active))
625 pmap->pm_asid[cpu].gen = 0;
626 }
627 cpuid = PCPU_GET(cpuid);
628 /*
629 * XXX: barrier/locking for active?
630 *
631 * Take a snapshot of active here, any further changes are ignored.
632 * tlb update/invalidate should be harmless on inactive CPUs
633 */
634 active_cpus = pmap->pm_active;
635 self = CPU_ISSET(cpuid, &active_cpus);
636 CPU_CLR(cpuid, &active_cpus);
637 /* Optimize for the case where this cpu is the only active one */
638 if (CPU_EMPTY(&active_cpus)) {
639 if (self)
640 fn(arg);
641 } else {
642 if (self)
643 CPU_SET(cpuid, &active_cpus);
644 smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg);
645 }
646 out:
647 sched_unpin();
648 }
649 #else /* !SMP */
650 static __inline void
651 pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
652 {
653 int cpuid;
654
655 if (is_kernel_pmap(pmap)) {
656 fn(arg);
657 return;
658 }
659 cpuid = PCPU_GET(cpuid);
660 if (!CPU_ISSET(cpuid, &pmap->pm_active))
661 pmap->pm_asid[cpuid].gen = 0;
662 else
663 fn(arg);
664 }
665 #endif /* SMP */
666
667 static void
668 pmap_invalidate_all(pmap_t pmap)
669 {
670
671 pmap_call_on_active_cpus(pmap,
672 (void (*)(void *))tlb_invalidate_all_user, pmap);
673 }
674
675 struct pmap_invalidate_page_arg {
676 pmap_t pmap;
677 vm_offset_t va;
678 };
679
680 static void
681 pmap_invalidate_page_action(void *arg)
682 {
683 struct pmap_invalidate_page_arg *p = arg;
684
685 tlb_invalidate_address(p->pmap, p->va);
686 }
687
688 static void
689 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
690 {
691 struct pmap_invalidate_page_arg arg;
692
693 arg.pmap = pmap;
694 arg.va = va;
695 pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg);
696 }
697
698 struct pmap_invalidate_range_arg {
699 pmap_t pmap;
700 vm_offset_t sva;
701 vm_offset_t eva;
702 };
703
704 static void
705 pmap_invalidate_range_action(void *arg)
706 {
707 struct pmap_invalidate_range_arg *p = arg;
708
709 tlb_invalidate_range(p->pmap, p->sva, p->eva);
710 }
711
712 static void
713 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
714 {
715 struct pmap_invalidate_range_arg arg;
716
717 arg.pmap = pmap;
718 arg.sva = sva;
719 arg.eva = eva;
720 pmap_call_on_active_cpus(pmap, pmap_invalidate_range_action, &arg);
721 }
722
723 struct pmap_update_page_arg {
724 pmap_t pmap;
725 vm_offset_t va;
726 pt_entry_t pte;
727 };
728
729 static void
730 pmap_update_page_action(void *arg)
731 {
732 struct pmap_update_page_arg *p = arg;
733
734 tlb_update(p->pmap, p->va, p->pte);
735 }
736
737 static void
738 pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
739 {
740 struct pmap_update_page_arg arg;
741
742 arg.pmap = pmap;
743 arg.va = va;
744 arg.pte = pte;
745 pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg);
746 }
747
748 /*
749 * Routine: pmap_extract
750 * Function:
751 * Extract the physical page address associated
752 * with the given map/virtual_address pair.
753 */
754 vm_paddr_t
755 pmap_extract(pmap_t pmap, vm_offset_t va)
756 {
757 pt_entry_t *pte;
758 vm_offset_t retval = 0;
759
760 PMAP_LOCK(pmap);
761 pte = pmap_pte(pmap, va);
762 if (pte) {
763 retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
764 }
765 PMAP_UNLOCK(pmap);
766 return (retval);
767 }
768
769 /*
770 * Routine: pmap_extract_and_hold
771 * Function:
772 * Atomically extract and hold the physical page
773 * with the given pmap and virtual address pair
774 * if that mapping permits the given protection.
775 */
776 vm_page_t
777 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
778 {
779 pt_entry_t pte, *ptep;
780 vm_paddr_t pa, pte_pa;
781 vm_page_t m;
782
783 m = NULL;
784 pa = 0;
785 PMAP_LOCK(pmap);
786 retry:
787 ptep = pmap_pte(pmap, va);
788 if (ptep != NULL) {
789 pte = *ptep;
790 if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) ||
791 (prot & VM_PROT_WRITE) == 0)) {
792 pte_pa = TLBLO_PTE_TO_PA(pte);
793 if (vm_page_pa_tryrelock(pmap, pte_pa, &pa))
794 goto retry;
795 m = PHYS_TO_VM_PAGE(pte_pa);
796 vm_page_hold(m);
797 }
798 }
799 PA_UNLOCK_COND(pa);
800 PMAP_UNLOCK(pmap);
801 return (m);
802 }
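
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller of pmap_extract_and_hold().  A non-NULL return means the mapping
 * exists and permits the requested protection; the page stays held (and
 * thus cannot be freed) until vm_page_unhold() is called.
 */
#if 0
static int
example_page_is_writable(pmap_t pmap, vm_offset_t va)
{
	vm_page_t m;

	m = pmap_extract_and_hold(pmap, va, VM_PROT_WRITE);
	if (m == NULL)
		return (0);	/* no mapping, or the mapping is read-only */
	/* ... inspect the page while it is held ... */
	vm_page_unhold(m);
	return (1);
}
#endif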
803
804 /***************************************************
805 * Low level mapping routines.....
806 ***************************************************/
807
808 /*
809 * add a wired page to the kva
810 */
811 void
812 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr)
813 {
814 pt_entry_t *pte;
815 pt_entry_t opte, npte;
816
817 #ifdef PMAP_DEBUG
818 printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa);
819 #endif
820
821 pte = pmap_pte(kernel_pmap, va);
822 opte = *pte;
823 npte = TLBLO_PA_TO_PFN(pa) | attr | PTE_D | PTE_V | PTE_G;
824 *pte = npte;
825 if (pte_test(&opte, PTE_V) && opte != npte)
826 pmap_update_page(kernel_pmap, va, npte);
827 }
828
829 void
830 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
831 {
832
833 KASSERT(is_cacheable_mem(pa),
834 ("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa));
835
836 pmap_kenter_attr(va, pa, PTE_C_CACHE);
837 }
838
839 /*
840 * remove a page from the kernel pagetables
841 */
842 /* PMAP_INLINE */ void
843 pmap_kremove(vm_offset_t va)
844 {
845 pt_entry_t *pte;
846
847 /*
848 * Write back all caches from the page being destroyed
849 */
850 mips_dcache_wbinv_range_index(va, PAGE_SIZE);
851
852 pte = pmap_pte(kernel_pmap, va);
853 *pte = PTE_G;
854 pmap_invalidate_page(kernel_pmap, va);
855 }
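
/*
 * Illustrative sketch (not part of the original source): pmap_kenter()
 * and pmap_kremove() are used in matched pairs to wire a single physical
 * page into the kernel map.  The function name below is hypothetical.
 */
#if 0
static void
example_kenter_pair(vm_offset_t va, vm_paddr_t pa)
{
	pmap_kenter(va, pa);	/* install a cacheable, wired, global mapping */
	/* ... access the page through 'va' ... */
	pmap_kremove(va);	/* write back caches and invalidate the TLB entry */
}
#endif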
856
857 /*
858 * Used to map a range of physical addresses into kernel
859 * virtual address space.
860 *
861 * The value passed in '*virt' is a suggested virtual address for
862 * the mapping. Architectures which can support a direct-mapped
863 * physical to virtual region can return the appropriate address
864 * within that region, leaving '*virt' unchanged. Other
865 * architectures should map the pages starting at '*virt' and
866 * update '*virt' with the first usable address after the mapped
867 * region.
868 *
869 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
870 */
871 vm_offset_t
872 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
873 {
874 vm_offset_t va, sva;
875
876 if (MIPS_DIRECT_MAPPABLE(end - 1))
877 return (MIPS_PHYS_TO_DIRECT(start));
878
879 va = sva = *virt;
880 while (start < end) {
881 pmap_kenter(va, start);
882 va += PAGE_SIZE;
883 start += PAGE_SIZE;
884 }
885 *virt = va;
886 return (sva);
887 }
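
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * boot-time caller of pmap_map().  If the range is direct-mappable the
 * returned address lies in XKPHYS/KSEG0 and '*virt' is left alone;
 * otherwise KVA is consumed starting at '*virt'.
 */
#if 0
static vm_offset_t
example_map_range(vm_paddr_t start, vm_paddr_t end)
{

	return (pmap_map(&virtual_avail, start, end,
	    VM_PROT_READ | VM_PROT_WRITE));
}
#endif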
888
889 /*
890  * Add a list of wired pages to the kva.  This
891  * routine is only used for temporary
892 * kernel mappings that do not need to have
893 * page modification or references recorded.
894 * Note that old mappings are simply written
895 * over. The page *must* be wired.
896 */
897 void
898 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
899 {
900 int i;
901 vm_offset_t origva = va;
902
903 for (i = 0; i < count; i++) {
904 pmap_flush_pvcache(m[i]);
905 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
906 va += PAGE_SIZE;
907 }
908
909 mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count);
910 }
911
912 /*
913 * this routine jerks page mappings from the
914 * kernel -- it is meant only for temporary mappings.
915 */
916 void
917 pmap_qremove(vm_offset_t va, int count)
918 {
919 pt_entry_t *pte;
920 vm_offset_t origva;
921
922 if (count < 1)
923 return;
924 mips_dcache_wbinv_range_index(va, PAGE_SIZE * count);
925 origva = va;
926 do {
927 pte = pmap_pte(kernel_pmap, va);
928 *pte = PTE_G;
929 va += PAGE_SIZE;
930 } while (--count > 0);
931 pmap_invalidate_range(kernel_pmap, origva, va);
932 }
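
/*
 * Illustrative sketch (not part of the original source): pmap_qenter()
 * and pmap_qremove() bracket a temporary kernel mapping of an array of
 * wired pages.  The function name below is hypothetical.
 */
#if 0
static void
example_qenter_pair(vm_offset_t va, vm_page_t *pages, int count)
{
	pmap_qenter(va, pages, count);
	/* ... use the pages at va .. va + count * PAGE_SIZE ... */
	pmap_qremove(va, count);
}
#endif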
933
934 /***************************************************
935 * Page table page management routines.....
936 ***************************************************/
937
938 /*
939 * Decrements a page table page's wire count, which is used to record the
940 * number of valid page table entries within the page. If the wire count
941 * drops to zero, then the page table page is unmapped. Returns TRUE if the
942 * page table page was unmapped and FALSE otherwise.
943 */
944 static PMAP_INLINE boolean_t
945 pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
946 {
947
948 --m->wire_count;
949 if (m->wire_count == 0) {
950 _pmap_unwire_ptp(pmap, va, m);
951 return (TRUE);
952 } else
953 return (FALSE);
954 }
955
956 static void
957 _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
958 {
959 pd_entry_t *pde;
960
961 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
962 /*
963 * unmap the page table page
964 */
965 #ifdef __mips_n64
966 if (m->pindex < NUPDE)
967 pde = pmap_pde(pmap, va);
968 else
969 pde = pmap_segmap(pmap, va);
970 #else
971 pde = pmap_pde(pmap, va);
972 #endif
973 *pde = 0;
974 pmap->pm_stats.resident_count--;
975
976 #ifdef __mips_n64
977 if (m->pindex < NUPDE) {
978 pd_entry_t *pdp;
979 vm_page_t pdpg;
980
981 /*
982 * Recursively decrement next level pagetable refcount
983 */
984 pdp = (pd_entry_t *)*pmap_segmap(pmap, va);
985 pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp));
986 pmap_unwire_ptp(pmap, va, pdpg);
987 }
988 #endif
989
990 /*
991 * If the page is finally unwired, simply free it.
992 */
993 vm_page_free_zero(m);
994 atomic_subtract_int(&cnt.v_wire_count, 1);
995 }
996
997 /*
998 * After removing a page table entry, this routine is used to
999 * conditionally free the page, and manage the hold/wire counts.
1000 */
1001 static int
1002 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
1003 {
1004 vm_page_t mpte;
1005
1006 if (va >= VM_MAXUSER_ADDRESS)
1007 return (0);
1008 KASSERT(pde != 0, ("pmap_unuse_pt: pde != 0"));
1009 mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pde));
1010 return (pmap_unwire_ptp(pmap, va, mpte));
1011 }
1012
1013 void
1014 pmap_pinit0(pmap_t pmap)
1015 {
1016 int i;
1017
1018 PMAP_LOCK_INIT(pmap);
1019 pmap->pm_segtab = kernel_segmap;
1020 CPU_ZERO(&pmap->pm_active);
1021 for (i = 0; i < MAXCPU; i++) {
1022 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1023 pmap->pm_asid[i].gen = 0;
1024 }
1025 PCPU_SET(curpmap, pmap);
1026 TAILQ_INIT(&pmap->pm_pvchunk);
1027 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1028 }
1029
1030 void
1031 pmap_grow_direct_page_cache()
1032 {
1033
1034 #ifdef __mips_n64
1035 vm_pageout_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS);
1036 #else
1037 vm_pageout_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
1038 #endif
1039 }
1040
1041 static vm_page_t
1042 pmap_alloc_direct_page(unsigned int index, int req)
1043 {
1044 vm_page_t m;
1045
1046 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED |
1047 VM_ALLOC_ZERO);
1048 if (m == NULL)
1049 return (NULL);
1050
1051 if ((m->flags & PG_ZERO) == 0)
1052 pmap_zero_page(m);
1053
1054 m->pindex = index;
1055 return (m);
1056 }
1057
1058 /*
1059 * Initialize a preallocated and zeroed pmap structure,
1060 * such as one in a vmspace structure.
1061 */
1062 int
1063 pmap_pinit(pmap_t pmap)
1064 {
1065 vm_offset_t ptdva;
1066 vm_page_t ptdpg;
1067 int i;
1068
1069 /*
1070 * allocate the page directory page
1071 */
1072 while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL)
1073 pmap_grow_direct_page_cache();
1074
1075 ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
1076 pmap->pm_segtab = (pd_entry_t *)ptdva;
1077 CPU_ZERO(&pmap->pm_active);
1078 for (i = 0; i < MAXCPU; i++) {
1079 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1080 pmap->pm_asid[i].gen = 0;
1081 }
1082 TAILQ_INIT(&pmap->pm_pvchunk);
1083 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1084
1085 return (1);
1086 }
1087
1088 /*
1089 * this routine is called if the page table page is not
1090 * mapped correctly.
1091 */
1092 static vm_page_t
1093 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags)
1094 {
1095 vm_offset_t pageva;
1096 vm_page_t m;
1097
1098 /*
1099 * Find or fabricate a new pagetable page
1100 */
1101 if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) {
1102 if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
1103 PMAP_UNLOCK(pmap);
1104 rw_wunlock(&pvh_global_lock);
1105 pmap_grow_direct_page_cache();
1106 rw_wlock(&pvh_global_lock);
1107 PMAP_LOCK(pmap);
1108 }
1109
1110 /*
1111 * Indicate the need to retry. While waiting, the page
1112 * table page may have been allocated.
1113 */
1114 return (NULL);
1115 }
1116
1117 /*
1118 * Map the pagetable page into the process address space, if it
1119 * isn't already there.
1120 */
1121 pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1122
1123 #ifdef __mips_n64
1124 if (ptepindex >= NUPDE) {
1125 pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva;
1126 } else {
1127 pd_entry_t *pdep, *pde;
1128 int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT);
1129 int pdeindex = ptepindex & (NPDEPG - 1);
1130 vm_page_t pg;
1131
1132 pdep = &pmap->pm_segtab[segindex];
1133 if (*pdep == NULL) {
1134 /* recurse for allocating page dir */
1135 if (_pmap_allocpte(pmap, NUPDE + segindex,
1136 flags) == NULL) {
1137 /* alloc failed, release current */
1138 --m->wire_count;
1139 atomic_subtract_int(&cnt.v_wire_count, 1);
1140 vm_page_free_zero(m);
1141 return (NULL);
1142 }
1143 } else {
1144 pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep));
1145 pg->wire_count++;
1146 }
1147 /* Next level entry */
1148 pde = (pd_entry_t *)*pdep;
1149 pde[pdeindex] = (pd_entry_t)pageva;
1150 }
1151 #else
1152 pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva;
1153 #endif
1154 pmap->pm_stats.resident_count++;
1155 return (m);
1156 }
1157
1158 static vm_page_t
1159 pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
1160 {
1161 unsigned ptepindex;
1162 pd_entry_t *pde;
1163 vm_page_t m;
1164
1165 /*
1166 * Calculate pagetable page index
1167 */
1168 ptepindex = pmap_pde_pindex(va);
1169 retry:
1170 /*
1171 * Get the page directory entry
1172 */
1173 pde = pmap_pde(pmap, va);
1174
1175 /*
1176 * If the page table page is mapped, we just increment the hold
1177 * count, and activate it.
1178 */
1179 if (pde != NULL && *pde != NULL) {
1180 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde));
1181 m->wire_count++;
1182 } else {
1183 /*
1184 * Here if the pte page isn't mapped, or if it has been
1185 * deallocated.
1186 */
1187 m = _pmap_allocpte(pmap, ptepindex, flags);
1188 if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
1189 goto retry;
1190 }
1191 return (m);
1192 }
1193
1194
1195 /***************************************************
1196 * Pmap allocation/deallocation routines.
1197 ***************************************************/
1198
1199 /*
1200 * Release any resources held by the given physical map.
1201 * Called when a pmap initialized by pmap_pinit is being released.
1202 * Should only be called if the map contains no valid mappings.
1203 */
1204 void
1205 pmap_release(pmap_t pmap)
1206 {
1207 vm_offset_t ptdva;
1208 vm_page_t ptdpg;
1209
1210 KASSERT(pmap->pm_stats.resident_count == 0,
1211 ("pmap_release: pmap resident count %ld != 0",
1212 pmap->pm_stats.resident_count));
1213
1214 ptdva = (vm_offset_t)pmap->pm_segtab;
1215 ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));
1216
1217 ptdpg->wire_count--;
1218 atomic_subtract_int(&cnt.v_wire_count, 1);
1219 vm_page_free_zero(ptdpg);
1220 }
1221
1222 /*
1223 * grow the number of kernel page table entries, if needed
1224 */
1225 void
1226 pmap_growkernel(vm_offset_t addr)
1227 {
1228 vm_page_t nkpg;
1229 pd_entry_t *pde, *pdpe;
1230 pt_entry_t *pte;
1231 int i;
1232
1233 mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1234 addr = roundup2(addr, NBSEG);
1235 if (addr - 1 >= kernel_map->max_offset)
1236 addr = kernel_map->max_offset;
1237 while (kernel_vm_end < addr) {
1238 pdpe = pmap_segmap(kernel_pmap, kernel_vm_end);
1239 #ifdef __mips_n64
1240 if (*pdpe == 0) {
1241 /* new intermediate page table entry */
1242 nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
1243 if (nkpg == NULL)
1244 panic("pmap_growkernel: no memory to grow kernel");
1245 *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1246 continue; /* try again */
1247 }
1248 #endif
1249 pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
1250 if (*pde != 0) {
1251 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1252 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1253 kernel_vm_end = kernel_map->max_offset;
1254 break;
1255 }
1256 continue;
1257 }
1258
1259 /*
1260 * This index is bogus, but out of the way
1261 */
1262 nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT);
1263 if (!nkpg)
1264 panic("pmap_growkernel: no memory to grow kernel");
1265 nkpt++;
1266 *pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1267
1268 /*
1269 * The R[4-7]?00 stores only one copy of the Global bit in
1270  * the translation lookaside buffer for each 2-page entry.
1271  * Thus invalid entries must have the Global bit set so that
1272  * when the EntryLo and EntryHi G bits are ANDed together
1273  * they will produce a global bit to store in the TLB.
1274 */
1275 pte = (pt_entry_t *)*pde;
1276 for (i = 0; i < NPTEPG; i++)
1277 pte[i] = PTE_G;
1278
1279 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1280 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1281 kernel_vm_end = kernel_map->max_offset;
1282 break;
1283 }
1284 }
1285 }
1286
1287 /***************************************************
1288 * page management routines.
1289 ***************************************************/
1290
1291 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1292 #ifdef __mips_n64
1293 CTASSERT(_NPCM == 3);
1294 CTASSERT(_NPCPV == 168);
1295 #else
1296 CTASSERT(_NPCM == 11);
1297 CTASSERT(_NPCPV == 336);
1298 #endif
1299
1300 static __inline struct pv_chunk *
1301 pv_to_chunk(pv_entry_t pv)
1302 {
1303
1304 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1305 }
1306
1307 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1308
1309 #ifdef __mips_n64
1310 #define PC_FREE0_1 0xfffffffffffffffful
1311 #define PC_FREE2 0x000000fffffffffful
1312 #else
1313 #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */
1314 #define PC_FREE10 0x0000fffful /* Free values for index 10 */
1315 #endif
1316
1317 static const u_long pc_freemask[_NPCM] = {
1318 #ifdef __mips_n64
1319 PC_FREE0_1, PC_FREE0_1, PC_FREE2
1320 #else
1321 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1322 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1323 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1324 PC_FREE0_9, PC_FREE10
1325 #endif
1326 };
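
/*
 * Illustrative worked example (not part of the original source): a pv
 * entry's position in its chunk's free bitmap is derived from its index
 * in pc_pventry[], exactly as free_pv_entry() below computes it:
 *
 *	idx   = pv - &pc->pc_pventry[0];
 *	field = idx / (sizeof(u_long) * NBBY);	-- which pc_map[] word
 *	bit   = idx % (sizeof(u_long) * NBBY);	-- which bit in that word
 *
 * A set bit in pc_map[] marks the corresponding pv entry as free.
 */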
1327
1328 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
1329
1330 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1331 "Current number of pv entries");
1332
1333 #ifdef PV_STATS
1334 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1335
1336 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1337 "Current number of pv entry chunks");
1338 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1339 "Current number of pv entry chunks allocated");
1340 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1341 "Current number of pv entry chunks frees");
1342 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1343 "Number of times tried to get a chunk page but failed.");
1344
1345 static long pv_entry_frees, pv_entry_allocs;
1346 static int pv_entry_spare;
1347
1348 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1349 "Current number of pv entry frees");
1350 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1351 "Current number of pv entry allocs");
1352 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1353 "Current number of spare pv entries");
1354 #endif
1355
1356 /*
1357 * We are in a serious low memory condition. Resort to
1358 * drastic measures to free some pages so we can allocate
1359 * another pv entry chunk.
1360 */
1361 static vm_page_t
1362 pmap_pv_reclaim(pmap_t locked_pmap)
1363 {
1364 struct pch newtail;
1365 struct pv_chunk *pc;
1366 pd_entry_t *pde;
1367 pmap_t pmap;
1368 pt_entry_t *pte, oldpte;
1369 pv_entry_t pv;
1370 vm_offset_t va;
1371 vm_page_t m, m_pc;
1372 u_long inuse;
1373 int bit, field, freed, idx;
1374
1375 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
1376 pmap = NULL;
1377 m_pc = NULL;
1378 TAILQ_INIT(&newtail);
1379 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
1380 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1381 if (pmap != pc->pc_pmap) {
1382 if (pmap != NULL) {
1383 pmap_invalidate_all(pmap);
1384 if (pmap != locked_pmap)
1385 PMAP_UNLOCK(pmap);
1386 }
1387 pmap = pc->pc_pmap;
1388 /* Avoid deadlock and lock recursion. */
1389 if (pmap > locked_pmap)
1390 PMAP_LOCK(pmap);
1391 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
1392 pmap = NULL;
1393 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1394 continue;
1395 }
1396 }
1397
1398 /*
1399 * Destroy every non-wired, 4 KB page mapping in the chunk.
1400 */
1401 freed = 0;
1402 for (field = 0; field < _NPCM; field++) {
1403 for (inuse = ~pc->pc_map[field] & pc_freemask[field];
1404 inuse != 0; inuse &= ~(1UL << bit)) {
1405 bit = ffsl(inuse) - 1;
1406 idx = field * sizeof(inuse) * NBBY + bit;
1407 pv = &pc->pc_pventry[idx];
1408 va = pv->pv_va;
1409 pde = pmap_pde(pmap, va);
1410 KASSERT(pde != NULL && *pde != 0,
1411 ("pmap_pv_reclaim: pde"));
1412 pte = pmap_pde_to_pte(pde, va);
1413 oldpte = *pte;
1414 if (pte_test(&oldpte, PTE_W))
1415 continue;
1416 if (is_kernel_pmap(pmap))
1417 *pte = PTE_G;
1418 else
1419 *pte = 0;
1420 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(oldpte));
1421 if (pte_test(&oldpte, PTE_D))
1422 vm_page_dirty(m);
1423 if (m->md.pv_flags & PV_TABLE_REF)
1424 vm_page_aflag_set(m, PGA_REFERENCED);
1425 m->md.pv_flags &= ~PV_TABLE_REF;
1426 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1427 if (TAILQ_EMPTY(&m->md.pv_list))
1428 vm_page_aflag_clear(m, PGA_WRITEABLE);
1429 pc->pc_map[field] |= 1UL << bit;
1430 pmap_unuse_pt(pmap, va, *pde);
1431 freed++;
1432 }
1433 }
1434 if (freed == 0) {
1435 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1436 continue;
1437 }
1438 /* Every freed mapping is for a 4 KB page. */
1439 pmap->pm_stats.resident_count -= freed;
1440 PV_STAT(pv_entry_frees += freed);
1441 PV_STAT(pv_entry_spare += freed);
1442 pv_entry_count -= freed;
1443 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1444 for (field = 0; field < _NPCM; field++)
1445 if (pc->pc_map[field] != pc_freemask[field]) {
1446 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
1447 pc_list);
1448 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1449
1450 /*
1451 * One freed pv entry in locked_pmap is
1452 * sufficient.
1453 */
1454 if (pmap == locked_pmap)
1455 goto out;
1456 break;
1457 }
1458 if (field == _NPCM) {
1459 PV_STAT(pv_entry_spare -= _NPCPV);
1460 PV_STAT(pc_chunk_count--);
1461 PV_STAT(pc_chunk_frees++);
1462 /* Entire chunk is free; return it. */
1463 m_pc = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(
1464 (vm_offset_t)pc));
1465 break;
1466 }
1467 }
1468 out:
1469 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
1470 if (pmap != NULL) {
1471 pmap_invalidate_all(pmap);
1472 if (pmap != locked_pmap)
1473 PMAP_UNLOCK(pmap);
1474 }
1475 return (m_pc);
1476 }
1477
1478 /*
1479 * free the pv_entry back to the free list
1480 */
1481 static void
1482 free_pv_entry(pmap_t pmap, pv_entry_t pv)
1483 {
1484 struct pv_chunk *pc;
1485 int bit, field, idx;
1486
1487 rw_assert(&pvh_global_lock, RA_WLOCKED);
1488 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1489 PV_STAT(pv_entry_frees++);
1490 PV_STAT(pv_entry_spare++);
1491 pv_entry_count--;
1492 pc = pv_to_chunk(pv);
1493 idx = pv - &pc->pc_pventry[0];
1494 field = idx / (sizeof(u_long) * NBBY);
1495 bit = idx % (sizeof(u_long) * NBBY);
1496 pc->pc_map[field] |= 1ul << bit;
1497 for (idx = 0; idx < _NPCM; idx++)
1498 if (pc->pc_map[idx] != pc_freemask[idx]) {
1499 /*
1500 * 98% of the time, pc is already at the head of the
1501 * list. If it isn't already, move it to the head.
1502 */
1503 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
1504 pc)) {
1505 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1506 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
1507 pc_list);
1508 }
1509 return;
1510 }
1511 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1512 free_pv_chunk(pc);
1513 }
1514
1515 static void
1516 free_pv_chunk(struct pv_chunk *pc)
1517 {
1518 vm_page_t m;
1519
1520 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1521 PV_STAT(pv_entry_spare -= _NPCPV);
1522 PV_STAT(pc_chunk_count--);
1523 PV_STAT(pc_chunk_frees++);
1524 /* entire chunk is free, return it */
1525 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc));
1526 vm_page_unwire(m, 0);
1527 vm_page_free(m);
1528 }
1529
1530 /*
1531 * get a new pv_entry, allocating a block from the system
1532 * when needed.
1533 */
1534 static pv_entry_t
1535 get_pv_entry(pmap_t pmap, boolean_t try)
1536 {
1537 struct pv_chunk *pc;
1538 pv_entry_t pv;
1539 vm_page_t m;
1540 int bit, field, idx;
1541
1542 rw_assert(&pvh_global_lock, RA_WLOCKED);
1543 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1544 PV_STAT(pv_entry_allocs++);
1545 pv_entry_count++;
1546 retry:
1547 pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1548 if (pc != NULL) {
1549 for (field = 0; field < _NPCM; field++) {
1550 if (pc->pc_map[field]) {
1551 bit = ffsl(pc->pc_map[field]) - 1;
1552 break;
1553 }
1554 }
1555 if (field < _NPCM) {
1556 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
1557 pv = &pc->pc_pventry[idx];
1558 pc->pc_map[field] &= ~(1ul << bit);
1559 /* If this was the last item, move it to tail */
1560 for (field = 0; field < _NPCM; field++)
1561 if (pc->pc_map[field] != 0) {
1562 PV_STAT(pv_entry_spare--);
1563 return (pv); /* not full, return */
1564 }
1565 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1566 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
1567 PV_STAT(pv_entry_spare--);
1568 return (pv);
1569 }
1570 }
1571 /* No free items, allocate another chunk */
1572 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL |
1573 VM_ALLOC_WIRED);
1574 if (m == NULL) {
1575 if (try) {
1576 pv_entry_count--;
1577 PV_STAT(pc_chunk_tryfail++);
1578 return (NULL);
1579 }
1580 m = pmap_pv_reclaim(pmap);
1581 if (m == NULL)
1582 goto retry;
1583 }
1584 PV_STAT(pc_chunk_count++);
1585 PV_STAT(pc_chunk_allocs++);
1586 pc = (struct pv_chunk *)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1587 pc->pc_pmap = pmap;
1588 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
1589 for (field = 1; field < _NPCM; field++)
1590 pc->pc_map[field] = pc_freemask[field];
1591 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1592 pv = &pc->pc_pventry[0];
1593 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1594 PV_STAT(pv_entry_spare += _NPCPV - 1);
1595 return (pv);
1596 }
1597
1598 static pv_entry_t
1599 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1600 {
1601 pv_entry_t pv;
1602
1603 rw_assert(&pvh_global_lock, RA_WLOCKED);
1604 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
1605 if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1606 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
1607 break;
1608 }
1609 }
1610 return (pv);
1611 }
1612
1613 static void
1614 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1615 {
1616 pv_entry_t pv;
1617
1618 pv = pmap_pvh_remove(pvh, pmap, va);
1619 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx",
1620 (u_long)VM_PAGE_TO_PHYS(__containerof(pvh, struct vm_page, md)),
1621 (u_long)va));
1622 free_pv_entry(pmap, pv);
1623 }
1624
1625 static void
1626 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1627 {
1628
1629 rw_assert(&pvh_global_lock, RA_WLOCKED);
1630 pmap_pvh_free(&m->md, pmap, va);
1631 if (TAILQ_EMPTY(&m->md.pv_list))
1632 vm_page_aflag_clear(m, PGA_WRITEABLE);
1633 }
1634
1635 /*
1636 * Conditionally create a pv entry.
1637 */
1638 static boolean_t
1639 pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
1640 vm_page_t m)
1641 {
1642 pv_entry_t pv;
1643
1644 rw_assert(&pvh_global_lock, RA_WLOCKED);
1645 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1646 if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1647 pv->pv_va = va;
1648 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1649 return (TRUE);
1650 } else
1651 return (FALSE);
1652 }
1653
1654 /*
1655 * pmap_remove_pte: do the things to unmap a page in a process
1656 */
1657 static int
1658 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
1659 pd_entry_t pde)
1660 {
1661 pt_entry_t oldpte;
1662 vm_page_t m;
1663 vm_paddr_t pa;
1664
1665 rw_assert(&pvh_global_lock, RA_WLOCKED);
1666 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1667
1668 /*
1669 * Write back all cache lines from the page being unmapped.
1670 */
1671 mips_dcache_wbinv_range_index(va, PAGE_SIZE);
1672
1673 oldpte = *ptq;
1674 if (is_kernel_pmap(pmap))
1675 *ptq = PTE_G;
1676 else
1677 *ptq = 0;
1678
1679 if (pte_test(&oldpte, PTE_W))
1680 pmap->pm_stats.wired_count -= 1;
1681
1682 pmap->pm_stats.resident_count -= 1;
1683
1684 if (pte_test(&oldpte, PTE_MANAGED)) {
1685 pa = TLBLO_PTE_TO_PA(oldpte);
1686 m = PHYS_TO_VM_PAGE(pa);
1687 if (pte_test(&oldpte, PTE_D)) {
1688 KASSERT(!pte_test(&oldpte, PTE_RO),
1689 ("%s: modified page not writable: va: %p, pte: %#jx",
1690 __func__, (void *)va, (uintmax_t)oldpte));
1691 vm_page_dirty(m);
1692 }
1693 if (m->md.pv_flags & PV_TABLE_REF)
1694 vm_page_aflag_set(m, PGA_REFERENCED);
1695 m->md.pv_flags &= ~PV_TABLE_REF;
1696
1697 pmap_remove_entry(pmap, m, va);
1698 }
1699 return (pmap_unuse_pt(pmap, va, pde));
1700 }
1701
1702 /*
1703 * Remove a single page from a process address space
1704 */
1705 static void
1706 pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1707 {
1708 pd_entry_t *pde;
1709 pt_entry_t *ptq;
1710
1711 rw_assert(&pvh_global_lock, RA_WLOCKED);
1712 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1713 pde = pmap_pde(pmap, va);
1714 if (pde == NULL || *pde == 0)
1715 return;
1716 ptq = pmap_pde_to_pte(pde, va);
1717
1718 /*
1719 * If there is no pte for this address, just skip it!
1720 */
1721 if (!pte_test(ptq, PTE_V))
1722 return;
1723
1724 (void)pmap_remove_pte(pmap, ptq, va, *pde);
1725 pmap_invalidate_page(pmap, va);
1726 }
1727
1728 /*
1729 * Remove the given range of addresses from the specified map.
1730 *
1731 * It is assumed that the start and end are properly
1732 * rounded to the page size.
1733 */
1734 void
1735 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1736 {
1737 pd_entry_t *pde, *pdpe;
1738 pt_entry_t *pte;
1739 vm_offset_t va, va_next;
1740
1741 /*
1742 * Perform an unsynchronized read. This is, however, safe.
1743 */
1744 if (pmap->pm_stats.resident_count == 0)
1745 return;
1746
1747 rw_wlock(&pvh_global_lock);
1748 PMAP_LOCK(pmap);
1749
1750 /*
1751  * Special handling for removing a single page: a very common
1752  * operation for which we can short-circuit some code.
1753 */
1754 if ((sva + PAGE_SIZE) == eva) {
1755 pmap_remove_page(pmap, sva);
1756 goto out;
1757 }
1758 for (; sva < eva; sva = va_next) {
1759 pdpe = pmap_segmap(pmap, sva);
1760 #ifdef __mips_n64
1761 if (*pdpe == 0) {
1762 va_next = (sva + NBSEG) & ~SEGMASK;
1763 if (va_next < sva)
1764 va_next = eva;
1765 continue;
1766 }
1767 #endif
1768 va_next = (sva + NBPDR) & ~PDRMASK;
1769 if (va_next < sva)
1770 va_next = eva;
1771
1772 pde = pmap_pdpe_to_pde(pdpe, sva);
1773 if (*pde == NULL)
1774 continue;
1775
1776 /*
1777 * Limit our scan to either the end of the va represented
1778 * by the current page table page, or to the end of the
1779 * range being removed.
1780 */
1781 if (va_next > eva)
1782 va_next = eva;
1783
1784 va = va_next;
1785 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1786 sva += PAGE_SIZE) {
1787 if (!pte_test(pte, PTE_V)) {
1788 if (va != va_next) {
1789 pmap_invalidate_range(pmap, va, sva);
1790 va = va_next;
1791 }
1792 continue;
1793 }
1794 if (va == va_next)
1795 va = sva;
1796 if (pmap_remove_pte(pmap, pte, sva, *pde)) {
1797 sva += PAGE_SIZE;
1798 break;
1799 }
1800 }
1801 if (va != va_next)
1802 pmap_invalidate_range(pmap, va, sva);
1803 }
1804 out:
1805 rw_wunlock(&pvh_global_lock);
1806 PMAP_UNLOCK(pmap);
1807 }
1808
1809 /*
1810 * Routine: pmap_remove_all
1811 * Function:
1812 * Removes this physical page from
1813 * all physical maps in which it resides.
1814 * Reflects back modify bits to the pager.
1815 *
1816 * Notes:
1817 * Original versions of this routine were very
1818 * inefficient because they iteratively called
1819 * pmap_remove (slow...)
1820 */
1821
1822 void
1823 pmap_remove_all(vm_page_t m)
1824 {
1825 pv_entry_t pv;
1826 pmap_t pmap;
1827 pd_entry_t *pde;
1828 pt_entry_t *pte, tpte;
1829
1830 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1831 ("pmap_remove_all: page %p is not managed", m));
1832 rw_wlock(&pvh_global_lock);
1833
1834 if (m->md.pv_flags & PV_TABLE_REF)
1835 vm_page_aflag_set(m, PGA_REFERENCED);
1836
1837 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1838 pmap = PV_PMAP(pv);
1839 PMAP_LOCK(pmap);
1840
1841 /*
1842  * If it's the last mapping, write back all caches from
1843  * the page being destroyed.
1844 */
1845 if (TAILQ_NEXT(pv, pv_list) == NULL)
1846 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
1847
1848 pmap->pm_stats.resident_count--;
1849
1850 pde = pmap_pde(pmap, pv->pv_va);
1851 KASSERT(pde != NULL && *pde != 0, ("pmap_remove_all: pde"));
1852 pte = pmap_pde_to_pte(pde, pv->pv_va);
1853
1854 tpte = *pte;
1855 if (is_kernel_pmap(pmap))
1856 *pte = PTE_G;
1857 else
1858 *pte = 0;
1859
1860 if (pte_test(&tpte, PTE_W))
1861 pmap->pm_stats.wired_count--;
1862
1863 /*
1864 * Update the vm_page_t clean and reference bits.
1865 */
1866 if (pte_test(&tpte, PTE_D)) {
1867 KASSERT(!pte_test(&tpte, PTE_RO),
1868 ("%s: modified page not writable: va: %p, pte: %#jx",
1869 __func__, (void *)pv->pv_va, (uintmax_t)tpte));
1870 vm_page_dirty(m);
1871 }
1872 pmap_invalidate_page(pmap, pv->pv_va);
1873
1874 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1875 pmap_unuse_pt(pmap, pv->pv_va, *pde);
1876 free_pv_entry(pmap, pv);
1877 PMAP_UNLOCK(pmap);
1878 }
1879
1880 vm_page_aflag_clear(m, PGA_WRITEABLE);
1881 m->md.pv_flags &= ~PV_TABLE_REF;
1882 rw_wunlock(&pvh_global_lock);
1883 }
1884
1885 /*
1886 * Set the physical protection on the
1887 * specified range of this map as requested.
1888 */
1889 void
1890 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1891 {
1892 pt_entry_t pbits, *pte;
1893 pd_entry_t *pde, *pdpe;
1894 vm_offset_t va, va_next;
1895 vm_paddr_t pa;
1896 vm_page_t m;
1897
1898 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1899 pmap_remove(pmap, sva, eva);
1900 return;
1901 }
1902 if (prot & VM_PROT_WRITE)
1903 return;
1904
1905 PMAP_LOCK(pmap);
1906 for (; sva < eva; sva = va_next) {
1907 pdpe = pmap_segmap(pmap, sva);
1908 #ifdef __mips_n64
1909 if (*pdpe == 0) {
1910 va_next = (sva + NBSEG) & ~SEGMASK;
1911 if (va_next < sva)
1912 va_next = eva;
1913 continue;
1914 }
1915 #endif
1916 va_next = (sva + NBPDR) & ~PDRMASK;
1917 if (va_next < sva)
1918 va_next = eva;
1919
1920 pde = pmap_pdpe_to_pde(pdpe, sva);
1921 if (*pde == NULL)
1922 continue;
1923
1924 /*
1925 * Limit our scan to either the end of the va represented
1926 * by the current page table page, or to the end of the
1927 * range being write protected.
1928 */
1929 if (va_next > eva)
1930 va_next = eva;
1931
1932 va = va_next;
1933 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1934 sva += PAGE_SIZE) {
1935 pbits = *pte;
1936 if (!pte_test(&pbits, PTE_V) || pte_test(&pbits,
1937 PTE_RO)) {
1938 if (va != va_next) {
1939 pmap_invalidate_range(pmap, va, sva);
1940 va = va_next;
1941 }
1942 continue;
1943 }
1944 pte_set(&pbits, PTE_RO);
1945 if (pte_test(&pbits, PTE_D)) {
1946 pte_clear(&pbits, PTE_D);
1947 if (pte_test(&pbits, PTE_MANAGED)) {
1948 pa = TLBLO_PTE_TO_PA(pbits);
1949 m = PHYS_TO_VM_PAGE(pa);
1950 vm_page_dirty(m);
1951 }
1952 if (va == va_next)
1953 va = sva;
1954 } else {
1955 /*
1956 * Unless PTE_D is set, any TLB entries
1957 * mapping "sva" don't allow write access, so
1958 * they needn't be invalidated.
1959 */
1960 if (va != va_next) {
1961 pmap_invalidate_range(pmap, va, sva);
1962 va = va_next;
1963 }
1964 }
1965 *pte = pbits;
1966 }
1967 if (va != va_next)
1968 pmap_invalidate_range(pmap, va, sva);
1969 }
1970 PMAP_UNLOCK(pmap);
1971 }
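
/*
 * Illustrative sketch (not part of the original source): a typical use of
 * pmap_protect() is to revoke write access while leaving the range
 * mapped, e.g.
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);
 *
 * A protection lacking VM_PROT_READ removes the range instead, and a
 * protection that still includes VM_PROT_WRITE is a no-op here.
 */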
1972
1973 /*
1974 * Insert the given physical page (p) at
1975 * the specified virtual address (v) in the
1976 * target physical map with the protection requested.
1977 *
1978 * If specified, the page will be wired down, meaning
1979 * that the related pte can not be reclaimed.
1980 *
1981 * NB: This is the only routine which MAY NOT lazy-evaluate
1982 * or lose information. That is, this routine must actually
1983 * insert this page into the given map NOW.
1984 */
1985 int
1986 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1987 u_int flags, int8_t psind __unused)
1988 {
1989 vm_paddr_t pa, opa;
1990 pt_entry_t *pte;
1991 pt_entry_t origpte, newpte;
1992 pv_entry_t pv;
1993 vm_page_t mpte, om;
1994
1995 va &= ~PAGE_MASK;
1996 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1997 KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
1998 va >= kmi.clean_eva,
1999 ("pmap_enter: managed mapping within the clean submap"));
2000 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
2001 VM_OBJECT_ASSERT_LOCKED(m->object);
2002 pa = VM_PAGE_TO_PHYS(m);
2003 newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, flags, prot);
2004 if ((flags & PMAP_ENTER_WIRED) != 0)
2005 newpte |= PTE_W;
2006 if (is_kernel_pmap(pmap))
2007 newpte |= PTE_G;
2008 if (is_cacheable_mem(pa))
2009 newpte |= PTE_C_CACHE;
2010 else
2011 newpte |= PTE_C_UNCACHED;
2012
2013 mpte = NULL;
2014
2015 rw_wlock(&pvh_global_lock);
2016 PMAP_LOCK(pmap);
2017
2018 /*
2019 * In the case that a page table page is not resident, we are
2020 * creating it here.
2021 */
2022 if (va < VM_MAXUSER_ADDRESS) {
2023 mpte = pmap_allocpte(pmap, va, flags);
2024 if (mpte == NULL) {
2025 KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
2026 ("pmap_allocpte failed with sleep allowed"));
2027 rw_wunlock(&pvh_global_lock);
2028 PMAP_UNLOCK(pmap);
2029 return (KERN_RESOURCE_SHORTAGE);
2030 }
2031 }
2032 pte = pmap_pte(pmap, va);
2033
2034 /*
2035 * Page Directory table entry not valid, we need a new PT page
2036 */
2037 if (pte == NULL) {
2038 panic("pmap_enter: invalid page directory, pdir=%p, va=%p",
2039 (void *)pmap->pm_segtab, (void *)va);
2040 }
2041 om = NULL;
2042 origpte = *pte;
2043 opa = TLBLO_PTE_TO_PA(origpte);
2044
2045 /*
2046 * Mapping has not changed, must be protection or wiring change.
2047 */
2048 if (pte_test(&origpte, PTE_V) && opa == pa) {
2049 /*
2050 * Wiring change, just update stats. We don't worry about
2051 * wiring PT pages as they remain resident as long as there
2052 * are valid mappings in them. Hence, if a user page is
2053 * wired, the PT page will be also.
2054 */
2055 if (pte_test(&newpte, PTE_W) && !pte_test(&origpte, PTE_W))
2056 pmap->pm_stats.wired_count++;
2057 else if (!pte_test(&newpte, PTE_W) && pte_test(&origpte,
2058 PTE_W))
2059 pmap->pm_stats.wired_count--;
2060
2061 KASSERT(!pte_test(&origpte, PTE_D | PTE_RO),
2062 ("%s: modified page not writable: va: %p, pte: %#jx",
2063 __func__, (void *)va, (uintmax_t)origpte));
2064
2065 /*
2066 * Remove extra pte reference
2067 */
2068 if (mpte)
2069 mpte->wire_count--;
2070
2071 if (pte_test(&origpte, PTE_MANAGED)) {
2072 m->md.pv_flags |= PV_TABLE_REF;
2073 om = m;
2074 newpte |= PTE_MANAGED;
2075 if (!pte_test(&newpte, PTE_RO))
2076 vm_page_aflag_set(m, PGA_WRITEABLE);
2077 }
2078 goto validate;
2079 }
2080
2081 pv = NULL;
2082
2083 /*
2084 * Mapping has changed, invalidate old range and fall through to
2085 * handle validating new mapping.
2086 */
2087 if (opa) {
2088 if (pte_test(&origpte, PTE_W))
2089 pmap->pm_stats.wired_count--;
2090
2091 if (pte_test(&origpte, PTE_MANAGED)) {
2092 om = PHYS_TO_VM_PAGE(opa);
2093 pv = pmap_pvh_remove(&om->md, pmap, va);
2094 }
2095 if (mpte != NULL) {
2096 mpte->wire_count--;
2097 KASSERT(mpte->wire_count > 0,
2098 ("pmap_enter: missing reference to page table page,"
2099 " va: %p", (void *)va));
2100 }
2101 } else
2102 pmap->pm_stats.resident_count++;
2103
2104 /*
2105 * Enter on the PV list if part of our managed memory.
2106 */
2107 if ((m->oflags & VPO_UNMANAGED) == 0) {
2108 m->md.pv_flags |= PV_TABLE_REF;
2109 if (pv == NULL)
2110 pv = get_pv_entry(pmap, FALSE);
2111 pv->pv_va = va;
2112 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2113 newpte |= PTE_MANAGED;
2114 if (!pte_test(&newpte, PTE_RO))
2115 vm_page_aflag_set(m, PGA_WRITEABLE);
2116 } else if (pv != NULL)
2117 free_pv_entry(pmap, pv);
2118
2119 /*
2120 * Increment counters
2121 */
2122 if (pte_test(&newpte, PTE_W))
2123 pmap->pm_stats.wired_count++;
2124
2125 validate:
2126
2127 #ifdef PMAP_DEBUG
2128 printf("pmap_enter: va: %p -> pa: %p\n", (void *)va, (void *)pa);
2129 #endif
2130
2131 /*
2132 * if the mapping or permission bits are different, we need to
2133 * update the pte.
2134 */
2135 if (origpte != newpte) {
2136 *pte = newpte;
2137 if (pte_test(&origpte, PTE_V)) {
2138 if (pte_test(&origpte, PTE_MANAGED) && opa != pa) {
2139 if (om->md.pv_flags & PV_TABLE_REF)
2140 vm_page_aflag_set(om, PGA_REFERENCED);
2141 om->md.pv_flags &= ~PV_TABLE_REF;
2142 }
2143 if (pte_test(&origpte, PTE_D)) {
2144 KASSERT(!pte_test(&origpte, PTE_RO),
2145 ("pmap_enter: modified page not writable:"
2146 " va: %p, pte: %#jx", (void *)va, (uintmax_t)origpte));
2147 if (pte_test(&origpte, PTE_MANAGED))
2148 vm_page_dirty(om);
2149 }
2150 if (pte_test(&origpte, PTE_MANAGED) &&
2151 TAILQ_EMPTY(&om->md.pv_list))
2152 vm_page_aflag_clear(om, PGA_WRITEABLE);
2153 pmap_update_page(pmap, va, newpte);
2154 }
2155 }
2156
2157 /*
2158 * Sync I & D caches for executable pages. Do this only if the
2159 * target pmap belongs to the current process. Otherwise, an
2160 * unresolvable TLB miss may occur.
2161 */
2162 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
2163 (prot & VM_PROT_EXECUTE)) {
2164 mips_icache_sync_range(va, PAGE_SIZE);
2165 mips_dcache_wbinv_range(va, PAGE_SIZE);
2166 }
2167 rw_wunlock(&pvh_global_lock);
2168 PMAP_UNLOCK(pmap);
2169 return (KERN_SUCCESS);
2170 }
2171
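/*
 * Illustrative sketch (not part of the original source): a minimal
 * caller of pmap_enter() as defined above.  The low bits of "flags"
 * carry the access type that init_pte_prot() inspects, and the KASSERT
 * above guarantees that KERN_RESOURCE_SHORTAGE is only returned when
 * PMAP_ENTER_NOSLEEP is set.  demo_enter_wired() is a hypothetical
 * helper, not an existing kernel interface.
 */
#if 0	/* example only; never compiled */
static int
demo_enter_wired(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	int rv;

	/* First try an atomic (non-sleeping) insertion. */
	rv = pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_WRITE | PMAP_ENTER_WIRED | PMAP_ENTER_NOSLEEP, 0);
	if (rv == KERN_RESOURCE_SHORTAGE) {
		/* No PT page was available; retry with sleeping allowed. */
		rv = pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE,
		    VM_PROT_WRITE | PMAP_ENTER_WIRED, 0);
	}
	return (rv);
}
#endif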
2172 /*
2173 * This code makes some *MAJOR* assumptions:
2174 * 1. The current pmap and the target pmap exist.
2175 * 2. The mapping is not wired.
2176 * 3. Read access only.
2177 * 4. No page table pages are required.
2178 * It is, however, *MUCH* faster than pmap_enter...
2179 */
2180
2181 void
2182 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2183 {
2184
2185 rw_wlock(&pvh_global_lock);
2186 PMAP_LOCK(pmap);
2187 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
2188 rw_wunlock(&pvh_global_lock);
2189 PMAP_UNLOCK(pmap);
2190 }
2191
2192 static vm_page_t
2193 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2194 vm_prot_t prot, vm_page_t mpte)
2195 {
2196 pt_entry_t *pte;
2197 vm_paddr_t pa;
2198
2199 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2200 (m->oflags & VPO_UNMANAGED) != 0,
2201 ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2202 rw_assert(&pvh_global_lock, RA_WLOCKED);
2203 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2204
2205 /*
2206 * In the case that a page table page is not resident, we are
2207 * creating it here.
2208 */
2209 if (va < VM_MAXUSER_ADDRESS) {
2210 pd_entry_t *pde;
2211 unsigned ptepindex;
2212
2213 /*
2214 * Calculate the page table page index.
2215 */
2216 ptepindex = pmap_pde_pindex(va);
2217 if (mpte && (mpte->pindex == ptepindex)) {
2218 mpte->wire_count++;
2219 } else {
2220 /*
2221 * Get the page directory entry
2222 */
2223 pde = pmap_pde(pmap, va);
2224
2225 /*
2226 * If the page table page is mapped, we just
2227 * increment the hold count, and activate it.
2228 */
2229 if (pde && *pde != 0) {
2230 mpte = PHYS_TO_VM_PAGE(
2231 MIPS_DIRECT_TO_PHYS(*pde));
2232 mpte->wire_count++;
2233 } else {
2234 mpte = _pmap_allocpte(pmap, ptepindex,
2235 PMAP_ENTER_NOSLEEP);
2236 if (mpte == NULL)
2237 return (mpte);
2238 }
2239 }
2240 } else {
2241 mpte = NULL;
2242 }
2243
2244 pte = pmap_pte(pmap, va);
2245 if (pte_test(pte, PTE_V)) {
2246 if (mpte != NULL) {
2247 mpte->wire_count--;
2248 mpte = NULL;
2249 }
2250 return (mpte);
2251 }
2252
2253 /*
2254 * Enter on the PV list if part of our managed memory.
2255 */
2256 if ((m->oflags & VPO_UNMANAGED) == 0 &&
2257 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2258 if (mpte != NULL) {
2259 pmap_unwire_ptp(pmap, va, mpte);
2260 mpte = NULL;
2261 }
2262 return (mpte);
2263 }
2264
2265 /*
2266 * Increment counters
2267 */
2268 pmap->pm_stats.resident_count++;
2269
2270 pa = VM_PAGE_TO_PHYS(m);
2271
2272 /*
2273 * Now validate mapping with RO protection
2274 */
2275 *pte = PTE_RO | TLBLO_PA_TO_PFN(pa) | PTE_V;
2276 if ((m->oflags & VPO_UNMANAGED) == 0)
2277 *pte |= PTE_MANAGED;
2278
2279 if (is_cacheable_mem(pa))
2280 *pte |= PTE_C_CACHE;
2281 else
2282 *pte |= PTE_C_UNCACHED;
2283
2284 if (is_kernel_pmap(pmap))
2285 *pte |= PTE_G;
2286 else {
2287 /*
2288 * Sync I & D caches. Do this only if the target pmap
2289 * belongs to the current process. Otherwise, an
2290 * unresolvable TLB miss may occur. */
2291 if (pmap == &curproc->p_vmspace->vm_pmap) {
2292 va &= ~PAGE_MASK;
2293 mips_icache_sync_range(va, PAGE_SIZE);
2294 mips_dcache_wbinv_range(va, PAGE_SIZE);
2295 }
2296 }
2297 return (mpte);
2298 }
2299
2300 /*
2301 * Make a temporary mapping for a physical address. This is only intended
2302 * to be used for panic dumps.
2303 *
2304 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2305 */
2306 void *
2307 pmap_kenter_temporary(vm_paddr_t pa, int i)
2308 {
2309 vm_offset_t va;
2310
2311 if (i != 0)
2312 printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
2313 __func__);
2314
2315 if (MIPS_DIRECT_MAPPABLE(pa)) {
2316 va = MIPS_PHYS_TO_DIRECT(pa);
2317 } else {
2318 #ifndef __mips_n64 /* XXX : to be converted to new style */
2319 int cpu;
2320 register_t intr;
2321 struct local_sysmaps *sysm;
2322 pt_entry_t *pte, npte;
2323
2324 /* If this is used for anything other than dumps, we may need to
2325 * leave interrupts disabled on return. If crash dumps fail once
2326 * execution reaches this point, leaving interrupts disabled here is
2327 * a reasonable starting point for debugging.
2328 */
2329 intr = intr_disable();
2330 cpu = PCPU_GET(cpuid);
2331 sysm = &sysmap_lmem[cpu];
2332 /* Since this is for the debugger, no locks or any other fun */
2333 npte = TLBLO_PA_TO_PFN(pa) | PTE_C_CACHE | PTE_D | PTE_V |
2334 PTE_G;
2335 pte = pmap_pte(kernel_pmap, sysm->base);
2336 *pte = npte;
2337 sysm->valid1 = 1;
2338 pmap_update_page(kernel_pmap, sysm->base, npte);
2339 va = sysm->base;
2340 intr_restore(intr);
2341 #endif
2342 }
2343 return ((void *)va);
2344 }
2345
2346 void
2347 pmap_kenter_temporary_free(vm_paddr_t pa)
2348 {
2349 #ifndef __mips_n64 /* XXX : to be converted to new style */
2350 int cpu;
2351 register_t intr;
2352 struct local_sysmaps *sysm;
2353 #endif
2354
2355 if (MIPS_DIRECT_MAPPABLE(pa)) {
2356 /* nothing to do for this case */
2357 return;
2358 }
2359 #ifndef __mips_n64 /* XXX : to be converted to new style */
2360 cpu = PCPU_GET(cpuid);
2361 sysm = &sysmap_lmem[cpu];
2362 if (sysm->valid1) {
2363 pt_entry_t *pte;
2364
2365 intr = intr_disable();
2366 pte = pmap_pte(kernel_pmap, sysm->base);
2367 *pte = PTE_G;
2368 pmap_invalidate_page(kernel_pmap, sysm->base);
2369 intr_restore(intr);
2370 sysm->valid1 = 0;
2371 }
2372 #endif
2373 }
2374
2375 /*
2376 * Maps a sequence of resident pages belonging to the same object.
2377 * The sequence begins with the given page m_start. This page is
2378 * mapped at the given virtual address start. Each subsequent page is
2379 * mapped at a virtual address that is offset from start by the same
2380 * amount as the page is offset from m_start within the object. The
2381 * last page in the sequence is the page with the largest offset from
2382 * m_start that can be mapped at a virtual address less than the given
2383 * virtual address end. Not every virtual page between start and end
2384 * is mapped; only those for which a resident page exists with the
2385 * corresponding offset from m_start are mapped.
2386 */
2387 void
2388 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2389 vm_page_t m_start, vm_prot_t prot)
2390 {
2391 vm_page_t m, mpte;
2392 vm_pindex_t diff, psize;
2393
2394 VM_OBJECT_ASSERT_LOCKED(m_start->object);
2395
2396 psize = atop(end - start);
2397 mpte = NULL;
2398 m = m_start;
2399 rw_wlock(&pvh_global_lock);
2400 PMAP_LOCK(pmap);
2401 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2402 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2403 prot, mpte);
2404 m = TAILQ_NEXT(m, listq);
2405 }
2406 rw_wunlock(&pvh_global_lock);
2407 PMAP_UNLOCK(pmap);
2408 }
2409
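/*
 * Illustrative sketch (not part of the original source): the virtual
 * address used for each page of the run above depends only on the
 * page's offset within the object, which is what "start + ptoa(diff)"
 * computes.  demo_run_va() and DEMO_PAGE_SHIFT are hypothetical.
 */
#if 0	/* example only; never compiled */
#include <stdint.h>

#define	DEMO_PAGE_SHIFT	12

/*
 * Virtual address for the page with index "pindex", given that the page
 * with index "first_pindex" maps at "start".
 */
static uint64_t
demo_run_va(uint64_t start, uint64_t first_pindex, uint64_t pindex)
{
	return (start + ((pindex - first_pindex) << DEMO_PAGE_SHIFT));
}
#endif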
2410 /*
2411 * pmap_object_init_pt preloads the ptes for a given object
2412 * into the specified pmap. This eliminates the blast of soft
2413 * faults on process startup and immediately after an mmap.
2414 */
2415 void
2416 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2417 vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2418 {
2419 VM_OBJECT_ASSERT_WLOCKED(object);
2420 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2421 ("pmap_object_init_pt: non-device object"));
2422 }
2423
2424 /*
2425 * Clear the wired attribute from the mappings for the specified range of
2426 * addresses in the given pmap. Every valid mapping within that range
2427 * must have the wired attribute set. In contrast, invalid mappings
2428 * cannot have the wired attribute set, so they are ignored.
2429 *
2430 * The wired attribute of the page table entry is not a hardware feature,
2431 * so there is no need to invalidate any TLB entries.
2432 */
2433 void
2434 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2435 {
2436 pd_entry_t *pde, *pdpe;
2437 pt_entry_t *pte;
2438 vm_offset_t va_next;
2439
2440 PMAP_LOCK(pmap);
2441 for (; sva < eva; sva = va_next) {
2442 pdpe = pmap_segmap(pmap, sva);
2443 #ifdef __mips_n64
2444 if (*pdpe == NULL) {
2445 va_next = (sva + NBSEG) & ~SEGMASK;
2446 if (va_next < sva)
2447 va_next = eva;
2448 continue;
2449 }
2450 #endif
2451 va_next = (sva + NBPDR) & ~PDRMASK;
2452 if (va_next < sva)
2453 va_next = eva;
2454 pde = pmap_pdpe_to_pde(pdpe, sva);
2455 if (*pde == NULL)
2456 continue;
2457 if (va_next > eva)
2458 va_next = eva;
2459 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
2460 sva += PAGE_SIZE) {
2461 if (!pte_test(pte, PTE_V))
2462 continue;
2463 if (!pte_test(pte, PTE_W))
2464 panic("pmap_unwire: pte %#jx is missing PTE_W",
2465 (uintmax_t)*pte);
2466 pte_clear(pte, PTE_W);
2467 pmap->pm_stats.wired_count--;
2468 }
2469 }
2470 PMAP_UNLOCK(pmap);
2471 }
2472
2473 /*
2474 * Copy the range specified by src_addr/len
2475 * from the source map to the range dst_addr/len
2476 * in the destination map.
2477 *
2478 * This routine is only advisory and need not do anything.
2479 */
2480
2481 void
2482 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2483 vm_size_t len, vm_offset_t src_addr)
2484 {
2485 }
2486
2487 /*
2488 * pmap_zero_page zeros the specified hardware page by mapping
2489 * the page into KVM and using bzero to clear its contents.
2490 *
2491 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2492 */
2493 void
2494 pmap_zero_page(vm_page_t m)
2495 {
2496 vm_offset_t va;
2497 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2498
2499 if (MIPS_DIRECT_MAPPABLE(phys)) {
2500 va = MIPS_PHYS_TO_DIRECT(phys);
2501 bzero((caddr_t)va, PAGE_SIZE);
2502 mips_dcache_wbinv_range(va, PAGE_SIZE);
2503 } else {
2504 va = pmap_lmem_map1(phys);
2505 bzero((caddr_t)va, PAGE_SIZE);
2506 mips_dcache_wbinv_range(va, PAGE_SIZE);
2507 pmap_lmem_unmap();
2508 }
2509 }
2510
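/*
 * Illustrative sketch (not part of the original source): on 32-bit MIPS
 * only the first 512MB of physical memory is direct-mappable, and the
 * KSEG0 (cached) and KSEG1 (uncached) windows translate by simple
 * arithmetic, which is why the direct-mapped path above needs no page
 * table.  The demo_* names are hypothetical stand-ins for the
 * MIPS_DIRECT_MAPPABLE()/MIPS_PHYS_TO_DIRECT() macros used here.
 */
#if 0	/* example only; never compiled */
#include <stdint.h>

#define	DEMO_KSEG0_BASE		0x80000000UL	/* cached direct map */
#define	DEMO_KSEG1_BASE		0xa0000000UL	/* uncached direct map */
#define	DEMO_DIRECT_SIZE	0x20000000UL	/* 512MB */

static int
demo_direct_mappable(uint32_t pa)
{
	return (pa < DEMO_DIRECT_SIZE);
}

static uint32_t
demo_phys_to_kseg0(uint32_t pa)
{
	return ((uint32_t)(DEMO_KSEG0_BASE + pa));	/* cached view */
}

static uint32_t
demo_phys_to_kseg1(uint32_t pa)
{
	return ((uint32_t)(DEMO_KSEG1_BASE + pa));	/* uncached view */
}
#endif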
2511 /*
2512 * pmap_zero_page_area zeros the specified hardware page by mapping
2513 * the page into KVM and using bzero to clear its contents.
2514 *
2515 * off and size may not cover an area beyond a single hardware page.
2516 */
2517 void
2518 pmap_zero_page_area(vm_page_t m, int off, int size)
2519 {
2520 vm_offset_t va;
2521 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2522
2523 if (MIPS_DIRECT_MAPPABLE(phys)) {
2524 va = MIPS_PHYS_TO_DIRECT(phys);
2525 bzero((char *)(caddr_t)va + off, size);
2526 mips_dcache_wbinv_range(va + off, size);
2527 } else {
2528 va = pmap_lmem_map1(phys);
2529 bzero((char *)va + off, size);
2530 mips_dcache_wbinv_range(va + off, size);
2531 pmap_lmem_unmap();
2532 }
2533 }
2534
2535 void
2536 pmap_zero_page_idle(vm_page_t m)
2537 {
2538 vm_offset_t va;
2539 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2540
2541 if (MIPS_DIRECT_MAPPABLE(phys)) {
2542 va = MIPS_PHYS_TO_DIRECT(phys);
2543 bzero((caddr_t)va, PAGE_SIZE);
2544 mips_dcache_wbinv_range(va, PAGE_SIZE);
2545 } else {
2546 va = pmap_lmem_map1(phys);
2547 bzero((caddr_t)va, PAGE_SIZE);
2548 mips_dcache_wbinv_range(va, PAGE_SIZE);
2549 pmap_lmem_unmap();
2550 }
2551 }
2552
2553 /*
2554 * pmap_copy_page copies the specified (machine independent)
2555 * page by mapping the page into virtual memory and using
2556 * bcopy to copy the page, one machine dependent page at a
2557 * time.
2558 *
2559 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2560 */
2561 void
2562 pmap_copy_page(vm_page_t src, vm_page_t dst)
2563 {
2564 vm_offset_t va_src, va_dst;
2565 vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src);
2566 vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst);
2567
2568 if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) {
2569 /* easy case, all can be accessed via KSEG0 */
2570 /*
2571 * Flush all caches for VA that are mapped to this page
2572 * to make sure that data in SDRAM is up to date
2573 */
2574 pmap_flush_pvcache(src);
2575 mips_dcache_wbinv_range_index(
2576 MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE);
2577 va_src = MIPS_PHYS_TO_DIRECT(phys_src);
2578 va_dst = MIPS_PHYS_TO_DIRECT(phys_dst);
2579 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2580 mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2581 } else {
2582 va_src = pmap_lmem_map2(phys_src, phys_dst);
2583 va_dst = va_src + PAGE_SIZE;
2584 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2585 mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2586 pmap_lmem_unmap();
2587 }
2588 }
2589
2590 int unmapped_buf_allowed;
2591
2592 void
2593 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2594 vm_offset_t b_offset, int xfersize)
2595 {
2596 char *a_cp, *b_cp;
2597 vm_page_t a_m, b_m;
2598 vm_offset_t a_pg_offset, b_pg_offset;
2599 vm_paddr_t a_phys, b_phys;
2600 int cnt;
2601
2602 while (xfersize > 0) {
2603 a_pg_offset = a_offset & PAGE_MASK;
2604 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2605 a_m = ma[a_offset >> PAGE_SHIFT];
2606 a_phys = VM_PAGE_TO_PHYS(a_m);
2607 b_pg_offset = b_offset & PAGE_MASK;
2608 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2609 b_m = mb[b_offset >> PAGE_SHIFT];
2610 b_phys = VM_PAGE_TO_PHYS(b_m);
2611 if (MIPS_DIRECT_MAPPABLE(a_phys) &&
2612 MIPS_DIRECT_MAPPABLE(b_phys)) {
2613 pmap_flush_pvcache(a_m);
2614 mips_dcache_wbinv_range_index(
2615 MIPS_PHYS_TO_DIRECT(b_phys), PAGE_SIZE);
2616 a_cp = (char *)MIPS_PHYS_TO_DIRECT(a_phys) +
2617 a_pg_offset;
2618 b_cp = (char *)MIPS_PHYS_TO_DIRECT(b_phys) +
2619 b_pg_offset;
2620 bcopy(a_cp, b_cp, cnt);
2621 mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt);
2622 } else {
2623 a_cp = (char *)pmap_lmem_map2(a_phys, b_phys);
2624 b_cp = (char *)a_cp + PAGE_SIZE;
2625 a_cp += a_pg_offset;
2626 b_cp += b_pg_offset;
2627 bcopy(a_cp, b_cp, cnt);
2628 mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt);
2629 pmap_lmem_unmap();
2630 }
2631 a_offset += cnt;
2632 b_offset += cnt;
2633 xfersize -= cnt;
2634 }
2635 }
2636
2637 /*
2638 * Returns true if the pmap's pv is one of the first
2639 * 16 pvs linked to from this page. This count may
2640 * be changed upwards or downwards in the future; it
2641 * is only necessary that true be returned for a small
2642 * subset of pmaps for proper page aging.
2643 */
2644 boolean_t
2645 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2646 {
2647 pv_entry_t pv;
2648 int loops = 0;
2649 boolean_t rv;
2650
2651 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2652 ("pmap_page_exists_quick: page %p is not managed", m));
2653 rv = FALSE;
2654 rw_wlock(&pvh_global_lock);
2655 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2656 if (PV_PMAP(pv) == pmap) {
2657 rv = TRUE;
2658 break;
2659 }
2660 loops++;
2661 if (loops >= 16)
2662 break;
2663 }
2664 rw_wunlock(&pvh_global_lock);
2665 return (rv);
2666 }
2667
2668 /*
2669 * Remove all pages from the specified address space;
2670 * this aids process exit speeds. Also, this code is
2671 * special-cased for the current process only, but
2672 * can have the more generic (and slightly slower)
2673 * mode enabled. This is much faster than pmap_remove
2674 * in the case of running down an entire address space.
2675 */
2676 void
2677 pmap_remove_pages(pmap_t pmap)
2678 {
2679 pd_entry_t *pde;
2680 pt_entry_t *pte, tpte;
2681 pv_entry_t pv;
2682 vm_page_t m;
2683 struct pv_chunk *pc, *npc;
2684 u_long inuse, bitmask;
2685 int allfree, bit, field, idx;
2686
2687 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2688 printf("warning: pmap_remove_pages called with non-current pmap\n");
2689 return;
2690 }
2691 rw_wlock(&pvh_global_lock);
2692 PMAP_LOCK(pmap);
2693 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2694 allfree = 1;
2695 for (field = 0; field < _NPCM; field++) {
2696 inuse = ~pc->pc_map[field] & pc_freemask[field];
2697 while (inuse != 0) {
2698 bit = ffsl(inuse) - 1;
2699 bitmask = 1UL << bit;
2700 idx = field * sizeof(inuse) * NBBY + bit;
2701 pv = &pc->pc_pventry[idx];
2702 inuse &= ~bitmask;
2703
2704 pde = pmap_pde(pmap, pv->pv_va);
2705 KASSERT(pde != NULL && *pde != 0,
2706 ("pmap_remove_pages: pde"));
2707 pte = pmap_pde_to_pte(pde, pv->pv_va);
2708 if (!pte_test(pte, PTE_V))
2709 panic("pmap_remove_pages: bad pte");
2710 tpte = *pte;
2711
2712 /*
2713 * We cannot remove wired pages from a process' mapping at this time
2714 */
2715 if (pte_test(&tpte, PTE_W)) {
2716 allfree = 0;
2717 continue;
2718 }
2719 *pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2720
2721 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte));
2722 KASSERT(m != NULL,
2723 ("pmap_remove_pages: bad tpte %#jx",
2724 (uintmax_t)tpte));
2725
2726 /*
2727 * Update the vm_page_t clean and reference bits.
2728 */
2729 if (pte_test(&tpte, PTE_D))
2730 vm_page_dirty(m);
2731
2732 /* Mark free */
2733 PV_STAT(pv_entry_frees++);
2734 PV_STAT(pv_entry_spare++);
2735 pv_entry_count--;
2736 pc->pc_map[field] |= bitmask;
2737 pmap->pm_stats.resident_count--;
2738 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2739 if (TAILQ_EMPTY(&m->md.pv_list))
2740 vm_page_aflag_clear(m, PGA_WRITEABLE);
2741 pmap_unuse_pt(pmap, pv->pv_va, *pde);
2742 }
2743 }
2744 if (allfree) {
2745 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2746 free_pv_chunk(pc);
2747 }
2748 }
2749 pmap_invalidate_all(pmap);
2750 PMAP_UNLOCK(pmap);
2751 rw_wunlock(&pvh_global_lock);
2752 }
2753
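/*
 * Illustrative sketch (not part of the original source): the loop above
 * visits every allocated pv_entry in a chunk by inverting the per-chunk
 * free bitmap and peeling off set bits with ffsl().  The standalone
 * model below applies the same bit-scan to plain arrays of masks; the
 * demo_* names are hypothetical.
 */
#if 0	/* example only; never compiled */
#include <stdio.h>
#include <strings.h>		/* ffsl() on FreeBSD */

#define	DEMO_NFIELDS	3
#define	DEMO_NBBY	8

static void
demo_visit(int idx)
{
	printf("entry %d is in use\n", idx);
}

static void
demo_walk_inuse(const unsigned long free_map[DEMO_NFIELDS],
    const unsigned long valid_mask[DEMO_NFIELDS])
{
	unsigned long inuse, bitmask;
	int field, bit, idx;

	for (field = 0; field < DEMO_NFIELDS; field++) {
		/* A clear bit in free_map marks an allocated slot. */
		inuse = ~free_map[field] & valid_mask[field];
		while (inuse != 0) {
			bit = ffsl(inuse) - 1;
			bitmask = 1UL << bit;
			idx = field * sizeof(inuse) * DEMO_NBBY + bit;
			demo_visit(idx);
			inuse &= ~bitmask;
		}
	}
}
#endif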
2754 /*
2755 * pmap_testbit tests whether the given bit is set in any PTE mapping the page.
2756 */
2757 static boolean_t
2758 pmap_testbit(vm_page_t m, int bit)
2759 {
2760 pv_entry_t pv;
2761 pmap_t pmap;
2762 pt_entry_t *pte;
2763 boolean_t rv = FALSE;
2764
2765 if (m->oflags & VPO_UNMANAGED)
2766 return (rv);
2767
2768 rw_assert(&pvh_global_lock, RA_WLOCKED);
2769 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2770 pmap = PV_PMAP(pv);
2771 PMAP_LOCK(pmap);
2772 pte = pmap_pte(pmap, pv->pv_va);
2773 rv = pte_test(pte, bit);
2774 PMAP_UNLOCK(pmap);
2775 if (rv)
2776 break;
2777 }
2778 return (rv);
2779 }
2780
2781 /*
2782 * pmap_page_wired_mappings:
2783 *
2784 * Return the number of managed mappings to the given physical page
2785 * that are wired.
2786 */
2787 int
2788 pmap_page_wired_mappings(vm_page_t m)
2789 {
2790 pv_entry_t pv;
2791 pmap_t pmap;
2792 pt_entry_t *pte;
2793 int count;
2794
2795 count = 0;
2796 if ((m->oflags & VPO_UNMANAGED) != 0)
2797 return (count);
2798 rw_wlock(&pvh_global_lock);
2799 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2800 pmap = PV_PMAP(pv);
2801 PMAP_LOCK(pmap);
2802 pte = pmap_pte(pmap, pv->pv_va);
2803 if (pte_test(pte, PTE_W))
2804 count++;
2805 PMAP_UNLOCK(pmap);
2806 }
2807 rw_wunlock(&pvh_global_lock);
2808 return (count);
2809 }
2810
2811 /*
2812 * Clear the write and modified bits in each of the given page's mappings.
2813 */
2814 void
2815 pmap_remove_write(vm_page_t m)
2816 {
2817 pmap_t pmap;
2818 pt_entry_t pbits, *pte;
2819 pv_entry_t pv;
2820
2821 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2822 ("pmap_remove_write: page %p is not managed", m));
2823
2824 /*
2825 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2826 * set by another thread while the object is locked. Thus,
2827 * if PGA_WRITEABLE is clear, no page table entries need updating.
2828 */
2829 VM_OBJECT_ASSERT_WLOCKED(m->object);
2830 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2831 return;
2832 rw_wlock(&pvh_global_lock);
2833 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2834 pmap = PV_PMAP(pv);
2835 PMAP_LOCK(pmap);
2836 pte = pmap_pte(pmap, pv->pv_va);
2837 KASSERT(pte != NULL && pte_test(pte, PTE_V),
2838 ("page on pv_list has no pte"));
2839 pbits = *pte;
2840 if (pte_test(&pbits, PTE_D)) {
2841 pte_clear(&pbits, PTE_D);
2842 vm_page_dirty(m);
2843 }
2844 pte_set(&pbits, PTE_RO);
2845 if (pbits != *pte) {
2846 *pte = pbits;
2847 pmap_update_page(pmap, pv->pv_va, pbits);
2848 }
2849 PMAP_UNLOCK(pmap);
2850 }
2851 vm_page_aflag_clear(m, PGA_WRITEABLE);
2852 rw_wunlock(&pvh_global_lock);
2853 }
2854
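/*
 * Illustrative sketch (not part of the original source): the
 * per-mapping work above, reduced to plain flag bits.  Each mapping is
 * made read-only and, if the hardware dirty bit was set, the fact that
 * the page was modified is preserved before that bit is cleared.  The
 * demo_* names and flag values are hypothetical.
 */
#if 0	/* example only; never compiled */
#define	DEMO_PTE_D	0x02	/* dirty (hardware write enable) */
#define	DEMO_PTE_RO	0x04	/* software read-only attribute */

/* Returns 1 if the caller must mark the backing page dirty. */
static int
demo_downgrade_to_ro(unsigned int *pte)
{
	int was_dirty;

	was_dirty = (*pte & DEMO_PTE_D) != 0;
	*pte &= ~DEMO_PTE_D;
	*pte |= DEMO_PTE_RO;
	return (was_dirty);
}
#endif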
2855 /*
2856 * pmap_ts_referenced:
2857 *
2858 * Return the count of reference bits for a page, clearing all of them.
2859 */
2860 int
2861 pmap_ts_referenced(vm_page_t m)
2862 {
2863
2864 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2865 ("pmap_ts_referenced: page %p is not managed", m));
2866 if (m->md.pv_flags & PV_TABLE_REF) {
2867 rw_wlock(&pvh_global_lock);
2868 m->md.pv_flags &= ~PV_TABLE_REF;
2869 rw_wunlock(&pvh_global_lock);
2870 return (1);
2871 }
2872 return (0);
2873 }
2874
2875 /*
2876 * pmap_is_modified:
2877 *
2878 * Return whether or not the specified physical page was modified
2879 * in any physical maps.
2880 */
2881 boolean_t
2882 pmap_is_modified(vm_page_t m)
2883 {
2884 boolean_t rv;
2885
2886 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2887 ("pmap_is_modified: page %p is not managed", m));
2888
2889 /*
2890 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2891 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
2892 * is clear, no PTEs can have PTE_D set.
2893 */
2894 VM_OBJECT_ASSERT_WLOCKED(m->object);
2895 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2896 return (FALSE);
2897 rw_wlock(&pvh_global_lock);
2898 rv = pmap_testbit(m, PTE_D);
2899 rw_wunlock(&pvh_global_lock);
2900 return (rv);
2901 }
2902
2903 /* N/C */
2904
2905 /*
2906 * pmap_is_prefaultable:
2907 *
2908 * Return whether or not the specified virtual address is eligible
2909 * for prefault.
2910 */
2911 boolean_t
2912 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2913 {
2914 pd_entry_t *pde;
2915 pt_entry_t *pte;
2916 boolean_t rv;
2917
2918 rv = FALSE;
2919 PMAP_LOCK(pmap);
2920 pde = pmap_pde(pmap, addr);
2921 if (pde != NULL && *pde != 0) {
2922 pte = pmap_pde_to_pte(pde, addr);
2923 rv = (*pte == 0);
2924 }
2925 PMAP_UNLOCK(pmap);
2926 return (rv);
2927 }
2928
2929 /*
2930 * Apply the given advice to the specified range of addresses within the
2931 * given pmap. Depending on the advice, clear the referenced and/or
2932 * modified flags in each mapping and set the mapped page's dirty field.
2933 */
2934 void
2935 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2936 {
2937 pd_entry_t *pde, *pdpe;
2938 pt_entry_t *pte;
2939 vm_offset_t va, va_next;
2940 vm_paddr_t pa;
2941 vm_page_t m;
2942
2943 if (advice != MADV_DONTNEED && advice != MADV_FREE)
2944 return;
2945 rw_wlock(&pvh_global_lock);
2946 PMAP_LOCK(pmap);
2947 for (; sva < eva; sva = va_next) {
2948 pdpe = pmap_segmap(pmap, sva);
2949 #ifdef __mips_n64
2950 if (*pdpe == 0) {
2951 va_next = (sva + NBSEG) & ~SEGMASK;
2952 if (va_next < sva)
2953 va_next = eva;
2954 continue;
2955 }
2956 #endif
2957 va_next = (sva + NBPDR) & ~PDRMASK;
2958 if (va_next < sva)
2959 va_next = eva;
2960
2961 pde = pmap_pdpe_to_pde(pdpe, sva);
2962 if (*pde == NULL)
2963 continue;
2964
2965 /*
2966 * Limit our scan to either the end of the va represented
2967 * by the current page table page, or to the end of the
2968 * range being advised.
2969 */
2970 if (va_next > eva)
2971 va_next = eva;
2972
2973 va = va_next;
2974 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
2975 sva += PAGE_SIZE) {
2976 if (!pte_test(pte, PTE_MANAGED | PTE_V)) {
2977 if (va != va_next) {
2978 pmap_invalidate_range(pmap, va, sva);
2979 va = va_next;
2980 }
2981 continue;
2982 }
2983 pa = TLBLO_PTE_TO_PA(*pte);
2984 m = PHYS_TO_VM_PAGE(pa);
2985 m->md.pv_flags &= ~PV_TABLE_REF;
2986 if (pte_test(pte, PTE_D)) {
2987 if (advice == MADV_DONTNEED) {
2988 /*
2989 * Future calls to pmap_is_modified()
2990 * can be avoided by making the page
2991 * dirty now.
2992 */
2993 vm_page_dirty(m);
2994 } else {
2995 pte_clear(pte, PTE_D);
2996 if (va == va_next)
2997 va = sva;
2998 }
2999 } else {
3000 /*
3001 * Unless PTE_D is set, any TLB entries
3002 * mapping "sva" don't allow write access, so
3003 * they needn't be invalidated.
3004 */
3005 if (va != va_next) {
3006 pmap_invalidate_range(pmap, va, sva);
3007 va = va_next;
3008 }
3009 }
3010 }
3011 if (va != va_next)
3012 pmap_invalidate_range(pmap, va, sva);
3013 }
3014 rw_wunlock(&pvh_global_lock);
3015 PMAP_UNLOCK(pmap);
3016 }
3017
3018 /*
3019 * Clear the modify bits on the specified physical page.
3020 */
3021 void
3022 pmap_clear_modify(vm_page_t m)
3023 {
3024 pmap_t pmap;
3025 pt_entry_t *pte;
3026 pv_entry_t pv;
3027
3028 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3029 ("pmap_clear_modify: page %p is not managed", m));
3030 VM_OBJECT_ASSERT_WLOCKED(m->object);
3031 KASSERT(!vm_page_xbusied(m),
3032 ("pmap_clear_modify: page %p is exclusive busied", m));
3033
3034 /*
3035 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set.
3036 * If the object containing the page is locked and the page is not
3037 * write busied, then PGA_WRITEABLE cannot be concurrently set.
3038 */
3039 if ((m->aflags & PGA_WRITEABLE) == 0)
3040 return;
3041 rw_wlock(&pvh_global_lock);
3042 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3043 pmap = PV_PMAP(pv);
3044 PMAP_LOCK(pmap);
3045 pte = pmap_pte(pmap, pv->pv_va);
3046 if (pte_test(pte, PTE_D)) {
3047 pte_clear(pte, PTE_D);
3048 pmap_update_page(pmap, pv->pv_va, *pte);
3049 }
3050 PMAP_UNLOCK(pmap);
3051 }
3052 rw_wunlock(&pvh_global_lock);
3053 }
3054
3055 /*
3056 * pmap_is_referenced:
3057 *
3058 * Return whether or not the specified physical page was referenced
3059 * in any physical maps.
3060 */
3061 boolean_t
3062 pmap_is_referenced(vm_page_t m)
3063 {
3064
3065 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3066 ("pmap_is_referenced: page %p is not managed", m));
3067 return ((m->md.pv_flags & PV_TABLE_REF) != 0);
3068 }
3069
3070 /*
3071 * Miscellaneous support routines follow
3072 */
3073
3074 /*
3075 * Map a set of physical memory pages into the kernel virtual
3076 * address space. Return a pointer to where it is mapped. This
3077 * routine is intended to be used for mapping device memory,
3078 * NOT real memory.
3079 *
3080 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit.
3081 */
3082 void *
3083 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
3084 {
3085 vm_offset_t va, tmpva, offset;
3086
3087 /*
3088 * KSEG1 maps only the first 512MB of the physical address space. For
3089 * pa >= 0x20000000 we must create a proper mapping using pmap_kenter.
3090 */
3091 if (MIPS_DIRECT_MAPPABLE(pa + size - 1))
3092 return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
3093 else {
3094 offset = pa & PAGE_MASK;
3095 size = roundup(size + offset, PAGE_SIZE);
3096
3097 va = kva_alloc(size);
3098 if (!va)
3099 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3100 pa = trunc_page(pa);
3101 for (tmpva = va; size > 0;) {
3102 pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED);
3103 size -= PAGE_SIZE;
3104 tmpva += PAGE_SIZE;
3105 pa += PAGE_SIZE;
3106 }
3107 }
3108
3109 return ((void *)(va + offset));
3110 }
3111
3112 void
3113 pmap_unmapdev(vm_offset_t va, vm_size_t size)
3114 {
3115 #ifndef __mips_n64
3116 vm_offset_t base, offset;
3117
3118 /* If the address is within KSEG1 then there is nothing to do */
3119 if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
3120 return;
3121
3122 base = trunc_page(va);
3123 offset = va & PAGE_MASK;
3124 size = roundup(size + offset, PAGE_SIZE);
3125 kva_free(base, size);
3126 #endif
3127 }
3128
3129 /*
3130 * perform the pmap work for mincore
3131 */
3132 int
3133 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3134 {
3135 pt_entry_t *ptep, pte;
3136 vm_paddr_t pa;
3137 vm_page_t m;
3138 int val;
3139
3140 PMAP_LOCK(pmap);
3141 retry:
3142 ptep = pmap_pte(pmap, addr);
3143 pte = (ptep != NULL) ? *ptep : 0;
3144 if (!pte_test(&pte, PTE_V)) {
3145 val = 0;
3146 goto out;
3147 }
3148 val = MINCORE_INCORE;
3149 if (pte_test(&pte, PTE_D))
3150 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3151 pa = TLBLO_PTE_TO_PA(pte);
3152 if (pte_test(&pte, PTE_MANAGED)) {
3153 /*
3154 * This may falsely report the given address as
3155 * MINCORE_REFERENCED. Unfortunately, due to the lack of
3156 * per-PTE reference information, it is impossible to
3157 * determine if the address is MINCORE_REFERENCED.
3158 */
3159 m = PHYS_TO_VM_PAGE(pa);
3160 if ((m->aflags & PGA_REFERENCED) != 0)
3161 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3162 }
3163 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3164 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
3165 pte_test(&pte, PTE_MANAGED)) {
3166 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3167 if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3168 goto retry;
3169 } else
3170 out:
3171 PA_UNLOCK_COND(*locked_pa);
3172 PMAP_UNLOCK(pmap);
3173 return (val);
3174 }
3175
3176 void
3177 pmap_activate(struct thread *td)
3178 {
3179 pmap_t pmap, oldpmap;
3180 struct proc *p = td->td_proc;
3181 u_int cpuid;
3182
3183 critical_enter();
3184
3185 pmap = vmspace_pmap(p->p_vmspace);
3186 oldpmap = PCPU_GET(curpmap);
3187 cpuid = PCPU_GET(cpuid);
3188
3189 if (oldpmap)
3190 CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
3191 CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
3192 pmap_asid_alloc(pmap);
3193 if (td == curthread) {
3194 PCPU_SET(segbase, pmap->pm_segtab);
3195 mips_wr_entryhi(pmap->pm_asid[cpuid].asid);
3196 }
3197
3198 PCPU_SET(curpmap, pmap);
3199 critical_exit();
3200 }
3201
3202 static void
3203 pmap_sync_icache_one(void *arg __unused)
3204 {
3205
3206 mips_icache_sync_all();
3207 mips_dcache_wbinv_all();
3208 }
3209
3210 void
3211 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3212 {
3213
3214 smp_rendezvous(NULL, pmap_sync_icache_one, NULL, NULL);
3215 }
3216
3217 /*
3218 * Increase the starting virtual address of the given mapping if a
3219 * different alignment might result in more superpage mappings.
3220 */
3221 void
3222 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3223 vm_offset_t *addr, vm_size_t size)
3224 {
3225 vm_offset_t superpage_offset;
3226
3227 if (size < NBSEG)
3228 return;
3229 if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3230 offset += ptoa(object->pg_color);
3231 superpage_offset = offset & SEGMASK;
3232 if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG ||
3233 (*addr & SEGMASK) == superpage_offset)
3234 return;
3235 if ((*addr & SEGMASK) < superpage_offset)
3236 *addr = (*addr & ~SEGMASK) + superpage_offset;
3237 else
3238 *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset;
3239 }
3240
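/*
 * Illustrative sketch (not part of the original source): the rule above
 * makes the chosen virtual address congruent to the object offset
 * modulo the superpage size, so that later faults can be promoted to
 * superpage mappings.  The standalone model below repeats the
 * arithmetic with plain integers; demo_align() and the DEMO_* constants
 * are hypothetical stand-ins for pmap_align_superpage(), NBSEG and
 * SEGMASK.
 */
#if 0	/* example only; never compiled */
#include <stdint.h>

#define	DEMO_NBSEG	(1UL << 22)		/* example superpage size */
#define	DEMO_SEGMASK	(DEMO_NBSEG - 1)

static void
demo_align(uint64_t offset, unsigned long *addr, unsigned long size)
{
	unsigned long superpage_offset;

	if (size < DEMO_NBSEG)
		return;
	superpage_offset = offset & DEMO_SEGMASK;
	/* Too small to hold an aligned superpage, or already aligned. */
	if (size - ((DEMO_NBSEG - superpage_offset) & DEMO_SEGMASK) <
	    DEMO_NBSEG || (*addr & DEMO_SEGMASK) == superpage_offset)
		return;
	if ((*addr & DEMO_SEGMASK) < superpage_offset)
		*addr = (*addr & ~DEMO_SEGMASK) + superpage_offset;
	else
		*addr = ((*addr + DEMO_SEGMASK) & ~DEMO_SEGMASK) +
		    superpage_offset;
}

static void
demo_align_example(void)
{
	unsigned long addr = 0x00433000UL;	/* proposed start address */

	demo_align(0x00005000, &addr, 16 * DEMO_NBSEG);
	/*
	 * addr is now 0x00805000: the first address at or above the
	 * original whose low SEGMASK bits match the object offset.
	 */
}
#endif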
3241 #ifdef DDB
3242 DB_SHOW_COMMAND(ptable, ddb_pid_dump)
3243 {
3244 pmap_t pmap;
3245 struct thread *td = NULL;
3246 struct proc *p;
3247 int i, j, k;
3248 vm_paddr_t pa;
3249 vm_offset_t va;
3250
3251 if (have_addr) {
3252 td = db_lookup_thread(addr, TRUE);
3253 if (td == NULL) {
3254 db_printf("Invalid pid or tid");
3255 return;
3256 }
3257 p = td->td_proc;
3258 if (p->p_vmspace == NULL) {
3259 db_printf("No vmspace for process");
3260 return;
3261 }
3262 pmap = vmspace_pmap(p->p_vmspace);
3263 } else
3264 pmap = kernel_pmap;
3265
3266 db_printf("pmap:%p segtab:%p asid:%x generation:%x\n",
3267 pmap, pmap->pm_segtab, pmap->pm_asid[0].asid,
3268 pmap->pm_asid[0].gen);
3269 for (i = 0; i < NPDEPG; i++) {
3270 pd_entry_t *pdpe;
3271 pt_entry_t *pde;
3272 pt_entry_t pte;
3273
3274 pdpe = (pd_entry_t *)pmap->pm_segtab[i];
3275 if (pdpe == NULL)
3276 continue;
3277 db_printf("[%4d] %p\n", i, pdpe);
3278 #ifdef __mips_n64
3279 for (j = 0; j < NPDEPG; j++) {
3280 pde = (pt_entry_t *)pdpe[j];
3281 if (pde == NULL)
3282 continue;
3283 db_printf("\t[%4d] %p\n", j, pde);
3284 #else
3285 {
3286 j = 0;
3287 pde = (pt_entry_t *)pdpe;
3288 #endif
3289 for (k = 0; k < NPTEPG; k++) {
3290 pte = pde[k];
3291 if (pte == 0 || !pte_test(&pte, PTE_V))
3292 continue;
3293 pa = TLBLO_PTE_TO_PA(pte);
3294 va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT);
3295 db_printf("\t\t[%04d] va: %p pte: %8jx pa:%jx\n",
3296 k, (void *)va, (uintmax_t)pte, (uintmax_t)pa);
3297 }
3298 }
3299 }
3300 }
3301 #endif
3302
3303 #if defined(DEBUG)
3304
3305 static void pads(pmap_t pm);
3306 void pmap_pvdump(vm_offset_t pa);
3307
3308 /* print address space of pmap */
3309 static void
3310 pads(pmap_t pm)
3311 {
3312 unsigned va, i, j;
3313 pt_entry_t *ptep;
3314
3315 if (pm == kernel_pmap)
3316 return;
3317 for (i = 0; i < NPTEPG; i++)
3318 if (pm->pm_segtab[i])
3319 for (j = 0; j < NPTEPG; j++) {
3320 va = (i << SEGSHIFT) + (j << PAGE_SHIFT);
3321 if (pm == kernel_pmap && va < KERNBASE)
3322 continue;
3323 if (pm != kernel_pmap &&
3324 va >= VM_MAXUSER_ADDRESS)
3325 continue;
3326 ptep = pmap_pte(pm, va);
3327 if (pte_test(ptep, PTE_V))
3328 printf("%x:%x ", va, *(int *)ptep);
3329 }
3330
3331 }
3332
3333 void
3334 pmap_pvdump(vm_offset_t pa)
3335 {
3336 register pv_entry_t pv;
3337 vm_page_t m;
3338
3339 printf("pa %x", pa);
3340 m = PHYS_TO_VM_PAGE(pa);
3341 for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3342 pv = TAILQ_NEXT(pv, pv_list)) {
3343 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3344 pads(pv->pv_pmap);
3345 }
3346 printf(" ");
3347 }
3348
3349 /* N/C */
3350 #endif
3351
3352
3353 /*
3354 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
3355 * It takes almost as much or more time to search the TLB for a
3356 * specific ASID and flush those entries as it does to flush the entire TLB.
3357 * Therefore, when we allocate a new ASID, we just take the next number. When
3358 * we run out of numbers, we flush the TLB, increment the generation count
3359 * and start over. ASID zero is reserved for kernel use.
3360 */
3361 static void
3362 pmap_asid_alloc(pmap_t pmap)
3363 {
3364
3365 if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
3366 pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
3367 /* Stale or never assigned on this CPU; allocate a fresh ASID. */
3368 if (PCPU_GET(next_asid) == pmap_max_asid) {
3369 tlb_invalidate_all_user(NULL);
3370 PCPU_SET(asid_generation,
3371 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3372 if (PCPU_GET(asid_generation) == 0) {
3373 PCPU_SET(asid_generation, 1);
3374 }
3375 PCPU_SET(next_asid, 1); /* 0 means invalid */
3376 }
3377 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3378 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3379 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3380 }
3381 }
3382
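/*
 * Illustrative sketch (not part of the original source): a single-CPU,
 * userland model of the ASID allocator above.  ASID 0 stays reserved,
 * and exhausting the ASID space flushes the user TLB entries and starts
 * a new generation, which implicitly invalidates every previously
 * handed-out ASID.  DEMO_MAX_ASID of 256 assumes an 8-bit ASID field;
 * all demo_* names are hypothetical.
 */
#if 0	/* example only; never compiled */
#include <stdio.h>

#define	DEMO_MAX_ASID		256
#define	DEMO_ASID_RESERVED	0

struct demo_cpu {
	unsigned int next_asid;
	unsigned int generation;
};

struct demo_pmap {
	unsigned int asid;
	unsigned int gen;
};

static void
demo_asid_alloc(struct demo_cpu *cpu, struct demo_pmap *pm)
{
	if (pm->asid != DEMO_ASID_RESERVED && pm->gen == cpu->generation)
		return;				/* still valid on this CPU */
	if (cpu->next_asid == DEMO_MAX_ASID) {
		printf("flush all user TLB entries\n");
		if (++cpu->generation == 0)	/* generation 0 is never used */
			cpu->generation = 1;
		cpu->next_asid = 1;		/* ASID 0 stays reserved */
	}
	pm->asid = cpu->next_asid++;
	pm->gen = cpu->generation;
}
#endif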
3383 static pt_entry_t
3384 init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot)
3385 {
3386 pt_entry_t rw;
3387
3388 if (!(prot & VM_PROT_WRITE))
3389 rw = PTE_V | PTE_RO;
3390 else if ((m->oflags & VPO_UNMANAGED) == 0) {
3391 if ((access & VM_PROT_WRITE) != 0)
3392 rw = PTE_V | PTE_D;
3393 else
3394 rw = PTE_V;
3395 } else
3396 /* Needn't emulate a modified bit for unmanaged pages. */
3397 rw = PTE_V | PTE_D;
3398 return (rw);
3399 }
3400
3401 /*
3402 * pmap_emulate_modified: perform dirty bit emulation.
3403 *
3404 * On SMP, update just the local TLB; other CPUs will update their
3405 * TLBs from the PTE lazily if they take the exception.
3406 * Returns 0 on success, or 1 if the page is read-only and we
3407 * need to fault.
3408 */
3409 int
3410 pmap_emulate_modified(pmap_t pmap, vm_offset_t va)
3411 {
3412 pt_entry_t *pte;
3413
3414 PMAP_LOCK(pmap);
3415 pte = pmap_pte(pmap, va);
3416 if (pte == NULL)
3417 panic("pmap_emulate_modified: can't find PTE");
3418 #ifdef SMP
3419 /* It is possible that some other CPU changed m-bit */
3420 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) {
3421 tlb_update(pmap, va, *pte);
3422 PMAP_UNLOCK(pmap);
3423 return (0);
3424 }
3425 #else
3426 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D))
3427 panic("pmap_emulate_modified: invalid pte");
3428 #endif
3429 if (pte_test(pte, PTE_RO)) {
3430 PMAP_UNLOCK(pmap);
3431 return (1);
3432 }
3433 pte_set(pte, PTE_D);
3434 tlb_update(pmap, va, *pte);
3435 if (!pte_test(pte, PTE_MANAGED))
3436 panic("pmap_emulate_modified: unmanaged page");
3437 PMAP_UNLOCK(pmap);
3438 return (0);
3439 }
3440
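/*
 * Illustrative sketch (not part of the original source): the decision
 * made above on a TLB modified exception, written as a standalone
 * function over plain flag bits (following the SMP path, where a racing
 * CPU may already have set the dirty bit).  The demo_* names and flag
 * values are hypothetical.
 */
#if 0	/* example only; never compiled */
#define	DEMO_PTE_V	0x01	/* valid */
#define	DEMO_PTE_D	0x02	/* dirty (hardware write enable) */
#define	DEMO_PTE_RO	0x04	/* software read-only attribute */

/* Returns 0 if the TLB can simply be refilled, 1 if a real fault is needed. */
static int
demo_emulate_modified(unsigned int *pte)
{
	if (!(*pte & DEMO_PTE_V) || (*pte & DEMO_PTE_D))
		return (0);		/* another CPU already fixed it up */
	if (*pte & DEMO_PTE_RO)
		return (1);		/* genuine write to a read-only page */
	*pte |= DEMO_PTE_D;		/* record the modification */
	return (0);
}
#endif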
3441 /*
3442 * Routine: pmap_kextract
3443 * Function:
3444 * Extract the physical page address associated with the
3445 * given virtual address.
3446 */
3447 vm_paddr_t
3448 pmap_kextract(vm_offset_t va)
3449 {
3450 int mapped;
3451
3452 /*
3453 * First, the direct-mapped regions.
3454 */
3455 #if defined(__mips_n64)
3456 if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END)
3457 return (MIPS_XKPHYS_TO_PHYS(va));
3458 #endif
3459 if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END)
3460 return (MIPS_KSEG0_TO_PHYS(va));
3461
3462 if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END)
3463 return (MIPS_KSEG1_TO_PHYS(va));
3464
3465 /*
3466 * User virtual addresses.
3467 */
3468 if (va < VM_MAXUSER_ADDRESS) {
3469 pt_entry_t *ptep;
3470
3471 if (curproc && curproc->p_vmspace) {
3472 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3473 if (ptep) {
3474 return (TLBLO_PTE_TO_PA(*ptep) |
3475 (va & PAGE_MASK));
3476 }
3477 return (0);
3478 }
3479 }
3480
3481 /*
3482 * Should be kernel virtual here, otherwise fail
3483 */
3484 mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END);
3485 #if defined(__mips_n64)
3486 mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END);
3487 #endif
3488 /*
3489 * Kernel virtual.
3490 */
3491
3492 if (mapped) {
3493 pt_entry_t *ptep;
3494
3495 /* Is the kernel pmap initialized? */
3496 if (!CPU_EMPTY(&kernel_pmap->pm_active)) {
3497 /* It's inside the virtual address range */
3498 ptep = pmap_pte(kernel_pmap, va);
3499 if (ptep) {
3500 return (TLBLO_PTE_TO_PA(*ptep) |
3501 (va & PAGE_MASK));
3502 }
3503 }
3504 return (0);
3505 }
3506
3507 panic("%s for unknown address space %p.", __func__, (void *)va);
3508 }
3509
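/*
 * Illustrative sketch (not part of the original source): the 32-bit
 * MIPS address ranges that pmap_kextract() distinguishes above.  useg
 * addresses go through the current process's page tables, KSEG0/KSEG1
 * are cached/uncached windows onto the first 512MB of physical memory,
 * and KSEG2 goes through the kernel page tables.  demo_classify() is a
 * hypothetical helper.
 */
#if 0	/* example only; never compiled */
#include <stdint.h>

static const char *
demo_classify(uint32_t va)
{
	if (va < 0x80000000UL)
		return ("useg: translated via the user pmap");
	if (va < 0xa0000000UL)
		return ("kseg0: pa = va - 0x80000000, cached");
	if (va < 0xc0000000UL)
		return ("kseg1: pa = va - 0xa0000000, uncached");
	return ("kseg2: translated via the kernel pmap");
}
#endif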
3510
3511 void
3512 pmap_flush_pvcache(vm_page_t m)
3513 {
3514 pv_entry_t pv;
3515
3516 if (m != NULL) {
3517 for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3518 pv = TAILQ_NEXT(pv, pv_list)) {
3519 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
3520 }
3521 }
3522 }