FreeBSD/Linux Kernel Cross Reference
sys/mips/mips/pmap.c
1 /*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
38 * from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
39 * JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
40 */
41
42 /*
43 * Manages physical address maps.
44 *
45 * Since the information managed by this module is
46 * also stored by the logical address mapping module,
47 * this module may throw away valid virtual-to-physical
48 * mappings at almost any time. However, invalidations
49 * of virtual-to-physical mappings must be done as
50 * requested.
51 *
52 * In order to cope with hardware architectures which
53 * make virtual-to-physical map invalidates expensive,
 54  * this module may delay invalidation or protection-reduction
55 * operations until such time as they are actually
56 * necessary. This module is given full information as
57 * to which processors are currently using which maps,
58 * and to when physical maps must be made correct.
59 */
60
61 #include <sys/cdefs.h>
62 __FBSDID("$FreeBSD$");
63
64 #include "opt_ddb.h"
65 #include "opt_pmap.h"
66
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/lock.h>
70 #include <sys/mman.h>
71 #include <sys/msgbuf.h>
72 #include <sys/mutex.h>
73 #include <sys/pcpu.h>
74 #include <sys/proc.h>
75 #include <sys/rwlock.h>
76 #include <sys/sched.h>
77 #include <sys/smp.h>
78 #include <sys/sysctl.h>
79 #include <sys/vmmeter.h>
80
81 #ifdef DDB
82 #include <ddb/ddb.h>
83 #endif
84
85 #include <vm/vm.h>
86 #include <vm/vm_param.h>
87 #include <vm/vm_kern.h>
88 #include <vm/vm_page.h>
89 #include <vm/vm_map.h>
90 #include <vm/vm_object.h>
91 #include <vm/vm_extern.h>
92 #include <vm/vm_pageout.h>
93 #include <vm/vm_pager.h>
94 #include <vm/uma.h>
95
96 #include <machine/cache.h>
97 #include <machine/md_var.h>
98 #include <machine/tlb.h>
99
100 #undef PMAP_DEBUG
101
102 #if !defined(DIAGNOSTIC)
103 #define PMAP_INLINE __inline
104 #else
105 #define PMAP_INLINE
106 #endif
107
108 #ifdef PV_STATS
109 #define PV_STAT(x) do { x ; } while (0)
110 #else
111 #define PV_STAT(x) do { } while (0)
112 #endif
113
114 /*
115 * Get PDEs and PTEs for user/kernel address space
116 */
117 #define pmap_seg_index(v) (((v) >> SEGSHIFT) & (NPDEPG - 1))
118 #define pmap_pde_index(v) (((v) >> PDRSHIFT) & (NPDEPG - 1))
119 #define pmap_pte_index(v) (((v) >> PAGE_SHIFT) & (NPTEPG - 1))
120 #define pmap_pde_pindex(v) ((v) >> PDRSHIFT)
121
122 #ifdef __mips_n64
123 #define NUPDE (NPDEPG * NPDEPG)
124 #define NUSERPGTBLS (NUPDE + NPDEPG)
125 #else
126 #define NUPDE (NPDEPG)
127 #define NUSERPGTBLS (NUPDE)
128 #endif
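/*
 * A sketch (for orientation; the real lookup routines are further down)
 * of how the macros above split a virtual address:
 *
 *	seg = pmap_seg_index(va);	index into pm_segtab
 *	pde = pmap_pde_index(va);	index into a page directory page (n64)
 *	pte = pmap_pte_index(va);	index into a page table page
 *
 * On n64 the user page tables form a three-level tree: the segment
 * table, up to NPDEPG page directory pages and up to NUPDE
 * (NPDEPG * NPDEPG) page table pages, hence NUSERPGTBLS = NUPDE + NPDEPG.
 * On the 32-bit kernel the segment table points directly at page table
 * pages, so NUPDE is simply NPDEPG.
 */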
129
130 #define is_kernel_pmap(x) ((x) == kernel_pmap)
131
132 struct pmap kernel_pmap_store;
133 pd_entry_t *kernel_segmap;
134
135 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
136 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
137
138 static int nkpt;
139 unsigned pmap_max_asid; /* max ASID supported by the system */
140
141 #define PMAP_ASID_RESERVED 0
142
143 vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
144
145 static void pmap_asid_alloc(pmap_t pmap);
146
147 static struct rwlock_padalign pvh_global_lock;
148
149 /*
150 * Data for the pv entry allocation mechanism
151 */
152 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
153 static int pv_entry_count;
154
155 static void free_pv_chunk(struct pv_chunk *pc);
156 static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
157 static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
158 static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
159 static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
160 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
161 vm_offset_t va);
162 static vm_page_t pmap_alloc_direct_page(unsigned int index, int req);
163 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
164 vm_page_t m, vm_prot_t prot, vm_page_t mpte);
165 static void pmap_grow_direct_page(int req);
166 static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
167 pd_entry_t pde);
168 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
169 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
170 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
171 vm_offset_t va, vm_page_t m);
172 static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte);
173 static void pmap_invalidate_all(pmap_t pmap);
174 static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
175 static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m);
176
177 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);
178 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags);
179 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
180 static pt_entry_t init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot);
181
182 static void pmap_invalidate_page_action(void *arg);
183 static void pmap_invalidate_range_action(void *arg);
184 static void pmap_update_page_action(void *arg);
185
186 #ifndef __mips_n64
187 /*
188  * This structure supports high memory (memory above 512MB on 32-bit systems).
189 * The highmem area does not have a KSEG0 mapping, and we need a mechanism to
190 * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc.
191 *
192 * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To
193 * access a highmem physical address on a CPU, we map the physical address to
194 * the reserved virtual address for the CPU in the kernel pagetable. This is
195  * done with interrupts disabled (although a spinlock and sched_pin
196  * would be sufficient).
197 */
198 struct local_sysmaps {
199 vm_offset_t base;
200 uint32_t saved_intr;
201 uint16_t valid1, valid2;
202 };
203 static struct local_sysmaps sysmap_lmem[MAXCPU];
204
205 static __inline void
206 pmap_alloc_lmem_map(void)
207 {
208 int i;
209
210 for (i = 0; i < MAXCPU; i++) {
211 sysmap_lmem[i].base = virtual_avail;
212 virtual_avail += PAGE_SIZE * 2;
213 sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
214 }
215 }
216
217 static __inline vm_offset_t
218 pmap_lmem_map1(vm_paddr_t phys)
219 {
220 struct local_sysmaps *sysm;
221 pt_entry_t *pte, npte;
222 vm_offset_t va;
223 uint32_t intr;
224 int cpu;
225
226 intr = intr_disable();
227 cpu = PCPU_GET(cpuid);
228 sysm = &sysmap_lmem[cpu];
229 sysm->saved_intr = intr;
230 va = sysm->base;
231 npte = TLBLO_PA_TO_PFN(phys) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
232 pte = pmap_pte(kernel_pmap, va);
233 *pte = npte;
234 sysm->valid1 = 1;
235 return (va);
236 }
237
238 static __inline vm_offset_t
239 pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
240 {
241 struct local_sysmaps *sysm;
242 pt_entry_t *pte, npte;
243 vm_offset_t va1, va2;
244 uint32_t intr;
245 int cpu;
246
247 intr = intr_disable();
248 cpu = PCPU_GET(cpuid);
249 sysm = &sysmap_lmem[cpu];
250 sysm->saved_intr = intr;
251 va1 = sysm->base;
252 va2 = sysm->base + PAGE_SIZE;
253 npte = TLBLO_PA_TO_PFN(phys1) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
254 pte = pmap_pte(kernel_pmap, va1);
255 *pte = npte;
256 npte = TLBLO_PA_TO_PFN(phys2) | PTE_C_CACHE | PTE_D | PTE_V | PTE_G;
257 pte = pmap_pte(kernel_pmap, va2);
258 *pte = npte;
259 sysm->valid1 = 1;
260 sysm->valid2 = 1;
261 return (va1);
262 }
263
264 static __inline void
265 pmap_lmem_unmap(void)
266 {
267 struct local_sysmaps *sysm;
268 pt_entry_t *pte;
269 int cpu;
270
271 cpu = PCPU_GET(cpuid);
272 sysm = &sysmap_lmem[cpu];
273 pte = pmap_pte(kernel_pmap, sysm->base);
274 *pte = PTE_G;
275 tlb_invalidate_address(kernel_pmap, sysm->base);
276 sysm->valid1 = 0;
277 if (sysm->valid2) {
278 pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);
279 *pte = PTE_G;
280 tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);
281 sysm->valid2 = 0;
282 }
283 intr_restore(sysm->saved_intr);
284 }
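/*
 * A sketch of the intended use of the lmem helpers above, for a
 * hypothetical routine that must touch a page with no direct (KSEG0)
 * mapping (the real users are the pmap_zero_page()/pmap_copy_page()
 * style routines mentioned above):
 *
 *	va = pmap_lmem_map1(pa);	interrupts are now disabled
 *	... access the page through va ...
 *	pmap_lmem_unmap();		mapping torn down, interrupts restored
 *
 * pmap_lmem_map2() works the same way but installs two physical pages
 * at consecutive reserved virtual addresses, e.g. for a page copy.
 */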
285 #else /* __mips_n64 */
286
287 static __inline void
288 pmap_alloc_lmem_map(void)
289 {
290 }
291
292 static __inline vm_offset_t
293 pmap_lmem_map1(vm_paddr_t phys)
294 {
295
296 return (0);
297 }
298
299 static __inline vm_offset_t
300 pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
301 {
302
303 return (0);
304 }
305
306 static __inline vm_offset_t
307 pmap_lmem_unmap(void)
308 {
309
310 return (0);
311 }
312 #endif /* !__mips_n64 */
313
314 static __inline int
315 is_cacheable_page(vm_paddr_t pa, vm_page_t m)
316 {
317
318 return ((m->md.pv_flags & PV_MEMATTR_UNCACHEABLE) == 0 &&
319 is_cacheable_mem(pa));
320
321 }
322
323 /*
324 * Page table entry lookup routines.
325 */
326 static __inline pd_entry_t *
327 pmap_segmap(pmap_t pmap, vm_offset_t va)
328 {
329
330 return (&pmap->pm_segtab[pmap_seg_index(va)]);
331 }
332
333 #ifdef __mips_n64
334 static __inline pd_entry_t *
335 pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
336 {
337 pd_entry_t *pde;
338
339 pde = (pd_entry_t *)*pdpe;
340 return (&pde[pmap_pde_index(va)]);
341 }
342
343 static __inline pd_entry_t *
344 pmap_pde(pmap_t pmap, vm_offset_t va)
345 {
346 pd_entry_t *pdpe;
347
348 pdpe = pmap_segmap(pmap, va);
349 if (*pdpe == NULL)
350 return (NULL);
351
352 return (pmap_pdpe_to_pde(pdpe, va));
353 }
354 #else
355 static __inline pd_entry_t *
356 pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
357 {
358
359 return (pdpe);
360 }
361
362 static __inline pd_entry_t *
363 pmap_pde(pmap_t pmap, vm_offset_t va)
364 {
365
366 return (pmap_segmap(pmap, va));
367 }
368 #endif
369
370 static __inline pt_entry_t *
371 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
372 {
373 pt_entry_t *pte;
374
375 pte = (pt_entry_t *)*pde;
376 return (&pte[pmap_pte_index(va)]);
377 }
378
379 pt_entry_t *
380 pmap_pte(pmap_t pmap, vm_offset_t va)
381 {
382 pd_entry_t *pde;
383
384 pde = pmap_pde(pmap, va);
385 if (pde == NULL || *pde == NULL)
386 return (NULL);
387
388 return (pmap_pde_to_pte(pde, va));
389 }
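/*
 * pmap_pte() composes pmap_pde() and pmap_pde_to_pte() and returns NULL
 * when an intermediate level is missing, so a typical lookup checks both
 * the pointer and the valid bit.  A sketch, close to what pmap_extract()
 * below does (and assuming the pmap lock is held):
 *
 *	pte = pmap_pte(pmap, va);
 *	if (pte != NULL && pte_test(pte, PTE_V))
 *		pa = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
 */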
390
391 vm_offset_t
392 pmap_steal_memory(vm_size_t size)
393 {
394 vm_paddr_t bank_size, pa;
395 vm_offset_t va;
396
397 size = round_page(size);
398 bank_size = phys_avail[1] - phys_avail[0];
399 while (size > bank_size) {
400 int i;
401
402 for (i = 0; phys_avail[i + 2]; i += 2) {
403 phys_avail[i] = phys_avail[i + 2];
404 phys_avail[i + 1] = phys_avail[i + 3];
405 }
406 phys_avail[i] = 0;
407 phys_avail[i + 1] = 0;
408 if (!phys_avail[0])
409 panic("pmap_steal_memory: out of memory");
410 bank_size = phys_avail[1] - phys_avail[0];
411 }
412
413 pa = phys_avail[0];
414 phys_avail[0] += size;
415 if (MIPS_DIRECT_MAPPABLE(pa) == 0)
416 panic("Out of memory below 512Meg?");
417 va = MIPS_PHYS_TO_DIRECT(pa);
418 bzero((caddr_t)va, size);
419 return (va);
420 }
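/*
 * pmap_steal_memory() is usable only during early bootstrap: it carves
 * pages off the front of phys_avail[] (discarding banks that are too
 * small) and returns a direct-mapped virtual address, so the stolen
 * memory is never handed to the VM page allocator.  For example,
 * pmap_create_kernel_pagetable() below obtains the kernel segment table
 * this way:
 *
 *	kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
 */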
421
422 /*
423 * Bootstrap the system enough to run with virtual memory. This
424 * assumes that the phys_avail array has been initialized.
425 */
426 static void
427 pmap_create_kernel_pagetable(void)
428 {
429 int i, j;
430 vm_offset_t ptaddr;
431 pt_entry_t *pte;
432 #ifdef __mips_n64
433 pd_entry_t *pde;
434 vm_offset_t pdaddr;
435 int npt, npde;
436 #endif
437
438 /*
439 * Allocate segment table for the kernel
440 */
441 kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
442
443 /*
444 * Allocate second level page tables for the kernel
445 */
446 #ifdef __mips_n64
447 npde = howmany(NKPT, NPDEPG);
448 pdaddr = pmap_steal_memory(PAGE_SIZE * npde);
449 #endif
450 nkpt = NKPT;
451 ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt);
452
453 /*
454 * The R[4-7]?00 stores only one copy of the Global bit in the
455  * translation lookaside buffer for each pair of page entries. Thus invalid
456  * entries must have the Global bit set so that when Entry LO and Entry HI
457  * G bits are ANDed together they will produce a global bit to store
458 * in the tlb.
459 */
460 for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++)
461 *pte = PTE_G;
462
463 #ifdef __mips_n64
464 for (i = 0, npt = nkpt; npt > 0; i++) {
465 kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE);
466 pde = (pd_entry_t *)kernel_segmap[i];
467
468 for (j = 0; j < NPDEPG && npt > 0; j++, npt--)
469 pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE);
470 }
471 #else
472 for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++)
473 kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE));
474 #endif
475
476 PMAP_LOCK_INIT(kernel_pmap);
477 kernel_pmap->pm_segtab = kernel_segmap;
478 CPU_FILL(&kernel_pmap->pm_active);
479 TAILQ_INIT(&kernel_pmap->pm_pvchunk);
480 kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
481 kernel_pmap->pm_asid[0].gen = 0;
482 kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE;
483 }
484
485 void
486 pmap_bootstrap(void)
487 {
488 int i;
489 int need_local_mappings = 0;
490
491 /* Sort. */
492 again:
493 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
494 /*
495 * Keep the memory aligned on page boundary.
496 */
497 phys_avail[i] = round_page(phys_avail[i]);
498 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
499
500 if (i < 2)
501 continue;
502 if (phys_avail[i - 2] > phys_avail[i]) {
503 vm_paddr_t ptemp[2];
504
505 ptemp[0] = phys_avail[i + 0];
506 ptemp[1] = phys_avail[i + 1];
507
508 phys_avail[i + 0] = phys_avail[i - 2];
509 phys_avail[i + 1] = phys_avail[i - 1];
510
511 phys_avail[i - 2] = ptemp[0];
512 phys_avail[i - 1] = ptemp[1];
513 goto again;
514 }
515 }
516
517 /*
518  * On 32-bit systems, we may have memory which cannot be mapped directly.
519  * Such memory needs a temporary mapping before it can be
520  * accessed.
521 */
522 if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1))
523 need_local_mappings = 1;
524
525 /*
526 * Copy the phys_avail[] array before we start stealing memory from it.
527 */
528 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
529 physmem_desc[i] = phys_avail[i];
530 physmem_desc[i + 1] = phys_avail[i + 1];
531 }
532
533 Maxmem = atop(phys_avail[i - 1]);
534
535 if (bootverbose) {
536 printf("Physical memory chunk(s):\n");
537 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
538 vm_paddr_t size;
539
540 size = phys_avail[i + 1] - phys_avail[i];
541 printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
542 (uintmax_t) phys_avail[i],
543 (uintmax_t) phys_avail[i + 1] - 1,
544 (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
545 }
546 printf("Maxmem is 0x%0jx\n", ptoa((uintmax_t)Maxmem));
547 }
548 /*
549 * Steal the message buffer from the beginning of memory.
550 */
551 msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize);
552 msgbufinit(msgbufp, msgbufsize);
553
554 /*
555 * Steal thread0 kstack.
556 */
557 kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
558
559 virtual_avail = VM_MIN_KERNEL_ADDRESS;
560 virtual_end = VM_MAX_KERNEL_ADDRESS;
561
562 #ifdef SMP
563 /*
564 * Steal some virtual address space to map the pcpu area.
565 */
566 virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
567 pcpup = (struct pcpu *)virtual_avail;
568 virtual_avail += PAGE_SIZE * 2;
569
570 /*
571 * Initialize the wired TLB entry mapping the pcpu region for
572 * the BSP at 'pcpup'. Up until this point we were operating
573 * with the 'pcpup' for the BSP pointing to a virtual address
574 * in KSEG0 so there was no need for a TLB mapping.
575 */
576 mips_pcpu_tlb_init(PCPU_ADDR(0));
577
578 if (bootverbose)
579 printf("pcpu is available at virtual address %p.\n", pcpup);
580 #endif
581
582 if (need_local_mappings)
583 pmap_alloc_lmem_map();
584 pmap_create_kernel_pagetable();
585 pmap_max_asid = VMNUM_PIDS;
586 mips_wr_entryhi(0);
587 mips_wr_pagemask(0);
588
589 /*
590 * Initialize the global pv list lock.
591 */
592 rw_init(&pvh_global_lock, "pmap pv global");
593 }
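/*
 * Bootstrap ordering, as implemented above: sort and page-align
 * phys_avail[], record a copy in physmem_desc[] before pmap_steal_memory()
 * starts consuming it, steal the message buffer and thread0's kernel
 * stack, map the per-CPU area on SMP, reserve the per-CPU lmem slots when
 * some memory is not direct-mappable, build the kernel page table, and
 * finally set the ASID limit and initialize the global pv list lock.
 */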
594
595 /*
596 * Initialize a vm_page's machine-dependent fields.
597 */
598 void
599 pmap_page_init(vm_page_t m)
600 {
601
602 TAILQ_INIT(&m->md.pv_list);
603 m->md.pv_flags = 0;
604 }
605
606 /*
607 * Initialize the pmap module.
608 * Called by vm_init, to initialize any structures that the pmap
609 * system needs to map virtual memory.
610 */
611 void
612 pmap_init(void)
613 {
614 }
615
616 /***************************************************
617 * Low level helper routines.....
618 ***************************************************/
619
620 #ifdef SMP
621 static __inline void
622 pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
623 {
624 int cpuid, cpu, self;
625 cpuset_t active_cpus;
626
627 sched_pin();
628 if (is_kernel_pmap(pmap)) {
629 smp_rendezvous(NULL, fn, NULL, arg);
630 goto out;
631 }
632 /* Force ASID update on inactive CPUs */
633 CPU_FOREACH(cpu) {
634 if (!CPU_ISSET(cpu, &pmap->pm_active))
635 pmap->pm_asid[cpu].gen = 0;
636 }
637 cpuid = PCPU_GET(cpuid);
638 /*
639 * XXX: barrier/locking for active?
640 *
641 * Take a snapshot of active here, any further changes are ignored.
642 * tlb update/invalidate should be harmless on inactive CPUs
643 */
644 active_cpus = pmap->pm_active;
645 self = CPU_ISSET(cpuid, &active_cpus);
646 CPU_CLR(cpuid, &active_cpus);
647 /* Optimize for the case where this cpu is the only active one */
648 if (CPU_EMPTY(&active_cpus)) {
649 if (self)
650 fn(arg);
651 } else {
652 if (self)
653 CPU_SET(cpuid, &active_cpus);
654 smp_rendezvous_cpus(active_cpus, NULL, fn, NULL, arg);
655 }
656 out:
657 sched_unpin();
658 }
659 #else /* !SMP */
660 static __inline void
661 pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg)
662 {
663 int cpuid;
664
665 if (is_kernel_pmap(pmap)) {
666 fn(arg);
667 return;
668 }
669 cpuid = PCPU_GET(cpuid);
670 if (!CPU_ISSET(cpuid, &pmap->pm_active))
671 pmap->pm_asid[cpuid].gen = 0;
672 else
673 fn(arg);
674 }
675 #endif /* SMP */
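/*
 * The invalidation wrappers below all follow the same pattern: bundle the
 * arguments into a small struct, provide an "action" callback that
 * performs the TLB operation on the local CPU, and hand both to
 * pmap_call_on_active_cpus(), which runs the action on every CPU where
 * the pmap is active (directly when this CPU is the only one, otherwise
 * via smp_rendezvous_cpus(); the kernel pmap is broadcast to all CPUs).
 * CPUs on which the pmap is inactive merely have their ASID generation
 * reset, so they pick up a fresh ASID, and hence a clean TLB view, the
 * next time the pmap is activated.
 */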
676
677 static void
678 pmap_invalidate_all(pmap_t pmap)
679 {
680
681 pmap_call_on_active_cpus(pmap,
682 (void (*)(void *))tlb_invalidate_all_user, pmap);
683 }
684
685 struct pmap_invalidate_page_arg {
686 pmap_t pmap;
687 vm_offset_t va;
688 };
689
690 static void
691 pmap_invalidate_page_action(void *arg)
692 {
693 struct pmap_invalidate_page_arg *p = arg;
694
695 tlb_invalidate_address(p->pmap, p->va);
696 }
697
698 static void
699 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
700 {
701 struct pmap_invalidate_page_arg arg;
702
703 arg.pmap = pmap;
704 arg.va = va;
705 pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &arg);
706 }
707
708 struct pmap_invalidate_range_arg {
709 pmap_t pmap;
710 vm_offset_t sva;
711 vm_offset_t eva;
712 };
713
714 static void
715 pmap_invalidate_range_action(void *arg)
716 {
717 struct pmap_invalidate_range_arg *p = arg;
718
719 tlb_invalidate_range(p->pmap, p->sva, p->eva);
720 }
721
722 static void
723 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
724 {
725 struct pmap_invalidate_range_arg arg;
726
727 arg.pmap = pmap;
728 arg.sva = sva;
729 arg.eva = eva;
730 pmap_call_on_active_cpus(pmap, pmap_invalidate_range_action, &arg);
731 }
732
733 struct pmap_update_page_arg {
734 pmap_t pmap;
735 vm_offset_t va;
736 pt_entry_t pte;
737 };
738
739 static void
740 pmap_update_page_action(void *arg)
741 {
742 struct pmap_update_page_arg *p = arg;
743
744 tlb_update(p->pmap, p->va, p->pte);
745 }
746
747 static void
748 pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
749 {
750 struct pmap_update_page_arg arg;
751
752 arg.pmap = pmap;
753 arg.va = va;
754 arg.pte = pte;
755 pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg);
756 }
757
758 /*
759 * Routine: pmap_extract
760 * Function:
761 * Extract the physical page address associated
762 * with the given map/virtual_address pair.
763 */
764 vm_paddr_t
765 pmap_extract(pmap_t pmap, vm_offset_t va)
766 {
767 pt_entry_t *pte;
768 vm_offset_t retval = 0;
769
770 PMAP_LOCK(pmap);
771 pte = pmap_pte(pmap, va);
772 if (pte) {
773 retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
774 }
775 PMAP_UNLOCK(pmap);
776 return (retval);
777 }
778
779 /*
780 * Routine: pmap_extract_and_hold
781 * Function:
782 * Atomically extract and hold the physical page
783 * with the given pmap and virtual address pair
784 * if that mapping permits the given protection.
785 */
786 vm_page_t
787 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
788 {
789 pt_entry_t pte, *ptep;
790 vm_paddr_t pa, pte_pa;
791 vm_page_t m;
792
793 m = NULL;
794 pa = 0;
795 PMAP_LOCK(pmap);
796 retry:
797 ptep = pmap_pte(pmap, va);
798 if (ptep != NULL) {
799 pte = *ptep;
800 if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) ||
801 (prot & VM_PROT_WRITE) == 0)) {
802 pte_pa = TLBLO_PTE_TO_PA(pte);
803 if (vm_page_pa_tryrelock(pmap, pte_pa, &pa))
804 goto retry;
805 m = PHYS_TO_VM_PAGE(pte_pa);
806 vm_page_hold(m);
807 }
808 }
809 PA_UNLOCK_COND(pa);
810 PMAP_UNLOCK(pmap);
811 return (m);
812 }
813
814 /***************************************************
815 * Low level mapping routines.....
816 ***************************************************/
817
818 /*
819 * add a wired page to the kva
820 */
821 void
822 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr)
823 {
824 pt_entry_t *pte;
825 pt_entry_t opte, npte;
826
827 #ifdef PMAP_DEBUG
828 printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa);
829 #endif
830
831 pte = pmap_pte(kernel_pmap, va);
832 opte = *pte;
833 npte = TLBLO_PA_TO_PFN(pa) | attr | PTE_D | PTE_V | PTE_G;
834 *pte = npte;
835 if (pte_test(&opte, PTE_V) && opte != npte)
836 pmap_update_page(kernel_pmap, va, npte);
837 }
838
839 void
840 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
841 {
842
843 KASSERT(is_cacheable_mem(pa),
844 ("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa));
845
846 pmap_kenter_attr(va, pa, PTE_C_CACHE);
847 }
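/*
 * pmap_kenter_attr() is the attribute-aware variant; a hypothetical
 * caller needing an uncached kernel mapping could use, for instance,
 *
 *	pmap_kenter_attr(va, pa, PTE_C_UNCACHED);
 *
 * whereas pmap_kenter() asserts that the memory is cacheable and always
 * maps with PTE_C_CACHE.  (pmap_enter() further down chooses between the
 * same two attributes based on is_cacheable_page().)
 */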
848
849 /*
850 * remove a page from the kernel pagetables
851 */
852 /* PMAP_INLINE */ void
853 pmap_kremove(vm_offset_t va)
854 {
855 pt_entry_t *pte;
856
857 /*
858 * Write back all caches from the page being destroyed
859 */
860 mips_dcache_wbinv_range_index(va, PAGE_SIZE);
861
862 pte = pmap_pte(kernel_pmap, va);
863 *pte = PTE_G;
864 pmap_invalidate_page(kernel_pmap, va);
865 }
866
867 /*
868 * Used to map a range of physical addresses into kernel
869 * virtual address space.
870 *
871 * The value passed in '*virt' is a suggested virtual address for
872 * the mapping. Architectures which can support a direct-mapped
873 * physical to virtual region can return the appropriate address
874 * within that region, leaving '*virt' unchanged. Other
875 * architectures should map the pages starting at '*virt' and
876 * update '*virt' with the first usable address after the mapped
877 * region.
878 *
879 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
880 */
881 vm_offset_t
882 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
883 {
884 vm_offset_t va, sva;
885
886 if (MIPS_DIRECT_MAPPABLE(end - 1))
887 return (MIPS_PHYS_TO_DIRECT(start));
888
889 va = sva = *virt;
890 while (start < end) {
891 pmap_kenter(va, start);
892 va += PAGE_SIZE;
893 start += PAGE_SIZE;
894 }
895 *virt = va;
896 return (sva);
897 }
898
899 /*
900  * Add a list of wired pages to the kva. This
901  * routine is only used for temporary
902 * kernel mappings that do not need to have
903 * page modification or references recorded.
904 * Note that old mappings are simply written
905 * over. The page *must* be wired.
906 */
907 void
908 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
909 {
910 int i;
911 vm_offset_t origva = va;
912
913 for (i = 0; i < count; i++) {
914 pmap_flush_pvcache(m[i]);
915 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
916 va += PAGE_SIZE;
917 }
918
919 mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count);
920 }
921
922 /*
923 * this routine jerks page mappings from the
924 * kernel -- it is meant only for temporary mappings.
925 */
926 void
927 pmap_qremove(vm_offset_t va, int count)
928 {
929 pt_entry_t *pte;
930 vm_offset_t origva;
931
932 if (count < 1)
933 return;
934 mips_dcache_wbinv_range_index(va, PAGE_SIZE * count);
935 origva = va;
936 do {
937 pte = pmap_pte(kernel_pmap, va);
938 *pte = PTE_G;
939 va += PAGE_SIZE;
940 } while (--count > 0);
941 pmap_invalidate_range(kernel_pmap, origva, va);
942 }
943
944 /***************************************************
945 * Page table page management routines.....
946 ***************************************************/
947
948 /*
949 * Decrements a page table page's wire count, which is used to record the
950 * number of valid page table entries within the page. If the wire count
951 * drops to zero, then the page table page is unmapped. Returns TRUE if the
952 * page table page was unmapped and FALSE otherwise.
953 */
954 static PMAP_INLINE boolean_t
955 pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
956 {
957
958 --m->wire_count;
959 if (m->wire_count == 0) {
960 _pmap_unwire_ptp(pmap, va, m);
961 return (TRUE);
962 } else
963 return (FALSE);
964 }
965
966 static void
967 _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m)
968 {
969 pd_entry_t *pde;
970
971 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
972 /*
973 * unmap the page table page
974 */
975 #ifdef __mips_n64
976 if (m->pindex < NUPDE)
977 pde = pmap_pde(pmap, va);
978 else
979 pde = pmap_segmap(pmap, va);
980 #else
981 pde = pmap_pde(pmap, va);
982 #endif
983 *pde = 0;
984 pmap->pm_stats.resident_count--;
985
986 #ifdef __mips_n64
987 if (m->pindex < NUPDE) {
988 pd_entry_t *pdp;
989 vm_page_t pdpg;
990
991 /*
992 * Recursively decrement next level pagetable refcount
993 */
994 pdp = (pd_entry_t *)*pmap_segmap(pmap, va);
995 pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp));
996 pmap_unwire_ptp(pmap, va, pdpg);
997 }
998 #endif
999
1000 /*
1001 * If the page is finally unwired, simply free it.
1002 */
1003 vm_page_free_zero(m);
1004 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1005 }
1006
1007 /*
1008 * After removing a page table entry, this routine is used to
1009 * conditionally free the page, and manage the hold/wire counts.
1010 */
1011 static int
1012 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t pde)
1013 {
1014 vm_page_t mpte;
1015
1016 if (va >= VM_MAXUSER_ADDRESS)
1017 return (0);
1018 KASSERT(pde != 0, ("pmap_unuse_pt: pde != 0"));
1019 mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pde));
1020 return (pmap_unwire_ptp(pmap, va, mpte));
1021 }
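/*
 * Reference counting for page table pages, as implemented by
 * pmap_unwire_ptp(), _pmap_unwire_ptp() and pmap_unuse_pt() above: every
 * valid mapping in a page table page contributes one to that page's
 * wire_count.  Removing a user mapping calls pmap_unuse_pt(), which drops
 * the count; when it reaches zero the page directory (or segment table)
 * slot is cleared, the page is freed, and on n64 the reference held on
 * the next level up is dropped in turn, possibly freeing the page
 * directory page as well.  Kernel mappings (va >= VM_MAXUSER_ADDRESS)
 * never release page table pages.
 */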
1022
1023 void
1024 pmap_pinit0(pmap_t pmap)
1025 {
1026 int i;
1027
1028 PMAP_LOCK_INIT(pmap);
1029 pmap->pm_segtab = kernel_segmap;
1030 CPU_ZERO(&pmap->pm_active);
1031 for (i = 0; i < MAXCPU; i++) {
1032 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1033 pmap->pm_asid[i].gen = 0;
1034 }
1035 PCPU_SET(curpmap, pmap);
1036 TAILQ_INIT(&pmap->pm_pvchunk);
1037 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1038 }
1039
1040 static void
1041 pmap_grow_direct_page(int req)
1042 {
1043
1044 #ifdef __mips_n64
1045 VM_WAIT;
1046 #else
1047 if (!vm_page_reclaim_contig(req, 1, 0, MIPS_KSEG0_LARGEST_PHYS,
1048 PAGE_SIZE, 0))
1049 VM_WAIT;
1050 #endif
1051 }
1052
1053 static vm_page_t
1054 pmap_alloc_direct_page(unsigned int index, int req)
1055 {
1056 vm_page_t m;
1057
1058 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req | VM_ALLOC_WIRED |
1059 VM_ALLOC_ZERO);
1060 if (m == NULL)
1061 return (NULL);
1062
1063 if ((m->flags & PG_ZERO) == 0)
1064 pmap_zero_page(m);
1065
1066 m->pindex = index;
1067 return (m);
1068 }
1069
1070 /*
1071 * Initialize a preallocated and zeroed pmap structure,
1072 * such as one in a vmspace structure.
1073 */
1074 int
1075 pmap_pinit(pmap_t pmap)
1076 {
1077 vm_offset_t ptdva;
1078 vm_page_t ptdpg;
1079 int i, req_class;
1080
1081 /*
1082 * allocate the page directory page
1083 */
1084 req_class = VM_ALLOC_NORMAL;
1085 while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, req_class)) ==
1086 NULL)
1087 pmap_grow_direct_page(req_class);
1088
1089 ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
1090 pmap->pm_segtab = (pd_entry_t *)ptdva;
1091 CPU_ZERO(&pmap->pm_active);
1092 for (i = 0; i < MAXCPU; i++) {
1093 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1094 pmap->pm_asid[i].gen = 0;
1095 }
1096 TAILQ_INIT(&pmap->pm_pvchunk);
1097 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1098
1099 return (1);
1100 }
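/*
 * Note that pmap_pinit() cannot fail: the segment table page must come
 * from the direct-mapped freelist (VM_FREELIST_DIRECT), and the loop
 * above simply waits, via pmap_grow_direct_page(), until such a page
 * becomes available before returning 1.
 */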
1101
1102 /*
1103 * this routine is called if the page table page is not
1104 * mapped correctly.
1105 */
1106 static vm_page_t
1107 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, u_int flags)
1108 {
1109 vm_offset_t pageva;
1110 vm_page_t m;
1111 int req_class;
1112
1113 /*
1114 * Find or fabricate a new pagetable page
1115 */
1116 req_class = VM_ALLOC_NORMAL;
1117 if ((m = pmap_alloc_direct_page(ptepindex, req_class)) == NULL) {
1118 if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
1119 PMAP_UNLOCK(pmap);
1120 rw_wunlock(&pvh_global_lock);
1121 pmap_grow_direct_page(req_class);
1122 rw_wlock(&pvh_global_lock);
1123 PMAP_LOCK(pmap);
1124 }
1125
1126 /*
1127 * Indicate the need to retry. While waiting, the page
1128 * table page may have been allocated.
1129 */
1130 return (NULL);
1131 }
1132
1133 /*
1134 * Map the pagetable page into the process address space, if it
1135 * isn't already there.
1136 */
1137 pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1138
1139 #ifdef __mips_n64
1140 if (ptepindex >= NUPDE) {
1141 pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva;
1142 } else {
1143 pd_entry_t *pdep, *pde;
1144 int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT);
1145 int pdeindex = ptepindex & (NPDEPG - 1);
1146 vm_page_t pg;
1147
1148 pdep = &pmap->pm_segtab[segindex];
1149 if (*pdep == NULL) {
1150 /* recurse for allocating page dir */
1151 if (_pmap_allocpte(pmap, NUPDE + segindex,
1152 flags) == NULL) {
1153 /* alloc failed, release current */
1154 --m->wire_count;
1155 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1156 vm_page_free_zero(m);
1157 return (NULL);
1158 }
1159 } else {
1160 pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep));
1161 pg->wire_count++;
1162 }
1163 /* Next level entry */
1164 pde = (pd_entry_t *)*pdep;
1165 pde[pdeindex] = (pd_entry_t)pageva;
1166 }
1167 #else
1168 pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva;
1169 #endif
1170 pmap->pm_stats.resident_count++;
1171 return (m);
1172 }
1173
1174 static vm_page_t
1175 pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
1176 {
1177 unsigned ptepindex;
1178 pd_entry_t *pde;
1179 vm_page_t m;
1180
1181 /*
1182 * Calculate pagetable page index
1183 */
1184 ptepindex = pmap_pde_pindex(va);
1185 retry:
1186 /*
1187 * Get the page directory entry
1188 */
1189 pde = pmap_pde(pmap, va);
1190
1191 /*
1192 * If the page table page is mapped, we just increment the hold
1193 * count, and activate it.
1194 */
1195 if (pde != NULL && *pde != NULL) {
1196 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde));
1197 m->wire_count++;
1198 } else {
1199 /*
1200 * Here if the pte page isn't mapped, or if it has been
1201 * deallocated.
1202 */
1203 m = _pmap_allocpte(pmap, ptepindex, flags);
1204 if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
1205 goto retry;
1206 }
1207 return (m);
1208 }
1209
1210
1211 /***************************************************
1212 * Pmap allocation/deallocation routines.
1213 ***************************************************/
1214
1215 /*
1216 * Release any resources held by the given physical map.
1217 * Called when a pmap initialized by pmap_pinit is being released.
1218 * Should only be called if the map contains no valid mappings.
1219 */
1220 void
1221 pmap_release(pmap_t pmap)
1222 {
1223 vm_offset_t ptdva;
1224 vm_page_t ptdpg;
1225
1226 KASSERT(pmap->pm_stats.resident_count == 0,
1227 ("pmap_release: pmap resident count %ld != 0",
1228 pmap->pm_stats.resident_count));
1229
1230 ptdva = (vm_offset_t)pmap->pm_segtab;
1231 ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));
1232
1233 ptdpg->wire_count--;
1234 atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1235 vm_page_free_zero(ptdpg);
1236 }
1237
1238 /*
1239 * grow the number of kernel page table entries, if needed
1240 */
1241 void
1242 pmap_growkernel(vm_offset_t addr)
1243 {
1244 vm_page_t nkpg;
1245 pd_entry_t *pde, *pdpe;
1246 pt_entry_t *pte;
1247 int i, req_class;
1248
1249 mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1250 req_class = VM_ALLOC_INTERRUPT;
1251 addr = roundup2(addr, NBSEG);
1252 if (addr - 1 >= vm_map_max(kernel_map))
1253 addr = vm_map_max(kernel_map);
1254 while (kernel_vm_end < addr) {
1255 pdpe = pmap_segmap(kernel_pmap, kernel_vm_end);
1256 #ifdef __mips_n64
1257 if (*pdpe == 0) {
1258 /* new intermediate page table entry */
1259 nkpg = pmap_alloc_direct_page(nkpt, req_class);
1260 if (nkpg == NULL)
1261 panic("pmap_growkernel: no memory to grow kernel");
1262 *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1263 continue; /* try again */
1264 }
1265 #endif
1266 pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
1267 if (*pde != 0) {
1268 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1269 if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
1270 kernel_vm_end = vm_map_max(kernel_map);
1271 break;
1272 }
1273 continue;
1274 }
1275
1276 /*
1277 * This index is bogus, but out of the way
1278 */
1279 nkpg = pmap_alloc_direct_page(nkpt, req_class);
1280 #ifndef __mips_n64
1281 if (nkpg == NULL && vm_page_reclaim_contig(req_class, 1,
1282 0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0))
1283 nkpg = pmap_alloc_direct_page(nkpt, req_class);
1284 #endif
1285 if (nkpg == NULL)
1286 panic("pmap_growkernel: no memory to grow kernel");
1287 nkpt++;
1288 *pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1289
1290 /*
1291 * The R[4-7]?00 stores only one copy of the Global bit in
1292  * the translation lookaside buffer for each pair of page entries.
1293  * Thus invalid entries must have the Global bit set so that when
1294  * Entry LO and Entry HI G bits are ANDed together they will
1295 * produce a global bit to store in the tlb.
1296 */
1297 pte = (pt_entry_t *)*pde;
1298 for (i = 0; i < NPTEPG; i++)
1299 pte[i] = PTE_G;
1300
1301 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1302 if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
1303 kernel_vm_end = vm_map_max(kernel_map);
1304 break;
1305 }
1306 }
1307 }
1308
1309 /***************************************************
1310 * page management routines.
1311 ***************************************************/
1312
1313 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1314 #ifdef __mips_n64
1315 CTASSERT(_NPCM == 3);
1316 CTASSERT(_NPCPV == 168);
1317 #else
1318 CTASSERT(_NPCM == 11);
1319 CTASSERT(_NPCPV == 336);
1320 #endif
1321
1322 static __inline struct pv_chunk *
1323 pv_to_chunk(pv_entry_t pv)
1324 {
1325
1326 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1327 }
1328
1329 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1330
1331 #ifdef __mips_n64
1332 #define PC_FREE0_1 0xfffffffffffffffful
1333 #define PC_FREE2 0x000000fffffffffful
1334 #else
1335 #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */
1336 #define PC_FREE10 0x0000fffful /* Free values for index 10 */
1337 #endif
1338
1339 static const u_long pc_freemask[_NPCM] = {
1340 #ifdef __mips_n64
1341 PC_FREE0_1, PC_FREE0_1, PC_FREE2
1342 #else
1343 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1344 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1345 PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
1346 PC_FREE0_9, PC_FREE10
1347 #endif
1348 };
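/*
 * The free masks follow directly from _NPCPV and the width of a pc_map[]
 * word: on n64, 168 = 2 * 64 + 40, so pc_map[0] and pc_map[1] are fully
 * free (PC_FREE0_1) and pc_map[2] has only its low 40 bits set
 * (PC_FREE2); on the 32-bit kernel, 336 = 10 * 32 + 16, giving ten full
 * 32-bit words (PC_FREE0_9) and a final 16-bit mask (PC_FREE10).  The
 * CTASSERTs above guarantee that a pv chunk is exactly one page.
 */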
1349
1350 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
1351
1352 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1353 "Current number of pv entries");
1354
1355 #ifdef PV_STATS
1356 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1357
1358 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1359 "Current number of pv entry chunks");
1360 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1361 "Current number of pv entry chunks allocated");
1362 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1363 "Current number of pv entry chunks frees");
1364 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1365 "Number of times tried to get a chunk page but failed.");
1366
1367 static long pv_entry_frees, pv_entry_allocs;
1368 static int pv_entry_spare;
1369
1370 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1371 "Current number of pv entry frees");
1372 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1373 "Current number of pv entry allocs");
1374 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1375 "Current number of spare pv entries");
1376 #endif
1377
1378 /*
1379 * We are in a serious low memory condition. Resort to
1380 * drastic measures to free some pages so we can allocate
1381 * another pv entry chunk.
1382 */
1383 static vm_page_t
1384 pmap_pv_reclaim(pmap_t locked_pmap)
1385 {
1386 struct pch newtail;
1387 struct pv_chunk *pc;
1388 pd_entry_t *pde;
1389 pmap_t pmap;
1390 pt_entry_t *pte, oldpte;
1391 pv_entry_t pv;
1392 vm_offset_t va;
1393 vm_page_t m, m_pc;
1394 u_long inuse;
1395 int bit, field, freed, idx;
1396
1397 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
1398 pmap = NULL;
1399 m_pc = NULL;
1400 TAILQ_INIT(&newtail);
1401 while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
1402 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1403 if (pmap != pc->pc_pmap) {
1404 if (pmap != NULL) {
1405 pmap_invalidate_all(pmap);
1406 if (pmap != locked_pmap)
1407 PMAP_UNLOCK(pmap);
1408 }
1409 pmap = pc->pc_pmap;
1410 /* Avoid deadlock and lock recursion. */
1411 if (pmap > locked_pmap)
1412 PMAP_LOCK(pmap);
1413 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
1414 pmap = NULL;
1415 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1416 continue;
1417 }
1418 }
1419
1420 /*
1421 * Destroy every non-wired, 4 KB page mapping in the chunk.
1422 */
1423 freed = 0;
1424 for (field = 0; field < _NPCM; field++) {
1425 for (inuse = ~pc->pc_map[field] & pc_freemask[field];
1426 inuse != 0; inuse &= ~(1UL << bit)) {
1427 bit = ffsl(inuse) - 1;
1428 idx = field * sizeof(inuse) * NBBY + bit;
1429 pv = &pc->pc_pventry[idx];
1430 va = pv->pv_va;
1431 pde = pmap_pde(pmap, va);
1432 KASSERT(pde != NULL && *pde != 0,
1433 ("pmap_pv_reclaim: pde"));
1434 pte = pmap_pde_to_pte(pde, va);
1435 oldpte = *pte;
1436 if (pte_test(&oldpte, PTE_W))
1437 continue;
1438 if (is_kernel_pmap(pmap))
1439 *pte = PTE_G;
1440 else
1441 *pte = 0;
1442 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(oldpte));
1443 if (pte_test(&oldpte, PTE_D))
1444 vm_page_dirty(m);
1445 if (m->md.pv_flags & PV_TABLE_REF)
1446 vm_page_aflag_set(m, PGA_REFERENCED);
1447 m->md.pv_flags &= ~PV_TABLE_REF;
1448 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1449 if (TAILQ_EMPTY(&m->md.pv_list))
1450 vm_page_aflag_clear(m, PGA_WRITEABLE);
1451 pc->pc_map[field] |= 1UL << bit;
1452 pmap_unuse_pt(pmap, va, *pde);
1453 freed++;
1454 }
1455 }
1456 if (freed == 0) {
1457 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1458 continue;
1459 }
1460 /* Every freed mapping is for a 4 KB page. */
1461 pmap->pm_stats.resident_count -= freed;
1462 PV_STAT(pv_entry_frees += freed);
1463 PV_STAT(pv_entry_spare += freed);
1464 pv_entry_count -= freed;
1465 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1466 for (field = 0; field < _NPCM; field++)
1467 if (pc->pc_map[field] != pc_freemask[field]) {
1468 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
1469 pc_list);
1470 TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
1471
1472 /*
1473 * One freed pv entry in locked_pmap is
1474 * sufficient.
1475 */
1476 if (pmap == locked_pmap)
1477 goto out;
1478 break;
1479 }
1480 if (field == _NPCM) {
1481 PV_STAT(pv_entry_spare -= _NPCPV);
1482 PV_STAT(pc_chunk_count--);
1483 PV_STAT(pc_chunk_frees++);
1484 /* Entire chunk is free; return it. */
1485 m_pc = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(
1486 (vm_offset_t)pc));
1487 break;
1488 }
1489 }
1490 out:
1491 TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
1492 if (pmap != NULL) {
1493 pmap_invalidate_all(pmap);
1494 if (pmap != locked_pmap)
1495 PMAP_UNLOCK(pmap);
1496 }
1497 return (m_pc);
1498 }
1499
1500 /*
1501 * free the pv_entry back to the free list
1502 */
1503 static void
1504 free_pv_entry(pmap_t pmap, pv_entry_t pv)
1505 {
1506 struct pv_chunk *pc;
1507 int bit, field, idx;
1508
1509 rw_assert(&pvh_global_lock, RA_WLOCKED);
1510 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1511 PV_STAT(pv_entry_frees++);
1512 PV_STAT(pv_entry_spare++);
1513 pv_entry_count--;
1514 pc = pv_to_chunk(pv);
1515 idx = pv - &pc->pc_pventry[0];
1516 field = idx / (sizeof(u_long) * NBBY);
1517 bit = idx % (sizeof(u_long) * NBBY);
1518 pc->pc_map[field] |= 1ul << bit;
1519 for (idx = 0; idx < _NPCM; idx++)
1520 if (pc->pc_map[idx] != pc_freemask[idx]) {
1521 /*
1522 * 98% of the time, pc is already at the head of the
1523 * list. If it isn't already, move it to the head.
1524 */
1525 if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
1526 pc)) {
1527 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1528 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
1529 pc_list);
1530 }
1531 return;
1532 }
1533 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1534 free_pv_chunk(pc);
1535 }
1536
1537 static void
1538 free_pv_chunk(struct pv_chunk *pc)
1539 {
1540 vm_page_t m;
1541
1542 TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1543 PV_STAT(pv_entry_spare -= _NPCPV);
1544 PV_STAT(pc_chunk_count--);
1545 PV_STAT(pc_chunk_frees++);
1546 /* entire chunk is free, return it */
1547 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS((vm_offset_t)pc));
1548 vm_page_unwire(m, PQ_NONE);
1549 vm_page_free(m);
1550 }
1551
1552 /*
1553 * get a new pv_entry, allocating a block from the system
1554 * when needed.
1555 */
1556 static pv_entry_t
1557 get_pv_entry(pmap_t pmap, boolean_t try)
1558 {
1559 struct pv_chunk *pc;
1560 pv_entry_t pv;
1561 vm_page_t m;
1562 int bit, field, idx;
1563
1564 rw_assert(&pvh_global_lock, RA_WLOCKED);
1565 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1566 PV_STAT(pv_entry_allocs++);
1567 pv_entry_count++;
1568 retry:
1569 pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1570 if (pc != NULL) {
1571 for (field = 0; field < _NPCM; field++) {
1572 if (pc->pc_map[field]) {
1573 bit = ffsl(pc->pc_map[field]) - 1;
1574 break;
1575 }
1576 }
1577 if (field < _NPCM) {
1578 idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
1579 pv = &pc->pc_pventry[idx];
1580 pc->pc_map[field] &= ~(1ul << bit);
1581 /* If this was the last item, move it to tail */
1582 for (field = 0; field < _NPCM; field++)
1583 if (pc->pc_map[field] != 0) {
1584 PV_STAT(pv_entry_spare--);
1585 return (pv); /* not full, return */
1586 }
1587 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1588 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
1589 PV_STAT(pv_entry_spare--);
1590 return (pv);
1591 }
1592 }
1593 /* No free items, allocate another chunk */
1594 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, VM_ALLOC_NORMAL |
1595 VM_ALLOC_WIRED);
1596 if (m == NULL) {
1597 if (try) {
1598 pv_entry_count--;
1599 PV_STAT(pc_chunk_tryfail++);
1600 return (NULL);
1601 }
1602 m = pmap_pv_reclaim(pmap);
1603 if (m == NULL)
1604 goto retry;
1605 }
1606 PV_STAT(pc_chunk_count++);
1607 PV_STAT(pc_chunk_allocs++);
1608 pc = (struct pv_chunk *)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1609 pc->pc_pmap = pmap;
1610 pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
1611 for (field = 1; field < _NPCM; field++)
1612 pc->pc_map[field] = pc_freemask[field];
1613 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1614 pv = &pc->pc_pventry[0];
1615 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1616 PV_STAT(pv_entry_spare += _NPCPV - 1);
1617 return (pv);
1618 }
1619
1620 static pv_entry_t
1621 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1622 {
1623 pv_entry_t pv;
1624
1625 rw_assert(&pvh_global_lock, RA_WLOCKED);
1626 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
1627 if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1628 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
1629 break;
1630 }
1631 }
1632 return (pv);
1633 }
1634
1635 static void
1636 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1637 {
1638 pv_entry_t pv;
1639
1640 pv = pmap_pvh_remove(pvh, pmap, va);
1641 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx",
1642 (u_long)VM_PAGE_TO_PHYS(__containerof(pvh, struct vm_page, md)),
1643 (u_long)va));
1644 free_pv_entry(pmap, pv);
1645 }
1646
1647 static void
1648 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1649 {
1650
1651 rw_assert(&pvh_global_lock, RA_WLOCKED);
1652 pmap_pvh_free(&m->md, pmap, va);
1653 if (TAILQ_EMPTY(&m->md.pv_list))
1654 vm_page_aflag_clear(m, PGA_WRITEABLE);
1655 }
1656
1657 /*
1658 * Conditionally create a pv entry.
1659 */
1660 static boolean_t
1661 pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
1662 vm_page_t m)
1663 {
1664 pv_entry_t pv;
1665
1666 rw_assert(&pvh_global_lock, RA_WLOCKED);
1667 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1668 if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
1669 pv->pv_va = va;
1670 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1671 return (TRUE);
1672 } else
1673 return (FALSE);
1674 }
1675
1676 /*
1677 * pmap_remove_pte: do the things to unmap a page in a process
1678 */
1679 static int
1680 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va,
1681 pd_entry_t pde)
1682 {
1683 pt_entry_t oldpte;
1684 vm_page_t m;
1685 vm_paddr_t pa;
1686
1687 rw_assert(&pvh_global_lock, RA_WLOCKED);
1688 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1689
1690 /*
1691 * Write back all cache lines from the page being unmapped.
1692 */
1693 mips_dcache_wbinv_range_index(va, PAGE_SIZE);
1694
1695 oldpte = *ptq;
1696 if (is_kernel_pmap(pmap))
1697 *ptq = PTE_G;
1698 else
1699 *ptq = 0;
1700
1701 if (pte_test(&oldpte, PTE_W))
1702 pmap->pm_stats.wired_count -= 1;
1703
1704 pmap->pm_stats.resident_count -= 1;
1705
1706 if (pte_test(&oldpte, PTE_MANAGED)) {
1707 pa = TLBLO_PTE_TO_PA(oldpte);
1708 m = PHYS_TO_VM_PAGE(pa);
1709 if (pte_test(&oldpte, PTE_D)) {
1710 KASSERT(!pte_test(&oldpte, PTE_RO),
1711 ("%s: modified page not writable: va: %p, pte: %#jx",
1712 __func__, (void *)va, (uintmax_t)oldpte));
1713 vm_page_dirty(m);
1714 }
1715 if (m->md.pv_flags & PV_TABLE_REF)
1716 vm_page_aflag_set(m, PGA_REFERENCED);
1717 m->md.pv_flags &= ~PV_TABLE_REF;
1718
1719 pmap_remove_entry(pmap, m, va);
1720 }
1721 return (pmap_unuse_pt(pmap, va, pde));
1722 }
1723
1724 /*
1725 * Remove a single page from a process address space
1726 */
1727 static void
1728 pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1729 {
1730 pd_entry_t *pde;
1731 pt_entry_t *ptq;
1732
1733 rw_assert(&pvh_global_lock, RA_WLOCKED);
1734 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1735 pde = pmap_pde(pmap, va);
1736 if (pde == NULL || *pde == 0)
1737 return;
1738 ptq = pmap_pde_to_pte(pde, va);
1739
1740 /*
1741 * If there is no pte for this address, just skip it!
1742 */
1743 if (!pte_test(ptq, PTE_V))
1744 return;
1745
1746 (void)pmap_remove_pte(pmap, ptq, va, *pde);
1747 pmap_invalidate_page(pmap, va);
1748 }
1749
1750 /*
1751 * Remove the given range of addresses from the specified map.
1752 *
1753 * It is assumed that the start and end are properly
1754 * rounded to the page size.
1755 */
1756 void
1757 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1758 {
1759 pd_entry_t *pde, *pdpe;
1760 pt_entry_t *pte;
1761 vm_offset_t va, va_next;
1762
1763 /*
1764 * Perform an unsynchronized read. This is, however, safe.
1765 */
1766 if (pmap->pm_stats.resident_count == 0)
1767 return;
1768
1769 rw_wlock(&pvh_global_lock);
1770 PMAP_LOCK(pmap);
1771
1772 /*
1773  * Special handling for removing a single page: a very common
1774  * operation for which some code can easily be short-circuited.
1775 */
1776 if ((sva + PAGE_SIZE) == eva) {
1777 pmap_remove_page(pmap, sva);
1778 goto out;
1779 }
1780 for (; sva < eva; sva = va_next) {
1781 pdpe = pmap_segmap(pmap, sva);
1782 #ifdef __mips_n64
1783 if (*pdpe == 0) {
1784 va_next = (sva + NBSEG) & ~SEGMASK;
1785 if (va_next < sva)
1786 va_next = eva;
1787 continue;
1788 }
1789 #endif
1790 va_next = (sva + NBPDR) & ~PDRMASK;
1791 if (va_next < sva)
1792 va_next = eva;
1793
1794 pde = pmap_pdpe_to_pde(pdpe, sva);
1795 if (*pde == NULL)
1796 continue;
1797
1798 /*
1799 * Limit our scan to either the end of the va represented
1800 * by the current page table page, or to the end of the
1801 * range being removed.
1802 */
1803 if (va_next > eva)
1804 va_next = eva;
1805
1806 va = va_next;
1807 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1808 sva += PAGE_SIZE) {
1809 if (!pte_test(pte, PTE_V)) {
1810 if (va != va_next) {
1811 pmap_invalidate_range(pmap, va, sva);
1812 va = va_next;
1813 }
1814 continue;
1815 }
1816 if (va == va_next)
1817 va = sva;
1818 if (pmap_remove_pte(pmap, pte, sva, *pde)) {
1819 sva += PAGE_SIZE;
1820 break;
1821 }
1822 }
1823 if (va != va_next)
1824 pmap_invalidate_range(pmap, va, sva);
1825 }
1826 out:
1827 rw_wunlock(&pvh_global_lock);
1828 PMAP_UNLOCK(pmap);
1829 }
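/*
 * Both pmap_remove() above and pmap_protect() below batch their TLB
 * shootdowns: "va" tracks the start of a run of contiguous pages whose
 * PTEs have been modified, and pmap_invalidate_range(pmap, va, sva) is
 * issued only when the run ends (at an invalid PTE, when removing a PTE
 * frees the page table page, or at the end of the current page table
 * page or of the range) instead of invalidating each page individually.
 */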
1830
1831 /*
1832 * Routine: pmap_remove_all
1833 * Function:
1834 * Removes this physical page from
1835 * all physical maps in which it resides.
1836 * Reflects back modify bits to the pager.
1837 *
1838 * Notes:
1839 * Original versions of this routine were very
1840 * inefficient because they iteratively called
1841 * pmap_remove (slow...)
1842 */
1843
1844 void
1845 pmap_remove_all(vm_page_t m)
1846 {
1847 pv_entry_t pv;
1848 pmap_t pmap;
1849 pd_entry_t *pde;
1850 pt_entry_t *pte, tpte;
1851
1852 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1853 ("pmap_remove_all: page %p is not managed", m));
1854 rw_wlock(&pvh_global_lock);
1855
1856 if (m->md.pv_flags & PV_TABLE_REF)
1857 vm_page_aflag_set(m, PGA_REFERENCED);
1858
1859 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1860 pmap = PV_PMAP(pv);
1861 PMAP_LOCK(pmap);
1862
1863 /*
1864  * If it's the last mapping, write back all caches from
1865  * the page being destroyed.
1866 */
1867 if (TAILQ_NEXT(pv, pv_list) == NULL)
1868 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
1869
1870 pmap->pm_stats.resident_count--;
1871
1872 pde = pmap_pde(pmap, pv->pv_va);
1873 KASSERT(pde != NULL && *pde != 0, ("pmap_remove_all: pde"));
1874 pte = pmap_pde_to_pte(pde, pv->pv_va);
1875
1876 tpte = *pte;
1877 if (is_kernel_pmap(pmap))
1878 *pte = PTE_G;
1879 else
1880 *pte = 0;
1881
1882 if (pte_test(&tpte, PTE_W))
1883 pmap->pm_stats.wired_count--;
1884
1885 /*
1886 * Update the vm_page_t clean and reference bits.
1887 */
1888 if (pte_test(&tpte, PTE_D)) {
1889 KASSERT(!pte_test(&tpte, PTE_RO),
1890 ("%s: modified page not writable: va: %p, pte: %#jx",
1891 __func__, (void *)pv->pv_va, (uintmax_t)tpte));
1892 vm_page_dirty(m);
1893 }
1894 pmap_invalidate_page(pmap, pv->pv_va);
1895
1896 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1897 pmap_unuse_pt(pmap, pv->pv_va, *pde);
1898 free_pv_entry(pmap, pv);
1899 PMAP_UNLOCK(pmap);
1900 }
1901
1902 vm_page_aflag_clear(m, PGA_WRITEABLE);
1903 m->md.pv_flags &= ~PV_TABLE_REF;
1904 rw_wunlock(&pvh_global_lock);
1905 }
1906
1907 /*
1908 * Set the physical protection on the
1909 * specified range of this map as requested.
1910 */
1911 void
1912 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1913 {
1914 pt_entry_t pbits, *pte;
1915 pd_entry_t *pde, *pdpe;
1916 vm_offset_t va, va_next;
1917 vm_paddr_t pa;
1918 vm_page_t m;
1919
1920 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1921 pmap_remove(pmap, sva, eva);
1922 return;
1923 }
1924 if (prot & VM_PROT_WRITE)
1925 return;
1926
1927 PMAP_LOCK(pmap);
1928 for (; sva < eva; sva = va_next) {
1929 pdpe = pmap_segmap(pmap, sva);
1930 #ifdef __mips_n64
1931 if (*pdpe == 0) {
1932 va_next = (sva + NBSEG) & ~SEGMASK;
1933 if (va_next < sva)
1934 va_next = eva;
1935 continue;
1936 }
1937 #endif
1938 va_next = (sva + NBPDR) & ~PDRMASK;
1939 if (va_next < sva)
1940 va_next = eva;
1941
1942 pde = pmap_pdpe_to_pde(pdpe, sva);
1943 if (*pde == NULL)
1944 continue;
1945
1946 /*
1947 * Limit our scan to either the end of the va represented
1948 * by the current page table page, or to the end of the
1949 * range being write protected.
1950 */
1951 if (va_next > eva)
1952 va_next = eva;
1953
1954 va = va_next;
1955 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1956 sva += PAGE_SIZE) {
1957 pbits = *pte;
1958 if (!pte_test(&pbits, PTE_V) || pte_test(&pbits,
1959 PTE_RO)) {
1960 if (va != va_next) {
1961 pmap_invalidate_range(pmap, va, sva);
1962 va = va_next;
1963 }
1964 continue;
1965 }
1966 pte_set(&pbits, PTE_RO);
1967 if (pte_test(&pbits, PTE_D)) {
1968 pte_clear(&pbits, PTE_D);
1969 if (pte_test(&pbits, PTE_MANAGED)) {
1970 pa = TLBLO_PTE_TO_PA(pbits);
1971 m = PHYS_TO_VM_PAGE(pa);
1972 vm_page_dirty(m);
1973 }
1974 if (va == va_next)
1975 va = sva;
1976 } else {
1977 /*
1978 * Unless PTE_D is set, any TLB entries
1979 * mapping "sva" don't allow write access, so
1980 * they needn't be invalidated.
1981 */
1982 if (va != va_next) {
1983 pmap_invalidate_range(pmap, va, sva);
1984 va = va_next;
1985 }
1986 }
1987 *pte = pbits;
1988 }
1989 if (va != va_next)
1990 pmap_invalidate_range(pmap, va, sva);
1991 }
1992 PMAP_UNLOCK(pmap);
1993 }
1994
1995 /*
1996 * Insert the given physical page (p) at
1997 * the specified virtual address (v) in the
1998 * target physical map with the protection requested.
1999 *
2000 * If specified, the page will be wired down, meaning
2001 * that the related pte can not be reclaimed.
2002 *
2003 * NB: This is the only routine which MAY NOT lazy-evaluate
2004 * or lose information. That is, this routine must actually
2005 * insert this page into the given map NOW.
2006 */
2007 int
2008 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
2009 u_int flags, int8_t psind __unused)
2010 {
2011 vm_paddr_t pa, opa;
2012 pt_entry_t *pte;
2013 pt_entry_t origpte, newpte;
2014 pv_entry_t pv;
2015 vm_page_t mpte, om;
2016
2017 va &= ~PAGE_MASK;
2018 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
2019 KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva ||
2020 va >= kmi.clean_eva,
2021 ("pmap_enter: managed mapping within the clean submap"));
2022 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
2023 VM_OBJECT_ASSERT_LOCKED(m->object);
2024 pa = VM_PAGE_TO_PHYS(m);
2025 newpte = TLBLO_PA_TO_PFN(pa) | init_pte_prot(m, flags, prot);
2026 if ((flags & PMAP_ENTER_WIRED) != 0)
2027 newpte |= PTE_W;
2028 if (is_kernel_pmap(pmap))
2029 newpte |= PTE_G;
2030 if (is_cacheable_page(pa, m))
2031 newpte |= PTE_C_CACHE;
2032 else
2033 newpte |= PTE_C_UNCACHED;
2034
2035 mpte = NULL;
2036
2037 rw_wlock(&pvh_global_lock);
2038 PMAP_LOCK(pmap);
2039
2040 /*
2041 * In the case that a page table page is not resident, we are
2042 * creating it here.
2043 */
2044 if (va < VM_MAXUSER_ADDRESS) {
2045 mpte = pmap_allocpte(pmap, va, flags);
2046 if (mpte == NULL) {
2047 KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
2048 ("pmap_allocpte failed with sleep allowed"));
2049 rw_wunlock(&pvh_global_lock);
2050 PMAP_UNLOCK(pmap);
2051 return (KERN_RESOURCE_SHORTAGE);
2052 }
2053 }
2054 pte = pmap_pte(pmap, va);
2055
2056 /*
2057 * Page Directory table entry not valid, we need a new PT page
2058 */
2059 if (pte == NULL) {
2060 panic("pmap_enter: invalid page directory, pdir=%p, va=%p",
2061 (void *)pmap->pm_segtab, (void *)va);
2062 }
2063 om = NULL;
2064 origpte = *pte;
2065 opa = TLBLO_PTE_TO_PA(origpte);
2066
2067 /*
2068 * Mapping has not changed, must be protection or wiring change.
2069 */
2070 if (pte_test(&origpte, PTE_V) && opa == pa) {
2071 /*
2072 * Wiring change, just update stats. We don't worry about
2073 * wiring PT pages as they remain resident as long as there
2074 * are valid mappings in them. Hence, if a user page is
2075 * wired, the PT page will be also.
2076 */
2077 if (pte_test(&newpte, PTE_W) && !pte_test(&origpte, PTE_W))
2078 pmap->pm_stats.wired_count++;
2079 else if (!pte_test(&newpte, PTE_W) && pte_test(&origpte,
2080 PTE_W))
2081 pmap->pm_stats.wired_count--;
2082
2083 KASSERT(!pte_test(&origpte, PTE_D | PTE_RO),
2084 ("%s: modified page not writable: va: %p, pte: %#jx",
2085 __func__, (void *)va, (uintmax_t)origpte));
2086
2087 /*
2088 * Remove extra pte reference
2089 */
2090 if (mpte)
2091 mpte->wire_count--;
2092
2093 if (pte_test(&origpte, PTE_MANAGED)) {
2094 m->md.pv_flags |= PV_TABLE_REF;
2095 om = m;
2096 newpte |= PTE_MANAGED;
2097 if (!pte_test(&newpte, PTE_RO))
2098 vm_page_aflag_set(m, PGA_WRITEABLE);
2099 }
2100 goto validate;
2101 }
2102
2103 pv = NULL;
2104
2105 /*
2106 * Mapping has changed, invalidate old range and fall through to
2107 * handle validating new mapping.
2108 */
2109 if (opa) {
2110 if (pte_test(&origpte, PTE_W))
2111 pmap->pm_stats.wired_count--;
2112
2113 if (pte_test(&origpte, PTE_MANAGED)) {
2114 om = PHYS_TO_VM_PAGE(opa);
2115 pv = pmap_pvh_remove(&om->md, pmap, va);
2116 }
2117 if (mpte != NULL) {
2118 mpte->wire_count--;
2119 KASSERT(mpte->wire_count > 0,
2120 ("pmap_enter: missing reference to page table page,"
2121 " va: %p", (void *)va));
2122 }
2123 } else
2124 pmap->pm_stats.resident_count++;
2125
2126 /*
2127 * Enter on the PV list if part of our managed memory.
2128 */
2129 if ((m->oflags & VPO_UNMANAGED) == 0) {
2130 m->md.pv_flags |= PV_TABLE_REF;
2131 if (pv == NULL)
2132 pv = get_pv_entry(pmap, FALSE);
2133 pv->pv_va = va;
2134 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
2135 newpte |= PTE_MANAGED;
2136 if (!pte_test(&newpte, PTE_RO))
2137 vm_page_aflag_set(m, PGA_WRITEABLE);
2138 } else if (pv != NULL)
2139 free_pv_entry(pmap, pv);
2140
2141 /*
2142 * Increment counters
2143 */
2144 if (pte_test(&newpte, PTE_W))
2145 pmap->pm_stats.wired_count++;
2146
2147 validate:
2148
2149 #ifdef PMAP_DEBUG
2150 printf("pmap_enter: va: %p -> pa: %p\n", (void *)va, (void *)pa);
2151 #endif
2152
2153 /*
2154 * if the mapping or permission bits are different, we need to
2155 * update the pte.
2156 */
2157 if (origpte != newpte) {
2158 *pte = newpte;
2159 if (pte_test(&origpte, PTE_V)) {
2160 if (pte_test(&origpte, PTE_MANAGED) && opa != pa) {
2161 if (om->md.pv_flags & PV_TABLE_REF)
2162 vm_page_aflag_set(om, PGA_REFERENCED);
2163 om->md.pv_flags &= ~PV_TABLE_REF;
2164 }
2165 if (pte_test(&origpte, PTE_D)) {
2166 KASSERT(!pte_test(&origpte, PTE_RO),
2167 ("pmap_enter: modified page not writable:"
2168 " va: %p, pte: %#jx", (void *)va, (uintmax_t)origpte));
2169 if (pte_test(&origpte, PTE_MANAGED))
2170 vm_page_dirty(om);
2171 }
2172 if (pte_test(&origpte, PTE_MANAGED) &&
2173 TAILQ_EMPTY(&om->md.pv_list))
2174 vm_page_aflag_clear(om, PGA_WRITEABLE);
2175 pmap_update_page(pmap, va, newpte);
2176 }
2177 }
2178
2179 /*
2180 * Sync I & D caches for executable pages. Do this only if the
2181 * target pmap belongs to the current process. Otherwise, an
2182 * unresolvable TLB miss may occur.
2183 */
2184 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
2185 (prot & VM_PROT_EXECUTE)) {
2186 mips_icache_sync_range(va, PAGE_SIZE);
2187 mips_dcache_wbinv_range(va, PAGE_SIZE);
2188 }
2189 rw_wunlock(&pvh_global_lock);
2190 PMAP_UNLOCK(pmap);
2191 return (KERN_SUCCESS);
2192 }
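The routine above is the workhorse of the pmap KPI, so a minimal sketch of a caller may help; it is illustrative only and not part of pmap.c. The helper name is an assumption; real callers are the machine-independent VM fault and wiring paths, which must also satisfy the busy/lock assertions at the top of pmap_enter().

/* Hypothetical caller of pmap_enter(); illustrative sketch only. */
static int
example_enter_wired(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{

	/*
	 * Request a wired mapping.  Sleeping for a page table page is
	 * allowed because PMAP_ENTER_NOSLEEP is not passed, so the only
	 * expected results are KERN_SUCCESS or KERN_RESOURCE_SHORTAGE.
	 */
	return (pmap_enter(pmap, va, m, prot, PMAP_ENTER_WIRED, 0));
}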
2193
2194 /*
2195 * this code makes some *MAJOR* assumptions:
2196  * 1. The current pmap and the target pmap exist.
2197 * 2. Not wired.
2198 * 3. Read access.
2199 * 4. No page table pages.
2200 * but is *MUCH* faster than pmap_enter...
2201 */
2202
2203 void
2204 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2205 {
2206
2207 rw_wlock(&pvh_global_lock);
2208 PMAP_LOCK(pmap);
2209 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
2210 rw_wunlock(&pvh_global_lock);
2211 PMAP_UNLOCK(pmap);
2212 }
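A minimal sketch of how an advisory prefault loop might use pmap_enter_quick(); illustrative only. The helper name and the page array are assumptions; in the real kernel this style of speculative mapping is driven by the fault-ahead logic in the VM system.

/* Hypothetical fault-ahead loop; illustrative sketch only. */
static void
example_prefault(pmap_t pmap, vm_offset_t va, vm_page_t *pages, int npages)
{
	int i;

	/* Map each resident page read-only; failures are silently ignored. */
	for (i = 0; i < npages; i++)
		pmap_enter_quick(pmap, va + ptoa(i), pages[i], VM_PROT_READ);
}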
2213
2214 static vm_page_t
2215 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2216 vm_prot_t prot, vm_page_t mpte)
2217 {
2218 pt_entry_t *pte;
2219 vm_paddr_t pa;
2220
2221 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2222 (m->oflags & VPO_UNMANAGED) != 0,
2223 ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2224 rw_assert(&pvh_global_lock, RA_WLOCKED);
2225 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2226
2227 /*
2228 * In the case that a page table page is not resident, we are
2229 * creating it here.
2230 */
2231 if (va < VM_MAXUSER_ADDRESS) {
2232 pd_entry_t *pde;
2233 unsigned ptepindex;
2234
2235 /*
2236 * Calculate pagetable page index
2237 */
2238 ptepindex = pmap_pde_pindex(va);
2239 if (mpte && (mpte->pindex == ptepindex)) {
2240 mpte->wire_count++;
2241 } else {
2242 /*
2243 * Get the page directory entry
2244 */
2245 pde = pmap_pde(pmap, va);
2246
2247 /*
2248 * If the page table page is mapped, we just
2249 * increment the hold count, and activate it.
2250 */
2251 if (pde && *pde != 0) {
2252 mpte = PHYS_TO_VM_PAGE(
2253 MIPS_DIRECT_TO_PHYS(*pde));
2254 mpte->wire_count++;
2255 } else {
2256 mpte = _pmap_allocpte(pmap, ptepindex,
2257 PMAP_ENTER_NOSLEEP);
2258 if (mpte == NULL)
2259 return (mpte);
2260 }
2261 }
2262 } else {
2263 mpte = NULL;
2264 }
2265
2266 pte = pmap_pte(pmap, va);
2267 if (pte_test(pte, PTE_V)) {
2268 if (mpte != NULL) {
2269 mpte->wire_count--;
2270 mpte = NULL;
2271 }
2272 return (mpte);
2273 }
2274
2275 /*
2276 * Enter on the PV list if part of our managed memory.
2277 */
2278 if ((m->oflags & VPO_UNMANAGED) == 0 &&
2279 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2280 if (mpte != NULL) {
2281 pmap_unwire_ptp(pmap, va, mpte);
2282 mpte = NULL;
2283 }
2284 return (mpte);
2285 }
2286
2287 /*
2288 * Increment counters
2289 */
2290 pmap->pm_stats.resident_count++;
2291
2292 pa = VM_PAGE_TO_PHYS(m);
2293
2294 /*
2295 * Now validate mapping with RO protection
2296 */
2297 *pte = PTE_RO | TLBLO_PA_TO_PFN(pa) | PTE_V;
2298 if ((m->oflags & VPO_UNMANAGED) == 0)
2299 *pte |= PTE_MANAGED;
2300
2301 if (is_cacheable_page(pa, m))
2302 *pte |= PTE_C_CACHE;
2303 else
2304 *pte |= PTE_C_UNCACHED;
2305
2306 if (is_kernel_pmap(pmap))
2307 *pte |= PTE_G;
2308 else {
2309 /*
2310 * Sync I & D caches. Do this only if the target pmap
2311 * belongs to the current process. Otherwise, an
2312 * unresolvable TLB miss may occur. */
2313 if (pmap == &curproc->p_vmspace->vm_pmap) {
2314 va &= ~PAGE_MASK;
2315 mips_icache_sync_range(va, PAGE_SIZE);
2316 mips_dcache_wbinv_range(va, PAGE_SIZE);
2317 }
2318 }
2319 return (mpte);
2320 }
2321
2322 /*
2323 * Make a temporary mapping for a physical address. This is only intended
2324 * to be used for panic dumps.
2325 *
2326 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2327 */
2328 void *
2329 pmap_kenter_temporary(vm_paddr_t pa, int i)
2330 {
2331 vm_offset_t va;
2332
2333 if (i != 0)
2334 printf("%s: ERROR!!! More than one page of virtual address mapping not supported\n",
2335 __func__);
2336
2337 if (MIPS_DIRECT_MAPPABLE(pa)) {
2338 va = MIPS_PHYS_TO_DIRECT(pa);
2339 } else {
2340 #ifndef __mips_n64 /* XXX : to be converted to new style */
2341 int cpu;
2342 register_t intr;
2343 struct local_sysmaps *sysm;
2344 pt_entry_t *pte, npte;
2345
2346 /* If this is used other than for dumps, we may need to leave
2347 	 * interrupts disabled on return. If crash dumps don't work when
2348 * we get to this point, we might want to consider this (leaving things
2349 * disabled as a starting point ;-)
2350 */
2351 intr = intr_disable();
2352 cpu = PCPU_GET(cpuid);
2353 sysm = &sysmap_lmem[cpu];
2354 /* Since this is for the debugger, no locks or any other fun */
2355 npte = TLBLO_PA_TO_PFN(pa) | PTE_C_CACHE | PTE_D | PTE_V |
2356 PTE_G;
2357 pte = pmap_pte(kernel_pmap, sysm->base);
2358 *pte = npte;
2359 sysm->valid1 = 1;
2360 pmap_update_page(kernel_pmap, sysm->base, npte);
2361 va = sysm->base;
2362 intr_restore(intr);
2363 #endif
2364 }
2365 return ((void *)va);
2366 }
2367
2368 void
2369 pmap_kenter_temporary_free(vm_paddr_t pa)
2370 {
2371 #ifndef __mips_n64 /* XXX : to be converted to new style */
2372 int cpu;
2373 register_t intr;
2374 struct local_sysmaps *sysm;
2375 #endif
2376
2377 if (MIPS_DIRECT_MAPPABLE(pa)) {
2378 /* nothing to do for this case */
2379 return;
2380 }
2381 #ifndef __mips_n64 /* XXX : to be converted to new style */
2382 cpu = PCPU_GET(cpuid);
2383 sysm = &sysmap_lmem[cpu];
2384 if (sysm->valid1) {
2385 pt_entry_t *pte;
2386
2387 intr = intr_disable();
2388 pte = pmap_pte(kernel_pmap, sysm->base);
2389 *pte = PTE_G;
2390 pmap_invalidate_page(kernel_pmap, sysm->base);
2391 intr_restore(intr);
2392 sysm->valid1 = 0;
2393 }
2394 #endif
2395 }
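A short sketch of the intended dump-time usage, assuming a hypothetical consumer named example_dump_page(); it is illustrative only. Only index 0 is supported, so each page must be released before the next one is mapped.

/* Hypothetical crash-dump consumer; illustrative sketch only. */
static void
example_dump_page(vm_paddr_t pa)
{
	void *va;

	va = pmap_kenter_temporary(pa, 0);
	/* ... write PAGE_SIZE bytes starting at va to the dump device ... */
	pmap_kenter_temporary_free(pa);
}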
2396
2397 /*
2398 * Maps a sequence of resident pages belonging to the same object.
2399 * The sequence begins with the given page m_start. This page is
2400 * mapped at the given virtual address start. Each subsequent page is
2401 * mapped at a virtual address that is offset from start by the same
2402 * amount as the page is offset from m_start within the object. The
2403 * last page in the sequence is the page with the largest offset from
2404 * m_start that can be mapped at a virtual address less than the given
2405 * virtual address end. Not every virtual page between start and end
2406 * is mapped; only those for which a resident page exists with the
2407 * corresponding offset from m_start are mapped.
2408 */
2409 void
2410 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2411 vm_page_t m_start, vm_prot_t prot)
2412 {
2413 vm_page_t m, mpte;
2414 vm_pindex_t diff, psize;
2415
2416 VM_OBJECT_ASSERT_LOCKED(m_start->object);
2417
2418 psize = atop(end - start);
2419 mpte = NULL;
2420 m = m_start;
2421 rw_wlock(&pvh_global_lock);
2422 PMAP_LOCK(pmap);
2423 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2424 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2425 prot, mpte);
2426 m = TAILQ_NEXT(m, listq);
2427 }
2428 rw_wunlock(&pvh_global_lock);
2429 PMAP_UNLOCK(pmap);
2430 }
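A concrete trace may make the description above easier to follow (illustrative only, assuming 4KB pages): if m_start has pindex 10, start is 0x40000000 and end is 0x40004000, the loop maps the resident pages with pindex 10, 11, 12 and 13 at 0x40000000, 0x40001000, 0x40002000 and 0x40003000 respectively; a pindex in that range with no resident page is simply skipped, and its virtual page is left unmapped.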
2431
2432 /*
2433 * pmap_object_init_pt preloads the ptes for a given object
2434 * into the specified pmap. This eliminates the blast of soft
2435 * faults on process startup and immediately after an mmap.
2436 */
2437 void
2438 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2439 vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2440 {
2441 VM_OBJECT_ASSERT_WLOCKED(object);
2442 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2443 ("pmap_object_init_pt: non-device object"));
2444 }
2445
2446 /*
2447 * Clear the wired attribute from the mappings for the specified range of
2448 * addresses in the given pmap. Every valid mapping within that range
2449 * must have the wired attribute set. In contrast, invalid mappings
2450 * cannot have the wired attribute set, so they are ignored.
2451 *
2452 * The wired attribute of the page table entry is not a hardware feature,
2453 * so there is no need to invalidate any TLB entries.
2454 */
2455 void
2456 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2457 {
2458 pd_entry_t *pde, *pdpe;
2459 pt_entry_t *pte;
2460 vm_offset_t va_next;
2461
2462 PMAP_LOCK(pmap);
2463 for (; sva < eva; sva = va_next) {
2464 pdpe = pmap_segmap(pmap, sva);
2465 #ifdef __mips_n64
2466 if (*pdpe == NULL) {
2467 va_next = (sva + NBSEG) & ~SEGMASK;
2468 if (va_next < sva)
2469 va_next = eva;
2470 continue;
2471 }
2472 #endif
2473 va_next = (sva + NBPDR) & ~PDRMASK;
2474 if (va_next < sva)
2475 va_next = eva;
2476 pde = pmap_pdpe_to_pde(pdpe, sva);
2477 if (*pde == NULL)
2478 continue;
2479 if (va_next > eva)
2480 va_next = eva;
2481 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
2482 sva += PAGE_SIZE) {
2483 if (!pte_test(pte, PTE_V))
2484 continue;
2485 if (!pte_test(pte, PTE_W))
2486 panic("pmap_unwire: pte %#jx is missing PG_W",
2487 (uintmax_t)*pte);
2488 pte_clear(pte, PTE_W);
2489 pmap->pm_stats.wired_count--;
2490 }
2491 }
2492 PMAP_UNLOCK(pmap);
2493 }
2494
2495 /*
2496 * Copy the range specified by src_addr/len
2497 * from the source map to the range dst_addr/len
2498 * in the destination map.
2499 *
2500 * This routine is only advisory and need not do anything.
2501 */
2502
2503 void
2504 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2505 vm_size_t len, vm_offset_t src_addr)
2506 {
2507 }
2508
2509 /*
2510 * pmap_zero_page zeros the specified hardware page by mapping
2511 * the page into KVM and using bzero to clear its contents.
2512 *
2513 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2514 */
2515 void
2516 pmap_zero_page(vm_page_t m)
2517 {
2518 vm_offset_t va;
2519 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2520
2521 if (MIPS_DIRECT_MAPPABLE(phys)) {
2522 va = MIPS_PHYS_TO_DIRECT(phys);
2523 bzero((caddr_t)va, PAGE_SIZE);
2524 mips_dcache_wbinv_range(va, PAGE_SIZE);
2525 } else {
2526 va = pmap_lmem_map1(phys);
2527 bzero((caddr_t)va, PAGE_SIZE);
2528 mips_dcache_wbinv_range(va, PAGE_SIZE);
2529 pmap_lmem_unmap();
2530 }
2531 }
2532
2533 /*
2534 * pmap_zero_page_area zeros the specified hardware page by mapping
2535 * the page into KVM and using bzero to clear its contents.
2536 *
2537 * off and size may not cover an area beyond a single hardware page.
2538 */
2539 void
2540 pmap_zero_page_area(vm_page_t m, int off, int size)
2541 {
2542 vm_offset_t va;
2543 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2544
2545 if (MIPS_DIRECT_MAPPABLE(phys)) {
2546 va = MIPS_PHYS_TO_DIRECT(phys);
2547 bzero((char *)(caddr_t)va + off, size);
2548 mips_dcache_wbinv_range(va + off, size);
2549 } else {
2550 va = pmap_lmem_map1(phys);
2551 bzero((char *)va + off, size);
2552 mips_dcache_wbinv_range(va + off, size);
2553 pmap_lmem_unmap();
2554 }
2555 }
2556
2557 void
2558 pmap_zero_page_idle(vm_page_t m)
2559 {
2560 vm_offset_t va;
2561 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2562
2563 if (MIPS_DIRECT_MAPPABLE(phys)) {
2564 va = MIPS_PHYS_TO_DIRECT(phys);
2565 bzero((caddr_t)va, PAGE_SIZE);
2566 mips_dcache_wbinv_range(va, PAGE_SIZE);
2567 } else {
2568 va = pmap_lmem_map1(phys);
2569 bzero((caddr_t)va, PAGE_SIZE);
2570 mips_dcache_wbinv_range(va, PAGE_SIZE);
2571 pmap_lmem_unmap();
2572 }
2573 }
2574
2575 /*
2576 * pmap_copy_page copies the specified (machine independent)
2577 * page by mapping the page into virtual memory and using
2578 * bcopy to copy the page, one machine dependent page at a
2579 * time.
2580 *
2581 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2582 */
2583 void
2584 pmap_copy_page(vm_page_t src, vm_page_t dst)
2585 {
2586 vm_offset_t va_src, va_dst;
2587 vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src);
2588 vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst);
2589
2590 if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) {
2591 /* easy case, all can be accessed via KSEG0 */
2592 /*
2593 		 * Flush all caches for VAs that are mapped to this page
2594 		 * to make sure that the data in SDRAM is up to date.
2595 */
2596 pmap_flush_pvcache(src);
2597 mips_dcache_wbinv_range_index(
2598 MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE);
2599 va_src = MIPS_PHYS_TO_DIRECT(phys_src);
2600 va_dst = MIPS_PHYS_TO_DIRECT(phys_dst);
2601 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2602 mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2603 } else {
2604 va_src = pmap_lmem_map2(phys_src, phys_dst);
2605 va_dst = va_src + PAGE_SIZE;
2606 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2607 mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2608 pmap_lmem_unmap();
2609 }
2610 }
2611
2612 int unmapped_buf_allowed;
2613
2614 void
2615 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2616 vm_offset_t b_offset, int xfersize)
2617 {
2618 char *a_cp, *b_cp;
2619 vm_page_t a_m, b_m;
2620 vm_offset_t a_pg_offset, b_pg_offset;
2621 vm_paddr_t a_phys, b_phys;
2622 int cnt;
2623
2624 while (xfersize > 0) {
2625 a_pg_offset = a_offset & PAGE_MASK;
2626 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2627 a_m = ma[a_offset >> PAGE_SHIFT];
2628 a_phys = VM_PAGE_TO_PHYS(a_m);
2629 b_pg_offset = b_offset & PAGE_MASK;
2630 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2631 b_m = mb[b_offset >> PAGE_SHIFT];
2632 b_phys = VM_PAGE_TO_PHYS(b_m);
2633 if (MIPS_DIRECT_MAPPABLE(a_phys) &&
2634 MIPS_DIRECT_MAPPABLE(b_phys)) {
2635 pmap_flush_pvcache(a_m);
2636 mips_dcache_wbinv_range_index(
2637 MIPS_PHYS_TO_DIRECT(b_phys), PAGE_SIZE);
2638 a_cp = (char *)MIPS_PHYS_TO_DIRECT(a_phys) +
2639 a_pg_offset;
2640 b_cp = (char *)MIPS_PHYS_TO_DIRECT(b_phys) +
2641 b_pg_offset;
2642 bcopy(a_cp, b_cp, cnt);
2643 mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt);
2644 } else {
2645 a_cp = (char *)pmap_lmem_map2(a_phys, b_phys);
2646 b_cp = (char *)a_cp + PAGE_SIZE;
2647 a_cp += a_pg_offset;
2648 b_cp += b_pg_offset;
2649 bcopy(a_cp, b_cp, cnt);
2650 mips_dcache_wbinv_range((vm_offset_t)b_cp, cnt);
2651 pmap_lmem_unmap();
2652 }
2653 a_offset += cnt;
2654 b_offset += cnt;
2655 xfersize -= cnt;
2656 }
2657 }
2658
2659 vm_offset_t
2660 pmap_quick_enter_page(vm_page_t m)
2661 {
2662 #if defined(__mips_n64)
2663 return MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
2664 #else
2665 vm_paddr_t pa;
2666 struct local_sysmaps *sysm;
2667 pt_entry_t *pte;
2668
2669 pa = VM_PAGE_TO_PHYS(m);
2670
2671 if (MIPS_DIRECT_MAPPABLE(pa)) {
2672 if (m->md.pv_flags & PV_MEMATTR_UNCACHEABLE)
2673 return (MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
2674 else
2675 return (MIPS_PHYS_TO_DIRECT(pa));
2676 }
2677 critical_enter();
2678 sysm = &sysmap_lmem[PCPU_GET(cpuid)];
2679
2680 KASSERT(sysm->valid1 == 0, ("pmap_quick_enter_page: PTE busy"));
2681
2682 pte = pmap_pte(kernel_pmap, sysm->base);
2683 *pte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G |
2684 (is_cacheable_page(pa, m) ? PTE_C_CACHE : PTE_C_UNCACHED);
2685 sysm->valid1 = 1;
2686
2687 return (sysm->base);
2688 #endif
2689 }
2690
2691 void
2692 pmap_quick_remove_page(vm_offset_t addr)
2693 {
2694 mips_dcache_wbinv_range(addr, PAGE_SIZE);
2695
2696 #if !defined(__mips_n64)
2697 struct local_sysmaps *sysm;
2698 pt_entry_t *pte;
2699
2700 if (addr >= MIPS_KSEG0_START && addr < MIPS_KSEG0_END)
2701 return;
2702
2703 sysm = &sysmap_lmem[PCPU_GET(cpuid)];
2704
2705 KASSERT(sysm->valid1 != 0,
2706 ("pmap_quick_remove_page: PTE not in use"));
2707 KASSERT(sysm->base == addr,
2708 ("pmap_quick_remove_page: invalid address"));
2709
2710 pte = pmap_pte(kernel_pmap, addr);
2711 *pte = PTE_G;
2712 tlb_invalidate_address(kernel_pmap, addr);
2713 sysm->valid1 = 0;
2714 critical_exit();
2715 #endif
2716 }
2717
2718 /*
2719 * Returns true if the pmap's pv is one of the first
2720 * 16 pvs linked to from this page. This count may
2721 * be changed upwards or downwards in the future; it
2722 * is only necessary that true be returned for a small
2723 * subset of pmaps for proper page aging.
2724 */
2725 boolean_t
2726 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2727 {
2728 pv_entry_t pv;
2729 int loops = 0;
2730 boolean_t rv;
2731
2732 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2733 ("pmap_page_exists_quick: page %p is not managed", m));
2734 rv = FALSE;
2735 rw_wlock(&pvh_global_lock);
2736 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2737 if (PV_PMAP(pv) == pmap) {
2738 rv = TRUE;
2739 break;
2740 }
2741 loops++;
2742 if (loops >= 16)
2743 break;
2744 }
2745 rw_wunlock(&pvh_global_lock);
2746 return (rv);
2747 }
2748
2749 /*
2750  * Remove all pages from the specified address space;
2751  * this aids process exit speeds. Also, this code
2752 * is special cased for current process only, but
2753 * can have the more generic (and slightly slower)
2754 * mode enabled. This is much faster than pmap_remove
2755 * in the case of running down an entire address space.
2756 */
2757 void
2758 pmap_remove_pages(pmap_t pmap)
2759 {
2760 pd_entry_t *pde;
2761 pt_entry_t *pte, tpte;
2762 pv_entry_t pv;
2763 vm_page_t m;
2764 struct pv_chunk *pc, *npc;
2765 u_long inuse, bitmask;
2766 int allfree, bit, field, idx;
2767
2768 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2769 printf("warning: pmap_remove_pages called with non-current pmap\n");
2770 return;
2771 }
2772 rw_wlock(&pvh_global_lock);
2773 PMAP_LOCK(pmap);
2774 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2775 allfree = 1;
2776 for (field = 0; field < _NPCM; field++) {
2777 inuse = ~pc->pc_map[field] & pc_freemask[field];
2778 while (inuse != 0) {
2779 bit = ffsl(inuse) - 1;
2780 bitmask = 1UL << bit;
2781 idx = field * sizeof(inuse) * NBBY + bit;
2782 pv = &pc->pc_pventry[idx];
2783 inuse &= ~bitmask;
2784
2785 pde = pmap_pde(pmap, pv->pv_va);
2786 KASSERT(pde != NULL && *pde != 0,
2787 ("pmap_remove_pages: pde"));
2788 pte = pmap_pde_to_pte(pde, pv->pv_va);
2789 if (!pte_test(pte, PTE_V))
2790 panic("pmap_remove_pages: bad pte");
2791 tpte = *pte;
2792
2793 /*
2794 * We cannot remove wired pages from a process' mapping at this time
2795 */
2796 if (pte_test(&tpte, PTE_W)) {
2797 allfree = 0;
2798 continue;
2799 }
2800 *pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2801
2802 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte));
2803 KASSERT(m != NULL,
2804 ("pmap_remove_pages: bad tpte %#jx",
2805 (uintmax_t)tpte));
2806
2807 /*
2808 * Update the vm_page_t clean and reference bits.
2809 */
2810 if (pte_test(&tpte, PTE_D))
2811 vm_page_dirty(m);
2812
2813 /* Mark free */
2814 PV_STAT(pv_entry_frees++);
2815 PV_STAT(pv_entry_spare++);
2816 pv_entry_count--;
2817 pc->pc_map[field] |= bitmask;
2818 pmap->pm_stats.resident_count--;
2819 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2820 if (TAILQ_EMPTY(&m->md.pv_list))
2821 vm_page_aflag_clear(m, PGA_WRITEABLE);
2822 pmap_unuse_pt(pmap, pv->pv_va, *pde);
2823 }
2824 }
2825 if (allfree) {
2826 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2827 free_pv_chunk(pc);
2828 }
2829 }
2830 pmap_invalidate_all(pmap);
2831 PMAP_UNLOCK(pmap);
2832 rw_wunlock(&pvh_global_lock);
2833 }
2834
2835 /*
2836 * pmap_testbit tests bits in pte's
2837 */
2838 static boolean_t
2839 pmap_testbit(vm_page_t m, int bit)
2840 {
2841 pv_entry_t pv;
2842 pmap_t pmap;
2843 pt_entry_t *pte;
2844 boolean_t rv = FALSE;
2845
2846 if (m->oflags & VPO_UNMANAGED)
2847 return (rv);
2848
2849 rw_assert(&pvh_global_lock, RA_WLOCKED);
2850 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2851 pmap = PV_PMAP(pv);
2852 PMAP_LOCK(pmap);
2853 pte = pmap_pte(pmap, pv->pv_va);
2854 rv = pte_test(pte, bit);
2855 PMAP_UNLOCK(pmap);
2856 if (rv)
2857 break;
2858 }
2859 return (rv);
2860 }
2861
2862 /*
2863 * pmap_page_wired_mappings:
2864 *
2865 * Return the number of managed mappings to the given physical page
2866 * that are wired.
2867 */
2868 int
2869 pmap_page_wired_mappings(vm_page_t m)
2870 {
2871 pv_entry_t pv;
2872 pmap_t pmap;
2873 pt_entry_t *pte;
2874 int count;
2875
2876 count = 0;
2877 if ((m->oflags & VPO_UNMANAGED) != 0)
2878 return (count);
2879 rw_wlock(&pvh_global_lock);
2880 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2881 pmap = PV_PMAP(pv);
2882 PMAP_LOCK(pmap);
2883 pte = pmap_pte(pmap, pv->pv_va);
2884 if (pte_test(pte, PTE_W))
2885 count++;
2886 PMAP_UNLOCK(pmap);
2887 }
2888 rw_wunlock(&pvh_global_lock);
2889 return (count);
2890 }
2891
2892 /*
2893 * Clear the write and modified bits in each of the given page's mappings.
2894 */
2895 void
2896 pmap_remove_write(vm_page_t m)
2897 {
2898 pmap_t pmap;
2899 pt_entry_t pbits, *pte;
2900 pv_entry_t pv;
2901
2902 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2903 ("pmap_remove_write: page %p is not managed", m));
2904
2905 /*
2906 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2907 * set by another thread while the object is locked. Thus,
2908 * if PGA_WRITEABLE is clear, no page table entries need updating.
2909 */
2910 VM_OBJECT_ASSERT_WLOCKED(m->object);
2911 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2912 return;
2913 rw_wlock(&pvh_global_lock);
2914 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2915 pmap = PV_PMAP(pv);
2916 PMAP_LOCK(pmap);
2917 pte = pmap_pte(pmap, pv->pv_va);
2918 KASSERT(pte != NULL && pte_test(pte, PTE_V),
2919 ("page on pv_list has no pte"));
2920 pbits = *pte;
2921 if (pte_test(&pbits, PTE_D)) {
2922 pte_clear(&pbits, PTE_D);
2923 vm_page_dirty(m);
2924 }
2925 pte_set(&pbits, PTE_RO);
2926 if (pbits != *pte) {
2927 *pte = pbits;
2928 pmap_update_page(pmap, pv->pv_va, pbits);
2929 }
2930 PMAP_UNLOCK(pmap);
2931 }
2932 vm_page_aflag_clear(m, PGA_WRITEABLE);
2933 rw_wunlock(&pvh_global_lock);
2934 }
2935
2936 /*
2937 * pmap_ts_referenced:
2938 *
2939 * Return the count of reference bits for a page, clearing all of them.
2940 */
2941 int
2942 pmap_ts_referenced(vm_page_t m)
2943 {
2944
2945 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2946 ("pmap_ts_referenced: page %p is not managed", m));
2947 if (m->md.pv_flags & PV_TABLE_REF) {
2948 rw_wlock(&pvh_global_lock);
2949 m->md.pv_flags &= ~PV_TABLE_REF;
2950 rw_wunlock(&pvh_global_lock);
2951 return (1);
2952 }
2953 return (0);
2954 }
2955
2956 /*
2957 * pmap_is_modified:
2958 *
2959 * Return whether or not the specified physical page was modified
2960 * in any physical maps.
2961 */
2962 boolean_t
2963 pmap_is_modified(vm_page_t m)
2964 {
2965 boolean_t rv;
2966
2967 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2968 ("pmap_is_modified: page %p is not managed", m));
2969
2970 /*
2971 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2972 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE
2973 * is clear, no PTEs can have PTE_D set.
2974 */
2975 VM_OBJECT_ASSERT_WLOCKED(m->object);
2976 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2977 return (FALSE);
2978 rw_wlock(&pvh_global_lock);
2979 rv = pmap_testbit(m, PTE_D);
2980 rw_wunlock(&pvh_global_lock);
2981 return (rv);
2982 }
2983
2984 /* N/C */
2985
2986 /*
2987 * pmap_is_prefaultable:
2988 *
2989  * Return whether or not the specified virtual address is eligible
2990 * for prefault.
2991 */
2992 boolean_t
2993 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2994 {
2995 pd_entry_t *pde;
2996 pt_entry_t *pte;
2997 boolean_t rv;
2998
2999 rv = FALSE;
3000 PMAP_LOCK(pmap);
3001 pde = pmap_pde(pmap, addr);
3002 if (pde != NULL && *pde != 0) {
3003 pte = pmap_pde_to_pte(pde, addr);
3004 rv = (*pte == 0);
3005 }
3006 PMAP_UNLOCK(pmap);
3007 return (rv);
3008 }
3009
3010 /*
3011 * Apply the given advice to the specified range of addresses within the
3012 * given pmap. Depending on the advice, clear the referenced and/or
3013 * modified flags in each mapping and set the mapped page's dirty field.
3014 */
3015 void
3016 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
3017 {
3018 pd_entry_t *pde, *pdpe;
3019 pt_entry_t *pte;
3020 vm_offset_t va, va_next;
3021 vm_paddr_t pa;
3022 vm_page_t m;
3023
3024 if (advice != MADV_DONTNEED && advice != MADV_FREE)
3025 return;
3026 rw_wlock(&pvh_global_lock);
3027 PMAP_LOCK(pmap);
3028 for (; sva < eva; sva = va_next) {
3029 pdpe = pmap_segmap(pmap, sva);
3030 #ifdef __mips_n64
3031 if (*pdpe == 0) {
3032 va_next = (sva + NBSEG) & ~SEGMASK;
3033 if (va_next < sva)
3034 va_next = eva;
3035 continue;
3036 }
3037 #endif
3038 va_next = (sva + NBPDR) & ~PDRMASK;
3039 if (va_next < sva)
3040 va_next = eva;
3041
3042 pde = pmap_pdpe_to_pde(pdpe, sva);
3043 if (*pde == NULL)
3044 continue;
3045
3046 /*
3047 * Limit our scan to either the end of the va represented
3048 * by the current page table page, or to the end of the
3049 		 * range to which the advice is being applied.
3050 */
3051 if (va_next > eva)
3052 va_next = eva;
3053
3054 va = va_next;
3055 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
3056 sva += PAGE_SIZE) {
3057 if (!pte_test(pte, PTE_MANAGED | PTE_V)) {
3058 if (va != va_next) {
3059 pmap_invalidate_range(pmap, va, sva);
3060 va = va_next;
3061 }
3062 continue;
3063 }
3064 pa = TLBLO_PTE_TO_PA(*pte);
3065 m = PHYS_TO_VM_PAGE(pa);
3066 m->md.pv_flags &= ~PV_TABLE_REF;
3067 if (pte_test(pte, PTE_D)) {
3068 if (advice == MADV_DONTNEED) {
3069 /*
3070 * Future calls to pmap_is_modified()
3071 * can be avoided by making the page
3072 * dirty now.
3073 */
3074 vm_page_dirty(m);
3075 } else {
3076 pte_clear(pte, PTE_D);
3077 if (va == va_next)
3078 va = sva;
3079 }
3080 } else {
3081 /*
3082 * Unless PTE_D is set, any TLB entries
3083 * mapping "sva" don't allow write access, so
3084 * they needn't be invalidated.
3085 */
3086 if (va != va_next) {
3087 pmap_invalidate_range(pmap, va, sva);
3088 va = va_next;
3089 }
3090 }
3091 }
3092 if (va != va_next)
3093 pmap_invalidate_range(pmap, va, sva);
3094 }
3095 rw_wunlock(&pvh_global_lock);
3096 PMAP_UNLOCK(pmap);
3097 }
3098
3099 /*
3100 * Clear the modify bits on the specified physical page.
3101 */
3102 void
3103 pmap_clear_modify(vm_page_t m)
3104 {
3105 pmap_t pmap;
3106 pt_entry_t *pte;
3107 pv_entry_t pv;
3108
3109 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3110 ("pmap_clear_modify: page %p is not managed", m));
3111 VM_OBJECT_ASSERT_WLOCKED(m->object);
3112 KASSERT(!vm_page_xbusied(m),
3113 ("pmap_clear_modify: page %p is exclusive busied", m));
3114
3115 /*
3116 * If the page is not PGA_WRITEABLE, then no PTEs can have PTE_D set.
3117 * If the object containing the page is locked and the page is not
3118 * write busied, then PGA_WRITEABLE cannot be concurrently set.
3119 */
3120 if ((m->aflags & PGA_WRITEABLE) == 0)
3121 return;
3122 rw_wlock(&pvh_global_lock);
3123 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3124 pmap = PV_PMAP(pv);
3125 PMAP_LOCK(pmap);
3126 pte = pmap_pte(pmap, pv->pv_va);
3127 if (pte_test(pte, PTE_D)) {
3128 pte_clear(pte, PTE_D);
3129 pmap_update_page(pmap, pv->pv_va, *pte);
3130 }
3131 PMAP_UNLOCK(pmap);
3132 }
3133 rw_wunlock(&pvh_global_lock);
3134 }
3135
3136 /*
3137 * pmap_is_referenced:
3138 *
3139 * Return whether or not the specified physical page was referenced
3140 * in any physical maps.
3141 */
3142 boolean_t
3143 pmap_is_referenced(vm_page_t m)
3144 {
3145
3146 KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3147 ("pmap_is_referenced: page %p is not managed", m));
3148 return ((m->md.pv_flags & PV_TABLE_REF) != 0);
3149 }
3150
3151 /*
3152 * Miscellaneous support routines follow
3153 */
3154
3155 /*
3156 * Map a set of physical memory pages into the kernel virtual
3157 * address space. Return a pointer to where it is mapped. This
3158 * routine is intended to be used for mapping device memory,
3159 * NOT real memory.
3160 *
3161 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit.
3162 */
3163 void *
3164 pmap_mapdev(vm_paddr_t pa, vm_size_t size)
3165 {
3166 vm_offset_t va, tmpva, offset;
3167
3168 /*
3169 * KSEG1 maps only first 512M of phys address space. For
3170 	 * pa > 0x20000000 we should make a proper mapping using pmap_kenter.
3171 */
3172 if (MIPS_DIRECT_MAPPABLE(pa + size - 1))
3173 return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
3174 else {
3175 offset = pa & PAGE_MASK;
3176 size = roundup(size + offset, PAGE_SIZE);
3177
3178 va = kva_alloc(size);
3179 if (!va)
3180 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
3181 pa = trunc_page(pa);
3182 for (tmpva = va; size > 0;) {
3183 pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED);
3184 size -= PAGE_SIZE;
3185 tmpva += PAGE_SIZE;
3186 pa += PAGE_SIZE;
3187 }
3188 }
3189
3190 return ((void *)(va + offset));
3191 }
3192
3193 void
3194 pmap_unmapdev(vm_offset_t va, vm_size_t size)
3195 {
3196 #ifndef __mips_n64
3197 vm_offset_t base, offset;
3198
3199 /* If the address is within KSEG1 then there is nothing to do */
3200 if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
3201 return;
3202
3203 base = trunc_page(va);
3204 offset = va & PAGE_MASK;
3205 size = roundup(size + offset, PAGE_SIZE);
3206 kva_free(base, size);
3207 #endif
3208 }
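A hedged sketch of driver-style usage; illustrative only. The register base address and helper names are assumptions, and real drivers normally reach this routine indirectly through the bus_space(9) machinery rather than calling it themselves.

#define	EXAMPLE_REG_BASE	0x1f000000	/* hypothetical device registers */

static void *example_regs;

/* Hypothetical attach/detach pair; illustrative sketch only. */
static void
example_attach(void)
{
	example_regs = pmap_mapdev(EXAMPLE_REG_BASE, PAGE_SIZE);
}

static void
example_detach(void)
{
	pmap_unmapdev((vm_offset_t)example_regs, PAGE_SIZE);
}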
3209
3210 /*
3211 * perform the pmap work for mincore
3212 */
3213 int
3214 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3215 {
3216 pt_entry_t *ptep, pte;
3217 vm_paddr_t pa;
3218 vm_page_t m;
3219 int val;
3220
3221 PMAP_LOCK(pmap);
3222 retry:
3223 ptep = pmap_pte(pmap, addr);
3224 pte = (ptep != NULL) ? *ptep : 0;
3225 if (!pte_test(&pte, PTE_V)) {
3226 val = 0;
3227 goto out;
3228 }
3229 val = MINCORE_INCORE;
3230 if (pte_test(&pte, PTE_D))
3231 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3232 pa = TLBLO_PTE_TO_PA(pte);
3233 if (pte_test(&pte, PTE_MANAGED)) {
3234 /*
3235 * This may falsely report the given address as
3236 * MINCORE_REFERENCED. Unfortunately, due to the lack of
3237 * per-PTE reference information, it is impossible to
3238 * determine if the address is MINCORE_REFERENCED.
3239 */
3240 m = PHYS_TO_VM_PAGE(pa);
3241 if ((m->aflags & PGA_REFERENCED) != 0)
3242 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3243 }
3244 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3245 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
3246 pte_test(&pte, PTE_MANAGED)) {
3247 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3248 if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3249 goto retry;
3250 } else
3251 out:
3252 PA_UNLOCK_COND(*locked_pa);
3253 PMAP_UNLOCK(pmap);
3254 return (val);
3255 }
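As a worked example (illustrative only): a resident, managed page whose PTE has PTE_D set and whose vm_page carries PGA_REFERENCED yields MINCORE_INCORE | MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER, which the mincore(2) system call then reports to userland for that address.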
3256
3257 void
3258 pmap_activate(struct thread *td)
3259 {
3260 pmap_t pmap, oldpmap;
3261 struct proc *p = td->td_proc;
3262 u_int cpuid;
3263
3264 critical_enter();
3265
3266 pmap = vmspace_pmap(p->p_vmspace);
3267 oldpmap = PCPU_GET(curpmap);
3268 cpuid = PCPU_GET(cpuid);
3269
3270 if (oldpmap)
3271 CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
3272 CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
3273 pmap_asid_alloc(pmap);
3274 if (td == curthread) {
3275 PCPU_SET(segbase, pmap->pm_segtab);
3276 mips_wr_entryhi(pmap->pm_asid[cpuid].asid);
3277 }
3278
3279 PCPU_SET(curpmap, pmap);
3280 critical_exit();
3281 }
3282
3283 static void
3284 pmap_sync_icache_one(void *arg __unused)
3285 {
3286
3287 mips_icache_sync_all();
3288 mips_dcache_wbinv_all();
3289 }
3290
3291 void
3292 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
3293 {
3294
3295 smp_rendezvous(NULL, pmap_sync_icache_one, NULL, NULL);
3296 }
3297
3298 /*
3299 * Increase the starting virtual address of the given mapping if a
3300 * different alignment might result in more superpage mappings.
3301 */
3302 void
3303 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3304 vm_offset_t *addr, vm_size_t size)
3305 {
3306 vm_offset_t superpage_offset;
3307
3308 if (size < PDRSIZE)
3309 return;
3310 if (object != NULL && (object->flags & OBJ_COLORED) != 0)
3311 offset += ptoa(object->pg_color);
3312 superpage_offset = offset & PDRMASK;
3313 if (size - ((PDRSIZE - superpage_offset) & PDRMASK) < PDRSIZE ||
3314 (*addr & PDRMASK) == superpage_offset)
3315 return;
3316 if ((*addr & PDRMASK) < superpage_offset)
3317 *addr = (*addr & ~PDRMASK) + superpage_offset;
3318 else
3319 *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
3320 }
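A worked example (illustrative only, with a hypothetical 4MB PDRSIZE): for an 8MB mapping whose object offset gives superpage_offset == 0x1000 and whose suggested *addr is 0x00800000, the size check passes, (*addr & PDRMASK) is 0, which differs from and is below superpage_offset, so *addr is advanced to 0x00801000. Object offsets and virtual addresses then share the same alignment within a superpage, which is the precondition for later promotion to a superpage mapping.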
3321
3322 #ifdef DDB
3323 DB_SHOW_COMMAND(ptable, ddb_pid_dump)
3324 {
3325 pmap_t pmap;
3326 struct thread *td = NULL;
3327 struct proc *p;
3328 int i, j, k;
3329 vm_paddr_t pa;
3330 vm_offset_t va;
3331
3332 if (have_addr) {
3333 td = db_lookup_thread(addr, true);
3334 if (td == NULL) {
3335 db_printf("Invalid pid or tid");
3336 return;
3337 }
3338 p = td->td_proc;
3339 if (p->p_vmspace == NULL) {
3340 db_printf("No vmspace for process");
3341 return;
3342 }
3343 pmap = vmspace_pmap(p->p_vmspace);
3344 } else
3345 pmap = kernel_pmap;
3346
3347 db_printf("pmap:%p segtab:%p asid:%x generation:%x\n",
3348 pmap, pmap->pm_segtab, pmap->pm_asid[0].asid,
3349 pmap->pm_asid[0].gen);
3350 for (i = 0; i < NPDEPG; i++) {
3351 pd_entry_t *pdpe;
3352 pt_entry_t *pde;
3353 pt_entry_t pte;
3354
3355 pdpe = (pd_entry_t *)pmap->pm_segtab[i];
3356 if (pdpe == NULL)
3357 continue;
3358 db_printf("[%4d] %p\n", i, pdpe);
3359 #ifdef __mips_n64
3360 for (j = 0; j < NPDEPG; j++) {
3361 pde = (pt_entry_t *)pdpe[j];
3362 if (pde == NULL)
3363 continue;
3364 db_printf("\t[%4d] %p\n", j, pde);
3365 #else
3366 {
3367 j = 0;
3368 pde = (pt_entry_t *)pdpe;
3369 #endif
3370 for (k = 0; k < NPTEPG; k++) {
3371 pte = pde[k];
3372 if (pte == 0 || !pte_test(&pte, PTE_V))
3373 continue;
3374 pa = TLBLO_PTE_TO_PA(pte);
3375 va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT);
3376 db_printf("\t\t[%04d] va: %p pte: %8jx pa:%jx\n",
3377 k, (void *)va, (uintmax_t)pte, (uintmax_t)pa);
3378 }
3379 }
3380 }
3381 }
3382 #endif
3383
3384 /*
3385 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
3386 * It takes almost as much or more time to search the TLB for a
3387 * specific ASID and flush those entries as it does to flush the entire TLB.
3388 * Therefore, when we allocate a new ASID, we just take the next number. When
3389 * we run out of numbers, we flush the TLB, increment the generation count
3390 * and start over. ASID zero is reserved for kernel use.
3391 */
3392 static void
3393 pmap_asid_alloc(pmap_t pmap)
3395 {
3396 	if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
3397 	    pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
3399 if (PCPU_GET(next_asid) == pmap_max_asid) {
3400 tlb_invalidate_all_user(NULL);
3401 PCPU_SET(asid_generation,
3402 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3403 if (PCPU_GET(asid_generation) == 0) {
3404 PCPU_SET(asid_generation, 1);
3405 }
3406 PCPU_SET(next_asid, 1); /* 0 means invalid */
3407 }
3408 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3409 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3410 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3411 }
3412 }
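As an illustrative trace of the scheme described above: with a hypothetical pmap_max_asid of 256, ASIDs 1 through 255 are handed out sequentially on each CPU; when next_asid reaches the limit, the user TLB entries are flushed, asid_generation is bumped (skipping zero on wrap), and allocation restarts at 1. A pmap whose recorded generation no longer matches the CPU's current generation simply receives a fresh ASID the next time it is activated there.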
3413
3414 static pt_entry_t
3415 init_pte_prot(vm_page_t m, vm_prot_t access, vm_prot_t prot)
3416 {
3417 pt_entry_t rw;
3418
3419 if (!(prot & VM_PROT_WRITE))
3420 rw = PTE_V | PTE_RO;
3421 else if ((m->oflags & VPO_UNMANAGED) == 0) {
3422 if ((access & VM_PROT_WRITE) != 0)
3423 rw = PTE_V | PTE_D;
3424 else
3425 rw = PTE_V;
3426 } else
3427 /* Needn't emulate a modified bit for unmanaged pages. */
3428 rw = PTE_V | PTE_D;
3429 return (rw);
3430 }
3431
3432 /*
3433 * pmap_emulate_modified : do dirty bit emulation
3434 *
3435  * On SMP, update just the local TLB; other CPUs will update their
3436  * TLBs from the PTE lazily if they take the exception.
3437  * Returns 0 in case of success, 1 if the page is read-only and we
3438 * need to fault.
3439 */
3440 int
3441 pmap_emulate_modified(pmap_t pmap, vm_offset_t va)
3442 {
3443 pt_entry_t *pte;
3444
3445 PMAP_LOCK(pmap);
3446 pte = pmap_pte(pmap, va);
3447 if (pte == NULL)
3448 panic("pmap_emulate_modified: can't find PTE");
3449 #ifdef SMP
3450 /* It is possible that some other CPU changed m-bit */
3451 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) {
3452 tlb_update(pmap, va, *pte);
3453 PMAP_UNLOCK(pmap);
3454 return (0);
3455 }
3456 #else
3457 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D))
3458 panic("pmap_emulate_modified: invalid pte");
3459 #endif
3460 if (pte_test(pte, PTE_RO)) {
3461 PMAP_UNLOCK(pmap);
3462 return (1);
3463 }
3464 pte_set(pte, PTE_D);
3465 tlb_update(pmap, va, *pte);
3466 if (!pte_test(pte, PTE_MANAGED))
3467 panic("pmap_emulate_modified: unmanaged page");
3468 PMAP_UNLOCK(pmap);
3469 return (0);
3470 }
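A minimal sketch of the exception-side caller; illustrative only, since the real TLB-modified handler lives in the trap code rather than in pmap.c, and the helper name is an assumption.

/* Hypothetical TLB-modified exception path; illustrative sketch only. */
static int
example_tlb_mod(pmap_t pmap, vm_offset_t badvaddr)
{

	if (pmap_emulate_modified(pmap, badvaddr) != 0) {
		/* Genuinely read-only: the caller must raise a protection fault. */
		return (1);
	}
	/* PTE_D is now set and the local TLB updated; retry the store. */
	return (0);
}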
3471
3472 /*
3473 * Routine: pmap_kextract
3474 * Function:
3475  * 	Extract the physical page address associated with the given
3476  * 	virtual address.
3477 */
3478 vm_paddr_t
3479 pmap_kextract(vm_offset_t va)
3480 {
3481 int mapped;
3482
3483 /*
3484 * First, the direct-mapped regions.
3485 */
3486 #if defined(__mips_n64)
3487 if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END)
3488 return (MIPS_XKPHYS_TO_PHYS(va));
3489 #endif
3490 if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END)
3491 return (MIPS_KSEG0_TO_PHYS(va));
3492
3493 if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END)
3494 return (MIPS_KSEG1_TO_PHYS(va));
3495
3496 /*
3497 * User virtual addresses.
3498 */
3499 if (va < VM_MAXUSER_ADDRESS) {
3500 pt_entry_t *ptep;
3501
3502 if (curproc && curproc->p_vmspace) {
3503 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3504 if (ptep) {
3505 return (TLBLO_PTE_TO_PA(*ptep) |
3506 (va & PAGE_MASK));
3507 }
3508 return (0);
3509 }
3510 }
3511
3512 /*
3513 * Should be kernel virtual here, otherwise fail
3514 */
3515 	mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END);
3516 #if defined(__mips_n64)
3517 	mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END);
3518 #endif
3519 /*
3520 * Kernel virtual.
3521 */
3522
3523 if (mapped) {
3524 pt_entry_t *ptep;
3525
3526 /* Is the kernel pmap initialized? */
3527 if (!CPU_EMPTY(&kernel_pmap->pm_active)) {
3528 /* It's inside the virtual address range */
3529 ptep = pmap_pte(kernel_pmap, va);
3530 if (ptep) {
3531 return (TLBLO_PTE_TO_PA(*ptep) |
3532 (va & PAGE_MASK));
3533 }
3534 }
3535 return (0);
3536 }
3537
3538 panic("%s for unknown address space %p.", __func__, (void *)va);
3539 }
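A small sketch of typical usage; illustrative only. On MIPS the vtophys() macro is commonly implemented in terms of this routine; the helper name is an assumption.

/* Hypothetical translation of a kernel buffer address; illustrative only. */
static vm_paddr_t
example_kva_to_phys(void *buf)
{

	return (pmap_kextract((vm_offset_t)buf));
}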
3540
3541
3542 void
3543 pmap_flush_pvcache(vm_page_t m)
3544 {
3545 pv_entry_t pv;
3546
3547 if (m != NULL) {
3548 for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3549 pv = TAILQ_NEXT(pv, pv_list)) {
3550 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
3551 }
3552 }
3553 }
3554
3555 void
3556 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
3557 {
3558
3559 /*
3560 * It appears that this function can only be called before any mappings
3561 * for the page are established. If this ever changes, this code will
3562 * need to walk the pv_list and make each of the existing mappings
3563 * uncacheable, being careful to sync caches and PTEs (and maybe
3564 * invalidate TLB?) for any current mapping it modifies.
3565 */
3566 if (TAILQ_FIRST(&m->md.pv_list) != NULL)
3567 panic("Can't change memattr on page with existing mappings");
3568
3569 /*
3570 * The only memattr we support is UNCACHEABLE, translate the (semi-)MI
3571 * representation of that into our internal flag in the page MD struct.
3572 */
3573 if (ma == VM_MEMATTR_UNCACHEABLE)
3574 m->md.pv_flags |= PV_MEMATTR_UNCACHEABLE;
3575 else
3576 m->md.pv_flags &= ~PV_MEMATTR_UNCACHEABLE;
3577 }