FreeBSD/Linux Kernel Cross Reference
sys/mips/mips/pmap.c
1 /*
2 * Copyright (c) 1991 Regents of the University of California.
3 * All rights reserved.
4 * Copyright (c) 1994 John S. Dyson
5 * All rights reserved.
6 * Copyright (c) 1994 David Greenman
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * the Systems Programming Group of the University of Utah Computer
11 * Science Department and William Jolitz of UUNET Technologies Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
38 * from: src/sys/i386/i386/pmap.c,v 1.250.2.8 2000/11/21 00:09:14 ps
39 * JNPR: pmap.c,v 1.11.2.1 2007/08/16 11:51:06 girish
40 */
41
42 /*
43 * Manages physical address maps.
44 *
45 * In addition to hardware address maps, this
46 * module is called upon to provide software-use-only
47 * maps which may or may not be stored in the same
48 * form as hardware maps. These pseudo-maps are
49 * used to store intermediate results from copy
50 * operations to and from address spaces.
51 *
52 * Since the information managed by this module is
53 * also stored by the logical address mapping module,
54 * this module may throw away valid virtual-to-physical
55 * mappings at almost any time. However, invalidations
56 * of virtual-to-physical mappings must be done as
57 * requested.
58 *
59 * In order to cope with hardware architectures which
60 * make virtual-to-physical map invalidates expensive,
  61  * this module may delay invalidate or reduce-protection
  62  * operations until such time as they are actually
  63  * necessary. This module is given full information as
  64  * to which processors are currently using which maps,
  65  * and as to when physical maps must be made correct.
66 */
67
68 #include <sys/cdefs.h>
69 __FBSDID("$FreeBSD: releng/8.4/sys/mips/mips/pmap.c 228256 2011-12-04 07:28:50Z alc $");
70
71 #include "opt_ddb.h"
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/proc.h>
76 #include <sys/msgbuf.h>
77 #include <sys/vmmeter.h>
78 #include <sys/mman.h>
79 #include <sys/smp.h>
80 #ifdef DDB
81 #include <ddb/ddb.h>
82 #endif
83
84 #include <vm/vm.h>
85 #include <vm/vm_param.h>
86 #include <vm/vm_phys.h>
87 #include <sys/lock.h>
88 #include <sys/mutex.h>
89 #include <vm/vm_kern.h>
90 #include <vm/vm_page.h>
91 #include <vm/vm_map.h>
92 #include <vm/vm_object.h>
93 #include <vm/vm_extern.h>
94 #include <vm/vm_pageout.h>
95 #include <vm/vm_pager.h>
96 #include <vm/uma.h>
97 #include <sys/pcpu.h>
98 #include <sys/sched.h>
99 #ifdef SMP
100 #include <sys/smp.h>
101 #endif
102
103 #include <machine/cache.h>
104 #include <machine/md_var.h>
105 #include <machine/tlb.h>
106
107 #undef PMAP_DEBUG
108
109 #ifndef PMAP_SHPGPERPROC
110 #define PMAP_SHPGPERPROC 200
111 #endif
112
113 #if !defined(DIAGNOSTIC)
114 #define PMAP_INLINE __inline
115 #else
116 #define PMAP_INLINE
117 #endif
118
119 /*
120 * Get PDEs and PTEs for user/kernel address space
121 *
 122  * XXX The & for pmap_seg_index() is wrong, as is the fact that it doesn't
123 * trim off gratuitous bits of the address space. By having the &
124 * there, we break defining NUSERPGTBLS below because the address space
125 * is defined such that it ends immediately after NPDEPG*NPTEPG*PAGE_SIZE,
126 * so we end up getting NUSERPGTBLS of 0.
127 */
128 #define pmap_seg_index(v) (((v) >> SEGSHIFT) & (NPDEPG - 1))
129 #define pmap_pde_index(v) (((v) >> PDRSHIFT) & (NPDEPG - 1))
130 #define pmap_pte_index(v) (((v) >> PAGE_SHIFT) & (NPTEPG - 1))
131 #define pmap_pde_pindex(v) ((v) >> PDRSHIFT)
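/*
 * A worked example (illustrative; values assume a 32-bit kernel with
 * 4K pages and 4-byte PTEs, giving NPTEPG == NPDEPG == 1024 and
 * SEGSHIFT == 22): the virtual address 0x00c03000 decomposes as
 *
 *	pmap_seg_index(0x00c03000) == (0x00c03000 >> 22) & 1023 == 3
 *	pmap_pte_index(0x00c03000) == (0x00c03000 >> 12) & 1023 == 3
 *
 * i.e. PTE 3 within the page table page named by segment-table slot 3.
 */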
132
133 #ifdef __mips_n64
134 #define NUPDE (NPDEPG * NPDEPG)
135 #define NUSERPGTBLS (NUPDE + NPDEPG)
136 #else
137 #define NUPDE (NPDEPG)
138 #define NUSERPGTBLS (NUPDE)
139 #endif
140
141 #define is_kernel_pmap(x) ((x) == kernel_pmap)
142
143 struct pmap kernel_pmap_store;
144 pd_entry_t *kernel_segmap;
145
146 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
147 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
148
149 static int nkpt;
150 unsigned pmap_max_asid; /* max ASID supported by the system */
151
152 #define PMAP_ASID_RESERVED 0
153
154 vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
155
156 static void pmap_asid_alloc(pmap_t pmap);
157
158 /*
159 * Data for the pv entry allocation mechanism
160 */
161 static uma_zone_t pvzone;
162 static struct vm_object pvzone_obj;
163 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
164
165 static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
166 static pv_entry_t get_pv_entry(pmap_t locked_pmap);
167 static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
168 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
169 vm_offset_t va);
170 static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
171 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
172 vm_page_t m, vm_prot_t prot, vm_page_t mpte);
173 static int pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va);
174 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
175 static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
176 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte,
177 vm_offset_t va, vm_page_t m);
178 static void pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte);
179 static void pmap_invalidate_all(pmap_t pmap);
180 static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va);
181 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m);
182
183 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
184 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
185 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
186 static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
187 static vm_page_t pmap_alloc_pte_page(unsigned int index, int req);
188 static void pmap_grow_pte_page_cache(void);
189
190 #ifdef SMP
191 static void pmap_invalidate_page_action(void *arg);
192 static void pmap_invalidate_all_action(void *arg);
193 static void pmap_update_page_action(void *arg);
194 #endif
195
196 #ifndef __mips_n64
197 /*
 198  * This structure is for high memory (memory above 512MB in 32-bit mode).
 199  * This memory area does not have a direct mapping, so we need a mechanism
 200  * to do temporary per-CPU mappings in order to access these addresses.
 201  *
 202  * At bootup we reserve 2 virtual pages per CPU for mapping highmem pages.
 203  * To access a highmem physical address on a CPU, we disable interrupts and
 204  * add a mapping from the virtual address reserved for that CPU to the
 205  * physical address in the kernel pagetable.
206 */
207 struct local_sysmaps {
208 vm_offset_t base;
209 uint32_t saved_intr;
210 uint16_t valid1, valid2;
211 };
212 static struct local_sysmaps sysmap_lmem[MAXCPU];
213
214 static __inline void
215 pmap_alloc_lmem_map(void)
216 {
217 int i;
218
219 for (i = 0; i < MAXCPU; i++) {
220 sysmap_lmem[i].base = virtual_avail;
221 virtual_avail += PAGE_SIZE * 2;
222 sysmap_lmem[i].valid1 = sysmap_lmem[i].valid2 = 0;
223 }
224 }
225
226 static __inline vm_offset_t
227 pmap_lmem_map1(vm_paddr_t phys)
228 {
229 struct local_sysmaps *sysm;
230 pt_entry_t *pte, npte;
231 vm_offset_t va;
232 uint32_t intr;
233 int cpu;
234
235 intr = intr_disable();
236 cpu = PCPU_GET(cpuid);
237 sysm = &sysmap_lmem[cpu];
238 sysm->saved_intr = intr;
239 va = sysm->base;
240 npte = TLBLO_PA_TO_PFN(phys) |
241 PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
242 pte = pmap_pte(kernel_pmap, va);
243 *pte = npte;
244 sysm->valid1 = 1;
245 return (va);
246 }
247
248 static __inline vm_offset_t
249 pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
250 {
251 struct local_sysmaps *sysm;
252 pt_entry_t *pte, npte;
253 vm_offset_t va1, va2;
254 uint32_t intr;
255 int cpu;
256
257 intr = intr_disable();
258 cpu = PCPU_GET(cpuid);
259 sysm = &sysmap_lmem[cpu];
260 sysm->saved_intr = intr;
261 va1 = sysm->base;
262 va2 = sysm->base + PAGE_SIZE;
263 npte = TLBLO_PA_TO_PFN(phys1) |
264 PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
265 pte = pmap_pte(kernel_pmap, va1);
266 *pte = npte;
267 npte = TLBLO_PA_TO_PFN(phys2) |
268 PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
269 pte = pmap_pte(kernel_pmap, va2);
270 *pte = npte;
271 sysm->valid1 = 1;
272 sysm->valid2 = 1;
273 return (va1);
274 }
275
276 static __inline void
277 pmap_lmem_unmap(void)
278 {
279 struct local_sysmaps *sysm;
280 pt_entry_t *pte;
281 int cpu;
282
283 cpu = PCPU_GET(cpuid);
284 sysm = &sysmap_lmem[cpu];
285 pte = pmap_pte(kernel_pmap, sysm->base);
286 *pte = PTE_G;
287 tlb_invalidate_address(kernel_pmap, sysm->base);
288 sysm->valid1 = 0;
289 if (sysm->valid2) {
290 pte = pmap_pte(kernel_pmap, sysm->base + PAGE_SIZE);
291 *pte = PTE_G;
292 tlb_invalidate_address(kernel_pmap, sysm->base + PAGE_SIZE);
293 sysm->valid2 = 0;
294 }
295 intr_restore(sysm->saved_intr);
296 }
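/*
 * A minimal usage sketch for the helpers above (illustrative only):
 * the caller maps one or two physical pages, accesses them through the
 * returned KVA, and unmaps, which also restores the interrupt state
 * saved by pmap_lmem_map1()/pmap_lmem_map2():
 *
 *	va = pmap_lmem_map1(pa);
 *	bzero((caddr_t)va, PAGE_SIZE);
 *	pmap_lmem_unmap();
 *
 * Interrupts remain disabled for the whole window, so the per-CPU
 * reservation cannot be re-entered on the same CPU.
 */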
297 #else /* __mips_n64 */
298
299 static __inline void
300 pmap_alloc_lmem_map(void)
301 {
302 }
303
304 static __inline vm_offset_t
305 pmap_lmem_map1(vm_paddr_t phys)
306 {
307
308 return (0);
309 }
310
311 static __inline vm_offset_t
312 pmap_lmem_map2(vm_paddr_t phys1, vm_paddr_t phys2)
313 {
314
315 return (0);
316 }
317
318 static __inline vm_offset_t
319 pmap_lmem_unmap(void)
320 {
321
322 return (0);
323 }
324 #endif /* !__mips_n64 */
325
326 /*
327 * Page table entry lookup routines.
328 */
329 static __inline pd_entry_t *
330 pmap_segmap(pmap_t pmap, vm_offset_t va)
331 {
332
333 return (&pmap->pm_segtab[pmap_seg_index(va)]);
334 }
335
336 #ifdef __mips_n64
337 static __inline pd_entry_t *
338 pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
339 {
340 pd_entry_t *pde;
341
342 pde = (pd_entry_t *)*pdpe;
343 return (&pde[pmap_pde_index(va)]);
344 }
345
346 static __inline pd_entry_t *
347 pmap_pde(pmap_t pmap, vm_offset_t va)
348 {
349 pd_entry_t *pdpe;
350
351 pdpe = pmap_segmap(pmap, va);
352 if (pdpe == NULL || *pdpe == NULL)
353 return (NULL);
354
355 return (pmap_pdpe_to_pde(pdpe, va));
356 }
357 #else
358 static __inline pd_entry_t *
359 pmap_pdpe_to_pde(pd_entry_t *pdpe, vm_offset_t va)
360 {
361
362 return (pdpe);
363 }
364
365 static __inline
366 pd_entry_t *pmap_pde(pmap_t pmap, vm_offset_t va)
367 {
368
369 return (pmap_segmap(pmap, va));
370 }
371 #endif
372
373 static __inline pt_entry_t *
374 pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va)
375 {
376 pt_entry_t *pte;
377
378 pte = (pt_entry_t *)*pde;
379 return (&pte[pmap_pte_index(va)]);
380 }
381
382 pt_entry_t *
383 pmap_pte(pmap_t pmap, vm_offset_t va)
384 {
385 pd_entry_t *pde;
386
387 pde = pmap_pde(pmap, va);
388 if (pde == NULL || *pde == NULL)
389 return (NULL);
390
391 return (pmap_pde_to_pte(pde, va));
392 }
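/*
 * Taken together, the routines above implement a two-level walk on
 * 32-bit kernels (segment table -> page table) and a three-level walk
 * on n64 (segment table -> page directory -> page table). A NULL
 * return from pmap_pte() means some intermediate level is not yet
 * populated.
 */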
393
394 vm_offset_t
395 pmap_steal_memory(vm_size_t size)
396 {
397 vm_size_t bank_size;
398 vm_offset_t pa, va;
399
400 size = round_page(size);
401
402 bank_size = phys_avail[1] - phys_avail[0];
403 while (size > bank_size) {
404 int i;
405
406 for (i = 0; phys_avail[i + 2]; i += 2) {
407 phys_avail[i] = phys_avail[i + 2];
408 phys_avail[i + 1] = phys_avail[i + 3];
409 }
410 phys_avail[i] = 0;
411 phys_avail[i + 1] = 0;
412 if (!phys_avail[0])
413 panic("pmap_steal_memory: out of memory");
414 bank_size = phys_avail[1] - phys_avail[0];
415 }
416
417 pa = phys_avail[0];
418 phys_avail[0] += size;
419 if (MIPS_DIRECT_MAPPABLE(pa) == 0)
420 panic("Out of memory below 512Meg?");
421 va = MIPS_PHYS_TO_DIRECT(pa);
422 bzero((caddr_t)va, size);
423 return (va);
424 }
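/*
 * Example boot-time use (see pmap_bootstrap() below), where the message
 * buffer and thread0's kernel stack are both carved out this way:
 *
 *	msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize);
 *	kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
 *
 * The stolen range is removed from phys_avail[] and is therefore
 * invisible to the VM system from then on.
 */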
425
426 /*
427 * Bootstrap the system enough to run with virtual memory. This
428 * assumes that the phys_avail array has been initialized.
429 */
430 static void
431 pmap_create_kernel_pagetable(void)
432 {
433 int i, j;
434 vm_offset_t ptaddr;
435 pt_entry_t *pte;
436 #ifdef __mips_n64
437 pd_entry_t *pde;
438 vm_offset_t pdaddr;
439 int npt, npde;
440 #endif
441
442 /*
443 * Allocate segment table for the kernel
444 */
445 kernel_segmap = (pd_entry_t *)pmap_steal_memory(PAGE_SIZE);
446
447 /*
448 * Allocate second level page tables for the kernel
449 */
450 #ifdef __mips_n64
451 npde = howmany(NKPT, NPDEPG);
452 pdaddr = pmap_steal_memory(PAGE_SIZE * npde);
453 #endif
454 nkpt = NKPT;
455 ptaddr = pmap_steal_memory(PAGE_SIZE * nkpt);
456
457 /*
 458 	 * The R[4-7]?00 stores only one copy of the Global bit in the
 459 	 * translation lookaside buffer for each pair of page entries. Thus
 460 	 * invalid entries must have the Global bit set so that when the G
 461 	 * bits of the paired entries are ANDed together they produce a
 462 	 * global bit to store in the TLB.
463 */
464 for (i = 0, pte = (pt_entry_t *)ptaddr; i < (nkpt * NPTEPG); i++, pte++)
465 *pte = PTE_G;
466
467 #ifdef __mips_n64
468 for (i = 0, npt = nkpt; npt > 0; i++) {
469 kernel_segmap[i] = (pd_entry_t)(pdaddr + i * PAGE_SIZE);
470 pde = (pd_entry_t *)kernel_segmap[i];
471
472 for (j = 0; j < NPDEPG && npt > 0; j++, npt--)
473 pde[j] = (pd_entry_t)(ptaddr + (i * NPDEPG + j) * PAGE_SIZE);
474 }
475 #else
476 for (i = 0, j = pmap_seg_index(VM_MIN_KERNEL_ADDRESS); i < nkpt; i++, j++)
477 kernel_segmap[j] = (pd_entry_t)(ptaddr + (i * PAGE_SIZE));
478 #endif
479
480 PMAP_LOCK_INIT(kernel_pmap);
481 kernel_pmap->pm_segtab = kernel_segmap;
482 kernel_pmap->pm_active = ~0;
483 TAILQ_INIT(&kernel_pmap->pm_pvlist);
484 kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
485 kernel_pmap->pm_asid[0].gen = 0;
486 kernel_vm_end += nkpt * NPTEPG * PAGE_SIZE;
487 }
488
489 void
490 pmap_bootstrap(void)
491 {
492 int i;
493 int need_local_mappings = 0;
494
495 /* Sort. */
496 again:
497 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
498 /*
499 * Keep the memory aligned on page boundary.
500 */
501 phys_avail[i] = round_page(phys_avail[i]);
502 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
503
504 if (i < 2)
505 continue;
506 if (phys_avail[i - 2] > phys_avail[i]) {
507 vm_paddr_t ptemp[2];
508
509 ptemp[0] = phys_avail[i + 0];
510 ptemp[1] = phys_avail[i + 1];
511
512 phys_avail[i + 0] = phys_avail[i - 2];
513 phys_avail[i + 1] = phys_avail[i - 1];
514
515 phys_avail[i - 2] = ptemp[0];
516 phys_avail[i - 1] = ptemp[1];
517 goto again;
518 }
519 }
520
521 /*
 522 	 * In 32-bit mode we may have memory which cannot be mapped directly;
 523 	 * this memory will need a temporary mapping before it can be
 524 	 * accessed.
525 */
526 if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1]))
527 need_local_mappings = 1;
528
529 /*
530 * Copy the phys_avail[] array before we start stealing memory from it.
531 */
532 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
533 physmem_desc[i] = phys_avail[i];
534 physmem_desc[i + 1] = phys_avail[i + 1];
535 }
536
537 Maxmem = atop(phys_avail[i - 1]);
538
539 if (bootverbose) {
540 printf("Physical memory chunk(s):\n");
541 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
542 vm_paddr_t size;
543
544 size = phys_avail[i + 1] - phys_avail[i];
545 printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n",
546 (uintmax_t) phys_avail[i],
547 (uintmax_t) phys_avail[i + 1] - 1,
548 (uintmax_t) size, (uintmax_t) size / PAGE_SIZE);
549 }
550 printf("Maxmem is 0x%0lx\n", ptoa(Maxmem));
551 }
552 /*
553 * Steal the message buffer from the beginning of memory.
554 */
555 msgbufp = (struct msgbuf *)pmap_steal_memory(msgbufsize);
556 msgbufinit(msgbufp, msgbufsize);
557
558 /*
559 * Steal thread0 kstack.
560 */
561 kstack0 = pmap_steal_memory(KSTACK_PAGES << PAGE_SHIFT);
562
563 virtual_avail = VM_MIN_KERNEL_ADDRESS;
564 virtual_end = VM_MAX_KERNEL_ADDRESS;
565
566 #ifdef SMP
567 /*
568 * Steal some virtual address space to map the pcpu area.
569 */
570 virtual_avail = roundup2(virtual_avail, PAGE_SIZE * 2);
571 pcpup = (struct pcpu *)virtual_avail;
572 virtual_avail += PAGE_SIZE * 2;
573
574 /*
575 * Initialize the wired TLB entry mapping the pcpu region for
576 * the BSP at 'pcpup'. Up until this point we were operating
577 * with the 'pcpup' for the BSP pointing to a virtual address
578 * in KSEG0 so there was no need for a TLB mapping.
579 */
580 mips_pcpu_tlb_init(PCPU_ADDR(0));
581
582 if (bootverbose)
583 printf("pcpu is available at virtual address %p.\n", pcpup);
584 #endif
585
586 if (need_local_mappings)
587 pmap_alloc_lmem_map();
588 pmap_create_kernel_pagetable();
589 pmap_max_asid = VMNUM_PIDS;
590 mips_wr_entryhi(0);
591 mips_wr_pagemask(0);
592 }
593
594 /*
595 * Initialize a vm_page's machine-dependent fields.
596 */
597 void
598 pmap_page_init(vm_page_t m)
599 {
600
601 TAILQ_INIT(&m->md.pv_list);
602 m->md.pv_list_count = 0;
603 m->md.pv_flags = 0;
604 }
605
606 /*
607 * Initialize the pmap module.
608 * Called by vm_init, to initialize any structures that the pmap
609 * system needs to map virtual memory.
 610  * pmap_init has been enhanced to support discontiguous physical
 611  * memory in a fairly consistent way.
612 */
613 void
614 pmap_init(void)
615 {
616
617 /*
618 * Initialize the address space (zone) for the pv entries. Set a
619 * high water mark so that the system can recover from excessive
620 * numbers of pv entries.
621 */
622 pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
623 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
624 pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
625 pv_entry_high_water = 9 * (pv_entry_max / 10);
626 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
627 }
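/*
 * Worked sizing example (numbers are illustrative): with the default
 * PMAP_SHPGPERPROC of 200, maxproc == 1000, and 131072 physical pages,
 *
 *	pv_entry_max = 200 * 1000 + 131072 == 331072
 *	pv_entry_high_water = 9 * (331072 / 10) == 297963
 *
 * so the pagedaemon is woken once roughly 90% of the entries are in use.
 */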
628
629 /***************************************************
630 * Low level helper routines.....
631 ***************************************************/
632
633 static __inline void
634 pmap_invalidate_all_local(pmap_t pmap)
635 {
636
637 if (pmap == kernel_pmap) {
638 tlb_invalidate_all();
639 return;
640 }
641 if (pmap->pm_active & PCPU_GET(cpumask))
642 tlb_invalidate_all_user(pmap);
643 else
644 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
645 }
646
647 #ifdef SMP
648 static void
649 pmap_invalidate_all(pmap_t pmap)
650 {
651
652 smp_rendezvous(0, pmap_invalidate_all_action, 0, pmap);
653 }
654
655 static void
656 pmap_invalidate_all_action(void *arg)
657 {
658
659 pmap_invalidate_all_local((pmap_t)arg);
660 }
661 #else
662 static void
663 pmap_invalidate_all(pmap_t pmap)
664 {
665
666 pmap_invalidate_all_local(pmap);
667 }
668 #endif
669
670 static __inline void
671 pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va)
672 {
673
674 if (is_kernel_pmap(pmap)) {
675 tlb_invalidate_address(pmap, va);
676 return;
677 }
678 if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
679 return;
680 else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
681 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
682 return;
683 }
684 tlb_invalidate_address(pmap, va);
685 }
686
687 #ifdef SMP
688 struct pmap_invalidate_page_arg {
689 pmap_t pmap;
690 vm_offset_t va;
691 };
692
693 static void
694 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
695 {
696 struct pmap_invalidate_page_arg arg;
697
698 arg.pmap = pmap;
699 arg.va = va;
700 smp_rendezvous(0, pmap_invalidate_page_action, 0, &arg);
701 }
702
703 static void
704 pmap_invalidate_page_action(void *arg)
705 {
706 struct pmap_invalidate_page_arg *p = arg;
707
708 pmap_invalidate_page_local(p->pmap, p->va);
709 }
710 #else
711 static void
712 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
713 {
714
715 pmap_invalidate_page_local(pmap, va);
716 }
717 #endif
718
719 static __inline void
720 pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
721 {
722
723 if (is_kernel_pmap(pmap)) {
724 tlb_update(pmap, va, pte);
725 return;
726 }
727 if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
728 return;
729 else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
730 pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
731 return;
732 }
733 tlb_update(pmap, va, pte);
734 }
735
736 #ifdef SMP
737 struct pmap_update_page_arg {
738 pmap_t pmap;
739 vm_offset_t va;
740 pt_entry_t pte;
741 };
742
743 static void
744 pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
745 {
746 struct pmap_update_page_arg arg;
747
748 arg.pmap = pmap;
749 arg.va = va;
750 arg.pte = pte;
751 smp_rendezvous(0, pmap_update_page_action, 0, &arg);
752 }
753
754 static void
755 pmap_update_page_action(void *arg)
756 {
757 struct pmap_update_page_arg *p = arg;
758
759 pmap_update_page_local(p->pmap, p->va, p->pte);
760 }
761 #else
762 static void
763 pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
764 {
765
766 pmap_update_page_local(pmap, va, pte);
767 }
768 #endif
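/*
 * The three invalidation operations above share a common shootdown
 * pattern: a *_local() routine either acts on the TLB directly (for the
 * kernel pmap, or a pmap active on this CPU) or simply zeroes the
 * pmap's per-CPU ASID generation so that the next activation allocates
 * a fresh ASID; on SMP a wrapper runs the local routine on every CPU
 * via smp_rendezvous(), while on UP it collapses to a direct call.
 */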
769
770 /*
771 * Routine: pmap_extract
772 * Function:
773 * Extract the physical page address associated
774 * with the given map/virtual_address pair.
775 */
776 vm_paddr_t
777 pmap_extract(pmap_t pmap, vm_offset_t va)
778 {
779 pt_entry_t *pte;
780 vm_offset_t retval = 0;
781
782 PMAP_LOCK(pmap);
783 pte = pmap_pte(pmap, va);
784 if (pte) {
785 retval = TLBLO_PTE_TO_PA(*pte) | (va & PAGE_MASK);
786 }
787 PMAP_UNLOCK(pmap);
788 return (retval);
789 }
790
791 /*
792 * Routine: pmap_extract_and_hold
793 * Function:
794 * Atomically extract and hold the physical page
795 * with the given pmap and virtual address pair
796 * if that mapping permits the given protection.
797 */
798 vm_page_t
799 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
800 {
801 pt_entry_t pte;
802 vm_page_t m;
803
804 m = NULL;
805 vm_page_lock_queues();
806 PMAP_LOCK(pmap);
807 pte = *pmap_pte(pmap, va);
808 if (pte != 0 && pte_test(&pte, PTE_V) &&
809 (pte_test(&pte, PTE_D) || (prot & VM_PROT_WRITE) == 0)) {
810 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(pte));
811 vm_page_hold(m);
812 }
813 vm_page_unlock_queues();
814 PMAP_UNLOCK(pmap);
815 return (m);
816 }
817
818 /***************************************************
819 * Low level mapping routines.....
820 ***************************************************/
821
822 /*
823 * add a wired page to the kva
824 */
825 void
826 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int attr)
827 {
828 pt_entry_t *pte;
829 pt_entry_t opte, npte;
830
831 #ifdef PMAP_DEBUG
832 printf("pmap_kenter: va: %p -> pa: %p\n", (void *)va, (void *)pa);
833 #endif
834 npte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G | PTE_W | attr;
835
836 pte = pmap_pte(kernel_pmap, va);
837 opte = *pte;
838 *pte = npte;
839 if (pte_test(&opte, PTE_V) && opte != npte)
840 pmap_update_page(kernel_pmap, va, npte);
841 }
842
843 void
844 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
845 {
846
847 KASSERT(is_cacheable_mem(pa),
848 ("pmap_kenter: memory at 0x%lx is not cacheable", (u_long)pa));
849
850 pmap_kenter_attr(va, pa, PTE_C_CACHE);
851 }
852
853 /*
854 * remove a page from the kernel pagetables
855 */
856 /* PMAP_INLINE */ void
857 pmap_kremove(vm_offset_t va)
858 {
859 pt_entry_t *pte;
860
861 /*
862 * Write back all caches from the page being destroyed
863 */
864 mips_dcache_wbinv_range_index(va, PAGE_SIZE);
865
866 pte = pmap_pte(kernel_pmap, va);
867 *pte = PTE_G;
868 pmap_invalidate_page(kernel_pmap, va);
869 }
870
871 /*
872 * Used to map a range of physical addresses into kernel
873 * virtual address space.
874 *
875 * The value passed in '*virt' is a suggested virtual address for
876 * the mapping. Architectures which can support a direct-mapped
877 * physical to virtual region can return the appropriate address
878 * within that region, leaving '*virt' unchanged. Other
879 * architectures should map the pages starting at '*virt' and
880 * update '*virt' with the first usable address after the mapped
881 * region.
882 *
883 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
884 */
885 vm_offset_t
886 pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
887 {
888 vm_offset_t va, sva;
889
890 if (MIPS_DIRECT_MAPPABLE(end))
891 return (MIPS_PHYS_TO_DIRECT(start));
892
893 va = sva = *virt;
894 while (start < end) {
895 pmap_kenter(va, start);
896 va += PAGE_SIZE;
897 start += PAGE_SIZE;
898 }
899 *virt = va;
900 return (sva);
901 }
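/*
 * For example (illustrative addresses), on a 32-bit kernel a request
 * wholly below the direct-map limit consumes no KVA at all:
 * pmap_map(&virt, 0x1000000, 0x1008000, prot) simply returns
 * MIPS_PHYS_TO_DIRECT(0x1000000) and leaves *virt untouched.
 */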
902
903 /*
 904  * Add a list of wired pages to the kva.
 905  * This routine is only used for temporary
906 * kernel mappings that do not need to have
907 * page modification or references recorded.
908 * Note that old mappings are simply written
909 * over. The page *must* be wired.
910 */
911 void
912 pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
913 {
914 int i;
915 vm_offset_t origva = va;
916
917 for (i = 0; i < count; i++) {
918 pmap_flush_pvcache(m[i]);
919 pmap_kenter(va, VM_PAGE_TO_PHYS(m[i]));
920 va += PAGE_SIZE;
921 }
922
923 mips_dcache_wbinv_range_index(origva, PAGE_SIZE*count);
924 }
925
926 /*
927 * this routine jerks page mappings from the
928 * kernel -- it is meant only for temporary mappings.
929 */
930 void
931 pmap_qremove(vm_offset_t va, int count)
932 {
933 /*
 934 	 * No need to wb/inv caches here;
 935 	 * pmap_kremove will do it for us.
936 */
937
938 while (count-- > 0) {
939 pmap_kremove(va);
940 va += PAGE_SIZE;
941 }
942 }
943
944 /***************************************************
945 * Page table page management routines.....
946 ***************************************************/
947
948 /* Revision 1.507
949 *
950 * Simplify the reference counting of page table pages. Specifically, use
951 * the page table page's wired count rather than its hold count to contain
952 * the reference count.
953 */
954
955 /*
 956  * This routine drops a page table page's wire count, and when the
 957  * count reaches zero the page is freed by _pmap_unwire_pte_hold().
958 */
959 static PMAP_INLINE int
960 pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
961 {
962 --m->wire_count;
963 if (m->wire_count == 0)
964 return (_pmap_unwire_pte_hold(pmap, va, m));
965 else
966 return (0);
967 }
968
969 static int
970 _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
971 {
972 pd_entry_t *pde;
973
974 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
975 /*
976 * unmap the page table page
977 */
978 #ifdef __mips_n64
979 if (m->pindex < NUPDE)
980 pde = pmap_pde(pmap, va);
981 else
982 pde = pmap_segmap(pmap, va);
983 #else
984 pde = pmap_pde(pmap, va);
985 #endif
986 *pde = 0;
987 pmap->pm_stats.resident_count--;
988
989 #ifdef __mips_n64
990 if (m->pindex < NUPDE) {
991 pd_entry_t *pdp;
992 vm_page_t pdpg;
993
994 /*
995 * Recursively decrement next level pagetable refcount
996 */
997 pdp = (pd_entry_t *)*pmap_segmap(pmap, va);
998 pdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pdp));
999 pmap_unwire_pte_hold(pmap, va, pdpg);
1000 }
1001 #endif
1002 if (pmap->pm_ptphint == m)
1003 pmap->pm_ptphint = NULL;
1004
1005 /*
1006 * If the page is finally unwired, simply free it.
1007 */
1008 vm_page_free_zero(m);
1009 atomic_subtract_int(&cnt.v_wire_count, 1);
1010 return (1);
1011 }
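/*
 * Note that on n64 freeing a leaf page table page recurses upward: the
 * page directory page that mapped it loses a reference as well and may
 * be freed by the same path, mirroring the downward recursion in
 * _pmap_allocpte().
 */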
1012
1013 /*
1014 * After removing a page table entry, this routine is used to
1015 * conditionally free the page, and manage the hold/wire counts.
1016 */
1017 static int
1018 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
1019 {
1020 unsigned ptepindex;
1021 pd_entry_t pteva;
1022
1023 if (va >= VM_MAXUSER_ADDRESS)
1024 return (0);
1025
1026 if (mpte == NULL) {
1027 ptepindex = pmap_pde_pindex(va);
1028 if (pmap->pm_ptphint &&
1029 (pmap->pm_ptphint->pindex == ptepindex)) {
1030 mpte = pmap->pm_ptphint;
1031 } else {
1032 pteva = *pmap_pde(pmap, va);
1033 mpte = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(pteva));
1034 pmap->pm_ptphint = mpte;
1035 }
1036 }
1037 return (pmap_unwire_pte_hold(pmap, va, mpte));
1038 }
1039
1040 void
1041 pmap_pinit0(pmap_t pmap)
1042 {
1043 int i;
1044
1045 PMAP_LOCK_INIT(pmap);
1046 pmap->pm_segtab = kernel_segmap;
1047 pmap->pm_active = 0;
1048 pmap->pm_ptphint = NULL;
1049 for (i = 0; i < MAXCPU; i++) {
1050 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1051 pmap->pm_asid[i].gen = 0;
1052 }
1053 PCPU_SET(curpmap, pmap);
1054 TAILQ_INIT(&pmap->pm_pvlist);
1055 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1056 }
1057
1058 static void
 1059 pmap_grow_pte_page_cache(void)
1060 {
1061
1062 #ifdef __mips_n64
1063 vm_contig_grow_cache(3, 0, MIPS_XKPHYS_LARGEST_PHYS);
1064 #else
1065 vm_contig_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
1066 #endif
1067 }
1068
1069 static vm_page_t
1070 pmap_alloc_pte_page(unsigned int index, int req)
1071 {
1072 vm_page_t m;
1073
1074 m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, req);
1075 if (m == NULL)
1076 return (NULL);
1077
1078 if ((m->flags & PG_ZERO) == 0)
1079 pmap_zero_page(m);
1080
1081 m->pindex = index;
1082 atomic_add_int(&cnt.v_wire_count, 1);
1083 m->wire_count = 1;
1084 return (m);
1085 }
1086
1087 /*
1088 * Initialize a preallocated and zeroed pmap structure,
1089 * such as one in a vmspace structure.
1090 */
1091 int
1092 pmap_pinit(pmap_t pmap)
1093 {
1094 vm_offset_t ptdva;
1095 vm_page_t ptdpg;
1096 int i;
1097
1098 PMAP_LOCK_INIT(pmap);
1099
1100 /*
1101 * allocate the page directory page
1102 */
1103 while ((ptdpg = pmap_alloc_pte_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL)
1104 pmap_grow_pte_page_cache();
1105
1106 ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
1107 pmap->pm_segtab = (pd_entry_t *)ptdva;
1108 pmap->pm_active = 0;
1109 pmap->pm_ptphint = NULL;
1110 for (i = 0; i < MAXCPU; i++) {
1111 pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
1112 pmap->pm_asid[i].gen = 0;
1113 }
1114 TAILQ_INIT(&pmap->pm_pvlist);
1115 bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
1116
1117 return (1);
1118 }
1119
1120 /*
1121 * this routine is called if the page table page is not
1122 * mapped correctly.
1123 */
1124 static vm_page_t
1125 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
1126 {
1127 vm_offset_t pageva;
1128 vm_page_t m;
1129
1130 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1131 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1132 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1133
1134 /*
1135 * Find or fabricate a new pagetable page
1136 */
1137 if ((m = pmap_alloc_pte_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) {
1138 if (flags & M_WAITOK) {
1139 PMAP_UNLOCK(pmap);
1140 vm_page_unlock_queues();
1141 pmap_grow_pte_page_cache();
1142 vm_page_lock_queues();
1143 PMAP_LOCK(pmap);
1144 }
1145
1146 /*
1147 * Indicate the need to retry. While waiting, the page
1148 * table page may have been allocated.
1149 */
1150 return (NULL);
1151 }
1152
1153 /*
1154 * Map the pagetable page into the process address space, if it
1155 * isn't already there.
1156 */
1157 pageva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m));
1158
1159 #ifdef __mips_n64
1160 if (ptepindex >= NUPDE) {
1161 pmap->pm_segtab[ptepindex - NUPDE] = (pd_entry_t)pageva;
1162 } else {
1163 pd_entry_t *pdep, *pde;
1164 int segindex = ptepindex >> (SEGSHIFT - PDRSHIFT);
1165 int pdeindex = ptepindex & (NPDEPG - 1);
1166 vm_page_t pg;
1167
1168 pdep = &pmap->pm_segtab[segindex];
1169 if (*pdep == NULL) {
1170 /* recurse for allocating page dir */
1171 if (_pmap_allocpte(pmap, NUPDE + segindex,
1172 flags) == NULL) {
1173 /* alloc failed, release current */
1174 --m->wire_count;
1175 atomic_subtract_int(&cnt.v_wire_count, 1);
1176 vm_page_free_zero(m);
1177 return (NULL);
1178 }
1179 } else {
1180 pg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pdep));
1181 pg->wire_count++;
1182 }
1183 /* Next level entry */
1184 pde = (pd_entry_t *)*pdep;
1185 pde[pdeindex] = (pd_entry_t)pageva;
1186 pmap->pm_ptphint = m;
1187 }
1188 #else
1189 pmap->pm_segtab[ptepindex] = (pd_entry_t)pageva;
1190 #endif
1191 pmap->pm_stats.resident_count++;
1192
1193 /*
1194 * Set the page table hint
1195 */
1196 pmap->pm_ptphint = m;
1197 return (m);
1198 }
1199
1200 static vm_page_t
1201 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
1202 {
1203 unsigned ptepindex;
1204 pd_entry_t *pde;
1205 vm_page_t m;
1206
1207 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
1208 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
1209 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
1210
1211 /*
1212 * Calculate pagetable page index
1213 */
1214 ptepindex = pmap_pde_pindex(va);
1215 retry:
1216 /*
1217 * Get the page directory entry
1218 */
1219 pde = pmap_pde(pmap, va);
1220
1221 /*
1222 * If the page table page is mapped, we just increment the hold
1223 * count, and activate it.
1224 */
1225 if (pde != NULL && *pde != NULL) {
1226 /*
1227 * In order to get the page table page, try the hint first.
1228 */
1229 if (pmap->pm_ptphint &&
1230 (pmap->pm_ptphint->pindex == ptepindex)) {
1231 m = pmap->pm_ptphint;
1232 } else {
1233 m = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(*pde));
1234 pmap->pm_ptphint = m;
1235 }
1236 m->wire_count++;
1237 } else {
1238 /*
1239 * Here if the pte page isn't mapped, or if it has been
1240 * deallocated.
1241 */
1242 m = _pmap_allocpte(pmap, ptepindex, flags);
1243 if (m == NULL && (flags & M_WAITOK))
1244 goto retry;
1245 }
1246 return (m);
1247 }
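/*
 * Note the retry protocol above: with M_WAITOK, a failed
 * _pmap_allocpte() has dropped and re-taken the pmap and page queue
 * locks around pmap_grow_pte_page_cache(), so the page table may have
 * changed underneath us and the lookup must be restarted; with
 * M_NOWAIT, the NULL is simply propagated to the caller.
 */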
1248
1249
1250 /***************************************************
1251 * Pmap allocation/deallocation routines.
1252 ***************************************************/
1253 /*
1254 * Revision 1.397
1255 * - Merged pmap_release and pmap_release_free_page. When pmap_release is
1256 * called only the page directory page(s) can be left in the pmap pte
1257 * object, since all page table pages will have been freed by
1258 * pmap_remove_pages and pmap_remove. In addition, there can only be one
1259 * reference to the pmap and the page directory is wired, so the page(s)
1260 * can never be busy. So all there is to do is clear the magic mappings
1261 * from the page directory and free the page(s).
1262 */
1263
1264
1265 /*
1266 * Release any resources held by the given physical map.
1267 * Called when a pmap initialized by pmap_pinit is being released.
1268 * Should only be called if the map contains no valid mappings.
1269 */
1270 void
1271 pmap_release(pmap_t pmap)
1272 {
1273 vm_offset_t ptdva;
1274 vm_page_t ptdpg;
1275
1276 KASSERT(pmap->pm_stats.resident_count == 0,
1277 ("pmap_release: pmap resident count %ld != 0",
1278 pmap->pm_stats.resident_count));
1279
1280 ptdva = (vm_offset_t)pmap->pm_segtab;
1281 ptdpg = PHYS_TO_VM_PAGE(MIPS_DIRECT_TO_PHYS(ptdva));
1282
1283 ptdpg->wire_count--;
1284 atomic_subtract_int(&cnt.v_wire_count, 1);
1285 vm_page_free_zero(ptdpg);
1286 PMAP_LOCK_DESTROY(pmap);
1287 }
1288
1289 /*
1290 * grow the number of kernel page table entries, if needed
1291 */
1292 void
1293 pmap_growkernel(vm_offset_t addr)
1294 {
1295 vm_page_t nkpg;
1296 pd_entry_t *pde, *pdpe;
1297 pt_entry_t *pte;
1298 int i;
1299
1300 mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1301 addr = roundup2(addr, NBSEG);
1302 if (addr - 1 >= kernel_map->max_offset)
1303 addr = kernel_map->max_offset;
1304 while (kernel_vm_end < addr) {
1305 pdpe = pmap_segmap(kernel_pmap, kernel_vm_end);
1306 #ifdef __mips_n64
1307 if (*pdpe == 0) {
1308 /* new intermediate page table entry */
1309 nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT);
1310 if (nkpg == NULL)
1311 panic("pmap_growkernel: no memory to grow kernel");
1312 *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1313 continue; /* try again */
1314 }
1315 #endif
1316 pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
1317 if (*pde != 0) {
1318 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1319 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1320 kernel_vm_end = kernel_map->max_offset;
1321 break;
1322 }
1323 continue;
1324 }
1325
1326 /*
1327 * This index is bogus, but out of the way
1328 */
1329 nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT);
1330 if (!nkpg)
1331 panic("pmap_growkernel: no memory to grow kernel");
1332 nkpt++;
1333 *pde = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg));
1334
1335 /*
 1336 		 * The R[4-7]?00 stores only one copy of the Global bit in
 1337 		 * the translation lookaside buffer for each pair of page
 1338 		 * entries. Thus invalid entries must have the Global bit set
 1339 		 * so that when the G bits of the paired entries are ANDed
 1340 		 * together they produce a global bit to store in the TLB.
1341 */
1342 pte = (pt_entry_t *)*pde;
1343 for (i = 0; i < NPTEPG; i++)
1344 pte[i] = PTE_G;
1345
1346 kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
1347 if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1348 kernel_vm_end = kernel_map->max_offset;
1349 break;
1350 }
1351 }
1352 }
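/*
 * Growth proceeds in whole page table pages: each iteration advances
 * kernel_vm_end by NBPDR (one page table page's worth of KVA), and on
 * n64 a missing intermediate page directory page is allocated first
 * and the iteration retried.
 */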
1353
1354 /***************************************************
1355 * page management routines.
1356 ***************************************************/
1357
1358 /*
1359 * free the pv_entry back to the free list
1360 */
1361 static PMAP_INLINE void
1362 free_pv_entry(pv_entry_t pv)
1363 {
1364
1365 pv_entry_count--;
1366 uma_zfree(pvzone, pv);
1367 }
1368
1369 /*
1370 * get a new pv_entry, allocating a block from the system
1371 * when needed.
1372 * the memory allocation is performed bypassing the malloc code
1373 * because of the possibility of allocations at interrupt time.
1374 */
1375 static pv_entry_t
1376 get_pv_entry(pmap_t locked_pmap)
1377 {
1378 static const struct timeval printinterval = { 60, 0 };
1379 static struct timeval lastprint;
1380 struct vpgqueues *vpq;
1381 pt_entry_t *pte, oldpte;
1382 pmap_t pmap;
1383 pv_entry_t allocated_pv, next_pv, pv;
1384 vm_offset_t va;
1385 vm_page_t m;
1386
1387 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
1388 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1389 allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
1390 if (allocated_pv != NULL) {
1391 pv_entry_count++;
1392 if (pv_entry_count > pv_entry_high_water)
1393 pagedaemon_wakeup();
1394 else
1395 return (allocated_pv);
1396 }
1397 /*
1398 * Reclaim pv entries: At first, destroy mappings to inactive
1399 * pages. After that, if a pv entry is still needed, destroy
1400 * mappings to active pages.
1401 */
1402 if (ratecheck(&lastprint, &printinterval))
1403 printf("Approaching the limit on PV entries, "
1404 "increase the vm.pmap.shpgperproc tunable.\n");
1405 vpq = &vm_page_queues[PQ_INACTIVE];
1406 retry:
1407 TAILQ_FOREACH(m, &vpq->pl, pageq) {
1408 if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
1409 continue;
1410 TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
1411 va = pv->pv_va;
1412 pmap = pv->pv_pmap;
1413 /* Avoid deadlock and lock recursion. */
1414 if (pmap > locked_pmap)
1415 PMAP_LOCK(pmap);
1416 else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
1417 continue;
1418 pmap->pm_stats.resident_count--;
1419 pte = pmap_pte(pmap, va);
1420 KASSERT(pte != NULL, ("pte"));
1421 oldpte = *pte;
1422 if (is_kernel_pmap(pmap))
1423 *pte = PTE_G;
1424 else
1425 *pte = 0;
1426 KASSERT(!pte_test(&oldpte, PTE_W),
1427 ("wired pte for unwired page"));
1428 if (m->md.pv_flags & PV_TABLE_REF)
1429 vm_page_flag_set(m, PG_REFERENCED);
1430 if (pte_test(&oldpte, PTE_D))
1431 vm_page_dirty(m);
1432 pmap_invalidate_page(pmap, va);
1433 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1434 m->md.pv_list_count--;
1435 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1436 pmap_unuse_pt(pmap, va, pv->pv_ptem);
1437 if (pmap != locked_pmap)
1438 PMAP_UNLOCK(pmap);
1439 if (allocated_pv == NULL)
1440 allocated_pv = pv;
1441 else
1442 free_pv_entry(pv);
1443 }
1444 if (TAILQ_EMPTY(&m->md.pv_list)) {
1445 vm_page_flag_clear(m, PG_WRITEABLE);
1446 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1447 }
1448 }
1449 if (allocated_pv == NULL) {
1450 if (vpq == &vm_page_queues[PQ_INACTIVE]) {
1451 vpq = &vm_page_queues[PQ_ACTIVE];
1452 goto retry;
1453 }
1454 panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
1455 }
1456 return (allocated_pv);
1457 }
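/*
 * The reclamation above is deliberately two-pass: mappings of inactive
 * pages are destroyed first, and mappings of active pages are
 * sacrificed only if that yields no pv entry. Running dry even then
 * is a configuration problem, hence the panic naming the
 * vm.pmap.shpgperproc tunable.
 */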
1458
1459 /*
1460 * Revision 1.370
1461 *
1462 * Move pmap_collect() out of the machine-dependent code, rename it
1463 * to reflect its new location, and add page queue and flag locking.
1464 *
1465 * Notes: (1) alpha, i386, and ia64 had identical implementations
1466 * of pmap_collect() in terms of machine-independent interfaces;
1467 * (2) sparc64 doesn't require it; (3) powerpc had it as a TODO.
1468 *
1469 * MIPS implementation was identical to alpha [Junos 8.2]
1470 */
1471
1472 /*
1473 * If it is the first entry on the list, it is actually
1474 * in the header and we must copy the following entry up
1475 * to the header. Otherwise we must search the list for
1476 * the entry. In either case we free the now unused entry.
1477 */
1478
1479 static pv_entry_t
1480 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1481 {
1482 pv_entry_t pv;
1483
1484 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1485 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1486 if (pvh->pv_list_count < pmap->pm_stats.resident_count) {
1487 TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
1488 if (pmap == pv->pv_pmap && va == pv->pv_va)
1489 break;
1490 }
1491 } else {
1492 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
1493 if (va == pv->pv_va)
1494 break;
1495 }
1496 }
1497 if (pv != NULL) {
1498 TAILQ_REMOVE(&pvh->pv_list, pv, pv_list);
1499 pvh->pv_list_count--;
1500 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
1501 }
1502 return (pv);
1503 }
1504
1505 static void
1506 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1507 {
1508 pv_entry_t pv;
1509
1510 pv = pmap_pvh_remove(pvh, pmap, va);
1511 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found, pa %lx va %lx",
1512 (u_long)VM_PAGE_TO_PHYS(member2struct(vm_page, md, pvh)),
1513 (u_long)va));
1514 free_pv_entry(pv);
1515 }
1516
1517 static void
1518 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
1519 {
1520
1521 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1522 pmap_pvh_free(&m->md, pmap, va);
1523 if (TAILQ_EMPTY(&m->md.pv_list))
1524 vm_page_flag_clear(m, PG_WRITEABLE);
1525 }
1526
1527 /*
1528 * Conditionally create a pv entry.
1529 */
1530 static boolean_t
1531 pmap_try_insert_pv_entry(pmap_t pmap, vm_page_t mpte, vm_offset_t va,
1532 vm_page_t m)
1533 {
1534 pv_entry_t pv;
1535
1536 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1537 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1538 if (pv_entry_count < pv_entry_high_water &&
1539 (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
1540 pv_entry_count++;
1541 pv->pv_va = va;
1542 pv->pv_pmap = pmap;
1543 pv->pv_ptem = mpte;
1544 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1545 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1546 m->md.pv_list_count++;
1547 return (TRUE);
1548 } else
1549 return (FALSE);
1550 }
1551
1552 /*
1553 * pmap_remove_pte: do the things to unmap a page in a process
1554 */
1555 static int
1556 pmap_remove_pte(struct pmap *pmap, pt_entry_t *ptq, vm_offset_t va)
1557 {
1558 pt_entry_t oldpte;
1559 vm_page_t m;
1560 vm_offset_t pa;
1561
1562 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1563 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1564
1565 oldpte = *ptq;
1566 if (is_kernel_pmap(pmap))
1567 *ptq = PTE_G;
1568 else
1569 *ptq = 0;
1570
1571 if (pte_test(&oldpte, PTE_W))
1572 pmap->pm_stats.wired_count -= 1;
1573
1574 pmap->pm_stats.resident_count -= 1;
1575 pa = TLBLO_PTE_TO_PA(oldpte);
1576
1577 if (page_is_managed(pa)) {
1578 m = PHYS_TO_VM_PAGE(pa);
1579 if (pte_test(&oldpte, PTE_D)) {
1580 KASSERT(!pte_test(&oldpte, PTE_RO),
1581 ("%s: modified page not writable: va: %p, pte: 0x%x",
1582 __func__, (void *)va, oldpte));
1583 vm_page_dirty(m);
1584 }
1585 if (m->md.pv_flags & PV_TABLE_REF)
1586 vm_page_flag_set(m, PG_REFERENCED);
1587 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1588
1589 pmap_remove_entry(pmap, m, va);
1590 }
1591 return (pmap_unuse_pt(pmap, va, NULL));
1592 }
1593
1594 /*
1595 * Remove a single page from a process address space
1596 */
1597 static void
1598 pmap_remove_page(struct pmap *pmap, vm_offset_t va)
1599 {
1600 pt_entry_t *ptq;
1601
1602 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
1603 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1604 ptq = pmap_pte(pmap, va);
1605
1606 /*
1607 * if there is no pte for this address, just skip it!!!
1608 */
1609 if (!ptq || !pte_test(ptq, PTE_V)) {
1610 return;
1611 }
1612
1613 /*
1614 * Write back all caches from the page being destroyed
1615 */
1616 mips_dcache_wbinv_range_index(va, PAGE_SIZE);
1617
1618 /*
 1619 	 * Remove the pte and invalidate its TLB entry.
1620 */
1621 (void)pmap_remove_pte(pmap, ptq, va);
1622 pmap_invalidate_page(pmap, va);
1623
1624 return;
1625 }
1626
1627 /*
1628 * Remove the given range of addresses from the specified map.
1629 *
1630 * It is assumed that the start and end are properly
1631 * rounded to the page size.
1632 */
1633 void
1634 pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva)
1635 {
1636 vm_offset_t va_next;
1637 pd_entry_t *pde, *pdpe;
1638 pt_entry_t *pte;
1639
1640 if (pmap == NULL)
1641 return;
1642
1643 if (pmap->pm_stats.resident_count == 0)
1644 return;
1645
1646 vm_page_lock_queues();
1647 PMAP_LOCK(pmap);
1648
1649 /*
1650 * special handling of removing one page. a very common operation
1651 * and easy to short circuit some code.
1652 */
1653 if ((sva + PAGE_SIZE) == eva) {
1654 pmap_remove_page(pmap, sva);
1655 goto out;
1656 }
1657 for (; sva < eva; sva = va_next) {
1658 pdpe = pmap_segmap(pmap, sva);
1659 #ifdef __mips_n64
1660 if (*pdpe == 0) {
1661 va_next = (sva + NBSEG) & ~SEGMASK;
1662 if (va_next < sva)
1663 va_next = eva;
1664 continue;
1665 }
1666 #endif
1667 va_next = (sva + NBPDR) & ~PDRMASK;
1668 if (va_next < sva)
1669 va_next = eva;
1670
1671 pde = pmap_pdpe_to_pde(pdpe, sva);
1672 if (*pde == 0)
1673 continue;
1674 if (va_next > eva)
1675 va_next = eva;
1676 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next;
1677 pte++, sva += PAGE_SIZE) {
1678 pmap_remove_page(pmap, sva);
1679 }
1680 }
1681 out:
1682 vm_page_unlock_queues();
1683 PMAP_UNLOCK(pmap);
1684 }
1685
1686 /*
1687 * Routine: pmap_remove_all
1688 * Function:
1689 * Removes this physical page from
1690 * all physical maps in which it resides.
1691 * Reflects back modify bits to the pager.
1692 *
1693 * Notes:
1694 * Original versions of this routine were very
1695 * inefficient because they iteratively called
1696 * pmap_remove (slow...)
1697 */
1698
1699 void
1700 pmap_remove_all(vm_page_t m)
1701 {
1702 pv_entry_t pv;
1703 pt_entry_t *pte, tpte;
1704
1705 KASSERT((m->flags & PG_FICTITIOUS) == 0,
1706 ("pmap_remove_all: page %p is fictitious", m));
1707 vm_page_lock_queues();
1708
1709 if (m->md.pv_flags & PV_TABLE_REF)
1710 vm_page_flag_set(m, PG_REFERENCED);
1711
1712 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1713 PMAP_LOCK(pv->pv_pmap);
1714
1715 /*
 1716 		 * If it's the last mapping, write back all caches from
 1717 		 * the page being destroyed.
1718 */
1719 if (m->md.pv_list_count == 1)
1720 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
1721
1722 pv->pv_pmap->pm_stats.resident_count--;
1723
1724 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
1725
1726 tpte = *pte;
1727 if (is_kernel_pmap(pv->pv_pmap))
1728 *pte = PTE_G;
1729 else
1730 *pte = 0;
1731
1732 if (pte_test(&tpte, PTE_W))
1733 pv->pv_pmap->pm_stats.wired_count--;
1734
1735 /*
1736 * Update the vm_page_t clean and reference bits.
1737 */
1738 if (pte_test(&tpte, PTE_D)) {
1739 KASSERT(!pte_test(&tpte, PTE_RO),
1740 ("%s: modified page not writable: va: %p, pte: 0x%x",
1741 __func__, (void *)pv->pv_va, tpte));
1742 vm_page_dirty(m);
1743 }
1744 pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
1745
1746 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
1747 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
1748 m->md.pv_list_count--;
1749 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
1750 PMAP_UNLOCK(pv->pv_pmap);
1751 free_pv_entry(pv);
1752 }
1753
1754 vm_page_flag_clear(m, PG_WRITEABLE);
1755 m->md.pv_flags &= ~(PV_TABLE_REF | PV_TABLE_MOD);
1756 vm_page_unlock_queues();
1757 }
1758
1759 /*
1760 * Set the physical protection on the
1761 * specified range of this map as requested.
1762 */
1763 void
1764 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1765 {
1766 pt_entry_t *pte;
1767 pd_entry_t *pde, *pdpe;
1768 vm_offset_t va_next;
1769
1770 if (pmap == NULL)
1771 return;
1772
1773 if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1774 pmap_remove(pmap, sva, eva);
1775 return;
1776 }
1777 if (prot & VM_PROT_WRITE)
1778 return;
1779
1780 vm_page_lock_queues();
1781 PMAP_LOCK(pmap);
1782 for (; sva < eva; sva = va_next) {
1783 pt_entry_t pbits;
1784 vm_page_t m;
1785 vm_paddr_t pa;
1786
1787 pdpe = pmap_segmap(pmap, sva);
1788 #ifdef __mips_n64
1789 if (*pdpe == 0) {
1790 va_next = (sva + NBSEG) & ~SEGMASK;
1791 if (va_next < sva)
1792 va_next = eva;
1793 continue;
1794 }
1795 #endif
1796 va_next = (sva + NBPDR) & ~PDRMASK;
1797 if (va_next < sva)
1798 va_next = eva;
1799
1800 pde = pmap_pdpe_to_pde(pdpe, sva);
1801 if (pde == NULL || *pde == NULL)
1802 continue;
1803 if (va_next > eva)
1804 va_next = eva;
1805
1806 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
1807 sva += PAGE_SIZE) {
1808
1809 /* Skip invalid PTEs */
1810 if (!pte_test(pte, PTE_V))
1811 continue;
1812 pbits = *pte;
1813 pa = TLBLO_PTE_TO_PA(pbits);
1814 if (page_is_managed(pa) && pte_test(&pbits, PTE_D)) {
1815 m = PHYS_TO_VM_PAGE(pa);
1816 vm_page_dirty(m);
1817 m->md.pv_flags &= ~PV_TABLE_MOD;
1818 }
1819 pte_clear(&pbits, PTE_D);
1820 pte_set(&pbits, PTE_RO);
1821
1822 if (pbits != *pte) {
1823 *pte = pbits;
1824 pmap_update_page(pmap, sva, pbits);
1825 }
1826 }
1827 }
1828 vm_page_unlock_queues();
1829 PMAP_UNLOCK(pmap);
1830 }
1831
1832 /*
1833 * Insert the given physical page (p) at
1834 * the specified virtual address (v) in the
1835 * target physical map with the protection requested.
1836 *
1837 * If specified, the page will be wired down, meaning
1838 * that the related pte can not be reclaimed.
1839 *
1840 * NB: This is the only routine which MAY NOT lazy-evaluate
1841 * or lose information. That is, this routine must actually
1842 * insert this page into the given map NOW.
1843 */
1844 void
1845 pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
1846 vm_prot_t prot, boolean_t wired)
1847 {
1848 vm_offset_t pa, opa;
1849 pt_entry_t *pte;
1850 pt_entry_t origpte, newpte;
1851 pv_entry_t pv;
1852 vm_page_t mpte, om;
1853 int rw = 0;
1854
1855 if (pmap == NULL)
1856 return;
1857
1858 va &= ~PAGE_MASK;
1859 KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
1860 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 ||
1861 (m->oflags & VPO_BUSY) != 0,
1862 ("pmap_enter: page %p is not busy", m));
1863
1864 mpte = NULL;
1865
1866 vm_page_lock_queues();
1867 PMAP_LOCK(pmap);
1868
1869 /*
1870 * In the case that a page table page is not resident, we are
1871 * creating it here.
1872 */
1873 if (va < VM_MAXUSER_ADDRESS) {
1874 mpte = pmap_allocpte(pmap, va, M_WAITOK);
1875 }
1876 pte = pmap_pte(pmap, va);
1877
1878 /*
1879 * Page Directory table entry not valid, we need a new PT page
1880 */
1881 if (pte == NULL) {
1882 panic("pmap_enter: invalid page directory, pdir=%p, va=%p",
1883 (void *)pmap->pm_segtab, (void *)va);
1884 }
1885 pa = VM_PAGE_TO_PHYS(m);
1886 om = NULL;
1887 origpte = *pte;
1888 opa = TLBLO_PTE_TO_PA(origpte);
1889
1890 /*
1891 * Mapping has not changed, must be protection or wiring change.
1892 */
1893 if (pte_test(&origpte, PTE_V) && opa == pa) {
1894 /*
1895 * Wiring change, just update stats. We don't worry about
1896 * wiring PT pages as they remain resident as long as there
1897 * are valid mappings in them. Hence, if a user page is
1898 * wired, the PT page will be also.
1899 */
1900 if (wired && !pte_test(&origpte, PTE_W))
1901 pmap->pm_stats.wired_count++;
1902 else if (!wired && pte_test(&origpte, PTE_W))
1903 pmap->pm_stats.wired_count--;
1904
1905 KASSERT(!pte_test(&origpte, PTE_D | PTE_RO),
1906 ("%s: modified page not writable: va: %p, pte: 0x%x",
1907 __func__, (void *)va, origpte));
1908
1909 /*
1910 * Remove extra pte reference
1911 */
1912 if (mpte)
1913 mpte->wire_count--;
1914
1915 if (page_is_managed(opa)) {
1916 om = m;
1917 }
1918 goto validate;
1919 }
1920
1921 pv = NULL;
1922
1923 /*
1924 * Mapping has changed, invalidate old range and fall through to
1925 * handle validating new mapping.
1926 */
1927 if (opa) {
1928 if (pte_test(&origpte, PTE_W))
1929 pmap->pm_stats.wired_count--;
1930
1931 if (page_is_managed(opa)) {
1932 om = PHYS_TO_VM_PAGE(opa);
1933 pv = pmap_pvh_remove(&om->md, pmap, va);
1934 }
1935 if (mpte != NULL) {
1936 mpte->wire_count--;
1937 KASSERT(mpte->wire_count > 0,
1938 ("pmap_enter: missing reference to page table page,"
1939 " va: %p", (void *)va));
1940 }
1941 } else
1942 pmap->pm_stats.resident_count++;
1943
1944 /*
1945 * Enter on the PV list if part of our managed memory. Note that we
1946 * raise IPL while manipulating pv_table since pmap_enter can be
1947 * called at interrupt time.
1948 */
1949 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
1950 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
1951 ("pmap_enter: managed mapping within the clean submap"));
1952 if (pv == NULL)
1953 pv = get_pv_entry(pmap);
1954 pv->pv_va = va;
1955 pv->pv_pmap = pmap;
1956 pv->pv_ptem = mpte;
1957 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
1958 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
1959 m->md.pv_list_count++;
1960 } else if (pv != NULL)
1961 free_pv_entry(pv);
1962
1963 /*
1964 * Increment counters
1965 */
1966 if (wired)
1967 pmap->pm_stats.wired_count++;
1968
1969 validate:
1970 if ((access & VM_PROT_WRITE) != 0)
1971 m->md.pv_flags |= PV_TABLE_MOD | PV_TABLE_REF;
1972 rw = init_pte_prot(va, m, prot);
1973
1974 #ifdef PMAP_DEBUG
1975 printf("pmap_enter: va: %p -> pa: %p\n", (void *)va, (void *)pa);
1976 #endif
1977 /*
1978 * Now validate mapping with desired protection/wiring.
1979 */
1980 newpte = TLBLO_PA_TO_PFN(pa) | rw | PTE_V;
1981
1982 if (is_cacheable_mem(pa))
1983 newpte |= PTE_C_CACHE;
1984 else
1985 newpte |= PTE_C_UNCACHED;
1986
1987 if (wired)
1988 newpte |= PTE_W;
1989
1990 if (is_kernel_pmap(pmap))
1991 newpte |= PTE_G;
1992
1993 /*
1994 * if the mapping or permission bits are different, we need to
1995 * update the pte.
1996 */
1997 if (origpte != newpte) {
1998 if (pte_test(&origpte, PTE_V)) {
1999 *pte = newpte;
2000 if (page_is_managed(opa) && (opa != pa)) {
2001 if (om->md.pv_flags & PV_TABLE_REF)
2002 vm_page_flag_set(om, PG_REFERENCED);
2003 om->md.pv_flags &=
2004 ~(PV_TABLE_REF | PV_TABLE_MOD);
2005 }
2006 if (pte_test(&origpte, PTE_D)) {
2007 KASSERT(!pte_test(&origpte, PTE_RO),
2008 ("pmap_enter: modified page not writable:"
2009 " va: %p, pte: 0x%x", (void *)va, origpte));
2010 if (page_is_managed(opa))
2011 vm_page_dirty(om);
2012 }
2013 if (page_is_managed(opa) &&
2014 TAILQ_EMPTY(&om->md.pv_list))
2015 vm_page_flag_clear(om, PG_WRITEABLE);
2016 } else {
2017 *pte = newpte;
2018 }
2019 }
2020 pmap_update_page(pmap, va, newpte);
2021
2022 /*
2023 * Sync I & D caches for executable pages. Do this only if the
2024 * target pmap belongs to the current process. Otherwise, an
2025 * unresolvable TLB miss may occur.
2026 */
2027 if (!is_kernel_pmap(pmap) && (pmap == &curproc->p_vmspace->vm_pmap) &&
2028 (prot & VM_PROT_EXECUTE)) {
2029 mips_icache_sync_range(va, PAGE_SIZE);
2030 mips_dcache_wbinv_range(va, PAGE_SIZE);
2031 }
2032 vm_page_unlock_queues();
2033 PMAP_UNLOCK(pmap);
2034 }
2035
2036 /*
2037 * this code makes some *MAJOR* assumptions:
2038 * 1. Current pmap & pmap exists.
2039 * 2. Not wired.
2040 * 3. Read access.
2041 * 4. No page table pages.
2042 * but is *MUCH* faster than pmap_enter...
2043 */
2044
2045 void
2046 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2047 {
2048
2049 vm_page_lock_queues();
2050 PMAP_LOCK(pmap);
2051 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
2052 vm_page_unlock_queues();
2053 PMAP_UNLOCK(pmap);
2054 }
2055
2056 static vm_page_t
2057 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2058 vm_prot_t prot, vm_page_t mpte)
2059 {
2060 pt_entry_t *pte;
2061 vm_offset_t pa;
2062
2063 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2064 (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
2065 ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2066 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2067 PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2068
2069 /*
2070 * In the case that a page table page is not resident, we are
2071 * creating it here.
2072 */
2073 if (va < VM_MAXUSER_ADDRESS) {
2074 pd_entry_t *pde;
2075 unsigned ptepindex;
2076
2077 /*
2078 * Calculate pagetable page index
2079 */
2080 ptepindex = pmap_pde_pindex(va);
2081 if (mpte && (mpte->pindex == ptepindex)) {
2082 mpte->wire_count++;
2083 } else {
2084 /*
2085 * Get the page directory entry
2086 */
2087 pde = pmap_pde(pmap, va);
2088
2089 /*
2090 * If the page table page is mapped, we just
2091 * increment the hold count, and activate it.
2092 */
2093 if (pde && *pde != 0) {
2094 if (pmap->pm_ptphint &&
2095 (pmap->pm_ptphint->pindex == ptepindex)) {
2096 mpte = pmap->pm_ptphint;
2097 } else {
2098 mpte = PHYS_TO_VM_PAGE(
2099 MIPS_DIRECT_TO_PHYS(*pde));
2100 pmap->pm_ptphint = mpte;
2101 }
2102 mpte->wire_count++;
2103 } else {
2104 mpte = _pmap_allocpte(pmap, ptepindex,
2105 M_NOWAIT);
2106 if (mpte == NULL)
2107 return (mpte);
2108 }
2109 }
2110 } else {
2111 mpte = NULL;
2112 }
2113
2114 pte = pmap_pte(pmap, va);
2115 if (pte_test(pte, PTE_V)) {
2116 if (mpte != NULL) {
2117 mpte->wire_count--;
2118 mpte = NULL;
2119 }
2120 return (mpte);
2121 }
2122
2123 /*
2124 * Enter on the PV list if part of our managed memory.
2125 */
2126 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
2127 !pmap_try_insert_pv_entry(pmap, mpte, va, m)) {
2128 if (mpte != NULL) {
2129 pmap_unwire_pte_hold(pmap, va, mpte);
2130 mpte = NULL;
2131 }
2132 return (mpte);
2133 }
2134
2135 /*
2136 * Increment counters
2137 */
2138 pmap->pm_stats.resident_count++;
2139
2140 pa = VM_PAGE_TO_PHYS(m);
2141
2142 /*
2143 * Now validate mapping with RO protection
2144 */
2145 *pte = TLBLO_PA_TO_PFN(pa) | PTE_V;
2146
2147 if (is_cacheable_mem(pa))
2148 *pte |= PTE_C_CACHE;
2149 else
2150 *pte |= PTE_C_UNCACHED;
2151
2152 if (is_kernel_pmap(pmap))
2153 *pte |= PTE_G;
2154 else {
2155 *pte |= PTE_RO;
2156 /*
2157 * Sync I & D caches. Do this only if the target pmap
2158 * belongs to the current process. Otherwise, an
2159 * unresolvable TLB miss may occur. */
2160 if (pmap == &curproc->p_vmspace->vm_pmap) {
2161 va &= ~PAGE_MASK;
2162 mips_icache_sync_range(va, PAGE_SIZE);
2163 mips_dcache_wbinv_range(va, PAGE_SIZE);
2164 }
2165 }
2166 return (mpte);
2167 }
2168
2169 /*
2170 * Make a temporary mapping for a physical address. This is only intended
2171 * to be used for panic dumps.
2172 *
2173 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2174 */
2175 void *
2176 pmap_kenter_temporary(vm_paddr_t pa, int i)
2177 {
2178 vm_offset_t va;
2179
2180 if (i != 0)
2181 printf("%s: ERROR!!! More than one page of virtual address mapping is not supported\n",
2182 __func__);
2183
2184 if (MIPS_DIRECT_MAPPABLE(pa)) {
2185 va = MIPS_PHYS_TO_DIRECT(pa);
2186 } else {
2187 #ifndef __mips_n64 /* XXX : to be converted to new style */
2188 int cpu;
2189 register_t intr;
2190 struct local_sysmaps *sysm;
2191 pt_entry_t *pte, npte;
2192
2193 /* If this is used other than for dumps, we may need to leave
2194 * interrupts disabled on return. If crash dumps don't work when
2195 * we get to this point, consider leaving interrupts disabled
2196 * as a starting point.
2197 */
2198 intr = intr_disable();
2199 cpu = PCPU_GET(cpuid);
2200 sysm = &sysmap_lmem[cpu];
2201 /* Since this is for the debugger, no locks or any other fun */
2202 npte = TLBLO_PA_TO_PFN(pa) | PTE_D | PTE_V | PTE_G | PTE_W | PTE_C_CACHE;
2203 pte = pmap_pte(kernel_pmap, sysm->base);
2204 *pte = npte;
2205 sysm->valid1 = 1;
2206 pmap_update_page(kernel_pmap, sysm->base, npte);
2207 va = sysm->base;
2208 intr_restore(intr);
2209 #endif
2210 }
2211 return ((void *)va);
2212 }
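
/*
 * Illustrative sketch, not part of the original source: how crash-dump
 * code might use the temporary mapping, one page at a time.  The dump
 * output step is a placeholder comment.
 */
static void
example_dump_range(vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t pa;
	void *va;

	for (pa = start; pa < end; pa += PAGE_SIZE) {
		va = pmap_kenter_temporary(pa, 0);	/* only i == 0 is supported */
		/* ... write PAGE_SIZE bytes at va to the dump device ... */
		pmap_kenter_temporary_free(pa);
	}
}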
2213
2214 void
2215 pmap_kenter_temporary_free(vm_paddr_t pa)
2216 {
2217 #ifndef __mips_n64 /* XXX : to be converted to new style */
2218 int cpu;
2219 register_t intr;
2220 struct local_sysmaps *sysm;
2221 #endif
2222
2223 if (MIPS_DIRECT_MAPPABLE(pa)) {
2224 /* nothing to do for this case */
2225 return;
2226 }
2227 #ifndef __mips_n64 /* XXX : to be converted to new style */
2228 cpu = PCPU_GET(cpuid);
2229 sysm = &sysmap_lmem[cpu];
2230 if (sysm->valid1) {
2231 pt_entry_t *pte;
2232
2233 intr = intr_disable();
2234 pte = pmap_pte(kernel_pmap, sysm->base);
2235 *pte = PTE_G;
2236 pmap_invalidate_page(kernel_pmap, sysm->base);
2237 intr_restore(intr);
2238 sysm->valid1 = 0;
2239 }
2240 #endif
2241 }
2242
2243 /*
2244 * This code was moved to the machine-independent
2245 * vm_map_pmap_enter().
2246 */
2247
2248 /*
2249 * Maps a sequence of resident pages belonging to the same object.
2250 * The sequence begins with the given page m_start. This page is
2251 * mapped at the given virtual address start. Each subsequent page is
2252 * mapped at a virtual address that is offset from start by the same
2253 * amount as the page is offset from m_start within the object. The
2254 * last page in the sequence is the page with the largest offset from
2255 * m_start that can be mapped at a virtual address less than the given
2256 * virtual address end. Not every virtual page between start and end
2257 * is mapped; only those for which a resident page exists with the
2258 * corresponding offset from m_start are mapped.
2259 */
2260 void
2261 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2262 vm_page_t m_start, vm_prot_t prot)
2263 {
2264 vm_page_t m, mpte;
2265 vm_pindex_t diff, psize;
2266
2267 VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
2268 psize = atop(end - start);
2269 mpte = NULL;
2270 m = m_start;
2271 vm_page_lock_queues();
2272 PMAP_LOCK(pmap);
2273 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2274 mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
2275 prot, mpte);
2276 m = TAILQ_NEXT(m, listq);
2277 }
2278 vm_page_unlock_queues();
2279 PMAP_UNLOCK(pmap);
2280 }
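
/*
 * Illustrative sketch, not part of the original source: the shape of the
 * machine-independent caller, vm_map_pmap_enter(), which locates the
 * first resident page at or after pindex and hands the run to
 * pmap_enter_object().  vm_page_find_least() is assumed to exist in
 * this tree.
 */
static void
example_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m_start;

	VM_OBJECT_LOCK(object);
	m_start = vm_page_find_least(object, pindex);
	if (m_start != NULL)
		pmap_enter_object(pmap, start, end, m_start,
		    VM_PROT_READ | VM_PROT_EXECUTE);
	VM_OBJECT_UNLOCK(object);
}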
2281
2282 /*
2283 * pmap_object_init_pt preloads the ptes for a given object
2284 * into the specified pmap. This eliminates the blast of soft
2285 * faults on process startup and immediately after an mmap.
2286 */
2287 void
2288 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
2289 vm_object_t object, vm_pindex_t pindex, vm_size_t size)
2290 {
2291 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
2292 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2293 ("pmap_object_init_pt: non-device object"));
2294 }
2295
2296 /*
2297 * Routine: pmap_change_wiring
2298 * Function: Change the wiring attribute for a map/virtual-address
2299 * pair.
2300 * In/out conditions:
2301 * The mapping must already exist in the pmap.
2302 */
2303 void
2304 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
2305 {
2306 pt_entry_t *pte;
2307
2308 if (pmap == NULL)
2309 return;
2310
2311 PMAP_LOCK(pmap);
2312 pte = pmap_pte(pmap, va);
2313
2314 if (wired && !pte_test(pte, PTE_W))
2315 pmap->pm_stats.wired_count++;
2316 else if (!wired && pte_test(pte, PTE_W))
2317 pmap->pm_stats.wired_count--;
2318
2319 /*
2320 * Wiring is not a hardware characteristic so there is no need to
2321 * invalidate TLB.
2322 */
2323 if (wired)
2324 pte_set(pte, PTE_W);
2325 else
2326 pte_clear(pte, PTE_W);
2327 PMAP_UNLOCK(pmap);
2328 }
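
/*
 * Illustrative sketch, not part of the original source: (un)wiring every
 * pre-existing mapping in a range, one page at a time, as a caller such
 * as the fault-wiring code conceptually does.
 */
static void
example_change_wiring_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
    boolean_t wired)
{
	vm_offset_t va;

	for (va = sva; va < eva; va += PAGE_SIZE)
		pmap_change_wiring(pmap, va, wired);
}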
2329
2330 /*
2331 * Copy the range specified by src_addr/len
2332 * from the source map to the range dst_addr/len
2333 * in the destination map.
2334 *
2335 * This routine is only advisory and need not do anything.
2336 */
2337
2338 void
2339 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
2340 vm_size_t len, vm_offset_t src_addr)
2341 {
2342 }
2343
2344 /*
2345 * pmap_zero_page zeros the specified hardware page by mapping
2346 * the page into KVM and using bzero to clear its contents.
2347 *
2348 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2349 */
2350 void
2351 pmap_zero_page(vm_page_t m)
2352 {
2353 vm_offset_t va;
2354 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2355
2356 if (MIPS_DIRECT_MAPPABLE(phys)) {
2357 va = MIPS_PHYS_TO_DIRECT(phys);
2358 bzero((caddr_t)va, PAGE_SIZE);
2359 mips_dcache_wbinv_range(va, PAGE_SIZE);
2360 } else {
2361 va = pmap_lmem_map1(phys);
2362 bzero((caddr_t)va, PAGE_SIZE);
2363 mips_dcache_wbinv_range(va, PAGE_SIZE);
2364 pmap_lmem_unmap();
2365 }
2366 }
2367
2368 /*
2369 * pmap_zero_page_area zeros the specified hardware page by mapping
2370 * the page into KVM and using bzero to clear its contents.
2371 *
2372 * off and size may not cover an area beyond a single hardware page.
2373 */
2374 void
2375 pmap_zero_page_area(vm_page_t m, int off, int size)
2376 {
2377 vm_offset_t va;
2378 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2379
2380 if (MIPS_DIRECT_MAPPABLE(phys)) {
2381 va = MIPS_PHYS_TO_DIRECT(phys);
2382 bzero((char *)va + off, size);
2383 mips_dcache_wbinv_range(va + off, size);
2384 } else {
2385 va = pmap_lmem_map1(phys);
2386 bzero((char *)va + off, size);
2387 mips_dcache_wbinv_range(va + off, size);
2388 pmap_lmem_unmap();
2389 }
2390 }
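
/*
 * Illustrative sketch, not part of the original source: zeroing only the
 * invalid tail of a partially valid page, e.g. when a file does not end
 * on a page boundary.  'valid' is the byte offset up to which the page
 * holds real data.
 */
static void
example_zero_tail(vm_page_t m, int valid)
{

	if (valid < PAGE_SIZE)
		pmap_zero_page_area(m, valid, PAGE_SIZE - valid);
}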
2391
2392 void
2393 pmap_zero_page_idle(vm_page_t m)
2394 {
2395 vm_offset_t va;
2396 vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
2397
2398 if (MIPS_DIRECT_MAPPABLE(phys)) {
2399 va = MIPS_PHYS_TO_DIRECT(phys);
2400 bzero((caddr_t)va, PAGE_SIZE);
2401 mips_dcache_wbinv_range(va, PAGE_SIZE);
2402 } else {
2403 va = pmap_lmem_map1(phys);
2404 bzero((caddr_t)va, PAGE_SIZE);
2405 mips_dcache_wbinv_range(va, PAGE_SIZE);
2406 pmap_lmem_unmap();
2407 }
2408 }
2409
2410 /*
2411 * pmap_copy_page copies the specified (machine independent)
2412 * page by mapping the page into virtual memory and using
2413 * bcopy to copy the page, one machine dependent page at a
2414 * time.
2415 *
2416 * Use XKPHYS for 64 bit, and KSEG0 where possible for 32 bit.
2417 */
2418 void
2419 pmap_copy_page(vm_page_t src, vm_page_t dst)
2420 {
2421 vm_offset_t va_src, va_dst;
2422 vm_paddr_t phys_src = VM_PAGE_TO_PHYS(src);
2423 vm_paddr_t phys_dst = VM_PAGE_TO_PHYS(dst);
2424
2425 if (MIPS_DIRECT_MAPPABLE(phys_src) && MIPS_DIRECT_MAPPABLE(phys_dst)) {
2426 /* easy case, all can be accessed via KSEG0 */
2427 /*
2428 * Flush all cache lines for every VA mapped to the source page
2429 * to make sure that the data in SDRAM is up to date.
2430 */
2431 pmap_flush_pvcache(src);
2432 mips_dcache_wbinv_range_index(
2433 MIPS_PHYS_TO_DIRECT(phys_dst), PAGE_SIZE);
2434 va_src = MIPS_PHYS_TO_DIRECT(phys_src);
2435 va_dst = MIPS_PHYS_TO_DIRECT(phys_dst);
2436 bcopy((caddr_t)va_src, (caddr_t)va_dst, PAGE_SIZE);
2437 mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2438 } else {
2439 va_src = pmap_lmem_map2(phys_src, phys_dst);
2440 va_dst = va_src + PAGE_SIZE;
2441 bcopy((void *)va_src, (void *)va_dst, PAGE_SIZE);
2442 mips_dcache_wbinv_range(va_dst, PAGE_SIZE);
2443 pmap_lmem_unmap();
2444 }
2445 }
2446
2447 /*
2448 * Returns true if the pmap's pv is one of the first
2449 * 16 pvs linked to from this page. This count may
2450 * be changed upwards or downwards in the future; it
2451 * is only necessary that true be returned for a small
2452 * subset of pmaps for proper page aging.
2453 */
2454 boolean_t
2455 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2456 {
2457 pv_entry_t pv;
2458 int loops = 0;
2459 boolean_t rv;
2460
2461 KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
2462 ("pmap_page_exists_quick: page %p is not managed", m));
2463 rv = FALSE;
2464 vm_page_lock_queues();
2465 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2466 if (pv->pv_pmap == pmap) {
2467 rv = TRUE;
2468 break;
2469 }
2470 loops++;
2471 if (loops >= 16)
2472 break;
2473 }
2474 vm_page_unlock_queues();
2475 return (rv);
2476 }
2477
2478 /*
2479 * Remove all pages from the specified address space;
2480 * this aids process exit speeds. Also, this code is
2481 * special-cased for the current process only, but it
2482 * can have the more generic (and slightly slower)
2483 * mode enabled. This is much faster than pmap_remove
2484 * in the case of running down an entire address space.
2485 */
2486 void
2487 pmap_remove_pages(pmap_t pmap)
2488 {
2489 pt_entry_t *pte, tpte;
2490 pv_entry_t pv, npv;
2491 vm_page_t m;
2492
2493 if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
2494 printf("warning: pmap_remove_pages called with non-current pmap\n");
2495 return;
2496 }
2497 vm_page_lock_queues();
2498 PMAP_LOCK(pmap);
2499 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv != NULL; pv = npv) {
2500
2501 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2502 if (!pte_test(pte, PTE_V))
2503 panic("pmap_remove_pages: page on pm_pvlist has no pte");
2504 tpte = *pte;
2505
2506 /*
2507 * We cannot remove wired pages from a process' mapping at this time
2508 */
2509 if (pte_test(&tpte, PTE_W)) {
2510 npv = TAILQ_NEXT(pv, pv_plist);
2511 continue;
2512 }
2513 *pte = is_kernel_pmap(pmap) ? PTE_G : 0;
2514
2515 m = PHYS_TO_VM_PAGE(TLBLO_PTE_TO_PA(tpte));
2516 KASSERT(m != NULL,
2517 ("pmap_remove_pages: bad tpte %x", tpte));
2518
2519 pv->pv_pmap->pm_stats.resident_count--;
2520
2521 /*
2522 * Update the vm_page_t clean and reference bits.
2523 */
2524 if (pte_test(&tpte, PTE_D)) {
2525 vm_page_dirty(m);
2526 }
2527 npv = TAILQ_NEXT(pv, pv_plist);
2528 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
2529
2530 m->md.pv_list_count--;
2531 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
2532 if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
2533 vm_page_flag_clear(m, PG_WRITEABLE);
2534 }
2535 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
2536 free_pv_entry(pv);
2537 }
2538 pmap_invalidate_all(pmap);
2539 PMAP_UNLOCK(pmap);
2540 vm_page_unlock_queues();
2541 }
2542
2543 /*
2544 * pmap_testbit tests bits in PTEs.
2545 * Note that the testbit/changebit routines are inline,
2546 * so much of this evaluates at compile time.
2547 */
2548 static boolean_t
2549 pmap_testbit(vm_page_t m, int bit)
2550 {
2551 pv_entry_t pv;
2552 pt_entry_t *pte;
2553 boolean_t rv = FALSE;
2554
2555 if (m->flags & PG_FICTITIOUS)
2556 return (rv);
2557
2558 if (TAILQ_FIRST(&m->md.pv_list) == NULL)
2559 return (rv);
2560
2561 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2562 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2563 PMAP_LOCK(pv->pv_pmap);
2564 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2565 rv = pte_test(pte, bit);
2566 PMAP_UNLOCK(pv->pv_pmap);
2567 if (rv)
2568 break;
2569 }
2570 return (rv);
2571 }
2572
2573 /*
2574 * this routine is used to clear dirty bits in ptes
2575 */
2576 static __inline void
2577 pmap_changebit(vm_page_t m, int bit, boolean_t setem)
2578 {
2579 pv_entry_t pv;
2580 pt_entry_t *pte;
2581
2582 if (m->flags & PG_FICTITIOUS)
2583 return;
2584
2585 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2586 /*
2587 * Loop over all current mappings, setting/clearing as appropriate.
2588 * If setting RO, do we need to clear the VAC?
2589 */
2590 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2591 PMAP_LOCK(pv->pv_pmap);
2592 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2593 if (setem) {
2594 *pte |= bit;
2595 pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2596 } else {
2597 pt_entry_t pbits = *pte;
2598
2599 if (pbits & bit) {
2600 if (bit == PTE_D) {
2601 if (pbits & PTE_D)
2602 vm_page_dirty(m);
2603 *pte = (pbits & ~PTE_D) | PTE_RO;
2604 } else {
2605 *pte = pbits & ~bit;
2606 }
2607 pmap_update_page(pv->pv_pmap, pv->pv_va, *pte);
2608 }
2609 }
2610 PMAP_UNLOCK(pv->pv_pmap);
2611 }
2612 if (!setem && bit == PTE_D)
2613 vm_page_flag_clear(m, PG_WRITEABLE);
2614 }
2615
2616 /*
2617 * pmap_page_wired_mappings:
2618 *
2619 * Return the number of managed mappings to the given physical page
2620 * that are wired.
2621 */
2622 int
2623 pmap_page_wired_mappings(vm_page_t m)
2624 {
2625 pv_entry_t pv;
2626 pmap_t pmap;
2627 pt_entry_t *pte;
2628 int count;
2629
2630 count = 0;
2631 if ((m->flags & PG_FICTITIOUS) != 0)
2632 return (count);
2633 vm_page_lock_queues();
2634 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
2635 pmap = pv->pv_pmap;
2636 PMAP_LOCK(pmap);
2637 pte = pmap_pte(pmap, pv->pv_va);
2638 if (pte_test(pte, PTE_W))
2639 count++;
2640 PMAP_UNLOCK(pmap);
2641 }
2642 vm_page_unlock_queues();
2643 return (count);
2644 }
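
/*
 * Illustrative sketch, not part of the original source: the kind of
 * reclaimability test the page daemon performs; a wired page, or one
 * with wired mappings, must not be laundered or freed.
 */
static boolean_t
example_page_reclaimable(vm_page_t m)
{

	return (m->wire_count == 0 && pmap_page_wired_mappings(m) == 0);
}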
2645
2646 /*
2647 * Clear the write and modified bits in each of the given page's mappings.
2648 */
2649 void
2650 pmap_remove_write(vm_page_t m)
2651 {
2652 pv_entry_t pv, npv;
2653 vm_offset_t va;
2654 pt_entry_t *pte;
2655
2656 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2657 if ((m->flags & PG_WRITEABLE) == 0)
2658 return;
2659
2660 /*
2661 * Loop over all current mappings, setting/clearing as appropriate.
2662 */
2663 for (pv = TAILQ_FIRST(&m->md.pv_list); pv; pv = npv) {
2664 npv = TAILQ_NEXT(pv, pv_plist);
2665 pte = pmap_pte(pv->pv_pmap, pv->pv_va);
2666 if (pte == NULL || !pte_test(pte, PTE_V))
2667 panic("page on pm_pvlist has no pte");
2668
2669 va = pv->pv_va;
2670 pmap_protect(pv->pv_pmap, va, va + PAGE_SIZE,
2671 VM_PROT_READ | VM_PROT_EXECUTE);
2672 }
2673 vm_page_flag_clear(m, PG_WRITEABLE);
2674 }
2675
2676 /*
2677 * pmap_ts_referenced:
2678 *
2679 * Return the count of reference bits for a page, clearing all of them.
2680 */
2681 int
2682 pmap_ts_referenced(vm_page_t m)
2683 {
2684
2685 if (m->flags & PG_FICTITIOUS)
2686 return (0);
2687
2688 if (m->md.pv_flags & PV_TABLE_REF) {
2689 m->md.pv_flags &= ~PV_TABLE_REF;
2690 return (1);
2691 }
2692 return (0);
2693 }
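
/*
 * Illustrative sketch, not part of the original source: sampling page
 * activity as the pageout scan does.  On this pmap the returned count is
 * at most 1, because only the aggregate PV_TABLE_REF flag is kept.
 */
static int
example_page_activity(vm_page_t m)
{
	int refs;

	vm_page_lock_queues();
	refs = pmap_ts_referenced(m);
	vm_page_unlock_queues();
	return (refs);
}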
2694
2695 /*
2696 * pmap_is_modified:
2697 *
2698 * Return whether or not the specified physical page was modified
2699 * in any physical maps.
2700 */
2701 boolean_t
2702 pmap_is_modified(vm_page_t m)
2703 {
2704
2705 if (m->flags & PG_FICTITIOUS)
2706 return (FALSE);
2707
2708 if (m->md.pv_flags & PV_TABLE_MOD)
2709 return (TRUE);
2710 else
2711 return (pmap_testbit(m, PTE_D));
2712 }
2713
2714 /* N/C */
2715
2716 /*
2717 * pmap_is_prefaultable:
2718 *
2719 * Return whether or not the specified virtual address is eligible
2720 * for prefault.
2721 */
2722 boolean_t
2723 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2724 {
2725 pd_entry_t *pde;
2726 pt_entry_t *pte;
2727 boolean_t rv;
2728
2729 rv = FALSE;
2730 PMAP_LOCK(pmap);
2731 pde = pmap_pde(pmap, addr);
2732 if (pde != NULL && *pde != 0) {
2733 pte = pmap_pde_to_pte(pde, addr);
2734 rv = (*pte == 0);
2735 }
2736 PMAP_UNLOCK(pmap);
2737 return (rv);
2738 }
2739
2740 /*
2741 * Clear the modify bits on the specified physical page.
2742 */
2743 void
2744 pmap_clear_modify(vm_page_t m)
2745 {
2746 if (m->flags & PG_FICTITIOUS)
2747 return;
2748 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2749 if (m->md.pv_flags & PV_TABLE_MOD) {
2750 pmap_changebit(m, PTE_D, FALSE);
2751 m->md.pv_flags &= ~PV_TABLE_MOD;
2752 }
2753 }
2754
2755 /*
2756 * pmap_clear_reference:
2757 *
2758 * Clear the reference bit on the specified physical page.
2759 */
2760 void
2761 pmap_clear_reference(vm_page_t m)
2762 {
2763 if (m->flags & PG_FICTITIOUS)
2764 return;
2765
2766 mtx_assert(&vm_page_queue_mtx, MA_OWNED);
2767 if (m->md.pv_flags & PV_TABLE_REF) {
2768 m->md.pv_flags &= ~PV_TABLE_REF;
2769 }
2770 }
2771
2772 /*
2773 * Miscellaneous support routines follow
2774 */
2775
2783 /*
2784 * Map a set of physical memory pages into the kernel virtual
2785 * address space. Return a pointer to where it is mapped. This
2786 * routine is intended to be used for mapping device memory,
2787 * NOT real memory.
2788 *
2789 * Use XKPHYS uncached for 64 bit, and KSEG1 where possible for 32 bit.
2790 */
2791 void *
2792 pmap_mapdev(vm_offset_t pa, vm_size_t size)
2793 {
2794 vm_offset_t va, tmpva, offset;
2795
2796 /*
2797 * KSEG1 maps only the first 512MB of the physical address space. For
2798 * pa > 0x20000000 we must create a proper mapping using pmap_kenter_attr().
2799 */
2800 if (MIPS_DIRECT_MAPPABLE(pa + size - 1))
2801 return ((void *)MIPS_PHYS_TO_DIRECT_UNCACHED(pa));
2802 else {
2803 offset = pa & PAGE_MASK;
2804 size = roundup(size + offset, PAGE_SIZE);
2805
2806 va = kmem_alloc_nofault(kernel_map, size);
2807 if (!va)
2808 panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
2809 pa = trunc_page(pa);
2810 for (tmpva = va; size > 0;) {
2811 pmap_kenter_attr(tmpva, pa, PTE_C_UNCACHED);
2812 size -= PAGE_SIZE;
2813 tmpva += PAGE_SIZE;
2814 pa += PAGE_SIZE;
2815 }
2816 }
2817
2818 return ((void *)(va + offset));
2819 }
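
/*
 * Illustrative sketch, not part of the original source: mapping a block
 * of device registers, reading one 32-bit register, and unmapping.  The
 * register layout is hypothetical; note that directly mappable physical
 * addresses come back as uncached KSEG1/XKPHYS pointers with no KVA
 * allocation at all.
 */
static uint32_t
example_read_devreg(vm_offset_t regs_pa, vm_size_t regs_size, int reg)
{
	volatile uint32_t *regs;
	uint32_t v;

	regs = pmap_mapdev(regs_pa, regs_size);
	v = regs[reg];
	pmap_unmapdev((vm_offset_t)regs, regs_size);
	return (v);
}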
2820
2821 void
2822 pmap_unmapdev(vm_offset_t va, vm_size_t size)
2823 {
2824 #ifndef __mips_n64
2825 vm_offset_t base, offset, tmpva;
2826
2827 /* If the address is within KSEG1 then there is nothing to do */
2828 if (va >= MIPS_KSEG1_START && va <= MIPS_KSEG1_END)
2829 return;
2830
2831 base = trunc_page(va);
2832 offset = va & PAGE_MASK;
2833 size = roundup(size + offset, PAGE_SIZE);
2834 for (tmpva = base; tmpva < base + size; tmpva += PAGE_SIZE)
2835 pmap_kremove(tmpva);
2836 kmem_free(kernel_map, base, size);
2837 #endif
2838 }
2839
2840 /*
2841 * perform the pmap work for mincore
2842 */
2843 int
2844 pmap_mincore(pmap_t pmap, vm_offset_t addr)
2845 {
2846 pt_entry_t *ptep, pte;
2847 vm_page_t m;
2848 int val = 0;
2849
2850 PMAP_LOCK(pmap);
2851 ptep = pmap_pte(pmap, addr);
2852 pte = (ptep != NULL) ? *ptep : 0;
2853 PMAP_UNLOCK(pmap);
2854
2855 if (pte_test(&pte, PTE_V)) {
2856 vm_offset_t pa;
2857
2858 val = MINCORE_INCORE;
2859 pa = TLBLO_PTE_TO_PA(pte);
2860 if (!page_is_managed(pa))
2861 return (val);
2862
2863 m = PHYS_TO_VM_PAGE(pa);
2864
2865 /*
2866 * Modified by us
2867 */
2868 if (pte_test(&pte, PTE_D))
2869 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
2870 /*
2871 * Modified by someone
2872 */
2873 else {
2874 vm_page_lock_queues();
2875 if (m->dirty || pmap_is_modified(m))
2876 val |= MINCORE_MODIFIED_OTHER;
2877 vm_page_unlock_queues();
2878 }
2879 /*
2880 * Referenced by us or someone
2881 */
2882 vm_page_lock_queues();
2883 if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
2884 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
2885 vm_page_flag_set(m, PG_REFERENCED);
2886 }
2887 vm_page_unlock_queues();
2888 }
2889 return (val);
2890 }
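
/*
 * Illustrative sketch, not part of the original source: counting the
 * resident pages in a range by testing MINCORE_INCORE, much as the
 * mincore(2) system call walks a user address range.
 */
static int
example_count_resident(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va;
	int n;

	n = 0;
	for (va = sva; va < eva; va += PAGE_SIZE)
		if (pmap_mincore(pmap, va) & MINCORE_INCORE)
			n++;
	return (n);
}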
2891
2892
2893 void
2894 pmap_activate(struct thread *td)
2895 {
2896 pmap_t pmap, oldpmap;
2897 struct proc *p = td->td_proc;
2898
2899 critical_enter();
2900
2901 pmap = vmspace_pmap(p->p_vmspace);
2902 oldpmap = PCPU_GET(curpmap);
2903
2904 if (oldpmap)
2905 atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
2906 atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
2907 pmap_asid_alloc(pmap);
2908 if (td == curthread) {
2909 PCPU_SET(segbase, pmap->pm_segtab);
2910 mips_wr_entryhi(pmap->pm_asid[PCPU_GET(cpuid)].asid);
2911 }
2912
2913 PCPU_SET(curpmap, pmap);
2914 critical_exit();
2915 }
2916
2917 void
2918 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2919 {
2920 }
2921
2922 /*
2923 * Increase the starting virtual address of the given mapping if a
2924 * different alignment might result in more superpage mappings.
2925 */
2926 void
2927 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2928 vm_offset_t *addr, vm_size_t size)
2929 {
2930 vm_offset_t superpage_offset;
2931
2932 if (size < NBSEG)
2933 return;
2934 if (object != NULL && (object->flags & OBJ_COLORED) != 0)
2935 offset += ptoa(object->pg_color);
2936 superpage_offset = offset & SEGMASK;
2937 if (size - ((NBSEG - superpage_offset) & SEGMASK) < NBSEG ||
2938 (*addr & SEGMASK) == superpage_offset)
2939 return;
2940 if ((*addr & SEGMASK) < superpage_offset)
2941 *addr = (*addr & ~SEGMASK) + superpage_offset;
2942 else
2943 *addr = ((*addr + SEGMASK) & ~SEGMASK) + superpage_offset;
2944 }
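
/*
 * Worked example (illustrative, assuming 4KB pages and NBSEG == 4MB, so
 * SEGMASK == 0x3fffff): for offset 0x00501000 within a sufficiently
 * large object, superpage_offset == 0x00101000.  A hint of
 * *addr == 0x20000000 has (*addr & SEGMASK) == 0, which is below
 * superpage_offset, so the address is advanced to 0x20101000.  The
 * virtual address and the object offset then agree modulo NBSEG, the
 * precondition for creating superpage mappings.
 */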
2945
2946 /*
2947 * Increase the starting virtual address of the given mapping so
2948 * that it is aligned to not be the second page in a TLB entry.
2949 * This routine assumes that the length is appropriately sized so
2950 * that, if required, the allocation does not share a TLB entry at all.
2951 */
2952 void
2953 pmap_align_tlb(vm_offset_t *addr)
2954 {
2955 if ((*addr & PAGE_SIZE) == 0)
2956 return;
2957 *addr += PAGE_SIZE;
2958 return;
2959 }
2960
2961 #ifdef DDB
2962 DB_SHOW_COMMAND(ptable, ddb_pid_dump)
2963 {
2964 pmap_t pmap;
2965 struct thread *td = NULL;
2966 struct proc *p;
2967 int i, j, k;
2968 vm_paddr_t pa;
2969 vm_offset_t va;
2970
2971 if (have_addr) {
2972 td = db_lookup_thread(addr, TRUE);
2973 if (td == NULL) {
2974 db_printf("Invalid pid or tid");
2975 return;
2976 }
2977 p = td->td_proc;
2978 if (p->p_vmspace == NULL) {
2979 db_printf("No vmspace for process");
2980 return;
2981 }
2982 pmap = vmspace_pmap(p->p_vmspace);
2983 } else
2984 pmap = kernel_pmap;
2985
2986 db_printf("pmap:%p segtab:%p asid:%x generation:%x\n",
2987 pmap, pmap->pm_segtab, pmap->pm_asid[0].asid,
2988 pmap->pm_asid[0].gen);
2989 for (i = 0; i < NPDEPG; i++) {
2990 pd_entry_t *pdpe;
2991 pt_entry_t *pde;
2992 pt_entry_t pte;
2993
2994 pdpe = (pd_entry_t *)pmap->pm_segtab[i];
2995 if (pdpe == NULL)
2996 continue;
2997 db_printf("[%4d] %p\n", i, pdpe);
2998 #ifdef __mips_n64
2999 for (j = 0; j < NPDEPG; j++) {
3000 pde = (pt_entry_t *)pdpe[j];
3001 if (pde == NULL)
3002 continue;
3003 db_printf("\t[%4d] %p\n", j, pde);
3004 #else
3005 {
3006 j = 0;
3007 pde = (pt_entry_t *)pdpe;
3008 #endif
3009 for (k = 0; k < NPTEPG; k++) {
3010 pte = pde[k];
3011 if (pte == 0 || !pte_test(&pte, PTE_V))
3012 continue;
3013 pa = TLBLO_PTE_TO_PA(pte);
3014 va = ((u_long)i << SEGSHIFT) | (j << PDRSHIFT) | (k << PAGE_SHIFT);
3015 db_printf("\t\t[%04d] va: %p pte: %8x pa:%lx\n",
3016 k, (void *)va, pte, (u_long)pa);
3017 }
3018 }
3019 }
3020 }
3021 #endif
3022
3023 #if defined(DEBUG)
3024
3025 static void pads(pmap_t pm);
3026 void pmap_pvdump(vm_offset_t pa);
3027
3028 /* print address space of pmap */
3029 static void
3030 pads(pmap_t pm)
3031 {
3032 unsigned va, i, j;
3033 pt_entry_t *ptep;
3034
3035 if (pm == kernel_pmap)
3036 return;
3037 for (i = 0; i < NPDEPG; i++)
3038 if (pm->pm_segtab[i])
3039 for (j = 0; j < NPTEPG; j++) {
3040 va = (i << SEGSHIFT) + (j << PAGE_SHIFT);
3041 if (pm == kernel_pmap && va < KERNBASE)
3042 continue;
3043 if (pm != kernel_pmap &&
3044 va >= VM_MAXUSER_ADDRESS)
3045 continue;
3046 ptep = pmap_pte(pm, va);
3047 if (pmap_pte_v(ptep))
3048 printf("%x:%x ", va, *(int *)ptep);
3049 }
3050
3051 }
3052
3053 void
3054 pmap_pvdump(vm_offset_t pa)
3055 {
3056 register pv_entry_t pv;
3057 vm_page_t m;
3058
3059 printf("pa %x", pa);
3060 m = PHYS_TO_VM_PAGE(pa);
3061 for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3062 pv = TAILQ_NEXT(pv, pv_list)) {
3063 printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
3064 pads(pv->pv_pmap);
3065 }
3066 printf(" ");
3067 }
3068
3069 /* N/C */
3070 #endif
3071
3072
3073 /*
3074 * Allocate TLB address space tag (called ASID or TLBPID) and return it.
3075 * It takes almost as much or more time to search the TLB for a
3076 * specific ASID and flush those entries as it does to flush the entire TLB.
3077 * Therefore, when we allocate a new ASID, we just take the next number. When
3078 * we run out of numbers, we flush the TLB, increment the generation count
3079 * and start over. ASID zero is reserved for kernel use.
3080 */
3081 static void
3082 pmap_asid_alloc(pmap_t pmap)
3083 {
3084 if (pmap->pm_asid[PCPU_GET(cpuid)].asid == PMAP_ASID_RESERVED ||
3085 pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
3088 if (PCPU_GET(next_asid) == pmap_max_asid) {
3089 tlb_invalidate_all_user(NULL);
3090 PCPU_SET(asid_generation,
3091 (PCPU_GET(asid_generation) + 1) & ASIDGEN_MASK);
3092 if (PCPU_GET(asid_generation) == 0) {
3093 PCPU_SET(asid_generation, 1);
3094 }
3095 PCPU_SET(next_asid, 1); /* 0 means invalid */
3096 }
3097 pmap->pm_asid[PCPU_GET(cpuid)].asid = PCPU_GET(next_asid);
3098 pmap->pm_asid[PCPU_GET(cpuid)].gen = PCPU_GET(asid_generation);
3099 PCPU_SET(next_asid, PCPU_GET(next_asid) + 1);
3100 }
3101 }
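
/*
 * Worked example (illustrative): suppose pmap_max_asid == 256.  ASIDs
 * 1..255 are handed out in order as pmaps are activated; the allocation
 * after next_asid reaches 256 flushes all user TLB entries, advances
 * asid_generation (skipping 0), and restarts next_asid at 1.  A pmap
 * whose cached generation no longer matches simply takes a fresh ASID
 * on its next activation, so no per-ASID TLB search is ever needed.
 */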
3102
3103 int
3104 page_is_managed(vm_offset_t pa)
3105 {
3106 vm_offset_t pgnum = mips_btop(pa);
3107
3108 if (pgnum >= first_page) {
3109 vm_page_t m;
3110
3111 m = PHYS_TO_VM_PAGE(pa);
3112 if (m == NULL)
3113 return (0);
3114 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0)
3115 return (1);
3116 }
3117 return (0);
3118 }
3119
3120 static int
3121 init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot)
3122 {
3123 int rw;
3124
3125 if (!(prot & VM_PROT_WRITE))
3126 rw = PTE_V | PTE_RO | PTE_C_CACHE;
3127 else if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
3128 if ((m->md.pv_flags & PV_TABLE_MOD) != 0)
3129 rw = PTE_V | PTE_D | PTE_C_CACHE;
3130 else
3131 rw = PTE_V | PTE_C_CACHE;
3132 vm_page_flag_set(m, PG_WRITEABLE);
3133 } else
3134 /* Needn't emulate a modified bit for unmanaged pages. */
3135 rw = PTE_V | PTE_D | PTE_C_CACHE;
3136 return (rw);
3137 }
3138
3139 /*
3140 * pmap_emulate_modified : do dirty bit emulation
3141 *
3142 * On SMP, update just the local TLB, other CPUs will update their
3143 * TLBs from PTE lazily, if they get the exception.
3144 * Returns 0 on success, 1 if the page is read-only and we
3145 * need to fault.
3146 */
3147 int
3148 pmap_emulate_modified(pmap_t pmap, vm_offset_t va)
3149 {
3150 vm_page_t m;
3151 pt_entry_t *pte;
3152 vm_offset_t pa;
3153
3154 PMAP_LOCK(pmap);
3155 pte = pmap_pte(pmap, va);
3156 if (pte == NULL)
3157 panic("pmap_emulate_modified: can't find PTE");
3158 #ifdef SMP
3159 /* It is possible that some other CPU changed m-bit */
3160 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) {
3161 pmap_update_page_local(pmap, va, *pte);
3162 PMAP_UNLOCK(pmap);
3163 return (0);
3164 }
3165 #else
3166 if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D))
3167 panic("pmap_emulate_modified: invalid pte");
3168 #endif
3169 if (pte_test(pte, PTE_RO)) {
3170 /* write to read only page in the kernel */
3171 PMAP_UNLOCK(pmap);
3172 return (1);
3173 }
3174 pte_set(pte, PTE_D);
3175 pmap_update_page_local(pmap, va, *pte);
3176 pa = TLBLO_PTE_TO_PA(*pte);
3177 if (!page_is_managed(pa))
3178 panic("pmap_emulate_modified: unmanaged page");
3179 m = PHYS_TO_VM_PAGE(pa);
3180 m->md.pv_flags |= (PV_TABLE_REF | PV_TABLE_MOD);
3181 PMAP_UNLOCK(pmap);
3182 return (0);
3183 }
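
/*
 * Illustrative sketch, not part of the original source: the shape of the
 * TLB-modified exception path that consumes this function.  The real
 * caller lives in the trap handling code; the names here are
 * hypothetical.
 */
static int
example_tlb_mod_handler(pmap_t pmap, vm_offset_t badvaddr)
{

	if (pmap_emulate_modified(pmap, badvaddr) != 0) {
		/* Genuinely read-only: the caller must deliver SIGSEGV. */
		return (1);
	}
	return (0);		/* PTE_D now set; retry the faulting store. */
}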
3184
3185 /*
3186 * Routine: pmap_kextract
3187 * Function:
3188 * Extract the physical page address associated
3189 * virtual address.
3190 */
3191 /* PMAP_INLINE */ vm_offset_t
3192 pmap_kextract(vm_offset_t va)
3193 {
3194 int mapped;
3195
3196 /*
3197 * First, the direct-mapped regions.
3198 */
3199 #if defined(__mips_n64)
3200 if (va >= MIPS_XKPHYS_START && va < MIPS_XKPHYS_END)
3201 return (MIPS_XKPHYS_TO_PHYS(va));
3202 #endif
3203 if (va >= MIPS_KSEG0_START && va < MIPS_KSEG0_END)
3204 return (MIPS_KSEG0_TO_PHYS(va));
3205
3206 if (va >= MIPS_KSEG1_START && va < MIPS_KSEG1_END)
3207 return (MIPS_KSEG1_TO_PHYS(va));
3208
3209 /*
3210 * User virtual addresses.
3211 */
3212 if (va < VM_MAXUSER_ADDRESS) {
3213 pt_entry_t *ptep;
3214
3215 if (curproc && curproc->p_vmspace) {
3216 ptep = pmap_pte(&curproc->p_vmspace->vm_pmap, va);
3217 if (ptep) {
3218 return (TLBLO_PTE_TO_PA(*ptep) |
3219 (va & PAGE_MASK));
3220 }
3221 return (0);
3222 }
3223 }
3224
3225 /*
3226 * Should be kernel virtual here, otherwise fail
3227 */
3228 mapped = (va >= MIPS_KSEG2_START && va < MIPS_KSEG2_END);
3229 #if defined(__mips_n64)
3230 mapped = mapped || (va >= MIPS_XKSEG_START && va < MIPS_XKSEG_END);
3231 #endif
3232 /*
3233 * Kernel virtual.
3234 */
3235
3236 if (mapped) {
3237 pt_entry_t *ptep;
3238
3239 /* Is the kernel pmap initialized? */
3240 if (kernel_pmap->pm_active) {
3241 /* It's inside the virtual address range */
3242 ptep = pmap_pte(kernel_pmap, va);
3243 if (ptep) {
3244 return (TLBLO_PTE_TO_PA(*ptep) |
3245 (va & PAGE_MASK));
3246 }
3247 }
3248 return (0);
3249 }
3250
3251 panic("%s for unknown address space %p.", __func__, (void *)va);
3252 }
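
/*
 * Illustrative sketch, not part of the original source: obtaining the
 * physical address of a wired kernel buffer, e.g. to program a DMA
 * descriptor.  This works for direct-mapped (KSEG0/KSEG1/XKPHYS) and
 * for page-table-mapped kernel virtual addresses alike.
 */
static vm_paddr_t
example_kva_to_paddr(void *buf)
{

	return (pmap_kextract((vm_offset_t)buf));
}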
3253
3254
3255 void
3256 pmap_flush_pvcache(vm_page_t m)
3257 {
3258 pv_entry_t pv;
3259
3260 if (m != NULL) {
3261 for (pv = TAILQ_FIRST(&m->md.pv_list); pv;
3262 pv = TAILQ_NEXT(pv, pv_list)) {
3263 mips_dcache_wbinv_range_index(pv->pv_va, PAGE_SIZE);
3264 }
3265 }
3266 }