FreeBSD/Linux Kernel Cross Reference
sys/pc/mmu.c
1 /*
2 * Memory mappings. Life was easier when 2G of memory was enough.
3 *
4 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
5 * (9load sits under 1M during the load). The memory from KZERO to the
6 * top of memory is mapped 1-1 with physical memory, starting at physical
7 * address 0. All kernel memory and data structures (i.e., the entries stored
8 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
9 * then the kernel can only have 256MB of memory for itself.
10 *
11 * The 256M below KZERO comprises three parts. The lowest 4M is the
12 * virtual page table, a virtual address representation of the current
13 * page table tree. The second 4M is used for temporary per-process
14 * mappings managed by kmap and kunmap. The remaining 248M is used
15 * for global (shared by all procs and all processors) device memory
16 * mappings and managed by vmap and vunmap. The total amount (256M)
17 * could probably be reduced somewhat if desired. The largest device
18 * mapping is that of the video card, and even though modern video cards
19 * have embarrassing amounts of memory, the video drivers only use one
20 * frame buffer worth (at most 16M). Each is described in more detail below.
21 *
22 * The VPT is a 4M frame constructed by inserting the pdb into itself.
23 * This short-circuits one level of the page tables, with the result that
24 * the contents of second-level page tables can be accessed at VPT.
25 * We use the VPT to edit the page tables (see mmu) after inserting them
26 * into the page directory. It is a convenient mechanism for mapping what
27 * might be otherwise-inaccessible pages. The idea was borrowed from
28 * the Exokernel.
29 *
30 * The VPT doesn't solve all our problems, because we still need to
31 * prepare page directories before we can install them. For that, we
32 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
33 */
34
35 #include "u.h"
36 #include "../port/lib.h"
37 #include "mem.h"
38 #include "dat.h"
39 #include "fns.h"
40 #include "io.h"
41
/*
 * Simple segment descriptors with no translation: flat 4GB limits,
 * differing only in privilege level, type, and default operand size.
 * Each initializer supplies the two 32-bit words of a Segdesc.
 */
#define	DATASEGM(p)	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	EXEC16SEGM(p)	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
/* TSS descriptor: base address b is split across both words */
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

/*
 * Prototype global descriptor table; each processor gets its own
 * copy in m->gdt (see mmuinit0/mmuinit, and the comment in mmuinit
 * explaining why it is kept out of the Mach page).
 */
Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]	EXEC16SEGM(0),		/* kernel code 16-bit */
};
61
static int didmmuinit;		/* set in mmuinit; selects allocator in mmuwalk */
static void taskswitch(ulong, ulong);
static void memglobal(void);

/*
 * Views into the virtual page table (see the comment at the top of
 * this file): vpt[VPTX(va)] is the pte for va, and vpd is the page
 * directory as seen through the VPT self-mapping.
 */
#define	vpt ((ulong*)VPT)
#define	VPTX(va)		(((ulong)(va))>>12)
#define	vpd (vpt+VPTX(VPT))
69
/*
 * Early per-processor MMU setup: give this processor its private
 * copy of the prototype GDT.
 */
void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}
75
/*
 * Full per-processor MMU initialization: install the VPT self-map,
 * allocate and load the TSS and GDT, load the IDT pointer, write-
 * protect kernel text, and switch onto the prototype page table.
 */
void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
		VPT, vpd, KMAP);

	memglobal();
	/* point the VPT pde at the pdb itself: the self-map trick */
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	memset(m->tss, 0, sizeof(Tss));
	/* iomap base placed past the TSS limit — presumably disables
	 * the I/O permission bitmap; TODO confirm against Tss layout */
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page.  Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard).  Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	/* patch this processor's TSS base address into its descriptor */
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

	/* load GDT register: 16-bit limit, 32-bit base */
	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	/* load IDT register: 256 gate descriptors at IDTADDR */
	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	/* load cr3 and the kernel stack, then the task register */
	taskswitch(PADDR(m->pdb),  (ulong)m + BY2PG);
	ltr(TSSSEL);
}
131
132 /*
133 * On processors that support it, we set the PTEGLOBAL bit in
134 * page table and page directory entries that map kernel memory.
135 * Doing this tells the processor not to bother flushing them
136 * from the TLB when doing the TLB flush associated with a
137 * context switch (write to CR3). Since kernel memory mappings
138 * are never removed, this is safe. (If we ever remove kernel memory
139 * mappings, we can do a full flush by turning off the PGE bit in CR4,
140 * writing to CR3, and then turning the PGE bit back on.)
141 *
142 * See also mmukmap below.
143 *
144 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
145 */
/*
 * Mark all valid kernel mappings in the prototype pdb with
 * PTEGLOBAL so they survive cr3 reloads (see the comment above).
 * Run only once, on the bootstrap processor, and only if the
 * processor supports global pages.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			/* 4MB pages have no second level to walk */
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}
172
173 /*
174 * Flush all the user-space and device-mapping mmu info
175 * for this process, because something has been deleted.
176 * It will be paged back in on demand.
177 */
void
flushmmu(void)
{
	int s;

	/* with interrupts off, mark this proc's tlb state stale and
	 * reload via mmuswitch, which frees the old user mappings */
	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}
188
189 /*
190 * Flush a single page mapping from the tlb.
191 */
192 void
193 flushpg(ulong va)
194 {
195 if(X86FAMILY(m->cpuidax) >= 4)
196 invlpg(va);
197 else
198 putcr3(getcr3());
199 }
200
201 /*
202 * Allocate a new page for a page directory.
203 * We keep a small cache of pre-initialized
204 * page directories in each mach.
205 */
/*
 * Return a page directory for a process, either from this
 * processor's cache of pre-initialized pdbs or freshly copied
 * from the prototype m->pdb.  Runs at splhi except around the
 * possibly-sleeping newpage call.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		/* newpage can sleep; drop priority while it runs */
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		/* initialize from the prototype via a temporary mapping */
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}
232
233 static void
234 mmupdbfree(Proc *proc, Page *p)
235 {
236 if(islo())
237 panic("mmupdbfree: islo");
238 m->pdbfree++;
239 if(m->pdbcnt >= 10){
240 p->next = proc->mmufree;
241 proc->mmufree = p;
242 }else{
243 p->next = m->pdbpool;
244 m->pdbpool = p;
245 m->pdbcnt++;
246 }
247 }
248
249 /*
250 * A user-space memory segment has been deleted, or the
251 * process is exiting. Clear all the pde entries for user-space
252 * memory mappings and device mappings. Any entries that
253 * are needed will be paged back in as necessary.
254 */
/*
 * A user-space memory segment has been deleted, or the
 * process is exiting.  Clear all the pde entries for user-space
 * memory mappings and device mappings by walking proc->mmuused,
 * then move that whole list onto proc->mmufree.  Any entries
 * that are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	/* page->daddr records which pde slot each page table occupies */
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	/* splice mmuused onto the front of mmufree; last points at
	 * the final next pointer of the mmuused chain */
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}
277
278 static void
279 taskswitch(ulong pdb, ulong stack)
280 {
281 Tss *tss;
282
283 tss = m->tss;
284 tss->ss0 = KDSEL;
285 tss->esp0 = stack;
286 tss->ss1 = KDSEL;
287 tss->esp1 = stack;
288 tss->ss2 = KDSEL;
289 tss->esp2 = stack;
290 putcr3(pdb);
291 }
292
/*
 * Install proc's address space on this processor: drop stale
 * user mappings if flagged, refresh the per-processor Mach pde
 * in proc's pdb, and switch stacks and cr3 via taskswitch.
 * Processes without their own pdb run on the prototype m->pdb.
 */
void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		/* MACHADDR pde is per-processor; copy the current one in */
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}
311
312 /*
313 * Release any pages allocated for a page directory base or page-tables
314 * for this process:
315 * switch to the prototype pdb for this processor (m->pdb);
316 * call mmuptefree() to place all pages used for page-tables (proc->mmuused)
317 * onto the process' free list (proc->mmufree). This has the side-effect of
318 * cleaning any user entries in the pdb (proc->mmupdb);
319 * if there's a pdb put it in the cache of pre-initialised pdb's
320 * for this processor (m->pdbpool) or on the process' free list;
321 * finally, place any pages freed back into the free pool (palloc).
322 * This routine is only called from schedinit() with palloc locked.
323 */
/*
 * Release all MMU pages held by an exiting process (see the
 * comment above).  Called from schedinit with palloc locked and
 * interrupts off; switches to the prototype pdb first so none of
 * the pages being freed is still live in cr3.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		/* kmaptable exists only when hung off a private pdb */
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	/* return the accumulated free pages to the global pool */
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}
370
371 /*
372 * Allocate and install pdb for the current process.
373 */
/*
 * Allocate and install pdb for the current process.
 * Idempotent: returns immediately if up already has one, and
 * re-checks under splhi because mmupdballoc may have slept.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	if(up->mmupdb != nil)
		return;
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	/* copy in this processor's Mach pde, then switch onto it */
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}
403
404 /*
405 * Update the mmu in response to a user fault. pa may have PTEWRITE set.
406 */
/*
 * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
 * Creates the second-level page table for va's 4MB region on demand,
 * taking a page from up->mmufree or newpage, then edits the pte
 * through the VPT.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 *
	 * In any event, I'm tired of searching for this bug.
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to. - rsc
	 */

	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		/* no page table for this 4MB region yet: allocate one */
		if(up->mmufree == 0){
			/* newpage can sleep; allow interrupts meanwhile */
			spllo();
			page = newpage(0, 0, 0);
			splhi();
		}
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		/* remember which pde slot this table fills, for mmuptefree */
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	/* only a previously-valid pte can be cached in the tlb */
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}
456
457 /*
458 * Double-check the user MMU.
459 * Error checking only.
460 */
461 void
462 checkmmu(ulong va, ulong pa)
463 {
464 if(up->mmupdb == 0)
465 return;
466 if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
467 return;
468 if(PPN(vpt[VPTX(va)]) != pa)
469 print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n",
470 up->pid, up->text,
471 va, pa, vpt[VPTX(va)]);
472 }
473
474 /*
475 * Walk the page-table pointed to by pdb and return a pointer
476 * to the entry for virtual address va at the requested level.
477 * If the entry is invalid and create isn't requested then bail
478 * out early. Otherwise, for the 2nd level walk, allocate a new
479 * page-table page and register it in the 1st level. This is used
480 * only to edit kernel mappings, which use pages from kernel memory,
481 * so it's okay to use KADDR to look at the tables.
482 */
/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level
 * (1 = pde, 2 = pte).  If the entry is invalid and create isn't
 * requested then bail out early.  Otherwise, for the 2nd level
 * walk, allocate a new page-table page and register it in the
 * 1st level.  This is used only to edit kernel mappings, which
 * use pages from kernel memory, so it's okay to use KADDR to
 * look at the tables.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		return table;

	case 2:
		/* cannot descend through a 4MB superpage */
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}
522
523 /*
524 * Device mappings are shared by all procs and processors and
525 * live in the virtual range VMAP to VMAP+VMAPSIZE. The master
526 * copy of the mappings is stored in mach0->pdb, and they are
527 * paged in from there as necessary by vmapsync during faults.
528 */
529
/* serializes vmapalloc+pdbmap in vmap; vunmap need not take it (see below) */
static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);
535
536 /*
537 * Add a device mapping to the vmap range.
538 */
/*
 * Add a device mapping to the vmap range: map size bytes of
 * physical address pa uncached+writable at a freshly allocated
 * virtual address in [VMAP, VMAP+VMAPSIZE).  Returns nil/0 on
 * failure.  pa need not be page-aligned; the returned pointer
 * carries the same offset within the page.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;

	size = ROUND(size, BY2PG);
	if(pa == 0){
		print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	/* mappings live in MACHP(0)->pdb; faults copy them via vmapsync */
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print("  vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}
573
574 static int
575 findhole(ulong *a, int n, int count)
576 {
577 int have, i;
578
579 have = 0;
580 for(i=0; i<n; i++){
581 if(a[i] == 0)
582 have++;
583 else
584 have = 0;
585 if(have >= count)
586 return i+1 - have;
587 }
588 return -1;
589 }
590
591 /*
592 * Look for free space in the vmap.
593 */
/*
 * Look for free space in the vmap.  Large (>= 4MB) requests are
 * satisfied with whole pde slots; smaller ones first search the
 * existing small-page tables, then claim a fresh pde slot.
 * Returns the virtual address, or 0 if no space.  Caller holds
 * vmaplock.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	/* look for room inside an existing small-page table first */
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}
624
625 /*
626 * Remove a device mapping from the vmap range.
627 * Since pdbunmap does not remove page tables, just entries,
628 * the call need not be interlocked with vmap.
629 */
/*
 * Remove a device mapping from the vmap range, then flush it
 * from every processor's tlb and every copied pdb.  Since
 * pdbunmap does not remove page tables, just entries, the call
 * need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
			va, size, getcallerpc(&va));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 * It is possible for vunmap to be called with up == nil,
	 * e.g. from the reset/init driver routines during system
	 * boot. In that case it suffices to flush the MACH(0) TLB
	 * and return.
	 */
	if(!active.thunderbirdsarego){
		putcr3(PADDR(MACHP(0)->pdb));
		return;
	}
	/* force every live proc to rebuild its mappings */
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	/* ask every other processor to flush, then do our own */
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
	/* busy-wait until each active processor has flushed */
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}
685
686 /*
687 * Add kernel mappings for pa -> va for a section of size bytes.
688 */
/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 * The low 12 bits of pa carry the pte flags to apply.  Uses 4MB
 * superpages where alignment and processor support (PSE) allow.
 * Always returns 0; failure to map panics.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pgsz, *pte, *table;
	ulong flag, off;

	/* separate the pte flags from the physical address */
	flag = pa&0xFFF;
	pa &= ~0xFFF;

	/* PSE: cpuid feature bit (edx bit 3) and CR4.PSE (bit 4) */
	if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	for(off=0; off<size; off+=pgsz){
		table = &pdb[PDX(va+off)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va+off, pa+off, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){
			*table = (pa+off)|flag|PTESIZE|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va+off, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va+off, pa+off, *pte);
			*pte = (pa+off)|flag|PTEVALID;
			pgsz = BY2PG;
		}
	}
	return 0;
}
728
729 /*
730 * Remove mappings. Must already exist, for sanity.
731 * Only used for kernel mappings, so okay to use KADDR.
732 */
/*
 * Remove mappings.  Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 * Clears whole pdes for 4MB superpages, individual ptes
 * otherwise; the page-table pages themselves are kept.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			/* 4MB superpage: clear the pde and skip to next slot */
			*table = 0;
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}
761
762 /*
763 * Handle a fault by bringing vmap up to date.
764 * Only copy pdb entries and they never go away,
765 * so no locking needed.
766 */
/*
 * Handle a fault by bringing vmap up to date: copy the relevant
 * pde from the master MACHP(0)->pdb into the current pdb via the
 * VPT.  Returns 1 if the copy will satisfy the fault, 0 if the
 * fault is not a stale vmap entry.  Only copy pdb entries and
 * they never go away, so no locking needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}
790
791
792 /*
793 * KMap is used to map individual pages into virtual memory.
794 * It is rare to have more than a few KMaps at a time (in the
795 * absence of interrupts, only two at a time are ever used,
796 * but interrupts can stack). The mappings are local to a process,
797 * so we can use the same range of virtual address space for
798 * all processes without any coordination.
799 */
/* the KMAP page table as seen through the VPT, and its entry count */
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)

/*
 * Map a single page into this process's KMAP window and return
 * its address.  Allocates the per-process kmap page table on
 * first use; afterwards scans kpt round-robin from lastkmap for
 * a free slot.  Panics if the window is exhausted.
 */
KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	if(up->nkmap < 0)
		panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);

	/*
	 * Splhi shouldn't be necessary here, but paranoia reigns.
	 * See comment in putmmu above.
	 */
	s = splhi();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		/* newpage can sleep; allow interrupts meanwhile */
		spllo();
		up->kmaptable = newpage(0, 0, 0);
		splhi();
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		flushpg((ulong)kpt);
		memset(kpt, 0, BY2PG);
		/* first mapping goes in slot 0 */
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		splx(s);
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	/* search for a free slot, starting just past the last one used */
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			splx(s);
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}
853
/*
 * Undo a kmap: clear the pte for the KMAP address k, flush it
 * from the tlb, and decrement the outstanding-kmap count.
 * Panics on any address not currently kmapped.
 */
void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
	up->nkmap--;
	if(up->nkmap < 0)
		panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	vpt[VPTX(va)] = 0;
	flushpg(va);
}
872
873 /*
874 * Temporary one-page mapping used to edit page directories.
875 *
876 * The fasttmp #define controls whether the code optimizes
877 * the case where the page is already mapped in the physical
878 * memory window.
879 */
#define fasttmp 1

/*
 * Map the page p at TMPADDR so it can be edited, and return
 * that address.  Pages that already sit in the KZERO physical
 * window are returned via KADDR directly (fasttmp).  Must be
 * called at splhi; only one tmpmap may be active at a time.
 */
void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

	/* already reachable through the KZERO window? */
	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		/* dump the relevant ptes before dying, for debugging */
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	/* idle state is the identity mapping of TMPADDR itself */
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}
911
/*
 * Release a tmpmap mapping: restore the TMPADDR pte to its idle
 * identity mapping and flush.  Fast-path (KADDR) mappings from
 * tmpmap need no work.  Must be called at splhi.
 */
void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	/* a fasttmp mapping was just KADDR(pa); nothing to undo */
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	/* must currently hold a real mapping, not the idle identity one */
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}
929
930 /*
931 * These could go back to being macros once the kernel is debugged,
932 * but the extra checking is nice to have.
933 */
934 void*
935 kaddr(ulong pa)
936 {
937 if(pa > (ulong)-KZERO)
938 panic("kaddr: pa=%#.8lux", pa);
939 return (void*)(pa+KZERO);
940 }
941
942 ulong
943 paddr(void *v)
944 {
945 ulong va;
946
947 va = (ulong)v;
948 if(va < KZERO)
949 panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v));
950 return va-KZERO;
951 }
952
953 /*
954 * More debugging.
955 */
/*
 * More debugging: account for every page held by the MMU code.
 * Two modes: with print==0, increment ref[] for each page owned
 * by a proc pdb/kmaptable/mmuused/mmufree list or a mach pdbpool,
 * complaining if a page is counted twice; with print!=0, report
 * the owner of any page whose ref[] entry is already nonzero.
 */
void
countpagerefs(ulong *ref, int print)
{
	int i, n;
	Mach *mm;
	Page *pg;
	Proc *p;

	n = 0;
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->mmupdb){
			if(print){
				if(ref[pagenumber(p->mmupdb)])
					iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
						p->mmupdb->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->mmupdb)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
					p->mmupdb->pa, i, p->pid);
		}
		if(p->kmaptable){
			if(print){
				if(ref[pagenumber(p->kmaptable)])
					iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
						p->kmaptable->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->kmaptable)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
					p->kmaptable->pa, i, p->pid);
		}
		for(pg=p->mmuused; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
					pg->pa, i, p->pid);
		}
		for(pg=p->mmufree; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
					pg->pa, i, p->pid);
		}
	}
	if(!print)
		iprint("%d pages in proc mmu\n", n);
	/* now the per-processor pdb caches */
	n = 0;
	for(i=0; i<conf.nmach; i++){
		mm = MACHP(i);
		for(pg=mm->pdbpool; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is in cpu%d pdbpool\n",
						pg->pa, i);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
					pg->pa, i);
		}
	}
	if(!print){
		iprint("%d pages in mach pdbpools\n", n);
		for(i=0; i<conf.nmach; i++)
			iprint("cpu%d: %d pdballoc, %d pdbfree\n",
				i, MACHP(i)->pdballoc, MACHP(i)->pdbfree);
	}
}
1046
/*
 * Hook for extra fault diagnostics; intentionally a no-op here.
 */
void
checkfault(ulong, ulong)
{
}
1051
1052 /*
1053 * Return the number of bytes that can be accessed via KADDR(pa).
1054 * If pa is not a valid argument to KADDR, return 0.
1055 */
1056 ulong
1057 cankaddr(ulong pa)
1058 {
1059 if(pa >= -KZERO)
1060 return 0;
1061 return -KZERO - pa;
1062 }
1063
Cache object: b5b170125b272fc32f6269bb4c450a2d
|