FreeBSD/Linux Kernel Cross Reference
sys/intel/pmap.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: pmap.c,v $
29 * Revision 2.18 93/01/14 17:32:25 danner
30 * Lock pmap_object!
31 * [92/09/22 dbg]
32 * Lock pmap_object!
33 * [92/09/22 dbg]
34 *
35 * Revision 2.17 92/04/06 23:12:15 rpd
36 * It's time to let us map page 0.
37 * [92/04/03 rvb]
38 *
39 * Revision 2.16 92/04/01 19:32:48 rpd
40 * Removed pmap_remove_attributes.
41 * [92/03/25 rpd]
42 *
43 * Revision 2.15 92/01/14 16:44:01 rpd
44 * Removed pmap_list_resident_pages.
45 * [91/12/31 rpd]
46 *
47 * Revision 2.14 91/12/10 16:32:15 jsb
48 * Fixes from Intel
49 * [91/12/10 15:51:38 jsb]
50 *
51 * Revision 2.13 91/11/18 17:37:09 rvb
52 * Up morevm for NORMA.
53 *
54 * Revision 2.12 91/08/28 11:13:08 jsb
55 * From Intel SSD: add data cache flush in INVALIDATE_TLB to work around
56 * some more subtle unknown bug in page table caching; allow user access
57 * to various bits of I/O space?
58 * [91/08/26 18:27:04 jsb]
59 *
60 * Revision 2.11 91/06/17 15:45:52 jsb
61 * Fixed reference to XEOD_OFF_PH for i860ipsc dcm module.
62 * [91/06/17 10:43:40 jsb]
63 *
64 * Revision 2.10 91/06/06 17:05:03 jsb
65 * Defined SPLVM, SPLX as null (vs. splvm, splx) in uniprocessor case.
66 * [91/05/13 17:12:34 jsb]
67 *
68 * Revision 2.9 91/05/18 14:31:14 rpd
69 * Moved pmap_free_pages, pmap_next_page to a model-dependent file.
70 * [91/05/15 rpd]
71 *
72 * Make sure hole_start and hole_end are page-aligned.
73 * [91/05/01 rpd]
74 *
75 * Removed pmap_update.
76 * [91/04/12 rpd]
77 *
78 * Added inuse_ptepages_count.
79 * Added vm_page_fictitious_addr assertions.
80 * [91/04/10 rpd]
81 * Added check_simple_locks to pmap_expand.
82 * [91/03/31 rpd]
83 * Changed vm_page_init to vm_page_insert.
84 * Added pmap_free_pages, pmap_next_page, pmap_virtual_space.
85 * [91/03/25 rpd]
86 *
87 * Revision 2.8 91/05/14 16:30:24 mrt
88 * Correcting copyright
89 *
90 * Revision 2.7 91/05/08 12:46:31 dbg
91 * Add volatile declarations where needed.
92 * Move pmap_valid_page to model_dependent file.
93 * [91/04/26 14:41:31 dbg]
94 *
95 * Revision 2.6 91/03/16 14:47:31 rpd
96 * Removed some incorrect (?) assertions.
97 * [91/03/13 14:18:51 rpd]
98 *
99 * Updated for new kmem_alloc interface.
100 * [91/03/03 rpd]
101 * Added continuation argument to VM_PAGE_WAIT.
102 * [91/02/05 rpd]
103 *
104 * Revision 2.5 91/02/14 14:08:11 mrt
105 * Fixed pmap_expand to use vm_page_grab/VM_PAGE_WAIT.
106 * [91/01/12 rpd]
107 *
108 * Revision 2.4 91/02/05 17:20:34 mrt
109 * Changed to new Mach copyright
110 * [91/01/31 18:17:35 mrt]
111 *
112 * Revision 2.3 91/01/08 15:12:47 rpd
113 * Changed pmap_collect to ignore the kernel pmap.
114 * [91/01/03 rpd]
115 *
116 * Revision 2.2 90/12/04 14:50:28 jsb
117 * First checkin (for intel directory).
118 * [90/12/03 21:54:31 jsb]
119 *
120 * Revision 2.9 90/11/26 14:48:44 rvb
121 * Slight error in pmap_valid_page. Pages > last_addr
122 * must be invalid. (They are probably device buffers.)
123 * [90/11/23 10:00:56 rvb]
124 *
125 * Revision 2.8 90/11/24 15:14:47 jsb
126 * Replaced "0x1000" in pmap_valid_page with "first_addr".
127 * [90/11/24 11:49:04 jsb]
128 *
129 * Revision 2.7 90/11/05 14:27:27 rpd
130 * Replace (va < vm_first_phys || va > vm_last_phys) with test
131 * using valid page. Otherwise, video buffer memory is treated as
132 * valid memory and setting dirty bits leads to disastrous results.
133 * [90/11/05 rvb]
134 *
135 * Define pmap_valid_page: [0x1000..cnvmem * 1024) and
136 * [first_avail..)
137 * as useable memory
138 * [90/09/05 rvb]
139 *
140 * Revision 2.6 90/09/09 14:31:39 rpd
141 * Use decl_simple_lock_data.
142 * [90/08/30 rpd]
143 *
144 * Revision 2.5 90/08/06 15:07:05 rwd
145 * Fix bugs in pmap_remove, pmap_protect, phys_attribute routines.
146 * Allocate pte pages directly from vm_resident page list, via a
147 * pmap_object.
148 * [90/07/17 dbg]
149 *
150 * Revision 2.4 90/06/19 22:57:46 rpd
151 * Made MOREVM a variable; increased to 28 meg.
152 * Commented out pte_to_phys assertions.
153 * [90/06/04 rpd]
154 *
155 * Revision 2.3 90/06/02 14:48:40 rpd
156 * Added dummy pmap_list_resident_pages, under MACH_VM_DEBUG.
157 * [90/05/31 rpd]
158 *
159 * Revision 2.2 90/05/03 15:37:04 dbg
160 * Define separate Write and User bits instead of protection codes.
161 * Write-protect kernel data by invalidating it; the 386 ignores
162 * write permission in supervisor mode.
163 * [90/03/25 dbg]
164 *
165 * Fix pmap_collect to look for VA that maps page table page.
166 * Since page table pages are allocated with kmem_alloc, their
167 * virtual and physical addresses are not necessarily the same.
168 * Rewrite pmap_remove to skip address range when PDE is invalid.
169 * Combine pmap_remove_all and pmap_copy_on_write into pmap_page_protect.
170 * Add reference bits.
171 * [90/03/21 dbg]
172 *
173 * Fix for pure kernel. kpde and kptes are dynamically allocated
174 * by assembly code. Reverse CHIPBUG test (what was this, Bob?)
175 * [90/02/14 dbg]
176 *
177 * Revision 1.8.1.3 89/12/28 12:43:18 rvb
178 * v_avail gets phystokv(av_start), in case esym != end.
179 * [89/12/26 rvb]
180 *
181 * Revision 1.8.1.2 89/12/21 17:59:15 rvb
182 * Revision 1.11 89/11/27 22:54:27 kupfer
183 * kernacc() moved here from locore (from Lance).
184 *
185 * Revision 1.10 89/10/24 13:31:38 lance
186 * Eliminate the boot-time `pause that refreshes'
187 *
188 * Revision 1.8 89/09/20 17:26:47 rvb
189 * The OLIVETTI CACHE bug strikes again. I am leaving this code in
190 * as is for now so we can sync up. BUT all this stuff is going to
191 * be on a run time switch or a ifdef real soon.
192 * [89/09/20 rvb]
193 *
194 * Revision 1.7 89/07/17 10:38:18 rvb
195 * pmap_map_bd now flushes the tlb with a call to pmap_update.
196 * [Lance Berc]
197 *
198 * Revision 1.6 89/04/05 12:59:14 rvb
199 * Can not use zone anymore for directory, since alignment is not
200 * guaranteed. Besides the directory is a page.
201 * [89/03/30 rvb]
202 *
203 * Move extern out of function scope for gcc.
204 * [89/03/04 rvb]
205 *
206 * Revision 1.5 89/03/09 20:03:25 rpd
207 * More cleanup.
208 *
209 * Revision 1.4 89/02/26 12:33:06 gm0w
210 * Changes for cleanup.
211 *
212 * 31-Dec-88 Robert Baron (rvb) at Carnegie-Mellon University
213 * Derived from MACH2.0 vax release.
214 *
215 * 17-Jan-88 David Golub (dbg) at Carnegie-Mellon University
216 * Use cpus_idle, not the scheduler's cpu_idle, to determine when a
217 * cpu does not need to be interrupted. The two are not
218 * synchronized.
219 *
220 */
221
222 /*
223 * File: pmap.c
224 * Author: Avadis Tevanian, Jr., Michael Wayne Young
225 * (These guys wrote the Vax version)
226 *
227 * Physical Map management code for Intel i386, i486, and i860.
228 *
229 * Manages physical address maps.
230 *
231 * In addition to hardware address maps, this
232 * module is called upon to provide software-use-only
233 * maps which may or may not be stored in the same
234 * form as hardware maps. These pseudo-maps are
235 * used to store intermediate results from copy
236 * operations to and from address spaces.
237 *
238 * Since the information managed by this module is
239 * also stored by the logical address mapping module,
240 * this module may throw away valid virtual-to-physical
241 * mappings at almost any time. However, invalidations
242 * of virtual-to-physical mappings must be done as
243 * requested.
244 *
245 * In order to cope with hardware architectures which
246 * make virtual-to-physical map invalidates expensive,
247 * this module may delay invalidate or reduced protection
248 * operations until such time as they are actually
249 * necessary. This module is given full information as
250 * to which processors are currently using which maps,
251 * and to when physical maps must be made correct.
252 */
253
254 #include <cpus.h>
255
256 #include <mach/machine/vm_types.h>
257
258 #include <mach/boolean.h>
259 #include <kern/thread.h>
260 #include <kern/zalloc.h>
261
262 #include <kern/lock.h>
263
264 #include <vm/pmap.h>
265 #include <vm/vm_map.h>
266 #include <vm/vm_kern.h>
267 #include <mach/vm_param.h>
268 #include <mach/vm_prot.h>
269 #include <vm/vm_object.h>
270 #include <vm/vm_page.h>
271 #include <vm/vm_user.h>
272
273 #include <mach/machine/vm_param.h>
274 #include <machine/thread.h>
275 #if i860
276 #include <i860ipsc/nodehw.h>
277 #endif
278
279 #ifdef ORC
280 #define OLIVETTICACHE 1
281 #endif ORC
282
283 #ifndef OLIVETTICACHE
284 #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
285 #define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
286 #else OLIVETTICACHE
287
288 /* This gross kludgery is needed for Olivetti XP7 & XP9 boxes to get
289 * around an apparent hardware bug. Other than at startup it doesn't
290 * affect run-time performance very much, so we leave it in for all
291 * machines.
292 */
293 extern unsigned *pstart();
294 #define CACHE_LINE 8
295 #define CACHE_SIZE 512
296 #define CACHE_PAGE 0x1000
297
298 #define WRITE_PTE(pte_p, pte_entry) { write_pte(pte_p, pte_entry); }
299
300 write_pte(pte_p, pte_entry)
301 pt_entry_t *pte_p, pte_entry;
302 {
303 unsigned long count;
304 volatile unsigned long hold, *addr1, *addr2;
305
306 if ( pte_entry != *pte_p )
307 *pte_p = pte_entry;
308 else {
309 /* This isn't necessarily the optimal algorithm */
310 addr1 = (unsigned long *)pstart;
311 for (count = 0; count < CACHE_SIZE; count++) {
312 addr2 = addr1 + CACHE_PAGE;
313 hold = *addr1; /* clear cache bank - A - */
314 hold = *addr2; /* clear cache bank - B - */
315 addr1 += CACHE_LINE;
316 }
317 }
318 }
319
320 #define WRITE_PTE_FAST(pte_p, pte_entry)	*pte_p = pte_entry;
321
322 #endif OLIVETTICACHE
323
324 /*
325 * Private data structures.
326 */
327
328 /*
329 * For each vm_page_t, there is a list of all currently
330 * valid virtual mappings of that page. An entry is
331 * a pv_entry_t; the list is the pv_table.
332 */
333
334 typedef struct pv_entry {
335 struct pv_entry *next; /* next pv_entry */
336 pmap_t pmap; /* pmap where mapping lies */
337 vm_offset_t va; /* virtual address for mapping */
338 } *pv_entry_t;
339
340 #define PV_ENTRY_NULL ((pv_entry_t) 0)
341
342 pv_entry_t pv_head_table; /* array of entries, one per page */
343
344 /*
345 * pv_list entries are kept on a list that can only be accessed
346 * with the pmap system locked (at SPLVM, not in the cpus_active set).
347 * The list is refilled from the pv_list_zone if it becomes empty.
348 */
349 pv_entry_t pv_free_list; /* free list at SPLVM */
350 decl_simple_lock_data(, pv_free_list_lock)
351
352 #define PV_ALLOC(pv_e) { \
353 simple_lock(&pv_free_list_lock); \
354 if ((pv_e = pv_free_list) != 0) { \
355 pv_free_list = pv_e->next; \
356 } \
357 simple_unlock(&pv_free_list_lock); \
358 }
359
360 #define PV_FREE(pv_e) { \
361 simple_lock(&pv_free_list_lock); \
362 pv_e->next = pv_free_list; \
363 pv_free_list = pv_e; \
364 simple_unlock(&pv_free_list_lock); \
365 }
366
367 zone_t pv_list_zone; /* zone of pv_entry structures */
368
369 /*
370 * Each entry in the pv_head_table is locked by a bit in the
371 * pv_lock_table. The lock bits are accessed by the physical
372 * address of the page they lock.
373 */
374
375 char *pv_lock_table; /* pointer to array of bits */
376 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
377
378 /*
379 * First and last physical addresses that we maintain any information
380 * for. Initialized to zero so that pmap operations done before
381 * pmap_init won't touch any non-existent structures.
382 */
383 vm_offset_t vm_first_phys = (vm_offset_t) 0;
384 vm_offset_t vm_last_phys = (vm_offset_t) 0;
385 boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
386
387 /*
388 * Index into pv_head table, its lock bits, and the modify/reference
389 * bits starting at vm_first_phys.
390 */
391
392 #define pa_index(pa) (atop(pa - vm_first_phys))
393
394 #define pai_to_pvh(pai) (&pv_head_table[pai])
395 #define lock_pvh_pai(pai) (bit_lock(pai, pv_lock_table))
396 #define unlock_pvh_pai(pai) (bit_unlock(pai, pv_lock_table))
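
/*
 * Worked example (editor's addition, not part of the original source):
 * with a 4 KB VM page size and, say, vm_first_phys == 0x100000, the
 * physical page at 0x105000 gets
 *
 *	pai = pa_index(0x105000) = atop(0x5000) = 5
 *
 * so its mappings hang off pv_head_table[5] and its lock is bit 5 of
 * pv_lock_table.  Both example addresses are assumptions.
 */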
397
398 /*
399 * Array of physical page attributes for managed pages.
400 * One byte per physical page.
401 */
402 char *pmap_phys_attributes;
403
404 /*
405 * Physical page attributes. Copy bits from PTE definition.
406 */
407 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
408 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
409
410 /*
411 * Amount of virtual memory mapped by one
412 * page-directory entry.
413 */
414 #define PDE_MAPPED_SIZE (pdetova(1))
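
/*
 * Editor's note: with the two-level i386/i860 tables (4 KB pages, 1024
 * ptes per page table), pdetova(1) works out to 4 MB of virtual space
 * per directory entry.
 */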
415
416 /*
417 * We allocate page table pages directly from the VM system
418 * through this object. It maps physical memory.
419 */
420 vm_object_t pmap_object = VM_OBJECT_NULL;
421
422 /*
423 * Locking and TLB invalidation
424 */
425
426 /*
427 * Locking Protocols:
428 *
429 * There are two structures in the pmap module that need locking:
430 * the pmaps themselves, and the per-page pv_lists (which are locked
431 * by locking the pv_lock_table entry that corresponds to the pv_head
432 * for the list in question.) Most routines want to lock a pmap and
433 * then do operations in it that require pv_list locking -- however
434 * pmap_remove_all and pmap_copy_on_write operate on a physical page
435 * basis and want to do the locking in the reverse order, i.e. lock
436 * a pv_list and then go through all the pmaps referenced by that list.
437 * To protect against deadlock between these two cases, the pmap_lock
438 * is used. There are three different locking protocols as a result:
439 *
440 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
441 * the pmap.
442 *
443 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
444 * lock on the pmap_lock (shared read), then lock the pmap
445 * and finally the pv_lists as needed [i.e. pmap lock before
446 * pv_list lock.]
447 *
448 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
449 * Get a write lock on the pmap_lock (exclusive write); this
450 * also guarantees exclusive access to the pv_lists. Lock the
451 * pmaps as needed.
452 *
453 * At no time may any routine hold more than one pmap lock or more than
454 * one pv_list lock. Because interrupt level routines can allocate
455 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
456 * kernel_pmap can only be held at splvm.
457 */
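
/*
 * The sketch below is an editor's addition, not part of the original
 * source.  It shows the shape of the three protocols just described,
 * written against the locking macros defined below; the routine name
 * and the elided bodies are placeholders.
 */
#if 0	/* illustration only */
void pmap_locking_example(pmap, pai)
	pmap_t	pmap;
	int	pai;
{
	int	spl;

	/* 1. pmap operations only (pmap_extract, ...): lock just the pmap. */
	SPLVM(spl);
	simple_lock(&pmap->lock);
	/* ... read ptes ... */
	simple_unlock(&pmap->lock);
	SPLX(spl);

	/*
	 * 2. pmap-based operations (pmap_enter, pmap_remove, ...): shared
	 *    pmap_system_lock, then the pmap, then pv_list locks.
	 */
	PMAP_READ_LOCK(pmap, spl);
	LOCK_PVH(pai);
	/* ... edit ptes and this page's pv_list ... */
	UNLOCK_PVH(pai);
	PMAP_READ_UNLOCK(pmap, spl);

	/*
	 * 3. pv_list-based operations (pmap_page_protect, ...): the
	 *    exclusive pmap_system_lock also covers every pv_list.
	 */
	PMAP_WRITE_LOCK(spl);
	/* ... walk a pv_list, simple_lock'ing each pmap it names ... */
	PMAP_WRITE_UNLOCK(spl);
}
#endif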
458
459 #if NCPUS > 1
460 /*
461 * We raise the interrupt level to splvm, to block interprocessor
462 * interrupts during pmap operations. We must take the CPU out of
463 * the cpus_active set while interrupts are blocked.
464 */
465 #define SPLVM(spl) { \
466 spl = splvm(); \
467 i_bit_clear(cpu_number(), &cpus_active); \
468 }
469
470 #define SPLX(spl) { \
471 i_bit_set(cpu_number(), &cpus_active); \
472 splx(spl); \
473 }
474
475 /*
476 * Lock on pmap system
477 */
478 lock_data_t pmap_system_lock;
479
480 #define PMAP_READ_LOCK(pmap, spl) { \
481 SPLVM(spl); \
482 lock_read(&pmap_system_lock); \
483 simple_lock(&(pmap)->lock); \
484 }
485
486 #define PMAP_WRITE_LOCK(spl) { \
487 SPLVM(spl); \
488 lock_write(&pmap_system_lock); \
489 }
490
491 #define PMAP_READ_UNLOCK(pmap, spl) { \
492 simple_unlock(&(pmap)->lock); \
493 lock_read_done(&pmap_system_lock); \
494 SPLX(spl); \
495 }
496
497 #define PMAP_WRITE_UNLOCK(spl) { \
498 lock_write_done(&pmap_system_lock); \
499 SPLX(spl); \
500 }
501
502 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
503 simple_lock(&(pmap)->lock); \
504 lock_write_to_read(&pmap_system_lock); \
505 }
506
507 #define LOCK_PVH(index) (lock_pvh_pai(index))
508
509 #define UNLOCK_PVH(index) (unlock_pvh_pai(index))
510
511 #define PMAP_UPDATE_TLBS(pmap, s, e) \
512 { \
513 cpu_set cpu_mask = 1 << cpu_number(); \
514 cpu_set users; \
515 \
516 /* Since the pmap is locked, other updates are locked */ \
517 /* out, and any pmap_activate has finished. */ \
518 \
519 /* find other cpus using the pmap */ \
520 users = (pmap)->cpus_using & ~cpu_mask; \
521 if (users) { \
522 /* signal them, and wait for them to finish */ \
523 /* using the pmap */ \
524 signal_cpus(users, (pmap), (s), (e)); \
525 while ((pmap)->cpus_using & cpus_active & ~cpu_mask) \
526 continue; \
527 } \
528 \
529 /* invalidate our own TLB if pmap is in use */ \
530 if ((pmap)->cpus_using & cpu_mask) { \
531 INVALIDATE_TLB((s), (e)); \
532 } \
533 }
534
535 #else NCPUS > 1
536
537 #define SPLVM(spl)
538 #define SPLX(spl)
539
540 #define PMAP_READ_LOCK(pmap, spl) SPLVM(spl)
541 #define PMAP_WRITE_LOCK(spl) SPLVM(spl)
542 #define PMAP_READ_UNLOCK(pmap, spl) SPLX(spl)
543 #define PMAP_WRITE_UNLOCK(spl) SPLX(spl)
544 #define PMAP_WRITE_TO_READ_LOCK(pmap)
545
546 #define LOCK_PVH(index)
547 #define UNLOCK_PVH(index)
548
549 #define PMAP_UPDATE_TLBS(pmap, s, e) { \
550 /* invalidate our own TLB if pmap is in use */ \
551 if ((pmap)->cpus_using) { \
552 INVALIDATE_TLB((s), (e)); \
553 } \
554 }
555
556 #endif NCPUS > 1
557
558 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
559
560 #if i860
561 /* Do a data cache flush until we find the caching bug XXX prp */
562 #define INVALIDATE_TLB(s, e) { \
563 flush(); \
564 flush_tlb(); \
565 }
566 #else i860
567 #define INVALIDATE_TLB(s, e) { \
568 flush_tlb(); \
569 }
570 #endif i860
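
/*
 * Editor's note: neither variant of INVALIDATE_TLB consults its (s, e)
 * range arguments or the MAX_TBIS_SIZE threshold above -- both simply
 * flush the entire TLB (flush_tlb() is presumably a CR3 reload on these
 * processors).
 */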
571
572
573 #if NCPUS > 1
574 /*
575 * Structures to keep track of pending TLB invalidations
576 */
577
578 #define UPDATE_LIST_SIZE 4
579
580 struct pmap_update_item {
581 pmap_t pmap; /* pmap to invalidate */
582 vm_offset_t start; /* start address to invalidate */
583 vm_offset_t end; /* end address to invalidate */
584 } ;
585
586 typedef struct pmap_update_item *pmap_update_item_t;
587
588 /*
589 * List of pmap updates. If the list overflows,
590 * the last entry is changed to invalidate all.
591 */
592 struct pmap_update_list {
593 decl_simple_lock_data(, lock)
594 int count;
595 struct pmap_update_item item[UPDATE_LIST_SIZE];
596 } ;
597 typedef struct pmap_update_list *pmap_update_list_t;
598
599 struct pmap_update_list cpu_update_list[NCPUS];
600
601 #endif NCPUS > 1
602
603 /*
604 * Other useful macros.
605 */
606 #define current_pmap() (vm_map_pmap(current_thread()->task->map))
607 #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
608
609 struct pmap kernel_pmap_store;
610 pmap_t kernel_pmap;
611
612 struct zone *pmap_zone; /* zone of pmap structures */
613
614 int pmap_debug = 0; /* flag for debugging prints */
615 int ptes_per_vm_page; /* number of hardware ptes needed
616 to map one VM page. */
617 unsigned int inuse_ptepages_count = 0; /* debugging */
618
619 extern char end;
620 /*
621 * Page directory for kernel.
622 */
623 pt_entry_t *kpde = 0; /* set by start.s - keep out of bss */
624 #if i860
625 extern pt_entry_t kpde_page;
626 #endif
627
628 void pmap_remove_range(); /* forward */
629 #if NCPUS > 1
630 void signal_cpus(); /* forward */
631 #endif NCPUS > 1
632
633 #if i860
634 /*
635 * Paging flag
636 */
637 int paging_enabled = 0;
638 #endif
639
640 /*
641 * Given an offset and a map, compute the address of the
642 * pte. If the address is invalid with respect to the map
643 * then PT_ENTRY_NULL is returned (and the map may need to grow).
644 *
645 * This is only used internally.
646 */
647 pt_entry_t *pmap_pte(pmap, addr)
648 register pmap_t pmap;
649 register vm_offset_t addr;
650 {
651 #if i860
652 pt_entry_t *ptp;
653 #else
654 register pt_entry_t *ptp;
655 #endif
656 register pt_entry_t pte;
657
658 if (pmap->dirbase == 0)
659 return(PT_ENTRY_NULL);
660 pte = pmap->dirbase[pdenum(addr)];
661 if ((pte & INTEL_PTE_VALID) == 0)
662 return(PT_ENTRY_NULL);
663 ptp = (pt_entry_t *)ptetokv(pte);
664 return(&ptp[ptenum(addr)]);
665
666 }
667
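/*
 * Worked example (editor's addition, not part of the original source):
 * with 4 KB pages a 32-bit address splits into a 10-bit directory index,
 * a 10-bit table index and a 12-bit offset.  For the made-up address
 * 0xC0101234, pdenum() is 0x300 and ptenum() is 0x101, so pmap_pte reads
 * dirbase[0x300] to find the page table and returns &ptp[0x101].
 */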
668 #define pmap_pde(pmap, addr) (&(pmap)->dirbase[pdenum(addr)])
669
670 #define DEBUG_PTE_PAGE 0
671
672 #if DEBUG_PTE_PAGE
673 void ptep_check(ptep)
674 ptep_t ptep;
675 {
676 register pt_entry_t *pte, *epte;
677 int ctu, ctw;
678
679 /* check the use and wired counts */
680 if (ptep == PTE_PAGE_NULL)
681 return;
682 pte = pmap_pte(ptep->pmap, ptep->va);
683 epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
684 ctu = 0;
685 ctw = 0;
686 while (pte < epte) {
687 if (pte->pfn != 0) {
688 ctu++;
689 if (pte->wired)
690 ctw++;
691 }
692 pte += ptes_per_vm_page;
693 }
694
695 if (ctu != ptep->use_count || ctw != ptep->wired_count) {
696 printf("use %d wired %d - actual use %d wired %d\n",
697 ptep->use_count, ptep->wired_count, ctu, ctw);
698 panic("pte count");
699 }
700 }
701 #endif DEBUG_PTE_PAGE
702
703 /*
704 * Map memory at initialization. The physical addresses being
705 * mapped are not managed and are never unmapped.
706 *
707 * For now, VM is already on, we only need to map the
708 * specified memory.
709 */
710 vm_offset_t pmap_map(virt, start, end, prot)
711 register vm_offset_t virt;
712 register vm_offset_t start;
713 register vm_offset_t end;
714 register int prot;
715 {
716 register int ps;
717
718 ps = PAGE_SIZE;
719 while (start < end) {
720 pmap_enter(kernel_pmap, virt, start, prot, FALSE);
721 virt += ps;
722 start += ps;
723 }
724 return(virt);
725 }
726
727 /*
728 * Back-door routine for mapping kernel VM at initialization.
729 * Useful for mapping memory outside the range
730 * [vm_first_phys, vm_last_phys) (i.e., devices).
731 * Otherwise like pmap_map.
732 #if i860
733 * Sets no-cache bit.
734 #endif
735 */
736 vm_offset_t pmap_map_bd(virt, start, end, prot)
737 register vm_offset_t virt;
738 register vm_offset_t start;
739 register vm_offset_t end;
740 vm_prot_t prot;
741 {
742 register pt_entry_t template;
743 register pt_entry_t *pte;
744
745 template = pa_to_pte(start)
746 #if i860
747 | INTEL_PTE_NCACHE
748 #endif
749 | INTEL_PTE_VALID;
750 if (prot & VM_PROT_WRITE)
751 template |= INTEL_PTE_WRITE;
752
753 while (start < end) {
754 pte = pmap_pte(kernel_pmap, virt);
755 if (pte == PT_ENTRY_NULL)
756 panic("pmap_map_bd: Invalid kernel address\n");
757 WRITE_PTE_FAST(pte, template)
758 pte_increment_pa(template);
759 virt += PAGE_SIZE;
760 start += PAGE_SIZE;
761 }
762 return(virt);
763 }
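
/*
 * Usage sketch (editor's addition, not part of the original source): a
 * boot-time driver could map a device's registers this way.  The physical
 * address and length below are made-up placeholders.
 */
#if 0	/* illustration only */
	vm_offset_t	dev_virt;

	dev_virt = virtual_avail;
	virtual_avail = pmap_map_bd(dev_virt,
				(vm_offset_t) 0xA0000000,
				(vm_offset_t) 0xA0000000 + 4 * PAGE_SIZE,
				VM_PROT_READ | VM_PROT_WRITE);
#endif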
764
765 extern int cnvmem;
766 extern char *first_avail;
767 extern vm_offset_t virtual_avail, virtual_end;
768 extern vm_offset_t avail_start, avail_end;
769
770 /*
771 * Bootstrap the system enough to run with virtual memory.
772 * Map the kernel's code and data, and allocate the system page table.
773 * Called with mapping OFF. Page_size must already be set.
774 *
775 * Parameters:
776 * load_start: PA where kernel was loaded
777 * avail_start PA of first available physical page -
778 * after kernel page tables
779 * avail_end PA of last available physical page
780 * virtual_avail VA of first available page -
781 * after kernel page tables
782 * virtual_end VA of last available page -
783 * end of kernel address space
784 *
785 * &start_text start of kernel text
786 * &etext end of kernel text
787 */
788
789 vm_size_t morevm = 40 * 1024 * 1024; /* VM space for kernel map */
790
791 void pmap_bootstrap(load_start)
792 vm_offset_t load_start;
793 {
794 vm_offset_t va, tva;
795 pt_entry_t template;
796 pt_entry_t *pde, *pte, *ptend;
797 #if i860
798 vm_offset_t sva;
799 pt_entry_t *pt_pte, *tpt;
800 pt_entry_t *ppde, *ppte;
801
802 /*
803 * Mapping is turned OFF, we must reference only physical addresses.
804 * The load image of the system is to be mapped 1-1 physical = virtual.
805 *
806 * This code will only work if VM_MIN_KERNEL_ADDRESS
807 * equals PHYS_RAM_ADDRESS.
808 */
809 #endif
810
811 /*
812 * Set ptes_per_vm_page for general use.
813 */
814 ptes_per_vm_page = page_size / INTEL_PGBYTES;
815
816 /*
817 * The kernel's pmap is statically allocated so we don't
818 * have to use pmap_create, which is unlikely to work
819 * correctly at this part of the boot sequence.
820 */
821
822 kernel_pmap = &kernel_pmap_store;
823
824 #if NCPUS > 1
825 lock_init(&pmap_system_lock, FALSE); /* NOT a sleep lock */
826 #endif NCPUS > 1
827
828 simple_lock_init(&kernel_pmap->lock);
829
830 kernel_pmap->ref_count = 1;
831
832 /*
833 * The kernel page directory has been allocated;
834 * its virtual address is in kpde.
835 *
836 #if i860
837 * No kernel page table pages have been allocated
838 #else
839 * Enough kernel page table pages have been allocated
840 #endif
841 * to map low system memory, kernel text, kernel data/bss,
842 * kdb's symbols, and the page directory and page tables.
843 *
844 * No other physical memory has been allocated.
845 */
846 #if i860
847 kpde = &kpde_page;
848 kernel_pmap->dirbase = kpde;
849 #endif
850
851 /*
852 * Start mapping virtual memory to physical memory, 1-1,
853 #if i860
854 * from load point to end of memory,
855 * virtual = physical.
856 #else
857 * at end of mapped memory.
858 #endif
859 */
860 virtual_avail = phystokv(avail_start);
861 virtual_end = phystokv(avail_end);
862
863 #if i860
864 bzero((char *)kpde, INTEL_PGBYTES);
865 #endif
866 pde = kpde;
867 #if i860
868 pde += pdenum(load_start);
869 pte = 0; ptend = 0;
870 #else
871 pde += pdenum(virtual_avail);
872 if (pte_to_pa(*pde) == 0) {
873 /* This pte has not been allocated */
874 pte = 0; ptend = 0;
875 }
876 else {
877 pte = (pt_entry_t *)ptetokv(*pde);
878 /* first pte of page */
879 ptend = pte+NPTES; /* last pte of page */
880 pte += ptenum(virtual_avail); /* point to pte that
881 maps first avail VA */
882 pde++; /* point pde to first empty slot */
883 }
884 #endif
885
886 #if i860
887 template = pa_to_pte(load_start)
888 #else
889 template = pa_to_pte(avail_start)
890 #endif
891 | INTEL_PTE_VALID | INTEL_PTE_WRITE;
892
893 #if i860
894 tva = virtual_end;
895 sva = virtual_avail;
896 for (va = load_start; va < tva; va += INTEL_PGBYTES) {
897 #else
898 for (va = virtual_avail; va < virtual_end; va += INTEL_PGBYTES) {
899 #endif
900 if (pte >= ptend) {
901 pte = (pt_entry_t *)virtual_avail;
902 ptend = pte + NPTES;
903 virtual_avail = (vm_offset_t)ptend;
904 #if i860
905 *pde = pa_to_pte((vm_offset_t)pte)
906 #else
907 *pde = pa_to_pte((vm_offset_t)pte - VM_MIN_KERNEL_ADDRESS)
908 #endif
909 | INTEL_PTE_VALID
910 | INTEL_PTE_WRITE;
911 pde++;
912 }
913 WRITE_PTE_FAST(pte, template)
914 pte++;
915 pte_increment_pa(template);
916 }
917 #if i860
918 /* kvtophys should now work in phys range */
919
920 /*
921 * Mark page table pages non-cacheable
922 */
923
924 pt_pte = (pt_entry_t *)pte_to_pa(*(kpde + pdenum(sva))) + ptenum(sva);
925
926 for (va = load_start; va < tva; va += INTEL_PGBYTES*NPTES) {
927 /* Mark page table non-cacheable */
928 *pt_pte |= INTEL_PTE_NCACHE;
929 pt_pte++;
930 }
931
932 /*
933 * Map I/O space
934 */
935
936 ppde = kpde;
937 ppde += pdenum(IO_BASE);
938
939 if (pte_to_pa(*ppde) == 0) {
940 /* This pte has not been allocated */
941 ppte = (pt_entry_t *)kvtophys(virtual_avail);
942 ptend = ppte + NPTES;
943 virtual_avail = phystokv((vm_offset_t)ptend);
944 *ppde = pa_to_pte((vm_offset_t)ppte)
945 | INTEL_PTE_VALID
946 | INTEL_PTE_WRITE;
947 pte = ptend;
948
949 /* Mark page table non-cacheable */
950 *pt_pte |= INTEL_PTE_NCACHE;
951 pt_pte++;
952
953 bzero((char *)ppte, INTEL_PGBYTES);
954 } else {
955 ppte = (pt_entry_t *)(*ppde); /* first pte of page */
956 }
957 *ppde |= INTEL_PTE_USER;
958
959
960 WRITE_PTE(ppte + ptenum(FIFO_ADDR),
961 pa_to_pte(FIFO_ADDR_PH)
962 | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
963
964 WRITE_PTE(ppte + ptenum(FIFO_ADDR + XEOD_OFF),
965 pa_to_pte(FIFO_ADDR_PH + XEOD_OFF_PH)
966 | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
967
968 /* XXX Allowed user access to control reg - cfj */
969 WRITE_PTE(ppte + ptenum(CSR_ADDR),
970 pa_to_pte(CSR_ADDR_PH)
971 | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE | INTEL_PTE_USER);
972
973 /* XXX Allowed user access to perf reg - cfj */
974 WRITE_PTE(ppte + ptenum(PERFCNT_ADDR),
975 pa_to_pte(PERFCNT_ADDR_PH)
976 | INTEL_PTE_VALID | INTEL_PTE_USER | INTEL_PTE_NCACHE | INTEL_PTE_USER);
977
978 WRITE_PTE(ppte + ptenum(UART_ADDR),
979 pa_to_pte(UART_ADDR_PH)
980 | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
981
982 WRITE_PTE(ppte + ptenum(0xFFFFF000),
983 pa_to_pte(avail_end)
984 | INTEL_PTE_VALID | INTEL_PTE_WRITE);
985 avail_start = kvtophys(virtual_avail);
986 #else
987 avail_start = virtual_avail - VM_MIN_KERNEL_ADDRESS;
988 #endif
989
990 /*
991 * startup requires additional virtual memory (for tables, buffers,
992 * etc.). The kd driver may also require some of that memory to
993 * access the graphics board.
994 *
995 */
996 *(int *)&template = 0;
997 virtual_end += morevm;
998 for (tva = va; tva < virtual_end; tva += INTEL_PGBYTES) {
999 if (pte >= ptend) {
1000 #if i860
1001 pte = (pt_entry_t *)kvtophys(virtual_avail);
1002 #else
1003 pte = (pt_entry_t *)virtual_avail;
1004 #endif
1005 ptend = pte + NPTES;
1006 #if i860
1007 virtual_avail = phystokv((vm_offset_t)ptend);
1008 #else
1009 virtual_avail = (vm_offset_t)ptend;
1010 #endif
1011 avail_start += INTEL_PGBYTES;
1012 #if i860
1013 *pde = pa_to_pte((vm_offset_t)pte)
1014 #else
1015 *pde = pa_to_pte((vm_offset_t)pte - VM_MIN_KERNEL_ADDRESS)
1016 #endif
1017 | INTEL_PTE_VALID
1018 | INTEL_PTE_WRITE;
1019 pde++;
1020 #if i860
1021 /* Mark page table non-cacheable */
1022 *pt_pte |= INTEL_PTE_NCACHE;
1023 pt_pte++;
1024 #endif
1025 }
1026 WRITE_PTE_FAST(pte, template)
1027 pte++;
1028 }
1029 virtual_avail = va;
1030 /*
1031 * c.f. comment above
1032 *
1033 */
1034 virtual_end = va + morevm;
1035 while (pte < ptend)
1036 *pte++ = 0;
1037 /*
1038 * invalidate virtual addresses at 0
1039 */
1040 kpde[0] = 0;
1041 #if i860
1042 #else
1043 kernel_pmap->dirbase = kpde;
1044 #endif
1045 printf("Kernel virtual space from 0x%x to 0x%x.\n",
1046 #if i860
1047 sva, virtual_end);
1048 #else
1049 VM_MIN_KERNEL_ADDRESS, virtual_end);
1050 #endif
1051 printf("Available physical space from 0x%x to 0x%x\n",
1052 avail_start, avail_end);
1053 #if i860
1054 /*
1055 * Turn on mapping
1056 */
1057
1058 flush_and_ctxsw(kernel_pmap->dirbase);
1059 paging_enabled = 1;
1060
1061 printf("Paging enabled.\n");
1062 #endif
1063 }
1064
1065 void pmap_virtual_space(startp, endp)
1066 vm_offset_t *startp;
1067 vm_offset_t *endp;
1068 {
1069 *startp = virtual_avail;
1070 *endp = virtual_end;
1071 }
1072
1073 /*
1074 * Initialize the pmap module.
1075 * Called by vm_init, to initialize any structures that the pmap
1076 * system needs to map virtual memory.
1077 */
1078 void pmap_init()
1079 {
1080 register long npages;
1081 vm_offset_t addr;
1082 register vm_size_t s;
1083 int i;
1084
1085 /*
1086 * Allocate memory for the pv_head_table and its lock bits,
1087 * the modify bit array, and the pte_page table.
1088 */
1089
1090 npages = atop(avail_end - avail_start);
1091 s = (vm_size_t) (sizeof(struct pv_entry) * npages
1092 + pv_lock_table_size(npages)
1093 + npages);
1094
1095 s = round_page(s);
1096 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
1097 panic("pmap_init");
1098 bzero((char *) addr, s);
1099
1100 /*
1101 * Allocate the structures first to preserve word-alignment.
1102 */
1103 pv_head_table = (pv_entry_t) addr;
1104 addr = (vm_offset_t) (pv_head_table + npages);
1105
1106 pv_lock_table = (char *) addr;
1107 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
1108
1109 pmap_phys_attributes = (char *) addr;
1110
1111 /*
1112 * Create the zone of physical maps,
1113 * and of the physical-to-virtual entries.
1114 */
1115 s = (vm_size_t) sizeof(struct pmap);
1116 pmap_zone = zinit(s, 400*s, 4096, FALSE, "pmap"); /* XXX */
1117 s = (vm_size_t) sizeof(struct pv_entry);
1118 pv_list_zone = zinit(s, 10000*s, 4096, FALSE, "pv_list"); /* XXX */
1119
1120 #if NCPUS > 1
1121 /*
1122 * Set up the pmap request lists
1123 */
1124 for (i = 0; i < NCPUS; i++) {
1125 pmap_update_list_t up = &cpu_update_list[i];
1126
1127 simple_lock_init(&up->lock);
1128 up->count = 0;
1129 }
1130 #endif NCPUS > 1
1131
1132 /*
1133 * Only now, when all of the data structures are allocated,
1134 * can we set vm_first_phys and vm_last_phys. If we set them
1135 * too soon, the kmem_alloc_wired above will try to use these
1136 * data structures and blow up.
1137 */
1138
1139 vm_first_phys = avail_start;
1140 vm_last_phys = avail_end;
1141 pmap_initialized = TRUE;
1142 }
1143
1144 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
1145
1146 boolean_t pmap_verify_free(phys)
1147 vm_offset_t phys;
1148 {
1149 pv_entry_t pv_h;
1150 int pai;
1151 int spl;
1152 boolean_t result;
1153
1154 assert(phys != vm_page_fictitious_addr);
1155 if (!pmap_initialized)
1156 return(TRUE);
1157
1158 if (!pmap_valid_page(phys))
1159 return(FALSE);
1160
1161 PMAP_WRITE_LOCK(spl);
1162
1163 pai = pa_index(phys);
1164 pv_h = pai_to_pvh(pai);
1165
1166 result = (pv_h->pmap == PMAP_NULL);
1167 PMAP_WRITE_UNLOCK(spl);
1168
1169 return(result);
1170 }
1171
1172 /*
1173 * Routine: pmap_page_table_page_alloc
1174 *
1175 * Allocates a new physical page to be used as a page-table page.
1176 *
1177 * Must be called with the pmap system and the pmap unlocked,
1178 * since these must be unlocked to use vm_page_grab.
1179 */
1180 vm_offset_t
1181 pmap_page_table_page_alloc()
1182 {
1183 register vm_page_t m;
1184 register vm_offset_t pa;
1185
1186 check_simple_locks();
1187
1188 /*
1189 * We cannot allocate the pmap_object in pmap_init,
1190 * because it is called before the zone package is up.
1191 * Allocate it now if it is missing.
1192 */
1193 if (pmap_object == VM_OBJECT_NULL)
1194 pmap_object = vm_object_allocate(mem_size);
1195
1196 /*
1197 * Allocate a VM page for the level 2 page table entries.
1198 */
1199 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1200 VM_PAGE_WAIT((void (*)()) 0);
1201
1202 /*
1203 * Map the page to its physical address so that it
1204 * can be found later.
1205 */
1206 pa = m->phys_addr;
1207 vm_object_lock(pmap_object);
1208 vm_page_insert(m, pmap_object, pa);
1209 vm_page_lock_queues();
1210 vm_page_wire(m);
1211 inuse_ptepages_count++;
1212 vm_page_unlock_queues();
1213 vm_object_unlock(pmap_object);
1214
1215 /*
1216 * Zero the page.
1217 */
1218 bzero(phystokv(pa), PAGE_SIZE);
1219
1220 #if i860
1221 /*
1222 * Mark the page table page(s) non-cacheable.
1223 */
1224 {
1225 int i = ptes_per_vm_page;
1226 pt_entry_t *pdp;
1227
1228 pdp = pmap_pte(kernel_pmap, pa);
1229 do {
1230 *pdp |= INTEL_PTE_NCACHE;
1231 pdp++;
1232 } while (--i > 0);
1233 }
1234 #endif
1235 return pa;
1236 }
1237
1238 /*
1239 * Deallocate a page-table page.
1240 * The page-table page must have all mappings removed,
1241 * and be removed from its page directory.
1242 */
1243 void
1244 pmap_page_table_page_dealloc(pa)
1245 vm_offset_t pa;
1246 {
1247 vm_page_t m;
1248
1249 vm_object_lock(pmap_object);
1250 m = vm_page_lookup(pmap_object, pa);
1251 vm_page_lock_queues();
1252 vm_page_free(m);
1253 inuse_ptepages_count--;
1254 vm_page_unlock_queues();
1255 vm_object_unlock(pmap_object);
1256 }
1257
1258 /*
1259 * Create and return a physical map.
1260 *
1261 * If the size specified for the map
1262 * is zero, the map is an actual physical
1263 * map, and may be referenced by the
1264 * hardware.
1265 *
1266 * If the size specified is non-zero,
1267 * the map will be used in software only, and
1268 * is bounded by that size.
1269 */
1270 pmap_t pmap_create(size)
1271 vm_size_t size;
1272 {
1273 register pmap_t p;
1274 register pmap_statistics_t stats;
1275
1276 /*
1277 * A software use-only map doesn't even need a map.
1278 */
1279
1280 if (size != 0) {
1281 return(PMAP_NULL);
1282 }
1283
1284 /*
1285 * Allocate a pmap struct from the pmap_zone. Then allocate
1286 * the page directory with kmem_alloc_wired, since it must be page-aligned.
1287 */
1288
1289 p = (pmap_t) zalloc(pmap_zone);
1290 if (p == PMAP_NULL)
1291 panic("pmap_create");
1292
1293 if (kmem_alloc_wired(kernel_map,
1294 (vm_offset_t *)&p->dirbase, INTEL_PGBYTES)
1295 != KERN_SUCCESS)
1296 panic("pmap_create");
1297
1298 bcopy(kpde, p->dirbase, INTEL_PGBYTES);
1299 p->ref_count = 1;
1300
1301 simple_lock_init(&p->lock);
1302 p->cpus_using = 0;
1303
1304 /*
1305 * Initialize statistics.
1306 */
1307
1308 stats = &p->stats;
1309 stats->resident_count = 0;
1310 stats->wired_count = 0;
1311
1312 return(p);
1313 }
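
/*
 * Lifecycle sketch (editor's addition, not part of the original source):
 * how a new address space typically exercises these entry points.  The
 * va/pa values are page-aligned placeholders and error handling is omitted.
 */
#if 0	/* illustration only */
	pmap_t		map;
	vm_offset_t	va, pa;		/* assumed chosen by the caller */

	map = pmap_create((vm_size_t) 0);	/* real, hardware-backed pmap */
	pmap_enter(map, va, pa, VM_PROT_READ | VM_PROT_WRITE, FALSE);
	assert(pmap_extract(map, va) == pa);
	pmap_remove(map, va, va + PAGE_SIZE);
	pmap_destroy(map);
#endif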
1314
1315 /*
1316 * Retire the given physical map from service.
1317 * Should only be called if the map contains
1318 * no valid mappings.
1319 */
1320
1321 void pmap_destroy(p)
1322 register pmap_t p;
1323 {
1324 register pt_entry_t *pdep;
1325 register vm_offset_t pa;
1326 register int c, s;
1327 register vm_page_t m;
1328
1329 if (p == PMAP_NULL)
1330 return;
1331
1332 SPLVM(s);
1333 simple_lock(&p->lock);
1334 c = --p->ref_count;
1335 simple_unlock(&p->lock);
1336 SPLX(s);
1337
1338 if (c != 0) {
1339 return; /* still in use */
1340 }
1341
1342 /*
1343 * Free the memory maps, then the
1344 * pmap structure.
1345 */
1346 for (pdep = p->dirbase;
1347 pdep < &p->dirbase[pdenum(VM_MIN_KERNEL_ADDRESS)];
1348 pdep += ptes_per_vm_page) {
1349 if (*pdep & INTEL_PTE_VALID) {
1350 pa = pte_to_pa(*pdep);
1351 vm_object_lock(pmap_object);
1352 m = vm_page_lookup(pmap_object, pa);
1353 if (m == VM_PAGE_NULL)
1354 panic("pmap_destroy: pte page not in object");
1355 vm_page_lock_queues();
1356 vm_page_free(m);
1357 inuse_ptepages_count--;
1358 vm_page_unlock_queues();
1359 vm_object_unlock(pmap_object);
1360 }
1361 }
1362 kmem_free(kernel_map, p->dirbase, INTEL_PGBYTES);
1363 zfree(pmap_zone, (vm_offset_t) p);
1364 }
1365
1366 /*
1367 * Add a reference to the specified pmap.
1368 */
1369
1370 void pmap_reference(p)
1371 register pmap_t p;
1372 {
1373 int s;
1374 if (p != PMAP_NULL) {
1375 SPLVM(s);
1376 simple_lock(&p->lock);
1377 p->ref_count++;
1378 simple_unlock(&p->lock);
1379 SPLX(s);
1380 }
1381 }
1382
1383 /*
1384 * Remove a range of hardware page-table entries.
1385 * The entries given are the first (inclusive)
1386 * and last (exclusive) entries for the VM pages.
1387 * The virtual address is the va for the first pte.
1388 *
1389 * The pmap must be locked.
1390 * If the pmap is not the kernel pmap, the range must lie
1391 * entirely within one pte-page. This is NOT checked.
1392 * Assumes that the pte-page exists.
1393 */
1394
1395 /* static */
1396 void pmap_remove_range(pmap, va, spte, epte)
1397 pmap_t pmap;
1398 vm_offset_t va;
1399 pt_entry_t *spte;
1400 pt_entry_t *epte;
1401 {
1402 register pt_entry_t *cpte;
1403 int num_removed, num_unwired;
1404 int pai;
1405 vm_offset_t pa;
1406
1407 #if DEBUG_PTE_PAGE
1408 if (pmap != kernel_pmap)
1409 ptep_check(get_pte_page(spte));
1410 #endif DEBUG_PTE_PAGE
1411 num_removed = 0;
1412 num_unwired = 0;
1413
1414 for (cpte = spte; cpte < epte;
1415 cpte += ptes_per_vm_page, va += PAGE_SIZE) {
1416
1417 if (*cpte == 0)
1418 continue;
1419 pa = pte_to_pa(*cpte);
1420
1421 num_removed++;
1422 if (*cpte & INTEL_PTE_WIRED)
1423 num_unwired++;
1424
1425 if (!valid_page(pa)) {
1426
1427 /*
1428 * Outside range of managed physical memory.
1429 * Just remove the mappings.
1430 */
1431 register int i = ptes_per_vm_page;
1432 register pt_entry_t *lpte = cpte;
1433 do {
1434 *lpte = 0;
1435 lpte++;
1436 } while (--i > 0);
1437 continue;
1438 }
1439
1440 pai = pa_index(pa);
1441 LOCK_PVH(pai);
1442
1443 /*
1444 * Get the modify and reference bits.
1445 */
1446 {
1447 register int i;
1448 register pt_entry_t *lpte;
1449
1450 i = ptes_per_vm_page;
1451 lpte = cpte;
1452 do {
1453 pmap_phys_attributes[pai] |=
1454 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
1455 *lpte = 0;
1456 lpte++;
1457 } while (--i > 0);
1458 }
1459
1460 /*
1461 * Remove the mapping from the pvlist for
1462 * this physical page.
1463 */
1464 {
1465 register pv_entry_t pv_h, prev, cur;
1466
1467 pv_h = pai_to_pvh(pai);
1468 if (pv_h->pmap == PMAP_NULL) {
1469 panic("pmap_remove: null pv_list!");
1470 }
1471 if (pv_h->va == va && pv_h->pmap == pmap) {
1472 /*
1473 * Header is the pv_entry. Copy the next one
1474 * to header and free the next one (we cannot
1475 * free the header)
1476 */
1477 cur = pv_h->next;
1478 if (cur != PV_ENTRY_NULL) {
1479 *pv_h = *cur;
1480 PV_FREE(cur);
1481 }
1482 else {
1483 pv_h->pmap = PMAP_NULL;
1484 }
1485 }
1486 else {
1487 cur = pv_h;
1488 do {
1489 prev = cur;
1490 if ((cur = prev->next) == PV_ENTRY_NULL) {
1491 panic("pmap-remove: mapping not in pv_list!");
1492 }
1493 } while (cur->va != va || cur->pmap != pmap);
1494 prev->next = cur->next;
1495 PV_FREE(cur);
1496 }
1497 UNLOCK_PVH(pai);
1498 }
1499 }
1500
1501 /*
1502 * Update the counts
1503 */
1504 pmap->stats.resident_count -= num_removed;
1505 pmap->stats.wired_count -= num_unwired;
1506 }
1507
1508 /*
1509 * Remove the given range of addresses
1510 * from the specified map.
1511 *
1512 * It is assumed that the start and end are properly
1513 * rounded to the hardware page size.
1514 */
1515
1516 void pmap_remove(map, s, e)
1517 pmap_t map;
1518 vm_offset_t s, e;
1519 {
1520 int spl;
1521 register pt_entry_t *pde;
1522 register pt_entry_t *spte, *epte;
1523 vm_offset_t l;
1524
1525 if (map == PMAP_NULL)
1526 return;
1527
1528 PMAP_READ_LOCK(map, spl);
1529
1530 /*
1531 * Invalidate the translation buffer first
1532 */
1533 PMAP_UPDATE_TLBS(map, s, e);
1534
1535 pde = pmap_pde(map, s);
1536 while (s < e) {
1537 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1538 if (l > e)
1539 l = e;
1540 if (*pde & INTEL_PTE_VALID) {
1541 spte = (pt_entry_t *)ptetokv(*pde);
1542 spte = &spte[ptenum(s)];
1543 epte = &spte[intel_btop(l-s)];
1544 pmap_remove_range(map, s, spte, epte);
1545 }
1546 s = l;
1547 pde++;
1548 }
1549
1550 PMAP_READ_UNLOCK(map, spl);
1551 }
1552
1553 /*
1554 * Routine: pmap_page_protect
1555 *
1556 * Function:
1557 * Lower the permission for all mappings to a given
1558 * page.
1559 */
1560 void pmap_page_protect(phys, prot)
1561 vm_offset_t phys;
1562 vm_prot_t prot;
1563 {
1564 pv_entry_t pv_h, prev;
1565 register pv_entry_t pv_e;
1566 register pt_entry_t *pte;
1567 int pai;
1568 register pmap_t pmap;
1569 int spl;
1570 boolean_t remove;
1571
1572 assert(phys != vm_page_fictitious_addr);
1573 if (!valid_page(phys)) {
1574 /*
1575 * Not a managed page.
1576 */
1577 return;
1578 }
1579
1580 /*
1581 * Determine the new protection.
1582 */
1583 switch (prot) {
1584 case VM_PROT_READ:
1585 case VM_PROT_READ|VM_PROT_EXECUTE:
1586 remove = FALSE;
1587 break;
1588 case VM_PROT_ALL:
1589 return; /* nothing to do */
1590 default:
1591 remove = TRUE;
1592 break;
1593 }
1594
1595 /*
1596 * Lock the pmap system first, since we will be changing
1597 * several pmaps.
1598 */
1599
1600 PMAP_WRITE_LOCK(spl);
1601
1602 pai = pa_index(phys);
1603 pv_h = pai_to_pvh(pai);
1604
1605 /*
1606 * Walk down PV list, changing or removing all mappings.
1607 * We do not have to lock the pv_list because we have
1608 * the entire pmap system locked.
1609 */
1610 if (pv_h->pmap != PMAP_NULL) {
1611
1612 prev = pv_e = pv_h;
1613 do {
1614 pmap = pv_e->pmap;
1615 /*
1616 * Lock the pmap to block pmap_extract and similar routines.
1617 */
1618 simple_lock(&pmap->lock);
1619
1620 {
1621 register vm_offset_t va;
1622
1623 va = pv_e->va;
1624 pte = pmap_pte(pmap, va);
1625
1626 /*
1627 * Consistency checks.
1628 */
1629 /* assert(*pte & INTEL_PTE_VALID); XXX */
1630 /* assert(pte_to_phys(*pte) == phys); */
1631
1632 /*
1633 * Invalidate TLBs for all CPUs using this mapping.
1634 */
1635 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1636 }
1637
1638 /*
1639 * Remove the mapping if new protection is NONE
1640 * or if write-protecting a kernel mapping.
1641 */
1642 if (remove || pmap == kernel_pmap) {
1643 /*
1644 * Remove the mapping, collecting any modify bits.
1645 */
1646 if (*pte & INTEL_PTE_WIRED)
1647 panic("pmap_remove_all removing a wired page");
1648
1649 {
1650 register int i = ptes_per_vm_page;
1651
1652 do {
1653 pmap_phys_attributes[pai] |=
1654 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1655 *pte++ = 0;
1656 } while (--i > 0);
1657 }
1658
1659 pmap->stats.resident_count--;
1660
1661 /*
1662 * Remove the pv_entry.
1663 */
1664 if (pv_e == pv_h) {
1665 /*
1666 * Fix up head later.
1667 */
1668 pv_h->pmap = PMAP_NULL;
1669 }
1670 else {
1671 /*
1672 * Delete this entry.
1673 */
1674 prev->next = pv_e->next;
1675 PV_FREE(pv_e);
1676 }
1677 }
1678 else {
1679 /*
1680 * Write-protect.
1681 */
1682 register int i = ptes_per_vm_page;
1683
1684 do {
1685 *pte &= ~INTEL_PTE_WRITE;
1686 pte++;
1687 } while (--i > 0);
1688
1689 /*
1690 * Advance prev.
1691 */
1692 prev = pv_e;
1693 }
1694
1695 simple_unlock(&pmap->lock);
1696
1697 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1698
1699 /*
1700 * If pv_head mapping was removed, fix it up.
1701 */
1702 if (pv_h->pmap == PMAP_NULL) {
1703 pv_e = pv_h->next;
1704 if (pv_e != PV_ENTRY_NULL) {
1705 *pv_h = *pv_e;
1706 PV_FREE(pv_e);
1707 }
1708 }
1709 }
1710
1711 PMAP_WRITE_UNLOCK(spl);
1712 }
1713
1714 /*
1715 * Set the physical protection on the
1716 * specified range of this map as requested.
1717 * Will not increase permissions.
1718 */
1719 void pmap_protect(map, s, e, prot)
1720 pmap_t map;
1721 vm_offset_t s, e;
1722 vm_prot_t prot;
1723 {
1724 register pt_entry_t *pde;
1725 register pt_entry_t *spte, *epte;
1726 vm_offset_t l;
1727 int spl;
1728
1729 if (map == PMAP_NULL)
1730 return;
1731
1732 /*
1733 * Determine the new protection.
1734 */
1735 switch (prot) {
1736 case VM_PROT_READ:
1737 case VM_PROT_READ|VM_PROT_EXECUTE:
1738 break;
1739 case VM_PROT_READ|VM_PROT_WRITE:
1740 case VM_PROT_ALL:
1741 return; /* nothing to do */
1742 default:
1743 pmap_remove(map, s, e);
1744 return;
1745 }
1746
1747 /*
1748 * If write-protecting in the kernel pmap,
1749 * remove the mappings; the i386 ignores
1750 * the write-permission bit in kernel mode.
1751 *
1752 * XXX should be #if'd for i386
1753 */
1754 if (map == kernel_pmap) {
1755 pmap_remove(map, s, e);
1756 return;
1757 }
1758
1759 SPLVM(spl);
1760 simple_lock(&map->lock);
1761
1762 /*
1763 * Invalidate the translation buffer first
1764 */
1765 PMAP_UPDATE_TLBS(map, s, e);
1766
1767 pde = pmap_pde(map, s);
1768 while (s < e) {
1769 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1770 if (l > e)
1771 l = e;
1772 if (*pde & INTEL_PTE_VALID) {
1773 spte = (pt_entry_t *)ptetokv(*pde);
1774 spte = &spte[ptenum(s)];
1775 epte = &spte[intel_btop(l-s)];
1776
1777 while (spte < epte) {
1778 if (*spte & INTEL_PTE_VALID)
1779 *spte &= ~INTEL_PTE_WRITE;
1780 spte++;
1781 }
1782 }
1783 s = l;
1784 pde++;
1785 }
1786
1787 simple_unlock(&map->lock);
1788 SPLX(spl);
1789 }
1790
1791 /*
1792 * Insert the given physical page (p) at
1793 * the specified virtual address (v) in the
1794 * target physical map with the protection requested.
1795 *
1796 * If specified, the page will be wired down, meaning
1797 * that the related pte can not be reclaimed.
1798 *
1799 * NB: This is the only routine which MAY NOT lazy-evaluate
1800 * or lose information. That is, this routine must actually
1801 * insert this page into the given map NOW.
1802 */
1803 void pmap_enter(pmap, v, pa, prot, wired)
1804 register pmap_t pmap;
1805 vm_offset_t v;
1806 register vm_offset_t pa;
1807 vm_prot_t prot;
1808 boolean_t wired;
1809 {
1810 register pt_entry_t *pte;
1811 register pv_entry_t pv_h;
1812 register int i, pai;
1813 pv_entry_t pv_e;
1814 pt_entry_t template;
1815 int spl;
1816 vm_offset_t old_pa;
1817
1818 assert(pa != vm_page_fictitious_addr);
1819 if (pmap_debug) printf("pmap(%x, %x)\n", v, pa);
1820 if (pmap == PMAP_NULL)
1821 return;
1822
1823 if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
1824 && !wired /* hack for io_wire */ ) {
1825 /*
1826 * Because the 386 ignores write protection in kernel mode,
1827 * we cannot enter a read-only kernel mapping, and must
1828 * remove an existing mapping if changing it.
1829 *
1830 * XXX should be #if'd for i386
1831 */
1832 PMAP_READ_LOCK(pmap, spl);
1833
1834 pte = pmap_pte(pmap, v);
1835 if (pte != PT_ENTRY_NULL && *pte != 0) {
1836 /*
1837 * Invalidate the translation buffer,
1838 * then remove the mapping.
1839 */
1840 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
1841 pmap_remove_range(pmap, v, pte,
1842 pte + ptes_per_vm_page);
1843 }
1844 PMAP_READ_UNLOCK(pmap, spl);
1845 return;
1846 }
1847
1848 /*
1849 * Must allocate a new pvlist entry while we're unlocked;
1850 * zalloc may cause pageout (which will lock the pmap system).
1851 * If we determine we need a pvlist entry, we will unlock
1852 * and allocate one. Then we will retry, throwing away
1853 * the allocated entry later (if we no longer need it).
1854 */
1855 pv_e = PV_ENTRY_NULL;
1856 Retry:
1857 PMAP_READ_LOCK(pmap, spl);
1858
1859 /*
1860 * Expand pmap to include this pte. Assume that
1861 * pmap is always expanded to include enough hardware
1862 * pages to map one VM page.
1863 */
1864
1865 while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
1866 /*
1867 * Need to allocate a new page-table page.
1868 */
1869 vm_offset_t ptp;
1870 pt_entry_t *pdp;
1871 int i;
1872
1873 if (pmap == kernel_pmap) {
1874 /*
1875 * Would have to enter the new page-table page in
1876 * EVERY pmap.
1877 */
1878 panic("pmap_expand kernel pmap to %#x", v);
1879 }
1880
1881 /*
1882 * Unlock the pmap and allocate a new page-table page.
1883 */
1884 PMAP_READ_UNLOCK(pmap, spl);
1885
1886 ptp = pmap_page_table_page_alloc();
1887
1888 /*
1889 * Re-lock the pmap and check that another thread has
1890 * not already allocated the page-table page. If it
1891 * has, discard the new page-table page (and try
1892 * again to make sure).
1893 */
1894 PMAP_READ_LOCK(pmap, spl);
1895
1896 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
1897 /*
1898 * Oops...
1899 */
1900 PMAP_READ_UNLOCK(pmap, spl);
1901 pmap_page_table_page_dealloc(ptp);
1902 PMAP_READ_LOCK(pmap, spl);
1903 continue;
1904 }
1905
1906 /*
1907 * Enter the new page table page in the page directory.
1908 */
1909 i = ptes_per_vm_page;
1910 pdp = &pmap->dirbase[pdenum(v) & ~(i-1)];
1911 do {
1912 *pdp = pa_to_pte(ptp) | INTEL_PTE_VALID
1913 | INTEL_PTE_USER
1914 | INTEL_PTE_WRITE;
1915 pdp++;
1916 ptp += INTEL_PGBYTES;
1917 } while (--i > 0);
1918 #if i860
1919 /*
1920 * Flush the data cache.
1921 */
1922 flush();
1923 #endif /* i860 */
1924
1925 /*
1926 * Now, get the address of the page-table entry.
1927 */
1928 continue;
1929 }
1930
1931 /*
1932 * Special case if the physical page is already mapped
1933 * at this address.
1934 */
1935 old_pa = pte_to_pa(*pte);
1936 if (*pte && old_pa == pa) {
1937 /*
1938 * May be changing its wired attribute or protection
1939 */
1940
1941 if (wired && !(*pte & INTEL_PTE_WIRED))
1942 pmap->stats.wired_count++;
1943 else if (!wired && (*pte & INTEL_PTE_WIRED))
1944 pmap->stats.wired_count--;
1945
1946 template = pa_to_pte(pa) | INTEL_PTE_VALID;
1947 if (pmap != kernel_pmap)
1948 template |= INTEL_PTE_USER;
1949 if (prot & VM_PROT_WRITE)
1950 template |= INTEL_PTE_WRITE;
1951 if (wired)
1952 template |= INTEL_PTE_WIRED;
1953 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
1954 i = ptes_per_vm_page;
1955 do {
1956 if (*pte & INTEL_PTE_MOD)
1957 template |= INTEL_PTE_MOD;
1958 WRITE_PTE(pte, template)
1959 pte++;
1960 pte_increment_pa(template);
1961 } while (--i > 0);
1962 }
1963 else {
1964
1965 /*
1966 * Remove old mapping from the PV list if necessary.
1967 */
1968 if (*pte) {
1969 /*
1970 * Invalidate the translation buffer,
1971 * then remove the mapping.
1972 */
1973 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
1974
1975 /*
1976 * Don't free the pte page if removing last
1977 * mapping - we will immediately replace it.
1978 */
1979 pmap_remove_range(pmap, v, pte,
1980 pte + ptes_per_vm_page);
1981 }
1982
1983 if (valid_page(pa)) {
1984
1985 /*
1986 * Enter the mapping in the PV list for this
1987 * physical page.
1988 */
1989
1990 pai = pa_index(pa);
1991 LOCK_PVH(pai);
1992 pv_h = pai_to_pvh(pai);
1993
1994 if (pv_h->pmap == PMAP_NULL) {
1995 /*
1996 * No mappings yet
1997 */
1998 pv_h->va = v;
1999 pv_h->pmap = pmap;
2000 pv_h->next = PV_ENTRY_NULL;
2001 }
2002 else {
2003 #if DEBUG
2004 {
2005 /* check that this mapping is not already there */
2006 pv_entry_t e = pv_h;
2007 while (e != PV_ENTRY_NULL) {
2008 if (e->pmap == pmap && e->va == v)
2009 panic("pmap_enter: already in pv_list");
2010 e = e->next;
2011 }
2012 }
2013 #endif DEBUG
2014
2015 /*
2016 * Add new pv_entry after header.
2017 */
2018 if (pv_e == PV_ENTRY_NULL) {
2019 PV_ALLOC(pv_e);
2020 if (pv_e == PV_ENTRY_NULL) {
2021 UNLOCK_PVH(pai);
2022 PMAP_READ_UNLOCK(pmap, spl);
2023
2024 /*
2025 * Refill from zone.
2026 */
2027 pv_e = (pv_entry_t) zalloc(pv_list_zone);
2028 goto Retry;
2029 }
2030 }
2031 pv_e->va = v;
2032 pv_e->pmap = pmap;
2033 pv_e->next = pv_h->next;
2034 pv_h->next = pv_e;
2035 /*
2036 * Remember that we used the pvlist entry.
2037 */
2038 pv_e = PV_ENTRY_NULL;
2039 }
2040 UNLOCK_PVH(pai);
2041 }
2042
2043 /*
2044 * And count the mapping.
2045 */
2046
2047 pmap->stats.resident_count++;
2048 if (wired)
2049 pmap->stats.wired_count++;
2050
2051 /*
2052 * Build a template to speed up entering -
2053 * only the pfn changes.
2054 */
2055 template = pa_to_pte(pa) | INTEL_PTE_VALID;
2056 if (pmap != kernel_pmap)
2057 template |= INTEL_PTE_USER;
2058 if (prot & VM_PROT_WRITE)
2059 template |= INTEL_PTE_WRITE;
2060 if (wired)
2061 template |= INTEL_PTE_WIRED;
2062 i = ptes_per_vm_page;
2063 do {
2064 WRITE_PTE(pte, template)
2065 pte++;
2066 pte_increment_pa(template);
2067 } while (--i > 0);
2068 }
2069
2070 if (pv_e != PV_ENTRY_NULL) {
2071 PV_FREE(pv_e);
2072 }
2073
2074 PMAP_READ_UNLOCK(pmap, spl);
2075 }
2076
2077 /*
2078 * Routine: pmap_change_wiring
2079 * Function: Change the wiring attribute for a map/virtual-address
2080 * pair.
2081 * In/out conditions:
2082 * The mapping must already exist in the pmap.
2083 */
2084 void pmap_change_wiring(map, v, wired)
2085 register pmap_t map;
2086 vm_offset_t v;
2087 boolean_t wired;
2088 {
2089 register pt_entry_t *pte;
2090 register int i;
2091 int spl;
2092
2093 /*
2094 * We must grab the pmap system lock because we may
2095 * change a pte_page queue.
2096 */
2097 PMAP_READ_LOCK(map, spl);
2098
2099 if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
2100 panic("pmap_change_wiring: pte missing");
2101
2102 if (wired && !(*pte & INTEL_PTE_WIRED)) {
2103 /*
2104 * wiring down mapping
2105 */
2106 map->stats.wired_count++;
2107 i = ptes_per_vm_page;
2108 do {
2109 *pte++ |= INTEL_PTE_WIRED;
2110 } while (--i > 0);
2111 }
2112 else if (!wired && (*pte & INTEL_PTE_WIRED)) {
2113 /*
2114 * unwiring mapping
2115 */
2116 map->stats.wired_count--;
2117 i = ptes_per_vm_page;
2118 do {
2119 *pte &= ~INTEL_PTE_WIRED;
2120 } while (--i > 0);
2121 }
2122
2123 PMAP_READ_UNLOCK(map, spl);
2124 }
2125
2126 /*
2127 * Routine: pmap_extract
2128 * Function:
2129 * Extract the physical page address associated
2130 * with the given map/virtual_address pair.
2131 */
2132
2133 vm_offset_t pmap_extract(pmap, va)
2134 register pmap_t pmap;
2135 vm_offset_t va;
2136 {
2137 register pt_entry_t *pte;
2138 register vm_offset_t pa;
2139 int spl;
2140
2141 SPLVM(spl);
2142 simple_lock(&pmap->lock);
2143 if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
2144 pa = (vm_offset_t) 0;
2145 else if (!(*pte & INTEL_PTE_VALID))
2146 pa = (vm_offset_t) 0;
2147 else
2148 pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
2149 simple_unlock(&pmap->lock);
2150 SPLX(spl);
2151 return(pa);
2152 }
2153
2154 /*
2155 * Copy the range specified by src_addr/len
2156 * from the source map to the range dst_addr/len
2157 * in the destination map.
2158 *
2159 * This routine is only advisory and need not do anything.
2160 */
2161 #if 0
2162 void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2163 pmap_t dst_pmap;
2164 pmap_t src_pmap;
2165 vm_offset_t dst_addr;
2166 vm_size_t len;
2167 vm_offset_t src_addr;
2168 {
2169 #ifdef lint
2170 dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
2171 #endif lint
2172 }
2173 #endif 0
2174
2175 /*
2176 * Routine: pmap_collect
2177 * Function:
2178 * Garbage collects the physical map system for
2179 * pages which are no longer used.
2180 * Success need not be guaranteed -- that is, there
2181 * may well be pages which are not referenced, but
2182 * others may be collected.
2183 * Usage:
2184 * Called by the pageout daemon when pages are scarce.
2185 */
2186 void pmap_collect(p)
2187 pmap_t p;
2188 {
2189 register pt_entry_t *pdp, *ptp;
2190 pt_entry_t *eptp;
2191 vm_offset_t pa;
2192 int spl, wired;
2193
2194 if (p == PMAP_NULL)
2195 return;
2196
2197 if (p == kernel_pmap)
2198 return;
2199
2200 /*
2201 * Garbage collect map.
2202 */
2203 PMAP_READ_LOCK(p, spl);
2204 PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
2205
2206 for (pdp = p->dirbase;
2207 pdp < &p->dirbase[pdenum(VM_MIN_KERNEL_ADDRESS)];
2208 pdp += ptes_per_vm_page)
2209 {
2210 if (*pdp & INTEL_PTE_VALID) {
2211
2212 pa = pte_to_pa(*pdp);
2213 ptp = (pt_entry_t *)phystokv(pa);
2214 eptp = ptp + NPTES*ptes_per_vm_page;
2215
2216 /*
2217 * If the pte page has any wired mappings, we cannot
2218 * free it.
2219 */
2220 wired = 0;
2221 {
2222 register pt_entry_t *ptep;
2223 for (ptep = ptp; ptep < eptp; ptep++) {
2224 if (*ptep & INTEL_PTE_WIRED) {
2225 wired = 1;
2226 break;
2227 }
2228 }
2229 }
2230 if (!wired) {
2231 /*
2232 * Remove the virtual addresses mapped by this pte page.
2233 */
2234 pmap_remove_range(p,
2235 pdetova(pdp - p->dirbase),
2236 ptp,
2237 eptp);
2238
2239 /*
2240 * Invalidate the page directory pointer.
2241 */
2242 {
2243 register int i = ptes_per_vm_page;
2244 register pt_entry_t *pdep = pdp;
2245 do {
2246 *pdep++ = 0;
2247 } while (--i > 0);
2248 }
2249
2250 PMAP_READ_UNLOCK(p, spl);
2251
2252 /*
2253 * And free the pte page itself.
2254 */
2255 {
2256 register vm_page_t m;
2257
2258 vm_object_lock(pmap_object);
2259 m = vm_page_lookup(pmap_object, pa);
2260 if (m == VM_PAGE_NULL)
2261 panic("pmap_collect: pte page not in object");
2262 vm_page_lock_queues();
2263 vm_page_free(m);
2264 inuse_ptepages_count--;
2265 vm_page_unlock_queues();
2266 vm_object_unlock(pmap_object);
2267 }
2268
2269 PMAP_READ_LOCK(p, spl);
2270 }
2271 }
2272 }
2273 PMAP_READ_UNLOCK(p, spl);
2274 return;
2275
2276 }
2277
2278 /*
2279 * Routine: pmap_activate
2280 * Function:
2281 * Binds the given physical map to the given
2282 * processor, and returns a hardware map description.
2283 */
2284 #if 0
2285 void pmap_activate(my_pmap, th, my_cpu)
2286 register pmap_t my_pmap;
2287 thread_t th;
2288 int my_cpu;
2289 {
2290 PMAP_ACTIVATE(my_pmap, th, my_cpu);
2291 }
2292	#endif /* 0 */
2293
2294 /*
2295 * Routine: pmap_deactivate
2296 * Function:
2297 * Indicates that the given physical map is no longer
2298 * in use on the specified processor. (This is a macro
2299 * in pmap.h)
2300 */
2301 #if 0
2302 void pmap_deactivate(pmap, th, which_cpu)
2303 pmap_t pmap;
2304 thread_t th;
2305 int which_cpu;
2306 {
2307 #ifdef lint
2308 pmap++; th++; which_cpu++;
2309	#endif /* lint */
2310 PMAP_DEACTIVATE(pmap, th, which_cpu);
2311 }
2312	#endif /* 0 */
2313
2314 /*
2315 * Routine: pmap_kernel
2316 * Function:
2317 * Returns the physical map handle for the kernel.
2318 */
2319 #if 0
2320 pmap_t pmap_kernel()
2321 {
2322 return (kernel_pmap);
2323 }
2324	#endif /* 0 */
2325
2326 /*
2327 * pmap_zero_page zeros the specified (machine independent) page.
2328 * See machine/phys.c or machine/phys.s for implementation.
2329 */
2330 #if 0
2331 pmap_zero_page(phys)
2332 register vm_offset_t phys;
2333 {
2334 register int i;
2335
2336 assert(phys != vm_page_fictitious_addr);
2337 i = PAGE_SIZE / INTEL_PGBYTES;
2338 phys = intel_pfn(phys);
2339
2340 while (i--)
2341 zero_phys(phys++);
2342 }
2343	#endif /* 0 */
2344
2345 /*
2346 * pmap_copy_page copies the specified (machine independent) page.
2347 * See machine/phys.c or machine/phys.s for implementation.
2348 */
2349 #if 0
2350 pmap_copy_page(src, dst)
2351 vm_offset_t src, dst;
2352 {
2353 int i;
2354
2355 assert(src != vm_page_fictitious_addr);
2356 assert(dst != vm_page_fictitious_addr);
2357 i = PAGE_SIZE / INTEL_PGBYTES;
2358
2359 while (i--) {
2360 copy_phys(intel_pfn(src), intel_pfn(dst));
2361 src += INTEL_PGBYTES;
2362 dst += INTEL_PGBYTES;
2363 }
2364 }
2365	#endif /* 0 */
2366
2367 /*
2368 * Routine: pmap_pageable
2369 * Function:
2370 * Make the specified pages (by pmap, offset)
2371 * pageable (or not) as requested.
2372 *
2373 * A page which is not pageable may not take
2374 * a fault; therefore, its page table entry
2375 * must remain valid for the duration.
2376 *
2377 * This routine is merely advisory; pmap_enter
2378 * will specify that these pages are to be wired
2379 * down (or not) as appropriate.
2380 */
2381 pmap_pageable(pmap, start, end, pageable)
2382 pmap_t pmap;
2383 vm_offset_t start;
2384 vm_offset_t end;
2385 boolean_t pageable;
2386 {
2387 #ifdef lint
2388 pmap++; start++; end++; pageable++;
2389	#endif /* lint */
2390 }
2391
2392 /*
2393 * Clear specified attribute bits.
2394 */
2395 void
2396 phys_attribute_clear(phys, bits)
2397 vm_offset_t phys;
2398 int bits;
2399 {
2400 pv_entry_t pv_h;
2401 register pv_entry_t pv_e;
2402 register pt_entry_t *pte;
2403 int pai;
2404 register pmap_t pmap;
2405 int spl;
2406
2407 assert(phys != vm_page_fictitious_addr);
2408 if (!valid_page(phys)) {
2409 /*
2410 * Not a managed page.
2411 */
2412 return;
2413 }
2414
2415 /*
2416 * Lock the pmap system first, since we will be changing
2417 * several pmaps.
2418 */
2419
2420 PMAP_WRITE_LOCK(spl);
2421
2422 pai = pa_index(phys);
2423 pv_h = pai_to_pvh(pai);
2424
2425 /*
2426 * Walk down PV list, clearing all modify or reference bits.
2427 * We do not have to lock the pv_list because we have
2428 * the entire pmap system locked.
2429 */
2430 if (pv_h->pmap != PMAP_NULL) {
2431 /*
2432 * There are some mappings.
2433 */
2434 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2435
2436 pmap = pv_e->pmap;
2437 /*
2438 * Lock the pmap to block pmap_extract and similar routines.
2439 */
2440 simple_lock(&pmap->lock);
2441
2442 {
2443 register vm_offset_t va;
2444
2445 va = pv_e->va;
2446 pte = pmap_pte(pmap, va);
2447
2448 #if 0
2449 /*
2450 * Consistency checks.
2451 */
2452 assert(*pte & INTEL_PTE_VALID);
2453 /* assert(pte_to_phys(*pte) == phys); */
2454 #endif
2455
2456 /*
2457 * Invalidate TLBs for all CPUs using this mapping.
2458 */
2459 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
2460 }
2461
2462 /*
2463 * Clear modify or reference bits.
2464 */
2465 {
2466 register int i = ptes_per_vm_page;
2467 do {
2468			*pte++ &= ~bits;
2469 } while (--i > 0);
2470 }
2471 simple_unlock(&pmap->lock);
2472 }
2473 }
2474
2475 pmap_phys_attributes[pai] &= ~bits;
2476
2477 PMAP_WRITE_UNLOCK(spl);
2478 }
2479
2480 /*
2481 * Check specified attribute bits.
2482 */
2483 boolean_t
2484 phys_attribute_test(phys, bits)
2485 vm_offset_t phys;
2486 int bits;
2487 {
2488 pv_entry_t pv_h;
2489 register pv_entry_t pv_e;
2490 register pt_entry_t *pte;
2491 int pai;
2492 register pmap_t pmap;
2493 int spl;
2494
2495 assert(phys != vm_page_fictitious_addr);
2496 if (!valid_page(phys)) {
2497 /*
2498 * Not a managed page.
2499 */
2500 return (FALSE);
2501 }
2502
2503 /*
2504 * Lock the pmap system first, since we will be checking
2505 * several pmaps.
2506 */
2507
2508 PMAP_WRITE_LOCK(spl);
2509
2510 pai = pa_index(phys);
2511 pv_h = pai_to_pvh(pai);
2512
2513 if (pmap_phys_attributes[pai] & bits) {
2514 PMAP_WRITE_UNLOCK(spl);
2515 return (TRUE);
2516 }
2517
2518 /*
2519 * Walk down PV list, checking all mappings.
2520 * We do not have to lock the pv_list because we have
2521 * the entire pmap system locked.
2522 */
2523 if (pv_h->pmap != PMAP_NULL) {
2524 /*
2525 * There are some mappings.
2526 */
2527 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2528
2529 pmap = pv_e->pmap;
2530 /*
2531 * Lock the pmap to block pmap_extract and similar routines.
2532 */
2533 simple_lock(&pmap->lock);
2534
2535 {
2536 register vm_offset_t va;
2537
2538 va = pv_e->va;
2539 pte = pmap_pte(pmap, va);
2540
2541 #if 0
2542 /*
2543 * Consistency checks.
2544 */
2545 assert(*pte & INTEL_PTE_VALID);
2546 /* assert(pte_to_phys(*pte) == phys); */
2547 #endif
2548 }
2549
2550 /*
2551 * Check modify or reference bits.
2552 */
2553 {
2554 register int i = ptes_per_vm_page;
2555
2556 do {
2557			if (*pte++ & bits) {
2558 simple_unlock(&pmap->lock);
2559 PMAP_WRITE_UNLOCK(spl);
2560 return (TRUE);
2561 }
2562 } while (--i > 0);
2563 }
2564 simple_unlock(&pmap->lock);
2565 }
2566 }
2567 PMAP_WRITE_UNLOCK(spl);
2568 return (FALSE);
2569 }
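/*
 * Note, not part of the original source: the "*pte & bits" tests above rely
 * on PHYS_MODIFIED and PHYS_REFERENCED sharing the bit positions of the
 * hardware dirty and accessed bits that the processor sets in the PTE on
 * write and on access; that is also why phys_attribute_clear() can clear
 * them by masking the PTEs directly.  The actual bit definitions live in
 * the pmap header.
 */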
2570
2571 /*
2572 * Clear the modify bits on the specified physical page.
2573 */
2574
2575 void pmap_clear_modify(phys)
2576 register vm_offset_t phys;
2577 {
2578 phys_attribute_clear(phys, PHYS_MODIFIED);
2579 }
2580
2581 /*
2582 * pmap_is_modified:
2583 *
2584 * Return whether or not the specified physical page is modified
2585 * by any physical maps.
2586 */
2587
2588 boolean_t pmap_is_modified(phys)
2589 register vm_offset_t phys;
2590 {
2591 return (phys_attribute_test(phys, PHYS_MODIFIED));
2592 }
2593
2594 /*
2595 * pmap_clear_reference:
2596 *
2597 * Clear the reference bit on the specified physical page.
2598 */
2599
2600 void pmap_clear_reference(phys)
2601 vm_offset_t phys;
2602 {
2603 phys_attribute_clear(phys, PHYS_REFERENCED);
2604 }
2605
2606 /*
2607 * pmap_is_referenced:
2608 *
2609 * Return whether or not the specified physical page is referenced
2610 * by any physical maps.
2611 */
2612
2613 boolean_t pmap_is_referenced(phys)
2614 vm_offset_t phys;
2615 {
2616 return (phys_attribute_test(phys, PHYS_REFERENCED));
2617 }
2618
2619 #if NCPUS > 1
2620 /*
2621 * TLB Coherence Code (TLB "shootdown" code)
2622 *
2623 * Threads that belong to the same task share the same address space and
2624 * hence share a pmap. However, they may run on distinct cpus and thus
2625 * have distinct TLBs that cache page table entries. In order to guarantee
2626 * the TLBs are consistent, whenever a pmap is changed, all threads that
2627 * are active in that pmap must have their TLB updated. To keep track of
2628 * this information, the set of cpus that are currently using a pmap is
2629 * maintained within each pmap structure (cpus_using). Pmap_activate() and
2630	 * pmap_deactivate() add and remove, respectively, a cpu from this set.
2631 * Since the TLBs are not addressable over the bus, each processor must
2632 * flush its own TLB; a processor that needs to invalidate another TLB
2633 * needs to interrupt the processor that owns that TLB to signal the
2634 * update.
2635 *
2636 * Whenever a pmap is updated, the lock on that pmap is locked, and all
2637 * cpus using the pmap are signaled to invalidate. All threads that need
2638 * to activate a pmap must wait for the lock to clear to await any updates
2639 * in progress before using the pmap. They must ACQUIRE the lock to add
2640 * their cpu to the cpus_using set. An implicit assumption made
2641 * throughout the TLB code is that all kernel code that runs at or higher
2642 * than splvm blocks out update interrupts, and that such code does not
2643 * touch pageable pages.
2644 *
2645 * A shootdown interrupt serves another function besides signaling a
2646 * processor to invalidate. The interrupt routine (pmap_update_interrupt)
2647	 * waits for both the pmap lock and the kernel pmap lock to clear,
2648 * preventing user code from making implicit pmap updates while the
2649 * sending processor is performing its update. (This could happen via a
2650 * user data write reference that turns on the modify bit in the page
2651 * table). It must wait for any kernel updates that may have started
2652 * concurrently with a user pmap update because the IPC code
2653 * changes mappings.
2654 * Spinning on the VALUES of the locks is sufficient (rather than
2655 * having to acquire the locks) because any updates that occur subsequent
2656 * to finding the lock unlocked will be signaled via another interrupt.
2657 * (This assumes the interrupt is cleared before the low level interrupt code
2658 * calls pmap_update_interrupt()).
2659 *
2660 * The signaling processor must wait for any implicit updates in progress
2661 * to terminate before continuing with its update. Thus it must wait for an
2662 * acknowledgement of the interrupt from each processor for which such
2663 * references could be made. For maintaining this information, a set
2664 * cpus_active is used. A cpu is in this set if and only if it can
2665 * use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
2666 * this set; when all such cpus are removed, it is safe to update.
2667 *
2668 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
2669 * be at least at the priority of the interprocessor interrupt
2670 * (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
2671 * kernel update; it would spin forever in pmap_update_interrupt() trying
2672 * to acquire the user pmap lock it had already acquired. Furthermore A
2673 * must remove itself from cpus_active. Otherwise, another cpu holding
2674 * the lock (B) could be in the process of sending an update signal to A,
2675 * and thus be waiting for A to remove itself from cpus_active. If A is
2676	 * spinning on the lock at raised priority, this will never happen and a deadlock
2677 * will result.
2678 */
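/*
 * Reading aid, not part of the original source: the handshake described
 * above, condensed.  The initiating cpu (conceptually the PMAP_UPDATE_TLBS()
 * macro used elsewhere in this file; its definition lives in the pmap
 * header) roughly does:
 *
 *	lock the pmap (at splvm or above)
 *	signal_cpus(pmap->cpus_using, pmap, start, end)   -- queue ranges, send IPIs
 *	flush its own TLB if it is itself using the pmap
 *	wait, per the description above, for each signaled cpu to leave
 *	    cpus_active before going ahead with its page table update
 *
 * while each interrupted cpu, in pmap_update_interrupt() below:
 *
 *	leaves cpus_active (the acknowledgement the initiator waits for)
 *	spins until neither its pmap lock nor the kernel pmap lock is held
 *	runs process_pmap_updates() to flush the queued ranges
 *	re-enters cpus_active
 *
 * The authoritative ordering is in the real macros and in the functions
 * that follow; this is only a summary of the prose above.
 */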
2679
2680 /*
2681 * Signal another CPU that it must flush its TLB
2682 */
2683 void signal_cpus(use_list, pmap, start, end)
2684 cpu_set use_list;
2685 pmap_t pmap;
2686 vm_offset_t start, end;
2687 {
2688 register int which_cpu, j;
2689 register pmap_update_list_t update_list_p;
2690
2691 while ((which_cpu = ffs(use_list)) != 0) {
2692 which_cpu -= 1; /* convert to 0 origin */
2693
2694 update_list_p = &cpu_update_list[which_cpu];
2695 simple_lock(&update_list_p->lock);
2696
2697 j = update_list_p->count;
2698 if (j >= UPDATE_LIST_SIZE) {
2699 /*
2700 * list overflowed. Change last item to
2701 * indicate overflow.
2702 */
2703 update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
2704 update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
2705 update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
2706 }
2707 else {
2708 update_list_p->item[j].pmap = pmap;
2709 update_list_p->item[j].start = start;
2710 update_list_p->item[j].end = end;
2711 update_list_p->count = j+1;
2712 }
2713 cpu_update_needed[which_cpu] = TRUE;
2714 simple_unlock(&update_list_p->lock);
2715
2716 if ((cpus_idle & (1 << which_cpu)) == 0)
2717 interrupt_processor(which_cpu);
2718 use_list &= ~(1 << which_cpu);
2719 }
2720 }
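/*
 * Illustrative sketch, not part of the original source: the ffs() loop used
 * by signal_cpus() above, shown in isolation.  ffs() returns the 1-origin
 * index of the lowest set bit (0 when none is set), so each pass handles one
 * cpu and clears its bit from the working copy of the set.
 */
#include <stdio.h>
#include <strings.h>

int main(void)
{
	unsigned int use_list = 0x15;	/* cpus 0, 2 and 4 are using the pmap */
	int which_cpu;

	while ((which_cpu = ffs(use_list)) != 0) {
		which_cpu -= 1;			/* convert to 0 origin */
		printf("would signal cpu %d\n", which_cpu);
		use_list &= ~(1u << which_cpu);
	}
	return 0;
}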
2721
2722 void process_pmap_updates(my_pmap)
2723 register pmap_t my_pmap;
2724 {
2725 register int my_cpu = cpu_number();
2726 register pmap_update_list_t update_list_p;
2727 register int j;
2728 register pmap_t pmap;
2729
2730 update_list_p = &cpu_update_list[my_cpu];
2731 simple_lock(&update_list_p->lock);
2732
2733 for (j = 0; j < update_list_p->count; j++) {
2734 pmap = update_list_p->item[j].pmap;
2735 if (pmap == my_pmap ||
2736 pmap == kernel_pmap) {
2737
2738 INVALIDATE_TLB(update_list_p->item[j].start,
2739 update_list_p->item[j].end);
2740 }
2741 }
2742 update_list_p->count = 0;
2743 cpu_update_needed[my_cpu] = FALSE;
2744 simple_unlock(&update_list_p->lock);
2745 }
2746
2747 /*
2748 * Interrupt routine for TBIA requested from other processor.
2749 */
2750 void pmap_update_interrupt()
2751 {
2752 register int my_cpu;
2753 register pmap_t my_pmap;
2754 int s;
2755
2756 my_cpu = cpu_number();
2757
2758 /*
2759 * Exit now if we're idle. We'll pick up the update request
2760 * when we go active, and we must not put ourselves back in
2761 * the active set because we'll never process the interrupt
2762 * while we're idle (thus hanging the system).
2763 */
2764 if (cpus_idle & (1 << my_cpu))
2765 return;
2766
2767 if (current_thread() == THREAD_NULL)
2768 my_pmap = kernel_pmap;
2769 else {
2770 my_pmap = current_pmap();
2771 if (!pmap_in_use(my_pmap, my_cpu))
2772 my_pmap = kernel_pmap;
2773 }
2774
2775 /*
2776 * Raise spl to splvm (above splip) to block out pmap_extract
2777 * from IO code (which would put this cpu back in the active
2778 * set).
2779 */
2780 s = splvm();
2781
2782 do {
2783
2784 /*
2785 * Indicate that we're not using either user or kernel
2786 * pmap.
2787 */
2788 i_bit_clear(my_cpu, &cpus_active);
2789
2790 /*
2791 * Wait for any pmap updates in progress, on either user
2792 * or kernel pmap.
2793 */
2794 while (*(volatile int *)&my_pmap->lock.lock_data ||
2795 *(volatile int *)&kernel_pmap->lock.lock_data)
2796 continue;
2797
2798 process_pmap_updates(my_pmap);
2799
2800 i_bit_set(my_cpu, &cpus_active);
2801
2802 } while (cpu_update_needed[my_cpu]);
2803
2804 splx(s);
2805 }
2806	#else /* NCPUS > 1 */
2807 /*
2808 * Dummy routine to satisfy external reference.
2809 */
2810 void pmap_update_interrupt()
2811 {
2812 /* should never be called. */
2813 }
2814	#endif /* NCPUS > 1 */
2815
2816 #if i860 /* akp */
2817 void set_dirbase(dirbase)
2818 register vm_offset_t dirbase;
2819 {
2820 /*flush();*/
2821 /*flush_tlb();*/
2822 flush_and_ctxsw(dirbase);
2823 }
2824	#endif /* i860 */