FreeBSD/Linux Kernel Cross Reference
sys/intel/pmap.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1993,1992,1991,1990,1989,1988 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: pmap.c,v $
29 * Revision 2.19 93/11/17 16:55:03 dbg
30 * Cleaned up lint. ANSI-fied.
31 * [93/06/17 dbg]
32 *
33 * Revision 2.18 93/01/14 17:32:25 danner
34 * Lock pmap_object!
35 * [92/09/22 dbg]
36 *
37 * Revision 2.17 92/04/06 23:12:15 rpd
38 * It's time to let us map page 0.
39 * [92/04/03 rvb]
40 *
41 * Revision 2.16 92/04/01 19:32:48 rpd
42 * Removed pmap_remove_attributes.
43 * [92/03/25 rpd]
44 *
45 * Revision 2.15 92/01/14 16:44:01 rpd
46 * Removed pmap_list_resident_pages.
47 * [91/12/31 rpd]
48 *
49 * Revision 2.14 91/12/10 16:32:15 jsb
50 * Fixes from Intel
51 * [91/12/10 15:51:38 jsb]
52 *
53 * Revision 2.13 91/11/18 17:37:09 rvb
54 * Up morevm for NORMA.
55 *
56 * Revision 2.12 91/08/28 11:13:08 jsb
57 * From Intel SSD: add data cache flush in INVALIDATE_TLB to work around
58 * some more subtle unknown bug in page table caching; allow user access
59 * to various bits of I/O space?
60 * [91/08/26 18:27:04 jsb]
61 *
62 * Revision 2.11 91/06/17 15:45:52 jsb
63 * Fixed reference to XEOD_OFF_PH for i860ipsc dcm module.
64 * [91/06/17 10:43:40 jsb]
65 *
66 * Revision 2.10 91/06/06 17:05:03 jsb
67 * Defined SPLVM, SPLX as null (vs. splvm, splx) in uniprocessor case.
68 * [91/05/13 17:12:34 jsb]
69 *
70 * Revision 2.9 91/05/18 14:31:14 rpd
71 * Moved pmap_free_pages, pmap_next_page to a model-dependent file.
72 * [91/05/15 rpd]
73 *
74 * Make sure hole_start and hole_end are page-aligned.
75 * [91/05/01 rpd]
76 *
77 * Removed pmap_update.
78 * [91/04/12 rpd]
79 *
80 * Added inuse_ptepages_count.
81 * Added vm_page_fictitious_addr assertions.
82 * [91/04/10 rpd]
83 * Added check_simple_locks to pmap_expand.
84 * [91/03/31 rpd]
85 * Changed vm_page_init to vm_page_insert.
86 * Added pmap_free_pages, pmap_next_page, pmap_virtual_space.
87 * [91/03/25 rpd]
88 *
89 * Revision 2.8 91/05/14 16:30:24 mrt
90 * Correcting copyright
91 *
92 * Revision 2.7 91/05/08 12:46:31 dbg
93 * Add volatile declarations where needed.
94 * Move pmap_valid_page to model_dependent file.
95 * [91/04/26 14:41:31 dbg]
96 *
97 * Revision 2.6 91/03/16 14:47:31 rpd
98 * Removed some incorrect (?) assertions.
99 * [91/03/13 14:18:51 rpd]
100 *
101 * Updated for new kmem_alloc interface.
102 * [91/03/03 rpd]
103 * Added continuation argument to VM_PAGE_WAIT.
104 * [91/02/05 rpd]
105 *
106 * Revision 2.5 91/02/14 14:08:11 mrt
107 * Fixed pmap_expand to use vm_page_grab/VM_PAGE_WAIT.
108 * [91/01/12 rpd]
109 *
110 * Revision 2.4 91/02/05 17:20:34 mrt
111 * Changed to new Mach copyright
112 * [91/01/31 18:17:35 mrt]
113 *
114 * Revision 2.3 91/01/08 15:12:47 rpd
115 * Changed pmap_collect to ignore the kernel pmap.
116 * [91/01/03 rpd]
117 *
118 * Revision 2.2 90/12/04 14:50:28 jsb
119 * First checkin (for intel directory).
120 * [90/12/03 21:54:31 jsb]
121 *
122 * Revision 2.9 90/11/26 14:48:44 rvb
123 * Slight error in pmap_valid_page. Pages > last_addr
124 * must be invalid. (They are probably device buffers.)
125 * [90/11/23 10:00:56 rvb]
126 *
127 * Revision 2.8 90/11/24 15:14:47 jsb
128 * Replaced "0x1000" in pmap_valid_page with "first_addr".
129 * [90/11/24 11:49:04 jsb]
130 *
131 * Revision 2.7 90/11/05 14:27:27 rpd
132 * Replace (va < vm_first_phys || va > vm_last_phys) with test
133 * using valid page. Otherwise, video buffer memory is treated as
134 * valid memory and setting dirty bits leads to disastrous results.
135 * [90/11/05 rvb]
136 *
137 * Define pmap_valid_page: [0x1000..cnvmem * 1024) and
138 * [first_avail..)
139 * as useable memory
140 * [90/09/05 rvb]
141 *
142 * Revision 2.6 90/09/09 14:31:39 rpd
143 * Use decl_simple_lock_data.
144 * [90/08/30 rpd]
145 *
146 * Revision 2.5 90/08/06 15:07:05 rwd
147 * Fix bugs in pmap_remove, pmap_protect, phys_attribute routines.
148 * Allocate pte pages directly from vm_resident page list, via a
149 * pmap_object.
150 * [90/07/17 dbg]
151 *
152 * Revision 2.4 90/06/19 22:57:46 rpd
153 * Made MOREVM a variable; increased to 28 meg.
154 * Commented out pte_to_phys assertions.
155 * [90/06/04 rpd]
156 *
157 * Revision 2.3 90/06/02 14:48:40 rpd
158 * Added dummy pmap_list_resident_pages, under MACH_VM_DEBUG.
159 * [90/05/31 rpd]
160 *
161 * Revision 2.2 90/05/03 15:37:04 dbg
162 * Define separate Write and User bits instead of protection codes.
163 * Write-protect kernel data by invalidating it; the 386 ignores
164 * write permission in supervisor mode.
165 * [90/03/25 dbg]
166 *
167 * Fix pmap_collect to look for VA that maps page table page.
168 * Since page table pages are allocated with kmem_alloc, their
169 * virtual and physical addresses are not necessarily the same.
170 * Rewrite pmap_remove to skip address range when PDE is invalid.
171 * Combine pmap_remove_all and pmap_copy_on_write into pmap_page_protect.
172 * Add reference bits.
173 * [90/03/21 dbg]
174 *
175 * Fix for pure kernel. kpde and kptes are dynamically allocated
176 * by assembly code. Reverse CHIPBUG test (what was this, Bob?)
177 * [90/02/14 dbg]
178 *
179 * Revision 1.8.1.3 89/12/28 12:43:18 rvb
180 * v_avail gets phystokv(av_start), in case esym != end.
181 * [89/12/26 rvb]
182 *
183 * Revision 1.8.1.2 89/12/21 17:59:15 rvb
184 * Revision 1.11 89/11/27 22:54:27 kupfer
185 * kernacc() moved here from locore (from Lance).
186 *
187 * Revision 1.10 89/10/24 13:31:38 lance
188 * Eliminate the boot-time `pause that refreshes'
189 *
190 * Revision 1.8 89/09/20 17:26:47 rvb
191 * The OLIVETTI CACHE bug strikes again. I am leaving this code in
192 * as is for now so we can sync up. BUT all this stuff is going to
193 * be on a run-time switch or an ifdef real soon.
194 * [89/09/20 rvb]
195 *
196 * Revision 1.7 89/07/17 10:38:18 rvb
197 * pmap_map_bd now flushes the tlb with a call to pmap_update.
198 * [Lance Berc]
199 *
200 * Revision 1.6 89/04/05 12:59:14 rvb
201 * Can not use zone anymore for directory, since alignment is not
202 * guaranteed. Besides the directory is a page.
203 * [89/03/30 rvb]
204 *
205 * Move extern out of function scope for gcc.
206 * [89/03/04 rvb]
207 *
208 * Revision 1.5 89/03/09 20:03:25 rpd
209 * More cleanup.
210 *
211 * Revision 1.4 89/02/26 12:33:06 gm0w
212 * Changes for cleanup.
213 *
214 * 31-Dec-88 Robert Baron (rvb) at Carnegie-Mellon University
215 * Derived from MACH2.0 vax release.
216 *
217 * 17-Jan-88 David Golub (dbg) at Carnegie-Mellon University
218 * Use cpus_idle, not the scheduler's cpu_idle, to determine when a
219 * cpu does not need to be interrupted. The two are not
220 * synchronized.
221 *
222 */
223
224 /*
225 * File: pmap.c
226 * Author: Avadis Tevanian, Jr., Michael Wayne Young
227 * (These guys wrote the Vax version)
228 *
229 * Physical Map management code for Intel i386, i486, and i860.
230 *
231 * Manages physical address maps.
232 *
233 * In addition to hardware address maps, this
234 * module is called upon to provide software-use-only
235 * maps which may or may not be stored in the same
236 * form as hardware maps. These pseudo-maps are
237 * used to store intermediate results from copy
238 * operations to and from address spaces.
239 *
240 * Since the information managed by this module is
241 * also stored by the logical address mapping module,
242 * this module may throw away valid virtual-to-physical
243 * mappings at almost any time. However, invalidations
244 * of virtual-to-physical mappings must be done as
245 * requested.
246 *
247 * In order to cope with hardware architectures which
248 * make virtual-to-physical map invalidates expensive,
249 * this module may delay invalidation or protection-reduction
250 * operations until such time as they are actually
251 * necessary. This module is given full information as
252 * to which processors are currently using which maps,
253 * and to when physical maps must be made correct.
254 */
255
256 #include <cpus.h>
257
258 #include <mach/machine/vm_types.h>
259
260 #include <mach/boolean.h>
261
262 #include <kern/kern_io.h>
263 #include <kern/memory.h>
264 #include <kern/thread.h>
265 #include <kern/zalloc.h>
266
267 #include <kern/lock.h>
268
269 #include <vm/pmap.h>
270 #include <vm/vm_map.h>
271 #include <vm/vm_kern.h>
272 #include <mach/vm_param.h>
273 #include <mach/vm_prot.h>
274 #include <vm/vm_object.h>
275 #include <vm/vm_page.h>
276 #include <vm/vm_user.h>
277
278 #include <mach/machine/vm_param.h>
279 #include <machine/thread.h>
280 #if i860
281 #include <i860ipsc/nodehw.h>
282 #endif
283
284 #ifdef ORC
285 #define OLIVETTICACHE 1
286 #endif /* ORC */
287
288 #ifndef OLIVETTICACHE
289 #define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry);
290 #define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry);
291 #else /* OLIVETTICACHE */
292
293 /* This gross kludgery is needed for Olivetti XP7 & XP9 boxes to get
294 * around an apparent hardware bug. Other than at startup it doesn't
295 * affect run-time performance very much, so we leave it in for all
296 * machines.
297 */
298 extern unsigned *pstart();
299 #define CACHE_LINE 8
300 #define CACHE_SIZE 512
301 #define CACHE_PAGE 0x1000
302
303 #define WRITE_PTE(pte_p, pte_entry) { write_pte(pte_p, pte_entry); }
304
305 void write_pte(
306 pt_entry_t *pte_p,
307 pt_entry_t pte_entry)
308 {
309 unsigned long count;
310 volatile unsigned long hold, *addr1, *addr2;
311
312 if ( pte_entry != *pte_p )
313 *pte_p = pte_entry;
314 else {
315 /* This isn't necessarily the optimal algorithm */
316 addr1 = (unsigned long *)pstart;
317 for (count = 0; count < CACHE_SIZE; count++) {
318 addr2 = addr1 + CACHE_PAGE;
319 hold = *addr1; /* clear cache bank - A - */
320 hold = *addr2; /* clear cache bank - B - */
321 addr1 += CACHE_LINE;
322 }
323 }
324 }
325
326 #define WRITE_PTE_FAST(pte_p, pte_entry) *pte_p = pte_entry;
327
328 #endif /* OLIVETTICACHE */
329
330 /*
331 * Private data structures.
332 */
333
334 /*
335 * For each vm_page_t, there is a list of all currently
336 * valid virtual mappings of that page. An entry is
337 * a pv_entry_t; the list is the pv_table.
338 */
339
340 typedef struct pv_entry {
341 struct pv_entry *next; /* next pv_entry */
342 pmap_t pmap; /* pmap where mapping lies */
343 vm_offset_t va; /* virtual address for mapping */
344 } *pv_entry_t;
345
346 #define PV_ENTRY_NULL ((pv_entry_t) 0)
347
348 pv_entry_t pv_head_table; /* array of entries, one per page */
349
350 /*
351 * pv_list entries are kept on a list that can only be accessed
352 * with the pmap system locked (at SPLVM, not in the cpus_active set).
353 * The list is refilled from the pv_list_zone if it becomes empty.
354 */
355 pv_entry_t pv_free_list; /* free list at SPLVM */
356 decl_simple_lock_data(, pv_free_list_lock)
357
358 #define PV_ALLOC(pv_e) { \
359 simple_lock(&pv_free_list_lock); \
360 if ((pv_e = pv_free_list) != 0) { \
361 pv_free_list = pv_e->next; \
362 } \
363 simple_unlock(&pv_free_list_lock); \
364 }
365
366 #define PV_FREE(pv_e) { \
367 simple_lock(&pv_free_list_lock); \
368 pv_e->next = pv_free_list; \
369 pv_free_list = pv_e; \
370 simple_unlock(&pv_free_list_lock); \
371 }
372
373 zone_t pv_list_zone; /* zone of pv_entry structures */
374
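/*
 * Example -- a minimal sketch of the refill pattern described above:
 * if PV_ALLOC finds the free list empty, the caller must drop its
 * locks, refill from pv_list_zone (zalloc may block or page out),
 * and retry the whole operation.
 */
#if 0	/* illustrative only; see pmap_enter for the real thing */
	pv_entry_t pv_e;

	PV_ALLOC(pv_e);
	if (pv_e == PV_ENTRY_NULL) {
		/* ... unlock the pmap system here ... */
		pv_e = (pv_entry_t) zalloc(pv_list_zone);
		/* ... re-lock and retry */
	}
#endif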
375 /*
376 * Each entry in the pv_head_table is locked by a bit in the
377 * pv_lock_table. The lock bits are accessed by the physical
378 * address of the page they lock.
379 */
380
381 char *pv_lock_table; /* pointer to array of bits */
382 #define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
383
384 /*
385 * First and last physical addresses that we maintain any information
386 * for. Initialized to zero so that pmap operations done before
387 * pmap_init won't touch any non-existent structures.
388 */
389 vm_offset_t vm_first_phys = (vm_offset_t) 0;
390 vm_offset_t vm_last_phys = (vm_offset_t) 0;
391 boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */
392
393 /*
394 * Index into pv_head table, its lock bits, and the modify/reference
395 * bits starting at vm_first_phys.
396 */
397
398 #define pa_index(pa) (atop(pa - vm_first_phys))
399
400 #define pai_to_pvh(pai) (&pv_head_table[pai])
401 #define lock_pvh_pai(pai) (bit_lock(pai, pv_lock_table))
402 #define unlock_pvh_pai(pai) (bit_unlock(pai, pv_lock_table))
403
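/*
 * Example: pa_index is just the page number relative to vm_first_phys;
 * with 4K pages, pa_index(vm_first_phys + 0x5000) == 5, and that index
 * selects both the pv_head entry and its lock bit.
 */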
404 /*
405 * Array of physical page attributes for managed pages.
406 * One byte per physical page.
407 */
408 char *pmap_phys_attributes;
409
410 /*
411 * Physical page attributes. Copy bits from PTE definition.
412 */
413 #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
414 #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
415
416 /*
417 * Amount of virtual memory mapped by one
418 * page-directory entry.
419 */
420 #define PDE_MAPPED_SIZE (pdetova(1))
421
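/*
 * Example: pmap_remove and pmap_protect step through a range one pde
 * at a time by rounding up to the next pde boundary:
 *
 *	l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
 *	if (l > e)
 *		l = e;
 *
 * On the i386 (1024 ptes of 4K per pde) PDE_MAPPED_SIZE is 4Mb, so
 * s = 0x00401000 yields l = 0x00800000.
 */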
422 /*
423 * We allocate page table pages directly from the VM system
424 * through this object. It maps physical memory.
425 */
426 vm_object_t pmap_object = VM_OBJECT_NULL;
427
428 /*
429 * Locking and TLB invalidation
430 */
431
432 /*
433 * Locking Protocols:
434 *
435 * There are two structures in the pmap module that need locking:
436 * the pmaps themselves, and the per-page pv_lists (which are locked
437 * by locking the pv_lock_table entry that corresponds to the pv_head
438 * for the list in question.) Most routines want to lock a pmap and
439 * then do operations in it that require pv_list locking -- however
440 * pmap_remove_all and pmap_copy_on_write operate on a physical page
441 * basis and want to do the locking in the reverse order, i.e. lock
442 * a pv_list and then go through all the pmaps referenced by that list.
443 * To protect against deadlock between these two cases, the pmap_lock
444 * is used. There are three different locking protocols as a result:
445 *
446 * 1. pmap operations only (pmap_extract, pmap_access, ...) Lock only
447 * the pmap.
448 *
449 * 2. pmap-based operations (pmap_enter, pmap_remove, ...) Get a read
450 * lock on the pmap_lock (shared read), then lock the pmap
451 * and finally the pv_lists as needed [i.e. pmap lock before
452 * pv_list lock.]
453 *
454 * 3. pv_list-based operations (pmap_remove_all, pmap_copy_on_write, ...)
455 * Get a write lock on the pmap_lock (exclusive write); this
456 * also guarantees exclusive access to the pv_lists. Lock the
457 * pmaps as needed.
458 *
459 * At no time may any routine hold more than one pmap lock or more than
460 * one pv_list lock. Because interrupt level routines can allocate
461 * mbufs and cause pmap_enter's, the pmap_lock and the lock on the
462 * kernel_pmap can only be held at splvm.
463 */
464
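/*
 * Example -- a minimal sketch of protocol 2 above (a pmap-based
 * operation): the pmap system lock is taken shared, then the pmap's
 * own lock, and only then a pv_list lock; never in the other order.
 */
#if 0	/* illustrative only; pmap and pa are assumed given */
	spl_t	spl;
	int	pai;

	PMAP_READ_LOCK(pmap, spl);	/* shared pmap_system_lock + pmap lock */
	pai = pa_index(pa);
	LOCK_PVH(pai);			/* pv_list lock taken last */
	/* ... modify the mapping and its pv_list entry ... */
	UNLOCK_PVH(pai);
	PMAP_READ_UNLOCK(pmap, spl);
#endif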
465 #if NCPUS > 1
466 /*
467 * We raise the interrupt level to splvm, to block interprocessor
468 * interrupts during pmap operations. We must take the CPU out of
469 * the cpus_active set while interrupts are blocked.
470 */
471 #define SPLVM(spl) { \
472 spl = splvm(); \
473 i_bit_clear(cpu_number(), &cpus_active); \
474 }
475
476 #define SPLX(spl) { \
477 i_bit_set(cpu_number(), &cpus_active); \
478 splx(spl); \
479 }
480
481 /*
482 * Lock on pmap system
483 */
484 lock_data_t pmap_system_lock;
485
486 #define PMAP_READ_LOCK(pmap, spl) { \
487 SPLVM(spl); \
488 lock_read(&pmap_system_lock); \
489 simple_lock(&(pmap)->lock); \
490 }
491
492 #define PMAP_WRITE_LOCK(spl) { \
493 SPLVM(spl); \
494 lock_write(&pmap_system_lock); \
495 }
496
497 #define PMAP_READ_UNLOCK(pmap, spl) { \
498 simple_unlock(&(pmap)->lock); \
499 lock_read_done(&pmap_system_lock); \
500 SPLX(spl); \
501 }
502
503 #define PMAP_WRITE_UNLOCK(spl) { \
504 lock_write_done(&pmap_system_lock); \
505 SPLX(spl); \
506 }
507
508 #define PMAP_WRITE_TO_READ_LOCK(pmap) { \
509 simple_lock(&(pmap)->lock); \
510 lock_write_to_read(&pmap_system_lock); \
511 }
512
513 #define LOCK_PVH(index) (lock_pvh_pai(index))
514
515 #define UNLOCK_PVH(index) (unlock_pvh_pai(index))
516
517 #define PMAP_UPDATE_TLBS(pmap, s, e) \
518 { \
519 cpu_set cpu_mask = 1 << cpu_number(); \
520 cpu_set users; \
521 \
522 /* Since the pmap is locked, other updates are locked */ \
523 /* out, and any pmap_activate has finished. */ \
524 \
525 /* find other cpus using the pmap */ \
526 users = (pmap)->cpus_using & ~cpu_mask; \
527 if (users) { \
528 /* signal them, and wait for them to finish */ \
529 /* using the pmap */ \
530 signal_cpus(users, (pmap), (s), (e)); \
531 while ((pmap)->cpus_using & cpus_active & ~cpu_mask) \
532 continue; \
533 } \
534 \
535 /* invalidate our own TLB if pmap is in use */ \
536 if ((pmap)->cpus_using & cpu_mask) { \
537 INVALIDATE_TLB((s), (e)); \
538 } \
539 }
540
541 #else /* NCPUS > 1 */
542
543 #define SPLVM(spl)
544 #define SPLX(spl)
545
546 #define PMAP_READ_LOCK(pmap, spl) SPLVM(spl)
547 #define PMAP_WRITE_LOCK(spl) SPLVM(spl)
548 #define PMAP_READ_UNLOCK(pmap, spl) SPLX(spl)
549 #define PMAP_WRITE_UNLOCK(spl) SPLX(spl)
550 #define PMAP_WRITE_TO_READ_LOCK(pmap)
551
552 #define LOCK_PVH(index)
553 #define UNLOCK_PVH(index)
554
555 #define PMAP_UPDATE_TLBS(pmap, s, e) { \
556 /* invalidate our own TLB if pmap is in use */ \
557 if ((pmap)->cpus_using) { \
558 INVALIDATE_TLB((s), (e)); \
559 } \
560 }
561
562 #endif /* NCPUS > 1 */
563
564 #define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */
565
566 #if i860
567 /* Do a data cache flush until we find the caching bug XXX prp */
568 #define INVALIDATE_TLB(s, e) { \
569 flush(); \
570 flush_tlb(); \
571 }
572 #else /* i860 */
573 #define INVALIDATE_TLB(s, e) { \
574 flush_tlb(); \
575 }
576 #endif /* i860 */
577
578
579 #if NCPUS > 1
580 /*
581 * Structures to keep track of pending TLB invalidations
582 */
583
584 #define UPDATE_LIST_SIZE 4
585
586 struct pmap_update_item {
587 pmap_t pmap; /* pmap to invalidate */
588 vm_offset_t start; /* start address to invalidate */
589 vm_offset_t end; /* end address to invalidate */
590 } ;
591
592 typedef struct pmap_update_item *pmap_update_item_t;
593
594 /*
595 * List of pmap updates. If the list overflows,
596 * the last entry is changed to invalidate all.
597 */
598 struct pmap_update_list {
599 decl_simple_lock_data(, lock)
600 int count;
601 struct pmap_update_item item[UPDATE_LIST_SIZE];
602 } ;
603 typedef struct pmap_update_list *pmap_update_list_t;
604
605 struct pmap_update_list cpu_update_list[NCPUS];
606
607 /*
608 * List of cpus that are actively using mapped memory. Any
609 * pmap update operation must wait for all cpus in this list.
610 * Update operations must still be queued to cpus not in this
611 * list.
612 */
613 cpu_set cpus_active;
614
615 /*
616 * List of cpus that are idle, but still operating, and will want
617 * to see any kernel pmap updates when they become active.
618 */
619 cpu_set cpus_idle;
620
621 /*
622 * Quick test for pmap update requests.
623 */
624 volatile boolean_t cpu_update_needed[NCPUS];
625
626 #endif /* NCPUS > 1 */
627
628 /*
629 * Other useful macros.
630 */
631 #define current_pmap() (vm_map_pmap(current_thread()->task->map))
632 #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0)
633
634 struct pmap kernel_pmap_store;
635 pmap_t kernel_pmap;
636
637 struct zone *pmap_zone; /* zone of pmap structures */
638
639 int pmap_debug = 0; /* flag for debugging prints */
640 int ptes_per_vm_page; /* number of hardware ptes needed
641 to map one VM page. */
642 unsigned int inuse_ptepages_count = 0; /* debugging */
643
644 extern char end;
645 /*
646 * Page directory for kernel.
647 */
648 pt_entry_t *kpde = 0; /* set by start.s - keep out of bss */
649 #if i860
650 extern pt_entry_t kpde_page;
651 #endif
652
653 #if NCPUS > 1
654 void signal_cpus(cpu_set, pmap_t, vm_offset_t, vm_offset_t);
655 /* forward */
656 #endif /* NCPUS > 1 */
657
658 #if i860
659 /*
660 * Paging flag
661 */
662 int paging_enabled = 0;
663 #endif
664
665 /*
666 * Given an offset and a map, compute the address of the
667 * pte. If the address is invalid with respect to the map
668 * then PT_ENTRY_NULL is returned (and the map may need to grow).
669 *
670 * This is only used internally.
671 */
672 pt_entry_t *pmap_pte(
673 register pmap_t pmap,
674 register vm_offset_t addr)
675 {
676 #if i860
677 pt_entry_t *ptp;
678 #else
679 register pt_entry_t *ptp;
680 #endif
681 register pt_entry_t pte;
682
683 if (pmap->dirbase == 0)
684 return(PT_ENTRY_NULL);
685 pte = pmap->dirbase[pdenum(addr)];
686 if ((pte & INTEL_PTE_VALID) == 0)
687 return(PT_ENTRY_NULL);
688 ptp = (pt_entry_t *)ptetokv(pte);
689 return &ptp[ptenum(addr)];
690
691 }
692
693 #define pmap_pde(pmap, addr) (&(pmap)->dirbase[pdenum(addr)])
694
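/*
 * Example -- a minimal sketch of the two-level lookup: pmap_pde finds
 * the page-directory slot; only if that slot is valid does pmap_pte
 * return a usable pte pointer (otherwise it returns PT_ENTRY_NULL).
 */
#if 0	/* illustrative only; pmap and addr are assumed given */
	pt_entry_t *pde, *pte;

	pde = pmap_pde(pmap, addr);
	if (*pde & INTEL_PTE_VALID) {
		pte = pmap_pte(pmap, addr);
		/* *pte is the hardware mapping for addr */
	}
#endif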
695 #define DEBUG_PTE_PAGE 0
696
697 #if DEBUG_PTE_PAGE
698 void ptep_check(
699 ptep_t ptep)
700 {
701 register pt_entry_t *pte, *epte;
702 int ctu, ctw;
703
704 /* check the use and wired counts */
705 if (ptep == PTE_PAGE_NULL)
706 return;
707 pte = pmap_pte(ptep->pmap, ptep->va);
708 epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t);
709 ctu = 0;
710 ctw = 0;
711 while (pte < epte) {
712 if (pte->pfn != 0) {
713 ctu++;
714 if (pte->wired)
715 ctw++;
716 }
717 pte += ptes_per_vm_page;
718 }
719
720 if (ctu != ptep->use_count || ctw != ptep->wired_count) {
721 printf("use %d wired %d - actual use %d wired %d\n",
722 ptep->use_count, ptep->wired_count, ctu, ctw);
723 panic("pte count");
724 }
725 }
726 #endif /* DEBUG_PTE_PAGE */
727
728 /*
729 * Map memory at initialization. The physical addresses being
730 * mapped are not managed and are never unmapped.
731 *
732 * For now, VM is already on, we only need to map the
733 * specified memory.
734 */
735 vm_offset_t pmap_map(
736 register vm_offset_t virt,
737 register vm_offset_t start,
738 register vm_offset_t end,
739 register int prot)
740 {
741 register int ps;
742
743 ps = PAGE_SIZE;
744 while (start < end) {
745 pmap_enter(kernel_pmap, virt, start, prot, FALSE);
746 virt += ps;
747 start += ps;
748 }
749 return virt;
750 }
751
752 /*
753 * Back-door routine for mapping kernel VM at initialization.
754 * Useful for mapping memory outside the range
755 * [vm_first_phys, vm_last_phys) (i.e., devices).
756 * Otherwise like pmap_map.
757 #if i860
758 * Sets no-cache bit.
759 #endif
760 */
761 vm_offset_t pmap_map_bd(
762 register vm_offset_t virt,
763 register vm_offset_t start,
764 register vm_offset_t end,
765 vm_prot_t prot)
766 {
767 register pt_entry_t template;
768 register pt_entry_t *pte;
769
770 template = pa_to_pte(start)
771 #if i860
772 | INTEL_PTE_NCACHE
773 #endif
774 | INTEL_PTE_VALID;
775 if (prot & VM_PROT_WRITE)
776 template |= INTEL_PTE_WRITE;
777
778 while (start < end) {
779 pte = pmap_pte(kernel_pmap, virt);
780 if (pte == PT_ENTRY_NULL)
781 panic("pmap_map_bd: Invalid kernel address\n");
782 WRITE_PTE_FAST(pte, template)
783 pte_increment_pa(template);
784 virt += PAGE_SIZE;
785 start += PAGE_SIZE;
786 }
787 return virt;
788 }
789
790 extern int cnvmem;
791 extern char *first_avail;
792 extern vm_offset_t virtual_avail, virtual_end;
793 extern vm_offset_t avail_start, avail_end;
794
795 /*
796 * Bootstrap the system enough to run with virtual memory.
797 * Map the kernel's code and data, and allocate the system page table.
798 * Called with mapping OFF. Page_size must already be set.
799 *
800 * Parameters:
801 * load_start: PA where kernel was loaded
802 * avail_start PA of first available physical page -
803 * after kernel page tables
804 * avail_end PA of last available physical page
805 * virtual_avail VA of first available page -
806 * after kernel page tables
807 * virtual_end VA of last available page -
808 * end of kernel address space
809 *
810 * &start_text start of kernel text
811 * &etext end of kernel text
812 */
813
814 vm_size_t morevm = 40 * 1024 * 1024; /* VM space for kernel map */
815
816 void pmap_bootstrap(
817 vm_offset_t load_start)
818 {
819 vm_offset_t va, tva;
820 pt_entry_t template;
821 pt_entry_t *pde, *pte, *ptend;
822 #if i860
823 vm_offset_t sva;
824 pt_entry_t *pt_pte, *tpt;
825 pt_entry_t *ppde, *ppte;
826
827 /*
828 * Mapping is turned OFF, we must reference only physical addresses.
829 * The load image of the system is to be mapped 1-1 physical = virtual.
830 *
831 * This code will only work if VM_MIN_KERNEL_ADDRESS
832 * equals PHYS_RAM_ADDRESS.
833 */
834 #endif
835
836 /*
837 * Set ptes_per_vm_page for general use.
838 */
839 ptes_per_vm_page = page_size / INTEL_PGBYTES;
840
841 /*
842 * The kernel's pmap is statically allocated so we don't
843 * have to use pmap_create, which is unlikely to work
844 * correctly at this part of the boot sequence.
845 */
846
847 kernel_pmap = &kernel_pmap_store;
848
849 #if NCPUS > 1
850 lock_init(&pmap_system_lock, FALSE); /* NOT a sleep lock */
851 #endif /* NCPUS > 1 */
852
853 simple_lock_init(&kernel_pmap->lock);
854
855 kernel_pmap->ref_count = 1;
856
857 /*
858 * The kernel page directory has been allocated;
859 * its virtual address is in kpde.
860 *
861 #if i860
862 * No kernel page table pages have been allocated
863 #else
864 * Enough kernel page table pages have been allocated
865 #endif
866 * to map low system memory, kernel text, kernel data/bss,
867 * kdb's symbols, and the page directory and page tables.
868 *
869 * No other physical memory has been allocated.
870 */
871 #if i860
872 kpde = &kpde_page;
873 kernel_pmap->dirbase = kpde;
874 #endif
875
876 /*
877 * Start mapping virtual memory to physical memory, 1-1,
878 #if i860
879 * from load point to end of memory,
880 * virtual = physical.
881 #else
882 * at end of mapped memory.
883 #endif
884 */
885 virtual_avail = phystokv(avail_start);
886 virtual_end = phystokv(avail_end);
887
888 #if i860
889 bzero(kpde, INTEL_PGBYTES);
890 #endif
891 pde = kpde;
892 #if i860
893 pde += pdenum(load_start);
894 pte = 0; ptend = 0;
895 #else
896 pde += pdenum(virtual_avail);
897 if (pte_to_pa(*pde) == 0) {
898 /* This pte has not been allocated */
899 pte = 0; ptend = 0;
900 }
901 else {
902 pte = (pt_entry_t *)ptetokv(*pde);
903 /* first pte of page */
904 ptend = pte+NPTES; /* last pte of page */
905 pte += ptenum(virtual_avail); /* point to pte that
906 maps first avail VA */
907 pde++; /* point pde to first empty slot */
908 }
909 #endif
910
911 #if i860
912 template = pa_to_pte(load_start)
913 #else
914 template = pa_to_pte(avail_start)
915 #endif
916 | INTEL_PTE_VALID | INTEL_PTE_WRITE;
917
918 #if i860
919 tva = virtual_end;
920 sva = virtual_avail;
921 for (va = load_start; va < tva; va += INTEL_PGBYTES) {
922 #else
923 for (va = virtual_avail; va < virtual_end; va += INTEL_PGBYTES) {
924 #endif
925 if (pte >= ptend) {
926 pte = (pt_entry_t *)virtual_avail;
927 ptend = pte + NPTES;
928 virtual_avail = (vm_offset_t)ptend;
929 #if i860
930 *pde = pa_to_pte((vm_offset_t)pte)
931 #else
932 *pde = pa_to_pte((vm_offset_t)pte - VM_MIN_KERNEL_ADDRESS)
933 #endif
934 | INTEL_PTE_VALID
935 | INTEL_PTE_WRITE;
936 pde++;
937 }
938 WRITE_PTE_FAST(pte, template)
939 pte++;
940 pte_increment_pa(template);
941 }
942 #if i860
943 /* kvtophys should now work in phys range */
944
945 /*
946 * Mark page table pages non-cacheable
947 */
948
949 pt_pte = (pt_entry_t *)pte_to_pa(*(kpde + pdenum(sva))) + ptenum(sva);
950
951 for (va = load_start; va < tva; va += INTEL_PGBYTES*NPTES) {
952 /* Mark page table non-cacheable */
953 *pt_pte |= INTEL_PTE_NCACHE;
954 pt_pte++;
955 }
956
957 /*
958 * Map I/O space
959 */
960
961 ppde = kpde;
962 ppde += pdenum(IO_BASE);
963
964 if (pte_to_pa(*ppde) == 0) {
965 /* This pte has not been allocated */
966 ppte = (pt_entry_t *)kvtophys(virtual_avail);
967 ptend = ppte + NPTES;
968 virtual_avail = phystokv((vm_offset_t)ptend);
969 *ppde = pa_to_pte((vm_offset_t)ppte)
970 | INTEL_PTE_VALID
971 | INTEL_PTE_WRITE;
972 pte = ptend;
973
974 /* Mark page table non-cacheable */
975 *pt_pte |= INTEL_PTE_NCACHE;
976 pt_pte++;
977
978 bzero(ppte, INTEL_PGBYTES);
979 } else {
980 ppte = (pt_entry_t *)(*ppde); /* first pte of page */
981 }
982 *ppde |= INTEL_PTE_USER;
983
984
985 WRITE_PTE(ppte + ptenum(FIFO_ADDR),
986 pa_to_pte(FIFO_ADDR_PH)
987 | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
988
989 WRITE_PTE(ppte + ptenum(FIFO_ADDR + XEOD_OFF),
990 pa_to_pte(FIFO_ADDR_PH + XEOD_OFF_PH)
991 | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
992
993 /* XXX Allowed user access to control reg - cfj */
994 WRITE_PTE(ppte + ptenum(CSR_ADDR),
995 pa_to_pte(CSR_ADDR_PH)
996 | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE | INTEL_PTE_USER);
997
998 /* XXX Allowed user access to perf reg - cfj */
999 WRITE_PTE(ppte + ptenum(PERFCNT_ADDR),
1000 pa_to_pte(PERFCNT_ADDR_PH)
1001 | INTEL_PTE_VALID | INTEL_PTE_USER | INTEL_PTE_NCACHE);
1002
1003 WRITE_PTE(ppte + ptenum(UART_ADDR),
1004 pa_to_pte(UART_ADDR_PH)
1005 | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_NCACHE);
1006
1007 WRITE_PTE(ppte + ptenum(0xFFFFF000),
1008 pa_to_pte(avail_end)
1009 | INTEL_PTE_VALID | INTEL_PTE_WRITE);
1010 avail_start = kvtophys(virtual_avail);
1011 #else
1012 avail_start = virtual_avail - VM_MIN_KERNEL_ADDRESS;
1013 #endif
1014
1015 /*
1016 * startup requires additional virtual memory (for tables, buffers,
1017 * etc.). The kd driver may also require some of that memory to
1018 * access the graphics board.
1019 *
1020 */
1021 *(int *)&template = 0;
1022 virtual_end += morevm;
1023 for (tva = va; tva < virtual_end; tva += INTEL_PGBYTES) {
1024 if (pte >= ptend) {
1025 #if i860
1026 pte = (pt_entry_t *)kvtophys(virtual_avail);
1027 #else
1028 pte = (pt_entry_t *)virtual_avail;
1029 #endif
1030 ptend = pte + NPTES;
1031 #if i860
1032 virtual_avail = phystokv((vm_offset_t)ptend);
1033 #else
1034 virtual_avail = (vm_offset_t)ptend;
1035 #endif
1036 avail_start += INTEL_PGBYTES;
1037 #if i860
1038 *pde = pa_to_pte((vm_offset_t)pte)
1039 #else
1040 *pde = pa_to_pte((vm_offset_t)pte - VM_MIN_KERNEL_ADDRESS)
1041 #endif
1042 | INTEL_PTE_VALID
1043 | INTEL_PTE_WRITE;
1044 pde++;
1045 #if i860
1046 /* Mark page table non-cacheable */
1047 *pt_pte |= INTEL_PTE_NCACHE;
1048 pt_pte++;
1049 #endif
1050 }
1051 WRITE_PTE_FAST(pte, template)
1052 pte++;
1053 }
1054 virtual_avail = va;
1055 /*
1056 * c.f. comment above
1057 *
1058 */
1059 virtual_end = va + morevm;
1060 while (pte < ptend)
1061 *pte++ = 0;
1062 /*
1063 * invalidate virtual addresses at 0
1064 */
1065 kpde[0] = 0;
1066 #if i860
1067 #else
1068 kernel_pmap->dirbase = kpde;
1069 #endif
1070 printf("Kernel virtual space from 0x%x to 0x%x.\n",
1071 #if i860
1072 sva, virtual_end);
1073 #else
1074 VM_MIN_KERNEL_ADDRESS, virtual_end);
1075 #endif
1076 printf("Available physical space from 0x%x to 0x%x\n",
1077 avail_start, avail_end);
1078 #if i860
1079 /*
1080 * Turn on mapping
1081 */
1082
1083 flush_and_ctxsw(kernel_pmap->dirbase);
1084 paging_enabled = 1;
1085
1086 printf("Paging enabled.\n");
1087 #endif
1088 }
1089
1090 void pmap_virtual_space(
1091 vm_offset_t *startp,
1092 vm_offset_t *endp)
1093 {
1094 *startp = virtual_avail;
1095 *endp = virtual_end;
1096 }
1097
1098 /*
1099 * Initialize the pmap module.
1100 * Called by vm_init, to initialize any structures that the pmap
1101 * system needs to map virtual memory.
1102 */
1103 void pmap_init(void)
1104 {
1105 register long npages;
1106 vm_offset_t addr;
1107 register vm_size_t s;
1108 #if NCPUS > 1
1109 int i;
1110 #endif
1111
1112 /*
1113 * Allocate memory for the pv_head_table and its lock bits,
1114 * the modify bit array, and the pte_page table.
1115 */
1116
1117 npages = atop(avail_end - avail_start);
1118 s = (vm_size_t) (sizeof(struct pv_entry) * npages
1119 + pv_lock_table_size(npages)
1120 + npages);
1121
1122 s = round_page(s);
1123 if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS)
1124 panic("pmap_init");
1125 bzero((void *) addr, s);
1126
1127 /*
1128 * Allocate the structures first to preserve word-alignment.
1129 */
1130 pv_head_table = (pv_entry_t) addr;
1131 addr = (vm_offset_t) (pv_head_table + npages);
1132
1133 pv_lock_table = (char *) addr;
1134 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));
1135
1136 pmap_phys_attributes = (char *) addr;
1137
1138 /*
1139 * Create the zone of physical maps,
1140 * and of the physical-to-virtual entries.
1141 */
1142 s = (vm_size_t) sizeof(struct pmap);
1143 pmap_zone = zinit(s, 400*s, 4096, FALSE, "pmap"); /* XXX */
1144 s = (vm_size_t) sizeof(struct pv_entry);
1145 pv_list_zone = zinit(s, 10000*s, 4096, FALSE, "pv_list"); /* XXX */
1146
1147 #if NCPUS > 1
1148 /*
1149 * Set up the pmap request lists
1150 */
1151 for (i = 0; i < NCPUS; i++) {
1152 pmap_update_list_t up = &cpu_update_list[i];
1153
1154 simple_lock_init(&up->lock);
1155 up->count = 0;
1156 }
1157 #endif /* NCPUS > 1 */
1158
1159 /*
1160 * Only now, when all of the data structures are allocated,
1161 * can we set vm_first_phys and vm_last_phys. If we set them
1162 * too soon, the kmem_alloc_wired above will try to use these
1163 * data structures and blow up.
1164 */
1165
1166 vm_first_phys = avail_start;
1167 vm_last_phys = avail_end;
1168 pmap_initialized = TRUE;
1169 }
1170
1171 extern boolean_t pmap_valid_page(vm_offset_t pa);
1172
1173 #define valid_page(x) (pmap_initialized && pmap_valid_page(x))
1174
1175 boolean_t pmap_verify_free(
1176 vm_offset_t phys)
1177 {
1178 pv_entry_t pv_h;
1179 int pai;
1180 boolean_t result;
1181 #if NCPUS > 1
1182 spl_t spl;
1183 #endif
1184
1185 assert(phys != vm_page_fictitious_addr);
1186 if (!pmap_initialized)
1187 return TRUE;
1188
1189 if (!pmap_valid_page(phys))
1190 return FALSE;
1191
1192 PMAP_WRITE_LOCK(spl);
1193
1194 pai = pa_index(phys);
1195 pv_h = pai_to_pvh(pai);
1196
1197 result = (pv_h->pmap == PMAP_NULL);
1198 PMAP_WRITE_UNLOCK(spl);
1199
1200 return result;
1201 }
1202
1203 /*
1204 * Routine: pmap_page_table_page_alloc
1205 *
1206 * Allocates a new physical page to be used as a page-table page.
1207 *
1208 * Must be called with the pmap system and the pmap unlocked,
1209 * since these must be unlocked to use vm_page_grab.
1210 */
1211 vm_offset_t
1212 pmap_page_table_page_alloc(void)
1213 {
1214 register vm_page_t m;
1215 register vm_offset_t pa;
1216
1217 check_simple_locks();
1218
1219 /*
1220 * We cannot allocate the pmap_object in pmap_init,
1221 * because it is called before the zone package is up.
1222 * Allocate it now if it is missing.
1223 */
1224 if (pmap_object == VM_OBJECT_NULL)
1225 pmap_object = vm_object_allocate(mem_size);
1226
1227 /*
1228 * Allocate a VM page for the level 2 page table entries.
1229 */
1230 while ((m = vm_page_grab()) == VM_PAGE_NULL)
1231 VM_PAGE_WAIT(CONTINUE_NULL);
1232
1233 /*
1234 * Map the page to its physical address so that it
1235 * can be found later.
1236 */
1237 pa = m->phys_addr;
1238 vm_object_lock(pmap_object);
1239 vm_page_insert(m, pmap_object, pa);
1240 vm_page_lock_queues();
1241 vm_page_wire(m);
1242 inuse_ptepages_count++;
1243 vm_page_unlock_queues();
1244 vm_object_unlock(pmap_object);
1245
1246 /*
1247 * Zero the page.
1248 */
1249 bzero((void *) phystokv(pa), PAGE_SIZE);
1250
1251 #if i860
1252 /*
1253 * Mark the page table page(s) non-cacheable.
1254 */
1255 {
1256 int i = ptes_per_vm_page;
1257 pt_entry_t *pdp;
1258
1259 pdp = pmap_pte(kernel_pmap, pa);
1260 do {
1261 *pdp |= INTEL_PTE_NCACHE;
1262 pdp++;
1263 } while (--i > 0);
1264 }
1265 #endif
1266 return pa;
1267 }
1268
1269 /*
1270 * Deallocate a page-table page.
1271 * The page-table page must have all mappings removed,
1272 * and be removed from its page directory.
1273 */
1274 void
1275 pmap_page_table_page_dealloc(
1276 vm_offset_t pa)
1277 {
1278 vm_page_t m;
1279
1280 vm_object_lock(pmap_object);
1281 m = vm_page_lookup(pmap_object, pa);
1282 vm_page_lock_queues();
1283 vm_page_free(m);
1284 inuse_ptepages_count--;
1285 vm_page_unlock_queues();
1286 vm_object_unlock(pmap_object);
1287 }
1288
1289 /*
1290 * Create and return a physical map.
1291 *
1292 * If the size specified for the map
1293 * is zero, the map is an actual physical
1294 * map, and may be referenced by the
1295 * hardware.
1296 *
1297 * If the size specified is non-zero,
1298 * the map will be used in software only, and
1299 * is bounded by that size.
1300 */
1301 pmap_t pmap_create(
1302 vm_size_t size)
1303 {
1304 register pmap_t p;
1305 register pmap_statistics_t stats;
1306
1307 /*
1308 * A software use-only map doesn't even need a map.
1309 */
1310
1311 if (size != 0) {
1312 return PMAP_NULL;
1313 }
1314
1315 /*
1316 * Allocate a pmap struct from the pmap_zone. Then allocate
1317 * the page directory via kmem_alloc_wired (a zone cannot guarantee page alignment).
1318 */
1319
1320 p = (pmap_t) zalloc(pmap_zone);
1321 if (p == PMAP_NULL)
1322 panic("pmap_create");
1323
1324 if (kmem_alloc_wired(kernel_map,
1325 (vm_offset_t *)&p->dirbase, INTEL_PGBYTES)
1326 != KERN_SUCCESS)
1327 panic("pmap_create");
1328
1329 bcopy(kpde, p->dirbase, INTEL_PGBYTES);
1330 p->ref_count = 1;
1331
1332 simple_lock_init(&p->lock);
1333 p->cpus_using = 0;
1334
1335 /*
1336 * Initialize statistics.
1337 */
1338
1339 stats = &p->stats;
1340 stats->resident_count = 0;
1341 stats->wired_count = 0;
1342
1343 return p;
1344 }
1345
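/*
 * Example -- a minimal sketch of the pmap life cycle (va and pa are
 * assumed given; size 0 requests a real, hardware-usable map):
 */
#if 0	/* illustrative only */
	pmap_t p;

	p = pmap_create((vm_size_t) 0);
	pmap_enter(p, va, pa, VM_PROT_READ|VM_PROT_WRITE, FALSE);
	pmap_remove(p, va, va + PAGE_SIZE);
	pmap_destroy(p);	/* drops the last reference */
#endif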
1346 /*
1347 * Retire the given physical map from service.
1348 * Should only be called if the map contains
1349 * no valid mappings.
1350 */
1351
1352 void pmap_destroy(
1353 register pmap_t p)
1354 {
1355 register pt_entry_t *pdep;
1356 register vm_offset_t pa;
1357 register int c;
1358 register vm_page_t m;
1359 #if NCPUS > 1
1360 spl_t s;
1361 #endif
1362
1363 if (p == PMAP_NULL)
1364 return;
1365
1366 SPLVM(s);
1367 simple_lock(&p->lock);
1368 c = --p->ref_count;
1369 simple_unlock(&p->lock);
1370 SPLX(s);
1371
1372 if (c != 0) {
1373 return; /* still in use */
1374 }
1375
1376 /*
1377 * Free the memory maps, then the
1378 * pmap structure.
1379 */
1380 for (pdep = p->dirbase;
1381 pdep < &p->dirbase[pdenum(VM_MIN_KERNEL_ADDRESS)];
1382 pdep += ptes_per_vm_page) {
1383 if (*pdep & INTEL_PTE_VALID) {
1384 pa = pte_to_pa(*pdep);
1385 vm_object_lock(pmap_object);
1386 m = vm_page_lookup(pmap_object, pa);
1387 if (m == VM_PAGE_NULL)
1388 panic("pmap_destroy: pte page not in object");
1389 vm_page_lock_queues();
1390 vm_page_free(m);
1391 inuse_ptepages_count--;
1392 vm_page_unlock_queues();
1393 vm_object_unlock(pmap_object);
1394 }
1395 }
1396 kmem_free(kernel_map, (vm_offset_t) p->dirbase, INTEL_PGBYTES);
1397 zfree(pmap_zone, (vm_offset_t) p);
1398 }
1399
1400 /*
1401 * Add a reference to the specified pmap.
1402 */
1403
1404 void pmap_reference(
1405 register pmap_t p)
1406 {
1407 #if NCPUS > 1
1408 spl_t s;
1409 #endif
1410
1411 if (p != PMAP_NULL) {
1412 SPLVM(s);
1413 simple_lock(&p->lock);
1414 p->ref_count++;
1415 simple_unlock(&p->lock);
1416 SPLX(s);
1417 }
1418 }
1419
1420 /*
1421 * Remove a range of hardware page-table entries.
1422 * The entries given are the first (inclusive)
1423 * and last (exclusive) entries for the VM pages.
1424 * The virtual address is the va for the first pte.
1425 *
1426 * The pmap must be locked.
1427 * If the pmap is not the kernel pmap, the range must lie
1428 * entirely within one pte-page. This is NOT checked.
1429 * Assumes that the pte-page exists.
1430 */
1431
1432 /* static */
1433 void pmap_remove_range(
1434 pmap_t pmap,
1435 vm_offset_t va,
1436 pt_entry_t *spte,
1437 pt_entry_t *epte)
1438 {
1439 register pt_entry_t *cpte;
1440 int num_removed, num_unwired;
1441 int pai;
1442 vm_offset_t pa;
1443
1444 #if DEBUG_PTE_PAGE
1445 if (pmap != kernel_pmap)
1446 ptep_check(get_pte_page(spte));
1447 #endif /* DEBUG_PTE_PAGE */
1448 num_removed = 0;
1449 num_unwired = 0;
1450
1451 for (cpte = spte; cpte < epte;
1452 cpte += ptes_per_vm_page, va += PAGE_SIZE) {
1453
1454 if (*cpte == 0)
1455 continue;
1456 pa = pte_to_pa(*cpte);
1457
1458 num_removed++;
1459 if (*cpte & INTEL_PTE_WIRED)
1460 num_unwired++;
1461
1462 if (!valid_page(pa)) {
1463
1464 /*
1465 * Outside range of managed physical memory.
1466 * Just remove the mappings.
1467 */
1468 register int i = ptes_per_vm_page;
1469 register pt_entry_t *lpte = cpte;
1470 do {
1471 *lpte = 0;
1472 lpte++;
1473 } while (--i > 0);
1474 continue;
1475 }
1476
1477 pai = pa_index(pa);
1478 LOCK_PVH(pai);
1479
1480 /*
1481 * Get the modify and reference bits.
1482 */
1483 {
1484 register int i;
1485 register pt_entry_t *lpte;
1486
1487 i = ptes_per_vm_page;
1488 lpte = cpte;
1489 do {
1490 pmap_phys_attributes[pai] |=
1491 *lpte & (PHYS_MODIFIED|PHYS_REFERENCED);
1492 *lpte = 0;
1493 lpte++;
1494 } while (--i > 0);
1495 }
1496
1497 /*
1498 * Remove the mapping from the pvlist for
1499 * this physical page.
1500 */
1501 {
1502 register pv_entry_t pv_h, prev, cur;
1503
1504 pv_h = pai_to_pvh(pai);
1505 if (pv_h->pmap == PMAP_NULL) {
1506 panic("pmap_remove: null pv_list!");
1507 }
1508 if (pv_h->va == va && pv_h->pmap == pmap) {
1509 /*
1510 * Header is the pv_entry. Copy the next one
1511 * to header and free the next one (we cannot
1512 * free the header)
1513 */
1514 cur = pv_h->next;
1515 if (cur != PV_ENTRY_NULL) {
1516 *pv_h = *cur;
1517 PV_FREE(cur);
1518 }
1519 else {
1520 pv_h->pmap = PMAP_NULL;
1521 }
1522 }
1523 else {
1524 cur = pv_h;
1525 do {
1526 prev = cur;
1527 if ((cur = prev->next) == PV_ENTRY_NULL) {
1528 panic("pmap-remove: mapping not in pv_list!");
1529 }
1530 } while (cur->va != va || cur->pmap != pmap);
1531 prev->next = cur->next;
1532 PV_FREE(cur);
1533 }
1534 UNLOCK_PVH(pai);
1535 }
1536 }
1537
1538 /*
1539 * Update the counts
1540 */
1541 pmap->stats.resident_count -= num_removed;
1542 pmap->stats.wired_count -= num_unwired;
1543 }
1544
1545 /*
1546 * Remove the given range of addresses
1547 * from the specified map.
1548 *
1549 * It is assumed that the start and end are properly
1550 * rounded to the hardware page size.
1551 */
1552
1553 void pmap_remove(
1554 pmap_t map,
1555 vm_offset_t s,
1556 vm_offset_t e)
1557 {
1558 register pt_entry_t *pde;
1559 register pt_entry_t *spte, *epte;
1560 vm_offset_t l;
1561 #if NCPUS > 1
1562 spl_t spl;
1563 #endif
1564
1565 if (map == PMAP_NULL)
1566 return;
1567
1568 PMAP_READ_LOCK(map, spl);
1569
1570 /*
1571 * Invalidate the translation buffer first
1572 */
1573 PMAP_UPDATE_TLBS(map, s, e);
1574
1575 pde = pmap_pde(map, s);
1576 while (s < e) {
1577 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1578 if (l > e)
1579 l = e;
1580 if (*pde & INTEL_PTE_VALID) {
1581 spte = (pt_entry_t *)ptetokv(*pde);
1582 spte = &spte[ptenum(s)];
1583 epte = &spte[intel_btop(l-s)];
1584 pmap_remove_range(map, s, spte, epte);
1585 }
1586 s = l;
1587 pde++;
1588 }
1589
1590 PMAP_READ_UNLOCK(map, spl);
1591 }
1592
1593 /*
1594 * Routine: pmap_page_protect
1595 *
1596 * Function:
1597 * Lower the permission for all mappings to a given
1598 * page.
1599 */
1600 void pmap_page_protect(
1601 vm_offset_t phys,
1602 vm_prot_t prot)
1603 {
1604 pv_entry_t pv_h, prev;
1605 register pv_entry_t pv_e;
1606 register pt_entry_t *pte;
1607 int pai;
1608 register pmap_t pmap;
1609 boolean_t remove;
1610 #if NCPUS > 1
1611 spl_t spl;
1612 #endif
1613
1614 assert(phys != vm_page_fictitious_addr);
1615 if (!valid_page(phys)) {
1616 /*
1617 * Not a managed page.
1618 */
1619 return;
1620 }
1621
1622 /*
1623 * Determine the new protection.
1624 */
1625 switch (prot) {
1626 case VM_PROT_READ:
1627 case VM_PROT_READ|VM_PROT_EXECUTE:
1628 remove = FALSE;
1629 break;
1630 case VM_PROT_ALL:
1631 return; /* nothing to do */
1632 default:
1633 remove = TRUE;
1634 break;
1635 }
1636
1637 /*
1638 * Lock the pmap system first, since we will be changing
1639 * several pmaps.
1640 */
1641
1642 PMAP_WRITE_LOCK(spl);
1643
1644 pai = pa_index(phys);
1645 pv_h = pai_to_pvh(pai);
1646
1647 /*
1648 * Walk down PV list, changing or removing all mappings.
1649 * We do not have to lock the pv_list because we have
1650 * the entire pmap system locked.
1651 */
1652 if (pv_h->pmap != PMAP_NULL) {
1653
1654 prev = pv_e = pv_h;
1655 do {
1656 pmap = pv_e->pmap;
1657 /*
1658 * Lock the pmap to block pmap_extract and similar routines.
1659 */
1660 simple_lock(&pmap->lock);
1661
1662 {
1663 register vm_offset_t va;
1664
1665 va = pv_e->va;
1666 pte = pmap_pte(pmap, va);
1667
1668 /*
1669 * Consistency checks.
1670 */
1671 /* assert(*pte & INTEL_PTE_VALID); XXX */
1672 /* assert(pte_to_phys(*pte) == phys); */
1673
1674 /*
1675 * Invalidate TLBs for all CPUs using this mapping.
1676 */
1677 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1678 }
1679
1680 /*
1681 * Remove the mapping if new protection is NONE
1682 * or if write-protecting a kernel mapping.
1683 */
1684 if (remove || pmap == kernel_pmap) {
1685 /*
1686 * Remove the mapping, collecting any modify bits.
1687 */
1688 if (*pte & INTEL_PTE_WIRED)
1689 panic("pmap_remove_all removing a wired page");
1690
1691 {
1692 register int i = ptes_per_vm_page;
1693
1694 do {
1695 pmap_phys_attributes[pai] |=
1696 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1697 *pte++ = 0;
1698 } while (--i > 0);
1699 }
1700
1701 pmap->stats.resident_count--;
1702
1703 /*
1704 * Remove the pv_entry.
1705 */
1706 if (pv_e == pv_h) {
1707 /*
1708 * Fix up head later.
1709 */
1710 pv_h->pmap = PMAP_NULL;
1711 }
1712 else {
1713 /*
1714 * Delete this entry.
1715 */
1716 prev->next = pv_e->next;
1717 PV_FREE(pv_e);
1718 }
1719 }
1720 else {
1721 /*
1722 * Write-protect.
1723 */
1724 register int i = ptes_per_vm_page;
1725
1726 do {
1727 *pte &= ~INTEL_PTE_WRITE;
1728 pte++;
1729 } while (--i > 0);
1730
1731 /*
1732 * Advance prev.
1733 */
1734 prev = pv_e;
1735 }
1736
1737 simple_unlock(&pmap->lock);
1738
1739 } while ((pv_e = prev->next) != PV_ENTRY_NULL);
1740
1741 /*
1742 * If pv_head mapping was removed, fix it up.
1743 */
1744 if (pv_h->pmap == PMAP_NULL) {
1745 pv_e = pv_h->next;
1746 if (pv_e != PV_ENTRY_NULL) {
1747 *pv_h = *pv_e;
1748 PV_FREE(pv_e);
1749 }
1750 }
1751 }
1752
1753 PMAP_WRITE_UNLOCK(spl);
1754 }
1755
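/*
 * Example: pmap_page_protect(phys, VM_PROT_READ) write-protects every
 * mapping of phys, while pmap_page_protect(phys, VM_PROT_NONE) removes
 * them all; VM_PROT_ALL is a no-op, per the switch above.
 */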
1756 /*
1757 * Set the physical protection on the
1758 * specified range of this map as requested.
1759 * Will not increase permissions.
1760 */
1761 void pmap_protect(
1762 pmap_t map,
1763 vm_offset_t s,
1764 vm_offset_t e,
1765 vm_prot_t prot)
1766 {
1767 register pt_entry_t *pde;
1768 register pt_entry_t *spte, *epte;
1769 vm_offset_t l;
1770 #if NCPUS > 1
1771 spl_t spl;
1772 #endif
1773
1774 if (map == PMAP_NULL)
1775 return;
1776
1777 /*
1778 * Determine the new protection.
1779 */
1780 switch (prot) {
1781 case VM_PROT_READ:
1782 case VM_PROT_READ|VM_PROT_EXECUTE:
1783 break;
1784 case VM_PROT_READ|VM_PROT_WRITE:
1785 case VM_PROT_ALL:
1786 return; /* nothing to do */
1787 default:
1788 pmap_remove(map, s, e);
1789 return;
1790 }
1791
1792 /*
1793 * If write-protecting in the kernel pmap,
1794 * remove the mappings; the i386 ignores
1795 * the write-permission bit in kernel mode.
1796 *
1797 * XXX should be #if'd for i386
1798 */
1799 if (map == kernel_pmap) {
1800 pmap_remove(map, s, e);
1801 return;
1802 }
1803
1804 SPLVM(spl);
1805 simple_lock(&map->lock);
1806
1807 /*
1808 * Invalidate the translation buffer first
1809 */
1810 PMAP_UPDATE_TLBS(map, s, e);
1811
1812 pde = pmap_pde(map, s);
1813 while (s < e) {
1814 l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
1815 if (l > e)
1816 l = e;
1817 if (*pde & INTEL_PTE_VALID) {
1818 spte = (pt_entry_t *)ptetokv(*pde);
1819 spte = &spte[ptenum(s)];
1820 epte = &spte[intel_btop(l-s)];
1821
1822 while (spte < epte) {
1823 if (*spte & INTEL_PTE_VALID)
1824 *spte &= ~INTEL_PTE_WRITE;
1825 spte++;
1826 }
1827 }
1828 s = l;
1829 pde++;
1830 }
1831
1832 simple_unlock(&map->lock);
1833 SPLX(spl);
1834 }
1835
1836 /*
1837 * Insert the given physical page (p) at
1838 * the specified virtual address (v) in the
1839 * target physical map with the protection requested.
1840 *
1841 * If specified, the page will be wired down, meaning
1842 * that the related pte can not be reclaimed.
1843 *
1844 * NB: This is the only routine which MAY NOT lazy-evaluate
1845 * or lose information. That is, this routine must actually
1846 * insert this page into the given map NOW.
1847 */
1848 void pmap_enter(
1849 register pmap_t pmap,
1850 vm_offset_t v,
1851 register vm_offset_t pa,
1852 vm_prot_t prot,
1853 boolean_t wired)
1854 {
1855 register pt_entry_t *pte;
1856 register pv_entry_t pv_h;
1857 register int i, pai;
1858 pv_entry_t pv_e;
1859 pt_entry_t template;
1860 vm_offset_t old_pa;
1861 #if NCPUS > 1
1862 spl_t spl;
1863 #endif
1864
1865 assert(pa != vm_page_fictitious_addr);
1866 if (pmap_debug) printf("pmap_enter(%x, %x)\n", v, pa);
1867 if (pmap == PMAP_NULL)
1868 return;
1869
1870 if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
1871 && !wired /* hack for io_wire */ ) {
1872 /*
1873 * Because the 386 ignores write protection in kernel mode,
1874 * we cannot enter a read-only kernel mapping, and must
1875 * remove an existing mapping if changing it.
1876 *
1877 * XXX should be #if'd for i386
1878 */
1879 PMAP_READ_LOCK(pmap, spl);
1880
1881 pte = pmap_pte(pmap, v);
1882 if (pte != PT_ENTRY_NULL && *pte != 0) {
1883 /*
1884 * Invalidate the translation buffer,
1885 * then remove the mapping.
1886 */
1887 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
1888 pmap_remove_range(pmap, v, pte,
1889 pte + ptes_per_vm_page);
1890 }
1891 PMAP_READ_UNLOCK(pmap, spl);
1892 return;
1893 }
1894
1895 /*
1896 * Must allocate a new pvlist entry while we're unlocked;
1897 * zalloc may cause pageout (which will lock the pmap system).
1898 * If we determine we need a pvlist entry, we will unlock
1899 * and allocate one. Then we will retry, throwing away
1900 * the allocated entry later (if we no longer need it).
1901 */
1902 pv_e = PV_ENTRY_NULL;
1903 Retry:
1904 PMAP_READ_LOCK(pmap, spl);
1905
1906 /*
1907 * Expand pmap to include this pte. Assume that
1908 * pmap is always expanded to include enough hardware
1909 * pages to map one VM page.
1910 */
1911
1912 while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
1913 /*
1914 * Need to allocate a new page-table page.
1915 */
1916 vm_offset_t ptp;
1917 pt_entry_t *pdp;
1918 int i;
1919
1920 if (pmap == kernel_pmap) {
1921 /*
1922 * Would have to enter the new page-table page in
1923 * EVERY pmap.
1924 */
1925 panic("pmap_expand kernel pmap to %#x", v);
1926 }
1927
1928 /*
1929 * Unlock the pmap and allocate a new page-table page.
1930 */
1931 PMAP_READ_UNLOCK(pmap, spl);
1932
1933 ptp = pmap_page_table_page_alloc();
1934
1935 /*
1936 * Re-lock the pmap and check that another thread has
1937 * not already allocated the page-table page. If it
1938 * has, discard the new page-table page (and try
1939 * again to make sure).
1940 */
1941 PMAP_READ_LOCK(pmap, spl);
1942
1943 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
1944 /*
1945 * Oops...
1946 */
1947 PMAP_READ_UNLOCK(pmap, spl);
1948 pmap_page_table_page_dealloc(ptp);
1949 PMAP_READ_LOCK(pmap, spl);
1950 continue;
1951 }
1952
1953 /*
1954 * Enter the new page table page in the page directory.
1955 */
1956 i = ptes_per_vm_page;
1957 pdp = &pmap->dirbase[pdenum(v) & ~(i-1)];
1958 do {
1959 *pdp = pa_to_pte(ptp) | INTEL_PTE_VALID
1960 | INTEL_PTE_USER
1961 | INTEL_PTE_WRITE;
1962 pdp++;
1963 ptp += INTEL_PGBYTES;
1964 } while (--i > 0);
1965 #if i860
1966 /*
1967 * Flush the data cache.
1968 */
1969 flush();
1970 #endif /* i860 */
1971
1972 /*
1973 * Now, get the address of the page-table entry.
1974 */
1975 continue;
1976 }
1977
1978 /*
1979 * Special case if the physical page is already mapped
1980 * at this address.
1981 */
1982 old_pa = pte_to_pa(*pte);
1983 if (*pte && old_pa == pa) {
1984 /*
1985 * May be changing its wired attribute or protection
1986 */
1987
1988 if (wired && !(*pte & INTEL_PTE_WIRED))
1989 pmap->stats.wired_count++;
1990 else if (!wired && (*pte & INTEL_PTE_WIRED))
1991 pmap->stats.wired_count--;
1992
1993 template = pa_to_pte(pa) | INTEL_PTE_VALID;
1994 if (pmap != kernel_pmap)
1995 template |= INTEL_PTE_USER;
1996 if (prot & VM_PROT_WRITE)
1997 template |= INTEL_PTE_WRITE;
1998 if (wired)
1999 template |= INTEL_PTE_WIRED;
2000 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
2001 i = ptes_per_vm_page;
2002 do {
2003 if (*pte & INTEL_PTE_MOD)
2004 template |= INTEL_PTE_MOD;
2005 WRITE_PTE(pte, template)
2006 pte++;
2007 pte_increment_pa(template);
2008 } while (--i > 0);
2009 }
2010 else {
2011
2012 /*
2013 * Remove old mapping from the PV list if necessary.
2014 */
2015 if (*pte) {
2016 /*
2017 * Invalidate the translation buffer,
2018 * then remove the mapping.
2019 */
2020 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
2021
2022 /*
2023 * Don't free the pte page if removing last
2024 * mapping - we will immediately replace it.
2025 */
2026 pmap_remove_range(pmap, v, pte,
2027 pte + ptes_per_vm_page);
2028 }
2029
2030 if (valid_page(pa)) {
2031
2032 /*
2033 * Enter the mapping in the PV list for this
2034 * physical page.
2035 */
2036
2037 pai = pa_index(pa);
2038 LOCK_PVH(pai);
2039 pv_h = pai_to_pvh(pai);
2040
2041 if (pv_h->pmap == PMAP_NULL) {
2042 /*
2043 * No mappings yet
2044 */
2045 pv_h->va = v;
2046 pv_h->pmap = pmap;
2047 pv_h->next = PV_ENTRY_NULL;
2048 }
2049 else {
2050 #if DEBUG
2051 {
2052 /* check that this mapping is not already there */
2053 pv_entry_t e = pv_h;
2054 while (e != PV_ENTRY_NULL) {
2055 if (e->pmap == pmap && e->va == v)
2056 panic("pmap_enter: already in pv_list");
2057 e = e->next;
2058 }
2059 }
2060 #endif /* DEBUG */
2061
2062 /*
2063 * Add new pv_entry after header.
2064 */
2065 if (pv_e == PV_ENTRY_NULL) {
2066 PV_ALLOC(pv_e);
2067 if (pv_e == PV_ENTRY_NULL) {
2068 UNLOCK_PVH(pai);
2069 PMAP_READ_UNLOCK(pmap, spl);
2070
2071 /*
2072 * Refill from zone.
2073 */
2074 pv_e = (pv_entry_t) zalloc(pv_list_zone);
2075 goto Retry;
2076 }
2077 }
2078 pv_e->va = v;
2079 pv_e->pmap = pmap;
2080 pv_e->next = pv_h->next;
2081 pv_h->next = pv_e;
2082 /*
2083 * Remember that we used the pvlist entry.
2084 */
2085 pv_e = PV_ENTRY_NULL;
2086 }
2087 UNLOCK_PVH(pai);
2088 }
2089
2090 /*
2091 * And count the mapping.
2092 */
2093
2094 pmap->stats.resident_count++;
2095 if (wired)
2096 pmap->stats.wired_count++;
2097
2098 /*
2099 * Build a template to speed up entering -
2100 * only the pfn changes.
2101 */
2102 template = pa_to_pte(pa) | INTEL_PTE_VALID;
2103 if (pmap != kernel_pmap)
2104 template |= INTEL_PTE_USER;
2105 if (prot & VM_PROT_WRITE)
2106 template |= INTEL_PTE_WRITE;
2107 if (wired)
2108 template |= INTEL_PTE_WIRED;
2109 i = ptes_per_vm_page;
2110 do {
2111 WRITE_PTE(pte, template)
2112 pte++;
2113 pte_increment_pa(template);
2114 } while (--i > 0);
2115 }
2116
2117 if (pv_e != PV_ENTRY_NULL) {
2118 PV_FREE(pv_e);
2119 }
2120
2121 PMAP_READ_UNLOCK(pmap, spl);
2122 }
2123
2124 /*
2125 * Routine: pmap_change_wiring
2126 * Function: Change the wiring attribute for a map/virtual-address
2127 * pair.
2128 * In/out conditions:
2129 * The mapping must already exist in the pmap.
2130 */
2131 void pmap_change_wiring(
2132 register pmap_t map,
2133 vm_offset_t v,
2134 boolean_t wired)
2135 {
2136 register pt_entry_t *pte;
2137 register int i;
2138 #if NCPUS > 1
2139 spl_t spl;
2140 #endif
2141
2142 /*
2143 * We must grab the pmap system lock because we may
2144 * change a pte_page queue.
2145 */
2146 PMAP_READ_LOCK(map, spl);
2147
2148 if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL)
2149 panic("pmap_change_wiring: pte missing");
2150
2151 if (wired && !(*pte & INTEL_PTE_WIRED)) {
2152 /*
2153 * wiring down mapping
2154 */
2155 map->stats.wired_count++;
2156 i = ptes_per_vm_page;
2157 do {
2158 *pte++ |= INTEL_PTE_WIRED;
2159 } while (--i > 0);
2160 }
2161 else if (!wired && (*pte & INTEL_PTE_WIRED)) {
2162 /*
2163 * unwiring mapping
2164 */
2165 map->stats.wired_count--;
2166 i = ptes_per_vm_page;
2167 do {
2168 *pte &= ~INTEL_PTE_WIRED;
2169 } while (--i > 0);
2170 }
2171
2172 PMAP_READ_UNLOCK(map, spl);
2173 }
2174
2175 /*
2176 * Routine: pmap_extract
2177 * Function:
2178 * Extract the physical page address associated
2179 * with the given map/virtual_address pair.
2180 */
2181
2182 vm_offset_t pmap_extract(
2183 register pmap_t pmap,
2184 vm_offset_t va)
2185 {
2186 register pt_entry_t *pte;
2187 register vm_offset_t pa;
2188 #if NCPUS > 1
2189 spl_t spl;
2190 #endif
2191
2192 SPLVM(spl);
2193 simple_lock(&pmap->lock);
2194 if ((pte = pmap_pte(pmap, va)) == PT_ENTRY_NULL)
2195 pa = (vm_offset_t) 0;
2196 else if (!(*pte & INTEL_PTE_VALID))
2197 pa = (vm_offset_t) 0;
2198 else
2199 pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK);
2200 simple_unlock(&pmap->lock);
2201 SPLX(spl);
2202 return pa;
2203 }
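/*
 * Worked example (a sketch assuming 4096-byte hardware pages, so that
 * INTEL_OFFMASK == 0xfff): if the PTE for va 0x00403a27 names frame
 * 0x0009d000, then
 *
 *	pa = pte_to_pa(*pte) + (va & INTEL_OFFMASK)
 *	   = 0x0009d000 + 0xa27
 *	   = 0x0009da27
 *
 * Note that a return value of 0 means "no translation"; a caller
 * cannot distinguish that from a genuine mapping of physical page 0.
 */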
2204
2205 /*
2206 * Copy the range specified by src_addr/len
2207 * from the source map to the range dst_addr/len
2208 * in the destination map.
2209 *
2210 * This routine is only advisory and need not do anything.
2211 */
2212 #if 0
2213 void pmap_copy(
2214 pmap_t dst_pmap,
2215 pmap_t src_pmap,
2216 vm_offset_t dst_addr,
2217 vm_size_t len,
2218 vm_offset_t src_addr)
2219 {
2220 #ifdef lint
2221 dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++;
2222 #endif /* lint */
2223 }
2224 #endif /* 0 */
2225
2226 /*
2227 * Routine: pmap_collect
2228 * Function:
2229 * Garbage collects the physical map system for
2230 * pages which are no longer used.
2231 * Success need not be guaranteed -- that is, pages
2232 * which are no longer used may nevertheless be left
2233 * mapped, while others are collected.
2234 * Usage:
2235 * Called by the pageout daemon when pages are scarce.
2236 */
2237 void pmap_collect(
2238 pmap_t p)
2239 {
2240 register pt_entry_t *pdp, *ptp;
2241 pt_entry_t *eptp;
2242 vm_offset_t pa;
2243 boolean_t wired;
2244 #if NCPUS > 1
2245 spl_t spl;
2246 #endif
2247
2248 if (p == PMAP_NULL)
2249 return;
2250
2251 if (p == kernel_pmap)
2252 return;
2253
2254 /*
2255 * Garbage collect map.
2256 */
2257 PMAP_READ_LOCK(p, spl);
2258 PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
2259
2260 for (pdp = p->dirbase;
2261 pdp < &p->dirbase[pdenum(VM_MIN_KERNEL_ADDRESS)];
2262 pdp += ptes_per_vm_page)
2263 {
2264 if (*pdp & INTEL_PTE_VALID) {
2265
2266 pa = pte_to_pa(*pdp);
2267 ptp = (pt_entry_t *)phystokv(pa);
2268 eptp = ptp + NPTES*ptes_per_vm_page;
2269
2270 /*
2271 * If the pte page has any wired mappings, we cannot
2272 * free it.
2273 */
2274 wired = FALSE;
2275 {
2276 register pt_entry_t *ptep;
2277 for (ptep = ptp; ptep < eptp; ptep++) {
2278 if (*ptep & INTEL_PTE_WIRED) {
2279 wired = TRUE;
2280 break;
2281 }
2282 }
2283 }
2284 if (!wired) {
2285 /*
2286 * Remove the virtual addresses mapped by this pte page.
2287 */
2288 pmap_remove_range(p,
2289 pdetova(pdp - p->dirbase),
2290 ptp,
2291 eptp);
2292
2293 /*
2294 * Invalidate the page directory pointer.
2295 */
2296 {
2297 register int i = ptes_per_vm_page;
2298 register pt_entry_t *pdep = pdp;
2299 do {
2300 *pdep++ = 0;
2301 } while (--i > 0);
2302 }
2303
2304 PMAP_READ_UNLOCK(p, spl);
2305
2306 /*
2307 * And free the pte page itself.
2308 */
2309 {
2310 register vm_page_t m;
2311
2312 vm_object_lock(pmap_object);
2313 m = vm_page_lookup(pmap_object, pa);
2314 if (m == VM_PAGE_NULL)
2315 panic("pmap_collect: pte page not in object");
2316 vm_page_lock_queues();
2317 vm_page_free(m);
2318 inuse_ptepages_count--;
2319 vm_page_unlock_queues();
2320 vm_object_unlock(pmap_object);
2321 }
2322
2323 PMAP_READ_LOCK(p, spl);
2324 }
2325 }
2326 }
2327 PMAP_READ_UNLOCK(p, spl);
2328 return;
2329
2330 }
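/*
 * Sketch of the scan above, assuming the classic two-level i386
 * layout (1024 PTEs per page table, 4096-byte pages, so each
 * directory entry spans 4 megabytes): pdetova(n) recovers the base
 * virtual address covered by directory slot n, e.g. slot 5 covers
 * 0x01400000 through 0x017fffff.  A pte page is freed only when none
 * of its entries is wired, because a wired mapping must never take a
 * fault.
 */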
2331
2332 /*
2333 * Routine: pmap_activate
2334 * Function:
2335 * Binds the given physical map to the given
2336 * processor.
2337 */
2338 #if 0
2339 void pmap_activate(
2340 register pmap_t my_pmap,
2341 thread_t th,
2342 int my_cpu)
2343 {
2344 PMAP_ACTIVATE(my_pmap, th, my_cpu);
2345 }
2346 #endif /* 0 */
2347
2348 /*
2349 * Routine: pmap_deactivate
2350 * Function:
2351 * Indicates that the given physical map is no longer
2352 * in use on the specified processor. (This is a macro
2353 * in pmap.h)
2354 */
2355 #if 0
2356 void pmap_deactivate(
2357 pmap_t pmap,
2358 thread_t th,
2359 int which_cpu)
2360 {
2361 #ifdef lint
2362 pmap++; th++; which_cpu++;
2363 #endif /* lint */
2364 PMAP_DEACTIVATE(pmap, th, which_cpu);
2365 }
2366 #endif /* 0 */
2367
2368 /*
2369 * Routine: pmap_kernel
2370 * Function:
2371 * Returns the physical map handle for the kernel.
2372 */
2373 #if 0
2374 pmap_t pmap_kernel(void)
2375 {
2376 return (kernel_pmap);
2377 }
2378 #endif /* 0 */
2379
2380 /*
2381 * pmap_zero_page zeros the specified (machine independent) page.
2382 * See machine/phys.c or machine/phys.s for implementation.
2383 */
2384 #if 0
2385 pmap_zero_page(
2386 register vm_offset_t phys)
2387 {
2388 register int i;
2389
2390 assert(phys != vm_page_fictitious_addr);
2391 i = PAGE_SIZE / INTEL_PGBYTES;
2392 phys = intel_pfn(phys);
2393
2394 while (i--)
2395 zero_phys(phys++);
2396 }
2397 #endif /* 0 */
2398
2399 /*
2400 * pmap_copy_page copies the specified (machine independent) page.
2401 * See machine/phys.c or machine/phys.s for implementation.
2402 */
2403 #if 0
2404 pmap_copy_page(
2405 vm_offset_t src,
2406 vm_offset_t dst)
2407 {
2408 int i;
2409
2410 assert(src != vm_page_fictitious_addr);
2411 assert(dst != vm_page_fictitious_addr);
2412 i = PAGE_SIZE / INTEL_PGBYTES;
2413
2414 while (i--) {
2415 copy_phys(intel_pfn(src), intel_pfn(dst));
2416 src += INTEL_PGBYTES;
2417 dst += INTEL_PGBYTES;
2418 }
2419 }
2420 #endif /* 0 */
2421
2422 /*
2423 * Routine: pmap_pageable
2424 * Function:
2425 * Make the specified pages (by pmap, offset)
2426 * pageable (or not) as requested.
2427 *
2428 * A page which is not pageable may not take
2429 * a fault; therefore, its page table entry
2430 * must remain valid for the duration.
2431 *
2432 * This routine is merely advisory; pmap_enter
2433 * will specify that these pages are to be wired
2434 * down (or not) as appropriate.
2435 */
2436 void pmap_pageable(
2437 pmap_t pmap,
2438 vm_offset_t start,
2439 vm_offset_t end,
2440 boolean_t pageable)
2441 {
2442 #ifdef lint
2443 pmap++; start++; end++; pageable++;
2444 #endif /* lint */
2445 }
2446
2447 /*
2448 * Clear specified attribute bits.
2449 */
2450 void
2451 phys_attribute_clear(
2452 vm_offset_t phys,
2453 int bits)
2454 {
2455 pv_entry_t pv_h;
2456 register pv_entry_t pv_e;
2457 register pt_entry_t *pte;
2458 int pai;
2459 register pmap_t pmap;
2460 #if NCPUS > 1
2461 spl_t spl;
2462 #endif
2463
2464 assert(phys != vm_page_fictitious_addr);
2465 if (!valid_page(phys)) {
2466 /*
2467 * Not a managed page.
2468 */
2469 return;
2470 }
2471
2472 /*
2473 * Lock the pmap system first, since we will be changing
2474 * several pmaps.
2475 */
2476
2477 PMAP_WRITE_LOCK(spl);
2478
2479 pai = pa_index(phys);
2480 pv_h = pai_to_pvh(pai);
2481
2482 /*
2483 * Walk down PV list, clearing all modify or reference bits.
2484 * We do not have to lock the pv_list because we have
2485 * the entire pmap system locked.
2486 */
2487 if (pv_h->pmap != PMAP_NULL) {
2488 /*
2489 * There are some mappings.
2490 */
2491 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2492
2493 pmap = pv_e->pmap;
2494 /*
2495 * Lock the pmap to block pmap_extract and similar routines.
2496 */
2497 simple_lock(&pmap->lock);
2498
2499 {
2500 register vm_offset_t va;
2501
2502 va = pv_e->va;
2503 pte = pmap_pte(pmap, va);
2504
2505 #if 0
2506 /*
2507 * Consistency checks.
2508 */
2509 assert(*pte & INTEL_PTE_VALID);
2510 /* assert(pte_to_phys(*pte) == phys); */
2511 #endif
2512
2513 /*
2514 * Invalidate TLBs for all CPUs using this mapping.
2515 */
2516 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
2517 }
2518
2519 /*
2520 * Clear modify or reference bits.
2521 */
2522 {
2523 register int i = ptes_per_vm_page;
2524 do {
2525 *pte++ &= ~bits;
2526 } while (--i > 0);
2527 }
2528 simple_unlock(&pmap->lock);
2529 }
2530 }
2531
2532 pmap_phys_attributes[pai] &= ~bits;
2533
2534 PMAP_WRITE_UNLOCK(spl);
2535 }
2536
2537 /*
2538 * Check specified attribute bits.
2539 */
2540 boolean_t
2541 phys_attribute_test(
2542 vm_offset_t phys,
2543 int bits)
2544 {
2545 pv_entry_t pv_h;
2546 register pv_entry_t pv_e;
2547 register pt_entry_t *pte;
2548 int pai;
2549 register pmap_t pmap;
2550 #if NCPUS > 1
2551 spl_t spl;
2552 #endif
2553
2554 assert(phys != vm_page_fictitious_addr);
2555 if (!valid_page(phys)) {
2556 /*
2557 * Not a managed page.
2558 */
2559 return FALSE;
2560 }
2561
2562 /*
2563 * Lock the pmap system first, since we will be checking
2564 * several pmaps.
2565 */
2566
2567 PMAP_WRITE_LOCK(spl);
2568
2569 pai = pa_index(phys);
2570 pv_h = pai_to_pvh(pai);
2571
2572 if (pmap_phys_attributes[pai] & bits) {
2573 PMAP_WRITE_UNLOCK(spl);
2574 return TRUE;
2575 }
2576
2577 /*
2578 * Walk down PV list, checking all mappings.
2579 * We do not have to lock the pv_list because we have
2580 * the entire pmap system locked.
2581 */
2582 if (pv_h->pmap != PMAP_NULL) {
2583 /*
2584 * There are some mappings.
2585 */
2586 for (pv_e = pv_h; pv_e != PV_ENTRY_NULL; pv_e = pv_e->next) {
2587
2588 pmap = pv_e->pmap;
2589 /*
2590 * Lock the pmap to block pmap_extract and similar routines.
2591 */
2592 simple_lock(&pmap->lock);
2593
2594 {
2595 register vm_offset_t va;
2596
2597 va = pv_e->va;
2598 pte = pmap_pte(pmap, va);
2599
2600 #if 0
2601 /*
2602 * Consistency checks.
2603 */
2604 assert(*pte & INTEL_PTE_VALID);
2605 /* assert(pte_to_phys(*pte) == phys); */
2606 #endif
2607 }
2608
2609 /*
2610 * Check modify or reference bits.
2611 */
2612 {
2613 register int i = ptes_per_vm_page;
2614
2615 do {
2616 if (*pte++ & bits) {
2617 simple_unlock(&pmap->lock);
2618 PMAP_WRITE_UNLOCK(spl);
2619 return TRUE;
2620 }
2621 } while (--i > 0);
2622 }
2623 simple_unlock(&pmap->lock);
2624 }
2625 }
2626 PMAP_WRITE_UNLOCK(spl);
2627 return FALSE;
2628 }
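/*
 * The test above is two-level: pmap_phys_attributes[] accumulates
 * bits salvaged from PTEs that have since been torn down (the remove
 * path would fold them in there), so a hit in the array avoids
 * walking the PV list entirely; otherwise each live mapping's PTE
 * must be inspected, because the hardware records modify/reference
 * information only in the page tables themselves.
 */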
2629
2630 /*
2631 * Clear the modify bits on the specified physical page.
2632 */
2633
2634 void pmap_clear_modify(
2635 register vm_offset_t phys)
2636 {
2637 phys_attribute_clear(phys, PHYS_MODIFIED);
2638 }
2639
2640 /*
2641 * pmap_is_modified:
2642 *
2643 * Return whether or not the specified physical page is modified
2644 * by any physical maps.
2645 */
2646
2647 boolean_t pmap_is_modified(
2648 register vm_offset_t phys)
2649 {
2650 return phys_attribute_test(phys, PHYS_MODIFIED);
2651 }
2652
2653 /*
2654 * pmap_clear_reference:
2655 *
2656 * Clear the reference bit on the specified physical page.
2657 */
2658
2659 void pmap_clear_reference(
2660 vm_offset_t phys)
2661 {
2662 phys_attribute_clear(phys, PHYS_REFERENCED);
2663 }
2664
2665 /*
2666 * pmap_is_referenced:
2667 *
2668 * Return whether or not the specified physical page is referenced
2669 * by any physical maps.
2670 */
2671
2672 boolean_t pmap_is_referenced(
2673 vm_offset_t phys)
2674 {
2675 return phys_attribute_test(phys, PHYS_REFERENCED);
2676 }
2677
2678 #if NCPUS > 1
2679 /*
2680 * TLB Coherence Code (TLB "shootdown" code)
2681 *
2682 * Threads that belong to the same task share the same address space and
2683 * hence share a pmap. However, they may run on distinct cpus and thus
2684 * have distinct TLBs that cache page table entries. In order to guarantee
2685 * the TLBs are consistent, whenever a pmap is changed, all threads that
2686 * are active in that pmap must have their TLB updated. To keep track of
2687 * this information, the set of cpus that are currently using a pmap is
2688 * maintained within each pmap structure (cpus_using). Pmap_activate() and
2689 * pmap_deactivate() add and remove, respectively, a cpu from this set.
2690 * Since the TLBs are not addressable over the bus, each processor must
2691 * flush its own TLB; a processor that needs to invalidate another TLB
2692 * needs to interrupt the processor that owns that TLB to signal the
2693 * update.
2694 *
2695 * Whenever a pmap is updated, the lock on that pmap is locked, and all
2696 * cpus using the pmap are signaled to invalidate. All threads that need
2697 * to activate a pmap must wait for the lock to clear to await any updates
2698 * in progress before using the pmap. They must ACQUIRE the lock to add
2699 * their cpu to the cpus_using set. An implicit assumption made
2700 * throughout the TLB code is that all kernel code that runs at or higher
2701 * than splvm blocks out update interrupts, and that such code does not
2702 * touch pageable pages.
2703 *
2704 * A shootdown interrupt serves another function besides signaling a
2705 * processor to invalidate. The interrupt routine (pmap_update_interrupt)
2706 * waits for both the pmap lock and the kernel pmap lock to clear,
2707 * preventing user code from making implicit pmap updates while the
2708 * sending processor is performing its update. (This could happen via a
2709 * user data write reference that turns on the modify bit in the page
2710 * table). It must wait for any kernel updates that may have started
2711 * concurrently with a user pmap update because the IPC code
2712 * changes mappings.
2713 * Spinning on the VALUES of the locks is sufficient (rather than
2714 * having to acquire the locks) because any updates that occur subsequent
2715 * to finding the lock unlocked will be signaled via another interrupt.
2716 * (This assumes the interrupt is cleared before the low level interrupt code
2717 * calls pmap_update_interrupt()).
2718 *
2719 * The signaling processor must wait for any implicit updates in progress
2720 * to terminate before continuing with its update. Thus it must wait for an
2721 * acknowledgement of the interrupt from each processor for which such
2722 * references could be made. For maintaining this information, a set
2723 * cpus_active is used. A cpu is in this set if and only if it can
2724 * use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from
2725 * this set; when all such cpus are removed, it is safe to update.
2726 *
2727 * Before attempting to acquire the update lock on a pmap, a cpu (A) must
2728 * be at least at the priority of the interprocessor interrupt
2729 * (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a
2730 * kernel update; it would spin forever in pmap_update_interrupt() trying
2731 * to acquire the user pmap lock it had already acquired. Furthermore A
2732 * must remove itself from cpus_active. Otherwise, another cpu holding
2733 * the lock (B) could be in the process of sending an update signal to A,
2734 * and thus be waiting for A to remove itself from cpus_active. If A is
2735 * spinning on the lock at raised priority, this will never happen and a deadlock
2736 * will result.
2737 */
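/*
 * Condensed sketch of the protocol just described, in terms of the
 * routines that follow:
 *
 *	initiator:  lock the pmap; edit the PTEs; signal_cpus() queues
 *	a (pmap, start, end) triple for each cpu in cpus_using and
 *	interrupts the non-idle ones; wait until those cpus have left
 *	cpus_active; unlock.
 *
 *	responder (pmap_update_interrupt):  leave cpus_active; spin
 *	while either the relevant pmap lock or the kernel pmap lock is
 *	held; process_pmap_updates() flushes the queued ranges; rejoin
 *	cpus_active.
 */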
2738
2739 /*
2740 * Signal another CPU that it must flush its TLB
2741 */
2742 void signal_cpus(
2743 cpu_set use_list,
2744 pmap_t pmap,
2745 vm_offset_t start,
2746 vm_offset_t end)
2747 {
2748 register int which_cpu, j;
2749 register pmap_update_list_t update_list_p;
2750
2751 while ((which_cpu = ffs(use_list)) != 0) {
2752 which_cpu -= 1; /* convert to 0 origin */
2753
2754 update_list_p = &cpu_update_list[which_cpu];
2755 simple_lock(&update_list_p->lock);
2756
2757 j = update_list_p->count;
2758 if (j >= UPDATE_LIST_SIZE) {
2759 /*
2760 * list overflowed. Change last item to
2761 * indicate overflow.
2762 */
2763 update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap;
2764 update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS;
2765 update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS;
2766 }
2767 else {
2768 update_list_p->item[j].pmap = pmap;
2769 update_list_p->item[j].start = start;
2770 update_list_p->item[j].end = end;
2771 update_list_p->count = j+1;
2772 }
2773 cpu_update_needed[which_cpu] = TRUE;
2774 simple_unlock(&update_list_p->lock);
2775
2776 if ((cpus_idle & (1 << which_cpu)) == 0)
2777 interrupt_processor(which_cpu);
2778 use_list &= ~(1 << which_cpu);
2779 }
2780 }
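/*
 * Example of the ffs() scan above (bit n of the cpu set stands for
 * cpu n): with use_list == 0x6, i.e. cpus 1 and 2, ffs() returns 2,
 * so which_cpu becomes 1; that bit is then cleared and the next pass
 * handles cpu 2.  Idle cpus are deliberately not interrupted -- they
 * pick up the queued update when they next go active.
 */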
2781
2782 void process_pmap_updates(
2783 register pmap_t my_pmap)
2784 {
2785 register int my_cpu = cpu_number();
2786 register pmap_update_list_t update_list_p;
2787 register int j;
2788 register pmap_t pmap;
2789
2790 update_list_p = &cpu_update_list[my_cpu];
2791 simple_lock(&update_list_p->lock);
2792
2793 for (j = 0; j < update_list_p->count; j++) {
2794 pmap = update_list_p->item[j].pmap;
2795 if (pmap == my_pmap ||
2796 pmap == kernel_pmap) {
2797
2798 INVALIDATE_TLB(update_list_p->item[j].start,
2799 update_list_p->item[j].end);
2800 }
2801 }
2802 update_list_p->count = 0;
2803 cpu_update_needed[my_cpu] = FALSE;
2804 simple_unlock(&update_list_p->lock);
2805 }
2806
2807 /*
2808 * Interrupt routine for TBIA requested from other processor.
2809 */
2810 void pmap_update_interrupt(void)
2811 {
2812 register int my_cpu;
2813 register pmap_t my_pmap;
2814 int s;
2815
2816 my_cpu = cpu_number();
2817
2818 /*
2819 * Exit now if we're idle. We'll pick up the update request
2820 * when we go active, and we must not put ourselves back in
2821 * the active set because we'll never process the interrupt
2822 * while we're idle (thus hanging the system).
2823 */
2824 if (cpus_idle & (1 << my_cpu))
2825 return;
2826
2827 if (current_thread() == THREAD_NULL)
2828 my_pmap = kernel_pmap;
2829 else {
2830 my_pmap = current_pmap();
2831 if (!pmap_in_use(my_pmap, my_cpu))
2832 my_pmap = kernel_pmap;
2833 }
2834
2835 /*
2836 * Raise spl to splvm (above splip) to block out pmap_extract
2837 * from IO code (which would put this cpu back in the active
2838 * set).
2839 */
2840 s = splvm();
2841
2842 do {
2843
2844 /*
2845 * Indicate that we're not using either user or kernel
2846 * pmap.
2847 */
2848 i_bit_clear(my_cpu, &cpus_active);
2849
2850 /*
2851 * Wait for any pmap updates in progress, on either user
2852 * or kernel pmap.
2853 */
2854 while (*(volatile int *)&my_pmap->lock.lock_data ||
2855 *(volatile int *)&kernel_pmap->lock.lock_data)
2856 continue;
2857
2858 process_pmap_updates(my_pmap);
2859
2860 i_bit_set(my_cpu, &cpus_active);
2861
2862 } while (cpu_update_needed[my_cpu]);
2863
2864 splx(s);
2865 }
2866 #else /* NCPUS > 1 */
2867 /*
2868 * Dummy routine to satisfy external reference.
2869 */
2870 void pmap_update_interrupt(void)
2871 {
2872 /* should never be called. */
2873 }
2874 #endif /* NCPUS > 1 */
2875
2876 #if i860 /* akp */
2877 void set_dirbase(
2878 register vm_offset_t dirbase)
2879 {
2880 /*flush();*/
2881 /*flush_tlb();*/
2882 flush_and_ctxsw(dirbase);
2883 }
2884 #endif /* i860 */