1 /*-
2 * Copyright (c) 1982, 1986 The Regents of the University of California.
3 * Copyright (c) 1989, 1990 William Jolitz
4 * Copyright (c) 1994 John Dyson
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department, and William Jolitz.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91
40 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
41 * $FreeBSD$
42 */
43
44 #include "npx.h"
45 #include "opt_user_ldt.h"
46 #include "opt_vm86.h"
47 #ifdef PC98
48 #include "opt_pc98.h"
49 #endif
50
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/proc.h>
54 #include <sys/malloc.h>
55 #include <sys/buf.h>
56 #include <sys/vnode.h>
57 #include <sys/vmmeter.h>
58 #include <sys/kernel.h>
59 #include <sys/sysctl.h>
60
61 #include <machine/clock.h>
62 #include <machine/cpu.h>
63 #include <machine/md_var.h>
64 #ifdef SMP
65 #include <machine/smp.h>
66 #endif
67 #ifdef VM86
68 #include <machine/pcb_ext.h>
69 #include <machine/vm86.h>
70 #endif
71
72 #include <vm/vm.h>
73 #include <vm/vm_param.h>
74 #include <vm/vm_prot.h>
75 #include <sys/lock.h>
76 #include <vm/vm_kern.h>
77 #include <vm/vm_page.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_extern.h>
80
81 #include <sys/user.h>
82
83 #ifdef PC98
84 #include <pc98/pc98/pc98.h>
85 #else
86 #include <i386/isa/isa.h>
87 #endif
88
/* Last stage of a reset; called from cpu_reset() and cpu_reset_proxy(). */
static void	cpu_reset_real(void);
#ifdef SMP
/* Restart hook that cpu_reset() installs when it is invoked on a non-BSP cpu. */
static void	cpu_reset_proxy(void);
/* Id of the cpu that called cpu_reset(); cpu_reset_proxy() stops that cpu. */
static u_int	cpu_reset_proxyid;
/*
 * Handshake state shared by cpu_reset() and cpu_reset_proxy().
 * It is stepped 0 -> 1 -> 2 -> 3 -> 4, each side spinning until the
 * other side has advanced it.
 */
static volatile u_int	cpu_reset_proxy_active;
#endif
95
96 /*
97 * quick version of vm_fault
98 */
99 void
100 vm_fault_quick(v, prot)
101 caddr_t v;
102 int prot;
103 {
104 if (prot & VM_PROT_WRITE)
105 subyte(v, fubyte(v));
106 else
107 fubyte(v);
108 }
109
110 /*
111 * Finish a fork operation, with process p2 nearly set up.
112 * Copy and update the pcb, set up the stack so that the child
113 * ready to run and return to user mode.
114 */
115 void
116 cpu_fork(p1, p2)
117 register struct proc *p1, *p2;
118 {
119 struct pcb *pcb2 = &p2->p_addr->u_pcb;
120
121 #if NNPX > 0
122 /* Ensure that p1's pcb is up to date. */
123 if (npxproc == p1)
124 npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
125 #endif
126
127 /* Copy p1's pcb. */
128 p2->p_addr->u_pcb = p1->p_addr->u_pcb;
129
130 /*
131 * Create a new fresh stack for the new process.
132 * Copy the trap frame for the return to user mode as if from a
133 * syscall. This copies the user mode register values.
134 */
135 p2->p_md.md_regs = (struct trapframe *)
136 #ifdef VM86
137 ((int)p2->p_addr + UPAGES * PAGE_SIZE - 16) - 1;
138 #else
139 ((int)p2->p_addr + UPAGES * PAGE_SIZE) - 1;
140 #endif /* VM86 */
141 *p2->p_md.md_regs = *p1->p_md.md_regs;
142
143 /*
144 * Set registers for trampoline to user mode. Leave space for the
145 * return address on stack. These are the kernel mode register values.
146 */
147 pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir);
148 pcb2->pcb_edi = p2->p_md.md_regs->tf_edi;
149 pcb2->pcb_esi = (int)fork_return;
150 pcb2->pcb_ebp = p2->p_md.md_regs->tf_ebp;
151 pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *);
152 pcb2->pcb_ebx = (int)p2;
153 pcb2->pcb_eip = (int)fork_trampoline;
154 /*
155 * pcb2->pcb_ldt: duplicated below, if necessary.
156 * pcb2->pcb_ldt_len: cloned above.
157 * pcb2->pcb_savefpu: cloned above.
158 * pcb2->pcb_flags: cloned above (always 0 here?).
159 * pcb2->pcb_onfault: cloned above (always NULL here?).
160 */
161
162 #ifdef SMP
163 pcb2->pcb_mpnest = 1;
164 #endif
165 #ifdef VM86
166 /*
167 * XXX don't copy the i/o pages. this should probably be fixed.
168 */
169 pcb2->pcb_ext = 0;
170 #endif
171
172 #ifdef USER_LDT
173 /* Copy the LDT, if necessary. */
174 if (pcb2->pcb_ldt != 0) {
175 union descriptor *new_ldt;
176 size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor);
177
178 new_ldt = (union descriptor *)kmem_alloc(kernel_map, len);
179 bcopy(pcb2->pcb_ldt, new_ldt, len);
180 pcb2->pcb_ldt = (caddr_t)new_ldt;
181 }
182 #endif
183
184 /*
185 * Now, cpu_switch() can schedule the new process.
186 * pcb_esp is loaded pointing to the cpu_switch() stack frame
187 * containing the return address when exiting cpu_switch.
188 * This will normally be to proc_trampoline(), which will have
189 * %ebx loaded with the new proc's pointer. proc_trampoline()
190 * will set up a stack to call fork_return(p, frame); to complete
191 * the return to user-mode.
192 */
193 }
194
195 /*
196 * Intercept the return address from a freshly forked process that has NOT
197 * been scheduled yet.
198 *
199 * This is needed to make kernel threads stay in kernel mode.
200 */
201 void
202 cpu_set_fork_handler(p, func, arg)
203 struct proc *p;
204 void (*func) __P((void *));
205 void *arg;
206 {
207 /*
208 * Note that the trap frame follows the args, so the function
209 * is really called like this: func(arg, frame);
210 */
211 p->p_addr->u_pcb.pcb_esi = (int) func; /* function */
212 p->p_addr->u_pcb.pcb_ebx = (int) arg; /* first arg */
213 }
214
215 void
216 cpu_exit(p)
217 register struct proc *p;
218 {
219 #if defined(USER_LDT) || defined(VM86)
220 struct pcb *pcb = &p->p_addr->u_pcb;
221 #endif
222
223 #if NNPX > 0
224 npxexit(p);
225 #endif /* NNPX */
226 #ifdef VM86
227 if (pcb->pcb_ext != 0) {
228 /*
229 * XXX do we need to move the TSS off the allocated pages
230 * before freeing them? (not done here)
231 */
232 kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
233 ctob(IOPAGES + 1));
234 pcb->pcb_ext = 0;
235 }
236 #endif
237 #ifdef USER_LDT
238 if (pcb->pcb_ldt != 0) {
239 if (pcb == curpcb) {
240 lldt(_default_ldt);
241 currentldt = _default_ldt;
242 }
243 kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ldt,
244 pcb->pcb_ldt_len * sizeof(union descriptor));
245 pcb->pcb_ldt_len = (int)pcb->pcb_ldt = 0;
246 }
247 #endif
248 cnt.v_swtch++;
249 cpu_switch(p);
250 panic("cpu_exit");
251 }
252
253 void
254 cpu_wait(p)
255 struct proc *p;
256 {
257 /* drop per-process resources */
258 pmap_dispose_proc(p);
259
260 /* and clean-out the vmspace */
261 vmspace_free(p->p_vmspace);
262 }
263
264 /*
265 * Dump the machine specific header information at the start of a core dump.
266 */
267 int
268 cpu_coredump(p, vp, cred)
269 struct proc *p;
270 struct vnode *vp;
271 struct ucred *cred;
272 {
273 int error;
274 caddr_t tempuser;
275
276 tempuser = malloc(ctob(UPAGES), M_TEMP, M_WAITOK);
277 if (!tempuser)
278 return EINVAL;
279
280 bzero(tempuser, ctob(UPAGES));
281 bcopy(p->p_addr, tempuser, sizeof(struct user));
282 bcopy(p->p_md.md_regs,
283 tempuser + ((caddr_t) p->p_md.md_regs - (caddr_t) p->p_addr),
284 sizeof(struct trapframe));
285
286 error = vn_rdwr(UIO_WRITE, vp, (caddr_t) tempuser,
287 ctob(UPAGES),
288 (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT,
289 cred, (int *)NULL, p);
290
291 free(tempuser, M_TEMP);
292
293 return error;
294 }
295
#ifdef notyet
/*
 * Placeholder for a kernel stack red zone; nothing is implemented yet.
 *
 * The eventual plan is to set up an expand-down stack segment for the
 * ss0: selector, allowing stack access down to the top of u.  Protection
 * violations would then have to be handled through a double fault
 * exception that performs an integral task switch to a known good
 * context, within which a dump can be taken.  A sensible scheme might be
 * to save the initial context used by sched (which has physical memory
 * mapped 1:1 at the bottom) and take the dump while still in mapped mode.
 */
static void
setredzone(u_short *pte, caddr_t vaddr)
{
}
#endif
312
313 /*
314 * Convert kernel VA to physical address
315 */
316 u_long
317 kvtop(void *addr)
318 {
319 vm_offset_t va;
320
321 va = pmap_kextract((vm_offset_t)addr);
322 if (va == 0)
323 panic("kvtop: zero page frame");
324 return((int)va);
325 }
326
327 /*
328 * Map an IO request into kernel virtual address space.
329 *
330 * All requests are (re)mapped into kernel VA space.
331 * Notice that we use b_bufsize for the size of the buffer
332 * to be mapped. b_bcount might be modified by the driver.
333 */
334 void
335 vmapbuf(bp)
336 register struct buf *bp;
337 {
338 register caddr_t addr, v, kva;
339 vm_offset_t pa;
340
341 if ((bp->b_flags & B_PHYS) == 0)
342 panic("vmapbuf");
343
344 for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
345 addr < bp->b_data + bp->b_bufsize;
346 addr += PAGE_SIZE, v += PAGE_SIZE) {
347 /*
348 * Do the vm_fault if needed; do the copy-on-write thing
349 * when reading stuff off device into memory.
350 */
351 vm_fault_quick(addr,
352 (bp->b_flags&B_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
353 pa = trunc_page(pmap_kextract((vm_offset_t) addr));
354 if (pa == 0)
355 panic("vmapbuf: page not present");
356 vm_page_hold(PHYS_TO_VM_PAGE(pa));
357 pmap_kenter((vm_offset_t) v, pa);
358 }
359
360 kva = bp->b_saveaddr;
361 bp->b_saveaddr = bp->b_data;
362 bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
363 }
364
365 /*
366 * Free the io map PTEs associated with this IO operation.
367 * We also invalidate the TLB entries and restore the original b_addr.
368 */
369 void
370 vunmapbuf(bp)
371 register struct buf *bp;
372 {
373 register caddr_t addr;
374 vm_offset_t pa;
375
376 if ((bp->b_flags & B_PHYS) == 0)
377 panic("vunmapbuf");
378
379 for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
380 addr < bp->b_data + bp->b_bufsize;
381 addr += PAGE_SIZE) {
382 pa = trunc_page(pmap_kextract((vm_offset_t) addr));
383 pmap_kremove((vm_offset_t) addr);
384 vm_page_unhold(PHYS_TO_VM_PAGE(pa));
385 }
386
387 bp->b_data = bp->b_saveaddr;
388 }
389
/*
 * Force reset the processor by invalidating the entire address space!
 */

#ifdef SMP
/*
 * Restart function that cpu_reset() installs in cpustop_restartfunc when
 * the reset is requested on a cpu other than CPU #0.  It performs its half
 * of the cpu_reset_proxy_active handshake, takes the mp lock, stops the
 * requesting cpu and then does the actual reset.
 */
static void
cpu_reset_proxy(void)
{
	u_int saved_mp_lock;

	/* Step 1: announce that we are running. */
	cpu_reset_proxy_active = 1;
	while (cpu_reset_proxy_active == 1)
		continue;	/* Wait for other cpu to disable interrupts */

	saved_mp_lock = mp_lock;
	mp_lock = 1;
	printf("cpu_reset_proxy: Grabbed mp lock for BSP\n");

	/* Step 3: report that the lock has been taken. */
	cpu_reset_proxy_active = 3;
	while (cpu_reset_proxy_active == 3)
		continue;	/* Wait for other cpu to enable interrupts */

	stop_cpus(1 << cpu_reset_proxyid);
	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
	DELAY(1000000);
	cpu_reset_real();
}
#endif
415
/*
 * Reset the machine.
 *
 * Without SMP, or while SMP is not active, this goes straight to
 * cpu_reset_real().  Otherwise the other running cpus are stopped first.
 * CPU #0 then resets directly; any other cpu restarts CPU #0 with
 * cpu_reset_proxy() as its restart function and steps through the
 * cpu_reset_proxy_active handshake so that CPU #0 performs the reset.
 */
void
cpu_reset(void)
{
#ifdef SMP
	u_int others;
	int spins;

	if (smp_active == 0) {
		cpu_reset_real();
		/* NOTREACHED */
		return;
	}

	printf("cpu_reset called on cpu#%d\n", cpuid);

	others = other_cpus & ~stopped_cpus;
	if (others != 0) {
		printf("cpu_reset: Stopping other CPUs\n");
		stop_cpus(others);	/* Stop all other CPUs */
	}

	if (cpuid == 0) {
		DELAY(1000000);
		cpu_reset_real();
		/* NOTREACHED */
		return;
	}

	/* We are not BSP (CPU #0) */
	cpu_reset_proxyid = cpuid;
	cpustop_restartfunc = cpu_reset_proxy;
	printf("cpu_reset: Restarting BSP\n");
	started_cpus = (1 << 0);	/* Restart CPU #0 */

	/* Bounded wait for BSP to announce restart (state 1). */
	for (spins = 0; cpu_reset_proxy_active == 0 && spins < 10000000;
	    spins++)
		continue;
	if (cpu_reset_proxy_active == 0)
		printf("cpu_reset: Failed to restart BSP\n");

	/* State 2: our interrupts are off; wait for the BSP to move on. */
	__asm __volatile("cli" : : : "memory");
	cpu_reset_proxy_active = 2;
	for (spins = 0; cpu_reset_proxy_active == 2 && spins < 10000000;
	    spins++)
		continue;
	if (cpu_reset_proxy_active == 2) {
		printf("cpu_reset: BSP did not grab mp lock\n");
		cpu_reset_real();	/* XXX: Bogus ? */
	}

	/* State 4: interrupts back on, then spin until we are stopped. */
	cpu_reset_proxy_active = 4;
	__asm __volatile("sti" : : : "memory");
	for (;;)
		continue;
	/* NOTREACHED */
#else
	cpu_reset_real();
#endif
}
472
473 static void
474 cpu_reset_real()
475 {
476
477 #ifdef PC98
478 /*
479 * Attempt to do a CPU reset via CPU reset port.
480 */
481 disable_intr();
482 if ((inb(0x35) & 0xa0) != 0xa0) {
483 outb(0x37, 0x0f); /* SHUT0 = 0. */
484 outb(0x37, 0x0b); /* SHUT1 = 0. */
485 }
486 outb(0xf0, 0x00); /* Reset. */
487 #else
488 /*
489 * Attempt to do a CPU reset via the keyboard controller,
490 * do not turn of the GateA20, as any machine that fails
491 * to do the reset here would then end up in no man's land.
492 */
493
494 #if !defined(BROKEN_KEYBOARD_RESET)
495 outb(IO_KBD + 4, 0xFE);
496 DELAY(500000); /* wait 0.5 sec to see if that did it */
497 printf("Keyboard reset did not work, attempting CPU shutdown\n");
498 DELAY(1000000); /* wait 1 sec for printf to complete */
499 #endif
500 #endif /* PC98 */
501 /* force a shutdown by unmapping entire address space ! */
502 bzero((caddr_t) PTD, PAGE_SIZE);
503
504 /* "good night, sweet prince .... <THUNK!>" */
505 invltlb();
506 /* NOTREACHED */
507 while(1);
508 }
509
510 #ifndef VM_STACK
511 /*
512 * Grow the user stack to allow for 'sp'. This version grows the stack in
513 * chunks of SGROWSIZ.
514 */
515 int
516 grow(p, sp)
517 struct proc *p;
518 u_int sp;
519 {
520 unsigned int nss;
521 caddr_t v;
522 struct vmspace *vm = p->p_vmspace;
523
524 if ((caddr_t)sp <= vm->vm_maxsaddr || sp >= USRSTACK)
525 return (1);
526
527 nss = roundup(USRSTACK - sp, PAGE_SIZE);
528
529 if (nss > p->p_rlimit[RLIMIT_STACK].rlim_cur)
530 return (0);
531
532 if (vm->vm_ssize && roundup(vm->vm_ssize << PAGE_SHIFT,
533 SGROWSIZ) < nss) {
534 int grow_amount;
535 /*
536 * If necessary, grow the VM that the stack occupies
537 * to allow for the rlimit. This allows us to not have
538 * to allocate all of the VM up-front in execve (which
539 * is expensive).
540 * Grow the VM by the amount requested rounded up to
541 * the nearest SGROWSIZ to provide for some hysteresis.
542 */
543 grow_amount = roundup((nss - (vm->vm_ssize << PAGE_SHIFT)), SGROWSIZ);
544 v = (char *)USRSTACK - roundup(vm->vm_ssize << PAGE_SHIFT,
545 SGROWSIZ) - grow_amount;
546 /*
547 * If there isn't enough room to extend by SGROWSIZ, then
548 * just extend to the maximum size
549 */
550 if (v < vm->vm_maxsaddr) {
551 v = vm->vm_maxsaddr;
552 grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT);
553 }
554 if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v,
555 grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) {
556 return (0);
557 }
558 vm->vm_ssize += grow_amount >> PAGE_SHIFT;
559 }
560
561 return (1);
562 }
563 #else
564 int
565 grow_stack(p, sp)
566 struct proc *p;
567 u_int sp;
568 {
569 int rv;
570
571 rv = vm_map_growstack (p, sp);
572 if (rv != KERN_SUCCESS)
573 return (0);
574
575 return (1);
576 }
577 #endif
578
579
580 static int cnt_prezero;
581
582 SYSCTL_INT(_vm_stats_misc, OID_AUTO,
583 cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");
584
585 /*
586 * Implement the pre-zeroed page mechanism.
587 * This routine is called from the idle loop.
588 */
589 int
590 vm_page_zero_idle()
591 {
592 static int free_rover;
593 vm_page_t m;
594 int s;
595
596 /*
597 * XXX
598 * We stop zeroing pages when there are sufficent prezeroed pages.
599 * This threshold isn't really needed, except we want to
600 * bypass unneeded calls to vm_page_list_find, and the
601 * associated cache flush and latency. The pre-zero will
602 * still be called when there are significantly more
603 * non-prezeroed pages than zeroed pages. The threshold
604 * of half the number of reserved pages is arbitrary, but
605 * approximately the right amount. Eventually, we should
606 * perhaps interrupt the zero operation when a process
607 * is found to be ready to run.
608 */
609 if (cnt.v_free_count - vm_page_zero_count <= cnt.v_free_reserved / 2)
610 return (0);
611 #ifdef SMP
612 if (try_mplock()) {
613 #endif
614 s = splvm();
615 __asm __volatile("sti" : : : "memory");
616 m = vm_page_list_find(PQ_FREE, free_rover);
617 if (m != NULL) {
618 --(*vm_page_queues[m->queue].lcnt);
619 TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
620 m->queue = PQ_NONE;
621 splx(s);
622 #if 0
623 rel_mplock();
624 #endif
625 pmap_zero_page(VM_PAGE_TO_PHYS(m));
626 #if 0
627 get_mplock();
628 #endif
629 (void)splvm();
630 m->queue = PQ_ZERO + m->pc;
631 ++(*vm_page_queues[m->queue].lcnt);
632 TAILQ_INSERT_HEAD(vm_page_queues[m->queue].pl, m,
633 pageq);
634 free_rover = (free_rover + PQ_PRIME3) & PQ_L2_MASK;
635 ++vm_page_zero_count;
636 ++cnt_prezero;
637 }
638 splx(s);
639 __asm __volatile("cli" : : : "memory");
640 #ifdef SMP
641 rel_mplock();
642 #endif
643 return (1);
644 #ifdef SMP
645 }
646 #endif
647 return (0);
648 }
649
650 /*
651 * Software interrupt handler for queued VM system processing.
652 */
653 void
654 swi_vm()
655 {
656 if (busdma_swi_pending != 0)
657 busdma_swi();
658 }
659
660 /*
661 * Tell whether this address is in some physical memory region.
662 * Currently used by the kernel coredump code in order to avoid
663 * dumping the ``ISA memory hole'' which could cause indefinite hangs,
664 * or other unpredictable behaviour.
665 */
666
667 #include "isa.h"
668
669 int
670 is_physical_memory(addr)
671 vm_offset_t addr;
672 {
673
674 #if NISA > 0
675 /* The ISA ``memory hole''. */
676 if (addr >= 0xa0000 && addr < 0x100000)
677 return 0;
678 #endif
679
680 /*
681 * stuff other tests for known memory-mapped devices (PCI?)
682 * here
683 */
684
685 return 1;
686 }
Cache object: 91703ba297a1d1f17958e65ba1565ef0
|