FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/trap.c
1 /*-
2 * Copyright (C) 1994, David Greenman
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the University of Utah, and William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
38 * $FreeBSD: releng/5.0/sys/i386/i386/trap.c 106542 2002-11-07 01:34:23Z davidxu $
39 */
40
41 /*
42 * 386 Trap and System call handling
43 */
44
45 #include "opt_clock.h"
46 #include "opt_cpu.h"
47 #include "opt_ddb.h"
48 #include "opt_isa.h"
49 #include "opt_ktrace.h"
50 #include "opt_npx.h"
51 #include "opt_trap.h"
52
53 #include <sys/param.h>
54 #include <sys/bus.h>
55 #include <sys/systm.h>
56 #include <sys/proc.h>
57 #include <sys/kse.h>
58 #include <sys/pioctl.h>
59 #include <sys/kernel.h>
60 #include <sys/ktr.h>
61 #include <sys/lock.h>
62 #include <sys/mutex.h>
63 #include <sys/resourcevar.h>
64 #include <sys/signalvar.h>
65 #include <sys/syscall.h>
66 #include <sys/sysctl.h>
67 #include <sys/sysent.h>
68 #include <sys/uio.h>
69 #include <sys/vmmeter.h>
70 #ifdef KTRACE
71 #include <sys/ktrace.h>
72 #endif
73
74 #include <vm/vm.h>
75 #include <vm/vm_param.h>
76 #include <vm/pmap.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_page.h>
80 #include <vm/vm_extern.h>
81
82 #include <machine/cpu.h>
83 #include <machine/md_var.h>
84 #include <machine/pcb.h>
85 #ifdef SMP
86 #include <machine/smp.h>
87 #endif
88 #include <machine/tss.h>
89
90 #include <i386/isa/icu.h>
91 #include <i386/isa/intr_machdep.h>
92
93 #ifdef POWERFAIL_NMI
94 #include <sys/syslog.h>
95 #include <machine/clock.h>
96 #endif
97
98 #include <machine/vm86.h>
99
100 #include <ddb/ddb.h>
101
102 #include <sys/sysctl.h>
103
104 int (*pmath_emulate)(struct trapframe *);
105
106 extern void trap(struct trapframe frame);
107 #ifdef I386_CPU
108 extern int trapwrite(unsigned addr);
109 #endif
110 extern void syscall(struct trapframe frame);
111
112 static int trap_pfault(struct trapframe *, int, vm_offset_t);
113 static void trap_fatal(struct trapframe *, vm_offset_t);
114 void dblfault_handler(void);
115
116 extern inthand_t IDTVEC(lcall_syscall);
117
/*
 * Human-readable names for the trap numbers, indexed by trap type.
 * MAX_TRAP_MSG is the highest valid index; callers must range-check the
 * trap number against it before indexing.  Unused slots hold "".
 * The table is read-only, so both the pointers and the strings are const.
 */
#define	MAX_TRAP_MSG		28
static const char *const trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"",					/*  7 unused */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
};
150
151 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
152 extern int has_f00f_bug;
153 #endif
154
155 #ifdef DDB
156 static int ddb_on_nmi = 1;
157 SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW,
158 &ddb_on_nmi, 0, "Go to DDB on NMI");
159 #endif
160 static int panic_on_nmi = 1;
161 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
162 &panic_on_nmi, 0, "Panic on NMI");
163
164 #ifdef WITNESS
165 extern char *syscallnames[];
166 #endif
167
168 #ifdef DEVICE_POLLING
169 extern u_int32_t poll_in_trap;
170 extern int ether_poll(int count);
171 #endif /* DEVICE_POLLING */
172
173 /*
174 * Exception, fault, and trap interface to the FreeBSD kernel.
175 * This common code is called from assembly language IDT gate entry
176 * routines that prepare a suitable stack frame, and restore this
177 * frame after the exception has been processed.
178 */
179
180 void
181 trap(frame)
182 struct trapframe frame;
183 {
184 struct thread *td = curthread;
185 struct proc *p = td->td_proc;
186 u_int sticks = 0;
187 int i = 0, ucode = 0, type, code;
188 vm_offset_t eva;
189 #ifdef POWERFAIL_NMI
190 static int lastalert = 0;
191 #endif
192
193 atomic_add_int(&cnt.v_trap, 1);
194 type = frame.tf_trapno;
195
196 #ifdef DDB
197 if (db_active) {
198 eva = (type == T_PAGEFLT ? rcr2() : 0);
199 trap_fatal(&frame, eva);
200 goto out;
201 }
202 #endif
203
204 if ((frame.tf_eflags & PSL_I) == 0) {
205 /*
206 * Buggy application or kernel code has disabled
207 * interrupts and then trapped. Enabling interrupts
208 * now is wrong, but it is better than running with
209 * interrupts disabled until they are accidentally
210 * enabled later.
211 */
212 if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
213 printf(
214 "pid %ld (%s): trap %d with interrupts disabled\n",
215 (long)curproc->p_pid, curproc->p_comm, type);
216 else if (type != T_BPTFLT && type != T_TRCTRAP) {
217 /*
218 * XXX not quite right, since this may be for a
219 * multiple fault in user mode.
220 */
221 printf("kernel trap %d with interrupts disabled\n",
222 type);
223 /*
224 * Page faults need interrupts diasabled until later,
225 * and we shouldn't enable interrupts while holding a
226 * spin lock.
227 */
228 if (type != T_PAGEFLT && PCPU_GET(spinlocks) == NULL)
229 enable_intr();
230 }
231 }
232
233 eva = 0;
234 code = frame.tf_err;
235 if (type == T_PAGEFLT) {
236 /*
237 * For some Cyrix CPUs, %cr2 is clobbered by
238 * interrupts. This problem is worked around by using
239 * an interrupt gate for the pagefault handler. We
240 * are finally ready to read %cr2 and then must
241 * reenable interrupts.
242 *
243 * If we get a page fault while holding a spin lock, then
244 * it is most likely a fatal kernel page fault. The kernel
245 * is already going to panic trying to get a sleep lock to
246 * do the VM lookup, so just consider it a fatal trap so the
247 * kernel can print out a useful trap message and even get
248 * to the debugger.
249 */
250 eva = rcr2();
251 if (PCPU_GET(spinlocks) == NULL)
252 enable_intr();
253 else
254 trap_fatal(&frame, eva);
255 }
256
257 #ifdef DEVICE_POLLING
258 if (poll_in_trap)
259 ether_poll(poll_in_trap);
260 #endif /* DEVICE_POLLING */
261
262 if ((ISPL(frame.tf_cs) == SEL_UPL) ||
263 ((frame.tf_eflags & PSL_VM) &&
264 !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) {
265 /* user trap */
266
267 sticks = td->td_kse->ke_sticks;
268 td->td_frame = &frame;
269 if (td->td_ucred != p->p_ucred)
270 cred_update_thread(td);
271
272 /*
273 * First check that we shouldn't just abort.
274 * But check if we are the single thread first!
275 * XXX p_singlethread not locked, but should be safe.
276 */
277 if ((p->p_flag & P_WEXIT) && (p->p_singlethread != td)) {
278 PROC_LOCK(p);
279 mtx_lock_spin(&sched_lock);
280 thread_exit();
281 /* NOTREACHED */
282 }
283
284 switch (type) {
285 case T_PRIVINFLT: /* privileged instruction fault */
286 ucode = type;
287 i = SIGILL;
288 break;
289
290 case T_BPTFLT: /* bpt instruction fault */
291 case T_TRCTRAP: /* trace trap */
292 frame.tf_eflags &= ~PSL_T;
293 i = SIGTRAP;
294 break;
295
296 case T_ARITHTRAP: /* arithmetic trap */
297 #ifdef DEV_NPX
298 ucode = npxtrap();
299 if (ucode == -1)
300 goto userout;
301 #else
302 ucode = code;
303 #endif
304 i = SIGFPE;
305 break;
306
307 /*
308 * The following two traps can happen in
309 * vm86 mode, and, if so, we want to handle
310 * them specially.
311 */
312 case T_PROTFLT: /* general protection fault */
313 case T_STKFLT: /* stack fault */
314 if (frame.tf_eflags & PSL_VM) {
315 i = vm86_emulate((struct vm86frame *)&frame);
316 if (i == 0)
317 goto user;
318 break;
319 }
320 /* FALLTHROUGH */
321
322 case T_SEGNPFLT: /* segment not present fault */
323 case T_TSSFLT: /* invalid TSS fault */
324 case T_DOUBLEFLT: /* double fault */
325 default:
326 ucode = code + BUS_SEGM_FAULT ;
327 i = SIGBUS;
328 break;
329
330 case T_PAGEFLT: /* page fault */
331 i = trap_pfault(&frame, TRUE, eva);
332 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
333 if (i == -2) {
334 /*
335 * The f00f hack workaround has triggered, so
336 * treat the fault as an illegal instruction
337 * (T_PRIVINFLT) instead of a page fault.
338 */
339 type = frame.tf_trapno = T_PRIVINFLT;
340
341 /* Proceed as in that case. */
342 ucode = type;
343 i = SIGILL;
344 break;
345 }
346 #endif
347 if (i == -1)
348 goto userout;
349 if (i == 0)
350 goto user;
351
352 ucode = T_PAGEFLT;
353 break;
354
355 case T_DIVIDE: /* integer divide fault */
356 ucode = FPE_INTDIV;
357 i = SIGFPE;
358 break;
359
360 #ifdef DEV_ISA
361 case T_NMI:
362 #ifdef POWERFAIL_NMI
363 #ifndef TIMER_FREQ
364 # define TIMER_FREQ 1193182
365 #endif
366 mtx_lock(&Giant);
367 if (time_second - lastalert > 10) {
368 log(LOG_WARNING, "NMI: power fail\n");
369 sysbeep(TIMER_FREQ/880, hz);
370 lastalert = time_second;
371 }
372 mtx_unlock(&Giant);
373 goto userout;
374 #else /* !POWERFAIL_NMI */
375 /* machine/parity/power fail/"kitchen sink" faults */
376 /* XXX Giant */
377 if (isa_nmi(code) == 0) {
378 #ifdef DDB
379 /*
380 * NMI can be hooked up to a pushbutton
381 * for debugging.
382 */
383 if (ddb_on_nmi) {
384 printf ("NMI ... going to debugger\n");
385 kdb_trap (type, 0, &frame);
386 }
387 #endif /* DDB */
388 goto userout;
389 } else if (panic_on_nmi)
390 panic("NMI indicates hardware failure");
391 break;
392 #endif /* POWERFAIL_NMI */
393 #endif /* DEV_ISA */
394
395 case T_OFLOW: /* integer overflow fault */
396 ucode = FPE_INTOVF;
397 i = SIGFPE;
398 break;
399
400 case T_BOUND: /* bounds check fault */
401 ucode = FPE_FLTSUB;
402 i = SIGFPE;
403 break;
404
405 case T_DNA:
406 #ifdef DEV_NPX
407 /* transparent fault (due to context switch "late") */
408 if (npxdna())
409 goto userout;
410 #endif
411 if (!pmath_emulate) {
412 i = SIGFPE;
413 ucode = FPE_FPU_NP_TRAP;
414 break;
415 }
416 mtx_lock(&Giant);
417 i = (*pmath_emulate)(&frame);
418 mtx_unlock(&Giant);
419 if (i == 0) {
420 if (!(frame.tf_eflags & PSL_T))
421 goto userout;
422 frame.tf_eflags &= ~PSL_T;
423 i = SIGTRAP;
424 }
425 /* else ucode = emulator_only_knows() XXX */
426 break;
427
428 case T_FPOPFLT: /* FPU operand fetch fault */
429 ucode = T_FPOPFLT;
430 i = SIGILL;
431 break;
432
433 case T_XMMFLT: /* SIMD floating-point exception */
434 ucode = 0; /* XXX */
435 i = SIGFPE;
436 break;
437 }
438 } else {
439 /* kernel trap */
440
441 KASSERT(cold || td->td_ucred != NULL,
442 ("kernel trap doesn't have ucred"));
443 switch (type) {
444 case T_PAGEFLT: /* page fault */
445 (void) trap_pfault(&frame, FALSE, eva);
446 goto out;
447
448 case T_DNA:
449 #ifdef DEV_NPX
450 /*
451 * The kernel is apparently using npx for copying.
452 * XXX this should be fatal unless the kernel has
453 * registered such use.
454 */
455 if (npxdna())
456 goto out;
457 #endif
458 break;
459
460 /*
461 * The following two traps can happen in
462 * vm86 mode, and, if so, we want to handle
463 * them specially.
464 */
465 case T_PROTFLT: /* general protection fault */
466 case T_STKFLT: /* stack fault */
467 if (frame.tf_eflags & PSL_VM) {
468 i = vm86_emulate((struct vm86frame *)&frame);
469 if (i != 0)
470 /*
471 * returns to original process
472 */
473 vm86_trap((struct vm86frame *)&frame);
474 goto out;
475 }
476 if (type == T_STKFLT)
477 break;
478
479 /* FALL THROUGH */
480
481 case T_SEGNPFLT: /* segment not present fault */
482 if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)
483 break;
484
485 if (td->td_intr_nesting_level != 0)
486 break;
487
488 /*
489 * Invalid %fs's and %gs's can be created using
490 * procfs or PT_SETREGS or by invalidating the
491 * underlying LDT entry. This causes a fault
492 * in kernel mode when the kernel attempts to
493 * switch contexts. Lose the bad context
494 * (XXX) so that we can continue, and generate
495 * a signal.
496 */
497 if (frame.tf_eip == (int)cpu_switch_load_gs) {
498 PCPU_GET(curpcb)->pcb_gs = 0;
499 PROC_LOCK(p);
500 psignal(p, SIGBUS);
501 PROC_UNLOCK(p);
502 goto out;
503 }
504
505 /*
506 * Invalid segment selectors and out of bounds
507 * %eip's and %esp's can be set up in user mode.
508 * This causes a fault in kernel mode when the
509 * kernel tries to return to user mode. We want
510 * to get this fault so that we can fix the
511 * problem here and not have to check all the
512 * selectors and pointers when the user changes
513 * them.
514 */
515 if (frame.tf_eip == (int)doreti_iret) {
516 frame.tf_eip = (int)doreti_iret_fault;
517 goto out;
518 }
519 if (frame.tf_eip == (int)doreti_popl_ds) {
520 frame.tf_eip = (int)doreti_popl_ds_fault;
521 goto out;
522 }
523 if (frame.tf_eip == (int)doreti_popl_es) {
524 frame.tf_eip = (int)doreti_popl_es_fault;
525 goto out;
526 }
527 if (frame.tf_eip == (int)doreti_popl_fs) {
528 frame.tf_eip = (int)doreti_popl_fs_fault;
529 goto out;
530 }
531 if (PCPU_GET(curpcb) != NULL &&
532 PCPU_GET(curpcb)->pcb_onfault != NULL) {
533 frame.tf_eip =
534 (int)PCPU_GET(curpcb)->pcb_onfault;
535 goto out;
536 }
537 break;
538
539 case T_TSSFLT:
540 /*
541 * PSL_NT can be set in user mode and isn't cleared
542 * automatically when the kernel is entered. This
543 * causes a TSS fault when the kernel attempts to
544 * `iret' because the TSS link is uninitialized. We
545 * want to get this fault so that we can fix the
546 * problem here and not every time the kernel is
547 * entered.
548 */
549 if (frame.tf_eflags & PSL_NT) {
550 frame.tf_eflags &= ~PSL_NT;
551 goto out;
552 }
553 break;
554
555 case T_TRCTRAP: /* trace trap */
556 if (frame.tf_eip == (int)IDTVEC(lcall_syscall)) {
557 /*
558 * We've just entered system mode via the
559 * syscall lcall. Continue single stepping
560 * silently until the syscall handler has
561 * saved the flags.
562 */
563 goto out;
564 }
565 if (frame.tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
566 /*
567 * The syscall handler has now saved the
568 * flags. Stop single stepping it.
569 */
570 frame.tf_eflags &= ~PSL_T;
571 goto out;
572 }
573 /*
574 * Ignore debug register trace traps due to
575 * accesses in the user's address space, which
576 * can happen under several conditions such as
577 * if a user sets a watchpoint on a buffer and
578 * then passes that buffer to a system call.
579 * We still want to get TRCTRAPS for addresses
580 * in kernel space because that is useful when
581 * debugging the kernel.
582 */
583 /* XXX Giant */
584 if (user_dbreg_trap() &&
585 !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) {
586 /*
587 * Reset breakpoint bits because the
588 * processor doesn't
589 */
590 load_dr6(rdr6() & 0xfffffff0);
591 goto out;
592 }
593 /*
594 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
595 */
596 case T_BPTFLT:
597 /*
598 * If DDB is enabled, let it handle the debugger trap.
599 * Otherwise, debugger traps "can't happen".
600 */
601 #ifdef DDB
602 /* XXX Giant */
603 if (kdb_trap (type, 0, &frame))
604 goto out;
605 #endif
606 break;
607
608 #ifdef DEV_ISA
609 case T_NMI:
610 #ifdef POWERFAIL_NMI
611 mtx_lock(&Giant);
612 if (time_second - lastalert > 10) {
613 log(LOG_WARNING, "NMI: power fail\n");
614 sysbeep(TIMER_FREQ/880, hz);
615 lastalert = time_second;
616 }
617 mtx_unlock(&Giant);
618 goto out;
619 #else /* !POWERFAIL_NMI */
620 /* XXX Giant */
621 /* machine/parity/power fail/"kitchen sink" faults */
622 if (isa_nmi(code) == 0) {
623 #ifdef DDB
624 /*
625 * NMI can be hooked up to a pushbutton
626 * for debugging.
627 */
628 if (ddb_on_nmi) {
629 printf ("NMI ... going to debugger\n");
630 kdb_trap (type, 0, &frame);
631 }
632 #endif /* DDB */
633 goto out;
634 } else if (panic_on_nmi == 0)
635 goto out;
636 /* FALLTHROUGH */
637 #endif /* POWERFAIL_NMI */
638 #endif /* DEV_ISA */
639 }
640
641 trap_fatal(&frame, eva);
642 goto out;
643 }
644
645 /* Translate fault for emulators (e.g. Linux) */
646 if (*p->p_sysent->sv_transtrap)
647 i = (*p->p_sysent->sv_transtrap)(i, type);
648
649 trapsignal(p, i, ucode);
650
651 #ifdef DEBUG
652 if (type <= MAX_TRAP_MSG) {
653 uprintf("fatal process exception: %s",
654 trap_msg[type]);
655 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
656 uprintf(", fault VA = 0x%lx", (u_long)eva);
657 uprintf("\n");
658 }
659 #endif
660
661 user:
662 userret(td, &frame, sticks);
663 mtx_assert(&Giant, MA_NOTOWNED);
664 userout:
665 #ifdef DIAGNOSTIC
666 cred_free_thread(td);
667 #endif
668 out:
669 return;
670 }
671
672 static int
673 trap_pfault(frame, usermode, eva)
674 struct trapframe *frame;
675 int usermode;
676 vm_offset_t eva;
677 {
678 vm_offset_t va;
679 struct vmspace *vm = NULL;
680 vm_map_t map = 0;
681 int rv = 0;
682 vm_prot_t ftype;
683 struct thread *td = curthread;
684 struct proc *p = td->td_proc;
685
686 va = trunc_page(eva);
687 if (va >= KERNBASE) {
688 /*
689 * Don't allow user-mode faults in kernel address space.
690 * An exception: if the faulting address is the invalid
691 * instruction entry in the IDT, then the Intel Pentium
692 * F00F bug workaround was triggered, and we need to
693 * treat it is as an illegal instruction, and not a page
694 * fault.
695 */
696 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
697 if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
698 return -2;
699 #endif
700 if (usermode)
701 goto nogo;
702
703 map = kernel_map;
704 } else {
705 /*
706 * This is a fault on non-kernel virtual memory.
707 * vm is initialized above to NULL. If curproc is NULL
708 * or curproc->p_vmspace is NULL the fault is fatal.
709 */
710 if (p != NULL)
711 vm = p->p_vmspace;
712
713 if (vm == NULL)
714 goto nogo;
715
716 map = &vm->vm_map;
717 }
718
719 if (frame->tf_err & PGEX_W)
720 ftype = VM_PROT_WRITE;
721 else
722 ftype = VM_PROT_READ;
723
724 if (map != kernel_map) {
725 /*
726 * Keep swapout from messing with us during this
727 * critical time.
728 */
729 PROC_LOCK(p);
730 ++p->p_lock;
731 PROC_UNLOCK(p);
732
733 /* Fault in the user page: */
734 rv = vm_fault(map, va, ftype,
735 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
736 : VM_FAULT_NORMAL);
737
738 PROC_LOCK(p);
739 --p->p_lock;
740 PROC_UNLOCK(p);
741 } else {
742 /*
743 * Don't have to worry about process locking or stacks in the
744 * kernel.
745 */
746 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
747 }
748 if (rv == KERN_SUCCESS)
749 return (0);
750 nogo:
751 if (!usermode) {
752 if (td->td_intr_nesting_level == 0 &&
753 PCPU_GET(curpcb) != NULL &&
754 PCPU_GET(curpcb)->pcb_onfault != NULL) {
755 frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
756 return (0);
757 }
758 trap_fatal(frame, eva);
759 return (-1);
760 }
761
762 /* kludge to pass faulting virtual address to sendsig */
763 frame->tf_err = eva;
764
765 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
766 }
767
768 static void
769 trap_fatal(frame, eva)
770 struct trapframe *frame;
771 vm_offset_t eva;
772 {
773 int code, type, ss, esp;
774 struct soft_segment_descriptor softseg;
775
776 code = frame->tf_err;
777 type = frame->tf_trapno;
778 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
779
780 if (type <= MAX_TRAP_MSG)
781 printf("\n\nFatal trap %d: %s while in %s mode\n",
782 type, trap_msg[type],
783 frame->tf_eflags & PSL_VM ? "vm86" :
784 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
785 #ifdef SMP
786 /* two separate prints in case of a trap on an unmapped page */
787 printf("cpuid = %d; ", PCPU_GET(cpuid));
788 printf("lapic.id = %08x\n", lapic.id);
789 #endif
790 if (type == T_PAGEFLT) {
791 printf("fault virtual address = 0x%x\n", eva);
792 printf("fault code = %s %s, %s\n",
793 code & PGEX_U ? "user" : "supervisor",
794 code & PGEX_W ? "write" : "read",
795 code & PGEX_P ? "protection violation" : "page not present");
796 }
797 printf("instruction pointer = 0x%x:0x%x\n",
798 frame->tf_cs & 0xffff, frame->tf_eip);
799 if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
800 ss = frame->tf_ss & 0xffff;
801 esp = frame->tf_esp;
802 } else {
803 ss = GSEL(GDATA_SEL, SEL_KPL);
804 esp = (int)&frame->tf_esp;
805 }
806 printf("stack pointer = 0x%x:0x%x\n", ss, esp);
807 printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
808 printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
809 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
810 printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
811 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
812 softseg.ssd_gran);
813 printf("processor eflags = ");
814 if (frame->tf_eflags & PSL_T)
815 printf("trace trap, ");
816 if (frame->tf_eflags & PSL_I)
817 printf("interrupt enabled, ");
818 if (frame->tf_eflags & PSL_NT)
819 printf("nested task, ");
820 if (frame->tf_eflags & PSL_RF)
821 printf("resume, ");
822 if (frame->tf_eflags & PSL_VM)
823 printf("vm86, ");
824 printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
825 printf("current process = ");
826 if (curproc) {
827 printf("%lu (%s)\n",
828 (u_long)curproc->p_pid, curproc->p_comm ?
829 curproc->p_comm : "");
830 } else {
831 printf("Idle\n");
832 }
833
834 #ifdef KDB
835 if (kdb_trap(&psl))
836 return;
837 #endif
838 #ifdef DDB
839 if ((debugger_on_panic || db_active) && kdb_trap(type, 0, frame))
840 return;
841 #endif
842 printf("trap number = %d\n", type);
843 if (type <= MAX_TRAP_MSG)
844 panic("%s", trap_msg[type]);
845 else
846 panic("unknown/reserved trap");
847 }
848
849 /*
850 * Double fault handler. Called when a fault occurs while writing
851 * a frame for a trap/exception onto the stack. This usually occurs
852 * when the stack overflows (such is the case with infinite recursion,
853 * for example).
854 *
855 * XXX Note that the current PTD gets replaced by IdlePTD when the
856 * task switch occurs. This means that the stack that was active at
857 * the time of the double fault is not available at <kstack> unless
858 * the machine was idle when the double fault occurred. The downside
859 * of this is that "trace <ebp>" in ddb won't work.
860 */
861 void
862 dblfault_handler()
863 {
864 printf("\nFatal double fault:\n");
865 printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
866 printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
867 printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
868 #ifdef SMP
869 /* two separate prints in case of a trap on an unmapped page */
870 printf("cpuid = %d; ", PCPU_GET(cpuid));
871 printf("lapic.id = %08x\n", lapic.id);
872 #endif
873 panic("double fault");
874 }
875
#ifdef I386_CPU
/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
 *
 * Forces a write fault on the page containing the user address `addr'.
 * Returns 0 when vm_fault() succeeds, 1 when the address is at or above
 * VM_MAXUSER_ADDRESS or the fault fails.
 */
int
trapwrite(unsigned addr)
{
	struct thread *td;
	struct proc *p;
	struct vmspace *vm;
	vm_offset_t page;
	int result;

	page = trunc_page((vm_offset_t)addr);

	/*
	 * XXX - MAX is END.  Changed > to >= for temp. fix.
	 */
	if (page >= VM_MAXUSER_ADDRESS)
		return (1);

	td = curthread;
	p = td->td_proc;
	vm = p->p_vmspace;

	/* Hold p_lock across the fault, as trap_pfault() does. */
	PROC_LOCK(p);
	++p->p_lock;
	PROC_UNLOCK(p);

	/* Fault the data page in for writing. */
	result = vm_fault(&vm->vm_map, page, VM_PROT_WRITE, VM_FAULT_DIRTY);

	PROC_LOCK(p);
	--p->p_lock;
	PROC_UNLOCK(p);

	return ((result == KERN_SUCCESS) ? 0 : 1);
}
#endif
922
923 /*
924 * syscall - system call request C handler
925 *
926 * A system call is essentially treated as a trap.
927 */
928 void
929 syscall(frame)
930 struct trapframe frame;
931 {
932 caddr_t params;
933 struct sysent *callp;
934 struct thread *td = curthread;
935 struct proc *p = td->td_proc;
936 register_t orig_tf_eflags;
937 u_int sticks;
938 int error;
939 int narg;
940 int args[8];
941 u_int code;
942
943 /*
944 * note: PCPU_LAZY_INC() can only be used if we can afford
945 * occassional inaccuracy in the count.
946 */
947 PCPU_LAZY_INC(cnt.v_syscall);
948
949 #ifdef DIAGNOSTIC
950 if (ISPL(frame.tf_cs) != SEL_UPL) {
951 mtx_lock(&Giant); /* try to stabilize the system XXX */
952 panic("syscall");
953 /* NOT REACHED */
954 mtx_unlock(&Giant);
955 }
956 #endif
957 KASSERT((td->td_kse != NULL), ("syscall: kse/thread UNLINKED"));
958 KASSERT((td->td_kse->ke_thread == td), ("syscall:kse/thread mismatch"));
959
960 sticks = td->td_kse->ke_sticks;
961 td->td_frame = &frame;
962 if (td->td_ucred != p->p_ucred)
963 cred_update_thread(td);
964 if (p->p_flag & P_KSES)
965 thread_user_enter(p, td);
966 params = (caddr_t)frame.tf_esp + sizeof(int);
967 code = frame.tf_eax;
968 orig_tf_eflags = frame.tf_eflags;
969
970 if (p->p_sysent->sv_prepsyscall) {
971 /*
972 * The prep code is MP aware.
973 */
974 (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms);
975 } else {
976 /*
977 * Need to check if this is a 32 bit or 64 bit syscall.
978 * fuword is MP aware.
979 */
980 if (code == SYS_syscall) {
981 /*
982 * Code is first argument, followed by actual args.
983 */
984 code = fuword(params);
985 params += sizeof(int);
986 } else if (code == SYS___syscall) {
987 /*
988 * Like syscall, but code is a quad, so as to maintain
989 * quad alignment for the rest of the arguments.
990 */
991 code = fuword(params);
992 params += sizeof(quad_t);
993 }
994 }
995
996 if (p->p_sysent->sv_mask)
997 code &= p->p_sysent->sv_mask;
998
999 if (code >= p->p_sysent->sv_size)
1000 callp = &p->p_sysent->sv_table[0];
1001 else
1002 callp = &p->p_sysent->sv_table[code];
1003
1004 narg = callp->sy_narg & SYF_ARGMASK;
1005
1006 /*
1007 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
1008 */
1009 if (params != NULL && narg != 0)
1010 error = copyin(params, (caddr_t)args,
1011 (u_int)(narg * sizeof(int)));
1012 else
1013 error = 0;
1014
1015 #ifdef KTRACE
1016 if (KTRPOINT(td, KTR_SYSCALL))
1017 ktrsyscall(code, narg, args);
1018 #endif
1019
1020 /*
1021 * Try to run the syscall without Giant if the syscall
1022 * is MP safe.
1023 */
1024 if ((callp->sy_narg & SYF_MPSAFE) == 0)
1025 mtx_lock(&Giant);
1026
1027 if (error == 0) {
1028 td->td_retval[0] = 0;
1029 td->td_retval[1] = frame.tf_edx;
1030
1031 STOPEVENT(p, S_SCE, narg);
1032
1033 error = (*callp->sy_call)(td, args);
1034 }
1035
1036 switch (error) {
1037 case 0:
1038 frame.tf_eax = td->td_retval[0];
1039 frame.tf_edx = td->td_retval[1];
1040 frame.tf_eflags &= ~PSL_C;
1041 break;
1042
1043 case ERESTART:
1044 /*
1045 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
1046 * int 0x80 is 2 bytes. We saved this in tf_err.
1047 */
1048 frame.tf_eip -= frame.tf_err;
1049 break;
1050
1051 case EJUSTRETURN:
1052 break;
1053
1054 default:
1055 if (p->p_sysent->sv_errsize) {
1056 if (error >= p->p_sysent->sv_errsize)
1057 error = -1; /* XXX */
1058 else
1059 error = p->p_sysent->sv_errtbl[error];
1060 }
1061 frame.tf_eax = error;
1062 frame.tf_eflags |= PSL_C;
1063 break;
1064 }
1065
1066 /*
1067 * Release Giant if we previously set it.
1068 */
1069 if ((callp->sy_narg & SYF_MPSAFE) == 0)
1070 mtx_unlock(&Giant);
1071
1072 /*
1073 * Traced syscall.
1074 */
1075 if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
1076 frame.tf_eflags &= ~PSL_T;
1077 trapsignal(p, SIGTRAP, 0);
1078 }
1079
1080 /*
1081 * Handle reschedule and other end-of-syscall issues
1082 */
1083 userret(td, &frame, sticks);
1084
1085 #ifdef KTRACE
1086 if (KTRPOINT(td, KTR_SYSRET))
1087 ktrsysret(code, error, td->td_retval[0]);
1088 #endif
1089
1090 /*
1091 * This works because errno is findable through the
1092 * register set. If we ever support an emulation where this
1093 * is not the case, this code will need to be revisited.
1094 */
1095 STOPEVENT(p, S_SCX, code);
1096
1097 #ifdef DIAGNOSTIC
1098 cred_free_thread(td);
1099 #endif
1100
1101 #ifdef WITNESS
1102 if (witness_list(td)) {
1103 panic("system call %s returning with mutex(s) held\n",
1104 syscallnames[code]);
1105 }
1106 #endif
1107 mtx_assert(&sched_lock, MA_NOTOWNED);
1108 mtx_assert(&Giant, MA_NOTOWNED);
1109 }
1110
Cache object: 56e2645b120c3923eb88d877eeb98c0a
|