FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/trap.c
1 /*-
2 * Copyright (C) 1994, David Greenman
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the University of Utah, and William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
38 */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD: releng/9.1/sys/i386/i386/trap.c 236238 2012-05-29 14:50:21Z fabient $");
42
43 /*
44 * 386 Trap and System call handling
45 */
46
47 #include "opt_clock.h"
48 #include "opt_cpu.h"
49 #include "opt_hwpmc_hooks.h"
50 #include "opt_isa.h"
51 #include "opt_kdb.h"
52 #include "opt_kdtrace.h"
53 #include "opt_npx.h"
54 #include "opt_trap.h"
55
56 #include <sys/param.h>
57 #include <sys/bus.h>
58 #include <sys/systm.h>
59 #include <sys/proc.h>
60 #include <sys/pioctl.h>
61 #include <sys/ptrace.h>
62 #include <sys/kdb.h>
63 #include <sys/kernel.h>
64 #include <sys/ktr.h>
65 #include <sys/lock.h>
66 #include <sys/mutex.h>
67 #include <sys/resourcevar.h>
68 #include <sys/signalvar.h>
69 #include <sys/syscall.h>
70 #include <sys/sysctl.h>
71 #include <sys/sysent.h>
72 #include <sys/uio.h>
73 #include <sys/vmmeter.h>
74 #ifdef HWPMC_HOOKS
75 #include <sys/pmckern.h>
76 PMC_SOFT_DEFINE( , , page_fault, all);
77 PMC_SOFT_DEFINE( , , page_fault, read);
78 PMC_SOFT_DEFINE( , , page_fault, write);
79 #endif
80 #include <security/audit/audit.h>
81
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_kern.h>
86 #include <vm/vm_map.h>
87 #include <vm/vm_page.h>
88 #include <vm/vm_extern.h>
89
90 #include <machine/cpu.h>
91 #include <machine/intr_machdep.h>
92 #include <x86/mca.h>
93 #include <machine/md_var.h>
94 #include <machine/pcb.h>
95 #ifdef SMP
96 #include <machine/smp.h>
97 #endif
98 #include <machine/tss.h>
99 #include <machine/vm86.h>
100
101 #ifdef POWERFAIL_NMI
102 #include <sys/syslog.h>
103 #include <machine/clock.h>
104 #endif
105
106 #ifdef KDTRACE_HOOKS
107 #include <sys/dtrace_bsd.h>
108
109 /*
110 * This is a hook which is initialised by the dtrace module
111 * to handle traps which might occur during DTrace probe
112 * execution.
113 */
114 dtrace_trap_func_t dtrace_trap_func;
115
116 dtrace_doubletrap_func_t dtrace_doubletrap_func;
117
118 /*
119 * This is a hook which is initialised by the systrace module
120 * when it is loaded. This keeps the DTrace syscall provider
121 * implementation opaque.
122 */
123 systrace_probe_func_t systrace_probe_func;
124
125 /*
126 * These hooks are necessary for the pid, usdt and fasttrap providers.
127 */
128 dtrace_fasttrap_probe_ptr_t dtrace_fasttrap_probe_ptr;
129 dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr;
130 dtrace_return_probe_ptr_t dtrace_return_probe_ptr;
131 #endif
132
133 extern void trap(struct trapframe *frame);
134 extern void syscall(struct trapframe *frame);
135
136 static int trap_pfault(struct trapframe *, int, vm_offset_t);
137 static void trap_fatal(struct trapframe *, vm_offset_t);
138 void dblfault_handler(void);
139
140 extern inthand_t IDTVEC(lcall_syscall);
141
#define MAX_TRAP_MSG		33
/*
 * Human-readable descriptions of the machine-dependent trap types,
 * indexed by the T_* trap number from frame->tf_trapno.  Entries for
 * unused trap numbers are empty strings.  Used by trap_fatal() for
 * panic/console messages (and by the DEBUG report in trap()).
 */
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"",					/*  7 unused */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
	"SIMD floating-point exception",	/* 29 T_XMMFLT */
	"reserved (unknown) fault",		/* 30 T_RESERVED */
	"",					/* 31 unused (reserved) */
	"DTrace pid return trap",		/* 32 T_DTRACE_RET */
	"DTrace fasttrap probe trap",		/* 33 T_DTRACE_PROBE */

};
180
181 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
182 extern int has_f00f_bug;
183 #endif
184
185 #ifdef KDB
186 static int kdb_on_nmi = 1;
187 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
188 &kdb_on_nmi, 0, "Go to KDB on NMI");
189 TUNABLE_INT("machdep.kdb_on_nmi", &kdb_on_nmi);
190 #endif
191 static int panic_on_nmi = 1;
192 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
193 &panic_on_nmi, 0, "Panic on NMI");
194 TUNABLE_INT("machdep.panic_on_nmi", &panic_on_nmi);
195 static int prot_fault_translation = 0;
196 SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
197 &prot_fault_translation, 0, "Select signal to deliver on protection fault");
198
199 /*
200 * Exception, fault, and trap interface to the FreeBSD kernel.
201 * This common code is called from assembly language IDT gate entry
202 * routines that prepare a suitable stack frame, and restore this
203 * frame after the exception has been processed.
204 */
205
206 void
207 trap(struct trapframe *frame)
208 {
209 struct thread *td = curthread;
210 struct proc *p = td->td_proc;
211 int i = 0, ucode = 0, code;
212 u_int type;
213 register_t addr = 0;
214 vm_offset_t eva;
215 ksiginfo_t ksi;
216 #ifdef POWERFAIL_NMI
217 static int lastalert = 0;
218 #endif
219
220 PCPU_INC(cnt.v_trap);
221 type = frame->tf_trapno;
222
223 #ifdef SMP
224 /* Handler for NMI IPIs used for stopping CPUs. */
225 if (type == T_NMI) {
226 if (ipi_nmi_handler() == 0)
227 goto out;
228 }
229 #endif /* SMP */
230
231 #ifdef KDB
232 if (kdb_active) {
233 kdb_reenter();
234 goto out;
235 }
236 #endif
237
238 if (type == T_RESERVED) {
239 trap_fatal(frame, 0);
240 goto out;
241 }
242
243 #ifdef HWPMC_HOOKS
244 /*
245 * CPU PMCs interrupt using an NMI so we check for that first.
246 * If the HWPMC module is active, 'pmc_hook' will point to
247 * the function to be called. A return value of '1' from the
248 * hook means that the NMI was handled by it and that we can
249 * return immediately.
250 */
251 if (type == T_NMI && pmc_intr &&
252 (*pmc_intr)(PCPU_GET(cpuid), frame))
253 goto out;
254 #endif
255
256 if (type == T_MCHK) {
257 mca_intr();
258 goto out;
259 }
260
261 #ifdef KDTRACE_HOOKS
262 /*
263 * A trap can occur while DTrace executes a probe. Before
264 * executing the probe, DTrace blocks re-scheduling and sets
265 * a flag in it's per-cpu flags to indicate that it doesn't
266 * want to fault. On returning from the probe, the no-fault
267 * flag is cleared and finally re-scheduling is enabled.
268 *
269 * If the DTrace kernel module has registered a trap handler,
270 * call it and if it returns non-zero, assume that it has
271 * handled the trap and modified the trap frame so that this
272 * function can return normally.
273 */
274 if (type == T_DTRACE_PROBE || type == T_DTRACE_RET ||
275 type == T_BPTFLT) {
276 struct reg regs;
277
278 fill_frame_regs(frame, ®s);
279 if (type == T_DTRACE_PROBE &&
280 dtrace_fasttrap_probe_ptr != NULL &&
281 dtrace_fasttrap_probe_ptr(®s) == 0)
282 goto out;
283 if (type == T_BPTFLT &&
284 dtrace_pid_probe_ptr != NULL &&
285 dtrace_pid_probe_ptr(®s) == 0)
286 goto out;
287 if (type == T_DTRACE_RET &&
288 dtrace_return_probe_ptr != NULL &&
289 dtrace_return_probe_ptr(®s) == 0)
290 goto out;
291 }
292 if ((type == T_PROTFLT || type == T_PAGEFLT) &&
293 dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
294 goto out;
295 #endif
296
297 if ((frame->tf_eflags & PSL_I) == 0) {
298 /*
299 * Buggy application or kernel code has disabled
300 * interrupts and then trapped. Enabling interrupts
301 * now is wrong, but it is better than running with
302 * interrupts disabled until they are accidentally
303 * enabled later.
304 */
305 if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM))
306 uprintf(
307 "pid %ld (%s): trap %d with interrupts disabled\n",
308 (long)curproc->p_pid, curthread->td_name, type);
309 else if (type != T_BPTFLT && type != T_TRCTRAP &&
310 frame->tf_eip != (int)cpu_switch_load_gs) {
311 /*
312 * XXX not quite right, since this may be for a
313 * multiple fault in user mode.
314 */
315 printf("kernel trap %d with interrupts disabled\n",
316 type);
317 /*
318 * Page faults need interrupts disabled until later,
319 * and we shouldn't enable interrupts while holding
320 * a spin lock or if servicing an NMI.
321 */
322 if (type != T_NMI && type != T_PAGEFLT &&
323 td->td_md.md_spinlock_count == 0)
324 enable_intr();
325 }
326 }
327 eva = 0;
328 code = frame->tf_err;
329 if (type == T_PAGEFLT) {
330 /*
331 * For some Cyrix CPUs, %cr2 is clobbered by
332 * interrupts. This problem is worked around by using
333 * an interrupt gate for the pagefault handler. We
334 * are finally ready to read %cr2 and conditionally
335 * reenable interrupts. If we hold a spin lock, then
336 * we must not reenable interrupts. This might be a
337 * spurious page fault.
338 */
339 eva = rcr2();
340 if (td->td_md.md_spinlock_count == 0)
341 enable_intr();
342 }
343
344 if ((ISPL(frame->tf_cs) == SEL_UPL) ||
345 ((frame->tf_eflags & PSL_VM) &&
346 !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) {
347 /* user trap */
348
349 td->td_pticks = 0;
350 td->td_frame = frame;
351 addr = frame->tf_eip;
352 if (td->td_ucred != p->p_ucred)
353 cred_update_thread(td);
354
355 switch (type) {
356 case T_PRIVINFLT: /* privileged instruction fault */
357 i = SIGILL;
358 ucode = ILL_PRVOPC;
359 break;
360
361 case T_BPTFLT: /* bpt instruction fault */
362 case T_TRCTRAP: /* trace trap */
363 enable_intr();
364 frame->tf_eflags &= ~PSL_T;
365 i = SIGTRAP;
366 ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
367 break;
368
369 case T_ARITHTRAP: /* arithmetic trap */
370 #ifdef DEV_NPX
371 ucode = npxtrap();
372 if (ucode == -1)
373 goto userout;
374 #else
375 ucode = 0;
376 #endif
377 i = SIGFPE;
378 break;
379
380 /*
381 * The following two traps can happen in
382 * vm86 mode, and, if so, we want to handle
383 * them specially.
384 */
385 case T_PROTFLT: /* general protection fault */
386 case T_STKFLT: /* stack fault */
387 if (frame->tf_eflags & PSL_VM) {
388 i = vm86_emulate((struct vm86frame *)frame);
389 if (i == 0)
390 goto user;
391 break;
392 }
393 i = SIGBUS;
394 ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
395 break;
396 case T_SEGNPFLT: /* segment not present fault */
397 i = SIGBUS;
398 ucode = BUS_ADRERR;
399 break;
400 case T_TSSFLT: /* invalid TSS fault */
401 i = SIGBUS;
402 ucode = BUS_OBJERR;
403 break;
404 case T_DOUBLEFLT: /* double fault */
405 default:
406 i = SIGBUS;
407 ucode = BUS_OBJERR;
408 break;
409
410 case T_PAGEFLT: /* page fault */
411
412 i = trap_pfault(frame, TRUE, eva);
413 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
414 if (i == -2) {
415 /*
416 * The f00f hack workaround has triggered, so
417 * treat the fault as an illegal instruction
418 * (T_PRIVINFLT) instead of a page fault.
419 */
420 type = frame->tf_trapno = T_PRIVINFLT;
421
422 /* Proceed as in that case. */
423 ucode = ILL_PRVOPC;
424 i = SIGILL;
425 break;
426 }
427 #endif
428 if (i == -1)
429 goto userout;
430 if (i == 0)
431 goto user;
432
433 if (i == SIGSEGV)
434 ucode = SEGV_MAPERR;
435 else {
436 if (prot_fault_translation == 0) {
437 /*
438 * Autodetect.
439 * This check also covers the images
440 * without the ABI-tag ELF note.
441 */
442 if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
443 && p->p_osrel >= P_OSREL_SIGSEGV) {
444 i = SIGSEGV;
445 ucode = SEGV_ACCERR;
446 } else {
447 i = SIGBUS;
448 ucode = BUS_PAGE_FAULT;
449 }
450 } else if (prot_fault_translation == 1) {
451 /*
452 * Always compat mode.
453 */
454 i = SIGBUS;
455 ucode = BUS_PAGE_FAULT;
456 } else {
457 /*
458 * Always SIGSEGV mode.
459 */
460 i = SIGSEGV;
461 ucode = SEGV_ACCERR;
462 }
463 }
464 addr = eva;
465 break;
466
467 case T_DIVIDE: /* integer divide fault */
468 ucode = FPE_INTDIV;
469 i = SIGFPE;
470 break;
471
472 #ifdef DEV_ISA
473 case T_NMI:
474 #ifdef POWERFAIL_NMI
475 #ifndef TIMER_FREQ
476 # define TIMER_FREQ 1193182
477 #endif
478 if (time_second - lastalert > 10) {
479 log(LOG_WARNING, "NMI: power fail\n");
480 sysbeep(880, hz);
481 lastalert = time_second;
482 }
483 goto userout;
484 #else /* !POWERFAIL_NMI */
485 /* machine/parity/power fail/"kitchen sink" faults */
486 if (isa_nmi(code) == 0) {
487 #ifdef KDB
488 /*
489 * NMI can be hooked up to a pushbutton
490 * for debugging.
491 */
492 if (kdb_on_nmi) {
493 printf ("NMI ... going to debugger\n");
494 kdb_trap(type, 0, frame);
495 }
496 #endif /* KDB */
497 goto userout;
498 } else if (panic_on_nmi)
499 panic("NMI indicates hardware failure");
500 break;
501 #endif /* POWERFAIL_NMI */
502 #endif /* DEV_ISA */
503
504 case T_OFLOW: /* integer overflow fault */
505 ucode = FPE_INTOVF;
506 i = SIGFPE;
507 break;
508
509 case T_BOUND: /* bounds check fault */
510 ucode = FPE_FLTSUB;
511 i = SIGFPE;
512 break;
513
514 case T_DNA:
515 #ifdef DEV_NPX
516 KASSERT(PCB_USER_FPU(td->td_pcb),
517 ("kernel FPU ctx has leaked"));
518 /* transparent fault (due to context switch "late") */
519 if (npxdna())
520 goto userout;
521 #endif
522 uprintf("pid %d killed due to lack of floating point\n",
523 p->p_pid);
524 i = SIGKILL;
525 ucode = 0;
526 break;
527
528 case T_FPOPFLT: /* FPU operand fetch fault */
529 ucode = ILL_COPROC;
530 i = SIGILL;
531 break;
532
533 case T_XMMFLT: /* SIMD floating-point exception */
534 ucode = 0; /* XXX */
535 i = SIGFPE;
536 break;
537 }
538 } else {
539 /* kernel trap */
540
541 KASSERT(cold || td->td_ucred != NULL,
542 ("kernel trap doesn't have ucred"));
543 switch (type) {
544 case T_PAGEFLT: /* page fault */
545 (void) trap_pfault(frame, FALSE, eva);
546 goto out;
547
548 case T_DNA:
549 #ifdef DEV_NPX
550 KASSERT(!PCB_USER_FPU(td->td_pcb),
551 ("Unregistered use of FPU in kernel"));
552 if (npxdna())
553 goto out;
554 #endif
555 break;
556
557 case T_ARITHTRAP: /* arithmetic trap */
558 case T_XMMFLT: /* SIMD floating-point exception */
559 case T_FPOPFLT: /* FPU operand fetch fault */
560 /*
561 * XXXKIB for now disable any FPU traps in kernel
562 * handler registration seems to be overkill
563 */
564 trap_fatal(frame, 0);
565 goto out;
566
567 /*
568 * The following two traps can happen in
569 * vm86 mode, and, if so, we want to handle
570 * them specially.
571 */
572 case T_PROTFLT: /* general protection fault */
573 case T_STKFLT: /* stack fault */
574 if (frame->tf_eflags & PSL_VM) {
575 i = vm86_emulate((struct vm86frame *)frame);
576 if (i != 0)
577 /*
578 * returns to original process
579 */
580 vm86_trap((struct vm86frame *)frame);
581 goto out;
582 }
583 if (type == T_STKFLT)
584 break;
585
586 /* FALL THROUGH */
587
588 case T_SEGNPFLT: /* segment not present fault */
589 if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)
590 break;
591
592 /*
593 * Invalid %fs's and %gs's can be created using
594 * procfs or PT_SETREGS or by invalidating the
595 * underlying LDT entry. This causes a fault
596 * in kernel mode when the kernel attempts to
597 * switch contexts. Lose the bad context
598 * (XXX) so that we can continue, and generate
599 * a signal.
600 */
601 if (frame->tf_eip == (int)cpu_switch_load_gs) {
602 PCPU_GET(curpcb)->pcb_gs = 0;
603 #if 0
604 PROC_LOCK(p);
605 kern_psignal(p, SIGBUS);
606 PROC_UNLOCK(p);
607 #endif
608 goto out;
609 }
610
611 if (td->td_intr_nesting_level != 0)
612 break;
613
614 /*
615 * Invalid segment selectors and out of bounds
616 * %eip's and %esp's can be set up in user mode.
617 * This causes a fault in kernel mode when the
618 * kernel tries to return to user mode. We want
619 * to get this fault so that we can fix the
620 * problem here and not have to check all the
621 * selectors and pointers when the user changes
622 * them.
623 */
624 if (frame->tf_eip == (int)doreti_iret) {
625 frame->tf_eip = (int)doreti_iret_fault;
626 goto out;
627 }
628 if (frame->tf_eip == (int)doreti_popl_ds) {
629 frame->tf_eip = (int)doreti_popl_ds_fault;
630 goto out;
631 }
632 if (frame->tf_eip == (int)doreti_popl_es) {
633 frame->tf_eip = (int)doreti_popl_es_fault;
634 goto out;
635 }
636 if (frame->tf_eip == (int)doreti_popl_fs) {
637 frame->tf_eip = (int)doreti_popl_fs_fault;
638 goto out;
639 }
640 if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
641 frame->tf_eip =
642 (int)PCPU_GET(curpcb)->pcb_onfault;
643 goto out;
644 }
645 break;
646
647 case T_TSSFLT:
648 /*
649 * PSL_NT can be set in user mode and isn't cleared
650 * automatically when the kernel is entered. This
651 * causes a TSS fault when the kernel attempts to
652 * `iret' because the TSS link is uninitialized. We
653 * want to get this fault so that we can fix the
654 * problem here and not every time the kernel is
655 * entered.
656 */
657 if (frame->tf_eflags & PSL_NT) {
658 frame->tf_eflags &= ~PSL_NT;
659 goto out;
660 }
661 break;
662
663 case T_TRCTRAP: /* trace trap */
664 if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
665 /*
666 * We've just entered system mode via the
667 * syscall lcall. Continue single stepping
668 * silently until the syscall handler has
669 * saved the flags.
670 */
671 goto out;
672 }
673 if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
674 /*
675 * The syscall handler has now saved the
676 * flags. Stop single stepping it.
677 */
678 frame->tf_eflags &= ~PSL_T;
679 goto out;
680 }
681 /*
682 * Ignore debug register trace traps due to
683 * accesses in the user's address space, which
684 * can happen under several conditions such as
685 * if a user sets a watchpoint on a buffer and
686 * then passes that buffer to a system call.
687 * We still want to get TRCTRAPS for addresses
688 * in kernel space because that is useful when
689 * debugging the kernel.
690 */
691 if (user_dbreg_trap() &&
692 !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) {
693 /*
694 * Reset breakpoint bits because the
695 * processor doesn't
696 */
697 load_dr6(rdr6() & 0xfffffff0);
698 goto out;
699 }
700 /*
701 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
702 */
703 case T_BPTFLT:
704 /*
705 * If KDB is enabled, let it handle the debugger trap.
706 * Otherwise, debugger traps "can't happen".
707 */
708 #ifdef KDB
709 if (kdb_trap(type, 0, frame))
710 goto out;
711 #endif
712 break;
713
714 #ifdef DEV_ISA
715 case T_NMI:
716 #ifdef POWERFAIL_NMI
717 if (time_second - lastalert > 10) {
718 log(LOG_WARNING, "NMI: power fail\n");
719 sysbeep(880, hz);
720 lastalert = time_second;
721 }
722 goto out;
723 #else /* !POWERFAIL_NMI */
724 /* machine/parity/power fail/"kitchen sink" faults */
725 if (isa_nmi(code) == 0) {
726 #ifdef KDB
727 /*
728 * NMI can be hooked up to a pushbutton
729 * for debugging.
730 */
731 if (kdb_on_nmi) {
732 printf ("NMI ... going to debugger\n");
733 kdb_trap(type, 0, frame);
734 }
735 #endif /* KDB */
736 goto out;
737 } else if (panic_on_nmi == 0)
738 goto out;
739 /* FALLTHROUGH */
740 #endif /* POWERFAIL_NMI */
741 #endif /* DEV_ISA */
742 }
743
744 trap_fatal(frame, eva);
745 goto out;
746 }
747
748 /* Translate fault for emulators (e.g. Linux) */
749 if (*p->p_sysent->sv_transtrap)
750 i = (*p->p_sysent->sv_transtrap)(i, type);
751
752 ksiginfo_init_trap(&ksi);
753 ksi.ksi_signo = i;
754 ksi.ksi_code = ucode;
755 ksi.ksi_addr = (void *)addr;
756 ksi.ksi_trapno = type;
757 trapsignal(td, &ksi);
758
759 #ifdef DEBUG
760 if (type <= MAX_TRAP_MSG) {
761 uprintf("fatal process exception: %s",
762 trap_msg[type]);
763 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
764 uprintf(", fault VA = 0x%lx", (u_long)eva);
765 uprintf("\n");
766 }
767 #endif
768
769 user:
770 userret(td, frame);
771 mtx_assert(&Giant, MA_NOTOWNED);
772 KASSERT(PCB_USER_FPU(td->td_pcb),
773 ("Return from trap with kernel FPU ctx leaked"));
774 userout:
775 out:
776 return;
777 }
778
779 static int
780 trap_pfault(frame, usermode, eva)
781 struct trapframe *frame;
782 int usermode;
783 vm_offset_t eva;
784 {
785 vm_offset_t va;
786 struct vmspace *vm = NULL;
787 vm_map_t map;
788 int rv = 0;
789 vm_prot_t ftype;
790 struct thread *td = curthread;
791 struct proc *p = td->td_proc;
792
793 if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
794 /*
795 * Due to both processor errata and lazy TLB invalidation when
796 * access restrictions are removed from virtual pages, memory
797 * accesses that are allowed by the physical mapping layer may
798 * nonetheless cause one spurious page fault per virtual page.
799 * When the thread is executing a "no faulting" section that
800 * is bracketed by vm_fault_{disable,enable}_pagefaults(),
801 * every page fault is treated as a spurious page fault,
802 * unless it accesses the same virtual address as the most
803 * recent page fault within the same "no faulting" section.
804 */
805 if (td->td_md.md_spurflt_addr != eva ||
806 (td->td_pflags & TDP_RESETSPUR) != 0) {
807 /*
808 * Do nothing to the TLB. A stale TLB entry is
809 * flushed automatically by a page fault.
810 */
811 td->td_md.md_spurflt_addr = eva;
812 td->td_pflags &= ~TDP_RESETSPUR;
813 return (0);
814 }
815 } else {
816 /*
817 * If we get a page fault while in a critical section, then
818 * it is most likely a fatal kernel page fault. The kernel
819 * is already going to panic trying to get a sleep lock to
820 * do the VM lookup, so just consider it a fatal trap so the
821 * kernel can print out a useful trap message and even get
822 * to the debugger.
823 *
824 * If we get a page fault while holding a non-sleepable
825 * lock, then it is most likely a fatal kernel page fault.
826 * If WITNESS is enabled, then it's going to whine about
827 * bogus LORs with various VM locks, so just skip to the
828 * fatal trap handling directly.
829 */
830 if (td->td_critnest != 0 ||
831 WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
832 "Kernel page fault") != 0) {
833 trap_fatal(frame, eva);
834 return (-1);
835 }
836 }
837 va = trunc_page(eva);
838 if (va >= KERNBASE) {
839 /*
840 * Don't allow user-mode faults in kernel address space.
841 * An exception: if the faulting address is the invalid
842 * instruction entry in the IDT, then the Intel Pentium
843 * F00F bug workaround was triggered, and we need to
844 * treat it is as an illegal instruction, and not a page
845 * fault.
846 */
847 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
848 if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
849 return -2;
850 #endif
851 if (usermode)
852 goto nogo;
853
854 map = kernel_map;
855 } else {
856 /*
857 * This is a fault on non-kernel virtual memory.
858 * vm is initialized above to NULL. If curproc is NULL
859 * or curproc->p_vmspace is NULL the fault is fatal.
860 */
861 if (p != NULL)
862 vm = p->p_vmspace;
863
864 if (vm == NULL)
865 goto nogo;
866
867 map = &vm->vm_map;
868 if (!usermode && (td->td_intr_nesting_level != 0 ||
869 PCPU_GET(curpcb)->pcb_onfault == NULL)) {
870 trap_fatal(frame, eva);
871 return (-1);
872 }
873 }
874
875 /*
876 * PGEX_I is defined only if the execute disable bit capability is
877 * supported and enabled.
878 */
879 if (frame->tf_err & PGEX_W)
880 ftype = VM_PROT_WRITE;
881 #ifdef PAE
882 else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
883 ftype = VM_PROT_EXECUTE;
884 #endif
885 else
886 ftype = VM_PROT_READ;
887
888 if (map != kernel_map) {
889 /*
890 * Keep swapout from messing with us during this
891 * critical time.
892 */
893 PROC_LOCK(p);
894 ++p->p_lock;
895 PROC_UNLOCK(p);
896
897 /* Fault in the user page: */
898 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
899
900 PROC_LOCK(p);
901 --p->p_lock;
902 PROC_UNLOCK(p);
903 } else {
904 /*
905 * Don't have to worry about process locking or stacks in the
906 * kernel.
907 */
908 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
909 }
910 if (rv == KERN_SUCCESS) {
911 #ifdef HWPMC_HOOKS
912 if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
913 PMC_SOFT_CALL_TF( , , page_fault, all, frame);
914 if (ftype == VM_PROT_READ)
915 PMC_SOFT_CALL_TF( , , page_fault, read,
916 frame);
917 else
918 PMC_SOFT_CALL_TF( , , page_fault, write,
919 frame);
920 }
921 #endif
922 return (0);
923 }
924 nogo:
925 if (!usermode) {
926 if (td->td_intr_nesting_level == 0 &&
927 PCPU_GET(curpcb)->pcb_onfault != NULL) {
928 frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
929 return (0);
930 }
931 trap_fatal(frame, eva);
932 return (-1);
933 }
934
935 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
936 }
937
938 static void
939 trap_fatal(frame, eva)
940 struct trapframe *frame;
941 vm_offset_t eva;
942 {
943 int code, ss, esp;
944 u_int type;
945 struct soft_segment_descriptor softseg;
946 char *msg;
947
948 code = frame->tf_err;
949 type = frame->tf_trapno;
950 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
951
952 if (type <= MAX_TRAP_MSG)
953 msg = trap_msg[type];
954 else
955 msg = "UNKNOWN";
956 printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
957 frame->tf_eflags & PSL_VM ? "vm86" :
958 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
959 #ifdef SMP
960 /* two separate prints in case of a trap on an unmapped page */
961 printf("cpuid = %d; ", PCPU_GET(cpuid));
962 printf("apic id = %02x\n", PCPU_GET(apic_id));
963 #endif
964 if (type == T_PAGEFLT) {
965 printf("fault virtual address = 0x%x\n", eva);
966 printf("fault code = %s %s, %s\n",
967 code & PGEX_U ? "user" : "supervisor",
968 code & PGEX_W ? "write" : "read",
969 code & PGEX_P ? "protection violation" : "page not present");
970 }
971 printf("instruction pointer = 0x%x:0x%x\n",
972 frame->tf_cs & 0xffff, frame->tf_eip);
973 if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
974 ss = frame->tf_ss & 0xffff;
975 esp = frame->tf_esp;
976 } else {
977 ss = GSEL(GDATA_SEL, SEL_KPL);
978 esp = (int)&frame->tf_esp;
979 }
980 printf("stack pointer = 0x%x:0x%x\n", ss, esp);
981 printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp);
982 printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n",
983 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
984 printf(" = DPL %d, pres %d, def32 %d, gran %d\n",
985 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
986 softseg.ssd_gran);
987 printf("processor eflags = ");
988 if (frame->tf_eflags & PSL_T)
989 printf("trace trap, ");
990 if (frame->tf_eflags & PSL_I)
991 printf("interrupt enabled, ");
992 if (frame->tf_eflags & PSL_NT)
993 printf("nested task, ");
994 if (frame->tf_eflags & PSL_RF)
995 printf("resume, ");
996 if (frame->tf_eflags & PSL_VM)
997 printf("vm86, ");
998 printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
999 printf("current process = ");
1000 if (curproc) {
1001 printf("%lu (%s)\n", (u_long)curproc->p_pid, curthread->td_name);
1002 } else {
1003 printf("Idle\n");
1004 }
1005
1006 #ifdef KDB
1007 if (debugger_on_panic || kdb_active) {
1008 frame->tf_err = eva; /* smuggle fault address to ddb */
1009 if (kdb_trap(type, 0, frame)) {
1010 frame->tf_err = code; /* restore error code */
1011 return;
1012 }
1013 frame->tf_err = code; /* restore error code */
1014 }
1015 #endif
1016 printf("trap number = %d\n", type);
1017 if (type <= MAX_TRAP_MSG)
1018 panic("%s", trap_msg[type]);
1019 else
1020 panic("unknown/reserved trap");
1021 }
1022
1023 /*
1024 * Double fault handler. Called when a fault occurs while writing
1025 * a frame for a trap/exception onto the stack. This usually occurs
1026 * when the stack overflows (such is the case with infinite recursion,
1027 * for example).
1028 *
1029 * XXX Note that the current PTD gets replaced by IdlePTD when the
1030 * task switch occurs. This means that the stack that was active at
1031 * the time of the double fault is not available at <kstack> unless
1032 * the machine was idle when the double fault occurred. The downside
1033 * of this is that "trace <ebp>" in ddb won't work.
1034 */
1035 void
1036 dblfault_handler()
1037 {
1038 #ifdef KDTRACE_HOOKS
1039 if (dtrace_doubletrap_func != NULL)
1040 (*dtrace_doubletrap_func)();
1041 #endif
1042 printf("\nFatal double fault:\n");
1043 printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
1044 printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
1045 printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
1046 #ifdef SMP
1047 /* two separate prints in case of a trap on an unmapped page */
1048 printf("cpuid = %d; ", PCPU_GET(cpuid));
1049 printf("apic id = %02x\n", PCPU_GET(apic_id));
1050 #endif
1051 panic("double fault");
1052 }
1053
1054 int
1055 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
1056 {
1057 struct proc *p;
1058 struct trapframe *frame;
1059 caddr_t params;
1060 int error;
1061
1062 p = td->td_proc;
1063 frame = td->td_frame;
1064
1065 params = (caddr_t)frame->tf_esp + sizeof(int);
1066 sa->code = frame->tf_eax;
1067
1068 /*
1069 * Need to check if this is a 32 bit or 64 bit syscall.
1070 */
1071 if (sa->code == SYS_syscall) {
1072 /*
1073 * Code is first argument, followed by actual args.
1074 */
1075 sa->code = fuword(params);
1076 params += sizeof(int);
1077 } else if (sa->code == SYS___syscall) {
1078 /*
1079 * Like syscall, but code is a quad, so as to maintain
1080 * quad alignment for the rest of the arguments.
1081 */
1082 sa->code = fuword(params);
1083 params += sizeof(quad_t);
1084 }
1085
1086 if (p->p_sysent->sv_mask)
1087 sa->code &= p->p_sysent->sv_mask;
1088 if (sa->code >= p->p_sysent->sv_size)
1089 sa->callp = &p->p_sysent->sv_table[0];
1090 else
1091 sa->callp = &p->p_sysent->sv_table[sa->code];
1092 sa->narg = sa->callp->sy_narg;
1093
1094 if (params != NULL && sa->narg != 0)
1095 error = copyin(params, (caddr_t)sa->args,
1096 (u_int)(sa->narg * sizeof(int)));
1097 else
1098 error = 0;
1099
1100 if (error == 0) {
1101 td->td_retval[0] = 0;
1102 td->td_retval[1] = frame->tf_edx;
1103 }
1104
1105 return (error);
1106 }
1107
1108 #include "../../kern/subr_syscall.c"
1109
1110 /*
1111 * syscall - system call request C handler. A system call is
1112 * essentially treated as a trap by reusing the frame layout.
1113 */
1114 void
1115 syscall(struct trapframe *frame)
1116 {
1117 struct thread *td;
1118 struct syscall_args sa;
1119 register_t orig_tf_eflags;
1120 int error;
1121 ksiginfo_t ksi;
1122
1123 #ifdef DIAGNOSTIC
1124 if (ISPL(frame->tf_cs) != SEL_UPL) {
1125 panic("syscall");
1126 /* NOT REACHED */
1127 }
1128 #endif
1129 orig_tf_eflags = frame->tf_eflags;
1130
1131 td = curthread;
1132 td->td_frame = frame;
1133
1134 error = syscallenter(td, &sa);
1135
1136 /*
1137 * Traced syscall.
1138 */
1139 if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
1140 frame->tf_eflags &= ~PSL_T;
1141 ksiginfo_init_trap(&ksi);
1142 ksi.ksi_signo = SIGTRAP;
1143 ksi.ksi_code = TRAP_TRACE;
1144 ksi.ksi_addr = (void *)frame->tf_eip;
1145 trapsignal(td, &ksi);
1146 }
1147
1148 KASSERT(PCB_USER_FPU(td->td_pcb),
1149 ("System call %s returning with kernel FPU ctx leaked",
1150 syscallname(td->td_proc, sa.code)));
1151 KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
1152 ("System call %s returning with mangled pcb_save",
1153 syscallname(td->td_proc, sa.code)));
1154
1155 syscallret(td, error, &sa);
1156 }
Cache object: 56f841e2d5b4430eafde27731139db51
|