FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/trap.c
1 /*-
2 * Copyright (C) 1994, David Greenman
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the University of Utah, and William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
38 */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD: releng/9.0/sys/amd64/amd64/trap.c 236953 2012-06-12 12:10:10Z bz $");
42
43 /*
44 * AMD64 Trap and System call handling
45 */
46
47 #include "opt_clock.h"
48 #include "opt_cpu.h"
49 #include "opt_hwpmc_hooks.h"
50 #include "opt_isa.h"
51 #include "opt_kdb.h"
52 #include "opt_kdtrace.h"
53
54 #include <sys/param.h>
55 #include <sys/bus.h>
56 #include <sys/systm.h>
57 #include <sys/proc.h>
58 #include <sys/pioctl.h>
59 #include <sys/ptrace.h>
60 #include <sys/kdb.h>
61 #include <sys/kernel.h>
62 #include <sys/ktr.h>
63 #include <sys/lock.h>
64 #include <sys/mutex.h>
65 #include <sys/resourcevar.h>
66 #include <sys/signalvar.h>
67 #include <sys/syscall.h>
68 #include <sys/sysctl.h>
69 #include <sys/sysent.h>
70 #include <sys/uio.h>
71 #include <sys/vmmeter.h>
72 #ifdef HWPMC_HOOKS
73 #include <sys/pmckern.h>
74 #endif
75
76 #include <vm/vm.h>
77 #include <vm/vm_param.h>
78 #include <vm/pmap.h>
79 #include <vm/vm_kern.h>
80 #include <vm/vm_map.h>
81 #include <vm/vm_page.h>
82 #include <vm/vm_extern.h>
83
84 #include <machine/cpu.h>
85 #include <machine/intr_machdep.h>
86 #include <x86/mca.h>
87 #include <machine/md_var.h>
88 #include <machine/pcb.h>
89 #ifdef SMP
90 #include <machine/smp.h>
91 #endif
92 #include <machine/tss.h>
93
94 #ifdef KDTRACE_HOOKS
95 #include <sys/dtrace_bsd.h>
96
97 /*
98 * This is a hook which is initialised by the dtrace module
99 * to handle traps which might occur during DTrace probe
100 * execution.
101 */
102 dtrace_trap_func_t dtrace_trap_func;
103
104 dtrace_doubletrap_func_t dtrace_doubletrap_func;
105
106 /*
107 * This is a hook which is initialised by the systrace module
108 * when it is loaded. This keeps the DTrace syscall provider
109 * implementation opaque.
110 */
111 systrace_probe_func_t systrace_probe_func;
112
113 /*
114 * These hooks are necessary for the pid, usdt and fasttrap providers.
115 */
116 dtrace_fasttrap_probe_ptr_t dtrace_fasttrap_probe_ptr;
117 dtrace_pid_probe_ptr_t dtrace_pid_probe_ptr;
118 dtrace_return_probe_ptr_t dtrace_return_probe_ptr;
119 #endif
120
121 extern void trap(struct trapframe *frame);
122 extern void syscall(struct trapframe *frame);
123 void dblfault_handler(struct trapframe *frame);
124
125 static int trap_pfault(struct trapframe *, int);
126 static void trap_fatal(struct trapframe *, vm_offset_t);
127
#define MAX_TRAP_MSG		33
/*
 * Human-readable names for the T_* trap codes, indexed by trap number.
 * Entries marked "unused" correspond to trap numbers that are not
 * delivered on amd64; trap_fatal() falls back to "UNKNOWN" for
 * anything above MAX_TRAP_MSG.
 */
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"",					/*  7 unused */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
	"SIMD floating-point exception",	/* 29 T_XMMFLT */
	"reserved (unknown) fault",		/* 30 T_RESERVED */
	"",					/* 31 unused (reserved) */
	"DTrace pid return trap",		/* 32 T_DTRACE_RET */
	"DTrace fasttrap probe trap",		/* 33 T_DTRACE_PROBE */
};
165
166 #ifdef KDB
167 static int kdb_on_nmi = 1;
168 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
169 &kdb_on_nmi, 0, "Go to KDB on NMI");
170 TUNABLE_INT("machdep.kdb_on_nmi", &kdb_on_nmi);
171 #endif
172 static int panic_on_nmi = 1;
173 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
174 &panic_on_nmi, 0, "Panic on NMI");
175 TUNABLE_INT("machdep.panic_on_nmi", &panic_on_nmi);
176 static int prot_fault_translation = 0;
177 SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
178 &prot_fault_translation, 0, "Select signal to deliver on protection fault");
179
180 /*
181 * Exception, fault, and trap interface to the FreeBSD kernel.
182 * This common code is called from assembly language IDT gate entry
183 * routines that prepare a suitable stack frame, and restore this
184 * frame after the exception has been processed.
185 */
186
187 void
188 trap(struct trapframe *frame)
189 {
190 struct thread *td = curthread;
191 struct proc *p = td->td_proc;
192 int i = 0, ucode = 0, code;
193 u_int type;
194 register_t addr = 0;
195 ksiginfo_t ksi;
196
197 PCPU_INC(cnt.v_trap);
198 type = frame->tf_trapno;
199
200 #ifdef SMP
201 /* Handler for NMI IPIs used for stopping CPUs. */
202 if (type == T_NMI) {
203 if (ipi_nmi_handler() == 0)
204 goto out;
205 }
206 #endif /* SMP */
207
208 #ifdef KDB
209 if (kdb_active) {
210 kdb_reenter();
211 goto out;
212 }
213 #endif
214
215 if (type == T_RESERVED) {
216 trap_fatal(frame, 0);
217 goto out;
218 }
219
220 #ifdef HWPMC_HOOKS
221 /*
222 * CPU PMCs interrupt using an NMI. If the PMC module is
223 * active, pass the 'rip' value to the PMC module's interrupt
224 * handler. A return value of '1' from the handler means that
225 * the NMI was handled by it and we can return immediately.
226 */
227 if (type == T_NMI && pmc_intr &&
228 (*pmc_intr)(PCPU_GET(cpuid), frame))
229 goto out;
230 #endif
231
232 if (type == T_MCHK) {
233 if (!mca_intr())
234 trap_fatal(frame, 0);
235 goto out;
236 }
237
238 #ifdef KDTRACE_HOOKS
239 /*
240 * A trap can occur while DTrace executes a probe. Before
241 * executing the probe, DTrace blocks re-scheduling and sets
242 * a flag in it's per-cpu flags to indicate that it doesn't
243 * want to fault. On returning from the probe, the no-fault
244 * flag is cleared and finally re-scheduling is enabled.
245 *
246 * If the DTrace kernel module has registered a trap handler,
247 * call it and if it returns non-zero, assume that it has
248 * handled the trap and modified the trap frame so that this
249 * function can return normally.
250 */
251 if (type == T_DTRACE_PROBE || type == T_DTRACE_RET ||
252 type == T_BPTFLT) {
253 struct reg regs;
254
255 fill_frame_regs(frame, ®s);
256 if (type == T_DTRACE_PROBE &&
257 dtrace_fasttrap_probe_ptr != NULL &&
258 dtrace_fasttrap_probe_ptr(®s) == 0)
259 goto out;
260 else if (type == T_BPTFLT &&
261 dtrace_pid_probe_ptr != NULL &&
262 dtrace_pid_probe_ptr(®s) == 0)
263 goto out;
264 else if (type == T_DTRACE_RET &&
265 dtrace_return_probe_ptr != NULL &&
266 dtrace_return_probe_ptr(®s) == 0)
267 goto out;
268 }
269 if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
270 goto out;
271 #endif
272
273 if ((frame->tf_rflags & PSL_I) == 0) {
274 /*
275 * Buggy application or kernel code has disabled
276 * interrupts and then trapped. Enabling interrupts
277 * now is wrong, but it is better than running with
278 * interrupts disabled until they are accidentally
279 * enabled later.
280 */
281 if (ISPL(frame->tf_cs) == SEL_UPL)
282 uprintf(
283 "pid %ld (%s): trap %d with interrupts disabled\n",
284 (long)curproc->p_pid, curthread->td_name, type);
285 else if (type != T_NMI && type != T_BPTFLT &&
286 type != T_TRCTRAP) {
287 /*
288 * XXX not quite right, since this may be for a
289 * multiple fault in user mode.
290 */
291 printf("kernel trap %d with interrupts disabled\n",
292 type);
293
294 /*
295 * We shouldn't enable interrupts while holding a
296 * spin lock.
297 */
298 if (td->td_md.md_spinlock_count == 0)
299 enable_intr();
300 }
301 }
302
303 code = frame->tf_err;
304 if (type == T_PAGEFLT) {
305 /*
306 * If we get a page fault while in a critical section, then
307 * it is most likely a fatal kernel page fault. The kernel
308 * is already going to panic trying to get a sleep lock to
309 * do the VM lookup, so just consider it a fatal trap so the
310 * kernel can print out a useful trap message and even get
311 * to the debugger.
312 *
313 * If we get a page fault while holding a non-sleepable
314 * lock, then it is most likely a fatal kernel page fault.
315 * If WITNESS is enabled, then it's going to whine about
316 * bogus LORs with various VM locks, so just skip to the
317 * fatal trap handling directly.
318 */
319 if (td->td_critnest != 0 ||
320 WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
321 "Kernel page fault") != 0)
322 trap_fatal(frame, frame->tf_addr);
323 }
324
325 if (ISPL(frame->tf_cs) == SEL_UPL) {
326 /* user trap */
327
328 td->td_pticks = 0;
329 td->td_frame = frame;
330 addr = frame->tf_rip;
331 if (td->td_ucred != p->p_ucred)
332 cred_update_thread(td);
333
334 switch (type) {
335 case T_PRIVINFLT: /* privileged instruction fault */
336 i = SIGILL;
337 ucode = ILL_PRVOPC;
338 break;
339
340 case T_BPTFLT: /* bpt instruction fault */
341 case T_TRCTRAP: /* trace trap */
342 enable_intr();
343 frame->tf_rflags &= ~PSL_T;
344 i = SIGTRAP;
345 ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
346 break;
347
348 case T_ARITHTRAP: /* arithmetic trap */
349 ucode = fputrap();
350 if (ucode == -1)
351 goto userout;
352 i = SIGFPE;
353 break;
354
355 case T_PROTFLT: /* general protection fault */
356 i = SIGBUS;
357 ucode = BUS_OBJERR;
358 break;
359 case T_STKFLT: /* stack fault */
360 case T_SEGNPFLT: /* segment not present fault */
361 i = SIGBUS;
362 ucode = BUS_ADRERR;
363 break;
364 case T_TSSFLT: /* invalid TSS fault */
365 i = SIGBUS;
366 ucode = BUS_OBJERR;
367 break;
368 case T_DOUBLEFLT: /* double fault */
369 default:
370 i = SIGBUS;
371 ucode = BUS_OBJERR;
372 break;
373
374 case T_PAGEFLT: /* page fault */
375 addr = frame->tf_addr;
376 i = trap_pfault(frame, TRUE);
377 if (i == -1)
378 goto userout;
379 if (i == 0)
380 goto user;
381
382 if (i == SIGSEGV)
383 ucode = SEGV_MAPERR;
384 else {
385 if (prot_fault_translation == 0) {
386 /*
387 * Autodetect.
388 * This check also covers the images
389 * without the ABI-tag ELF note.
390 */
391 if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
392 && p->p_osrel >= P_OSREL_SIGSEGV) {
393 i = SIGSEGV;
394 ucode = SEGV_ACCERR;
395 } else {
396 i = SIGBUS;
397 ucode = BUS_PAGE_FAULT;
398 }
399 } else if (prot_fault_translation == 1) {
400 /*
401 * Always compat mode.
402 */
403 i = SIGBUS;
404 ucode = BUS_PAGE_FAULT;
405 } else {
406 /*
407 * Always SIGSEGV mode.
408 */
409 i = SIGSEGV;
410 ucode = SEGV_ACCERR;
411 }
412 }
413 break;
414
415 case T_DIVIDE: /* integer divide fault */
416 ucode = FPE_INTDIV;
417 i = SIGFPE;
418 break;
419
420 #ifdef DEV_ISA
421 case T_NMI:
422 /* machine/parity/power fail/"kitchen sink" faults */
423 if (isa_nmi(code) == 0) {
424 #ifdef KDB
425 /*
426 * NMI can be hooked up to a pushbutton
427 * for debugging.
428 */
429 if (kdb_on_nmi) {
430 printf ("NMI ... going to debugger\n");
431 kdb_trap(type, 0, frame);
432 }
433 #endif /* KDB */
434 goto userout;
435 } else if (panic_on_nmi)
436 panic("NMI indicates hardware failure");
437 break;
438 #endif /* DEV_ISA */
439
440 case T_OFLOW: /* integer overflow fault */
441 ucode = FPE_INTOVF;
442 i = SIGFPE;
443 break;
444
445 case T_BOUND: /* bounds check fault */
446 ucode = FPE_FLTSUB;
447 i = SIGFPE;
448 break;
449
450 case T_DNA:
451 /* transparent fault (due to context switch "late") */
452 KASSERT(PCB_USER_FPU(td->td_pcb),
453 ("kernel FPU ctx has leaked"));
454 fpudna();
455 goto userout;
456
457 case T_FPOPFLT: /* FPU operand fetch fault */
458 ucode = ILL_COPROC;
459 i = SIGILL;
460 break;
461
462 case T_XMMFLT: /* SIMD floating-point exception */
463 ucode = 0; /* XXX */
464 i = SIGFPE;
465 break;
466 }
467 } else {
468 /* kernel trap */
469
470 KASSERT(cold || td->td_ucred != NULL,
471 ("kernel trap doesn't have ucred"));
472 switch (type) {
473 case T_PAGEFLT: /* page fault */
474 (void) trap_pfault(frame, FALSE);
475 goto out;
476
477 case T_DNA:
478 KASSERT(!PCB_USER_FPU(td->td_pcb),
479 ("Unregistered use of FPU in kernel"));
480 fpudna();
481 goto out;
482
483 case T_ARITHTRAP: /* arithmetic trap */
484 case T_XMMFLT: /* SIMD floating-point exception */
485 case T_FPOPFLT: /* FPU operand fetch fault */
486 /*
487 * XXXKIB for now disable any FPU traps in kernel
488 * handler registration seems to be overkill
489 */
490 trap_fatal(frame, 0);
491 goto out;
492
493 case T_STKFLT: /* stack fault */
494 break;
495
496 case T_PROTFLT: /* general protection fault */
497 case T_SEGNPFLT: /* segment not present fault */
498 if (td->td_intr_nesting_level != 0)
499 break;
500
501 /*
502 * Invalid segment selectors and out of bounds
503 * %rip's and %rsp's can be set up in user mode.
504 * This causes a fault in kernel mode when the
505 * kernel tries to return to user mode. We want
506 * to get this fault so that we can fix the
507 * problem here and not have to check all the
508 * selectors and pointers when the user changes
509 * them.
510 */
511 if (frame->tf_rip == (long)doreti_iret) {
512 frame->tf_rip = (long)doreti_iret_fault;
513 goto out;
514 }
515 if (frame->tf_rip == (long)ld_ds) {
516 frame->tf_rip = (long)ds_load_fault;
517 goto out;
518 }
519 if (frame->tf_rip == (long)ld_es) {
520 frame->tf_rip = (long)es_load_fault;
521 goto out;
522 }
523 if (frame->tf_rip == (long)ld_fs) {
524 frame->tf_rip = (long)fs_load_fault;
525 goto out;
526 }
527 if (frame->tf_rip == (long)ld_gs) {
528 frame->tf_rip = (long)gs_load_fault;
529 goto out;
530 }
531 if (frame->tf_rip == (long)ld_gsbase) {
532 frame->tf_rip = (long)gsbase_load_fault;
533 goto out;
534 }
535 if (frame->tf_rip == (long)ld_fsbase) {
536 frame->tf_rip = (long)fsbase_load_fault;
537 goto out;
538 }
539 if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
540 frame->tf_rip =
541 (long)PCPU_GET(curpcb)->pcb_onfault;
542 goto out;
543 }
544 break;
545
546 case T_TSSFLT:
547 /*
548 * PSL_NT can be set in user mode and isn't cleared
549 * automatically when the kernel is entered. This
550 * causes a TSS fault when the kernel attempts to
551 * `iret' because the TSS link is uninitialized. We
552 * want to get this fault so that we can fix the
553 * problem here and not every time the kernel is
554 * entered.
555 */
556 if (frame->tf_rflags & PSL_NT) {
557 frame->tf_rflags &= ~PSL_NT;
558 goto out;
559 }
560 break;
561
562 case T_TRCTRAP: /* trace trap */
563 /*
564 * Ignore debug register trace traps due to
565 * accesses in the user's address space, which
566 * can happen under several conditions such as
567 * if a user sets a watchpoint on a buffer and
568 * then passes that buffer to a system call.
569 * We still want to get TRCTRAPS for addresses
570 * in kernel space because that is useful when
571 * debugging the kernel.
572 */
573 if (user_dbreg_trap()) {
574 /*
575 * Reset breakpoint bits because the
576 * processor doesn't
577 */
578 /* XXX check upper bits here */
579 load_dr6(rdr6() & 0xfffffff0);
580 goto out;
581 }
582 /*
583 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
584 */
585 case T_BPTFLT:
586 /*
587 * If KDB is enabled, let it handle the debugger trap.
588 * Otherwise, debugger traps "can't happen".
589 */
590 #ifdef KDB
591 if (kdb_trap(type, 0, frame))
592 goto out;
593 #endif
594 break;
595
596 #ifdef DEV_ISA
597 case T_NMI:
598 /* machine/parity/power fail/"kitchen sink" faults */
599 if (isa_nmi(code) == 0) {
600 #ifdef KDB
601 /*
602 * NMI can be hooked up to a pushbutton
603 * for debugging.
604 */
605 if (kdb_on_nmi) {
606 printf ("NMI ... going to debugger\n");
607 kdb_trap(type, 0, frame);
608 }
609 #endif /* KDB */
610 goto out;
611 } else if (panic_on_nmi == 0)
612 goto out;
613 /* FALLTHROUGH */
614 #endif /* DEV_ISA */
615 }
616
617 trap_fatal(frame, 0);
618 goto out;
619 }
620
621 /* Translate fault for emulators (e.g. Linux) */
622 if (*p->p_sysent->sv_transtrap)
623 i = (*p->p_sysent->sv_transtrap)(i, type);
624
625 ksiginfo_init_trap(&ksi);
626 ksi.ksi_signo = i;
627 ksi.ksi_code = ucode;
628 ksi.ksi_trapno = type;
629 ksi.ksi_addr = (void *)addr;
630 trapsignal(td, &ksi);
631
632 user:
633 userret(td, frame);
634 mtx_assert(&Giant, MA_NOTOWNED);
635 KASSERT(PCB_USER_FPU(td->td_pcb),
636 ("Return from trap with kernel FPU ctx leaked"));
637 userout:
638 out:
639 return;
640 }
641
642 static int
643 trap_pfault(frame, usermode)
644 struct trapframe *frame;
645 int usermode;
646 {
647 vm_offset_t va;
648 struct vmspace *vm = NULL;
649 vm_map_t map;
650 int rv = 0;
651 vm_prot_t ftype;
652 struct thread *td = curthread;
653 struct proc *p = td->td_proc;
654 vm_offset_t eva = frame->tf_addr;
655
656 va = trunc_page(eva);
657 if (va >= VM_MIN_KERNEL_ADDRESS) {
658 /*
659 * Don't allow user-mode faults in kernel address space.
660 */
661 if (usermode)
662 goto nogo;
663
664 map = kernel_map;
665 } else {
666 /*
667 * This is a fault on non-kernel virtual memory.
668 * vm is initialized above to NULL. If curproc is NULL
669 * or curproc->p_vmspace is NULL the fault is fatal.
670 */
671 if (p != NULL)
672 vm = p->p_vmspace;
673
674 if (vm == NULL)
675 goto nogo;
676
677 map = &vm->vm_map;
678
679 /*
680 * When accessing a usermode address, kernel must be
681 * ready to accept the page fault, and provide a
682 * handling routine. Since accessing the address
683 * without the handler is a bug, do not try to handle
684 * it normally, and panic immediately.
685 */
686 if (!usermode && (td->td_intr_nesting_level != 0 ||
687 PCPU_GET(curpcb)->pcb_onfault == NULL)) {
688 trap_fatal(frame, eva);
689 return (-1);
690 }
691 }
692
693 /*
694 * PGEX_I is defined only if the execute disable bit capability is
695 * supported and enabled.
696 */
697 if (frame->tf_err & PGEX_W)
698 ftype = VM_PROT_WRITE;
699 else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
700 ftype = VM_PROT_EXECUTE;
701 else
702 ftype = VM_PROT_READ;
703
704 if (map != kernel_map) {
705 /*
706 * Keep swapout from messing with us during this
707 * critical time.
708 */
709 PROC_LOCK(p);
710 ++p->p_lock;
711 PROC_UNLOCK(p);
712
713 /* Fault in the user page: */
714 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
715
716 PROC_LOCK(p);
717 --p->p_lock;
718 PROC_UNLOCK(p);
719 } else {
720 /*
721 * Don't have to worry about process locking or stacks in the
722 * kernel.
723 */
724 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
725 }
726 if (rv == KERN_SUCCESS)
727 return (0);
728 nogo:
729 if (!usermode) {
730 if (td->td_intr_nesting_level == 0 &&
731 PCPU_GET(curpcb)->pcb_onfault != NULL) {
732 frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
733 return (0);
734 }
735 trap_fatal(frame, eva);
736 return (-1);
737 }
738
739 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
740 }
741
742 static void
743 trap_fatal(frame, eva)
744 struct trapframe *frame;
745 vm_offset_t eva;
746 {
747 int code, ss;
748 u_int type;
749 long esp;
750 struct soft_segment_descriptor softseg;
751 char *msg;
752
753 code = frame->tf_err;
754 type = frame->tf_trapno;
755 sdtossd(&gdt[NGDT * PCPU_GET(cpuid) + IDXSEL(frame->tf_cs & 0xffff)],
756 &softseg);
757
758 if (type <= MAX_TRAP_MSG)
759 msg = trap_msg[type];
760 else
761 msg = "UNKNOWN";
762 printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
763 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
764 #ifdef SMP
765 /* two separate prints in case of a trap on an unmapped page */
766 printf("cpuid = %d; ", PCPU_GET(cpuid));
767 printf("apic id = %02x\n", PCPU_GET(apic_id));
768 #endif
769 if (type == T_PAGEFLT) {
770 printf("fault virtual address = 0x%lx\n", eva);
771 printf("fault code = %s %s %s, %s\n",
772 code & PGEX_U ? "user" : "supervisor",
773 code & PGEX_W ? "write" : "read",
774 code & PGEX_I ? "instruction" : "data",
775 code & PGEX_P ? "protection violation" : "page not present");
776 }
777 printf("instruction pointer = 0x%lx:0x%lx\n",
778 frame->tf_cs & 0xffff, frame->tf_rip);
779 if (ISPL(frame->tf_cs) == SEL_UPL) {
780 ss = frame->tf_ss & 0xffff;
781 esp = frame->tf_rsp;
782 } else {
783 ss = GSEL(GDATA_SEL, SEL_KPL);
784 esp = (long)&frame->tf_rsp;
785 }
786 printf("stack pointer = 0x%x:0x%lx\n", ss, esp);
787 printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
788 printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n",
789 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
790 printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
791 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
792 softseg.ssd_gran);
793 printf("processor eflags = ");
794 if (frame->tf_rflags & PSL_T)
795 printf("trace trap, ");
796 if (frame->tf_rflags & PSL_I)
797 printf("interrupt enabled, ");
798 if (frame->tf_rflags & PSL_NT)
799 printf("nested task, ");
800 if (frame->tf_rflags & PSL_RF)
801 printf("resume, ");
802 printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
803 printf("current process = ");
804 if (curproc) {
805 printf("%lu (%s)\n",
806 (u_long)curproc->p_pid, curthread->td_name ?
807 curthread->td_name : "");
808 } else {
809 printf("Idle\n");
810 }
811
812 #ifdef KDB
813 if (debugger_on_panic || kdb_active)
814 if (kdb_trap(type, 0, frame))
815 return;
816 #endif
817 printf("trap number = %d\n", type);
818 if (type <= MAX_TRAP_MSG)
819 panic("%s", trap_msg[type]);
820 else
821 panic("unknown/reserved trap");
822 }
823
824 /*
825 * Double fault handler. Called when a fault occurs while writing
826 * a frame for a trap/exception onto the stack. This usually occurs
827 * when the stack overflows (such is the case with infinite recursion,
828 * for example).
829 */
830 void
831 dblfault_handler(struct trapframe *frame)
832 {
833 #ifdef KDTRACE_HOOKS
834 if (dtrace_doubletrap_func != NULL)
835 (*dtrace_doubletrap_func)();
836 #endif
837 printf("\nFatal double fault\n");
838 printf("rip = 0x%lx\n", frame->tf_rip);
839 printf("rsp = 0x%lx\n", frame->tf_rsp);
840 printf("rbp = 0x%lx\n", frame->tf_rbp);
841 #ifdef SMP
842 /* two separate prints in case of a trap on an unmapped page */
843 printf("cpuid = %d; ", PCPU_GET(cpuid));
844 printf("apic id = %02x\n", PCPU_GET(apic_id));
845 #endif
846 panic("double fault");
847 }
848
849 int
850 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
851 {
852 struct proc *p;
853 struct trapframe *frame;
854 register_t *argp;
855 caddr_t params;
856 int reg, regcnt, error;
857
858 p = td->td_proc;
859 frame = td->td_frame;
860 reg = 0;
861 regcnt = 6;
862
863 params = (caddr_t)frame->tf_rsp + sizeof(register_t);
864 sa->code = frame->tf_rax;
865
866 if (sa->code == SYS_syscall || sa->code == SYS___syscall) {
867 sa->code = frame->tf_rdi;
868 reg++;
869 regcnt--;
870 }
871 if (p->p_sysent->sv_mask)
872 sa->code &= p->p_sysent->sv_mask;
873
874 if (sa->code >= p->p_sysent->sv_size)
875 sa->callp = &p->p_sysent->sv_table[0];
876 else
877 sa->callp = &p->p_sysent->sv_table[sa->code];
878
879 sa->narg = sa->callp->sy_narg;
880 KASSERT(sa->narg <= sizeof(sa->args) / sizeof(sa->args[0]),
881 ("Too many syscall arguments!"));
882 error = 0;
883 argp = &frame->tf_rdi;
884 argp += reg;
885 bcopy(argp, sa->args, sizeof(sa->args[0]) * regcnt);
886 if (sa->narg > regcnt) {
887 KASSERT(params != NULL, ("copyin args with no params!"));
888 error = copyin(params, &sa->args[regcnt],
889 (sa->narg - regcnt) * sizeof(sa->args[0]));
890 }
891
892 if (error == 0) {
893 td->td_retval[0] = 0;
894 td->td_retval[1] = frame->tf_rdx;
895 }
896
897 return (error);
898 }
899
900 #include "../../kern/subr_syscall.c"
901
902 /*
903 * syscall - system call request C handler
904 *
905 * A system call is essentially treated as a trap.
906 */
907 void
908 amd64_syscall(struct thread *td, int traced)
909 {
910 struct syscall_args sa;
911 int error;
912 ksiginfo_t ksi;
913
914 #ifdef DIAGNOSTIC
915 if (ISPL(td->td_frame->tf_cs) != SEL_UPL) {
916 panic("syscall");
917 /* NOT REACHED */
918 }
919 #endif
920 error = syscallenter(td, &sa);
921
922 /*
923 * Traced syscall.
924 */
925 if (__predict_false(traced)) {
926 td->td_frame->tf_rflags &= ~PSL_T;
927 ksiginfo_init_trap(&ksi);
928 ksi.ksi_signo = SIGTRAP;
929 ksi.ksi_code = TRAP_TRACE;
930 ksi.ksi_addr = (void *)td->td_frame->tf_rip;
931 trapsignal(td, &ksi);
932 }
933
934 KASSERT(PCB_USER_FPU(td->td_pcb),
935 ("System call %s returing with kernel FPU ctx leaked",
936 syscallname(td->td_proc, sa.code)));
937 KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
938 ("System call %s returning with mangled pcb_save",
939 syscallname(td->td_proc, sa.code)));
940
941 syscallret(td, error, &sa);
942
943 /*
944 * If the user-supplied value of %rip is not a canonical
945 * address, then some CPUs will trigger a ring 0 #GP during
946 * the sysret instruction. However, the fault handler would
947 * execute with the user's %gs and %rsp in ring 0 which would
948 * not be safe. Instead, preemptively kill the thread with a
949 * SIGBUS.
950 */
951 if (td->td_frame->tf_rip >= VM_MAXUSER_ADDRESS) {
952 ksiginfo_init_trap(&ksi);
953 ksi.ksi_signo = SIGBUS;
954 ksi.ksi_code = BUS_OBJERR;
955 ksi.ksi_trapno = T_PROTFLT;
956 ksi.ksi_addr = (void *)td->td_frame->tf_rip;
957 trapsignal(td, &ksi);
958 }
959 }
Cache object: ff923986006a0348e1e718eccda9e0e0
|