FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/trap.c
1 /*-
2 * Copyright (C) 1994, David Greenman
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the University of Utah, and William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
38 */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD: releng/6.4/sys/amd64/amd64/trap.c 173828 2007-11-21 16:41:51Z jhb $");
42
43 /*
44 * AMD64 Trap and System call handling
45 */
46
47 #include "opt_clock.h"
48 #include "opt_cpu.h"
49 #include "opt_hwpmc_hooks.h"
50 #include "opt_isa.h"
51 #include "opt_kdb.h"
52 #include "opt_ktrace.h"
53
54 #include <sys/param.h>
55 #include <sys/bus.h>
56 #include <sys/systm.h>
57 #include <sys/proc.h>
58 #include <sys/pioctl.h>
59 #include <sys/ptrace.h>
60 #include <sys/kdb.h>
61 #include <sys/kernel.h>
62 #include <sys/ktr.h>
63 #include <sys/lock.h>
64 #include <sys/mutex.h>
65 #include <sys/resourcevar.h>
66 #include <sys/signalvar.h>
67 #include <sys/syscall.h>
68 #include <sys/sysctl.h>
69 #include <sys/sysent.h>
70 #include <sys/uio.h>
71 #include <sys/vmmeter.h>
72 #ifdef KTRACE
73 #include <sys/ktrace.h>
74 #endif
75 #ifdef HWPMC_HOOKS
76 #include <sys/pmckern.h>
77 #endif
78
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_kern.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_extern.h>
86
87 #include <machine/cpu.h>
88 #include <machine/intr_machdep.h>
89 #include <machine/md_var.h>
90 #include <machine/pcb.h>
91 #ifdef SMP
92 #include <machine/smp.h>
93 #endif
94 #include <machine/tss.h>
95 #include <security/audit/audit.h>
96
/*
 * Entry points called from the assembly IDT stubs (exception.S).  trap()
 * and syscall() receive the trapframe by value because the stub builds it
 * directly on the stack; dblfault_handler() runs on its own stack.
 */
97 extern void trap(struct trapframe frame);
98 extern void syscall(struct trapframe frame);
99 void dblfault_handler(struct trapframe frame);
100
/* Internal helpers; see definitions below for their return conventions. */
101 static int trap_pfault(struct trapframe *, int);
102 static void trap_fatal(struct trapframe *, vm_offset_t);
103
/*
 * Human-readable names for trap numbers, indexed directly by tf_trapno.
 * The table must stay in sync with the T_* constants: entry N describes
 * trap N, empty strings mark unused numbers, and MAX_TRAP_MSG is the
 * largest valid index (callers check "type <= MAX_TRAP_MSG" before use).
 */
104 #define MAX_TRAP_MSG 30
105 static char *trap_msg[] = {
106 "", /* 0 unused */
107 "privileged instruction fault", /* 1 T_PRIVINFLT */
108 "", /* 2 unused */
109 "breakpoint instruction fault", /* 3 T_BPTFLT */
110 "", /* 4 unused */
111 "", /* 5 unused */
112 "arithmetic trap", /* 6 T_ARITHTRAP */
113 "", /* 7 unused */
114 "", /* 8 unused */
115 "general protection fault", /* 9 T_PROTFLT */
116 "trace trap", /* 10 T_TRCTRAP */
117 "", /* 11 unused */
118 "page fault", /* 12 T_PAGEFLT */
119 "", /* 13 unused */
120 "alignment fault", /* 14 T_ALIGNFLT */
121 "", /* 15 unused */
122 "", /* 16 unused */
123 "", /* 17 unused */
124 "integer divide fault", /* 18 T_DIVIDE */
125 "non-maskable interrupt trap", /* 19 T_NMI */
126 "overflow trap", /* 20 T_OFLOW */
127 "FPU bounds check fault", /* 21 T_BOUND */
128 "FPU device not available", /* 22 T_DNA */
129 "double fault", /* 23 T_DOUBLEFLT */
130 "FPU operand fetch fault", /* 24 T_FPOPFLT */
131 "invalid TSS fault", /* 25 T_TSSFLT */
132 "segment not present fault", /* 26 T_SEGNPFLT */
133 "stack fault", /* 27 T_STKFLT */
134 "machine check trap", /* 28 T_MCHK */
135 "SIMD floating-point exception", /* 29 T_XMMFLT */
136 "reserved (unknown) fault", /* 30 T_RESERVED */
137 };
138
/*
 * Run-time NMI policy knobs, exported as machdep.* sysctls.  With KDB
 * compiled in, kdb_on_nmi drops into the debugger on an unrecognized NMI;
 * otherwise panic_on_nmi decides whether such an NMI is fatal.  Both are
 * consulted in the T_NMI cases of trap() below.
 */
139 #ifdef KDB
140 static int kdb_on_nmi = 1;
141 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
142 &kdb_on_nmi, 0, "Go to KDB on NMI");
143 #endif
144 static int panic_on_nmi = 1;
145 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
146 &panic_on_nmi, 0, "Panic on NMI");
147
148 #ifdef WITNESS
/* Syscall name table, used only for the WITNESS_WARN message in syscall(). */
149 extern char *syscallnames[];
150 #endif
151
152 /*
153 * Exception, fault, and trap interface to the FreeBSD kernel.
154 * This common code is called from assembly language IDT gate entry
155 * routines that prepare a suitable stack frame, and restore this
156 * frame after the exception has been processed.
157 */
158
/*
 * trap() - common C-level handler for all processor exceptions.
 *
 * Dispatches on frame.tf_trapno.  For traps taken from user mode
 * (ISPL(tf_cs) == SEL_UPL) it translates the trap into a signal number
 * 'i' and si_code 'ucode' and posts it with trapsignal(), then returns
 * to user mode through userret().  For traps taken in kernel mode it
 * either fixes up the fault in place (onfault recovery, PSL_NT clearing,
 * doreti_iret rewrite, debugger entry, ...) and jumps to 'out', or falls
 * through to trap_fatal().
 *
 * Labels: 'user' runs userret() after a successfully handled user fault;
 * 'userout'/'out' return without posting a signal.
 */
159 void
160 trap(frame)
161 struct trapframe frame;
162 {
163 struct thread *td = curthread;
/* NOTE(review): td/p assume curthread is valid this early; kernel NMIs
 * during boot presumably rely on the 'cold' KASSERT below — confirm. */
164 struct proc *p = td->td_proc;
/* sticks: syscall-tick snapshot for userret(); i: signal to post;
 * ucode: si_code value; code: raw hardware error code from tf_err. */
165 u_int sticks = 0, type;
166 int i = 0, ucode = 0, code;
167
168 PCPU_LAZY_INC(cnt.v_trap);
169 type = frame.tf_trapno;
170
171 #ifdef KDB_STOP_NMI
172 /* Handler for NMI IPIs used for debugging */
173 if (type == T_NMI) {
174 if (ipi_nmi_handler() == 0)
175 goto out;
176 }
177 #endif /* KDB_STOP_NMI */
178
179 #ifdef KDB
/* A trap while the debugger is active re-enters the debugger. */
180 if (kdb_active) {
181 kdb_reenter();
182 goto out;
183 }
184 #endif
185
186 #ifdef HWPMC_HOOKS
187 /*
188 * CPU PMCs interrupt using an NMI. If the PMC module is
189 * active, pass the 'rip' value to the PMC module's interrupt
190 * handler. A return value of '1' from the handler means that
191 * the NMI was handled by it and we can return immediately.
192 */
193 if (type == T_NMI && pmc_intr &&
194 (*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame.tf_rip,
195 TRAPF_USERMODE(&frame)))
196 goto out;
197 #endif
198
199 if ((frame.tf_rflags & PSL_I) == 0) {
200 /*
201 * Buggy application or kernel code has disabled
202 * interrupts and then trapped. Enabling interrupts
203 * now is wrong, but it is better than running with
204 * interrupts disabled until they are accidentally
205 * enabled later.
206 */
207 if (ISPL(frame.tf_cs) == SEL_UPL)
208 printf(
209 "pid %ld (%s): trap %d with interrupts disabled\n",
210 (long)curproc->p_pid, curproc->p_comm, type);
211 else if (type != T_NMI && type != T_BPTFLT &&
212 type != T_TRCTRAP) {
213 /*
214 * XXX not quite right, since this may be for a
215 * multiple fault in user mode.
216 */
217 printf("kernel trap %d with interrupts disabled\n",
218 type);
219 /*
220 * We shouldn't enable interrupts while in a critical
221 * section.
222 */
223 if (td->td_critnest == 0)
224 enable_intr();
225 }
226 }
227
228 code = frame.tf_err;
229 if (type == T_PAGEFLT) {
230 /*
231 * If we get a page fault while in a critical section, then
232 * it is most likely a fatal kernel page fault. The kernel
233 * is already going to panic trying to get a sleep lock to
234 * do the VM lookup, so just consider it a fatal trap so the
235 * kernel can print out a useful trap message and even get
236 * to the debugger.
237 */
238 if (td->td_critnest != 0)
239 trap_fatal(&frame, frame.tf_addr);
240 }
241
242 if (ISPL(frame.tf_cs) == SEL_UPL) {
243 /* user trap */
244
245 sticks = td->td_sticks;
246 td->td_frame = &frame;
247 if (td->td_ucred != p->p_ucred)
248 cred_update_thread(td);
249
250 switch (type) {
251 case T_PRIVINFLT: /* privileged instruction fault */
252 ucode = type;
253 i = SIGILL;
254 break;
255
256 case T_BPTFLT: /* bpt instruction fault */
257 case T_TRCTRAP: /* trace trap */
258 enable_intr();
259 frame.tf_rflags &= ~PSL_T;
260 i = SIGTRAP;
261 break;
262
263 case T_ARITHTRAP: /* arithmetic trap */
/* fputrap() returns the FPE_* code, or -1 if there is nothing to post. */
264 ucode = fputrap();
265 if (ucode == -1)
266 goto userout;
267 i = SIGFPE;
268 break;
269
270 case T_PROTFLT: /* general protection fault */
271 case T_STKFLT: /* stack fault */
272 case T_SEGNPFLT: /* segment not present fault */
273 case T_TSSFLT: /* invalid TSS fault */
274 case T_DOUBLEFLT: /* double fault */
275 default:
276 ucode = code + BUS_SEGM_FAULT ;
277 i = SIGBUS;
278 break;
279
280 case T_PAGEFLT: /* page fault */
281 if (td->td_pflags & TDP_SA)
282 thread_user_enter(td);
/* trap_pfault: -1 = already fatal, 0 = resolved, else signal number. */
283 i = trap_pfault(&frame, TRUE);
284 if (i == -1)
285 goto userout;
286 if (i == 0)
287 goto user;
288
289 ucode = T_PAGEFLT;
290 break;
291
292 case T_DIVIDE: /* integer divide fault */
293 ucode = FPE_INTDIV;
294 i = SIGFPE;
295 break;
296
297 #ifdef DEV_ISA
298 case T_NMI:
299 /* machine/parity/power fail/"kitchen sink" faults */
300 /* XXX Giant */
301 if (isa_nmi(code) == 0) {
302 #ifdef KDB
303 /*
304 * NMI can be hooked up to a pushbutton
305 * for debugging.
306 */
307 if (kdb_on_nmi) {
308 printf ("NMI ... going to debugger\n");
309 kdb_trap(type, 0, &frame);
310 }
311 #endif /* KDB */
312 goto userout;
313 } else if (panic_on_nmi)
314 panic("NMI indicates hardware failure");
315 break;
316 #endif /* DEV_ISA */
317
318 case T_OFLOW: /* integer overflow fault */
319 ucode = FPE_INTOVF;
320 i = SIGFPE;
321 break;
322
323 case T_BOUND: /* bounds check fault */
324 ucode = FPE_FLTSUB;
325 i = SIGFPE;
326 break;
327
328 case T_DNA:
329 /* transparent fault (due to context switch "late") */
/* fpudna() != 0 means the FPU state was restored and the faulting
 * instruction can simply be retried — no signal needed. */
330 if (fpudna())
331 goto userout;
332 i = SIGFPE;
333 ucode = FPE_FPU_NP_TRAP;
334 break;
335
336 case T_FPOPFLT: /* FPU operand fetch fault */
337 ucode = T_FPOPFLT;
338 i = SIGILL;
339 break;
340
341 case T_XMMFLT: /* SIMD floating-point exception */
342 ucode = 0; /* XXX */
343 i = SIGFPE;
344 break;
345 }
346 } else {
347 /* kernel trap */
348
349 KASSERT(cold || td->td_ucred != NULL,
350 ("kernel trap doesn't have ucred"));
351 switch (type) {
352 case T_PAGEFLT: /* page fault */
353 (void) trap_pfault(&frame, FALSE);
354 goto out;
355
356 case T_DNA:
357 /*
358 * The kernel is apparently using fpu for copying.
359 * XXX this should be fatal unless the kernel has
360 * registered such use.
361 */
362 if (fpudna()) {
363 printf("fpudna in kernel mode!\n");
364 goto out;
365 }
366 break;
367
368 case T_STKFLT: /* stack fault */
369 break;
370
371 case T_PROTFLT: /* general protection fault */
372 case T_SEGNPFLT: /* segment not present fault */
/* Faults taken inside an interrupt handler get no fixup: fall
 * through to trap_fatal() below. */
373 if (td->td_intr_nesting_level != 0)
374 break;
375
376 /*
377 * Invalid segment selectors and out of bounds
378 * %rip's and %rsp's can be set up in user mode.
379 * This causes a fault in kernel mode when the
380 * kernel tries to return to user mode. We want
381 * to get this fault so that we can fix the
382 * problem here and not have to check all the
383 * selectors and pointers when the user changes
384 * them.
385 */
386 if (frame.tf_rip == (long)doreti_iret) {
387 frame.tf_rip = (long)doreti_iret_fault;
388 goto out;
389 }
/* copyin/copyout-style recovery: resume at the registered
 * onfault handler instead of panicking. */
390 if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
391 frame.tf_rip =
392 (long)PCPU_GET(curpcb)->pcb_onfault;
393 goto out;
394 }
395 break;
396
397 case T_TSSFLT:
398 /*
399 * PSL_NT can be set in user mode and isn't cleared
400 * automatically when the kernel is entered. This
401 * causes a TSS fault when the kernel attempts to
402 * `iret' because the TSS link is uninitialized. We
403 * want to get this fault so that we can fix the
404 * problem here and not every time the kernel is
405 * entered.
406 */
407 if (frame.tf_rflags & PSL_NT) {
408 frame.tf_rflags &= ~PSL_NT;
409 goto out;
410 }
411 break;
412
413 case T_TRCTRAP: /* trace trap */
414 /*
415 * Ignore debug register trace traps due to
416 * accesses in the user's address space, which
417 * can happen under several conditions such as
418 * if a user sets a watchpoint on a buffer and
419 * then passes that buffer to a system call.
420 * We still want to get TRCTRAPS for addresses
421 * in kernel space because that is useful when
422 * debugging the kernel.
423 */
424 if (user_dbreg_trap()) {
425 /*
426 * Reset breakpoint bits because the
427 * processor doesn't
428 */
429 /* XXX check upper bits here */
430 load_dr6(rdr6() & 0xfffffff0);
431 goto out;
432 }
433 /*
434 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
435 */
436 case T_BPTFLT:
437 /*
438 * If KDB is enabled, let it handle the debugger trap.
439 * Otherwise, debugger traps "can't happen".
440 */
441 #ifdef KDB
442 /* XXX Giant */
443 if (kdb_trap(type, 0, &frame))
444 goto out;
445 #endif
446 break;
447
448 #ifdef DEV_ISA
449 case T_NMI:
450 /* XXX Giant */
451 /* machine/parity/power fail/"kitchen sink" faults */
452 if (isa_nmi(code) == 0) {
453 #ifdef KDB
454 /*
455 * NMI can be hooked up to a pushbutton
456 * for debugging.
457 */
458 if (kdb_on_nmi) {
459 printf ("NMI ... going to debugger\n");
460 kdb_trap(type, 0, &frame);
461 }
462 #endif /* KDB */
463 goto out;
464 } else if (panic_on_nmi == 0)
465 goto out;
466 /* FALLTHROUGH */
467 #endif /* DEV_ISA */
468 }
469
/* Any kernel-mode trap not handled above is fatal. */
470 trap_fatal(&frame, 0);
471 goto out;
472 }
473
474 /* Translate fault for emulators (e.g. Linux) */
475 if (*p->p_sysent->sv_transtrap)
476 i = (*p->p_sysent->sv_transtrap)(i, type);
477
478 trapsignal(td, i, ucode);
479
480 #ifdef DEBUG
481 if (type <= MAX_TRAP_MSG) {
482 uprintf("fatal process exception: %s",
483 trap_msg[type]);
484 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
485 uprintf(", fault VA = 0x%lx", frame.tf_addr);
486 uprintf("\n");
487 }
488 #endif
489
490 user:
491 userret(td, &frame, sticks);
492 mtx_assert(&Giant, MA_NOTOWNED);
493 userout:
494 out:
495 return;
496 }
497
/*
 * trap_pfault() - resolve a page fault through the VM system.
 *
 * frame    - trapframe of the fault; tf_addr holds the faulting virtual
 *            address, tf_err the hardware error code (PGEX_* bits).
 * usermode - TRUE when the fault came from user mode.
 *
 * Returns 0 if the fault was resolved (or redirected to pcb_onfault),
 * -1 if trap_fatal() was invoked for an unrecoverable kernel fault, or
 * the signal number (SIGBUS/SIGSEGV) the caller should deliver.
 */
498 static int
499 trap_pfault(frame, usermode)
500 struct trapframe *frame;
501 int usermode;
502 {
503 vm_offset_t va;
504 struct vmspace *vm = NULL;
505 vm_map_t map = 0;
506 int rv = 0;
507 vm_prot_t ftype;
508 struct thread *td = curthread;
509 struct proc *p = td->td_proc;
510 vm_offset_t eva = frame->tf_addr;
511
512 va = trunc_page(eva);
513 if (va >= KERNBASE) {
514 /*
515 * Don't allow user-mode faults in kernel address space.
516 */
517 if (usermode)
518 goto nogo;
519
520 map = kernel_map;
521 } else {
522 /*
523 * This is a fault on non-kernel virtual memory.
524 * vm is initialized above to NULL. If curproc is NULL
525 * or curproc->p_vmspace is NULL the fault is fatal.
526 */
527 if (p != NULL)
528 vm = p->p_vmspace;
529
530 if (vm == NULL)
531 goto nogo;
532
533 map = &vm->vm_map;
534 }
535
536 /*
537 * PGEX_I is defined only if the execute disable bit capability is
538 * supported and enabled.
539 */
/* Derive the access type from the hardware error code: write beats
 * execute beats read. */
540 if (frame->tf_err & PGEX_W)
541 ftype = VM_PROT_WRITE;
542 else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
543 ftype = VM_PROT_EXECUTE;
544 else
545 ftype = VM_PROT_READ;
546
547 if (map != kernel_map) {
548 /*
549 * Keep swapout from messing with us during this
550 * critical time.
551 */
/* p_lock is a hold count; raising it pins the process while the
 * fault is serviced, and is dropped again below. */
552 PROC_LOCK(p);
553 ++p->p_lock;
554 PROC_UNLOCK(p);
555
556 /* Fault in the user page: */
557 rv = vm_fault(map, va, ftype,
558 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
559 : VM_FAULT_NORMAL);
560
561 PROC_LOCK(p);
562 --p->p_lock;
563 PROC_UNLOCK(p);
564 } else {
565 /*
566 * Don't have to worry about process locking or stacks in the
567 * kernel.
568 */
569 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
570 }
571 if (rv == KERN_SUCCESS)
572 return (0);
573 nogo:
574 if (!usermode) {
/* Kernel-mode fault: if a copyin/copyout-style recovery handler is
 * registered (and we are not in an interrupt), resume there. */
575 if (td->td_intr_nesting_level == 0 &&
576 PCPU_GET(curpcb)->pcb_onfault != NULL) {
577 frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
578 return (0);
579 }
580 trap_fatal(frame, eva);
581 return (-1);
582 }
583
584 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
585 }
586
/*
 * trap_fatal() - report an unrecoverable trap and panic.
 *
 * Prints a detailed register/segment/flags dump for the trap described
 * by 'frame' ('eva' is the faulting address for page faults, 0 otherwise),
 * optionally enters the debugger, and panics with the trap's name.
 * Only returns if kdb_trap() handled the trap.  The print statements are
 * deliberately kept as many small calls so partial output survives a
 * secondary fault during printing.
 */
587 static void
588 trap_fatal(frame, eva)
589 struct trapframe *frame;
590 vm_offset_t eva;
591 {
592 int code, ss;
593 u_int type;
594 long esp;
595 struct soft_segment_descriptor softseg;
596 char *msg;
597
598 code = frame->tf_err;
599 type = frame->tf_trapno;
/* Decode the faulting code segment's GDT descriptor for the dump. */
600 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);
601
602 if (type <= MAX_TRAP_MSG)
603 msg = trap_msg[type];
604 else
605 msg = "UNKNOWN";
606 printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
607 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
608 #ifdef SMP
609 /* two separate prints in case of a trap on an unmapped page */
610 printf("cpuid = %d; ", PCPU_GET(cpuid));
611 printf("apic id = %02x\n", PCPU_GET(apic_id));
612 #endif
613 if (type == T_PAGEFLT) {
614 printf("fault virtual address = 0x%lx\n", eva);
615 printf("fault code = %s %s %s, %s\n",
616 code & PGEX_U ? "user" : "supervisor",
617 code & PGEX_W ? "write" : "read",
618 code & PGEX_I ? "instruction" : "data",
619 code & PGEX_P ? "protection violation" : "page not present");
620 }
621 printf("instruction pointer = 0x%lx:0x%lx\n",
622 frame->tf_cs & 0xffff, frame->tf_rip);
623 if (ISPL(frame->tf_cs) == SEL_UPL) {
624 ss = frame->tf_ss & 0xffff;
625 esp = frame->tf_rsp;
626 } else {
/* Kernel-mode trap: tf_ss/tf_rsp are not pushed, so report the
 * kernel data selector and the frame's own location instead. */
627 ss = GSEL(GDATA_SEL, SEL_KPL);
628 esp = (long)&frame->tf_rsp;
629 }
630 printf("stack pointer = 0x%x:0x%lx\n", ss, esp);
631 printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
632 printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n",
633 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
634 printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
635 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
636 softseg.ssd_gran);
637 printf("processor eflags = ");
638 if (frame->tf_rflags & PSL_T)
639 printf("trace trap, ");
640 if (frame->tf_rflags & PSL_I)
641 printf("interrupt enabled, ");
642 if (frame->tf_rflags & PSL_NT)
643 printf("nested task, ");
644 if (frame->tf_rflags & PSL_RF)
645 printf("resume, ");
646 printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
647 printf("current process = ");
648 if (curproc) {
649 printf("%lu (%s)\n",
650 (u_long)curproc->p_pid, curproc->p_comm ?
651 curproc->p_comm : "");
652 } else {
653 printf("Idle\n");
654 }
655
656 #ifdef KDB
/* Give the debugger a chance before panicking; interrupts are masked
 * around kdb_trap() and restored on either path. */
657 if (debugger_on_panic || kdb_active) {
658 register_t rflags;
659 rflags = intr_disable();
660 if (kdb_trap(type, 0, frame)) {
661 intr_restore(rflags);
662 return;
663 }
664 intr_restore(rflags);
665 }
666 #endif
667 printf("trap number = %d\n", type);
668 if (type <= MAX_TRAP_MSG)
669 panic("%s", trap_msg[type]);
670 else
671 panic("unknown/reserved trap");
672 }
673
674 /*
675 * Double fault handler. Called when a fault occurs while writing
676 * a frame for a trap/exception onto the stack. This usually occurs
677 * when the stack overflows (such is the case with infinite recursion,
678 * for example).
679 */
/*
 * dblfault_handler() - terminal handler for CPU double faults.
 *
 * Dumps the minimal register state from 'frame' and panics; it never
 * returns.  Output is split into small printf calls so that as much as
 * possible reaches the console even if printing itself faults.
 */
680 void
681 dblfault_handler(struct trapframe frame)
682 {
683 printf("\nFatal double fault\n");
684 printf("rip = 0x%lx\n", frame.tf_rip);
685 printf("rsp = 0x%lx\n", frame.tf_rsp);
686 printf("rbp = 0x%lx\n", frame.tf_rbp);
687 #ifdef SMP
688 /* two separate prints in case of a trap on an unmapped page */
689 printf("cpuid = %d; ", PCPU_GET(cpuid));
690 printf("apic id = %02x\n", PCPU_GET(apic_id));
691 #endif
692 panic("double fault");
693 }
694
695 /*
696 * syscall - system call request C handler
697 *
698 * A system call is essentially treated as a trap.
699 */
/*
 * syscall() - C handler for the 'syscall' fast system call instruction.
 *
 * Decodes the system call number from %rax (indirecting through
 * SYS_syscall/SYS___syscall when needed), gathers up to 6 arguments from
 * the registers starting at tf_rdi plus any overflow from the user stack,
 * dispatches through the process's sysent table, and stores the result
 * back into the frame (%rax/%rdx, with PSL_C signalling error to libc).
 * ERESTART rewinds %rip by the instruction length saved in tf_err and
 * requests a full context restore so %r10/%rcx are re-staged.
 *
 * BUGFIX(review): the argument to sv_prepsyscall read "¶ms" — an
 * HTML-entity corruption (&para;) of "&params" — which is not valid C.
 * Restored to "&params".
 */
700 void
701 syscall(frame)
702 struct trapframe frame;
703 {
704 caddr_t params;
705 struct sysent *callp;
706 struct thread *td = curthread;
707 struct proc *p = td->td_proc;
708 register_t orig_tf_rflags;
709 u_int sticks;
710 int error;
711 int narg;
712 register_t args[8];
713 register_t *argp;
714 u_int code;
/* reg: index of first register argument (skips the indirect syscall
 * number); regcnt: how many arguments arrive in registers. */
715 int reg, regcnt;
716
717 /*
718 * note: PCPU_LAZY_INC() can only be used if we can afford
719 * occasional inaccuracy in the count.
720 */
721 PCPU_LAZY_INC(cnt.v_syscall);
722
723 #ifdef DIAGNOSTIC
724 if (ISPL(frame.tf_cs) != SEL_UPL) {
725 mtx_lock(&Giant); /* try to stabilize the system XXX */
726 panic("syscall");
727 /* NOT REACHED */
728 mtx_unlock(&Giant);
729 }
730 #endif
731
732 reg = 0;
733 regcnt = 6;
734 sticks = td->td_sticks;
735 td->td_frame = &frame;
736 if (td->td_ucred != p->p_ucred)
737 cred_update_thread(td);
738 if (p->p_flag & P_SA)
739 thread_user_enter(td);
/* Stack-spilled arguments start just above the return address. */
740 params = (caddr_t)frame.tf_rsp + sizeof(register_t);
741 code = frame.tf_rax;
742 orig_tf_rflags = frame.tf_rflags;
743
744 if (p->p_sysent->sv_prepsyscall) {
745 /*
746 * The prep code is MP aware.
747 */
748 (*p->p_sysent->sv_prepsyscall)(&frame, (int *)args, &code, &params);
749 } else {
/* Indirect syscall: real number is in the first argument register,
 * so shift the argument window right by one. */
750 if (code == SYS_syscall || code == SYS___syscall) {
751 code = frame.tf_rdi;
752 reg++;
753 regcnt--;
754 }
755 }
756
757 if (p->p_sysent->sv_mask)
758 code &= p->p_sysent->sv_mask;
759
/* Out-of-range numbers dispatch to entry 0 (the nosys handler slot). */
760 if (code >= p->p_sysent->sv_size)
761 callp = &p->p_sysent->sv_table[0];
762 else
763 callp = &p->p_sysent->sv_table[code];
764
765 narg = callp->sy_narg & SYF_ARGMASK;
766
767 /*
768 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
769 */
770 KASSERT(narg <= sizeof(args) / sizeof(args[0]),
771 ("Too many syscall arguments!"));
772 error = 0;
/* Register args are contiguous in the trapframe starting at tf_rdi. */
773 argp = &frame.tf_rdi;
774 argp += reg;
775 bcopy(argp, args, sizeof(args[0]) * regcnt);
776 if (narg > regcnt) {
777 KASSERT(params != NULL, ("copyin args with no params!"));
778 error = copyin(params, &args[regcnt],
779 (narg - regcnt) * sizeof(args[0]));
780 }
781 argp = &args[0];
782
783 #ifdef KTRACE
784 if (KTRPOINT(td, KTR_SYSCALL))
785 ktrsyscall(code, narg, argp);
786 #endif
787
788 CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
789 td->td_proc->p_pid, td->td_proc->p_comm, code);
790
791 if (error == 0) {
792 td->td_retval[0] = 0;
793 td->td_retval[1] = frame.tf_rdx;
794
795 STOPEVENT(p, S_SCE, narg);
796
797 PTRACESTOP_SC(p, td, S_PT_SCE);
798
/* Non-MPSAFE syscalls still run under Giant on this branch. */
799 if ((callp->sy_narg & SYF_MPSAFE) == 0) {
800 mtx_lock(&Giant);
801 AUDIT_SYSCALL_ENTER(code, td);
802 error = (*callp->sy_call)(td, argp);
803 AUDIT_SYSCALL_EXIT(error, td);
804 mtx_unlock(&Giant);
805 } else {
806 AUDIT_SYSCALL_ENTER(code, td);
807 error = (*callp->sy_call)(td, argp);
808 AUDIT_SYSCALL_EXIT(error, td);
809 }
810 }
811
812 switch (error) {
813 case 0:
814 frame.tf_rax = td->td_retval[0];
815 frame.tf_rdx = td->td_retval[1];
816 frame.tf_rflags &= ~PSL_C;
817 break;
818
819 case ERESTART:
820 /*
821 * Reconstruct pc, we know that 'syscall' is 2 bytes.
822 * We have to do a full context restore so that %r10
823 * (which was holding the value of %rcx) is restored for
824 * the next iteration.
825 */
826 frame.tf_rip -= frame.tf_err;
827 frame.tf_r10 = frame.tf_rcx;
828 td->td_pcb->pcb_flags |= PCB_FULLCTX;
829 break;
830
831 case EJUSTRETURN:
832 break;
833
834 default:
/* Translate native errno through the ABI's error table, if any. */
835 if (p->p_sysent->sv_errsize) {
836 if (error >= p->p_sysent->sv_errsize)
837 error = -1; /* XXX */
838 else
839 error = p->p_sysent->sv_errtbl[error];
840 }
841 frame.tf_rax = error;
842 frame.tf_rflags |= PSL_C;
843 break;
844 }
845
846 /*
847 * Traced syscall.
848 */
849 if (orig_tf_rflags & PSL_T) {
850 frame.tf_rflags &= ~PSL_T;
851 trapsignal(td, SIGTRAP, 0);
852 }
853
854 /*
855 * Handle reschedule and other end-of-syscall issues
856 */
857 userret(td, &frame, sticks);
858
859 CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
860 td->td_proc->p_pid, td->td_proc->p_comm, code);
861
862 #ifdef KTRACE
863 if (KTRPOINT(td, KTR_SYSRET))
864 ktrsysret(code, error, td->td_retval[0]);
865 #endif
866
867 /*
868 * This works because errno is findable through the
869 * register set. If we ever support an emulation where this
870 * is not the case, this code will need to be revisited.
871 */
872 STOPEVENT(p, S_SCX, code);
873
874 PTRACESTOP_SC(p, td, S_PT_SCX);
875
876 WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
877 (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
878 mtx_assert(&sched_lock, MA_NOTOWNED);
879 mtx_assert(&Giant, MA_NOTOWNED);
880 }
Cache object: a641e014731698ef10f5d78183cb5728
|