/* FreeBSD/Linux Kernel Cross Reference: sys/amd64/amd64/trap.c */
/*-
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 * This product includes software developed by the University of
 * California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/6.0/sys/amd64/amd64/trap.c 151061 2005-10-07 14:00:06Z glebius $");

/*
 * AMD64 Trap and System call handling
 */

#include "opt_clock.h"
#include "opt_cpu.h"
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
#include "opt_kdb.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/ptrace.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/uio.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>

96 extern void trap(struct trapframe frame);
97 extern void syscall(struct trapframe frame);
98
99 static int trap_pfault(struct trapframe *, int);
100 static void trap_fatal(struct trapframe *, vm_offset_t);
101 void dblfault_handler(void);
102
/*
 * Human-readable names for trap numbers, indexed by trap type.
 * MAX_TRAP_MSG is the highest valid index; users of the table must
 * check "type <= MAX_TRAP_MSG" before indexing.  Unused slots hold
 * an empty string.
 */
#define	MAX_TRAP_MSG		28
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"",					/*  7 unused */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
};

136 #ifdef KDB
137 static int kdb_on_nmi = 1;
138 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
139 &kdb_on_nmi, 0, "Go to KDB on NMI");
140 #endif
141 static int panic_on_nmi = 1;
142 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
143 &panic_on_nmi, 0, "Panic on NMI");
144
145 #ifdef WITNESS
146 extern char *syscallnames[];
147 #endif
148
149 /*
150 * Exception, fault, and trap interface to the FreeBSD kernel.
151 * This common code is called from assembly language IDT gate entry
152 * routines that prepare a suitable stack frame, and restore this
153 * frame after the exception has been processed.
154 */
155
156 void
157 trap(frame)
158 struct trapframe frame;
159 {
160 struct thread *td = curthread;
161 struct proc *p = td->td_proc;
162 u_int sticks = 0;
163 int i = 0, ucode = 0, type, code;
164
165 PCPU_LAZY_INC(cnt.v_trap);
166 type = frame.tf_trapno;
167
168 #ifdef KDB_STOP_NMI
169 /* Handler for NMI IPIs used for debugging */
170 if (type == T_NMI) {
171 if (ipi_nmi_handler() == 0)
172 goto out;
173 }
174 #endif /* KDB_STOP_NMI */
175
176 #ifdef KDB
177 if (kdb_active) {
178 kdb_reenter();
179 goto out;
180 }
181 #endif
182
183 #ifdef HWPMC_HOOKS
184 /*
185 * CPU PMCs interrupt using an NMI. If the PMC module is
186 * active, pass the 'rip' value to the PMC module's interrupt
187 * handler. A return value of '1' from the handler means that
188 * the NMI was handled by it and we can return immediately.
189 */
190 if (type == T_NMI && pmc_intr &&
191 (*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame.tf_rip,
192 TRAPF_USERMODE(&frame)))
193 goto out;
194 #endif
195
196 if ((frame.tf_rflags & PSL_I) == 0) {
197 /*
198 * Buggy application or kernel code has disabled
199 * interrupts and then trapped. Enabling interrupts
200 * now is wrong, but it is better than running with
201 * interrupts disabled until they are accidentally
202 * enabled later.
203 */
204 if (ISPL(frame.tf_cs) == SEL_UPL)
205 printf(
206 "pid %ld (%s): trap %d with interrupts disabled\n",
207 (long)curproc->p_pid, curproc->p_comm, type);
208 else if (type != T_NMI && type != T_BPTFLT &&
209 type != T_TRCTRAP) {
210 /*
211 * XXX not quite right, since this may be for a
212 * multiple fault in user mode.
213 */
214 printf("kernel trap %d with interrupts disabled\n",
215 type);
216 /*
217 * We shouldn't enable interrupts while in a critical
218 * section.
219 */
220 if (td->td_critnest == 0)
221 enable_intr();
222 }
223 }
224
225 code = frame.tf_err;
226 if (type == T_PAGEFLT) {
227 /*
228 * If we get a page fault while in a critical section, then
229 * it is most likely a fatal kernel page fault. The kernel
230 * is already going to panic trying to get a sleep lock to
231 * do the VM lookup, so just consider it a fatal trap so the
232 * kernel can print out a useful trap message and even get
233 * to the debugger.
234 */
235 if (td->td_critnest != 0)
236 trap_fatal(&frame, frame.tf_addr);
237 }
238
239 if (ISPL(frame.tf_cs) == SEL_UPL) {
240 /* user trap */
241
242 sticks = td->td_sticks;
243 td->td_frame = &frame;
244 if (td->td_ucred != p->p_ucred)
245 cred_update_thread(td);
246
247 switch (type) {
248 case T_PRIVINFLT: /* privileged instruction fault */
249 ucode = type;
250 i = SIGILL;
251 break;
252
253 case T_BPTFLT: /* bpt instruction fault */
254 case T_TRCTRAP: /* trace trap */
255 enable_intr();
256 frame.tf_rflags &= ~PSL_T;
257 i = SIGTRAP;
258 break;
259
260 case T_ARITHTRAP: /* arithmetic trap */
261 ucode = fputrap();
262 if (ucode == -1)
263 goto userout;
264 i = SIGFPE;
265 break;
266
267 case T_PROTFLT: /* general protection fault */
268 case T_STKFLT: /* stack fault */
269 case T_SEGNPFLT: /* segment not present fault */
270 case T_TSSFLT: /* invalid TSS fault */
271 case T_DOUBLEFLT: /* double fault */
272 default:
273 ucode = code + BUS_SEGM_FAULT ;
274 i = SIGBUS;
275 break;
276
277 case T_PAGEFLT: /* page fault */
278 if (td->td_pflags & TDP_SA)
279 thread_user_enter(td);
280 i = trap_pfault(&frame, TRUE);
281 if (i == -1)
282 goto userout;
283 if (i == 0)
284 goto user;
285
286 ucode = T_PAGEFLT;
287 break;
288
289 case T_DIVIDE: /* integer divide fault */
290 ucode = FPE_INTDIV;
291 i = SIGFPE;
292 break;
293
294 #ifdef DEV_ISA
295 case T_NMI:
296 /* machine/parity/power fail/"kitchen sink" faults */
297 /* XXX Giant */
298 if (isa_nmi(code) == 0) {
299 #ifdef KDB
300 /*
301 * NMI can be hooked up to a pushbutton
302 * for debugging.
303 */
304 if (kdb_on_nmi) {
305 printf ("NMI ... going to debugger\n");
306 kdb_trap(type, 0, &frame);
307 }
308 #endif /* KDB */
309 goto userout;
310 } else if (panic_on_nmi)
311 panic("NMI indicates hardware failure");
312 break;
313 #endif /* DEV_ISA */
314
315 case T_OFLOW: /* integer overflow fault */
316 ucode = FPE_INTOVF;
317 i = SIGFPE;
318 break;
319
320 case T_BOUND: /* bounds check fault */
321 ucode = FPE_FLTSUB;
322 i = SIGFPE;
323 break;
324
325 case T_DNA:
326 /* transparent fault (due to context switch "late") */
327 if (fpudna())
328 goto userout;
329 i = SIGFPE;
330 ucode = FPE_FPU_NP_TRAP;
331 break;
332
333 case T_FPOPFLT: /* FPU operand fetch fault */
334 ucode = T_FPOPFLT;
335 i = SIGILL;
336 break;
337
338 case T_XMMFLT: /* SIMD floating-point exception */
339 ucode = 0; /* XXX */
340 i = SIGFPE;
341 break;
342 }
343 } else {
344 /* kernel trap */
345
346 KASSERT(cold || td->td_ucred != NULL,
347 ("kernel trap doesn't have ucred"));
348 switch (type) {
349 case T_PAGEFLT: /* page fault */
350 (void) trap_pfault(&frame, FALSE);
351 goto out;
352
353 case T_DNA:
354 /*
355 * The kernel is apparently using fpu for copying.
356 * XXX this should be fatal unless the kernel has
357 * registered such use.
358 */
359 if (fpudna()) {
360 printf("fpudna in kernel mode!\n");
361 goto out;
362 }
363 break;
364
365 case T_STKFLT: /* stack fault */
366 break;
367
368 case T_PROTFLT: /* general protection fault */
369 case T_SEGNPFLT: /* segment not present fault */
370 if (td->td_intr_nesting_level != 0)
371 break;
372
373 /*
374 * Invalid segment selectors and out of bounds
375 * %rip's and %rsp's can be set up in user mode.
376 * This causes a fault in kernel mode when the
377 * kernel tries to return to user mode. We want
378 * to get this fault so that we can fix the
379 * problem here and not have to check all the
380 * selectors and pointers when the user changes
381 * them.
382 */
383 if (frame.tf_rip == (long)doreti_iret) {
384 frame.tf_rip = (long)doreti_iret_fault;
385 goto out;
386 }
387 if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
388 frame.tf_rip =
389 (long)PCPU_GET(curpcb)->pcb_onfault;
390 goto out;
391 }
392 break;
393
394 case T_TSSFLT:
395 /*
396 * PSL_NT can be set in user mode and isn't cleared
397 * automatically when the kernel is entered. This
398 * causes a TSS fault when the kernel attempts to
399 * `iret' because the TSS link is uninitialized. We
400 * want to get this fault so that we can fix the
401 * problem here and not every time the kernel is
402 * entered.
403 */
404 if (frame.tf_rflags & PSL_NT) {
405 frame.tf_rflags &= ~PSL_NT;
406 goto out;
407 }
408 break;
409
410 case T_TRCTRAP: /* trace trap */
411 /*
412 * Ignore debug register trace traps due to
413 * accesses in the user's address space, which
414 * can happen under several conditions such as
415 * if a user sets a watchpoint on a buffer and
416 * then passes that buffer to a system call.
417 * We still want to get TRCTRAPS for addresses
418 * in kernel space because that is useful when
419 * debugging the kernel.
420 */
421 if (user_dbreg_trap()) {
422 /*
423 * Reset breakpoint bits because the
424 * processor doesn't
425 */
426 /* XXX check upper bits here */
427 load_dr6(rdr6() & 0xfffffff0);
428 goto out;
429 }
430 /*
431 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
432 */
433 case T_BPTFLT:
434 /*
435 * If KDB is enabled, let it handle the debugger trap.
436 * Otherwise, debugger traps "can't happen".
437 */
438 #ifdef KDB
439 /* XXX Giant */
440 if (kdb_trap(type, 0, &frame))
441 goto out;
442 #endif
443 break;
444
445 #ifdef DEV_ISA
446 case T_NMI:
447 /* XXX Giant */
448 /* machine/parity/power fail/"kitchen sink" faults */
449 if (isa_nmi(code) == 0) {
450 #ifdef KDB
451 /*
452 * NMI can be hooked up to a pushbutton
453 * for debugging.
454 */
455 if (kdb_on_nmi) {
456 printf ("NMI ... going to debugger\n");
457 kdb_trap(type, 0, &frame);
458 }
459 #endif /* KDB */
460 goto out;
461 } else if (panic_on_nmi == 0)
462 goto out;
463 /* FALLTHROUGH */
464 #endif /* DEV_ISA */
465 }
466
467 trap_fatal(&frame, 0);
468 goto out;
469 }
470
471 /* Translate fault for emulators (e.g. Linux) */
472 if (*p->p_sysent->sv_transtrap)
473 i = (*p->p_sysent->sv_transtrap)(i, type);
474
475 trapsignal(td, i, ucode);
476
477 #ifdef DEBUG
478 if (type <= MAX_TRAP_MSG) {
479 uprintf("fatal process exception: %s",
480 trap_msg[type]);
481 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
482 uprintf(", fault VA = 0x%lx", frame.tf_addr);
483 uprintf("\n");
484 }
485 #endif
486
487 user:
488 userret(td, &frame, sticks);
489 mtx_assert(&Giant, MA_NOTOWNED);
490 userout:
491 out:
492 return;
493 }
494
495 static int
496 trap_pfault(frame, usermode)
497 struct trapframe *frame;
498 int usermode;
499 {
500 vm_offset_t va;
501 struct vmspace *vm = NULL;
502 vm_map_t map = 0;
503 int rv = 0;
504 vm_prot_t ftype;
505 struct thread *td = curthread;
506 struct proc *p = td->td_proc;
507 vm_offset_t eva = frame->tf_addr;
508
509 va = trunc_page(eva);
510 if (va >= KERNBASE) {
511 /*
512 * Don't allow user-mode faults in kernel address space.
513 */
514 if (usermode)
515 goto nogo;
516
517 map = kernel_map;
518 } else {
519 /*
520 * This is a fault on non-kernel virtual memory.
521 * vm is initialized above to NULL. If curproc is NULL
522 * or curproc->p_vmspace is NULL the fault is fatal.
523 */
524 if (p != NULL)
525 vm = p->p_vmspace;
526
527 if (vm == NULL)
528 goto nogo;
529
530 map = &vm->vm_map;
531 }
532
533 if (frame->tf_err & PGEX_W)
534 ftype = VM_PROT_WRITE;
535 else
536 ftype = VM_PROT_READ;
537
538 if (map != kernel_map) {
539 /*
540 * Keep swapout from messing with us during this
541 * critical time.
542 */
543 PROC_LOCK(p);
544 ++p->p_lock;
545 PROC_UNLOCK(p);
546
547 /* Fault in the user page: */
548 rv = vm_fault(map, va, ftype,
549 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
550 : VM_FAULT_NORMAL);
551
552 PROC_LOCK(p);
553 --p->p_lock;
554 PROC_UNLOCK(p);
555 } else {
556 /*
557 * Don't have to worry about process locking or stacks in the
558 * kernel.
559 */
560 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
561 }
562 if (rv == KERN_SUCCESS)
563 return (0);
564 nogo:
565 if (!usermode) {
566 if (td->td_intr_nesting_level == 0 &&
567 PCPU_GET(curpcb)->pcb_onfault != NULL) {
568 frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
569 return (0);
570 }
571 trap_fatal(frame, eva);
572 return (-1);
573 }
574
575 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
576 }
577
578 static void
579 trap_fatal(frame, eva)
580 struct trapframe *frame;
581 vm_offset_t eva;
582 {
583 int code, type, ss;
584 long esp;
585 struct soft_segment_descriptor softseg;
586
587 code = frame->tf_err;
588 type = frame->tf_trapno;
589 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);
590
591 if (type <= MAX_TRAP_MSG)
592 printf("\n\nFatal trap %d: %s while in %s mode\n",
593 type, trap_msg[type],
594 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
595 #ifdef SMP
596 /* two separate prints in case of a trap on an unmapped page */
597 printf("cpuid = %d; ", PCPU_GET(cpuid));
598 printf("apic id = %02x\n", PCPU_GET(apic_id));
599 #endif
600 if (type == T_PAGEFLT) {
601 printf("fault virtual address = 0x%lx\n", eva);
602 printf("fault code = %s %s, %s\n",
603 code & PGEX_U ? "user" : "supervisor",
604 code & PGEX_W ? "write" : "read",
605 code & PGEX_P ? "protection violation" : "page not present");
606 }
607 printf("instruction pointer = 0x%lx:0x%lx\n",
608 frame->tf_cs & 0xffff, frame->tf_rip);
609 if (ISPL(frame->tf_cs) == SEL_UPL) {
610 ss = frame->tf_ss & 0xffff;
611 esp = frame->tf_rsp;
612 } else {
613 ss = GSEL(GDATA_SEL, SEL_KPL);
614 esp = (long)&frame->tf_rsp;
615 }
616 printf("stack pointer = 0x%x:0x%lx\n", ss, esp);
617 printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
618 printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n",
619 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
620 printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
621 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
622 softseg.ssd_gran);
623 printf("processor eflags = ");
624 if (frame->tf_rflags & PSL_T)
625 printf("trace trap, ");
626 if (frame->tf_rflags & PSL_I)
627 printf("interrupt enabled, ");
628 if (frame->tf_rflags & PSL_NT)
629 printf("nested task, ");
630 if (frame->tf_rflags & PSL_RF)
631 printf("resume, ");
632 printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
633 printf("current process = ");
634 if (curproc) {
635 printf("%lu (%s)\n",
636 (u_long)curproc->p_pid, curproc->p_comm ?
637 curproc->p_comm : "");
638 } else {
639 printf("Idle\n");
640 }
641
642 #ifdef KDB
643 if (debugger_on_panic || kdb_active) {
644 register_t rflags;
645 rflags = intr_disable();
646 if (kdb_trap(type, 0, frame)) {
647 intr_restore(rflags);
648 return;
649 }
650 intr_restore(rflags);
651 }
652 #endif
653 printf("trap number = %d\n", type);
654 if (type <= MAX_TRAP_MSG)
655 panic("%s", trap_msg[type]);
656 else
657 panic("unknown/reserved trap");
658 }
659
/*
 * Double fault handler.  Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack.  This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * Prints a short banner (plus the CPU and APIC ids on SMP kernels)
 * and then panics.
 */
void
dblfault_handler(void)
{
	printf("\nFatal double fault\n");
#ifdef SMP
	/* two separate prints in case of a trap on an unmapped page */
	printf("cpuid = %d; ", PCPU_GET(cpuid));
	printf("apic id = %02x\n", PCPU_GET(apic_id));
#endif
	panic("double fault");
}

678 /*
679 * syscall - system call request C handler
680 *
681 * A system call is essentially treated as a trap.
682 */
683 void
684 syscall(frame)
685 struct trapframe frame;
686 {
687 caddr_t params;
688 struct sysent *callp;
689 struct thread *td = curthread;
690 struct proc *p = td->td_proc;
691 register_t orig_tf_rflags;
692 u_int sticks;
693 int error;
694 int narg;
695 register_t args[8];
696 register_t *argp;
697 u_int code;
698 int reg, regcnt;
699
700 /*
701 * note: PCPU_LAZY_INC() can only be used if we can afford
702 * occassional inaccuracy in the count.
703 */
704 PCPU_LAZY_INC(cnt.v_syscall);
705
706 #ifdef DIAGNOSTIC
707 if (ISPL(frame.tf_cs) != SEL_UPL) {
708 mtx_lock(&Giant); /* try to stabilize the system XXX */
709 panic("syscall");
710 /* NOT REACHED */
711 mtx_unlock(&Giant);
712 }
713 #endif
714
715 reg = 0;
716 regcnt = 6;
717 sticks = td->td_sticks;
718 td->td_frame = &frame;
719 if (td->td_ucred != p->p_ucred)
720 cred_update_thread(td);
721 if (p->p_flag & P_SA)
722 thread_user_enter(td);
723 params = (caddr_t)frame.tf_rsp + sizeof(register_t);
724 code = frame.tf_rax;
725 orig_tf_rflags = frame.tf_rflags;
726
727 if (p->p_sysent->sv_prepsyscall) {
728 /*
729 * The prep code is MP aware.
730 */
731 (*p->p_sysent->sv_prepsyscall)(&frame, (int *)args, &code, ¶ms);
732 } else {
733 if (code == SYS_syscall || code == SYS___syscall) {
734 code = frame.tf_rdi;
735 reg++;
736 regcnt--;
737 }
738 }
739
740 if (p->p_sysent->sv_mask)
741 code &= p->p_sysent->sv_mask;
742
743 if (code >= p->p_sysent->sv_size)
744 callp = &p->p_sysent->sv_table[0];
745 else
746 callp = &p->p_sysent->sv_table[code];
747
748 narg = callp->sy_narg & SYF_ARGMASK;
749
750 /*
751 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
752 */
753 KASSERT(narg <= sizeof(args) / sizeof(args[0]),
754 ("Too many syscall arguments!"));
755 error = 0;
756 argp = &frame.tf_rdi;
757 argp += reg;
758 bcopy(argp, args, sizeof(args[0]) * regcnt);
759 if (narg > regcnt) {
760 KASSERT(params != NULL, ("copyin args with no params!"));
761 error = copyin(params, &args[regcnt],
762 (narg - regcnt) * sizeof(args[0]));
763 }
764 argp = &args[0];
765
766 #ifdef KTRACE
767 if (KTRPOINT(td, KTR_SYSCALL))
768 ktrsyscall(code, narg, argp);
769 #endif
770
771 CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
772 td->td_proc->p_pid, td->td_proc->p_comm, code);
773
774 if (error == 0) {
775 td->td_retval[0] = 0;
776 td->td_retval[1] = frame.tf_rdx;
777
778 STOPEVENT(p, S_SCE, narg);
779
780 PTRACESTOP_SC(p, td, S_PT_SCE);
781
782 if ((callp->sy_narg & SYF_MPSAFE) == 0) {
783 mtx_lock(&Giant);
784 error = (*callp->sy_call)(td, argp);
785 mtx_unlock(&Giant);
786 } else
787 error = (*callp->sy_call)(td, argp);
788 }
789
790 switch (error) {
791 case 0:
792 frame.tf_rax = td->td_retval[0];
793 frame.tf_rdx = td->td_retval[1];
794 frame.tf_rflags &= ~PSL_C;
795 break;
796
797 case ERESTART:
798 /*
799 * Reconstruct pc, we know that 'syscall' is 2 bytes.
800 * We have to do a full context restore so that %r10
801 * (which was holding the value of %rcx) is restored for
802 * the next iteration.
803 */
804 frame.tf_rip -= frame.tf_err;
805 frame.tf_r10 = frame.tf_rcx;
806 td->td_pcb->pcb_flags |= PCB_FULLCTX;
807 break;
808
809 case EJUSTRETURN:
810 break;
811
812 default:
813 if (p->p_sysent->sv_errsize) {
814 if (error >= p->p_sysent->sv_errsize)
815 error = -1; /* XXX */
816 else
817 error = p->p_sysent->sv_errtbl[error];
818 }
819 frame.tf_rax = error;
820 frame.tf_rflags |= PSL_C;
821 break;
822 }
823
824 /*
825 * Traced syscall.
826 */
827 if (orig_tf_rflags & PSL_T) {
828 frame.tf_rflags &= ~PSL_T;
829 trapsignal(td, SIGTRAP, 0);
830 }
831
832 /*
833 * Handle reschedule and other end-of-syscall issues
834 */
835 userret(td, &frame, sticks);
836
837 CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
838 td->td_proc->p_pid, td->td_proc->p_comm, code);
839
840 #ifdef KTRACE
841 if (KTRPOINT(td, KTR_SYSRET))
842 ktrsysret(code, error, td->td_retval[0]);
843 #endif
844
845 /*
846 * This works because errno is findable through the
847 * register set. If we ever support an emulation where this
848 * is not the case, this code will need to be revisited.
849 */
850 STOPEVENT(p, S_SCX, code);
851
852 PTRACESTOP_SC(p, td, S_PT_SCX);
853
854 WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
855 (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
856 mtx_assert(&sched_lock, MA_NOTOWNED);
857 mtx_assert(&Giant, MA_NOTOWNED);
858 }
/* Cache object: 5318723c16ed31e76bb4477da49c293e */