/* FreeBSD/Linux Kernel Cross Reference: sys/amd64/amd64/trap.c */
/*-
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * AMD64 Trap and System call handling
 */

#include "opt_clock.h"
#include "opt_cpu.h"
#include "opt_isa.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/ptrace.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/uio.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>

#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>

91 extern void trap(struct trapframe frame);
92 extern void syscall(struct trapframe frame);
93
94 static int trap_pfault(struct trapframe *, int);
95 static void trap_fatal(struct trapframe *, vm_offset_t);
96 void dblfault_handler(void);
97
/*
 * Human-readable names for trap numbers, indexed by T_* value.
 * Unassigned vectors get empty strings.  trap_fatal() and the DEBUG
 * path in trap() index this table only after bounds-checking the trap
 * number against MAX_TRAP_MSG, so the table must have exactly
 * MAX_TRAP_MSG + 1 entries.
 */
#define MAX_TRAP_MSG		30
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"",					/*  7 unused */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
	"SIMD floating-point exception",	/* 29 T_XMMFLT */
	"reserved (unknown) fault",		/* 30 T_RESERVED */
};
132
133 #ifdef KDB
134 static int kdb_on_nmi = 1;
135 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
136 &kdb_on_nmi, 0, "Go to KDB on NMI");
137 #endif
138 static int panic_on_nmi = 1;
139 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
140 &panic_on_nmi, 0, "Panic on NMI");
141
142 #ifdef WITNESS
143 extern char *syscallnames[];
144 #endif
145
146 #ifdef DEVICE_POLLING
147 extern u_int32_t poll_in_trap;
148 extern int ether_poll(int count);
149 #endif /* DEVICE_POLLING */

/*
 * Exception, fault, and trap interface to the FreeBSD kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 */

158 void
159 trap(frame)
160 struct trapframe frame;
161 {
162 struct thread *td = curthread;
163 struct proc *p = td->td_proc;
164 u_int sticks = 0;
165 int i = 0, ucode = 0, type, code;
166
167 atomic_add_int(&cnt.v_trap, 1);
168 type = frame.tf_trapno;
169
170 #ifdef KDB_STOP_NMI
171 /* Handler for NMI IPIs used for debugging */
172 if (type == T_NMI) {
173 if (ipi_nmi_handler() == 0)
174 goto out;
175 }
176 #endif /* KDB_STOP_NMI */
177
178 #ifdef KDB
179 if (kdb_active) {
180 kdb_reenter();
181 goto out;
182 }
183 #endif
184
185 if ((frame.tf_rflags & PSL_I) == 0) {
186 /*
187 * Buggy application or kernel code has disabled
188 * interrupts and then trapped. Enabling interrupts
189 * now is wrong, but it is better than running with
190 * interrupts disabled until they are accidentally
191 * enabled later.
192 */
193 if (ISPL(frame.tf_cs) == SEL_UPL)
194 printf(
195 "pid %ld (%s): trap %d with interrupts disabled\n",
196 (long)curproc->p_pid, curproc->p_comm, type);
197 else if (type != T_BPTFLT && type != T_TRCTRAP) {
198 /*
199 * XXX not quite right, since this may be for a
200 * multiple fault in user mode.
201 */
202 printf("kernel trap %d with interrupts disabled\n",
203 type);
204 /*
205 * We shouldn't enable interrupts while in a critical
206 * section.
207 */
208 if (td->td_critnest == 0)
209 enable_intr();
210 }
211 }
212
213 code = frame.tf_err;
214 if (type == T_PAGEFLT) {
215 /*
216 * If we get a page fault while in a critical section, then
217 * it is most likely a fatal kernel page fault. The kernel
218 * is already going to panic trying to get a sleep lock to
219 * do the VM lookup, so just consider it a fatal trap so the
220 * kernel can print out a useful trap message and even get
221 * to the debugger.
222 */
223 if (td->td_critnest != 0)
224 trap_fatal(&frame, frame.tf_addr);
225 }
226
227 #ifdef DEVICE_POLLING
228 if (poll_in_trap)
229 ether_poll(poll_in_trap);
230 #endif /* DEVICE_POLLING */
231
232 if (ISPL(frame.tf_cs) == SEL_UPL) {
233 /* user trap */
234
235 sticks = td->td_sticks;
236 td->td_frame = &frame;
237 if (td->td_ucred != p->p_ucred)
238 cred_update_thread(td);
239
240 switch (type) {
241 case T_PRIVINFLT: /* privileged instruction fault */
242 ucode = type;
243 i = SIGILL;
244 break;
245
246 case T_BPTFLT: /* bpt instruction fault */
247 case T_TRCTRAP: /* trace trap */
248 enable_intr();
249 frame.tf_rflags &= ~PSL_T;
250 i = SIGTRAP;
251 break;
252
253 case T_ARITHTRAP: /* arithmetic trap */
254 ucode = fputrap();
255 if (ucode == -1)
256 goto userout;
257 i = SIGFPE;
258 break;
259
260 case T_PROTFLT: /* general protection fault */
261 case T_STKFLT: /* stack fault */
262 case T_SEGNPFLT: /* segment not present fault */
263 case T_TSSFLT: /* invalid TSS fault */
264 case T_DOUBLEFLT: /* double fault */
265 default:
266 ucode = code + BUS_SEGM_FAULT ;
267 i = SIGBUS;
268 break;
269
270 case T_PAGEFLT: /* page fault */
271 if (td->td_pflags & TDP_SA)
272 thread_user_enter(td);
273 i = trap_pfault(&frame, TRUE);
274 if (i == -1)
275 goto userout;
276 if (i == 0)
277 goto user;
278
279 ucode = T_PAGEFLT;
280 break;
281
282 case T_DIVIDE: /* integer divide fault */
283 ucode = FPE_INTDIV;
284 i = SIGFPE;
285 break;
286
287 #ifdef DEV_ISA
288 case T_NMI:
289 /* machine/parity/power fail/"kitchen sink" faults */
290 /* XXX Giant */
291 if (isa_nmi(code) == 0) {
292 #ifdef KDB
293 /*
294 * NMI can be hooked up to a pushbutton
295 * for debugging.
296 */
297 if (kdb_on_nmi) {
298 printf ("NMI ... going to debugger\n");
299 kdb_trap(type, 0, &frame);
300 }
301 #endif /* KDB */
302 goto userout;
303 } else if (panic_on_nmi)
304 panic("NMI indicates hardware failure");
305 break;
306 #endif /* DEV_ISA */
307
308 case T_OFLOW: /* integer overflow fault */
309 ucode = FPE_INTOVF;
310 i = SIGFPE;
311 break;
312
313 case T_BOUND: /* bounds check fault */
314 ucode = FPE_FLTSUB;
315 i = SIGFPE;
316 break;
317
318 case T_DNA:
319 /* transparent fault (due to context switch "late") */
320 if (fpudna())
321 goto userout;
322 i = SIGFPE;
323 ucode = FPE_FPU_NP_TRAP;
324 break;
325
326 case T_FPOPFLT: /* FPU operand fetch fault */
327 ucode = T_FPOPFLT;
328 i = SIGILL;
329 break;
330
331 case T_XMMFLT: /* SIMD floating-point exception */
332 ucode = 0; /* XXX */
333 i = SIGFPE;
334 break;
335 }
336 } else {
337 /* kernel trap */
338
339 KASSERT(cold || td->td_ucred != NULL,
340 ("kernel trap doesn't have ucred"));
341 switch (type) {
342 case T_PAGEFLT: /* page fault */
343 (void) trap_pfault(&frame, FALSE);
344 goto out;
345
346 case T_DNA:
347 /*
348 * The kernel is apparently using fpu for copying.
349 * XXX this should be fatal unless the kernel has
350 * registered such use.
351 */
352 if (fpudna()) {
353 printf("fpudna in kernel mode!\n");
354 goto out;
355 }
356 break;
357
358 case T_STKFLT: /* stack fault */
359 break;
360
361 case T_PROTFLT: /* general protection fault */
362 case T_SEGNPFLT: /* segment not present fault */
363 if (td->td_intr_nesting_level != 0)
364 break;
365
366 /*
367 * Invalid segment selectors and out of bounds
368 * %eip's and %esp's can be set up in user mode.
369 * This causes a fault in kernel mode when the
370 * kernel tries to return to user mode. We want
371 * to get this fault so that we can fix the
372 * problem here and not have to check all the
373 * selectors and pointers when the user changes
374 * them.
375 */
376 if (frame.tf_rip == (long)doreti_iret) {
377 frame.tf_rip = (long)doreti_iret_fault;
378 goto out;
379 }
380 if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
381 frame.tf_rip =
382 (long)PCPU_GET(curpcb)->pcb_onfault;
383 goto out;
384 }
385 break;
386
387 case T_TSSFLT:
388 /*
389 * PSL_NT can be set in user mode and isn't cleared
390 * automatically when the kernel is entered. This
391 * causes a TSS fault when the kernel attempts to
392 * `iret' because the TSS link is uninitialized. We
393 * want to get this fault so that we can fix the
394 * problem here and not every time the kernel is
395 * entered.
396 */
397 if (frame.tf_rflags & PSL_NT) {
398 frame.tf_rflags &= ~PSL_NT;
399 goto out;
400 }
401 break;
402
403 case T_TRCTRAP: /* trace trap */
404 /*
405 * Ignore debug register trace traps due to
406 * accesses in the user's address space, which
407 * can happen under several conditions such as
408 * if a user sets a watchpoint on a buffer and
409 * then passes that buffer to a system call.
410 * We still want to get TRCTRAPS for addresses
411 * in kernel space because that is useful when
412 * debugging the kernel.
413 */
414 if (user_dbreg_trap()) {
415 /*
416 * Reset breakpoint bits because the
417 * processor doesn't
418 */
419 /* XXX check upper bits here */
420 load_dr6(rdr6() & 0xfffffff0);
421 goto out;
422 }
423 /*
424 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
425 */
426 case T_BPTFLT:
427 /*
428 * If KDB is enabled, let it handle the debugger trap.
429 * Otherwise, debugger traps "can't happen".
430 */
431 #ifdef KDB
432 /* XXX Giant */
433 if (kdb_trap(type, 0, &frame))
434 goto out;
435 #endif
436 break;
437
438 #ifdef DEV_ISA
439 case T_NMI:
440 /* XXX Giant */
441 /* machine/parity/power fail/"kitchen sink" faults */
442 if (isa_nmi(code) == 0) {
443 #ifdef KDB
444 /*
445 * NMI can be hooked up to a pushbutton
446 * for debugging.
447 */
448 if (kdb_on_nmi) {
449 printf ("NMI ... going to debugger\n");
450 kdb_trap(type, 0, &frame);
451 }
452 #endif /* KDB */
453 goto out;
454 } else if (panic_on_nmi == 0)
455 goto out;
456 /* FALLTHROUGH */
457 #endif /* DEV_ISA */
458 }
459
460 trap_fatal(&frame, 0);
461 goto out;
462 }
463
464 /* Translate fault for emulators (e.g. Linux) */
465 if (*p->p_sysent->sv_transtrap)
466 i = (*p->p_sysent->sv_transtrap)(i, type);
467
468 trapsignal(td, i, ucode);
469
470 #ifdef DEBUG
471 if (type <= MAX_TRAP_MSG) {
472 uprintf("fatal process exception: %s",
473 trap_msg[type]);
474 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
475 uprintf(", fault VA = 0x%lx", frame.tf_addr);
476 uprintf("\n");
477 }
478 #endif
479
480 user:
481 userret(td, &frame, sticks);
482 mtx_assert(&Giant, MA_NOTOWNED);
483 userout:
484 out:
485 return;
486 }
488 static int
489 trap_pfault(frame, usermode)
490 struct trapframe *frame;
491 int usermode;
492 {
493 vm_offset_t va;
494 struct vmspace *vm = NULL;
495 vm_map_t map = 0;
496 int rv = 0;
497 vm_prot_t ftype;
498 struct thread *td = curthread;
499 struct proc *p = td->td_proc;
500 vm_offset_t eva = frame->tf_addr;
501
502 va = trunc_page(eva);
503 if (va >= KERNBASE) {
504 /*
505 * Don't allow user-mode faults in kernel address space.
506 */
507 if (usermode)
508 goto nogo;
509
510 map = kernel_map;
511 } else {
512 /*
513 * This is a fault on non-kernel virtual memory.
514 * vm is initialized above to NULL. If curproc is NULL
515 * or curproc->p_vmspace is NULL the fault is fatal.
516 */
517 if (p != NULL)
518 vm = p->p_vmspace;
519
520 if (vm == NULL)
521 goto nogo;
522
523 map = &vm->vm_map;
524 }
525
526 if (frame->tf_err & PGEX_W)
527 ftype = VM_PROT_WRITE;
528 else
529 ftype = VM_PROT_READ;
530
531 if (map != kernel_map) {
532 /*
533 * Keep swapout from messing with us during this
534 * critical time.
535 */
536 PROC_LOCK(p);
537 ++p->p_lock;
538 PROC_UNLOCK(p);
539
540 /* Fault in the user page: */
541 rv = vm_fault(map, va, ftype,
542 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
543 : VM_FAULT_NORMAL);
544
545 PROC_LOCK(p);
546 --p->p_lock;
547 PROC_UNLOCK(p);
548 } else {
549 /*
550 * Don't have to worry about process locking or stacks in the
551 * kernel.
552 */
553 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
554 }
555 if (rv == KERN_SUCCESS)
556 return (0);
557 nogo:
558 if (!usermode) {
559 if (td->td_intr_nesting_level == 0 &&
560 PCPU_GET(curpcb)->pcb_onfault != NULL) {
561 frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
562 return (0);
563 }
564 trap_fatal(frame, eva);
565 return (-1);
566 }
567
568 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
569 }
571 static void
572 trap_fatal(frame, eva)
573 struct trapframe *frame;
574 vm_offset_t eva;
575 {
576 int code, type, ss;
577 long esp;
578 struct soft_segment_descriptor softseg;
579 char *msg;
580
581 code = frame->tf_err;
582 type = frame->tf_trapno;
583 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);
584
585 if (type <= MAX_TRAP_MSG)
586 msg = trap_msg[type];
587 else
588 msg = "UNKNOWN";
589 printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
590 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
591 #ifdef SMP
592 /* two separate prints in case of a trap on an unmapped page */
593 printf("cpuid = %d; ", PCPU_GET(cpuid));
594 printf("apic id = %02x\n", PCPU_GET(apic_id));
595 #endif
596 if (type == T_PAGEFLT) {
597 printf("fault virtual address = 0x%lx\n", eva);
598 printf("fault code = %s %s, %s\n",
599 code & PGEX_U ? "user" : "supervisor",
600 code & PGEX_W ? "write" : "read",
601 code & PGEX_P ? "protection violation" : "page not present");
602 }
603 printf("instruction pointer = 0x%lx:0x%lx\n",
604 frame->tf_cs & 0xffff, frame->tf_rip);
605 if (ISPL(frame->tf_cs) == SEL_UPL) {
606 ss = frame->tf_ss & 0xffff;
607 esp = frame->tf_rsp;
608 } else {
609 ss = GSEL(GDATA_SEL, SEL_KPL);
610 esp = (long)&frame->tf_rsp;
611 }
612 printf("stack pointer = 0x%x:0x%lx\n", ss, esp);
613 printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
614 printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n",
615 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
616 printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
617 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
618 softseg.ssd_gran);
619 printf("processor eflags = ");
620 if (frame->tf_rflags & PSL_T)
621 printf("trace trap, ");
622 if (frame->tf_rflags & PSL_I)
623 printf("interrupt enabled, ");
624 if (frame->tf_rflags & PSL_NT)
625 printf("nested task, ");
626 if (frame->tf_rflags & PSL_RF)
627 printf("resume, ");
628 printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
629 printf("current process = ");
630 if (curproc) {
631 printf("%lu (%s)\n",
632 (u_long)curproc->p_pid, curproc->p_comm ?
633 curproc->p_comm : "");
634 } else {
635 printf("Idle\n");
636 }
637
638 #ifdef KDB
639 if (kdb_trap(type, 0, frame))
640 return;
641 #endif
642 printf("trap number = %d\n", type);
643 if (type <= MAX_TRAP_MSG)
644 panic("%s", trap_msg[type]);
645 else
646 panic("unknown/reserved trap");
647 }

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 */
/*
 * dblfault_handler() - terminal handler for a double fault (typically a
 * kernel stack overflow).  Runs on its own IST stack; the machine state
 * is not recoverable, so just identify the CPU and panic.
 */
void
dblfault_handler(void)
{
	printf("\nFatal double fault\n");
#ifdef SMP
	/* two separate prints in case of a trap on an unmapped page */
	printf("cpuid = %d; ", PCPU_GET(cpuid));
	printf("apic id = %02x\n", PCPU_GET(apic_id));
#endif
	panic("double fault");
}

/*
 * syscall - system call request C handler
 *
 * A system call is essentially treated as a trap.
 */
672 void
673 syscall(frame)
674 struct trapframe frame;
675 {
676 caddr_t params;
677 struct sysent *callp;
678 struct thread *td = curthread;
679 struct proc *p = td->td_proc;
680 register_t orig_tf_rflags;
681 u_int sticks;
682 int error;
683 int narg;
684 register_t args[8];
685 register_t *argp;
686 u_int code;
687 int reg, regcnt;
688
689 /*
690 * note: PCPU_LAZY_INC() can only be used if we can afford
691 * occassional inaccuracy in the count.
692 */
693 PCPU_LAZY_INC(cnt.v_syscall);
694
695 #ifdef DIAGNOSTIC
696 if (ISPL(frame.tf_cs) != SEL_UPL) {
697 mtx_lock(&Giant); /* try to stabilize the system XXX */
698 panic("syscall");
699 /* NOT REACHED */
700 mtx_unlock(&Giant);
701 }
702 #endif
703
704 reg = 0;
705 regcnt = 6;
706 sticks = td->td_sticks;
707 td->td_frame = &frame;
708 if (td->td_ucred != p->p_ucred)
709 cred_update_thread(td);
710 if (p->p_flag & P_SA)
711 thread_user_enter(td);
712 params = (caddr_t)frame.tf_rsp + sizeof(register_t);
713 code = frame.tf_rax;
714 orig_tf_rflags = frame.tf_rflags;
715
716 if (p->p_sysent->sv_prepsyscall) {
717 /*
718 * The prep code is MP aware.
719 */
720 (*p->p_sysent->sv_prepsyscall)(&frame, (int *)args, &code, ¶ms);
721 } else {
722 if (code == SYS_syscall || code == SYS___syscall) {
723 code = frame.tf_rdi;
724 reg++;
725 regcnt--;
726 }
727 }
728
729 if (p->p_sysent->sv_mask)
730 code &= p->p_sysent->sv_mask;
731
732 if (code >= p->p_sysent->sv_size)
733 callp = &p->p_sysent->sv_table[0];
734 else
735 callp = &p->p_sysent->sv_table[code];
736
737 narg = callp->sy_narg & SYF_ARGMASK;
738
739 /*
740 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
741 */
742 KASSERT(narg <= sizeof(args) / sizeof(args[0]),
743 ("Too many syscall arguments!"));
744 error = 0;
745 argp = &frame.tf_rdi;
746 argp += reg;
747 bcopy(argp, args, sizeof(args[0]) * regcnt);
748 if (narg > regcnt) {
749 KASSERT(params != NULL, ("copyin args with no params!"));
750 error = copyin(params, &args[regcnt],
751 (narg - regcnt) * sizeof(args[0]));
752 }
753 argp = &args[0];
754
755 #ifdef KTRACE
756 if (KTRPOINT(td, KTR_SYSCALL))
757 ktrsyscall(code, narg, argp);
758 #endif
759
760 CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
761 td->td_proc->p_pid, td->td_proc->p_comm, code);
762
763 if (error == 0) {
764 td->td_retval[0] = 0;
765 td->td_retval[1] = frame.tf_rdx;
766
767 STOPEVENT(p, S_SCE, narg);
768
769 PTRACESTOP_SC(p, td, S_PT_SCE);
770
771 if ((callp->sy_narg & SYF_MPSAFE) == 0) {
772 mtx_lock(&Giant);
773 error = (*callp->sy_call)(td, argp);
774 mtx_unlock(&Giant);
775 } else
776 error = (*callp->sy_call)(td, argp);
777 }
778
779 switch (error) {
780 case 0:
781 frame.tf_rax = td->td_retval[0];
782 frame.tf_rdx = td->td_retval[1];
783 frame.tf_rflags &= ~PSL_C;
784 break;
785
786 case ERESTART:
787 /*
788 * Reconstruct pc, we know that 'syscall' is 2 bytes.
789 * We have to do a full context restore so that %r10
790 * (which was holding the value of %rcx) is restored for
791 * the next iteration.
792 */
793 frame.tf_rip -= frame.tf_err;
794 frame.tf_r10 = frame.tf_rcx;
795 td->td_pcb->pcb_flags |= PCB_FULLCTX;
796 break;
797
798 case EJUSTRETURN:
799 break;
800
801 default:
802 if (p->p_sysent->sv_errsize) {
803 if (error >= p->p_sysent->sv_errsize)
804 error = -1; /* XXX */
805 else
806 error = p->p_sysent->sv_errtbl[error];
807 }
808 frame.tf_rax = error;
809 frame.tf_rflags |= PSL_C;
810 break;
811 }
812
813 /*
814 * Traced syscall.
815 */
816 if (orig_tf_rflags & PSL_T) {
817 frame.tf_rflags &= ~PSL_T;
818 trapsignal(td, SIGTRAP, 0);
819 }
820
821 /*
822 * Handle reschedule and other end-of-syscall issues
823 */
824 userret(td, &frame, sticks);
825
826 CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
827 td->td_proc->p_pid, td->td_proc->p_comm, code);
828
829 #ifdef KTRACE
830 if (KTRPOINT(td, KTR_SYSRET))
831 ktrsysret(code, error, td->td_retval[0]);
832 #endif
833
834 /*
835 * This works because errno is findable through the
836 * register set. If we ever support an emulation where this
837 * is not the case, this code will need to be revisited.
838 */
839 STOPEVENT(p, S_SCX, code);
840
841 PTRACESTOP_SC(p, td, S_PT_SCX);
842
843 WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
844 (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
845 mtx_assert(&sched_lock, MA_NOTOWNED);
846 mtx_assert(&Giant, MA_NOTOWNED);
847 }
/* Cache object: 8eff4f1116982cfd711ed8352e9cca7c */