FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/trap.c
1 /*-
2 * Copyright (C) 1994, David Greenman
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the University of Utah, and William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
38 */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD: releng/5.2/sys/amd64/amd64/trap.c 122940 2003-11-21 03:02:00Z peter $");
42
43 /*
44 * AMD64 Trap and System call handling
45 */
46
47 #include "opt_clock.h"
48 #include "opt_cpu.h"
49 #include "opt_ddb.h"
50 #include "opt_isa.h"
51 #include "opt_ktrace.h"
52
53 #include <sys/param.h>
54 #include <sys/bus.h>
55 #include <sys/systm.h>
56 #include <sys/proc.h>
57 #include <sys/pioctl.h>
58 #include <sys/ptrace.h>
59 #include <sys/kernel.h>
60 #include <sys/ktr.h>
61 #include <sys/lock.h>
62 #include <sys/mutex.h>
63 #include <sys/resourcevar.h>
64 #include <sys/signalvar.h>
65 #include <sys/syscall.h>
66 #include <sys/sysctl.h>
67 #include <sys/sysent.h>
68 #include <sys/uio.h>
69 #include <sys/vmmeter.h>
70 #ifdef KTRACE
71 #include <sys/ktrace.h>
72 #endif
73
74 #include <vm/vm.h>
75 #include <vm/vm_param.h>
76 #include <vm/pmap.h>
77 #include <vm/vm_kern.h>
78 #include <vm/vm_map.h>
79 #include <vm/vm_page.h>
80 #include <vm/vm_extern.h>
81
82 #include <machine/cpu.h>
83 #include <machine/intr_machdep.h>
84 #include <machine/md_var.h>
85 #include <machine/pcb.h>
86 #ifdef SMP
87 #include <machine/smp.h>
88 #endif
89 #include <machine/tss.h>
90
91 #include <ddb/ddb.h>
92
93 extern void trap(struct trapframe frame);
94 extern void syscall(struct trapframe frame);
95
96 static int trap_pfault(struct trapframe *, int);
97 static void trap_fatal(struct trapframe *, vm_offset_t);
98 void dblfault_handler(void);
99
/*
 * Human-readable names for trap numbers, indexed by trap type (T_*).
 * MAX_TRAP_MSG is the highest valid index; callers must check
 * "type <= MAX_TRAP_MSG" before indexing.  Unused trap numbers map to
 * the empty string.  The table is never modified, so both the pointers
 * and the strings are const.
 */
#define	MAX_TRAP_MSG		28
static const char *const trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"",					/*  7 unused */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
};
132
133 #ifdef DDB
134 static int ddb_on_nmi = 1;
135 SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW,
136 &ddb_on_nmi, 0, "Go to DDB on NMI");
137 #endif
138 static int panic_on_nmi = 1;
139 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
140 &panic_on_nmi, 0, "Panic on NMI");
141
142 #ifdef WITNESS
143 extern char *syscallnames[];
144 #endif
145
146 #ifdef DEVICE_POLLING
147 extern u_int32_t poll_in_trap;
148 extern int ether_poll(int count);
149 #endif /* DEVICE_POLLING */
150
151 /*
152 * Exception, fault, and trap interface to the FreeBSD kernel.
153 * This common code is called from assembly language IDT gate entry
154 * routines that prepare a suitable stack frame, and restore this
155 * frame after the exception has been processed.
156 */
157
158 void
159 trap(frame)
160 struct trapframe frame;
161 {
162 struct thread *td = curthread;
163 struct proc *p = td->td_proc;
164 u_int sticks = 0;
165 int i = 0, ucode = 0, type, code;
166
167 atomic_add_int(&cnt.v_trap, 1);
168 type = frame.tf_trapno;
169
170 #ifdef DDB
171 if (db_active) {
172 vm_offset_t eva;
173 eva = (type == T_PAGEFLT ? frame.tf_addr : 0);
174 trap_fatal(&frame, eva);
175 goto out;
176 }
177 #endif
178
179 if ((frame.tf_rflags & PSL_I) == 0) {
180 /*
181 * Buggy application or kernel code has disabled
182 * interrupts and then trapped. Enabling interrupts
183 * now is wrong, but it is better than running with
184 * interrupts disabled until they are accidentally
185 * enabled later.
186 */
187 if (ISPL(frame.tf_cs) == SEL_UPL)
188 printf(
189 "pid %ld (%s): trap %d with interrupts disabled\n",
190 (long)curproc->p_pid, curproc->p_comm, type);
191 else if (type != T_BPTFLT && type != T_TRCTRAP) {
192 /*
193 * XXX not quite right, since this may be for a
194 * multiple fault in user mode.
195 */
196 printf("kernel trap %d with interrupts disabled\n",
197 type);
198 /*
199 * We shouldn't enable interrupts while in a critical
200 * section.
201 */
202 if (td->td_critnest == 0)
203 enable_intr();
204 }
205 }
206
207 code = frame.tf_err;
208 if (type == T_PAGEFLT) {
209 /*
210 * If we get a page fault while in a critical section, then
211 * it is most likely a fatal kernel page fault. The kernel
212 * is already going to panic trying to get a sleep lock to
213 * do the VM lookup, so just consider it a fatal trap so the
214 * kernel can print out a useful trap message and even get
215 * to the debugger.
216 */
217 if (td->td_critnest != 0)
218 trap_fatal(&frame, frame.tf_addr);
219 }
220
221 #ifdef DEVICE_POLLING
222 if (poll_in_trap)
223 ether_poll(poll_in_trap);
224 #endif /* DEVICE_POLLING */
225
226 if (ISPL(frame.tf_cs) == SEL_UPL) {
227 /* user trap */
228
229 sticks = td->td_sticks;
230 td->td_frame = &frame;
231 if (td->td_ucred != p->p_ucred)
232 cred_update_thread(td);
233
234 switch (type) {
235 case T_PRIVINFLT: /* privileged instruction fault */
236 ucode = type;
237 i = SIGILL;
238 break;
239
240 case T_BPTFLT: /* bpt instruction fault */
241 case T_TRCTRAP: /* trace trap */
242 enable_intr();
243 frame.tf_rflags &= ~PSL_T;
244 i = SIGTRAP;
245 break;
246
247 case T_ARITHTRAP: /* arithmetic trap */
248 ucode = fputrap();
249 if (ucode == -1)
250 goto userout;
251 i = SIGFPE;
252 break;
253
254 case T_PROTFLT: /* general protection fault */
255 case T_STKFLT: /* stack fault */
256 case T_SEGNPFLT: /* segment not present fault */
257 case T_TSSFLT: /* invalid TSS fault */
258 case T_DOUBLEFLT: /* double fault */
259 default:
260 ucode = code + BUS_SEGM_FAULT ;
261 i = SIGBUS;
262 break;
263
264 case T_PAGEFLT: /* page fault */
265 if (td->td_flags & TDF_SA)
266 thread_user_enter(p, td);
267 i = trap_pfault(&frame, TRUE);
268 if (i == -1)
269 goto userout;
270 if (i == 0)
271 goto user;
272
273 ucode = T_PAGEFLT;
274 break;
275
276 case T_DIVIDE: /* integer divide fault */
277 ucode = FPE_INTDIV;
278 i = SIGFPE;
279 break;
280
281 #ifdef DEV_ISA
282 case T_NMI:
283 /* machine/parity/power fail/"kitchen sink" faults */
284 /* XXX Giant */
285 if (isa_nmi(code) == 0) {
286 #ifdef DDB
287 /*
288 * NMI can be hooked up to a pushbutton
289 * for debugging.
290 */
291 if (ddb_on_nmi) {
292 printf ("NMI ... going to debugger\n");
293 kdb_trap (type, 0, &frame);
294 }
295 #endif /* DDB */
296 goto userout;
297 } else if (panic_on_nmi)
298 panic("NMI indicates hardware failure");
299 break;
300 #endif /* DEV_ISA */
301
302 case T_OFLOW: /* integer overflow fault */
303 ucode = FPE_INTOVF;
304 i = SIGFPE;
305 break;
306
307 case T_BOUND: /* bounds check fault */
308 ucode = FPE_FLTSUB;
309 i = SIGFPE;
310 break;
311
312 case T_DNA:
313 /* transparent fault (due to context switch "late") */
314 if (fpudna())
315 goto userout;
316 i = SIGFPE;
317 ucode = FPE_FPU_NP_TRAP;
318 break;
319
320 case T_FPOPFLT: /* FPU operand fetch fault */
321 ucode = T_FPOPFLT;
322 i = SIGILL;
323 break;
324
325 case T_XMMFLT: /* SIMD floating-point exception */
326 ucode = 0; /* XXX */
327 i = SIGFPE;
328 break;
329 }
330 } else {
331 /* kernel trap */
332
333 KASSERT(cold || td->td_ucred != NULL,
334 ("kernel trap doesn't have ucred"));
335 switch (type) {
336 case T_PAGEFLT: /* page fault */
337 (void) trap_pfault(&frame, FALSE);
338 goto out;
339
340 case T_DNA:
341 /*
342 * The kernel is apparently using fpu for copying.
343 * XXX this should be fatal unless the kernel has
344 * registered such use.
345 */
346 if (fpudna()) {
347 printf("fpudna in kernel mode!\n");
348 goto out;
349 }
350 break;
351
352 case T_STKFLT: /* stack fault */
353 break;
354
355 case T_PROTFLT: /* general protection fault */
356 case T_SEGNPFLT: /* segment not present fault */
357 if (td->td_intr_nesting_level != 0)
358 break;
359
360 /*
361 * Invalid segment selectors and out of bounds
362 * %eip's and %esp's can be set up in user mode.
363 * This causes a fault in kernel mode when the
364 * kernel tries to return to user mode. We want
365 * to get this fault so that we can fix the
366 * problem here and not have to check all the
367 * selectors and pointers when the user changes
368 * them.
369 */
370 if (frame.tf_rip == (long)doreti_iret) {
371 frame.tf_rip = (long)doreti_iret_fault;
372 goto out;
373 }
374 if (PCPU_GET(curpcb) != NULL &&
375 PCPU_GET(curpcb)->pcb_onfault != NULL) {
376 frame.tf_rip =
377 (long)PCPU_GET(curpcb)->pcb_onfault;
378 goto out;
379 }
380 break;
381
382 case T_TSSFLT:
383 /*
384 * PSL_NT can be set in user mode and isn't cleared
385 * automatically when the kernel is entered. This
386 * causes a TSS fault when the kernel attempts to
387 * `iret' because the TSS link is uninitialized. We
388 * want to get this fault so that we can fix the
389 * problem here and not every time the kernel is
390 * entered.
391 */
392 if (frame.tf_rflags & PSL_NT) {
393 frame.tf_rflags &= ~PSL_NT;
394 goto out;
395 }
396 break;
397
398 case T_TRCTRAP: /* trace trap */
399 /*
400 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
401 */
402 case T_BPTFLT:
403 /*
404 * If DDB is enabled, let it handle the debugger trap.
405 * Otherwise, debugger traps "can't happen".
406 */
407 #ifdef DDB
408 /* XXX Giant */
409 if (kdb_trap (type, 0, &frame))
410 goto out;
411 #endif
412 break;
413
414 #ifdef DEV_ISA
415 case T_NMI:
416 /* XXX Giant */
417 /* machine/parity/power fail/"kitchen sink" faults */
418 if (isa_nmi(code) == 0) {
419 #ifdef DDB
420 /*
421 * NMI can be hooked up to a pushbutton
422 * for debugging.
423 */
424 if (ddb_on_nmi) {
425 printf ("NMI ... going to debugger\n");
426 kdb_trap (type, 0, &frame);
427 }
428 #endif /* DDB */
429 goto out;
430 } else if (panic_on_nmi == 0)
431 goto out;
432 /* FALLTHROUGH */
433 #endif /* DEV_ISA */
434 }
435
436 trap_fatal(&frame, 0);
437 goto out;
438 }
439
440 /* Translate fault for emulators (e.g. Linux) */
441 if (*p->p_sysent->sv_transtrap)
442 i = (*p->p_sysent->sv_transtrap)(i, type);
443
444 trapsignal(td, i, ucode);
445
446 #ifdef DEBUG
447 if (type <= MAX_TRAP_MSG) {
448 uprintf("fatal process exception: %s",
449 trap_msg[type]);
450 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
451 uprintf(", fault VA = 0x%lx", frame.tf_addr);
452 uprintf("\n");
453 }
454 #endif
455
456 user:
457 userret(td, &frame, sticks);
458 mtx_assert(&Giant, MA_NOTOWNED);
459 userout:
460 #ifdef DIAGNOSTIC
461 cred_free_thread(td);
462 #endif
463 out:
464 return;
465 }
466
467 static int
468 trap_pfault(frame, usermode)
469 struct trapframe *frame;
470 int usermode;
471 {
472 vm_offset_t va;
473 struct vmspace *vm = NULL;
474 vm_map_t map = 0;
475 int rv = 0;
476 vm_prot_t ftype;
477 struct thread *td = curthread;
478 struct proc *p = td->td_proc;
479 vm_offset_t eva = frame->tf_addr;
480
481 va = trunc_page(eva);
482 if (va >= KERNBASE) {
483 /*
484 * Don't allow user-mode faults in kernel address space.
485 */
486 if (usermode)
487 goto nogo;
488
489 map = kernel_map;
490 } else {
491 /*
492 * This is a fault on non-kernel virtual memory.
493 * vm is initialized above to NULL. If curproc is NULL
494 * or curproc->p_vmspace is NULL the fault is fatal.
495 */
496 if (p != NULL)
497 vm = p->p_vmspace;
498
499 if (vm == NULL)
500 goto nogo;
501
502 map = &vm->vm_map;
503 }
504
505 if (frame->tf_err & PGEX_W)
506 ftype = VM_PROT_WRITE;
507 else
508 ftype = VM_PROT_READ;
509
510 if (map != kernel_map) {
511 /*
512 * Keep swapout from messing with us during this
513 * critical time.
514 */
515 PROC_LOCK(p);
516 ++p->p_lock;
517 PROC_UNLOCK(p);
518
519 /* Fault in the user page: */
520 rv = vm_fault(map, va, ftype,
521 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
522 : VM_FAULT_NORMAL);
523
524 PROC_LOCK(p);
525 --p->p_lock;
526 PROC_UNLOCK(p);
527 } else {
528 /*
529 * Don't have to worry about process locking or stacks in the
530 * kernel.
531 */
532 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
533 }
534 if (rv == KERN_SUCCESS)
535 return (0);
536 nogo:
537 if (!usermode) {
538 if (td->td_intr_nesting_level == 0 &&
539 PCPU_GET(curpcb) != NULL &&
540 PCPU_GET(curpcb)->pcb_onfault != NULL) {
541 frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
542 return (0);
543 }
544 trap_fatal(frame, eva);
545 return (-1);
546 }
547
548 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
549 }
550
551 static void
552 trap_fatal(frame, eva)
553 struct trapframe *frame;
554 vm_offset_t eva;
555 {
556 int code, type, ss;
557 long esp;
558 struct soft_segment_descriptor softseg;
559
560 code = frame->tf_err;
561 type = frame->tf_trapno;
562 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);
563
564 if (type <= MAX_TRAP_MSG)
565 printf("\n\nFatal trap %d: %s while in %s mode\n",
566 type, trap_msg[type],
567 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
568 #ifdef SMP
569 /* two separate prints in case of a trap on an unmapped page */
570 printf("cpuid = %d; ", PCPU_GET(cpuid));
571 printf("apic id = %02x\n", PCPU_GET(apic_id));
572 #endif
573 if (type == T_PAGEFLT) {
574 printf("fault virtual address = 0x%lx\n", eva);
575 printf("fault code = %s %s, %s\n",
576 code & PGEX_U ? "user" : "supervisor",
577 code & PGEX_W ? "write" : "read",
578 code & PGEX_P ? "protection violation" : "page not present");
579 }
580 printf("instruction pointer = 0x%lx:0x%lx\n",
581 frame->tf_cs & 0xffff, frame->tf_rip);
582 if (ISPL(frame->tf_cs) == SEL_UPL) {
583 ss = frame->tf_ss & 0xffff;
584 esp = frame->tf_rsp;
585 } else {
586 ss = GSEL(GDATA_SEL, SEL_KPL);
587 esp = (long)&frame->tf_rsp;
588 }
589 printf("stack pointer = 0x%x:0x%lx\n", ss, esp);
590 printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
591 printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n",
592 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
593 printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
594 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
595 softseg.ssd_gran);
596 printf("processor eflags = ");
597 if (frame->tf_rflags & PSL_T)
598 printf("trace trap, ");
599 if (frame->tf_rflags & PSL_I)
600 printf("interrupt enabled, ");
601 if (frame->tf_rflags & PSL_NT)
602 printf("nested task, ");
603 if (frame->tf_rflags & PSL_RF)
604 printf("resume, ");
605 printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
606 printf("current process = ");
607 if (curproc) {
608 printf("%lu (%s)\n",
609 (u_long)curproc->p_pid, curproc->p_comm ?
610 curproc->p_comm : "");
611 } else {
612 printf("Idle\n");
613 }
614
615 #ifdef KDB
616 if (kdb_trap(&psl))
617 return;
618 #endif
619 #ifdef DDB
620 if ((debugger_on_panic || db_active) && kdb_trap(type, 0, frame))
621 return;
622 #endif
623 printf("trap number = %d\n", type);
624 if (type <= MAX_TRAP_MSG)
625 panic("%s", trap_msg[type]);
626 else
627 panic("unknown/reserved trap");
628 }
629
/*
 * Double fault handler.  Entered when a second fault is raised while
 * the frame for a trap/exception is being written onto the stack,
 * which usually means the stack has overflowed (infinite recursion,
 * for example).  Prints a short banner, plus the CPU identity on SMP
 * kernels, and panics.
 */
void
dblfault_handler(void)
{

	printf("\nFatal double fault\n");
#ifdef SMP
	/* two separate prints in case of a trap on an unmapped page */
	printf("cpuid = %d; ", PCPU_GET(cpuid));
	printf("apic id = %02x\n", PCPU_GET(apic_id));
#endif
	panic("double fault");
}
647
648 /*
649 * syscall - system call request C handler
650 *
651 * A system call is essentially treated as a trap.
652 */
653 void
654 syscall(frame)
655 struct trapframe frame;
656 {
657 caddr_t params;
658 struct sysent *callp;
659 struct thread *td = curthread;
660 struct proc *p = td->td_proc;
661 register_t orig_tf_rflags;
662 u_int sticks;
663 int error;
664 int narg;
665 register_t args[8];
666 register_t *argp;
667 u_int code;
668 int reg, regcnt;
669
670 /*
671 * note: PCPU_LAZY_INC() can only be used if we can afford
672 * occassional inaccuracy in the count.
673 */
674 PCPU_LAZY_INC(cnt.v_syscall);
675
676 #ifdef DIAGNOSTIC
677 if (ISPL(frame.tf_cs) != SEL_UPL) {
678 mtx_lock(&Giant); /* try to stabilize the system XXX */
679 panic("syscall");
680 /* NOT REACHED */
681 mtx_unlock(&Giant);
682 }
683 #endif
684
685 reg = 0;
686 regcnt = 6;
687 sticks = td->td_sticks;
688 td->td_frame = &frame;
689 if (td->td_ucred != p->p_ucred)
690 cred_update_thread(td);
691 if (p->p_flag & P_SA)
692 thread_user_enter(p, td);
693 params = (caddr_t)frame.tf_rsp + sizeof(register_t);
694 code = frame.tf_rax;
695 orig_tf_rflags = frame.tf_rflags;
696
697 if (p->p_sysent->sv_prepsyscall) {
698 /*
699 * The prep code is MP aware.
700 */
701 (*p->p_sysent->sv_prepsyscall)(&frame, (int *)args, &code, ¶ms);
702 } else {
703 if (code == SYS_syscall || code == SYS___syscall) {
704 code = frame.tf_rdi;
705 reg++;
706 regcnt--;
707 }
708 }
709
710 if (p->p_sysent->sv_mask)
711 code &= p->p_sysent->sv_mask;
712
713 if (code >= p->p_sysent->sv_size)
714 callp = &p->p_sysent->sv_table[0];
715 else
716 callp = &p->p_sysent->sv_table[code];
717
718 narg = callp->sy_narg & SYF_ARGMASK;
719
720 /*
721 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
722 */
723 if (narg <= regcnt) {
724 argp = &frame.tf_rdi;
725 argp += reg;
726 error = 0;
727 } else {
728 KASSERT(narg <= sizeof(args) / sizeof(args[0]),
729 ("Too many syscall arguments!"));
730 KASSERT(params != NULL, ("copyin args with no params!"));
731 argp = &frame.tf_rdi;
732 argp += reg;
733 bcopy(argp, args, sizeof(args[0]) * regcnt);
734 error = copyin(params, &args[regcnt],
735 (narg - regcnt) * sizeof(args[0]));
736 argp = &args[0];
737 }
738
739 #ifdef KTRACE
740 if (KTRPOINT(td, KTR_SYSCALL))
741 ktrsyscall(code, narg, argp);
742 #endif
743
744 /*
745 * Try to run the syscall without Giant if the syscall
746 * is MP safe.
747 */
748 if ((callp->sy_narg & SYF_MPSAFE) == 0)
749 mtx_lock(&Giant);
750
751 if (error == 0) {
752 td->td_retval[0] = 0;
753 td->td_retval[1] = frame.tf_rdx;
754
755 STOPEVENT(p, S_SCE, narg);
756
757 PTRACESTOP_SC(p, td, S_PT_SCE);
758
759 error = (*callp->sy_call)(td, argp);
760 }
761
762 switch (error) {
763 case 0:
764 frame.tf_rax = td->td_retval[0];
765 frame.tf_rdx = td->td_retval[1];
766 frame.tf_rflags &= ~PSL_C;
767 break;
768
769 case ERESTART:
770 /*
771 * Reconstruct pc, we know that 'syscall' is 2 bytes.
772 * We have to do a full context restore so that %r10
773 * (which was holding the value of %rcx) is restored for
774 * the next iteration.
775 */
776 frame.tf_rip -= frame.tf_err;
777 frame.tf_r10 = frame.tf_rcx;
778 td->td_pcb->pcb_flags |= PCB_FULLCTX;
779 break;
780
781 case EJUSTRETURN:
782 break;
783
784 default:
785 if (p->p_sysent->sv_errsize) {
786 if (error >= p->p_sysent->sv_errsize)
787 error = -1; /* XXX */
788 else
789 error = p->p_sysent->sv_errtbl[error];
790 }
791 frame.tf_rax = error;
792 frame.tf_rflags |= PSL_C;
793 break;
794 }
795
796 /*
797 * Release Giant if we previously set it.
798 */
799 if ((callp->sy_narg & SYF_MPSAFE) == 0)
800 mtx_unlock(&Giant);
801
802 /*
803 * Traced syscall.
804 */
805 if (orig_tf_rflags & PSL_T) {
806 frame.tf_rflags &= ~PSL_T;
807 trapsignal(td, SIGTRAP, 0);
808 }
809
810 /*
811 * Handle reschedule and other end-of-syscall issues
812 */
813 userret(td, &frame, sticks);
814
815 #ifdef KTRACE
816 if (KTRPOINT(td, KTR_SYSRET))
817 ktrsysret(code, error, td->td_retval[0]);
818 #endif
819
820 /*
821 * This works because errno is findable through the
822 * register set. If we ever support an emulation where this
823 * is not the case, this code will need to be revisited.
824 */
825 STOPEVENT(p, S_SCX, code);
826
827 PTRACESTOP_SC(p, td, S_PT_SCX);
828
829 #ifdef DIAGNOSTIC
830 cred_free_thread(td);
831 #endif
832 WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
833 (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
834 mtx_assert(&sched_lock, MA_NOTOWNED);
835 mtx_assert(&Giant, MA_NOTOWNED);
836 }
Cache object: 1d7f76662bfcdb8d6af29c72c7591c37
|