/* FreeBSD/Linux Kernel Cross Reference: sys/amd64/amd64/trap.c */
1 /*-
2 * Copyright (C) 1994, David Greenman
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * the University of Utah, and William Jolitz.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * from: @(#)trap.c 7.4 (Berkeley) 5/13/91
38 * $FreeBSD: releng/5.1/sys/amd64/amd64/trap.c 115737 2003-06-02 21:57:08Z peter $
39 */
40
41 /*
42 * AMD64 Trap and System call handling
43 */
44
45 #include "opt_clock.h"
46 #include "opt_cpu.h"
47 #include "opt_ddb.h"
48 #include "opt_isa.h"
49 #include "opt_ktrace.h"
50
51 #include <sys/param.h>
52 #include <sys/bus.h>
53 #include <sys/systm.h>
54 #include <sys/proc.h>
55 #include <sys/pioctl.h>
56 #include <sys/kernel.h>
57 #include <sys/ktr.h>
58 #include <sys/lock.h>
59 #include <sys/mutex.h>
60 #include <sys/resourcevar.h>
61 #include <sys/signalvar.h>
62 #include <sys/syscall.h>
63 #include <sys/sysctl.h>
64 #include <sys/sysent.h>
65 #include <sys/uio.h>
66 #include <sys/vmmeter.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70
71 #include <vm/vm.h>
72 #include <vm/vm_param.h>
73 #include <vm/pmap.h>
74 #include <vm/vm_kern.h>
75 #include <vm/vm_map.h>
76 #include <vm/vm_page.h>
77 #include <vm/vm_extern.h>
78
79 #include <machine/cpu.h>
80 #include <machine/md_var.h>
81 #include <machine/pcb.h>
82 #include <machine/tss.h>
83
84 #include <amd64/isa/icu.h>
85 #include <amd64/isa/intr_machdep.h>
86
87 #include <ddb/ddb.h>
88
89 #include <sys/sysctl.h>
90
91 extern void trap(struct trapframe frame);
92 extern void syscall(struct trapframe frame);
93
94 static int trap_pfault(struct trapframe *, int);
95 static void trap_fatal(struct trapframe *, vm_offset_t);
96 void dblfault_handler(void);
97
/*
 * Human-readable descriptions of trap numbers, indexed by trap type.
 * Unused trap numbers map to the empty string.
 *
 * The table is read-only, so both the pointers and the strings they
 * point at are const-qualified.
 */
static const char *const trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"",					/*  7 unused */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
};

/*
 * Highest trap number that has an entry in trap_msg[].  Derived from the
 * table itself so the bound cannot drift when entries are added; the
 * cast keeps comparisons against a (signed) trap type in int arithmetic.
 */
#define	MAX_TRAP_MSG	((int)(sizeof(trap_msg) / sizeof(trap_msg[0])) - 1)
130
131 #ifdef DDB
132 static int ddb_on_nmi = 1;
133 SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW,
134 &ddb_on_nmi, 0, "Go to DDB on NMI");
135 #endif
136 static int panic_on_nmi = 1;
137 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
138 &panic_on_nmi, 0, "Panic on NMI");
139
140 #ifdef WITNESS
141 extern char *syscallnames[];
142 #endif
143
144 #ifdef DEVICE_POLLING
145 extern u_int32_t poll_in_trap;
146 extern int ether_poll(int count);
147 #endif /* DEVICE_POLLING */
148
149 /*
150 * Exception, fault, and trap interface to the FreeBSD kernel.
151 * This common code is called from assembly language IDT gate entry
152 * routines that prepare a suitable stack frame, and restore this
153 * frame after the exception has been processed.
154 */
155
156 void
157 trap(frame)
158 struct trapframe frame;
159 {
160 struct thread *td = curthread;
161 struct proc *p = td->td_proc;
162 u_int sticks = 0;
163 int i = 0, ucode = 0, type, code;
164
165 atomic_add_int(&cnt.v_trap, 1);
166 type = frame.tf_trapno;
167
168 #ifdef DDB
169 if (db_active) {
170 vm_offset_t eva;
171 eva = (type == T_PAGEFLT ? frame.tf_addr : 0);
172 trap_fatal(&frame, eva);
173 goto out;
174 }
175 #endif
176
177 if ((frame.tf_rflags & PSL_I) == 0) {
178 /*
179 * Buggy application or kernel code has disabled
180 * interrupts and then trapped. Enabling interrupts
181 * now is wrong, but it is better than running with
182 * interrupts disabled until they are accidentally
183 * enabled later.
184 */
185 if (ISPL(frame.tf_cs) == SEL_UPL)
186 printf(
187 "pid %ld (%s): trap %d with interrupts disabled\n",
188 (long)curproc->p_pid, curproc->p_comm, type);
189 else if (type != T_BPTFLT && type != T_TRCTRAP) {
190 /*
191 * XXX not quite right, since this may be for a
192 * multiple fault in user mode.
193 */
194 printf("kernel trap %d with interrupts disabled\n",
195 type);
196 /*
197 * We shouldn't enable interrupts while in a critical
198 * section.
199 */
200 if (td->td_critnest == 0)
201 enable_intr();
202 }
203 }
204
205 code = frame.tf_err;
206 if (type == T_PAGEFLT) {
207 /*
208 * If we get a page fault while in a critical section, then
209 * it is most likely a fatal kernel page fault. The kernel
210 * is already going to panic trying to get a sleep lock to
211 * do the VM lookup, so just consider it a fatal trap so the
212 * kernel can print out a useful trap message and even get
213 * to the debugger.
214 */
215 if (td->td_critnest != 0)
216 trap_fatal(&frame, frame.tf_addr);
217 }
218
219 #ifdef DEVICE_POLLING
220 if (poll_in_trap)
221 ether_poll(poll_in_trap);
222 #endif /* DEVICE_POLLING */
223
224 if (ISPL(frame.tf_cs) == SEL_UPL) {
225 /* user trap */
226
227 sticks = td->td_sticks;
228 td->td_frame = &frame;
229 if (td->td_ucred != p->p_ucred)
230 cred_update_thread(td);
231
232 switch (type) {
233 case T_PRIVINFLT: /* privileged instruction fault */
234 ucode = type;
235 i = SIGILL;
236 break;
237
238 case T_BPTFLT: /* bpt instruction fault */
239 case T_TRCTRAP: /* trace trap */
240 frame.tf_rflags &= ~PSL_T;
241 i = SIGTRAP;
242 break;
243
244 case T_ARITHTRAP: /* arithmetic trap */
245 ucode = npxtrap();
246 if (ucode == -1)
247 goto userout;
248 i = SIGFPE;
249 break;
250
251 case T_PROTFLT: /* general protection fault */
252 case T_STKFLT: /* stack fault */
253 case T_SEGNPFLT: /* segment not present fault */
254 case T_TSSFLT: /* invalid TSS fault */
255 case T_DOUBLEFLT: /* double fault */
256 default:
257 ucode = code + BUS_SEGM_FAULT ;
258 i = SIGBUS;
259 break;
260
261 case T_PAGEFLT: /* page fault */
262 i = trap_pfault(&frame, TRUE);
263 if (i == -1)
264 goto userout;
265 if (i == 0)
266 goto user;
267
268 ucode = T_PAGEFLT;
269 break;
270
271 case T_DIVIDE: /* integer divide fault */
272 ucode = FPE_INTDIV;
273 i = SIGFPE;
274 break;
275
276 #ifdef DEV_ISA
277 case T_NMI:
278 /* machine/parity/power fail/"kitchen sink" faults */
279 /* XXX Giant */
280 if (isa_nmi(code) == 0) {
281 #ifdef DDB
282 /*
283 * NMI can be hooked up to a pushbutton
284 * for debugging.
285 */
286 if (ddb_on_nmi) {
287 printf ("NMI ... going to debugger\n");
288 kdb_trap (type, 0, &frame);
289 }
290 #endif /* DDB */
291 goto userout;
292 } else if (panic_on_nmi)
293 panic("NMI indicates hardware failure");
294 break;
295 #endif /* DEV_ISA */
296
297 case T_OFLOW: /* integer overflow fault */
298 ucode = FPE_INTOVF;
299 i = SIGFPE;
300 break;
301
302 case T_BOUND: /* bounds check fault */
303 ucode = FPE_FLTSUB;
304 i = SIGFPE;
305 break;
306
307 case T_DNA:
308 /* transparent fault (due to context switch "late") */
309 if (npxdna())
310 goto userout;
311 i = SIGFPE;
312 ucode = FPE_FPU_NP_TRAP;
313 break;
314
315 case T_FPOPFLT: /* FPU operand fetch fault */
316 ucode = T_FPOPFLT;
317 i = SIGILL;
318 break;
319
320 case T_XMMFLT: /* SIMD floating-point exception */
321 ucode = 0; /* XXX */
322 i = SIGFPE;
323 break;
324 }
325 } else {
326 /* kernel trap */
327
328 KASSERT(cold || td->td_ucred != NULL,
329 ("kernel trap doesn't have ucred"));
330 switch (type) {
331 case T_PAGEFLT: /* page fault */
332 (void) trap_pfault(&frame, FALSE);
333 goto out;
334
335 case T_DNA:
336 /*
337 * The kernel is apparently using npx for copying.
338 * XXX this should be fatal unless the kernel has
339 * registered such use.
340 */
341 if (npxdna()) {
342 printf("npxdna in kernel mode!\n");
343 goto out;
344 }
345 break;
346
347 case T_STKFLT: /* stack fault */
348 break;
349
350 case T_PROTFLT: /* general protection fault */
351 case T_SEGNPFLT: /* segment not present fault */
352 if (td->td_intr_nesting_level != 0)
353 break;
354
355 /*
356 * Invalid segment selectors and out of bounds
357 * %eip's and %esp's can be set up in user mode.
358 * This causes a fault in kernel mode when the
359 * kernel tries to return to user mode. We want
360 * to get this fault so that we can fix the
361 * problem here and not have to check all the
362 * selectors and pointers when the user changes
363 * them.
364 */
365 if (frame.tf_rip == (long)doreti_iret) {
366 frame.tf_rip = (long)doreti_iret_fault;
367 goto out;
368 }
369 if (PCPU_GET(curpcb) != NULL &&
370 PCPU_GET(curpcb)->pcb_onfault != NULL) {
371 frame.tf_rip =
372 (long)PCPU_GET(curpcb)->pcb_onfault;
373 goto out;
374 }
375 break;
376
377 case T_TSSFLT:
378 /*
379 * PSL_NT can be set in user mode and isn't cleared
380 * automatically when the kernel is entered. This
381 * causes a TSS fault when the kernel attempts to
382 * `iret' because the TSS link is uninitialized. We
383 * want to get this fault so that we can fix the
384 * problem here and not every time the kernel is
385 * entered.
386 */
387 if (frame.tf_rflags & PSL_NT) {
388 frame.tf_rflags &= ~PSL_NT;
389 goto out;
390 }
391 break;
392
393 case T_TRCTRAP: /* trace trap */
394 /*
395 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
396 */
397 case T_BPTFLT:
398 /*
399 * If DDB is enabled, let it handle the debugger trap.
400 * Otherwise, debugger traps "can't happen".
401 */
402 #ifdef DDB
403 /* XXX Giant */
404 if (kdb_trap (type, 0, &frame))
405 goto out;
406 #endif
407 break;
408
409 #ifdef DEV_ISA
410 case T_NMI:
411 /* XXX Giant */
412 /* machine/parity/power fail/"kitchen sink" faults */
413 if (isa_nmi(code) == 0) {
414 #ifdef DDB
415 /*
416 * NMI can be hooked up to a pushbutton
417 * for debugging.
418 */
419 if (ddb_on_nmi) {
420 printf ("NMI ... going to debugger\n");
421 kdb_trap (type, 0, &frame);
422 }
423 #endif /* DDB */
424 goto out;
425 } else if (panic_on_nmi == 0)
426 goto out;
427 /* FALLTHROUGH */
428 #endif /* DEV_ISA */
429 }
430
431 trap_fatal(&frame, 0);
432 goto out;
433 }
434
435 /* Translate fault for emulators (e.g. Linux) */
436 if (*p->p_sysent->sv_transtrap)
437 i = (*p->p_sysent->sv_transtrap)(i, type);
438
439 trapsignal(td, i, ucode);
440
441 #ifdef DEBUG
442 if (type <= MAX_TRAP_MSG) {
443 uprintf("fatal process exception: %s",
444 trap_msg[type]);
445 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
446 uprintf(", fault VA = 0x%lx", frame.tf_addr);
447 uprintf("\n");
448 }
449 #endif
450
451 user:
452 userret(td, &frame, sticks);
453 mtx_assert(&Giant, MA_NOTOWNED);
454 userout:
455 #ifdef DIAGNOSTIC
456 cred_free_thread(td);
457 #endif
458 out:
459 return;
460 }
461
462 static int
463 trap_pfault(frame, usermode)
464 struct trapframe *frame;
465 int usermode;
466 {
467 vm_offset_t va;
468 struct vmspace *vm = NULL;
469 vm_map_t map = 0;
470 int rv = 0;
471 vm_prot_t ftype;
472 struct thread *td = curthread;
473 struct proc *p = td->td_proc;
474 vm_offset_t eva = frame->tf_addr;
475
476 va = trunc_page(eva);
477 if (va >= KERNBASE) {
478 /*
479 * Don't allow user-mode faults in kernel address space.
480 */
481 if (usermode)
482 goto nogo;
483
484 map = kernel_map;
485 } else {
486 /*
487 * This is a fault on non-kernel virtual memory.
488 * vm is initialized above to NULL. If curproc is NULL
489 * or curproc->p_vmspace is NULL the fault is fatal.
490 */
491 if (p != NULL)
492 vm = p->p_vmspace;
493
494 if (vm == NULL)
495 goto nogo;
496
497 map = &vm->vm_map;
498 }
499
500 if (frame->tf_err & PGEX_W)
501 ftype = VM_PROT_WRITE;
502 else
503 ftype = VM_PROT_READ;
504
505 if (map != kernel_map) {
506 /*
507 * Keep swapout from messing with us during this
508 * critical time.
509 */
510 PROC_LOCK(p);
511 ++p->p_lock;
512 PROC_UNLOCK(p);
513
514 /* Fault in the user page: */
515 rv = vm_fault(map, va, ftype,
516 (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
517 : VM_FAULT_NORMAL);
518
519 PROC_LOCK(p);
520 --p->p_lock;
521 PROC_UNLOCK(p);
522 } else {
523 /*
524 * Don't have to worry about process locking or stacks in the
525 * kernel.
526 */
527 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
528 }
529 if (rv == KERN_SUCCESS)
530 return (0);
531 nogo:
532 if (!usermode) {
533 if (td->td_intr_nesting_level == 0 &&
534 PCPU_GET(curpcb) != NULL &&
535 PCPU_GET(curpcb)->pcb_onfault != NULL) {
536 frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
537 return (0);
538 }
539 trap_fatal(frame, eva);
540 return (-1);
541 }
542
543 return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
544 }
545
546 static void
547 trap_fatal(frame, eva)
548 struct trapframe *frame;
549 vm_offset_t eva;
550 {
551 int code, type, ss;
552 long esp;
553 struct soft_segment_descriptor softseg;
554
555 code = frame->tf_err;
556 type = frame->tf_trapno;
557 sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);
558
559 if (type <= MAX_TRAP_MSG)
560 printf("\n\nFatal trap %d: %s while in %s mode\n",
561 type, trap_msg[type],
562 ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
563 if (type == T_PAGEFLT) {
564 printf("fault virtual address = 0x%lx\n", eva);
565 printf("fault code = %s %s, %s\n",
566 code & PGEX_U ? "user" : "supervisor",
567 code & PGEX_W ? "write" : "read",
568 code & PGEX_P ? "protection violation" : "page not present");
569 }
570 printf("instruction pointer = 0x%lx:0x%lx\n",
571 frame->tf_cs & 0xffff, frame->tf_rip);
572 if (ISPL(frame->tf_cs) == SEL_UPL) {
573 ss = frame->tf_ss & 0xffff;
574 esp = frame->tf_rsp;
575 } else {
576 ss = GSEL(GDATA_SEL, SEL_KPL);
577 esp = (long)&frame->tf_rsp;
578 }
579 printf("stack pointer = 0x%x:0x%lx\n", ss, esp);
580 printf("frame pointer = 0x%x:0x%lx\n", ss, frame->tf_rbp);
581 printf("code segment = base 0x%lx, limit 0x%lx, type 0x%x\n",
582 softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
583 printf(" = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
584 softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
585 softseg.ssd_gran);
586 printf("processor eflags = ");
587 if (frame->tf_rflags & PSL_T)
588 printf("trace trap, ");
589 if (frame->tf_rflags & PSL_I)
590 printf("interrupt enabled, ");
591 if (frame->tf_rflags & PSL_NT)
592 printf("nested task, ");
593 if (frame->tf_rflags & PSL_RF)
594 printf("resume, ");
595 printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
596 printf("current process = ");
597 if (curproc) {
598 printf("%lu (%s)\n",
599 (u_long)curproc->p_pid, curproc->p_comm ?
600 curproc->p_comm : "");
601 } else {
602 printf("Idle\n");
603 }
604
605 #ifdef KDB
606 if (kdb_trap(&psl))
607 return;
608 #endif
609 #ifdef DDB
610 if ((debugger_on_panic || db_active) && kdb_trap(type, 0, frame))
611 return;
612 #endif
613 printf("trap number = %d\n", type);
614 if (type <= MAX_TRAP_MSG)
615 panic("%s", trap_msg[type]);
616 else
617 panic("unknown/reserved trap");
618 }
619
/*
 * Double fault handler.  Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack.  This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * Prints a notice and panics.
 */
void
dblfault_handler(void)
{
	printf("\nFatal double fault\n");
	panic("double fault");
}
632
633 /*
634 * syscall - system call request C handler
635 *
636 * A system call is essentially treated as a trap.
637 */
638 void
639 syscall(frame)
640 struct trapframe frame;
641 {
642 caddr_t params;
643 struct sysent *callp;
644 struct thread *td = curthread;
645 struct proc *p = td->td_proc;
646 register_t orig_tf_rflags;
647 u_int sticks;
648 int error;
649 int narg;
650 register_t args[8];
651 register_t *argp;
652 u_int code;
653 int reg, regcnt;
654
655 /*
656 * note: PCPU_LAZY_INC() can only be used if we can afford
657 * occassional inaccuracy in the count.
658 */
659 PCPU_LAZY_INC(cnt.v_syscall);
660
661 #ifdef DIAGNOSTIC
662 if (ISPL(frame.tf_cs) != SEL_UPL) {
663 mtx_lock(&Giant); /* try to stabilize the system XXX */
664 panic("syscall");
665 /* NOT REACHED */
666 mtx_unlock(&Giant);
667 }
668 #endif
669
670 reg = 0;
671 regcnt = 6;
672 sticks = td->td_sticks;
673 td->td_frame = &frame;
674 if (td->td_ucred != p->p_ucred)
675 cred_update_thread(td);
676 if (p->p_flag & P_THREADED)
677 thread_user_enter(p, td);
678 params = (caddr_t)frame.tf_rsp + sizeof(register_t);
679 code = frame.tf_rax;
680 orig_tf_rflags = frame.tf_rflags;
681
682 if (p->p_sysent->sv_prepsyscall) {
683 /*
684 * The prep code is MP aware.
685 */
686 (*p->p_sysent->sv_prepsyscall)(&frame, (int *)args, &code, ¶ms);
687 } else {
688 if (code == SYS_syscall || code == SYS___syscall) {
689 code = frame.tf_rdi;
690 reg++;
691 regcnt--;
692 }
693 }
694
695 if (p->p_sysent->sv_mask)
696 code &= p->p_sysent->sv_mask;
697
698 if (code >= p->p_sysent->sv_size)
699 callp = &p->p_sysent->sv_table[0];
700 else
701 callp = &p->p_sysent->sv_table[code];
702
703 narg = callp->sy_narg & SYF_ARGMASK;
704
705 /*
706 * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
707 */
708 if (narg <= regcnt) {
709 argp = &frame.tf_rdi;
710 argp += reg;
711 error = 0;
712 } else {
713 KASSERT(narg <= sizeof(args) / sizeof(args[0]),
714 ("Too many syscall arguments!"));
715 KASSERT(params != NULL, ("copyin args with no params!"));
716 argp = &frame.tf_rdi;
717 argp += reg;
718 bcopy(argp, args, sizeof(args[0]) * regcnt);
719 error = copyin(params, &args[regcnt],
720 (narg - regcnt) * sizeof(args[0]));
721 argp = &args[0];
722 }
723
724 #ifdef KTRACE
725 if (KTRPOINT(td, KTR_SYSCALL))
726 ktrsyscall(code, narg, argp);
727 #endif
728
729 /*
730 * Try to run the syscall without Giant if the syscall
731 * is MP safe.
732 */
733 if ((callp->sy_narg & SYF_MPSAFE) == 0)
734 mtx_lock(&Giant);
735
736 if (error == 0) {
737 td->td_retval[0] = 0;
738 td->td_retval[1] = frame.tf_rdx;
739
740 STOPEVENT(p, S_SCE, narg);
741
742 error = (*callp->sy_call)(td, argp);
743 }
744
745 switch (error) {
746 case 0:
747 frame.tf_rax = td->td_retval[0];
748 frame.tf_rdx = td->td_retval[1];
749 frame.tf_rflags &= ~PSL_C;
750 break;
751
752 case ERESTART:
753 /*
754 * Reconstruct pc, we know that 'syscall' is 2 bytes.
755 * We have to do a full context restore so that %r10
756 * (which was holding the value of %rcx) is restored for
757 * the next iteration.
758 */
759 frame.tf_rip -= frame.tf_err;
760 frame.tf_r10 = frame.tf_rcx;
761 td->td_pcb->pcb_flags |= PCB_FULLCTX;
762 break;
763
764 case EJUSTRETURN:
765 break;
766
767 default:
768 if (p->p_sysent->sv_errsize) {
769 if (error >= p->p_sysent->sv_errsize)
770 error = -1; /* XXX */
771 else
772 error = p->p_sysent->sv_errtbl[error];
773 }
774 frame.tf_rax = error;
775 frame.tf_rflags |= PSL_C;
776 break;
777 }
778
779 /*
780 * Release Giant if we previously set it.
781 */
782 if ((callp->sy_narg & SYF_MPSAFE) == 0)
783 mtx_unlock(&Giant);
784
785 /*
786 * Traced syscall.
787 */
788 if (orig_tf_rflags & PSL_T) {
789 frame.tf_rflags &= ~PSL_T;
790 trapsignal(td, SIGTRAP, 0);
791 }
792
793 /*
794 * Handle reschedule and other end-of-syscall issues
795 */
796 userret(td, &frame, sticks);
797
798 #ifdef KTRACE
799 if (KTRPOINT(td, KTR_SYSRET))
800 ktrsysret(code, error, td->td_retval[0]);
801 #endif
802
803 /*
804 * This works because errno is findable through the
805 * register set. If we ever support an emulation where this
806 * is not the case, this code will need to be revisited.
807 */
808 STOPEVENT(p, S_SCX, code);
809
810 #ifdef DIAGNOSTIC
811 cred_free_thread(td);
812 #endif
813 WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
814 (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
815 mtx_assert(&sched_lock, MA_NOTOWNED);
816 mtx_assert(&Giant, MA_NOTOWNED);
817 }
/* Cache object: fc70bc333bf03ea8e83b9a84ac8313b3 */