The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/trap.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 1994, David Greenman
    3  * Copyright (c) 1990, 1993
    4  *      The Regents of the University of California.  All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * the University of Utah, and William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: releng/6.4/sys/amd64/amd64/trap.c 173828 2007-11-21 16:41:51Z jhb $");
   42 
   43 /*
   44  * AMD64 Trap and System call handling
   45  */
   46 
   47 #include "opt_clock.h"
   48 #include "opt_cpu.h"
   49 #include "opt_hwpmc_hooks.h"
   50 #include "opt_isa.h"
   51 #include "opt_kdb.h"
   52 #include "opt_ktrace.h"
   53 
   54 #include <sys/param.h>
   55 #include <sys/bus.h>
   56 #include <sys/systm.h>
   57 #include <sys/proc.h>
   58 #include <sys/pioctl.h>
   59 #include <sys/ptrace.h>
   60 #include <sys/kdb.h>
   61 #include <sys/kernel.h>
   62 #include <sys/ktr.h>
   63 #include <sys/lock.h>
   64 #include <sys/mutex.h>
   65 #include <sys/resourcevar.h>
   66 #include <sys/signalvar.h>
   67 #include <sys/syscall.h>
   68 #include <sys/sysctl.h>
   69 #include <sys/sysent.h>
   70 #include <sys/uio.h>
   71 #include <sys/vmmeter.h>
   72 #ifdef KTRACE
   73 #include <sys/ktrace.h>
   74 #endif
   75 #ifdef HWPMC_HOOKS
   76 #include <sys/pmckern.h>
   77 #endif
   78 
   79 #include <vm/vm.h>
   80 #include <vm/vm_param.h>
   81 #include <vm/pmap.h>
   82 #include <vm/vm_kern.h>
   83 #include <vm/vm_map.h>
   84 #include <vm/vm_page.h>
   85 #include <vm/vm_extern.h>
   86 
   87 #include <machine/cpu.h>
   88 #include <machine/intr_machdep.h>
   89 #include <machine/md_var.h>
   90 #include <machine/pcb.h>
   91 #ifdef SMP
   92 #include <machine/smp.h>
   93 #endif
   94 #include <machine/tss.h>
   95 #include <security/audit/audit.h>
   96 
   97 extern void trap(struct trapframe frame);
   98 extern void syscall(struct trapframe frame);
   99 void dblfault_handler(struct trapframe frame);
  100 
  101 static int trap_pfault(struct trapframe *, int);
  102 static void trap_fatal(struct trapframe *, vm_offset_t);
  103 
  104 #define MAX_TRAP_MSG            30
  105 static char *trap_msg[] = {
  106         "",                                     /*  0 unused */
  107         "privileged instruction fault",         /*  1 T_PRIVINFLT */
  108         "",                                     /*  2 unused */
  109         "breakpoint instruction fault",         /*  3 T_BPTFLT */
  110         "",                                     /*  4 unused */
  111         "",                                     /*  5 unused */
  112         "arithmetic trap",                      /*  6 T_ARITHTRAP */
  113         "",                                     /*  7 unused */
  114         "",                                     /*  8 unused */
  115         "general protection fault",             /*  9 T_PROTFLT */
  116         "trace trap",                           /* 10 T_TRCTRAP */
  117         "",                                     /* 11 unused */
  118         "page fault",                           /* 12 T_PAGEFLT */
  119         "",                                     /* 13 unused */
  120         "alignment fault",                      /* 14 T_ALIGNFLT */
  121         "",                                     /* 15 unused */
  122         "",                                     /* 16 unused */
  123         "",                                     /* 17 unused */
  124         "integer divide fault",                 /* 18 T_DIVIDE */
  125         "non-maskable interrupt trap",          /* 19 T_NMI */
  126         "overflow trap",                        /* 20 T_OFLOW */
  127         "FPU bounds check fault",               /* 21 T_BOUND */
  128         "FPU device not available",             /* 22 T_DNA */
  129         "double fault",                         /* 23 T_DOUBLEFLT */
  130         "FPU operand fetch fault",              /* 24 T_FPOPFLT */
  131         "invalid TSS fault",                    /* 25 T_TSSFLT */
  132         "segment not present fault",            /* 26 T_SEGNPFLT */
  133         "stack fault",                          /* 27 T_STKFLT */
  134         "machine check trap",                   /* 28 T_MCHK */
  135         "SIMD floating-point exception",        /* 29 T_XMMFLT */
  136         "reserved (unknown) fault",             /* 30 T_RESERVED */
  137 };
  138 
  139 #ifdef KDB
  140 static int kdb_on_nmi = 1;
  141 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
  142         &kdb_on_nmi, 0, "Go to KDB on NMI");
  143 #endif
  144 static int panic_on_nmi = 1;
  145 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
  146         &panic_on_nmi, 0, "Panic on NMI");
  147 
  148 #ifdef WITNESS
  149 extern char *syscallnames[];
  150 #endif
  151 
  152 /*
  153  * Exception, fault, and trap interface to the FreeBSD kernel.
  154  * This common code is called from assembly language IDT gate entry
  155  * routines that prepare a suitable stack frame, and restore this
  156  * frame after the exception has been processed.
  157  */
  158 
  159 void
  160 trap(frame)
  161         struct trapframe frame;
  162 {
  163         struct thread *td = curthread;
  164         struct proc *p = td->td_proc;
  165         u_int sticks = 0, type;
  166         int i = 0, ucode = 0, code;
  167 
  168         PCPU_LAZY_INC(cnt.v_trap);
  169         type = frame.tf_trapno;
  170 
  171 #ifdef KDB_STOP_NMI
  172         /* Handler for NMI IPIs used for debugging */
  173         if (type == T_NMI) {
  174                  if (ipi_nmi_handler() == 0)
  175                            goto out;
  176         }
  177 #endif /* KDB_STOP_NMI */
  178 
  179 #ifdef KDB
  180         if (kdb_active) {
  181                 kdb_reenter();
  182                 goto out;
  183         }
  184 #endif
  185 
  186 #ifdef  HWPMC_HOOKS
  187         /*
  188          * CPU PMCs interrupt using an NMI.  If the PMC module is
  189          * active, pass the 'rip' value to the PMC module's interrupt
  190          * handler.  A return value of '1' from the handler means that
  191          * the NMI was handled by it and we can return immediately.
  192          */
  193         if (type == T_NMI && pmc_intr &&
  194             (*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame.tf_rip,
  195                 TRAPF_USERMODE(&frame)))
  196                 goto out;
  197 #endif
  198 
  199         if ((frame.tf_rflags & PSL_I) == 0) {
  200                 /*
  201                  * Buggy application or kernel code has disabled
  202                  * interrupts and then trapped.  Enabling interrupts
  203                  * now is wrong, but it is better than running with
  204                  * interrupts disabled until they are accidentally
  205                  * enabled later.
  206                  */
  207                 if (ISPL(frame.tf_cs) == SEL_UPL)
  208                         printf(
  209                             "pid %ld (%s): trap %d with interrupts disabled\n",
  210                             (long)curproc->p_pid, curproc->p_comm, type);
  211                 else if (type != T_NMI && type != T_BPTFLT &&
  212                     type != T_TRCTRAP) {
  213                         /*
  214                          * XXX not quite right, since this may be for a
  215                          * multiple fault in user mode.
  216                          */
  217                         printf("kernel trap %d with interrupts disabled\n",
  218                             type);
  219                         /*
  220                          * We shouldn't enable interrupts while in a critical
  221                          * section.
  222                          */
  223                         if (td->td_critnest == 0)
  224                                 enable_intr();
  225                 }
  226         }
  227 
  228         code = frame.tf_err;
  229         if (type == T_PAGEFLT) {
  230                 /*
  231                  * If we get a page fault while in a critical section, then
  232                  * it is most likely a fatal kernel page fault.  The kernel
  233                  * is already going to panic trying to get a sleep lock to
  234                  * do the VM lookup, so just consider it a fatal trap so the
  235                  * kernel can print out a useful trap message and even get
  236                  * to the debugger.
  237                  */
  238                 if (td->td_critnest != 0)
  239                         trap_fatal(&frame, frame.tf_addr);
  240         }
  241 
  242         if (ISPL(frame.tf_cs) == SEL_UPL) {
  243                 /* user trap */
  244 
  245                 sticks = td->td_sticks;
  246                 td->td_frame = &frame;
  247                 if (td->td_ucred != p->p_ucred) 
  248                         cred_update_thread(td);
  249 
  250                 switch (type) {
  251                 case T_PRIVINFLT:       /* privileged instruction fault */
  252                         ucode = type;
  253                         i = SIGILL;
  254                         break;
  255 
  256                 case T_BPTFLT:          /* bpt instruction fault */
  257                 case T_TRCTRAP:         /* trace trap */
  258                         enable_intr();
  259                         frame.tf_rflags &= ~PSL_T;
  260                         i = SIGTRAP;
  261                         break;
  262 
  263                 case T_ARITHTRAP:       /* arithmetic trap */
  264                         ucode = fputrap();
  265                         if (ucode == -1)
  266                                 goto userout;
  267                         i = SIGFPE;
  268                         break;
  269 
  270                 case T_PROTFLT:         /* general protection fault */
  271                 case T_STKFLT:          /* stack fault */
  272                 case T_SEGNPFLT:        /* segment not present fault */
  273                 case T_TSSFLT:          /* invalid TSS fault */
  274                 case T_DOUBLEFLT:       /* double fault */
  275                 default:
  276                         ucode = code + BUS_SEGM_FAULT ;
  277                         i = SIGBUS;
  278                         break;
  279 
  280                 case T_PAGEFLT:         /* page fault */
  281                         if (td->td_pflags & TDP_SA)
  282                                 thread_user_enter(td);
  283                         i = trap_pfault(&frame, TRUE);
  284                         if (i == -1)
  285                                 goto userout;
  286                         if (i == 0)
  287                                 goto user;
  288 
  289                         ucode = T_PAGEFLT;
  290                         break;
  291 
  292                 case T_DIVIDE:          /* integer divide fault */
  293                         ucode = FPE_INTDIV;
  294                         i = SIGFPE;
  295                         break;
  296 
  297 #ifdef DEV_ISA
  298                 case T_NMI:
  299                         /* machine/parity/power fail/"kitchen sink" faults */
  300                         /* XXX Giant */
  301                         if (isa_nmi(code) == 0) {
  302 #ifdef KDB
  303                                 /*
  304                                  * NMI can be hooked up to a pushbutton
  305                                  * for debugging.
  306                                  */
  307                                 if (kdb_on_nmi) {
  308                                         printf ("NMI ... going to debugger\n");
  309                                         kdb_trap(type, 0, &frame);
  310                                 }
  311 #endif /* KDB */
  312                                 goto userout;
  313                         } else if (panic_on_nmi)
  314                                 panic("NMI indicates hardware failure");
  315                         break;
  316 #endif /* DEV_ISA */
  317 
  318                 case T_OFLOW:           /* integer overflow fault */
  319                         ucode = FPE_INTOVF;
  320                         i = SIGFPE;
  321                         break;
  322 
  323                 case T_BOUND:           /* bounds check fault */
  324                         ucode = FPE_FLTSUB;
  325                         i = SIGFPE;
  326                         break;
  327 
  328                 case T_DNA:
  329                         /* transparent fault (due to context switch "late") */
  330                         if (fpudna())
  331                                 goto userout;
  332                         i = SIGFPE;
  333                         ucode = FPE_FPU_NP_TRAP;
  334                         break;
  335 
  336                 case T_FPOPFLT:         /* FPU operand fetch fault */
  337                         ucode = T_FPOPFLT;
  338                         i = SIGILL;
  339                         break;
  340 
  341                 case T_XMMFLT:          /* SIMD floating-point exception */
  342                         ucode = 0; /* XXX */
  343                         i = SIGFPE;
  344                         break;
  345                 }
  346         } else {
  347                 /* kernel trap */
  348 
  349                 KASSERT(cold || td->td_ucred != NULL,
  350                     ("kernel trap doesn't have ucred"));
  351                 switch (type) {
  352                 case T_PAGEFLT:                 /* page fault */
  353                         (void) trap_pfault(&frame, FALSE);
  354                         goto out;
  355 
  356                 case T_DNA:
  357                         /*
  358                          * The kernel is apparently using fpu for copying.
  359                          * XXX this should be fatal unless the kernel has
  360                          * registered such use.
  361                          */
  362                         if (fpudna()) {
  363                                 printf("fpudna in kernel mode!\n");
  364                                 goto out;
  365                         }
  366                         break;
  367 
  368                 case T_STKFLT:          /* stack fault */
  369                         break;
  370 
  371                 case T_PROTFLT:         /* general protection fault */
  372                 case T_SEGNPFLT:        /* segment not present fault */
  373                         if (td->td_intr_nesting_level != 0)
  374                                 break;
  375 
  376                         /*
  377                          * Invalid segment selectors and out of bounds
  378                          * %rip's and %rsp's can be set up in user mode.
  379                          * This causes a fault in kernel mode when the
  380                          * kernel tries to return to user mode.  We want
  381                          * to get this fault so that we can fix the
  382                          * problem here and not have to check all the
  383                          * selectors and pointers when the user changes
  384                          * them.
  385                          */
  386                         if (frame.tf_rip == (long)doreti_iret) {
  387                                 frame.tf_rip = (long)doreti_iret_fault;
  388                                 goto out;
  389                         }
  390                         if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
  391                                 frame.tf_rip =
  392                                     (long)PCPU_GET(curpcb)->pcb_onfault;
  393                                 goto out;
  394                         }
  395                         break;
  396 
  397                 case T_TSSFLT:
  398                         /*
  399                          * PSL_NT can be set in user mode and isn't cleared
  400                          * automatically when the kernel is entered.  This
  401                          * causes a TSS fault when the kernel attempts to
  402                          * `iret' because the TSS link is uninitialized.  We
  403                          * want to get this fault so that we can fix the
  404                          * problem here and not every time the kernel is
  405                          * entered.
  406                          */
  407                         if (frame.tf_rflags & PSL_NT) {
  408                                 frame.tf_rflags &= ~PSL_NT;
  409                                 goto out;
  410                         }
  411                         break;
  412 
  413                 case T_TRCTRAP:  /* trace trap */
  414                         /*
  415                          * Ignore debug register trace traps due to
  416                          * accesses in the user's address space, which
  417                          * can happen under several conditions such as
  418                          * if a user sets a watchpoint on a buffer and
  419                          * then passes that buffer to a system call.
  420                          * We still want to get TRCTRAPS for addresses
  421                          * in kernel space because that is useful when
  422                          * debugging the kernel.
  423                          */
  424                         if (user_dbreg_trap()) {
  425                                 /*
  426                                  * Reset breakpoint bits because the
  427                                  * processor doesn't
  428                                  */
  429                                 /* XXX check upper bits here */
  430                                 load_dr6(rdr6() & 0xfffffff0);
  431                                 goto out;
  432                         }
  433                         /*
  434                          * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
  435                          */
  436                 case T_BPTFLT:
  437                         /*
  438                          * If KDB is enabled, let it handle the debugger trap.
  439                          * Otherwise, debugger traps "can't happen".
  440                          */
  441 #ifdef KDB
  442                         /* XXX Giant */
  443                         if (kdb_trap(type, 0, &frame))
  444                                 goto out;
  445 #endif
  446                         break;
  447 
  448 #ifdef DEV_ISA
  449                 case T_NMI:
  450                         /* XXX Giant */
  451                         /* machine/parity/power fail/"kitchen sink" faults */
  452                         if (isa_nmi(code) == 0) {
  453 #ifdef KDB
  454                                 /*
  455                                  * NMI can be hooked up to a pushbutton
  456                                  * for debugging.
  457                                  */
  458                                 if (kdb_on_nmi) {
  459                                         printf ("NMI ... going to debugger\n");
  460                                         kdb_trap(type, 0, &frame);
  461                                 }
  462 #endif /* KDB */
  463                                 goto out;
  464                         } else if (panic_on_nmi == 0)
  465                                 goto out;
  466                         /* FALLTHROUGH */
  467 #endif /* DEV_ISA */
  468                 }
  469 
  470                 trap_fatal(&frame, 0);
  471                 goto out;
  472         }
  473 
  474         /* Translate fault for emulators (e.g. Linux) */
  475         if (*p->p_sysent->sv_transtrap)
  476                 i = (*p->p_sysent->sv_transtrap)(i, type);
  477 
  478         trapsignal(td, i, ucode);
  479 
  480 #ifdef DEBUG
  481         if (type <= MAX_TRAP_MSG) {
  482                 uprintf("fatal process exception: %s",
  483                         trap_msg[type]);
  484                 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
  485                         uprintf(", fault VA = 0x%lx", frame.tf_addr);
  486                 uprintf("\n");
  487         }
  488 #endif
  489 
  490 user:
  491         userret(td, &frame, sticks);
  492         mtx_assert(&Giant, MA_NOTOWNED);
  493 userout:
  494 out:
  495         return;
  496 }
  497 
  498 static int
  499 trap_pfault(frame, usermode)
  500         struct trapframe *frame;
  501         int usermode;
  502 {
  503         vm_offset_t va;
  504         struct vmspace *vm = NULL;
  505         vm_map_t map = 0;
  506         int rv = 0;
  507         vm_prot_t ftype;
  508         struct thread *td = curthread;
  509         struct proc *p = td->td_proc;
  510         vm_offset_t eva = frame->tf_addr;
  511 
  512         va = trunc_page(eva);
  513         if (va >= KERNBASE) {
  514                 /*
  515                  * Don't allow user-mode faults in kernel address space.
  516                  */
  517                 if (usermode)
  518                         goto nogo;
  519 
  520                 map = kernel_map;
  521         } else {
  522                 /*
  523                  * This is a fault on non-kernel virtual memory.
  524                  * vm is initialized above to NULL. If curproc is NULL
  525                  * or curproc->p_vmspace is NULL the fault is fatal.
  526                  */
  527                 if (p != NULL)
  528                         vm = p->p_vmspace;
  529 
  530                 if (vm == NULL)
  531                         goto nogo;
  532 
  533                 map = &vm->vm_map;
  534         }
  535 
  536         /*
  537          * PGEX_I is defined only if the execute disable bit capability is
  538          * supported and enabled.
  539          */
  540         if (frame->tf_err & PGEX_W)
  541                 ftype = VM_PROT_WRITE;
  542         else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
  543                 ftype = VM_PROT_EXECUTE;
  544         else
  545                 ftype = VM_PROT_READ;
  546 
  547         if (map != kernel_map) {
  548                 /*
  549                  * Keep swapout from messing with us during this
  550                  *      critical time.
  551                  */
  552                 PROC_LOCK(p);
  553                 ++p->p_lock;
  554                 PROC_UNLOCK(p);
  555 
  556                 /* Fault in the user page: */
  557                 rv = vm_fault(map, va, ftype,
  558                               (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
  559                                                       : VM_FAULT_NORMAL);
  560 
  561                 PROC_LOCK(p);
  562                 --p->p_lock;
  563                 PROC_UNLOCK(p);
  564         } else {
  565                 /*
  566                  * Don't have to worry about process locking or stacks in the
  567                  * kernel.
  568                  */
  569                 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
  570         }
  571         if (rv == KERN_SUCCESS)
  572                 return (0);
  573 nogo:
  574         if (!usermode) {
  575                 if (td->td_intr_nesting_level == 0 &&
  576                     PCPU_GET(curpcb)->pcb_onfault != NULL) {
  577                         frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
  578                         return (0);
  579                 }
  580                 trap_fatal(frame, eva);
  581                 return (-1);
  582         }
  583 
  584         return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
  585 }
  586 
  587 static void
  588 trap_fatal(frame, eva)
  589         struct trapframe *frame;
  590         vm_offset_t eva;
  591 {
  592         int code, ss;
  593         u_int type;
  594         long esp;
  595         struct soft_segment_descriptor softseg;
  596         char *msg;
  597 
  598         code = frame->tf_err;
  599         type = frame->tf_trapno;
  600         sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);
  601 
  602         if (type <= MAX_TRAP_MSG)
  603                 msg = trap_msg[type];
  604         else
  605                 msg = "UNKNOWN";
  606         printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
  607             ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
  608 #ifdef SMP
  609         /* two separate prints in case of a trap on an unmapped page */
  610         printf("cpuid = %d; ", PCPU_GET(cpuid));
  611         printf("apic id = %02x\n", PCPU_GET(apic_id));
  612 #endif
  613         if (type == T_PAGEFLT) {
  614                 printf("fault virtual address   = 0x%lx\n", eva);
  615                 printf("fault code              = %s %s %s, %s\n",
  616                         code & PGEX_U ? "user" : "supervisor",
  617                         code & PGEX_W ? "write" : "read",
  618                         code & PGEX_I ? "instruction" : "data",
  619                         code & PGEX_P ? "protection violation" : "page not present");
  620         }
  621         printf("instruction pointer     = 0x%lx:0x%lx\n",
  622                frame->tf_cs & 0xffff, frame->tf_rip);
  623         if (ISPL(frame->tf_cs) == SEL_UPL) {
  624                 ss = frame->tf_ss & 0xffff;
  625                 esp = frame->tf_rsp;
  626         } else {
  627                 ss = GSEL(GDATA_SEL, SEL_KPL);
  628                 esp = (long)&frame->tf_rsp;
  629         }
  630         printf("stack pointer           = 0x%x:0x%lx\n", ss, esp);
  631         printf("frame pointer           = 0x%x:0x%lx\n", ss, frame->tf_rbp);
  632         printf("code segment            = base 0x%lx, limit 0x%lx, type 0x%x\n",
  633                softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
  634         printf("                        = DPL %d, pres %d, long %d, def32 %d, gran %d\n",
  635                softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
  636                softseg.ssd_gran);
  637         printf("processor eflags        = ");
  638         if (frame->tf_rflags & PSL_T)
  639                 printf("trace trap, ");
  640         if (frame->tf_rflags & PSL_I)
  641                 printf("interrupt enabled, ");
  642         if (frame->tf_rflags & PSL_NT)
  643                 printf("nested task, ");
  644         if (frame->tf_rflags & PSL_RF)
  645                 printf("resume, ");
  646         printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
  647         printf("current process         = ");
  648         if (curproc) {
  649                 printf("%lu (%s)\n",
  650                     (u_long)curproc->p_pid, curproc->p_comm ?
  651                     curproc->p_comm : "");
  652         } else {
  653                 printf("Idle\n");
  654         }
  655 
  656 #ifdef KDB
  657         if (debugger_on_panic || kdb_active) {
  658                 register_t rflags;
  659                 rflags = intr_disable();
  660                 if (kdb_trap(type, 0, frame)) {
  661                         intr_restore(rflags);
  662                         return;
  663                 }
  664                 intr_restore(rflags);
  665         }
  666 #endif
  667         printf("trap number             = %d\n", type);
  668         if (type <= MAX_TRAP_MSG)
  669                 panic("%s", trap_msg[type]);
  670         else
  671                 panic("unknown/reserved trap");
  672 }
  673 
  674 /*
  675  * Double fault handler. Called when a fault occurs while writing
  676  * a frame for a trap/exception onto the stack. This usually occurs
  677  * when the stack overflows (such is the case with infinite recursion,
  678  * for example).
  679  */
  680 void
  681 dblfault_handler(struct trapframe frame)
  682 {
  683         printf("\nFatal double fault\n");
  684         printf("rip = 0x%lx\n", frame.tf_rip);
  685         printf("rsp = 0x%lx\n", frame.tf_rsp);
  686         printf("rbp = 0x%lx\n", frame.tf_rbp);
  687 #ifdef SMP
  688         /* two separate prints in case of a trap on an unmapped page */
  689         printf("cpuid = %d; ", PCPU_GET(cpuid));
  690         printf("apic id = %02x\n", PCPU_GET(apic_id));
  691 #endif
  692         panic("double fault");
  693 }
  694 
  695 /*
  696  *      syscall -       system call request C handler
  697  *
  698  *      A system call is essentially treated as a trap.
       *
       *      The syscall number is taken from tf_rax.  When the ABI has no
       *      sv_prepsyscall hook and the number is SYS_syscall or
       *      SYS___syscall, the real number is taken from tf_rdi instead
       *      and the register arguments start one slot later.  Register
       *      arguments are copied from the frame starting at tf_rdi; any
       *      beyond those are fetched with copyin() from params.  The
       *      handler's result is returned through tf_rax/tf_rdx; PSL_C in
       *      tf_rflags is cleared on success and set for errors other
       *      than ERESTART and EJUSTRETURN.
  699  */
  700 void
  701 syscall(frame)
  702         struct trapframe frame;
  703 {
  704         caddr_t params;
  705         struct sysent *callp;
  706         struct thread *td = curthread;
  707         struct proc *p = td->td_proc;
  708         register_t orig_tf_rflags;
  709         u_int sticks;
  710         int error;
  711         int narg;
  712         register_t args[8];
  713         register_t *argp;
  714         u_int code;
  715         int reg, regcnt;
  716 
  717         /*
  718          * note: PCPU_LAZY_INC() can only be used if we can afford
  719          * occasional inaccuracy in the count.
  720          */
  721         PCPU_LAZY_INC(cnt.v_syscall);
  722 
  723 #ifdef DIAGNOSTIC
              /* Sanity check: panic unless ISPL(tf_cs) is SEL_UPL. */
  724         if (ISPL(frame.tf_cs) != SEL_UPL) {
  725                 mtx_lock(&Giant);       /* try to stabilize the system XXX */
  726                 panic("syscall");
  727                 /* NOT REACHED */
  728                 mtx_unlock(&Giant);
  729         }
  730 #endif
  731 
              /*
               * reg is the index, relative to tf_rdi, of the first argument
               * register; regcnt is how many register arguments are copied
               * out of the frame.
               */
  732         reg = 0;
  733         regcnt = 6;
              /* Snapshot td_sticks at entry; it is handed to userret() below. */
  734         sticks = td->td_sticks;
  735         td->td_frame = &frame;
  736         if (td->td_ucred != p->p_ucred) 
  737                 cred_update_thread(td);
  738         if (p->p_flag & P_SA)
  739                 thread_user_enter(td);
              /*
               * Default source for arguments that do not fit in registers:
               * one register_t above tf_rsp.
               * NOTE(review): presumably skips the return address -- confirm.
               */
  740         params = (caddr_t)frame.tf_rsp + sizeof(register_t);
  741         code = frame.tf_rax;
              /*
               * Keep the entry-time flags: tf_rflags is modified below, but
               * the PSL_T test at the end must use the original value.
               */
  742         orig_tf_rflags = frame.tf_rflags;
  743 
  744         if (p->p_sysent->sv_prepsyscall) {
  745                 /*
  746                  * The prep code is MP aware.
  747                  */
  748                 (*p->p_sysent->sv_prepsyscall)(&frame, (int *)args, &code, &params);
  749         } else {
                      /*
                       * Indirect syscall: the real number is in tf_rdi, so
                       * skip that register and copy one fewer argument.
                       */
  750                 if (code == SYS_syscall || code == SYS___syscall) {
  751                         code = frame.tf_rdi;
  752                         reg++;
  753                         regcnt--;
  754                 }
  755         }
  756 
  757         if (p->p_sysent->sv_mask)
  758                 code &= p->p_sysent->sv_mask;
  759 
              /* Out-of-range syscall numbers are dispatched to table entry 0. */
  760         if (code >= p->p_sysent->sv_size)
  761                 callp = &p->p_sysent->sv_table[0];
  762         else
  763                 callp = &p->p_sysent->sv_table[code];
  764 
              /*
               * sy_narg also carries flag bits (SYF_MPSAFE is tested on it
               * below); mask them off to get the argument count.
               */
  765         narg = callp->sy_narg & SYF_ARGMASK;
  766 
  767         /*
  768          * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
  769          */
  770         KASSERT(narg <= sizeof(args) / sizeof(args[0]),
  771             ("Too many syscall arguments!"));
  772         error = 0;
              /*
               * Copy regcnt register_t values out of the frame, starting reg
               * slots past tf_rdi, into args[].
               * NOTE(review): this treats the frame fields from tf_rdi onward
               * as an array, so it depends on the struct trapframe layout --
               * confirm against the structure definition.
               */
  773         argp = &frame.tf_rdi;
  774         argp += reg;
  775         bcopy(argp, args, sizeof(args[0]) * regcnt);
              /* Fetch any remaining arguments from params into args[regcnt..]. */
  776         if (narg > regcnt) {
  777                 KASSERT(params != NULL, ("copyin args with no params!"));
  778                 error = copyin(params, &args[regcnt],
  779                         (narg - regcnt) * sizeof(args[0]));
  780         }
  781         argp = &args[0];
  782 
  783 #ifdef KTRACE
  784         if (KTRPOINT(td, KTR_SYSCALL))
  785                 ktrsyscall(code, narg, argp);
  786 #endif
  787 
  788         CTR4(KTR_SYSC, "syscall enter thread %p pid %d proc %s code %d", td,
  789             td->td_proc->p_pid, td->td_proc->p_comm, code);
  790 
              /* Only call the handler if the arguments were fetched successfully. */
  791         if (error == 0) {
                      /*
                       * Preset the return values.  td_retval[1] starts as the
                       * current tf_rdx, so on success tf_rdx is written back
                       * unchanged unless the handler sets td_retval[1].
                       */
  792                 td->td_retval[0] = 0;
  793                 td->td_retval[1] = frame.tf_rdx;
  794 
  795                 STOPEVENT(p, S_SCE, narg);
  796 
  797                 PTRACESTOP_SC(p, td, S_PT_SCE);
  798 
                      /* Handlers not flagged SYF_MPSAFE are called with Giant held. */
  799                 if ((callp->sy_narg & SYF_MPSAFE) == 0) {
  800                         mtx_lock(&Giant);
  801                         AUDIT_SYSCALL_ENTER(code, td);
  802                         error = (*callp->sy_call)(td, argp);
  803                         AUDIT_SYSCALL_EXIT(error, td);
  804                         mtx_unlock(&Giant);
  805                 } else {
  806                         AUDIT_SYSCALL_ENTER(code, td);
  807                         error = (*callp->sy_call)(td, argp);
  808                         AUDIT_SYSCALL_EXIT(error, td);
  809                 }
  810         }
  811 
              /* Translate the handler's result into the frame. */
  812         switch (error) {
  813         case 0:
                      /* Success: return values in rax/rdx, PSL_C cleared. */
  814                 frame.tf_rax = td->td_retval[0];
  815                 frame.tf_rdx = td->td_retval[1];
  816                 frame.tf_rflags &= ~PSL_C;
  817                 break;
  818 
  819         case ERESTART:
  820                 /*
  821                  * Reconstruct pc, we know that 'syscall' is 2 bytes.
  822                  * We have to do a full context restore so that %r10
  823                  * (which was holding the value of %rcx) is restored for
  824                  * the next iteration.
  825                  */
  826                 frame.tf_rip -= frame.tf_err;
  827                 frame.tf_r10 = frame.tf_rcx;
  828                 td->td_pcb->pcb_flags |= PCB_FULLCTX;
  829                 break;
  830 
  831         case EJUSTRETURN:
                      /* Leave the frame exactly as it is. */
  832                 break;
  833 
  834         default:
                      /*
                       * Error: if the ABI supplies an error table, map the
                       * error through it (out-of-range values become -1), then
                       * return it in rax with PSL_C set.
                       */
  835                 if (p->p_sysent->sv_errsize) {
  836                         if (error >= p->p_sysent->sv_errsize)
  837                                 error = -1;     /* XXX */
  838                         else
  839                                 error = p->p_sysent->sv_errtbl[error];
  840                 }
  841                 frame.tf_rax = error;
  842                 frame.tf_rflags |= PSL_C;
  843                 break;
  844         }
  845 
  846         /*
  847          * Traced syscall.
               * If PSL_T was set in the flags on entry, clear it in the frame
               * and post SIGTRAP to the thread.
  848          */
  849         if (orig_tf_rflags & PSL_T) {
  850                 frame.tf_rflags &= ~PSL_T;
  851                 trapsignal(td, SIGTRAP, 0);
  852         }
  853 
  854         /*
  855          * Handle reschedule and other end-of-syscall issues
  856          */
  857         userret(td, &frame, sticks);
  858 
  859         CTR4(KTR_SYSC, "syscall exit thread %p pid %d proc %s code %d", td,
  860             td->td_proc->p_pid, td->td_proc->p_comm, code);
  861 
  862 #ifdef KTRACE
  863         if (KTRPOINT(td, KTR_SYSRET))
  864                 ktrsysret(code, error, td->td_retval[0]);
  865 #endif
  866 
  867         /*
  868          * This works because errno is findable through the
  869          * register set.  If we ever support an emulation where this
  870          * is not the case, this code will need to be revisited.
  871          */
  872         STOPEVENT(p, S_SCX, code);
  873 
  874         PTRACESTOP_SC(p, td, S_PT_SCX);
  875 
              /*
               * NOTE(review): code is a u_int, so the "code >= 0" half of the
               * range check below is always true; only the upper bound matters.
               */
  876         WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
  877             (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
              /* Neither sched_lock nor Giant may be held on return. */
  878         mtx_assert(&sched_lock, MA_NOTOWNED);
  879         mtx_assert(&Giant, MA_NOTOWNED);
  880 }

Cache object: a641e014731698ef10f5d78183cb5728


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.