The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/trap.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 1994, David Greenman
    3  * Copyright (c) 1990, 1993
    4  *      The Regents of the University of California.  All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * the University of Utah, and William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: releng/8.3/sys/i386/i386/trap.c 227950 2011-11-24 19:02:04Z rstone $");
   42 
   43 /*
   44  * 386 Trap and System call handling
   45  */
   46 
   47 #include "opt_clock.h"
   48 #include "opt_cpu.h"
   49 #include "opt_hwpmc_hooks.h"
   50 #include "opt_isa.h"
   51 #include "opt_kdb.h"
   52 #include "opt_kdtrace.h"
   53 #include "opt_ktrace.h"
   54 #include "opt_npx.h"
   55 #include "opt_trap.h"
   56 
   57 #include <sys/param.h>
   58 #include <sys/bus.h>
   59 #include <sys/systm.h>
   60 #include <sys/proc.h>
   61 #include <sys/pioctl.h>
   62 #include <sys/ptrace.h>
   63 #include <sys/kdb.h>
   64 #include <sys/kernel.h>
   65 #include <sys/ktr.h>
   66 #include <sys/lock.h>
   67 #include <sys/mutex.h>
   68 #include <sys/resourcevar.h>
   69 #include <sys/signalvar.h>
   70 #include <sys/syscall.h>
   71 #include <sys/sysctl.h>
   72 #include <sys/sysent.h>
   73 #include <sys/uio.h>
   74 #include <sys/vmmeter.h>
   75 #ifdef KTRACE
   76 #include <sys/ktrace.h>
   77 #endif
   78 #ifdef HWPMC_HOOKS
   79 #include <sys/pmckern.h>
   80 #endif
   81 #include <security/audit/audit.h>
   82 
   83 #include <vm/vm.h>
   84 #include <vm/vm_param.h>
   85 #include <vm/pmap.h>
   86 #include <vm/vm_kern.h>
   87 #include <vm/vm_map.h>
   88 #include <vm/vm_page.h>
   89 #include <vm/vm_extern.h>
   90 
   91 #include <machine/cpu.h>
   92 #include <machine/intr_machdep.h>
   93 #include <machine/mca.h>
   94 #include <machine/md_var.h>
   95 #include <machine/pcb.h>
   96 #ifdef SMP
   97 #include <machine/smp.h>
   98 #endif
   99 #include <machine/tss.h>
  100 #include <machine/vm86.h>
  101 
  102 #ifdef POWERFAIL_NMI
  103 #include <sys/syslog.h>
  104 #include <machine/clock.h>
  105 #endif
  106 
  107 #ifdef KDTRACE_HOOKS
  108 #include <sys/dtrace_bsd.h>
  109 
  110 /*
  111  * This is a hook which is initialised by the dtrace module
  112  * to handle traps which might occur during DTrace probe
  113  * execution.
       *
       * trap() calls it, when non-NULL, for T_PROTFLT and T_PAGEFLT with
       * the trap frame and the trap type; a non-zero return makes trap()
       * skip its normal handling.
  114  */
  115 dtrace_trap_func_t      dtrace_trap_func;
  116 
      /*
       * NOTE(review): presumably the double-fault counterpart of
       * dtrace_trap_func.  Nothing in the visible part of trap() references
       * it; confirm against dblfault_handler() and the dtrace module.
       */
  117 dtrace_doubletrap_func_t        dtrace_doubletrap_func;
  118 
  119 /*
  120  * This is a hook which is initialised by the systrace module
  121  * when it is loaded. This keeps the DTrace syscall provider
  122  * implementation opaque.
  123  */
  124 systrace_probe_func_t   systrace_probe_func;
  125 
  126 /*
  127  * These hooks are necessary for the pid, usdt and fasttrap providers.
       *
       * trap() passes each one a struct reg filled in by fill_frame_regs():
       * dtrace_fasttrap_probe_ptr for T_DTRACE_PROBE, dtrace_pid_probe_ptr
       * for T_BPTFLT and dtrace_return_probe_ptr for T_DTRACE_RET.  A hook
       * is only called when it is non-NULL, and a return value of 0 makes
       * trap() skip its normal handling of that trap.
  128  */
  129 dtrace_fasttrap_probe_ptr_t     dtrace_fasttrap_probe_ptr;
  130 dtrace_pid_probe_ptr_t          dtrace_pid_probe_ptr;
  131 dtrace_return_probe_ptr_t       dtrace_return_probe_ptr;
  132 #endif
  133 
  134 extern void trap(struct trapframe *frame);
  135 extern void syscall(struct trapframe *frame);
  136 
      /*
       * trap_pfault(frame, usermode flag, faulting address): page-fault
       * helper used by trap().  trap() passes TRUE as the second argument
       * on the user-trap path and FALSE on the kernel-trap path, and the
       * value read from %cr2 as the third.  On the user path trap() reads
       * the result as follows: -2 means the f00f workaround fired (only
       * checked when I586_CPU && !NO_F00F_HACK), -1 and 0 bypass signal
       * selection (they jump to the `userout' and `user' labels, which lie
       * outside this excerpt), and any other value is the signal to post.
       * On the kernel path the result is ignored.
       */
  137 static int trap_pfault(struct trapframe *, int, vm_offset_t);
      /*
       * trap_fatal(frame, faulting address): called by trap() for the
       * conditions it treats as fatal, e.g. T_RESERVED, a machine check
       * that mca_intr() did not handle, kernel-mode FPU traps, and a page
       * fault taken inside a critical section.  The second argument is the
       * %cr2 value for page faults and 0 for everything else.
       */
  138 static void trap_fatal(struct trapframe *, vm_offset_t);
  139 void dblfault_handler(void);
  140 
      /*
       * Entry point of the lcall system call gate.  trap() compares tf_eip
       * against this address (and this address + 1) when handling a
       * kernel-mode T_TRCTRAP.
       */
  141 extern inthand_t IDTVEC(lcall_syscall);
  142 
  143 #define MAX_TRAP_MSG            33      /* highest valid trap_msg[] index */
      /*
       * Human-readable trap descriptions, indexed by trap number (the T_*
       * value noted beside each entry).  Trap numbers without a T_* constant
       * hold an empty string so that the positions keep matching the trap
       * numbers.  MAX_TRAP_MSG is maintained by hand and equals the index
       * of the last entry; keep the two in sync when extending the table.
       * NOTE(review): the consumer of this table is outside this excerpt
       * (presumably trap_fatal()); confirm that it bounds-checks the trap
       * number against MAX_TRAP_MSG before indexing.
       */
  144 static char *trap_msg[] = {
  145         "",                                     /*  0 unused */
  146         "privileged instruction fault",         /*  1 T_PRIVINFLT */
  147         "",                                     /*  2 unused */
  148         "breakpoint instruction fault",         /*  3 T_BPTFLT */
  149         "",                                     /*  4 unused */
  150         "",                                     /*  5 unused */
  151         "arithmetic trap",                      /*  6 T_ARITHTRAP */
  152         "",                                     /*  7 unused */
  153         "",                                     /*  8 unused */
  154         "general protection fault",             /*  9 T_PROTFLT */
  155         "trace trap",                           /* 10 T_TRCTRAP */
  156         "",                                     /* 11 unused */
  157         "page fault",                           /* 12 T_PAGEFLT */
  158         "",                                     /* 13 unused */
  159         "alignment fault",                      /* 14 T_ALIGNFLT */
  160         "",                                     /* 15 unused */
  161         "",                                     /* 16 unused */
  162         "",                                     /* 17 unused */
  163         "integer divide fault",                 /* 18 T_DIVIDE */
  164         "non-maskable interrupt trap",          /* 19 T_NMI */
  165         "overflow trap",                        /* 20 T_OFLOW */
  166         "FPU bounds check fault",               /* 21 T_BOUND */
  167         "FPU device not available",             /* 22 T_DNA */
  168         "double fault",                         /* 23 T_DOUBLEFLT */
  169         "FPU operand fetch fault",              /* 24 T_FPOPFLT */
  170         "invalid TSS fault",                    /* 25 T_TSSFLT */
  171         "segment not present fault",            /* 26 T_SEGNPFLT */
  172         "stack fault",                          /* 27 T_STKFLT */
  173         "machine check trap",                   /* 28 T_MCHK */
  174         "SIMD floating-point exception",        /* 29 T_XMMFLT */
  175         "reserved (unknown) fault",             /* 30 T_RESERVED */
  176         "",                                     /* 31 unused (reserved) */
  177         "DTrace pid return trap",               /* 32 T_DTRACE_RET */
  178         "DTrace fasttrap probe trap",           /* 33 T_DTRACE_PROBE */
  179 
  180 };
  181 
  182 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
      /*
       * Defined elsewhere.  NOTE(review): not referenced in the visible
       * part of trap(); presumably consulted by trap_pfault() for the f00f
       * workaround -- confirm.
       */
  183 extern int has_f00f_bug;
  184 #endif
  185 
  186 #ifdef KDB
      /*
       * machdep.kdb_on_nmi (read-write sysctl, also registered as a
       * tunable).  On the user-trap T_NMI path, when isa_nmi() returns 0
       * and this is non-zero, trap() prints a notice and calls kdb_trap().
       */
  187 static int kdb_on_nmi = 1;
  188 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
  189         &kdb_on_nmi, 0, "Go to KDB on NMI");
  190 TUNABLE_INT("machdep.kdb_on_nmi", &kdb_on_nmi);
  191 #endif
      /*
       * machdep.panic_on_nmi (read-write sysctl, also registered as a
       * tunable).  On the user-trap T_NMI path, when isa_nmi() returns
       * non-zero and this is non-zero, trap() panics.
       */
  192 static int panic_on_nmi = 1;
  193 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
  194         &panic_on_nmi, 0, "Panic on NMI");
  195 TUNABLE_INT("machdep.panic_on_nmi", &panic_on_nmi);
      /*
       * machdep.prot_fault_translation (read-write sysctl; no TUNABLE_INT
       * is registered for it here).  Selects the signal trap() posts for a
       * user page fault for which trap_pfault() returned something other
       * than SIGSEGV:
       *   0     autodetect: SIGSEGV/SEGV_ACCERR for FreeBSD-ABI processes
       *         with p_osrel >= P_OSREL_SIGSEGV, else SIGBUS/BUS_PAGE_FAULT
       *   1     always SIGBUS/BUS_PAGE_FAULT (compat mode)
       *   other always SIGSEGV/SEGV_ACCERR
       */
  196 static int prot_fault_translation = 0;
  197 SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
  198         &prot_fault_translation, 0, "Select signal to deliver on protection fault");
  199 
  200 /*
  201  * Exception, fault, and trap interface to the FreeBSD kernel.
  202  * This common code is called from assembly language IDT gate entry
  203  * routines that prepare a suitable stack frame, and restore this
  204  * frame after the exception has been processed.
  205  */
  206 
  207 void
  208 trap(struct trapframe *frame)
  209 {
  210         struct thread *td = curthread;
  211         struct proc *p = td->td_proc;
  212         int i = 0, ucode = 0, code;
  213         u_int type;
  214         register_t addr = 0;
  215         vm_offset_t eva;
  216         ksiginfo_t ksi;
  217 #ifdef POWERFAIL_NMI
  218         static int lastalert = 0;
  219 #endif
  220 
  221         PCPU_INC(cnt.v_trap);
  222         type = frame->tf_trapno;
  223 
  224 #ifdef SMP
  225         /* Handler for NMI IPIs used for stopping CPUs. */
  226         if (type == T_NMI) {
  227                  if (ipi_nmi_handler() == 0)
  228                            goto out;
  229         }
  230 #endif /* SMP */
  231 
  232 #ifdef KDB
  233         if (kdb_active) {
  234                 kdb_reenter();
  235                 goto out;
  236         }
  237 #endif
  238 
  239         if (type == T_RESERVED) {
  240                 trap_fatal(frame, 0);
  241                 goto out;
  242         }
  243 
  244 #ifdef  HWPMC_HOOKS
  245         /*
  246          * CPU PMCs interrupt using an NMI so we check for that first.
  247          * If the HWPMC module is active, 'pmc_intr' will point to
  248          * the function to be called.  A return value of '1' from the
  249          * hook means that the NMI was handled by it and that we can
  250          * return immediately.
  251          */
  252         if (type == T_NMI && pmc_intr &&
  253             (*pmc_intr)(PCPU_GET(cpuid), frame))
  254             goto out;
  255 #endif
  256 
  257         if (type == T_MCHK) {
  258                 if (!mca_intr())
  259                         trap_fatal(frame, 0);
  260                 goto out;
  261         }
  262 
  263 #ifdef KDTRACE_HOOKS
  264         /*
  265          * A trap can occur while DTrace executes a probe. Before
  266          * executing the probe, DTrace blocks re-scheduling and sets
  267          * a flag in its per-cpu flags to indicate that it doesn't
  268          * want to fault. On returning from the probe, the no-fault
  269          * flag is cleared and finally re-scheduling is enabled.
  270          *
  271          * If the DTrace kernel module has registered a trap handler,
  272          * call it and if it returns non-zero, assume that it has
  273          * handled the trap and modified the trap frame so that this
  274          * function can return normally.
  275          */
  276         if (type == T_DTRACE_PROBE || type == T_DTRACE_RET ||
  277             type == T_BPTFLT) {
  278                 struct reg regs;
  279 
  280                 fill_frame_regs(frame, &regs);
  281                 if (type == T_DTRACE_PROBE &&
  282                     dtrace_fasttrap_probe_ptr != NULL &&
  283                     dtrace_fasttrap_probe_ptr(&regs) == 0)
  284                         goto out;
  285                 if (type == T_BPTFLT &&
  286                     dtrace_pid_probe_ptr != NULL &&
  287                     dtrace_pid_probe_ptr(&regs) == 0)
  288                         goto out;
  289                 if (type == T_DTRACE_RET &&
  290                     dtrace_return_probe_ptr != NULL &&
  291                     dtrace_return_probe_ptr(&regs) == 0)
  292                         goto out;
  293         }
  294         if ((type == T_PROTFLT || type == T_PAGEFLT) &&
  295             dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
  296                 goto out;
  297 #endif
  298 
  299         if ((frame->tf_eflags & PSL_I) == 0) {
  300                 /*
  301                  * Buggy application or kernel code has disabled
  302                  * interrupts and then trapped.  Enabling interrupts
  303                  * now is wrong, but it is better than running with
  304                  * interrupts disabled until they are accidentally
  305                  * enabled later.
  306                  */
  307                 if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM))
  308                         uprintf(
  309                             "pid %ld (%s): trap %d with interrupts disabled\n",
  310                             (long)curproc->p_pid, curthread->td_name, type);
  311                 else if (type != T_BPTFLT && type != T_TRCTRAP &&
  312                          frame->tf_eip != (int)cpu_switch_load_gs) {
  313                         /*
  314                          * XXX not quite right, since this may be for a
  315                          * multiple fault in user mode.
  316                          */
  317                         printf("kernel trap %d with interrupts disabled\n",
  318                             type);
  319                         /*
  320                          * Page faults need interrupts disabled until later,
  321                          * and we shouldn't enable interrupts while holding
  322                          * a spin lock or if servicing an NMI.
  323                          */
  324                         if (type != T_NMI && type != T_PAGEFLT &&
  325                             td->td_md.md_spinlock_count == 0)
  326                                 enable_intr();
  327                 }
  328         }
  329         eva = 0;
  330         code = frame->tf_err;
  331         if (type == T_PAGEFLT) {
  332                 /*
  333                  * For some Cyrix CPUs, %cr2 is clobbered by
  334                  * interrupts.  This problem is worked around by using
  335                  * an interrupt gate for the pagefault handler.  We
  336                  * are finally ready to read %cr2 and then must
  337                  * reenable interrupts.
  338                  *
  339                  * If we get a page fault while in a critical section, then
  340                  * it is most likely a fatal kernel page fault.  The kernel
  341                  * is already going to panic trying to get a sleep lock to
  342                  * do the VM lookup, so just consider it a fatal trap so the
  343                  * kernel can print out a useful trap message and even get
  344                  * to the debugger.
  345                  *
  346                  * If we get a page fault while holding a non-sleepable
  347                  * lock, then it is most likely a fatal kernel page fault.
  348                  * If WITNESS is enabled, then it's going to whine about
  349                  * bogus LORs with various VM locks, so just skip to the
  350                  * fatal trap handling directly.
  351                  */
  352                 eva = rcr2();
  353                 if (td->td_critnest != 0 ||
  354                     WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
  355                     "Kernel page fault") != 0)
  356                         trap_fatal(frame, eva);
  357                 else
  358                         enable_intr();
  359         }
  360 
  361         if ((ISPL(frame->tf_cs) == SEL_UPL) ||
  362             ((frame->tf_eflags & PSL_VM) && 
  363                 !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) {
  364                 /* user trap */
  365 
  366                 td->td_pticks = 0;
  367                 td->td_frame = frame;
  368                 addr = frame->tf_eip;
  369                 if (td->td_ucred != p->p_ucred) 
  370                         cred_update_thread(td);
  371 
  372                 switch (type) {
  373                 case T_PRIVINFLT:       /* privileged instruction fault */
  374                         i = SIGILL;
  375                         ucode = ILL_PRVOPC;
  376                         break;
  377 
  378                 case T_BPTFLT:          /* bpt instruction fault */
  379                 case T_TRCTRAP:         /* trace trap */
  380                         enable_intr();
  381                         frame->tf_eflags &= ~PSL_T;
  382                         i = SIGTRAP;
  383                         ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
  384                         break;
  385 
  386                 case T_ARITHTRAP:       /* arithmetic trap */
  387 #ifdef DEV_NPX
  388                         ucode = npxtrap();
  389                         if (ucode == -1)
  390                                 goto userout;
  391 #else
  392                         ucode = 0;
  393 #endif
  394                         i = SIGFPE;
  395                         break;
  396 
  397                         /*
  398                          * The following two traps can happen in
  399                          * vm86 mode, and, if so, we want to handle
  400                          * them specially.
  401                          */
  402                 case T_PROTFLT:         /* general protection fault */
  403                 case T_STKFLT:          /* stack fault */
  404                         if (frame->tf_eflags & PSL_VM) {
  405                                 i = vm86_emulate((struct vm86frame *)frame);
  406                                 if (i == 0)
  407                                         goto user;
  408                                 break;
  409                         }
  410                         i = SIGBUS;
  411                         ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
  412                         break;
  413                 case T_SEGNPFLT:        /* segment not present fault */
  414                         i = SIGBUS;
  415                         ucode = BUS_ADRERR;
  416                         break;
  417                 case T_TSSFLT:          /* invalid TSS fault */
  418                         i = SIGBUS;
  419                         ucode = BUS_OBJERR;
  420                         break;
  421                 case T_DOUBLEFLT:       /* double fault */
  422                 default:
  423                         i = SIGBUS;
  424                         ucode = BUS_OBJERR;
  425                         break;
  426 
  427                 case T_PAGEFLT:         /* page fault */
  428 
  429                         i = trap_pfault(frame, TRUE, eva);
  430 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
  431                         if (i == -2) {
  432                                 /*
  433                                  * The f00f hack workaround has triggered, so
  434                                  * treat the fault as an illegal instruction 
  435                                  * (T_PRIVINFLT) instead of a page fault.
  436                                  */
  437                                 type = frame->tf_trapno = T_PRIVINFLT;
  438 
  439                                 /* Proceed as in that case. */
  440                                 ucode = ILL_PRVOPC;
  441                                 i = SIGILL;
  442                                 break;
  443                         }
  444 #endif
  445                         if (i == -1)
  446                                 goto userout;
  447                         if (i == 0)
  448                                 goto user;
  449 
  450                         if (i == SIGSEGV)
  451                                 ucode = SEGV_MAPERR;
  452                         else {
  453                                 if (prot_fault_translation == 0) {
  454                                         /*
  455                                          * Autodetect.
  456                                          * This check also covers the images
  457                                          * without the ABI-tag ELF note.
  458                                          */
  459                                         if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
  460                                             && p->p_osrel >= P_OSREL_SIGSEGV) {
  461                                                 i = SIGSEGV;
  462                                                 ucode = SEGV_ACCERR;
  463                                         } else {
  464                                                 i = SIGBUS;
  465                                                 ucode = BUS_PAGE_FAULT;
  466                                         }
  467                                 } else if (prot_fault_translation == 1) {
  468                                         /*
  469                                          * Always compat mode.
  470                                          */
  471                                         i = SIGBUS;
  472                                         ucode = BUS_PAGE_FAULT;
  473                                 } else {
  474                                         /*
  475                                          * Always SIGSEGV mode.
  476                                          */
  477                                         i = SIGSEGV;
  478                                         ucode = SEGV_ACCERR;
  479                                 }
  480                         }
  481                         addr = eva;
  482                         break;
  483 
  484                 case T_DIVIDE:          /* integer divide fault */
  485                         ucode = FPE_INTDIV;
  486                         i = SIGFPE;
  487                         break;
  488 
  489 #ifdef DEV_ISA
  490                 case T_NMI:
  491 #ifdef POWERFAIL_NMI
  492 #ifndef TIMER_FREQ
  493 #  define TIMER_FREQ 1193182
  494 #endif
  495                         if (time_second - lastalert > 10) {
  496                                 log(LOG_WARNING, "NMI: power fail\n");
  497                                 sysbeep(880, hz);
  498                                 lastalert = time_second;
  499                         }
  500                         goto userout;
  501 #else /* !POWERFAIL_NMI */
  502                         /* machine/parity/power fail/"kitchen sink" faults */
  503                         if (isa_nmi(code) == 0) {
  504 #ifdef KDB
  505                                 /*
  506                                  * NMI can be hooked up to a pushbutton
  507                                  * for debugging.
  508                                  */
  509                                 if (kdb_on_nmi) {
  510                                         printf ("NMI ... going to debugger\n");
  511                                         kdb_trap(type, 0, frame);
  512                                 }
  513 #endif /* KDB */
  514                                 goto userout;
  515                         } else if (panic_on_nmi)
  516                                 panic("NMI indicates hardware failure");
  517                         break;
  518 #endif /* POWERFAIL_NMI */
  519 #endif /* DEV_ISA */
  520 
  521                 case T_OFLOW:           /* integer overflow fault */
  522                         ucode = FPE_INTOVF;
  523                         i = SIGFPE;
  524                         break;
  525 
  526                 case T_BOUND:           /* bounds check fault */
  527                         ucode = FPE_FLTSUB;
  528                         i = SIGFPE;
  529                         break;
  530 
  531                 case T_DNA:
  532 #ifdef DEV_NPX
  533                         KASSERT(PCB_USER_FPU(td->td_pcb),
  534                             ("kernel FPU ctx has leaked"));
  535                         /* transparent fault (due to context switch "late") */
  536                         if (npxdna())
  537                                 goto userout;
  538 #endif
  539                         uprintf("pid %d killed due to lack of floating point\n",
  540                                 p->p_pid);
  541                         i = SIGKILL;
  542                         ucode = 0;
  543                         break;
  544 
  545                 case T_FPOPFLT:         /* FPU operand fetch fault */
  546                         ucode = ILL_COPROC;
  547                         i = SIGILL;
  548                         break;
  549 
  550                 case T_XMMFLT:          /* SIMD floating-point exception */
  551                         ucode = 0; /* XXX */
  552                         i = SIGFPE;
  553                         break;
  554                 }
  555         } else {
  556                 /* kernel trap */
  557 
  558                 KASSERT(cold || td->td_ucred != NULL,
  559                     ("kernel trap doesn't have ucred"));
  560                 switch (type) {
  561                 case T_PAGEFLT:                 /* page fault */
  562                         (void) trap_pfault(frame, FALSE, eva);
  563                         goto out;
  564 
  565                 case T_DNA:
  566 #ifdef DEV_NPX
  567                         KASSERT(!PCB_USER_FPU(td->td_pcb),
  568                             ("Unregistered use of FPU in kernel"));
  569                         if (npxdna())
  570                                 goto out;
  571 #endif
  572                         break;
  573 
  574                 case T_ARITHTRAP:       /* arithmetic trap */
  575                 case T_XMMFLT:          /* SIMD floating-point exception */
  576                 case T_FPOPFLT:         /* FPU operand fetch fault */
  577                         /*
  578                          * XXXKIB for now disable any FPU traps in kernel
  579                          * handler registration seems to be overkill
  580                          */
  581                         trap_fatal(frame, 0);
  582                         goto out;
  583 
  584                         /*
  585                          * The following two traps can happen in
  586                          * vm86 mode, and, if so, we want to handle
  587                          * them specially.
  588                          */
  589                 case T_PROTFLT:         /* general protection fault */
  590                 case T_STKFLT:          /* stack fault */
  591                         if (frame->tf_eflags & PSL_VM) {
  592                                 i = vm86_emulate((struct vm86frame *)frame);
  593                                 if (i != 0)
  594                                         /*
  595                                          * returns to original process
  596                                          */
  597                                         vm86_trap((struct vm86frame *)frame);
  598                                 goto out;
  599                         }
  600                         if (type == T_STKFLT)
  601                                 break;
  602 
  603                         /* FALL THROUGH */
  604 
  605                 case T_SEGNPFLT:        /* segment not present fault */
  606                         if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)
  607                                 break;
  608 
  609                         /*
  610                          * Invalid %fs's and %gs's can be created using
  611                          * procfs or PT_SETREGS or by invalidating the
  612                          * underlying LDT entry.  This causes a fault
  613                          * in kernel mode when the kernel attempts to
  614                          * switch contexts.  Lose the bad context
  615                          * (XXX) so that we can continue, and generate
  616                          * a signal.
  617                          */
  618                         if (frame->tf_eip == (int)cpu_switch_load_gs) {
  619                                 PCPU_GET(curpcb)->pcb_gs = 0;
  620 #if 0                           
  621                                 PROC_LOCK(p);
  622                                 psignal(p, SIGBUS);
  623                                 PROC_UNLOCK(p);
  624 #endif                          
  625                                 goto out;
  626                         }
  627 
  628                         if (td->td_intr_nesting_level != 0)
  629                                 break;
  630 
  631                         /*
  632                          * Invalid segment selectors and out of bounds
  633                          * %eip's and %esp's can be set up in user mode.
  634                          * This causes a fault in kernel mode when the
  635                          * kernel tries to return to user mode.  We want
  636                          * to get this fault so that we can fix the
  637                          * problem here and not have to check all the
  638                          * selectors and pointers when the user changes
  639                          * them.
  640                          */
  641                         if (frame->tf_eip == (int)doreti_iret) {
  642                                 frame->tf_eip = (int)doreti_iret_fault;
  643                                 goto out;
  644                         }
  645                         if (frame->tf_eip == (int)doreti_popl_ds) {
  646                                 frame->tf_eip = (int)doreti_popl_ds_fault;
  647                                 goto out;
  648                         }
  649                         if (frame->tf_eip == (int)doreti_popl_es) {
  650                                 frame->tf_eip = (int)doreti_popl_es_fault;
  651                                 goto out;
  652                         }
  653                         if (frame->tf_eip == (int)doreti_popl_fs) {
  654                                 frame->tf_eip = (int)doreti_popl_fs_fault;
  655                                 goto out;
  656                         }
  657                         if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
  658                                 frame->tf_eip =
  659                                     (int)PCPU_GET(curpcb)->pcb_onfault;
  660                                 goto out;
  661                         }
  662                         break;
  663 
  664                 case T_TSSFLT:
  665                         /*
  666                          * PSL_NT can be set in user mode and isn't cleared
  667                          * automatically when the kernel is entered.  This
  668                          * causes a TSS fault when the kernel attempts to
  669                          * `iret' because the TSS link is uninitialized.  We
  670                          * want to get this fault so that we can fix the
  671                          * problem here and not every time the kernel is
  672                          * entered.
  673                          */
  674                         if (frame->tf_eflags & PSL_NT) {
  675                                 frame->tf_eflags &= ~PSL_NT;
  676                                 goto out;
  677                         }
  678                         break;
  679 
  680                 case T_TRCTRAP:  /* trace trap */
  681                         if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
  682                                 /*
  683                                  * We've just entered system mode via the
  684                                  * syscall lcall.  Continue single stepping
  685                                  * silently until the syscall handler has
  686                                  * saved the flags.
  687                                  */
  688                                 goto out;
  689                         }
  690                         if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
  691                                 /*
  692                                  * The syscall handler has now saved the
  693                                  * flags.  Stop single stepping it.
  694                                  */
  695                                 frame->tf_eflags &= ~PSL_T;
  696                                 goto out;
  697                         }
  698                         /*
  699                          * Ignore debug register trace traps due to
  700                          * accesses in the user's address space, which
  701                          * can happen under several conditions such as
  702                          * if a user sets a watchpoint on a buffer and
  703                          * then passes that buffer to a system call.
  704                          * We still want to get TRCTRAPS for addresses
  705                          * in kernel space because that is useful when
  706                          * debugging the kernel.
  707                          */
  708                         if (user_dbreg_trap() && 
  709                            !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) {
  710                                 /*
  711                                  * Reset breakpoint bits because the
  712                                  * processor doesn't
  713                                  */
  714                                 load_dr6(rdr6() & 0xfffffff0);
  715                                 goto out;
  716                         }
  717                         /*
  718                          * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
  719                          */
  720                 case T_BPTFLT:
  721                         /*
  722                          * If KDB is enabled, let it handle the debugger trap.
  723                          * Otherwise, debugger traps "can't happen".
  724                          */
  725 #ifdef KDB
  726                         if (kdb_trap(type, 0, frame))
  727                                 goto out;
  728 #endif
  729                         break;
  730 
  731 #ifdef DEV_ISA
  732                 case T_NMI:
  733 #ifdef POWERFAIL_NMI
  734                         if (time_second - lastalert > 10) {
  735                                 log(LOG_WARNING, "NMI: power fail\n");
  736                                 sysbeep(880, hz);
  737                                 lastalert = time_second;
  738                         }
  739                         goto out;
  740 #else /* !POWERFAIL_NMI */
  741                         /* machine/parity/power fail/"kitchen sink" faults */
  742                         if (isa_nmi(code) == 0) {
  743 #ifdef KDB
  744                                 /*
  745                                  * NMI can be hooked up to a pushbutton
  746                                  * for debugging.
  747                                  */
  748                                 if (kdb_on_nmi) {
  749                                         printf ("NMI ... going to debugger\n");
  750                                         kdb_trap(type, 0, frame);
  751                                 }
  752 #endif /* KDB */
  753                                 goto out;
  754                         } else if (panic_on_nmi == 0)
  755                                 goto out;
  756                         /* FALLTHROUGH */
  757 #endif /* POWERFAIL_NMI */
  758 #endif /* DEV_ISA */
  759                 }
  760 
  761                 trap_fatal(frame, eva);
  762                 goto out;
  763         }
  764 
  765         /* Translate fault for emulators (e.g. Linux) */
  766         if (*p->p_sysent->sv_transtrap)
  767                 i = (*p->p_sysent->sv_transtrap)(i, type);
  768 
  769         ksiginfo_init_trap(&ksi);
  770         ksi.ksi_signo = i;
  771         ksi.ksi_code = ucode;
  772         ksi.ksi_addr = (void *)addr;
  773         ksi.ksi_trapno = type;
  774         trapsignal(td, &ksi);
  775 
  776 #ifdef DEBUG
  777         if (type <= MAX_TRAP_MSG) {
  778                 uprintf("fatal process exception: %s",
  779                         trap_msg[type]);
  780                 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
  781                         uprintf(", fault VA = 0x%lx", (u_long)eva);
  782                 uprintf("\n");
  783         }
  784 #endif
  785 
  786 user:
  787         userret(td, frame);
  788         mtx_assert(&Giant, MA_NOTOWNED);
  789         KASSERT(PCB_USER_FPU(td->td_pcb),
  790             ("Return from trap with kernel FPU ctx leaked"));
  791 userout:
  792 out:
  793         return;
  794 }
  795 
  796 static int
  797 trap_pfault(frame, usermode, eva)
  798         struct trapframe *frame;
  799         int usermode;
  800         vm_offset_t eva;
  801 {
  802         vm_offset_t va;
  803         struct vmspace *vm = NULL;
  804         vm_map_t map;
  805         int rv = 0;
  806         vm_prot_t ftype;
  807         struct thread *td = curthread;
  808         struct proc *p = td->td_proc;
  809 
  810         va = trunc_page(eva);
  811         if (va >= KERNBASE) {
  812                 /*
  813                  * Don't allow user-mode faults in kernel address space.
  814                  * An exception:  if the faulting address is the invalid
  815                  * instruction entry in the IDT, then the Intel Pentium
  816                  * F00F bug workaround was triggered, and we need to
  817                  * treat it is as an illegal instruction, and not a page
  818                  * fault.
  819                  */
  820 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
  821                 if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
  822                         return -2;
  823 #endif
  824                 if (usermode)
  825                         goto nogo;
  826 
  827                 map = kernel_map;
  828         } else {
  829                 /*
  830                  * This is a fault on non-kernel virtual memory.
  831                  * vm is initialized above to NULL. If curproc is NULL
  832                  * or curproc->p_vmspace is NULL the fault is fatal.
  833                  */
  834                 if (p != NULL)
  835                         vm = p->p_vmspace;
  836 
  837                 if (vm == NULL)
  838                         goto nogo;
  839 
  840                 map = &vm->vm_map;
  841                 if (!usermode && (td->td_intr_nesting_level != 0 ||
  842                     PCPU_GET(curpcb)->pcb_onfault == NULL)) {
  843                         trap_fatal(frame, eva);
  844                         return (-1);
  845                 }
  846         }
  847 
  848         /*
  849          * PGEX_I is defined only if the execute disable bit capability is
  850          * supported and enabled.
  851          */
  852         if (frame->tf_err & PGEX_W)
  853                 ftype = VM_PROT_WRITE;
  854 #ifdef PAE
  855         else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
  856                 ftype = VM_PROT_EXECUTE;
  857 #endif
  858         else
  859                 ftype = VM_PROT_READ;
  860 
  861         if (map != kernel_map) {
  862                 /*
  863                  * Keep swapout from messing with us during this
  864                  *      critical time.
  865                  */
  866                 PROC_LOCK(p);
  867                 ++p->p_lock;
  868                 PROC_UNLOCK(p);
  869 
  870                 /* Fault in the user page: */
  871                 rv = vm_fault(map, va, ftype,
  872                               (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
  873                                                       : VM_FAULT_NORMAL);
  874 
  875                 PROC_LOCK(p);
  876                 --p->p_lock;
  877                 PROC_UNLOCK(p);
  878         } else {
  879                 /*
  880                  * Don't have to worry about process locking or stacks in the
  881                  * kernel.
  882                  */
  883                 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
  884         }
  885         if (rv == KERN_SUCCESS)
  886                 return (0);
  887 nogo:
  888         if (!usermode) {
  889                 if (td->td_intr_nesting_level == 0 &&
  890                     PCPU_GET(curpcb)->pcb_onfault != NULL) {
  891                         frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
  892                         return (0);
  893                 }
  894                 trap_fatal(frame, eva);
  895                 return (-1);
  896         }
  897 
  898         return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
  899 }
  900 
  901 static void
  902 trap_fatal(frame, eva)
  903         struct trapframe *frame;
  904         vm_offset_t eva;
  905 {
  906         int code, ss, esp;
  907         u_int type;
  908         struct soft_segment_descriptor softseg;
  909         char *msg;
  910 
  911         code = frame->tf_err;
  912         type = frame->tf_trapno;
  913         sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
  914 
  915         if (type <= MAX_TRAP_MSG)
  916                 msg = trap_msg[type];
  917         else
  918                 msg = "UNKNOWN";
  919         printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
  920             frame->tf_eflags & PSL_VM ? "vm86" :
  921             ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
  922 #ifdef SMP
  923         /* two separate prints in case of a trap on an unmapped page */
  924         printf("cpuid = %d; ", PCPU_GET(cpuid));
  925         printf("apic id = %02x\n", PCPU_GET(apic_id));
  926 #endif
  927         if (type == T_PAGEFLT) {
  928                 printf("fault virtual address   = 0x%x\n", eva);
  929                 printf("fault code              = %s %s, %s\n",
  930                         code & PGEX_U ? "user" : "supervisor",
  931                         code & PGEX_W ? "write" : "read",
  932                         code & PGEX_P ? "protection violation" : "page not present");
  933         }
  934         printf("instruction pointer     = 0x%x:0x%x\n",
  935                frame->tf_cs & 0xffff, frame->tf_eip);
  936         if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
  937                 ss = frame->tf_ss & 0xffff;
  938                 esp = frame->tf_esp;
  939         } else {
  940                 ss = GSEL(GDATA_SEL, SEL_KPL);
  941                 esp = (int)&frame->tf_esp;
  942         }
  943         printf("stack pointer           = 0x%x:0x%x\n", ss, esp);
  944         printf("frame pointer           = 0x%x:0x%x\n", ss, frame->tf_ebp);
  945         printf("code segment            = base 0x%x, limit 0x%x, type 0x%x\n",
  946                softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
  947         printf("                        = DPL %d, pres %d, def32 %d, gran %d\n",
  948                softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
  949                softseg.ssd_gran);
  950         printf("processor eflags        = ");
  951         if (frame->tf_eflags & PSL_T)
  952                 printf("trace trap, ");
  953         if (frame->tf_eflags & PSL_I)
  954                 printf("interrupt enabled, ");
  955         if (frame->tf_eflags & PSL_NT)
  956                 printf("nested task, ");
  957         if (frame->tf_eflags & PSL_RF)
  958                 printf("resume, ");
  959         if (frame->tf_eflags & PSL_VM)
  960                 printf("vm86, ");
  961         printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
  962         printf("current process         = ");
  963         if (curproc) {
  964                 printf("%lu (%s)\n", (u_long)curproc->p_pid, curthread->td_name);
  965         } else {
  966                 printf("Idle\n");
  967         }
  968 
  969 #ifdef KDB
  970         if (debugger_on_panic || kdb_active) {
  971                 frame->tf_err = eva;    /* smuggle fault address to ddb */
  972                 if (kdb_trap(type, 0, frame)) {
  973                         frame->tf_err = code;   /* restore error code */
  974                         return;
  975                 }
  976                 frame->tf_err = code;           /* restore error code */
  977         }
  978 #endif
  979         printf("trap number             = %d\n", type);
  980         if (type <= MAX_TRAP_MSG)
  981                 panic("%s", trap_msg[type]);
  982         else
  983                 panic("unknown/reserved trap");
  984 }
  985 
  986 /*
  987  * Double fault handler. Called when a fault occurs while writing
  988  * a frame for a trap/exception onto the stack. This usually occurs
  989  * when the stack overflows (such is the case with infinite recursion,
  990  * for example).
  991  *
  992  * XXX Note that the current PTD gets replaced by IdlePTD when the
  993  * task switch occurs. This means that the stack that was active at
  994  * the time of the double fault is not available at <kstack> unless
  995  * the machine was idle when the double fault occurred. The downside
  996  * of this is that "trace <ebp>" in ddb won't work.
  997  */
  998 void
  999 dblfault_handler()
 1000 {
 1001 #ifdef KDTRACE_HOOKS
 1002         if (dtrace_doubletrap_func != NULL)
 1003                 (*dtrace_doubletrap_func)();
 1004 #endif
 1005         printf("\nFatal double fault:\n");
 1006         printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
 1007         printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
 1008         printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
 1009 #ifdef SMP
 1010         /* two separate prints in case of a trap on an unmapped page */
 1011         printf("cpuid = %d; ", PCPU_GET(cpuid));
 1012         printf("apic id = %02x\n", PCPU_GET(apic_id));
 1013 #endif
 1014         panic("double fault");
 1015 }
 1016 
 1017 int
 1018 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 1019 {
 1020         struct proc *p;
 1021         struct trapframe *frame;
 1022         caddr_t params;
 1023         int error;
 1024 
 1025         p = td->td_proc;
 1026         frame = td->td_frame;
 1027 
 1028         params = (caddr_t)frame->tf_esp + sizeof(int);
 1029         sa->code = frame->tf_eax;
 1030 
 1031         /*
 1032          * Need to check if this is a 32 bit or 64 bit syscall.
 1033          */
 1034         if (sa->code == SYS_syscall) {
 1035                 /*
 1036                  * Code is first argument, followed by actual args.
 1037                  */
 1038                 sa->code = fuword(params);
 1039                 params += sizeof(int);
 1040         } else if (sa->code == SYS___syscall) {
 1041                 /*
 1042                  * Like syscall, but code is a quad, so as to maintain
 1043                  * quad alignment for the rest of the arguments.
 1044                  */
 1045                 sa->code = fuword(params);
 1046                 params += sizeof(quad_t);
 1047         }
 1048 
 1049         if (p->p_sysent->sv_mask)
 1050                 sa->code &= p->p_sysent->sv_mask;
 1051         if (sa->code >= p->p_sysent->sv_size)
 1052                 sa->callp = &p->p_sysent->sv_table[0];
 1053         else
 1054                 sa->callp = &p->p_sysent->sv_table[sa->code];
 1055         sa->narg = sa->callp->sy_narg;
 1056 
 1057         if (params != NULL && sa->narg != 0)
 1058                 error = copyin(params, (caddr_t)sa->args,
 1059                     (u_int)(sa->narg * sizeof(int)));
 1060         else
 1061                 error = 0;
 1062 
 1063         if (error == 0) {
 1064                 td->td_retval[0] = 0;
 1065                 td->td_retval[1] = frame->tf_edx;
 1066         }
 1067                 
 1068         return (error);
 1069 }
 1070 
 1071 #include "../../kern/subr_syscall.c"
 1072 
 1073 /*
 1074  *      syscall -       system call request C handler
 1075  *
 1076  *      A system call is essentially treated as a trap.
 1077  */
 1078 void
 1079 syscall(struct trapframe *frame)
 1080 {
 1081         struct thread *td;
 1082         struct syscall_args sa;
 1083         register_t orig_tf_eflags;
 1084         int error;
 1085         ksiginfo_t ksi;
 1086 
 1087 #ifdef DIAGNOSTIC
 1088         if (ISPL(frame->tf_cs) != SEL_UPL) {
 1089                 panic("syscall");
 1090                 /* NOT REACHED */
 1091         }
 1092 #endif
 1093         orig_tf_eflags = frame->tf_eflags;
 1094 
 1095         td = curthread;
 1096         td->td_frame = frame;
 1097 
 1098         error = syscallenter(td, &sa);
 1099 
 1100         /*
 1101          * Traced syscall.
 1102          */
 1103         if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
 1104                 frame->tf_eflags &= ~PSL_T;
 1105                 ksiginfo_init_trap(&ksi);
 1106                 ksi.ksi_signo = SIGTRAP;
 1107                 ksi.ksi_code = TRAP_TRACE;
 1108                 ksi.ksi_addr = (void *)frame->tf_eip;
 1109                 trapsignal(td, &ksi);
 1110         }
 1111 
 1112         KASSERT(PCB_USER_FPU(td->td_pcb),
 1113             ("System call %s returning with kernel FPU ctx leaked",
 1114              syscallname(td->td_proc, sa.code)));
 1115         KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
 1116             ("System call %s returning with mangled pcb_save",
 1117              syscallname(td->td_proc, sa.code)));
 1118 
 1119         syscallret(td, error, &sa);
 1120 }

Cache object: 1da94dc36fc8ffff1dea3c3344ffb23f


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.