The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/trap.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 1994, David Greenman
    3  * Copyright (c) 1990, 1993
    4  *      The Regents of the University of California.  All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * the University of Utah, and William Jolitz.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  * 3. All advertising materials mentioning features or use of this software
   18  *    must display the following acknowledgement:
   19  *      This product includes software developed by the University of
   20  *      California, Berkeley and its contributors.
   21  * 4. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      from: @(#)trap.c        7.4 (Berkeley) 5/13/91
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __FBSDID("$FreeBSD: releng/10.2/sys/i386/i386/trap.c 284642 2015-06-20 13:25:28Z dim $");
   42 
   43 /*
   44  * 386 Trap and System call handling
   45  */
   46 
   47 #include "opt_clock.h"
   48 #include "opt_cpu.h"
   49 #include "opt_hwpmc_hooks.h"
   50 #include "opt_isa.h"
   51 #include "opt_kdb.h"
   52 #include "opt_kdtrace.h"
   53 #include "opt_npx.h"
   54 #include "opt_trap.h"
   55 
   56 #include <sys/param.h>
   57 #include <sys/bus.h>
   58 #include <sys/systm.h>
   59 #include <sys/proc.h>
   60 #include <sys/pioctl.h>
   61 #include <sys/ptrace.h>
   62 #include <sys/kdb.h>
   63 #include <sys/kernel.h>
   64 #include <sys/ktr.h>
   65 #include <sys/lock.h>
   66 #include <sys/mutex.h>
   67 #include <sys/resourcevar.h>
   68 #include <sys/signalvar.h>
   69 #include <sys/syscall.h>
   70 #include <sys/sysctl.h>
   71 #include <sys/sysent.h>
   72 #include <sys/uio.h>
   73 #include <sys/vmmeter.h>
   74 #ifdef HWPMC_HOOKS
   75 #include <sys/pmckern.h>
   76 PMC_SOFT_DEFINE( , , page_fault, all);
   77 PMC_SOFT_DEFINE( , , page_fault, read);
   78 PMC_SOFT_DEFINE( , , page_fault, write);
   79 #endif
   80 #include <security/audit/audit.h>
   81 
   82 #include <vm/vm.h>
   83 #include <vm/vm_param.h>
   84 #include <vm/pmap.h>
   85 #include <vm/vm_kern.h>
   86 #include <vm/vm_map.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/vm_extern.h>
   89 
   90 #include <machine/cpu.h>
   91 #include <machine/intr_machdep.h>
   92 #include <x86/mca.h>
   93 #include <machine/md_var.h>
   94 #include <machine/pcb.h>
   95 #ifdef SMP
   96 #include <machine/smp.h>
   97 #endif
   98 #include <machine/tss.h>
   99 #include <machine/vm86.h>
  100 
  101 #ifdef POWERFAIL_NMI
  102 #include <sys/syslog.h>
  103 #include <machine/clock.h>
  104 #endif
  105 
  106 #ifdef KDTRACE_HOOKS
  107 #include <sys/dtrace_bsd.h>
  108 #endif
  109 
  110 extern void trap(struct trapframe *frame);       /* common trap handler, defined below */
  111 extern void syscall(struct trapframe *frame);    /* NOTE(review): definition not in the code reviewed here */
  112 
  113 static int trap_pfault(struct trapframe *, int, vm_offset_t);    /* trap() passes (frame, TRUE for user / FALSE for kernel, faulting address from %cr2) */
  114 static void trap_fatal(struct trapframe *, vm_offset_t); /* trap() calls this for T_RESERVED and for FPU traps taken in kernel mode */
  115 void dblfault_handler(void);
  116 
  117 extern inthand_t IDTVEC(lcall_syscall);  /* entry point whose address trap() compares with tf_eip on kernel trace traps */
  118 
  119 #define MAX_TRAP_MSG            32      /* highest index present in trap_msg[]; hard-coded, so keep in sync when entries are added */
  120 static char *trap_msg[] = {     /* descriptive name for each trap number, in index order; "" marks a slot with no name */
  121         "",                                     /*  0 unused */
  122         "privileged instruction fault",         /*  1 T_PRIVINFLT */
  123         "",                                     /*  2 unused */
  124         "breakpoint instruction fault",         /*  3 T_BPTFLT */
  125         "",                                     /*  4 unused */
  126         "",                                     /*  5 unused */
  127         "arithmetic trap",                      /*  6 T_ARITHTRAP */
  128         "",                                     /*  7 unused */
  129         "",                                     /*  8 unused */
  130         "general protection fault",             /*  9 T_PROTFLT */
  131         "trace trap",                           /* 10 T_TRCTRAP */
  132         "",                                     /* 11 unused */
  133         "page fault",                           /* 12 T_PAGEFLT */
  134         "",                                     /* 13 unused */
  135         "alignment fault",                      /* 14 T_ALIGNFLT */
  136         "",                                     /* 15 unused */
  137         "",                                     /* 16 unused */
  138         "",                                     /* 17 unused */
  139         "integer divide fault",                 /* 18 T_DIVIDE */
  140         "non-maskable interrupt trap",          /* 19 T_NMI */
  141         "overflow trap",                        /* 20 T_OFLOW */
  142         "FPU bounds check fault",               /* 21 T_BOUND */
  143         "FPU device not available",             /* 22 T_DNA */
  144         "double fault",                         /* 23 T_DOUBLEFLT */
  145         "FPU operand fetch fault",              /* 24 T_FPOPFLT */
  146         "invalid TSS fault",                    /* 25 T_TSSFLT */
  147         "segment not present fault",            /* 26 T_SEGNPFLT */
  148         "stack fault",                          /* 27 T_STKFLT */
  149         "machine check trap",                   /* 28 T_MCHK */
  150         "SIMD floating-point exception",        /* 29 T_XMMFLT */
  151         "reserved (unknown) fault",             /* 30 T_RESERVED */
  152         "",                                     /* 31 unused (reserved) */
  153         "DTrace pid return trap",               /* 32 T_DTRACE_RET */
  154 };
  155 
  156 #if defined(I586_CPU) && !defined(NO_F00F_HACK)  /* same condition guards the f00f handling in trap()'s user page-fault case */
  157 int has_f00f_bug = 0;           /* Initialized so that it can be patched. */
  158 #endif
  159 
  160 #ifdef KDB
  161 static int kdb_on_nmi = 1;      /* nonzero: trap() prints a notice and calls kdb_trap() on an NMI for which isa_nmi() returns 0 */
  162 SYSCTL_INT(_machdep, OID_AUTO, kdb_on_nmi, CTLFLAG_RW,
  163         &kdb_on_nmi, 0, "Go to KDB on NMI");
  164 TUNABLE_INT("machdep.kdb_on_nmi", &kdb_on_nmi);
  165 #endif
  166 static int panic_on_nmi = 1;    /* nonzero: trap() panics on an NMI for which isa_nmi() returns nonzero */
  167 SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
  168         &panic_on_nmi, 0, "Panic on NMI");
  169 TUNABLE_INT("machdep.panic_on_nmi", &panic_on_nmi);
  170 static int prot_fault_translation = 0;  /* user protection faults: 0 = choose by ABI and p_osrel, 1 = always SIGBUS, other = always SIGSEGV */
  171 SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
  172         &prot_fault_translation, 0, "Select signal to deliver on protection fault");
  173 static int uprintf_signal;      /* NOTE(review): consumer is not in the code reviewed here; meaning taken from the description string below -- confirm */
  174 SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW,
  175     &uprintf_signal, 0,
  176     "Print debugging information on trap signal to ctty");
  177 
  178 /*
  179  * Exception, fault, and trap interface to the FreeBSD kernel.
  180  * This common code is called from assembly language IDT gate entry
  181  * routines that prepare a suitable stack frame, and restore this
  182  * frame after the exception has been processed.
  183  */
  184 
  185 void
  186 trap(struct trapframe *frame)
  187 {
  188 #ifdef KDTRACE_HOOKS
  189         struct reg regs;
  190 #endif
  191         struct thread *td = curthread;
  192         struct proc *p = td->td_proc;
  193         int i = 0, ucode = 0, code;
  194         u_int type;
  195         register_t addr = 0;
  196         vm_offset_t eva;
  197         ksiginfo_t ksi;
  198 #ifdef POWERFAIL_NMI
  199         static int lastalert = 0;
  200 #endif
  201 
  202         PCPU_INC(cnt.v_trap);
  203         type = frame->tf_trapno;
  204 
  205 #ifdef SMP
  206         /* Handler for NMI IPIs used for stopping CPUs. */
  207         if (type == T_NMI) {
  208                  if (ipi_nmi_handler() == 0)
  209                            goto out;
  210         }
  211 #endif /* SMP */
  212 
  213 #ifdef KDB
  214         if (kdb_active) {
  215                 kdb_reenter();
  216                 goto out;
  217         }
  218 #endif
  219 
  220         if (type == T_RESERVED) {
  221                 trap_fatal(frame, 0);
  222                 goto out;
  223         }
  224 
  225 #ifdef  HWPMC_HOOKS
  226         /*
  227          * CPU PMCs interrupt using an NMI so we check for that first.
  228          * If the HWPMC module is active, 'pmc_intr' will point to
  229          * the function to be called.  A return value of '1' from the
  230          * hook means that the NMI was handled by it and that we can
  231          * return immediately.
  232          */
  233         if (type == T_NMI && pmc_intr &&
  234             (*pmc_intr)(PCPU_GET(cpuid), frame))
  235             goto out;
  236 #endif
  237 
  238         if (type == T_MCHK) {
  239                 mca_intr();
  240                 goto out;
  241         }
  242 
  243 #ifdef KDTRACE_HOOKS
  244         /*
  245          * A trap can occur while DTrace executes a probe. Before
  246          * executing the probe, DTrace blocks re-scheduling and sets
  247          * a flag in its per-cpu flags to indicate that it doesn't
  248          * want to fault. On returning from the probe, the no-fault
  249          * flag is cleared and finally re-scheduling is enabled.
  250          */
  251         if ((type == T_PROTFLT || type == T_PAGEFLT) &&
  252             dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
  253                 goto out;
  254 #endif
  255 
  256         if ((frame->tf_eflags & PSL_I) == 0) {
  257                 /*
  258                  * Buggy application or kernel code has disabled
  259                  * interrupts and then trapped.  Enabling interrupts
  260                  * now is wrong, but it is better than running with
  261                  * interrupts disabled until they are accidentally
  262                  * enabled later.
  263                  */
  264                 if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM))
  265                         uprintf(
  266                             "pid %ld (%s): trap %d with interrupts disabled\n",
  267                             (long)curproc->p_pid, curthread->td_name, type);
  268                 else if (type != T_NMI && type != T_BPTFLT &&
  269                     type != T_TRCTRAP &&
  270                     frame->tf_eip != (int)cpu_switch_load_gs) {
  271                         /*
  272                          * XXX not quite right, since this may be for a
  273                          * multiple fault in user mode.
  274                          */
  275                         printf("kernel trap %d with interrupts disabled\n",
  276                             type);
  277                         /*
  278                          * Page faults need interrupts disabled until later,
  279                          * and we shouldn't enable interrupts while holding
  280                          * a spin lock.
  281                          */
  282                         if (type != T_PAGEFLT &&
  283                             td->td_md.md_spinlock_count == 0)
  284                                 enable_intr();
  285                 }
  286         }
  287         eva = 0;
  288         code = frame->tf_err;
  289         if (type == T_PAGEFLT) {
  290                 /*
  291                  * For some Cyrix CPUs, %cr2 is clobbered by
  292                  * interrupts.  This problem is worked around by using
  293                  * an interrupt gate for the pagefault handler.  We
  294                  * are finally ready to read %cr2 and conditionally
  295                  * reenable interrupts.  If we hold a spin lock, then
  296                  * we must not reenable interrupts.  This might be a
  297                  * spurious page fault.
  298                  */
  299                 eva = rcr2();
  300                 if (td->td_md.md_spinlock_count == 0)
  301                         enable_intr();
  302         }
  303 
  304         if ((ISPL(frame->tf_cs) == SEL_UPL) ||
  305             ((frame->tf_eflags & PSL_VM) && 
  306                 !(curpcb->pcb_flags & PCB_VM86CALL))) {
  307                 /* user trap */
  308 
  309                 td->td_pticks = 0;
  310                 td->td_frame = frame;
  311                 addr = frame->tf_eip;
  312                 if (td->td_ucred != p->p_ucred) 
  313                         cred_update_thread(td);
  314 
  315                 switch (type) {
  316                 case T_PRIVINFLT:       /* privileged instruction fault */
  317                         i = SIGILL;
  318                         ucode = ILL_PRVOPC;
  319                         break;
  320 
  321                 case T_BPTFLT:          /* bpt instruction fault */
  322                 case T_TRCTRAP:         /* trace trap */
  323                         enable_intr();
  324 #ifdef KDTRACE_HOOKS
  325                         if (type == T_BPTFLT) {
  326                                 fill_frame_regs(frame, &regs);
  327                                 if (dtrace_pid_probe_ptr != NULL &&
  328                                     dtrace_pid_probe_ptr(&regs) == 0)
  329                                         goto out;
  330                         }
  331 #endif
  332                         frame->tf_eflags &= ~PSL_T;
  333                         i = SIGTRAP;
  334                         ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
  335                         break;
  336 
  337                 case T_ARITHTRAP:       /* arithmetic trap */
  338 #ifdef DEV_NPX
  339                         ucode = npxtrap_x87();
  340                         if (ucode == -1)
  341                                 goto userout;
  342 #else
  343                         ucode = 0;
  344 #endif
  345                         i = SIGFPE;
  346                         break;
  347 
  348                         /*
  349                          * The following two traps can happen in
  350                          * vm86 mode, and, if so, we want to handle
  351                          * them specially.
  352                          */
  353                 case T_PROTFLT:         /* general protection fault */
  354                 case T_STKFLT:          /* stack fault */
  355                         if (frame->tf_eflags & PSL_VM) {
  356                                 i = vm86_emulate((struct vm86frame *)frame);
  357                                 if (i == 0)
  358                                         goto user;
  359                                 break;
  360                         }
  361                         i = SIGBUS;
  362                         ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
  363                         break;
  364                 case T_SEGNPFLT:        /* segment not present fault */
  365                         i = SIGBUS;
  366                         ucode = BUS_ADRERR;
  367                         break;
  368                 case T_TSSFLT:          /* invalid TSS fault */
  369                         i = SIGBUS;
  370                         ucode = BUS_OBJERR;
  371                         break;
  372                 case T_ALIGNFLT:
  373                         i = SIGBUS;
  374                         ucode = BUS_ADRALN;
  375                         break;
  376                 case T_DOUBLEFLT:       /* double fault */
  377                 default:
  378                         i = SIGBUS;
  379                         ucode = BUS_OBJERR;
  380                         break;
  381 
  382                 case T_PAGEFLT:         /* page fault */
  383 
  384                         i = trap_pfault(frame, TRUE, eva);
  385 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
  386                         if (i == -2) {
  387                                 /*
  388                                  * The f00f hack workaround has triggered, so
  389                                  * treat the fault as an illegal instruction 
  390                                  * (T_PRIVINFLT) instead of a page fault.
  391                                  */
  392                                 type = frame->tf_trapno = T_PRIVINFLT;
  393 
  394                                 /* Proceed as in that case. */
  395                                 ucode = ILL_PRVOPC;
  396                                 i = SIGILL;
  397                                 break;
  398                         }
  399 #endif
  400                         if (i == -1)
  401                                 goto userout;
  402                         if (i == 0)
  403                                 goto user;
  404 
  405                         if (i == SIGSEGV)
  406                                 ucode = SEGV_MAPERR;
  407                         else {
  408                                 if (prot_fault_translation == 0) {
  409                                         /*
  410                                          * Autodetect.
  411                                          * This check also covers the images
  412                                          * without the ABI-tag ELF note.
  413                                          */
  414                                         if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
  415                                             && p->p_osrel >= P_OSREL_SIGSEGV) {
  416                                                 i = SIGSEGV;
  417                                                 ucode = SEGV_ACCERR;
  418                                         } else {
  419                                                 i = SIGBUS;
  420                                                 ucode = BUS_PAGE_FAULT;
  421                                         }
  422                                 } else if (prot_fault_translation == 1) {
  423                                         /*
  424                                          * Always compat mode.
  425                                          */
  426                                         i = SIGBUS;
  427                                         ucode = BUS_PAGE_FAULT;
  428                                 } else {
  429                                         /*
  430                                          * Always SIGSEGV mode.
  431                                          */
  432                                         i = SIGSEGV;
  433                                         ucode = SEGV_ACCERR;
  434                                 }
  435                         }
  436                         addr = eva;
  437                         break;
  438 
  439                 case T_DIVIDE:          /* integer divide fault */
  440                         ucode = FPE_INTDIV;
  441                         i = SIGFPE;
  442                         break;
  443 
  444 #ifdef DEV_ISA
  445                 case T_NMI:
  446 #ifdef POWERFAIL_NMI
  447 #ifndef TIMER_FREQ
  448 #  define TIMER_FREQ 1193182
  449 #endif
  450                         if (time_second - lastalert > 10) {
  451                                 log(LOG_WARNING, "NMI: power fail\n");
  452                                 sysbeep(880, hz);
  453                                 lastalert = time_second;
  454                         }
  455                         goto userout;
  456 #else /* !POWERFAIL_NMI */
  457                         /* machine/parity/power fail/"kitchen sink" faults */
  458                         if (isa_nmi(code) == 0) {
  459 #ifdef KDB
  460                                 /*
  461                                  * NMI can be hooked up to a pushbutton
  462                                  * for debugging.
  463                                  */
  464                                 if (kdb_on_nmi) {
  465                                         printf ("NMI ... going to debugger\n");
  466                                         kdb_trap(type, 0, frame);
  467                                 }
  468 #endif /* KDB */
  469                                 goto userout;
  470                         } else if (panic_on_nmi)
  471                                 panic("NMI indicates hardware failure");
  472                         break;
  473 #endif /* POWERFAIL_NMI */
  474 #endif /* DEV_ISA */
  475 
  476                 case T_OFLOW:           /* integer overflow fault */
  477                         ucode = FPE_INTOVF;
  478                         i = SIGFPE;
  479                         break;
  480 
  481                 case T_BOUND:           /* bounds check fault */
  482                         ucode = FPE_FLTSUB;
  483                         i = SIGFPE;
  484                         break;
  485 
  486                 case T_DNA:
  487 #ifdef DEV_NPX
  488                         KASSERT(PCB_USER_FPU(td->td_pcb),
  489                             ("kernel FPU ctx has leaked"));
  490                         /* transparent fault (due to context switch "late") */
  491                         if (npxdna())
  492                                 goto userout;
  493 #endif
  494                         uprintf("pid %d killed due to lack of floating point\n",
  495                                 p->p_pid);
  496                         i = SIGKILL;
  497                         ucode = 0;
  498                         break;
  499 
  500                 case T_FPOPFLT:         /* FPU operand fetch fault */
  501                         ucode = ILL_COPROC;
  502                         i = SIGILL;
  503                         break;
  504 
  505                 case T_XMMFLT:          /* SIMD floating-point exception */
  506 #if defined(DEV_NPX) && !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
  507                         ucode = npxtrap_sse();
  508                         if (ucode == -1)
  509                                 goto userout;
  510 #else
  511                         ucode = 0;
  512 #endif
  513                         i = SIGFPE;
  514                         break;
  515 #ifdef KDTRACE_HOOKS
  516                 case T_DTRACE_RET:
  517                         enable_intr();
  518                         fill_frame_regs(frame, &regs);
  519                         if (dtrace_return_probe_ptr != NULL &&
  520                             dtrace_return_probe_ptr(&regs) == 0)
  521                                 goto out;
  522                         break;
  523 #endif
  524                 }
  525         } else {
  526                 /* kernel trap */
  527 
  528                 KASSERT(cold || td->td_ucred != NULL,
  529                     ("kernel trap doesn't have ucred"));
  530                 switch (type) {
  531                 case T_PAGEFLT:                 /* page fault */
  532                         (void) trap_pfault(frame, FALSE, eva);
  533                         goto out;
  534 
  535                 case T_DNA:
  536 #ifdef DEV_NPX
  537                         KASSERT(!PCB_USER_FPU(td->td_pcb),
  538                             ("Unregistered use of FPU in kernel"));
  539                         if (npxdna())
  540                                 goto out;
  541 #endif
  542                         break;
  543 
  544                 case T_ARITHTRAP:       /* arithmetic trap */
  545                 case T_XMMFLT:          /* SIMD floating-point exception */
  546                 case T_FPOPFLT:         /* FPU operand fetch fault */
  547                         /*
  548                          * XXXKIB: for now, treat any FPU trap in the kernel as
  549                          * fatal; handler registration seems to be overkill.
  550                          */
  551                         trap_fatal(frame, 0);
  552                         goto out;
  553 
  554                         /*
  555                          * The following two traps can happen in
  556                          * vm86 mode, and, if so, we want to handle
  557                          * them specially.
  558                          */
  559                 case T_PROTFLT:         /* general protection fault */
  560                 case T_STKFLT:          /* stack fault */
  561                         if (frame->tf_eflags & PSL_VM) {
  562                                 i = vm86_emulate((struct vm86frame *)frame);
  563                                 if (i != 0)
  564                                         /*
  565                                          * returns to original process
  566                                          */
  567                                         vm86_trap((struct vm86frame *)frame);
  568                                 goto out;
  569                         }
  570                         if (type == T_STKFLT)
  571                                 break;
  572 
  573                         /* FALL THROUGH */
  574 
  575                 case T_SEGNPFLT:        /* segment not present fault */
  576                         if (curpcb->pcb_flags & PCB_VM86CALL)
  577                                 break;
  578 
  579                         /*
  580                          * Invalid %fs's and %gs's can be created using
  581                          * procfs or PT_SETREGS or by invalidating the
  582                          * underlying LDT entry.  This causes a fault
  583                          * in kernel mode when the kernel attempts to
  584                          * switch contexts.  Lose the bad context
  585                          * (XXX) so that we can continue, and generate
  586                          * a signal.
  587                          */
  588                         if (frame->tf_eip == (int)cpu_switch_load_gs) {
  589                                 curpcb->pcb_gs = 0;
  590 #if 0                           
  591                                 PROC_LOCK(p);
  592                                 kern_psignal(p, SIGBUS);
  593                                 PROC_UNLOCK(p);
  594 #endif                          
  595                                 goto out;
  596                         }
  597 
  598                         if (td->td_intr_nesting_level != 0)
  599                                 break;
  600 
  601                         /*
  602                          * Invalid segment selectors and out of bounds
  603                          * %eip's and %esp's can be set up in user mode.
  604                          * This causes a fault in kernel mode when the
  605                          * kernel tries to return to user mode.  We want
  606                          * to get this fault so that we can fix the
  607                          * problem here and not have to check all the
  608                          * selectors and pointers when the user changes
  609                          * them.
  610                          */
  611                         if (frame->tf_eip == (int)doreti_iret) {
  612                                 frame->tf_eip = (int)doreti_iret_fault;
  613                                 goto out;
  614                         }
  615                         if (frame->tf_eip == (int)doreti_popl_ds) {
  616                                 frame->tf_eip = (int)doreti_popl_ds_fault;
  617                                 goto out;
  618                         }
  619                         if (frame->tf_eip == (int)doreti_popl_es) {
  620                                 frame->tf_eip = (int)doreti_popl_es_fault;
  621                                 goto out;
  622                         }
  623                         if (frame->tf_eip == (int)doreti_popl_fs) {
  624                                 frame->tf_eip = (int)doreti_popl_fs_fault;
  625                                 goto out;
  626                         }
  627                         if (curpcb->pcb_onfault != NULL) {
  628                                 frame->tf_eip =
  629                                     (int)curpcb->pcb_onfault;
  630                                 goto out;
  631                         }
  632                         break;
  633 
  634                 case T_TSSFLT:
  635                         /*
  636                          * PSL_NT can be set in user mode and isn't cleared
  637                          * automatically when the kernel is entered.  This
  638                          * causes a TSS fault when the kernel attempts to
  639                          * `iret' because the TSS link is uninitialized.  We
  640                          * want to get this fault so that we can fix the
  641                          * problem here and not every time the kernel is
  642                          * entered.
  643                          */
  644                         if (frame->tf_eflags & PSL_NT) {
  645                                 frame->tf_eflags &= ~PSL_NT;
  646                                 goto out;
  647                         }
  648                         break;
  649 
  650                 case T_TRCTRAP:  /* trace trap */
  651                         if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
  652                                 /*
  653                                  * We've just entered system mode via the
  654                                  * syscall lcall.  Continue single stepping
  655                                  * silently until the syscall handler has
  656                                  * saved the flags.
  657                                  */
  658                                 goto out;
  659                         }
  660                         if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
  661                                 /*
  662                                  * The syscall handler has now saved the
  663                                  * flags.  Stop single stepping it.
  664                                  */
  665                                 frame->tf_eflags &= ~PSL_T;
  666                                 goto out;
  667                         }
  668                         /*
  669                          * Ignore debug register trace traps due to
  670                          * accesses in the user's address space, which
  671                          * can happen under several conditions such as
  672                          * if a user sets a watchpoint on a buffer and
  673                          * then passes that buffer to a system call.
  674                          * We still want to get TRCTRAPS for addresses
  675                          * in kernel space because that is useful when
  676                          * debugging the kernel.
  677                          */
  678                         if (user_dbreg_trap() && 
  679                            !(curpcb->pcb_flags & PCB_VM86CALL)) {
  680                                 /*
  681                                  * Reset breakpoint bits because the
  682                                  * processor doesn't
  683                                  */
  684                                 load_dr6(rdr6() & 0xfffffff0);
  685                                 goto out;
  686                         }
  687                         /*
  688                          * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
  689                          */
  690                 case T_BPTFLT:
  691                         /*
  692                          * If KDB is enabled, let it handle the debugger trap.
  693                          * Otherwise, debugger traps "can't happen".
  694                          */
  695 #ifdef KDB
  696                         if (kdb_trap(type, 0, frame))
  697                                 goto out;
  698 #endif
  699                         break;
  700 
  701 #ifdef DEV_ISA
  702                 case T_NMI:
  703 #ifdef POWERFAIL_NMI
  704                         if (time_second - lastalert > 10) {
  705                                 log(LOG_WARNING, "NMI: power fail\n");
  706                                 sysbeep(880, hz);
  707                                 lastalert = time_second;
  708                         }
  709                         goto out;
  710 #else /* !POWERFAIL_NMI */
  711                         /* machine/parity/power fail/"kitchen sink" faults */
  712                         if (isa_nmi(code) == 0) {
  713 #ifdef KDB
  714                                 /*
  715                                  * NMI can be hooked up to a pushbutton
  716                                  * for debugging.
  717                                  */
  718                                 if (kdb_on_nmi) {
  719                                         printf ("NMI ... going to debugger\n");
  720                                         kdb_trap(type, 0, frame);
  721                                 }
  722 #endif /* KDB */
  723                                 goto out;
  724                         } else if (panic_on_nmi == 0)
  725                                 goto out;
  726                         /* FALLTHROUGH */
  727 #endif /* POWERFAIL_NMI */
  728 #endif /* DEV_ISA */
  729                 }
  730 
  731                 trap_fatal(frame, eva);
  732                 goto out;
  733         }
  734 
  735         /* Translate fault for emulators (e.g. Linux) */
  736         if (*p->p_sysent->sv_transtrap)
  737                 i = (*p->p_sysent->sv_transtrap)(i, type);
  738 
  739         ksiginfo_init_trap(&ksi);
  740         ksi.ksi_signo = i;
  741         ksi.ksi_code = ucode;
  742         ksi.ksi_addr = (void *)addr;
  743         ksi.ksi_trapno = type;
  744         if (uprintf_signal) {
  745                 uprintf("pid %d comm %s: signal %d err %x code %d type %d "
  746                     "addr 0x%x esp 0x%08x eip 0x%08x "
  747                     "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
  748                     p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
  749                     frame->tf_esp, frame->tf_eip,
  750                     fubyte((void *)(frame->tf_eip + 0)),
  751                     fubyte((void *)(frame->tf_eip + 1)),
  752                     fubyte((void *)(frame->tf_eip + 2)),
  753                     fubyte((void *)(frame->tf_eip + 3)),
  754                     fubyte((void *)(frame->tf_eip + 4)),
  755                     fubyte((void *)(frame->tf_eip + 5)),
  756                     fubyte((void *)(frame->tf_eip + 6)),
  757                     fubyte((void *)(frame->tf_eip + 7)));
  758         }
  759         KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled"));
  760         trapsignal(td, &ksi);
  761 
  762 #ifdef DEBUG
  763         if (type <= MAX_TRAP_MSG) {
  764                 uprintf("fatal process exception: %s",
  765                         trap_msg[type]);
  766                 if ((type == T_PAGEFLT) || (type == T_PROTFLT))
  767                         uprintf(", fault VA = 0x%lx", (u_long)eva);
  768                 uprintf("\n");
  769         }
  770 #endif
  771 
  772 user:
  773         userret(td, frame);
  774         KASSERT(PCB_USER_FPU(td->td_pcb),
  775             ("Return from trap with kernel FPU ctx leaked"));
  776 userout:
  777 out:
  778         return;
  779 }
  780 
  781 static int
  782 trap_pfault(frame, usermode, eva)
  783         struct trapframe *frame;
  784         int usermode;
  785         vm_offset_t eva;
  786 {
  787         vm_offset_t va;
  788         struct vmspace *vm;
  789         vm_map_t map;
  790         int rv = 0;
  791         vm_prot_t ftype;
  792         struct thread *td = curthread;
  793         struct proc *p = td->td_proc;
  794 
  795         if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
  796                 /*
  797                  * Due to both processor errata and lazy TLB invalidation when
  798                  * access restrictions are removed from virtual pages, memory
  799                  * accesses that are allowed by the physical mapping layer may
  800                  * nonetheless cause one spurious page fault per virtual page. 
  801                  * When the thread is executing a "no faulting" section that
  802                  * is bracketed by vm_fault_{disable,enable}_pagefaults(),
  803                  * every page fault is treated as a spurious page fault,
  804                  * unless it accesses the same virtual address as the most
  805                  * recent page fault within the same "no faulting" section.
  806                  */
  807                 if (td->td_md.md_spurflt_addr != eva ||
  808                     (td->td_pflags & TDP_RESETSPUR) != 0) {
  809                         /*
  810                          * Do nothing to the TLB.  A stale TLB entry is
  811                          * flushed automatically by a page fault.
  812                          */
  813                         td->td_md.md_spurflt_addr = eva;
  814                         td->td_pflags &= ~TDP_RESETSPUR;
  815                         return (0);
  816                 }
  817         } else {
  818                 /*
  819                  * If we get a page fault while in a critical section, then
  820                  * it is most likely a fatal kernel page fault.  The kernel
  821                  * is already going to panic trying to get a sleep lock to
  822                  * do the VM lookup, so just consider it a fatal trap so the
  823                  * kernel can print out a useful trap message and even get
  824                  * to the debugger.
  825                  *
  826                  * If we get a page fault while holding a non-sleepable
  827                  * lock, then it is most likely a fatal kernel page fault.
  828                  * If WITNESS is enabled, then it's going to whine about
  829                  * bogus LORs with various VM locks, so just skip to the
  830                  * fatal trap handling directly.
  831                  */
  832                 if (td->td_critnest != 0 ||
  833                     WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
  834                     "Kernel page fault") != 0) {
  835                         trap_fatal(frame, eva);
  836                         return (-1);
  837                 }
  838         }
  839         va = trunc_page(eva);
  840         if (va >= KERNBASE) {
  841                 /*
  842                  * Don't allow user-mode faults in kernel address space.
  843                  * An exception:  if the faulting address is the invalid
  844                  * instruction entry in the IDT, then the Intel Pentium
  845                  * F00F bug workaround was triggered, and we need to
  846                  * treat it is as an illegal instruction, and not a page
  847                  * fault.
  848                  */
  849 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
  850                 if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
  851                         return (-2);
  852 #endif
  853                 if (usermode)
  854                         goto nogo;
  855 
  856                 map = kernel_map;
  857         } else {
  858                 /*
  859                  * This is a fault on non-kernel virtual memory.  If either
  860                  * p or p->p_vmspace is NULL, then the fault is fatal.
  861                  */
  862                 if (p == NULL || (vm = p->p_vmspace) == NULL)
  863                         goto nogo;
  864 
  865                 map = &vm->vm_map;
  866 
  867                 /*
  868                  * When accessing a user-space address, kernel must be
  869                  * ready to accept the page fault, and provide a
  870                  * handling routine.  Since accessing the address
  871                  * without the handler is a bug, do not try to handle
  872                  * it normally, and panic immediately.
  873                  */
  874                 if (!usermode && (td->td_intr_nesting_level != 0 ||
  875                     curpcb->pcb_onfault == NULL)) {
  876                         trap_fatal(frame, eva);
  877                         return (-1);
  878                 }
  879         }
  880 
  881         /*
  882          * PGEX_I is defined only if the execute disable bit capability is
  883          * supported and enabled.
  884          */
  885         if (frame->tf_err & PGEX_W)
  886                 ftype = VM_PROT_WRITE;
  887 #if defined(PAE) || defined(PAE_TABLES)
  888         else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
  889                 ftype = VM_PROT_EXECUTE;
  890 #endif
  891         else
  892                 ftype = VM_PROT_READ;
  893 
  894         if (map != kernel_map) {
  895                 /*
  896                  * Keep swapout from messing with us during this
  897                  *      critical time.
  898                  */
  899                 PROC_LOCK(p);
  900                 ++p->p_lock;
  901                 PROC_UNLOCK(p);
  902 
  903                 /* Fault in the user page: */
  904                 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
  905 
  906                 PROC_LOCK(p);
  907                 --p->p_lock;
  908                 PROC_UNLOCK(p);
  909         } else {
  910                 /*
  911                  * Don't have to worry about process locking or stacks in the
  912                  * kernel.
  913                  */
  914                 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
  915         }
  916         if (rv == KERN_SUCCESS) {
  917 #ifdef HWPMC_HOOKS
  918                 if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
  919                         PMC_SOFT_CALL_TF( , , page_fault, all, frame);
  920                         if (ftype == VM_PROT_READ)
  921                                 PMC_SOFT_CALL_TF( , , page_fault, read,
  922                                     frame);
  923                         else
  924                                 PMC_SOFT_CALL_TF( , , page_fault, write,
  925                                     frame);
  926                 }
  927 #endif
  928                 return (0);
  929         }
  930 nogo:
  931         if (!usermode) {
  932                 if (td->td_intr_nesting_level == 0 &&
  933                     curpcb->pcb_onfault != NULL) {
  934                         frame->tf_eip = (int)curpcb->pcb_onfault;
  935                         return (0);
  936                 }
  937                 trap_fatal(frame, eva);
  938                 return (-1);
  939         }
  940         return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
  941 }
  942 
  943 static void
  944 trap_fatal(frame, eva)
  945         struct trapframe *frame;
  946         vm_offset_t eva;
  947 {
  948         int code, ss, esp;
  949         u_int type;
  950         struct soft_segment_descriptor softseg;
  951         char *msg;
  952 
  953         code = frame->tf_err;
  954         type = frame->tf_trapno;
  955         sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
  956 
  957         if (type <= MAX_TRAP_MSG)
  958                 msg = trap_msg[type];
  959         else
  960                 msg = "UNKNOWN";
  961         printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
  962             frame->tf_eflags & PSL_VM ? "vm86" :
  963             ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
  964 #ifdef SMP
  965         /* two separate prints in case of a trap on an unmapped page */
  966         printf("cpuid = %d; ", PCPU_GET(cpuid));
  967         printf("apic id = %02x\n", PCPU_GET(apic_id));
  968 #endif
  969         if (type == T_PAGEFLT) {
  970                 printf("fault virtual address   = 0x%x\n", eva);
  971                 printf("fault code              = %s %s, %s\n",
  972                         code & PGEX_U ? "user" : "supervisor",
  973                         code & PGEX_W ? "write" : "read",
  974                         code & PGEX_P ? "protection violation" : "page not present");
  975         }
  976         printf("instruction pointer     = 0x%x:0x%x\n",
  977                frame->tf_cs & 0xffff, frame->tf_eip);
  978         if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
  979                 ss = frame->tf_ss & 0xffff;
  980                 esp = frame->tf_esp;
  981         } else {
  982                 ss = GSEL(GDATA_SEL, SEL_KPL);
  983                 esp = (int)&frame->tf_esp;
  984         }
  985         printf("stack pointer           = 0x%x:0x%x\n", ss, esp);
  986         printf("frame pointer           = 0x%x:0x%x\n", ss, frame->tf_ebp);
  987         printf("code segment            = base 0x%x, limit 0x%x, type 0x%x\n",
  988                softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
  989         printf("                        = DPL %d, pres %d, def32 %d, gran %d\n",
  990                softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
  991                softseg.ssd_gran);
  992         printf("processor eflags        = ");
  993         if (frame->tf_eflags & PSL_T)
  994                 printf("trace trap, ");
  995         if (frame->tf_eflags & PSL_I)
  996                 printf("interrupt enabled, ");
  997         if (frame->tf_eflags & PSL_NT)
  998                 printf("nested task, ");
  999         if (frame->tf_eflags & PSL_RF)
 1000                 printf("resume, ");
 1001         if (frame->tf_eflags & PSL_VM)
 1002                 printf("vm86, ");
 1003         printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
 1004         printf("current process         = %d (%s)\n",
 1005             curproc->p_pid, curthread->td_name);
 1006 
 1007 #ifdef KDB
 1008         if (debugger_on_panic || kdb_active) {
 1009                 frame->tf_err = eva;    /* smuggle fault address to ddb */
 1010                 if (kdb_trap(type, 0, frame)) {
 1011                         frame->tf_err = code;   /* restore error code */
 1012                         return;
 1013                 }
 1014                 frame->tf_err = code;           /* restore error code */
 1015         }
 1016 #endif
 1017         printf("trap number             = %d\n", type);
 1018         if (type <= MAX_TRAP_MSG)
 1019                 panic("%s", trap_msg[type]);
 1020         else
 1021                 panic("unknown/reserved trap");
 1022 }
 1023 
 1024 /*
 1025  * Double fault handler. Called when a fault occurs while writing
 1026  * a frame for a trap/exception onto the stack. This usually occurs
 1027  * when the stack overflows (such is the case with infinite recursion,
 1028  * for example).
 1029  *
 1030  * XXX Note that the current PTD gets replaced by IdlePTD when the
 1031  * task switch occurs. This means that the stack that was active at
 1032  * the time of the double fault is not available at <kstack> unless
 1033  * the machine was idle when the double fault occurred. The downside
 1034  * of this is that "trace <ebp>" in ddb won't work.
 1035  */
 1036 void
 1037 dblfault_handler()
 1038 {
 1039 #ifdef KDTRACE_HOOKS
 1040         if (dtrace_doubletrap_func != NULL)
 1041                 (*dtrace_doubletrap_func)();
 1042 #endif
 1043         printf("\nFatal double fault:\n");
 1044         printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
 1045         printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
 1046         printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
 1047 #ifdef SMP
 1048         /* two separate prints in case of a trap on an unmapped page */
 1049         printf("cpuid = %d; ", PCPU_GET(cpuid));
 1050         printf("apic id = %02x\n", PCPU_GET(apic_id));
 1051 #endif
 1052         panic("double fault");
 1053 }
 1054 
 1055 int
 1056 cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
 1057 {
 1058         struct proc *p;
 1059         struct trapframe *frame;
 1060         caddr_t params;
 1061         long tmp;
 1062         int error;
 1063 
 1064         p = td->td_proc;
 1065         frame = td->td_frame;
 1066 
 1067         params = (caddr_t)frame->tf_esp + sizeof(int);
 1068         sa->code = frame->tf_eax;
 1069 
 1070         /*
 1071          * Need to check if this is a 32 bit or 64 bit syscall.
 1072          */
 1073         if (sa->code == SYS_syscall) {
 1074                 /*
 1075                  * Code is first argument, followed by actual args.
 1076                  */
 1077                 error = fueword(params, &tmp);
 1078                 if (error == -1)
 1079                         return (EFAULT);
 1080                 sa->code = tmp;
 1081                 params += sizeof(int);
 1082         } else if (sa->code == SYS___syscall) {
 1083                 /*
 1084                  * Like syscall, but code is a quad, so as to maintain
 1085                  * quad alignment for the rest of the arguments.
 1086                  */
 1087                 error = fueword(params, &tmp);
 1088                 if (error == -1)
 1089                         return (EFAULT);
 1090                 sa->code = tmp;
 1091                 params += sizeof(quad_t);
 1092         }
 1093 
 1094         if (p->p_sysent->sv_mask)
 1095                 sa->code &= p->p_sysent->sv_mask;
 1096         if (sa->code >= p->p_sysent->sv_size)
 1097                 sa->callp = &p->p_sysent->sv_table[0];
 1098         else
 1099                 sa->callp = &p->p_sysent->sv_table[sa->code];
 1100         sa->narg = sa->callp->sy_narg;
 1101 
 1102         if (params != NULL && sa->narg != 0)
 1103                 error = copyin(params, (caddr_t)sa->args,
 1104                     (u_int)(sa->narg * sizeof(int)));
 1105         else
 1106                 error = 0;
 1107 
 1108         if (error == 0) {
 1109                 td->td_retval[0] = 0;
 1110                 td->td_retval[1] = frame->tf_edx;
 1111         }
 1112                 
 1113         return (error);
 1114 }
 1115 
 1116 #include "../../kern/subr_syscall.c"
 1117 
 1118 /*
 1119  * syscall - system call request C handler.  A system call is
 1120  * essentially treated as a trap by reusing the frame layout.
 1121  */
 1122 void
 1123 syscall(struct trapframe *frame)
 1124 {
 1125         struct thread *td;
 1126         struct syscall_args sa;
 1127         register_t orig_tf_eflags;
 1128         int error;
 1129         ksiginfo_t ksi;
 1130 
 1131 #ifdef DIAGNOSTIC
 1132         if (ISPL(frame->tf_cs) != SEL_UPL) {
 1133                 panic("syscall");
 1134                 /* NOT REACHED */
 1135         }
 1136 #endif
 1137         orig_tf_eflags = frame->tf_eflags;
 1138 
 1139         td = curthread;
 1140         td->td_frame = frame;
 1141 
 1142         error = syscallenter(td, &sa);
 1143 
 1144         /*
 1145          * Traced syscall.
 1146          */
 1147         if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) {
 1148                 frame->tf_eflags &= ~PSL_T;
 1149                 ksiginfo_init_trap(&ksi);
 1150                 ksi.ksi_signo = SIGTRAP;
 1151                 ksi.ksi_code = TRAP_TRACE;
 1152                 ksi.ksi_addr = (void *)frame->tf_eip;
 1153                 trapsignal(td, &ksi);
 1154         }
 1155 
 1156         KASSERT(PCB_USER_FPU(td->td_pcb),
 1157             ("System call %s returning with kernel FPU ctx leaked",
 1158              syscallname(td->td_proc, sa.code)));
 1159         KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
 1160             ("System call %s returning with mangled pcb_save",
 1161              syscallname(td->td_proc, sa.code)));
 1162 
 1163         syscallret(td, error, &sa);
 1164 }

Cache object: 715094abba2cbbb9f499d3269165e8a3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.