FreeBSD/Linux Kernel Cross Reference
sys/amd64/amd64/mca.c

/*-
 * Copyright (c) 2009 Hudson River Trading LLC
 * Written by: John H. Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Support for x86 machine check architecture.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <machine/intr_machdep.h>
#include <machine/apicvar.h>
#include <machine/cputypes.h>
#include <machine/mca.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

/* Modes for mca_scan() */
enum scan_mode {
        POLLED,
        MCE,
        CMCI,
};

/*
 * State maintained for each monitored MCx bank to control the
 * corrected machine check interrupt threshold.
 */
struct cmc_state {
        int     max_threshold;
        int     last_intr;
};

struct mca_internal {
        struct mca_record rec;
        int             logged;
        STAILQ_ENTRY(mca_internal) link;
};

static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");

static int mca_count;           /* Number of records stored. */

SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture");

static int mca_enabled = 1;
TUNABLE_INT("hw.mca.enabled", &mca_enabled);
SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
    "Administrative toggle for machine check support");

static int amd10h_L1TP = 1;
TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
    "Administrative toggle for logging of level one TLB parity (L1TP) errors");

int workaround_erratum383;
SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");

static STAILQ_HEAD(, mca_internal) mca_records;
static struct callout mca_timer;
static int mca_ticks = 3600;    /* Check hourly by default. */
static struct task mca_task;
static struct mtx mca_lock;
static struct cmc_state **cmc_state;    /* Indexed by cpuid, bank */
static int cmc_banks;
static int cmc_throttle = 60;   /* Time in seconds to throttle CMCI. */

static int
sysctl_positive_int(SYSCTL_HANDLER_ARGS)
{
        int error, value;

        value = *(int *)arg1;
        error = sysctl_handle_int(oidp, &value, 0, req);
        if (error || req->newptr == NULL)
                return (error);
        if (value <= 0)
                return (EINVAL);
        *(int *)arg1 = value;
        return (0);
}

static int
sysctl_mca_records(SYSCTL_HANDLER_ARGS)
{
        int *name = (int *)arg1;
        u_int namelen = arg2;
        struct mca_record record;
        struct mca_internal *rec;
        int i;

        if (namelen != 1)
                return (EINVAL);

        if (name[0] < 0 || name[0] >= mca_count)
                return (EINVAL);

        mtx_lock_spin(&mca_lock);
        if (name[0] >= mca_count) {
                mtx_unlock_spin(&mca_lock);
                return (EINVAL);
        }
        i = 0;
        STAILQ_FOREACH(rec, &mca_records, link) {
                if (i == name[0]) {
                        record = rec->rec;
                        break;
                }
                i++;
        }
        mtx_unlock_spin(&mca_lock);
        return (SYSCTL_OUT(req, &record, sizeof(record)));
}
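
/*
 * For reference, a minimal userland sketch (not part of this file, error
 * handling omitted) of how the hw.mca.count and hw.mca.records sysctls
 * created in mca_setup() below can be read.  The hw.mca.records node is
 * backed by the handler above; the sketch assumes struct mca_record is
 * visible to userland via <machine/mca.h>, everything else is plain
 * sysctl(3):
 *
 *      #include <sys/types.h>
 *      #include <sys/sysctl.h>
 *      #include <machine/mca.h>
 *      #include <stdint.h>
 *      #include <stdio.h>
 *
 *      int mib[4], count, i;
 *      size_t len, miblen = 4;
 *      struct mca_record mr;
 *
 *      len = sizeof(count);
 *      sysctlbyname("hw.mca.count", &count, &len, NULL, 0);
 *      sysctlnametomib("hw.mca.records", mib, &miblen);
 *      for (i = 0; i < count; i++) {
 *              mib[miblen] = i;
 *              len = sizeof(mr);
 *              if (sysctl(mib, miblen + 1, &mr, &len, NULL, 0) == 0)
 *                      printf("bank %d status 0x%016jx\n", mr.mr_bank,
 *                          (uintmax_t)mr.mr_status);
 *      }
 */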

static const char *
mca_error_ttype(uint16_t mca_error)
{

        switch ((mca_error & 0x000c) >> 2) {
        case 0:
                return ("I");
        case 1:
                return ("D");
        case 2:
                return ("G");
        }
        return ("?");
}

static const char *
mca_error_level(uint16_t mca_error)
{

        switch (mca_error & 0x0003) {
        case 0:
                return ("L0");
        case 1:
                return ("L1");
        case 2:
                return ("L2");
        case 3:
                return ("LG");
        }
        return ("L?");
}

static const char *
mca_error_request(uint16_t mca_error)
{

        switch ((mca_error & 0x00f0) >> 4) {
        case 0x0:
                return ("ERR");
        case 0x1:
                return ("RD");
        case 0x2:
                return ("WR");
        case 0x3:
                return ("DRD");
        case 0x4:
                return ("DWR");
        case 0x5:
                return ("IRD");
        case 0x6:
                return ("PREFETCH");
        case 0x7:
                return ("EVICT");
        case 0x8:
                return ("SNOOP");
        }
        return ("???");
}

static const char *
mca_error_mmtype(uint16_t mca_error)
{

        switch ((mca_error & 0x70) >> 4) {
        case 0x0:
                return ("GEN");
        case 0x1:
                return ("RD");
        case 0x2:
                return ("WR");
        case 0x3:
                return ("AC");
        case 0x4:
                return ("MS");
        }
        return ("???");
}
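
/*
 * Worked example of the compound decoding done by the helpers above and
 * mca_log() below (the value is illustrative): an MCA error code of 0x0145
 * matches the cache error pattern ((0x0145 & 0xef00) == 0x0100), so it is
 * reported as "DCACHE L1 DWR error": transaction type bits 3:2 = 1 ("D"),
 * cache level bits 1:0 = 1 ("L1"), and request bits 7:4 = 0x4 ("DWR").
 */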

/* Dump details about a single machine check. */
static void __nonnull(1)
mca_log(const struct mca_record *rec)
{
        uint16_t mca_error;

        printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
            (long long)rec->mr_status);
        printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
            (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
        printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
            rec->mr_cpu_id, rec->mr_apic_id);
        printf("MCA: CPU %d ", rec->mr_cpu);
        if (rec->mr_status & MC_STATUS_UC)
                printf("UNCOR ");
        else {
                printf("COR ");
                if (rec->mr_mcg_cap & MCG_CAP_CMCI_P)
                        printf("(%lld) ", ((long long)rec->mr_status &
                            MC_STATUS_COR_COUNT) >> 38);
        }
        if (rec->mr_status & MC_STATUS_PCC)
                printf("PCC ");
        if (rec->mr_status & MC_STATUS_OVER)
                printf("OVER ");
        mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
        switch (mca_error) {
                /* Simple error codes. */
        case 0x0000:
                printf("no error");
                break;
        case 0x0001:
                printf("unclassified error");
                break;
        case 0x0002:
                printf("ucode ROM parity error");
                break;
        case 0x0003:
                printf("external error");
                break;
        case 0x0004:
                printf("FRC error");
                break;
        case 0x0005:
                printf("internal parity error");
                break;
        case 0x0400:
                printf("internal timer error");
                break;
        default:
                if ((mca_error & 0xfc00) == 0x0400) {
                        printf("internal error %x", mca_error & 0x03ff);
                        break;
                }

                /* Compound error codes. */

                /* Memory hierarchy error. */
                if ((mca_error & 0xeffc) == 0x000c) {
                        printf("%s memory error", mca_error_level(mca_error));
                        break;
                }

                /* TLB error. */
                if ((mca_error & 0xeff0) == 0x0010) {
                        printf("%sTLB %s error", mca_error_ttype(mca_error),
                            mca_error_level(mca_error));
                        break;
                }

                /* Memory controller error. */
                if ((mca_error & 0xef80) == 0x0080) {
                        printf("%s channel ", mca_error_mmtype(mca_error));
                        if ((mca_error & 0x000f) != 0x000f)
                                printf("%d", mca_error & 0x000f);
                        else
                                printf("??");
                        printf(" memory error");
                        break;
                }

                /* Cache error. */
                if ((mca_error & 0xef00) == 0x0100) {
                        printf("%sCACHE %s %s error",
                            mca_error_ttype(mca_error),
                            mca_error_level(mca_error),
                            mca_error_request(mca_error));
                        break;
                }

                /* Bus and/or Interconnect error. */
                if ((mca_error & 0xe800) == 0x0800) {
                        printf("BUS%s ", mca_error_level(mca_error));
                        switch ((mca_error & 0x0600) >> 9) {
                        case 0:
                                printf("Source");
                                break;
                        case 1:
                                printf("Responder");
                                break;
                        case 2:
                                printf("Observer");
                                break;
                        default:
                                printf("???");
                                break;
                        }
                        printf(" %s ", mca_error_request(mca_error));
                        switch ((mca_error & 0x000c) >> 2) {
                        case 0:
                                printf("Memory");
                                break;
                        case 2:
                                printf("I/O");
                                break;
                        case 3:
                                printf("Other");
                                break;
                        default:
                                printf("???");
                                break;
                        }
                        if (mca_error & 0x0100)
                                printf(" timed out");
                        break;
                }

                printf("unknown error %x", mca_error);
                break;
        }
        printf("\n");
        if (rec->mr_status & MC_STATUS_ADDRV)
                printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
        if (rec->mr_status & MC_STATUS_MISCV)
                printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
}

static int __nonnull(2)
mca_check_status(int bank, struct mca_record *rec)
{
        uint64_t status;
        u_int p[4];

        status = rdmsr(MSR_MC_STATUS(bank));
        if (!(status & MC_STATUS_VAL))
                return (0);

        /* Save exception information. */
        rec->mr_status = status;
        rec->mr_bank = bank;
        rec->mr_addr = 0;
        if (status & MC_STATUS_ADDRV)
                rec->mr_addr = rdmsr(MSR_MC_ADDR(bank));
        rec->mr_misc = 0;
        if (status & MC_STATUS_MISCV)
                rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
        rec->mr_tsc = rdtsc();
        rec->mr_apic_id = PCPU_GET(apic_id);
        rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
        rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
        rec->mr_cpu_id = cpu_id;
        rec->mr_cpu_vendor_id = cpu_vendor_id;
        rec->mr_cpu = PCPU_GET(cpuid);

        /*
         * Clear machine check.  Don't do this for uncorrectable
         * errors so that the BIOS can see them.
         */
        if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
                wrmsr(MSR_MC_STATUS(bank), 0);
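                /*
                 * The do_cpuid() below appears to serve only as a
                 * serializing instruction after clearing the status
                 * register; the p[] output is not otherwise used.
                 */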
                do_cpuid(0, p);
        }
        return (1);
}

static void __nonnull(1)
mca_record_entry(const struct mca_record *record)
{
        struct mca_internal *rec;

        rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT);
        if (rec == NULL) {
                printf("MCA: Unable to allocate space for an event.\n");
                mca_log(record);
                return;
        }

        rec->rec = *record;
        rec->logged = 0;
        mtx_lock_spin(&mca_lock);
        STAILQ_INSERT_TAIL(&mca_records, rec, link);
        mca_count++;
        mtx_unlock_spin(&mca_lock);
}

/*
 * Update the interrupt threshold for a CMCI.  The strategy is to use
 * a low trigger that interrupts as soon as the first event occurs.
 * However, if a steady stream of events arrives, the threshold is
 * increased until the interrupts are throttled to at most once every
 * cmc_throttle seconds or until the next periodic scan.  If a periodic
 * scan finds that the threshold is too high, it is lowered.
 */
static void
cmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec)
{
        struct cmc_state *cc;
        uint64_t ctl;
        u_int delta;
        int count, limit;

        /* Fetch the current limit for this bank. */
        cc = &cmc_state[PCPU_GET(cpuid)][bank];
        ctl = rdmsr(MSR_MC_CTL2(bank));
        count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
        /* last_intr is kept in ticks; convert the delta to seconds. */
        delta = (u_int)(ticks - cc->last_intr) / hz;

        /*
         * If an interrupt was received less than cmc_throttle seconds
         * since the previous interrupt and the count from the current
         * event is greater than or equal to the current threshold,
         * double the threshold up to the max.
         */
        if (mode == CMCI && valid) {
                limit = ctl & MC_CTL2_THRESHOLD;
                if (delta < cmc_throttle && count >= limit &&
                    limit < cc->max_threshold) {
                        limit = min(limit << 1, cc->max_threshold);
                        ctl &= ~MC_CTL2_THRESHOLD;
                        ctl |= limit;
                        wrmsr(MSR_MC_CTL2(bank), ctl);
                }
                cc->last_intr = ticks;
                return;
        }

        /*
         * When the banks are polled, check to see if the threshold
         * should be lowered.
         */
        if (mode != POLLED)
                return;

        /* If a CMCI occurred recently, do nothing for now. */
        if (delta < cmc_throttle)
                return;

        /*
         * Compute a new limit based on the average rate of events per
         * cmc_throttle seconds since the last interrupt.
         */
        if (valid) {
                count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
                limit = count * cmc_throttle / delta;
                if (limit <= 0)
                        limit = 1;
                else if (limit > cc->max_threshold)
                        limit = cc->max_threshold;
        } else
                limit = 1;
        if ((ctl & MC_CTL2_THRESHOLD) != limit) {
                ctl &= ~MC_CTL2_THRESHOLD;
                ctl |= limit;
                wrmsr(MSR_MC_CTL2(bank), ctl);
        }
}
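
/*
 * Illustrative numbers for the threshold adaptation above (assumed, not
 * taken from any particular machine): with cmc_throttle at its default of
 * 60 seconds and a bank whose max_threshold is 32, a burst of corrected
 * errors arriving a few seconds apart doubles the CMCI threshold
 * 1 -> 2 -> 4 -> ... -> 32.  If a later polled scan then finds, say, 5
 * corrected events accumulated over the 600 seconds since the last
 * interrupt, the new limit is 5 * 60 / 600 = 0, which is clamped to 1,
 * re-arming an interrupt on the next event.
 */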

/*
 * This scans all the machine check banks of the current CPU to see if
 * there are any machine checks.  Any non-recoverable errors are
 * reported immediately via mca_log().  The current thread must be
 * pinned when this is called.  The 'mode' parameter indicates if we
 * are being called from the MC exception handler, the CMCI handler,
 * or the periodic poller.  In the MC exception case this function
 * returns true if the system is restartable.  Otherwise, it returns a
 * count of the number of valid MC records found.
 */
static int
mca_scan(enum scan_mode mode)
{
        struct mca_record rec;
        uint64_t mcg_cap, ucmask;
        int count, i, recoverable, valid;

        count = 0;
        recoverable = 1;
        ucmask = MC_STATUS_UC | MC_STATUS_PCC;

        /* When handling a MCE#, treat the OVER flag as non-restartable. */
        if (mode == MCE)
                ucmask |= MC_STATUS_OVER;
        mcg_cap = rdmsr(MSR_MCG_CAP);
        for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
                /*
                 * For a CMCI, only check banks this CPU is
                 * responsible for.
                 */
                if (mode == CMCI && !(PCPU_GET(cmci_mask) & 1 << i))
                        continue;

                valid = mca_check_status(i, &rec);
                if (valid) {
                        count++;
                        if (rec.mr_status & ucmask) {
                                recoverable = 0;
                                mca_log(&rec);
                        }
                        mca_record_entry(&rec);
                }

                /*
                 * If this is a bank this CPU monitors via CMCI,
                 * update the threshold.
                 */
                if (PCPU_GET(cmci_mask) & (1 << i))
                        cmci_update(mode, i, valid, &rec);
        }
        return (mode == MCE ? recoverable : count);
}

/*
 * Scan the machine check banks on all CPUs by binding to each CPU in
 * turn.  If any of the CPUs contained new machine check records, log
 * them to the console.
 */
static void
mca_scan_cpus(void *context, int pending)
{
        struct mca_internal *mca;
        struct thread *td;
        int count, cpu;

        td = curthread;
        count = 0;
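        /*
         * sched_bind() must be called with the thread lock held; the
         * lock is dropped around mca_scan() below since the scan takes
         * other locks and may allocate memory.
         */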
        thread_lock(td);
        for (cpu = 0; cpu <= mp_maxid; cpu++) {
                if (CPU_ABSENT(cpu))
                        continue;
                sched_bind(td, cpu);
                thread_unlock(td);
                count += mca_scan(POLLED);
                thread_lock(td);
                sched_unbind(td);
        }
        thread_unlock(td);
        if (count != 0) {
                mtx_lock_spin(&mca_lock);
                STAILQ_FOREACH(mca, &mca_records, link) {
                        if (!mca->logged) {
                                mca->logged = 1;
                                mtx_unlock_spin(&mca_lock);
                                mca_log(&mca->rec);
                                mtx_lock_spin(&mca_lock);
                        }
                }
                mtx_unlock_spin(&mca_lock);
        }
}

static void
mca_periodic_scan(void *arg)
{

        taskqueue_enqueue(taskqueue_thread, &mca_task);
        callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL);
}

static int
sysctl_mca_scan(SYSCTL_HANDLER_ARGS)
{
        int error, i;

        i = 0;
        error = sysctl_handle_int(oidp, &i, 0, req);
        if (error)
                return (error);
        if (i)
                taskqueue_enqueue(taskqueue_thread, &mca_task);
        return (0);
}

static void
mca_startup(void *dummy)
{

        if (!mca_enabled || !(cpu_feature & CPUID_MCA))
                return;

        callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL);
}
SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);

static void
cmci_setup(uint64_t mcg_cap)
{
        int i;

        cmc_state = malloc((mp_maxid + 1) * sizeof(struct cmc_state *),
            M_MCA, M_WAITOK);
        cmc_banks = mcg_cap & MCG_CAP_COUNT;
        for (i = 0; i <= mp_maxid; i++)
                cmc_state[i] = malloc(sizeof(struct cmc_state) * cmc_banks,
                    M_MCA, M_WAITOK | M_ZERO);
        SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
            "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
            &cmc_throttle, 0, sysctl_positive_int, "I",
            "Interval in seconds to throttle corrected MC interrupts");
}

static void
mca_setup(uint64_t mcg_cap)
{

        /*
         * On AMD Family 10h processors, unless logging of level one TLB
         * parity (L1TP) errors is disabled, enable the recommended workaround
         * for Erratum 383.
         */
        if (cpu_vendor_id == CPU_VENDOR_AMD &&
            CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
                workaround_erratum383 = 1;

        mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
        STAILQ_INIT(&mca_records);
        TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL);
        callout_init(&mca_timer, CALLOUT_MPSAFE);
        SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
            "count", CTLFLAG_RD, &mca_count, 0, "Record count");
        SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
            "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
            0, sysctl_positive_int, "I",
            "Periodic interval in seconds to scan for machine checks");
        SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
            "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records");
        SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
            "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
            sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
        if (mcg_cap & MCG_CAP_CMCI_P)
                cmci_setup(mcg_cap);
}
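
/*
 * For reference, the knobs created above and by the declarations at the
 * top of this file:
 *
 *   hw.mca.enabled        (tunable)     administrative toggle for MCA support
 *   hw.mca.amd10h_L1TP    (tunable)     toggle for logging of L1TP errors
 *   hw.mca.erratum383     (read-only)   whether the Erratum 383 workaround is on
 *   hw.mca.count          (read-only)   number of records collected
 *   hw.mca.interval       (read-write)  polling interval in seconds
 *   hw.mca.records.<N>    (read-only)   Nth raw struct mca_record
 *   hw.mca.force_scan     (read-write)  write non-zero to trigger a scan
 *   hw.mca.cmc_throttle   (read-write)  CMCI throttle interval in seconds
 */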

/*
 * See if we should monitor CMCI for this bank.  If CMCI_EN is already
 * set in MC_CTL2, then another CPU is responsible for this bank, so
 * ignore it.  If CMCI_EN returns zero after being set, then this bank
 * does not support CMCI_EN.  If this CPU sets CMCI_EN, then it should
 * now monitor this bank.
 */
static void
cmci_monitor(int i)
{
        struct cmc_state *cc;
        uint64_t ctl;

        KASSERT(i < cmc_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));

        ctl = rdmsr(MSR_MC_CTL2(i));
        if (ctl & MC_CTL2_CMCI_EN)
                /* Already monitored by another CPU. */
                return;

        /* Set the threshold to one event for now. */
        ctl &= ~MC_CTL2_THRESHOLD;
        ctl |= MC_CTL2_CMCI_EN | 1;
        wrmsr(MSR_MC_CTL2(i), ctl);
        ctl = rdmsr(MSR_MC_CTL2(i));
        if (!(ctl & MC_CTL2_CMCI_EN))
                /* This bank does not support CMCI. */
                return;

        cc = &cmc_state[PCPU_GET(cpuid)][i];

        /* Determine maximum threshold. */
        ctl &= ~MC_CTL2_THRESHOLD;
        ctl |= 0x7fff;
        wrmsr(MSR_MC_CTL2(i), ctl);
        ctl = rdmsr(MSR_MC_CTL2(i));
        cc->max_threshold = ctl & MC_CTL2_THRESHOLD;

        /* Start off with a threshold of 1. */
        ctl &= ~MC_CTL2_THRESHOLD;
        ctl |= 1;
        wrmsr(MSR_MC_CTL2(i), ctl);

        /* Mark this bank as monitored. */
        PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << i);
}

/*
 * For resume, reset the threshold for any banks we monitor back to
 * one and throw away the timestamp of the last interrupt.
 */
static void
cmci_resume(int i)
{
        struct cmc_state *cc;
        uint64_t ctl;

        KASSERT(i < cmc_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));

        /* Ignore banks not monitored by this CPU. */
        if (!(PCPU_GET(cmci_mask) & 1 << i))
                return;

        cc = &cmc_state[PCPU_GET(cpuid)][i];
        cc->last_intr = -ticks;
        ctl = rdmsr(MSR_MC_CTL2(i));
        ctl &= ~MC_CTL2_THRESHOLD;
        ctl |= MC_CTL2_CMCI_EN | 1;
        wrmsr(MSR_MC_CTL2(i), ctl);
}

/*
 * Initializes per-CPU machine check registers and enables corrected
 * machine check interrupts.
 */
static void
_mca_init(int boot)
{
        uint64_t mcg_cap;
        uint64_t ctl, mask;
        int i, skip;

        /* MCE is required. */
        if (!mca_enabled || !(cpu_feature & CPUID_MCE))
                return;

        if (cpu_feature & CPUID_MCA) {
                if (boot)
                        PCPU_SET(cmci_mask, 0);

                mcg_cap = rdmsr(MSR_MCG_CAP);
                if (mcg_cap & MCG_CAP_CTL_P)
                        /* Enable MCA features. */
                        wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
                if (PCPU_GET(cpuid) == 0 && boot)
                        mca_setup(mcg_cap);

                /*
                 * Disable logging of level one TLB parity (L1TP) errors by
                 * the data cache as an alternative workaround for AMD Family
                 * 10h Erratum 383.  Unlike the recommended workaround, there
                 * is no performance penalty to this workaround.  However,
                 * L1TP errors will go unreported.
                 */
                if (cpu_vendor_id == CPU_VENDOR_AMD &&
                    CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
                        mask = rdmsr(MSR_MC0_CTL_MASK);
                        if ((mask & (1UL << 5)) == 0)
                                wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
                }
                for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
                        /* By default enable logging of all errors. */
                        ctl = 0xffffffffffffffffUL;
                        skip = 0;

                        if (cpu_vendor_id == CPU_VENDOR_INTEL) {
                                /*
                                 * For P6 models before Nehalem MC0_CTL is
                                 * always enabled and reserved.
                                 */
                                if (i == 0 && CPUID_TO_FAMILY(cpu_id) == 0x6
                                    && CPUID_TO_MODEL(cpu_id) < 0x1a)
                                        skip = 1;
                        } else if (cpu_vendor_id == CPU_VENDOR_AMD) {
                                /* BKDG for Family 10h: unset GartTblWkEn. */
                                if (i == 4 && CPUID_TO_FAMILY(cpu_id) >= 0xf)
                                        ctl &= ~(1UL << 10);
                        }

                        if (!skip)
                                wrmsr(MSR_MC_CTL(i), ctl);

                        if (mcg_cap & MCG_CAP_CMCI_P) {
                                if (boot)
                                        cmci_monitor(i);
                                else
                                        cmci_resume(i);
                        }

                        /* Clear all errors. */
                        wrmsr(MSR_MC_STATUS(i), 0);
                }

                if (PCPU_GET(cmci_mask) != 0 && boot)
                        lapic_enable_cmc();
        }

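        /*
         * CR4.MCE must be set for machine check exceptions to be
         * delivered at all; do this even when only the Pentium-style
         * MCE support (and not full MCA) is present.
         */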
        load_cr4(rcr4() | CR4_MCE);
}

/* Must be executed on each CPU during boot. */
void
mca_init(void)
{

        _mca_init(1);
}

/* Must be executed on each CPU during resume. */
void
mca_resume(void)
{

        _mca_init(0);
}

/*
 * The machine check registers for the BSP cannot be initialized until
 * the local APIC is initialized.  This happens at SI_SUB_CPU,
 * SI_ORDER_SECOND.
 */
static void
mca_init_bsp(void *arg __unused)
{

        mca_init();
}
SYSINIT(mca_init_bsp, SI_SUB_CPU, SI_ORDER_ANY, mca_init_bsp, NULL);

/* Called when a machine check exception fires. */
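/*
 * Returns non-zero if the saved machine state is restartable.  A zero
 * return is presumably treated as fatal by the trap handler that calls
 * this (an assumption; the caller is not part of this file).
 */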
int
mca_intr(void)
{
        uint64_t mcg_status;
        int recoverable;

        if (!(cpu_feature & CPUID_MCA)) {
                /*
                 * Just print the values of the old Pentium registers
                 * and panic.
                 */
                printf("MC Type: 0x%lx  Address: 0x%lx\n",
                    rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR));
                return (0);
        }

        /* Scan the banks and check for any non-recoverable errors. */
        recoverable = mca_scan(MCE);
        mcg_status = rdmsr(MSR_MCG_STATUS);
        if (!(mcg_status & MCG_STATUS_RIPV))
                recoverable = 0;

        /* Clear MCIP. */
        wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
        return (recoverable);
}

/* Called for a CMCI (correctable machine check interrupt). */
void
cmc_intr(void)
{
        struct mca_internal *mca;
        int count;

        /*
         * Serialize MCA bank scanning to prevent collisions from
         * sibling threads.
         */
        count = mca_scan(CMCI);

        /* If we found any records, log them to the console. */
        if (count != 0) {
                mtx_lock_spin(&mca_lock);
                STAILQ_FOREACH(mca, &mca_records, link) {
                        if (!mca->logged) {
                                mca->logged = 1;
                                mtx_unlock_spin(&mca_lock);
                                mca_log(&mca->rec);
                                mtx_lock_spin(&mca_lock);
                        }
                }
                mtx_unlock_spin(&mca_lock);
        }
}
