FreeBSD/Linux Kernel Cross Reference
sys/i386/i386/mca.c
1 /*-
2 * Copyright (c) 2009 Hudson River Trading LLC
3 * Written by: John H. Baldwin <jhb@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 /*
29 * Support for x86 machine check architecture.
30 */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include "opt_apic.h"
36
37 #include <sys/param.h>
38 #include <sys/bus.h>
39 #include <sys/interrupt.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/sysctl.h>
48 #include <sys/systm.h>
49 #include <sys/taskqueue.h>
50 #include <machine/intr_machdep.h>
51 #include <machine/apicvar.h>
52 #include <machine/cputypes.h>
53 #include <machine/mca.h>
54 #include <machine/md_var.h>
55 #include <machine/specialreg.h>
56
/* Modes for mca_scan(): the context the bank scan is invoked from. */
enum scan_mode {
	POLLED,		/* Periodic timer or sysctl-forced scan. */
	MCE,		/* Machine check exception handler. */
	CMCI,		/* Corrected machine check interrupt handler. */
};

#ifdef DEV_APIC
/*
 * State maintained for each monitored MCx bank to control the
 * corrected machine check interrupt threshold.
 */
struct cmc_state {
	int	max_threshold;	/* Largest threshold MC_CTL2 accepted. */
	int	last_intr;	/* 'ticks' value at the last CMCI. */
};
#endif

/* One saved machine check event plus console-logging bookkeeping. */
struct mca_internal {
	struct mca_record rec;
	int	logged;		/* Non-zero once printed to the console. */
	STAILQ_ENTRY(mca_internal) link;
};

static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture");

static int mca_count;		/* Number of records stored. */

SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture");

static int mca_enabled = 1;
TUNABLE_INT("hw.mca.enabled", &mca_enabled);
SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0,
    "Administrative toggle for machine check support");

static int amd10h_L1TP = 1;
TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP);
SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0,
    "Administrative toggle for logging of level one TLB parity (L1TP) errors");

int workaround_erratum383;
SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0,
    "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");

/* List of all saved MC records; mca_records and mca_count are
 * protected by mca_lock (a spin lock, since it is taken from the MC
 * exception and CMCI handlers). */
static STAILQ_HEAD(, mca_internal) mca_records;
static struct callout mca_timer;
static int mca_ticks = 3600;	/* Check hourly by default. */
static struct task mca_task;
static struct mtx mca_lock;

#ifdef DEV_APIC
static struct cmc_state **cmc_state;	/* Indexed by cpuid, bank */
static int cmc_banks;		/* Number of banks per cmc_state[] row. */
static int cmc_throttle = 60;	/* Time in seconds to throttle CMCI. */
#endif
112
113 static int
114 sysctl_positive_int(SYSCTL_HANDLER_ARGS)
115 {
116 int error, value;
117
118 value = *(int *)arg1;
119 error = sysctl_handle_int(oidp, &value, 0, req);
120 if (error || req->newptr == NULL)
121 return (error);
122 if (value <= 0)
123 return (EINVAL);
124 *(int *)arg1 = value;
125 return (0);
126 }
127
/*
 * Sysctl handler for hw.mca.records.<index>: copy the index'th saved
 * machine check record out to userland.  The name vector must hold
 * exactly one element, the zero-based record index.
 */
static int
sysctl_mca_records(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1;
	u_int namelen = arg2;
	struct mca_record record;
	struct mca_internal *rec;
	int i;

	if (namelen != 1)
		return (EINVAL);

	/* Unlocked fast-path rejection of an out-of-range index. */
	if (name[0] < 0 || name[0] >= mca_count)
		return (EINVAL);

	mtx_lock_spin(&mca_lock);
	/* Re-check under the lock in case mca_count changed meanwhile. */
	if (name[0] >= mca_count) {
		mtx_unlock_spin(&mca_lock);
		return (EINVAL);
	}
	i = 0;
	STAILQ_FOREACH(rec, &mca_records, link) {
		if (i == name[0]) {
			/* Copy locally so SYSCTL_OUT can run unlocked. */
			record = rec->rec;
			break;
		}
		i++;
	}
	mtx_unlock_spin(&mca_lock);
	return (SYSCTL_OUT(req, &record, sizeof(record)));
}
159
/*
 * Decode the transaction type field (bits 3:2) of a compound MCA
 * error code: Instruction, Data, or Generic.
 */
static const char *
mca_error_ttype(uint16_t mca_error)
{
	const char *type;

	switch ((mca_error & 0x000c) >> 2) {
	case 0:
		type = "I";	/* Instruction. */
		break;
	case 1:
		type = "D";	/* Data. */
		break;
	case 2:
		type = "G";	/* Generic. */
		break;
	default:
		type = "?";
		break;
	}
	return (type);
}
174
/*
 * Decode the memory hierarchy level field (bits 1:0) of a compound
 * MCA error code.  The 2-bit index covers all four table entries, so
 * no out-of-range case is possible.
 */
static const char *
mca_error_level(uint16_t mca_error)
{
	static const char *levels[] = { "L0", "L1", "L2", "LG" };

	return (levels[mca_error & 0x0003]);
}
191
/*
 * Decode the request type field (bits 7:4) of a compound MCA error
 * code.  Values above the known range decode as "???".
 */
static const char *
mca_error_request(uint16_t mca_error)
{
	static const char *requests[] = {
		"ERR", "RD", "WR", "DRD", "DWR", "IRD", "PREFETCH", "EVICT",
		"SNOOP"
	};
	unsigned int idx;

	idx = (mca_error & 0x00f0) >> 4;
	if (idx >= sizeof(requests) / sizeof(requests[0]))
		return ("???");
	return (requests[idx]);
}
218
/*
 * Decode the memory-controller transaction type field (bits 6:4) of
 * a compound MCA error code.  Values above the known range decode as
 * "???".
 */
static const char *
mca_error_mmtype(uint16_t mca_error)
{
	static const char *mmtypes[] = { "GEN", "RD", "WR", "AC", "MS" };
	unsigned int idx;

	idx = (mca_error & 0x70) >> 4;
	if (idx >= sizeof(mmtypes) / sizeof(mmtypes[0]))
		return ("???");
	return (mmtypes[idx]);
}
237
/*
 * Dump details about a single machine check to the console: the raw
 * register values followed by a human-readable decode of the MCA
 * error code (low 16 bits of the status register).
 */
static void __nonnull(1)
mca_log(const struct mca_record *rec)
{
	uint16_t mca_error;

	/* Raw register values first, so nothing is lost if decoding fails. */
	printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
	    (long long)rec->mr_status);
	printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
	    (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
	printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
	    rec->mr_cpu_id, rec->mr_apic_id);
	printf("MCA: CPU %d ", rec->mr_cpu);
	if (rec->mr_status & MC_STATUS_UC)
		printf("UNCOR ");
	else {
		printf("COR ");
		/* With CMCI, corrected errors carry an event count. */
		if (rec->mr_mcg_cap & MCG_CAP_CMCI_P)
			printf("(%lld) ", ((long long)rec->mr_status &
			    MC_STATUS_COR_COUNT) >> 38);
	}
	if (rec->mr_status & MC_STATUS_PCC)
		printf("PCC ");
	if (rec->mr_status & MC_STATUS_OVER)
		printf("OVER ");
	mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
	switch (mca_error) {
		/* Simple error codes. */
	case 0x0000:
		printf("no error");
		break;
	case 0x0001:
		printf("unclassified error");
		break;
	case 0x0002:
		printf("ucode ROM parity error");
		break;
	case 0x0003:
		printf("external error");
		break;
	case 0x0004:
		printf("FRC error");
		break;
	case 0x0005:
		printf("internal parity error");
		break;
	case 0x0400:
		printf("internal timer error");
		break;
	default:
		/* Other internal errors: 0000 01xx xxxx xxxx. */
		if ((mca_error & 0xfc00) == 0x0400) {
			printf("internal error %x", mca_error & 0x03ff);
			break;
		}

		/* Compound error codes, matched most-specific first. */

		/* Memory hierarchy error: 0000 0000 0000 11LL. */
		if ((mca_error & 0xeffc) == 0x000c) {
			printf("%s memory error", mca_error_level(mca_error));
			break;
		}

		/* TLB error: 0000 0000 0001 TTLL. */
		if ((mca_error & 0xeff0) == 0x0010) {
			printf("%sTLB %s error", mca_error_ttype(mca_error),
			    mca_error_level(mca_error));
			break;
		}

		/* Memory controller error: 0000 0000 1MMM CCCC. */
		if ((mca_error & 0xef80) == 0x0080) {
			printf("%s channel ", mca_error_mmtype(mca_error));
			/* Channel 0xf means "not specified". */
			if ((mca_error & 0x000f) != 0x000f)
				printf("%d", mca_error & 0x000f);
			else
				printf("??");
			printf(" memory error");
			break;
		}

		/* Cache error: 0000 0001 RRRR TTLL. */
		if ((mca_error & 0xef00) == 0x0100) {
			printf("%sCACHE %s %s error",
			    mca_error_ttype(mca_error),
			    mca_error_level(mca_error),
			    mca_error_request(mca_error));
			break;
		}

		/* Bus and/or Interconnect error: 0000 1PPT RRRR IILL. */
		if ((mca_error & 0xe800) == 0x0800) {
			printf("BUS%s ", mca_error_level(mca_error));
			/* Participation (PP field, bits 10:9). */
			switch ((mca_error & 0x0600) >> 9) {
			case 0:
				printf("Source");
				break;
			case 1:
				printf("Responder");
				break;
			case 2:
				printf("Observer");
				break;
			default:
				printf("???");
				break;
			}
			printf(" %s ", mca_error_request(mca_error));
			/* Memory or I/O (II field, bits 3:2). */
			switch ((mca_error & 0x000c) >> 2) {
			case 0:
				printf("Memory");
				break;
			case 2:
				printf("I/O");
				break;
			case 3:
				printf("Other");
				break;
			default:
				printf("???");
				break;
			}
			/* Timeout (T field, bit 8). */
			if (mca_error & 0x0100)
				printf(" timed out");
			break;
		}

		printf("unknown error %x", mca_error);
		break;
	}
	printf("\n");
	/* Optional registers, valid only when the status flags say so. */
	if (rec->mr_status & MC_STATUS_ADDRV)
		printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
	if (rec->mr_status & MC_STATUS_MISCV)
		printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
}
374
/*
 * Read the status of machine check bank 'bank' on the current CPU.
 * Returns zero when the bank holds no valid event.  Otherwise fills
 * '*rec' with a snapshot of the event and returns non-zero.  The
 * bank's status register is cleared for corrected errors only;
 * uncorrectable errors are left in place for the BIOS.
 */
static int __nonnull(2)
mca_check_status(int bank, struct mca_record *rec)
{
	uint64_t status;
	u_int p[4];

	status = rdmsr(MSR_MC_STATUS(bank));
	if (!(status & MC_STATUS_VAL))
		return (0);

	/* Save exception information. */
	rec->mr_status = status;
	rec->mr_bank = bank;
	rec->mr_addr = 0;
	/* Address and misc registers are only valid when flagged. */
	if (status & MC_STATUS_ADDRV)
		rec->mr_addr = rdmsr(MSR_MC_ADDR(bank));
	rec->mr_misc = 0;
	if (status & MC_STATUS_MISCV)
		rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
	rec->mr_tsc = rdtsc();
	rec->mr_apic_id = PCPU_GET(apic_id);
	rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
	rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
	rec->mr_cpu_id = cpu_id;
	rec->mr_cpu_vendor_id = cpu_vendor_id;
	rec->mr_cpu = PCPU_GET(cpuid);

	/*
	 * Clear machine check.  Don't do this for uncorrectable
	 * errors so that the BIOS can see them.
	 */
	if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
		wrmsr(MSR_MC_STATUS(bank), 0);
		/* cpuid serializes the instruction stream after the write. */
		do_cpuid(0, p);
	}
	return (1);
}
412
413 static void __nonnull(1)
414 mca_record_entry(const struct mca_record *record)
415 {
416 struct mca_internal *rec;
417
418 rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT);
419 if (rec == NULL) {
420 printf("MCA: Unable to allocate space for an event.\n");
421 mca_log(record);
422 return;
423 }
424
425 rec->rec = *record;
426 rec->logged = 0;
427 mtx_lock_spin(&mca_lock);
428 STAILQ_INSERT_TAIL(&mca_records, rec, link);
429 mca_count++;
430 mtx_unlock_spin(&mca_lock);
431 }
432
433 #ifdef DEV_APIC
434 /*
435 * Update the interrupt threshold for a CMCI. The strategy is to use
436 * a low trigger that interrupts as soon as the first event occurs.
437 * However, if a steady stream of events arrive, the threshold is
438 * increased until the interrupts are throttled to once every
439 * cmc_throttle seconds or the periodic scan. If a periodic scan
440 * finds that the threshold is too high, it is lowered.
441 */
442 static void
443 cmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec)
444 {
445 struct cmc_state *cc;
446 uint64_t ctl;
447 u_int delta;
448 int count, limit;
449
450 /* Fetch the current limit for this bank. */
451 cc = &cmc_state[PCPU_GET(cpuid)][bank];
452 ctl = rdmsr(MSR_MC_CTL2(bank));
453 count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
454 delta = (u_int)(ticks - cc->last_intr);
455
456 /*
457 * If an interrupt was received less than cmc_throttle seconds
458 * since the previous interrupt and the count from the current
459 * event is greater than or equal to the current threshold,
460 * double the threshold up to the max.
461 */
462 if (mode == CMCI && valid) {
463 limit = ctl & MC_CTL2_THRESHOLD;
464 if (delta < cmc_throttle && count >= limit &&
465 limit < cc->max_threshold) {
466 limit = min(limit << 1, cc->max_threshold);
467 ctl &= ~MC_CTL2_THRESHOLD;
468 ctl |= limit;
469 wrmsr(MSR_MC_CTL2(bank), limit);
470 }
471 cc->last_intr = ticks;
472 return;
473 }
474
475 /*
476 * When the banks are polled, check to see if the threshold
477 * should be lowered.
478 */
479 if (mode != POLLED)
480 return;
481
482 /* If a CMCI occured recently, do nothing for now. */
483 if (delta < cmc_throttle)
484 return;
485
486 /*
487 * Compute a new limit based on the average rate of events per
488 * cmc_throttle seconds since the last interrupt.
489 */
490 if (valid) {
491 count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38;
492 limit = count * cmc_throttle / delta;
493 if (limit <= 0)
494 limit = 1;
495 else if (limit > cc->max_threshold)
496 limit = cc->max_threshold;
497 } else
498 limit = 1;
499 if ((ctl & MC_CTL2_THRESHOLD) != limit) {
500 ctl &= ~MC_CTL2_THRESHOLD;
501 ctl |= limit;
502 wrmsr(MSR_MC_CTL2(bank), limit);
503 }
504 }
505 #endif
506
/*
 * This scans all the machine check banks of the current CPU to see if
 * there are any machine checks.  Any non-recoverable errors are
 * reported immediately via mca_log().  The current thread must be
 * pinned when this is called.  The 'mode' parameter indicates if we
 * are being called from the MC exception handler, the CMCI handler,
 * or the periodic poller.  In the MC exception case this function
 * returns true if the system is restartable.  Otherwise, it returns a
 * count of the number of valid MC records found.
 */
static int
mca_scan(enum scan_mode mode)
{
	struct mca_record rec;
	uint64_t mcg_cap, ucmask;
	int count, i, recoverable, valid;

	count = 0;
	recoverable = 1;
	/* Status bits that mark an event as non-recoverable. */
	ucmask = MC_STATUS_UC | MC_STATUS_PCC;

	/* When handling a MCE#, treat the OVER flag as non-restartable. */
	if (mode == MCE)
		ucmask |= MC_STATUS_OVER;
	/* The low bits of MCG_CAP give the number of banks. */
	mcg_cap = rdmsr(MSR_MCG_CAP);
	for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
#ifdef DEV_APIC
		/*
		 * For a CMCI, only check banks this CPU is
		 * responsible for.
		 */
		if (mode == CMCI && !(PCPU_GET(cmci_mask) & 1 << i))
			continue;
#endif

		valid = mca_check_status(i, &rec);
		if (valid) {
			count++;
			/* Log non-recoverable errors immediately. */
			if (rec.mr_status & ucmask) {
				recoverable = 0;
				mca_log(&rec);
			}
			mca_record_entry(&rec);
		}

#ifdef DEV_APIC
		/*
		 * If this is a bank this CPU monitors via CMCI,
		 * update the threshold.
		 */
		if (PCPU_GET(cmci_mask) & (1 << i))
			cmci_update(mode, i, valid, &rec);
#endif
	}
	return (mode == MCE ? recoverable : count);
}
563
/*
 * Scan the machine check banks on all CPUs by binding to each CPU in
 * turn.  If any of the CPUs contained new machine check records, log
 * them to the console.  Runs as a task on the taskqueue thread, which
 * may sleep in sched_bind().
 */
static void
mca_scan_cpus(void *context, int pending)
{
	struct mca_internal *mca;
	struct thread *td;
	int count, cpu;

	td = curthread;
	count = 0;
	thread_lock(td);
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		if (CPU_ABSENT(cpu))
			continue;
		/* Pin to 'cpu' so mca_scan() reads that CPU's MSRs. */
		sched_bind(td, cpu);
		thread_unlock(td);
		count += mca_scan(POLLED);
		thread_lock(td);
		sched_unbind(td);
	}
	thread_unlock(td);
	if (count != 0) {
		mtx_lock_spin(&mca_lock);
		STAILQ_FOREACH(mca, &mca_records, link) {
			if (!mca->logged) {
				mca->logged = 1;
				/*
				 * Drop the spin lock around the printf-based
				 * log; records are only ever appended, so
				 * resuming the iteration afterwards is safe.
				 */
				mtx_unlock_spin(&mca_lock);
				mca_log(&mca->rec);
				mtx_lock_spin(&mca_lock);
			}
		}
		mtx_unlock_spin(&mca_lock);
	}
}
602
/*
 * Callout handler: queue the CPU-binding bank scan to a taskqueue
 * thread and re-arm the timer for the next mca_ticks interval.
 */
static void
mca_periodic_scan(void *arg)
{

	taskqueue_enqueue(taskqueue_thread, &mca_task);
	callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL);
}
610
611 static int
612 sysctl_mca_scan(SYSCTL_HANDLER_ARGS)
613 {
614 int error, i;
615
616 i = 0;
617 error = sysctl_handle_int(oidp, &i, 0, req);
618 if (error)
619 return (error);
620 if (i)
621 taskqueue_enqueue(taskqueue_thread, &mca_task);
622 return (0);
623 }
624
625 static void
626 mca_startup(void *dummy)
627 {
628
629 if (!mca_enabled || !(cpu_feature & CPUID_MCA))
630 return;
631
632 callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan,
633 NULL);
634 }
635 SYSINIT(mca_startup, SI_SUB_SMP, SI_ORDER_ANY, mca_startup, NULL);
636
637 #ifdef DEV_APIC
638 static void
639 cmci_setup(uint64_t mcg_cap)
640 {
641 int i;
642
643 cmc_state = malloc((mp_maxid + 1) * sizeof(struct cmc_state **),
644 M_MCA, M_WAITOK);
645 cmc_banks = mcg_cap & MCG_CAP_COUNT;
646 for (i = 0; i <= mp_maxid; i++)
647 cmc_state[i] = malloc(sizeof(struct cmc_state) * cmc_banks,
648 M_MCA, M_WAITOK | M_ZERO);
649 SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
650 "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
651 &cmc_throttle, 0, sysctl_positive_int, "I",
652 "Interval in seconds to throttle corrected MC interrupts");
653 }
654 #endif
655
/*
 * One-time global initialization, run on the BSP during boot: apply
 * the Erratum 383 workaround flag, initialize the record list, lock,
 * task, and timer, and register the hw.mca sysctl tree.
 */
static void
mca_setup(uint64_t mcg_cap)
{

	/*
	 * On AMD Family 10h processors, unless logging of level one TLB
	 * parity (L1TP) errors is disabled, enable the recommended workaround
	 * for Erratum 383.
	 */
	if (cpu_vendor_id == CPU_VENDOR_AMD &&
	    CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP)
		workaround_erratum383 = 1;

	/* Spin lock: taken from the MC exception and CMCI handlers. */
	mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
	STAILQ_INIT(&mca_records);
	TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL);
	callout_init(&mca_timer, CALLOUT_MPSAFE);
	SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
	    "count", CTLFLAG_RD, &mca_count, 0, "Record count");
	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
	    "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
	    0, sysctl_positive_int, "I",
	    "Periodic interval in seconds to scan for machine checks");
	SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
	    "records", CTLFLAG_RD, sysctl_mca_records, "Machine check records");
	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
	    "force_scan", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
	    sysctl_mca_scan, "I", "Force an immediate scan for machine checks");
#ifdef DEV_APIC
	if (mcg_cap & MCG_CAP_CMCI_P)
		cmci_setup(mcg_cap);
#endif
}
689
690 #ifdef DEV_APIC
/*
 * See if we should monitor CMCI for this bank.  If CMCI_EN is already
 * set in MC_CTL2, then another CPU is responsible for this bank, so
 * ignore it.  If CMCI_EN returns zero after being set, then this bank
 * does not support CMCI_EN.  If this CPU sets CMCI_EN, then it should
 * now monitor this bank.
 */
static void
cmci_monitor(int i)
{
	struct cmc_state *cc;
	uint64_t ctl;

	KASSERT(i < cmc_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));

	ctl = rdmsr(MSR_MC_CTL2(i));
	if (ctl & MC_CTL2_CMCI_EN)
		/* Already monitored by another CPU. */
		return;

	/* Set the threshold to one event for now. */
	ctl &= ~MC_CTL2_THRESHOLD;
	ctl |= MC_CTL2_CMCI_EN | 1;
	wrmsr(MSR_MC_CTL2(i), ctl);
	/* Read back to see whether the bank accepted CMCI_EN. */
	ctl = rdmsr(MSR_MC_CTL2(i));
	if (!(ctl & MC_CTL2_CMCI_EN))
		/* This bank does not support CMCI. */
		return;

	cc = &cmc_state[PCPU_GET(cpuid)][i];

	/*
	 * Determine maximum threshold by writing all-ones into the
	 * threshold field and reading back what sticks.
	 */
	ctl &= ~MC_CTL2_THRESHOLD;
	ctl |= 0x7fff;
	wrmsr(MSR_MC_CTL2(i), ctl);
	ctl = rdmsr(MSR_MC_CTL2(i));
	cc->max_threshold = ctl & MC_CTL2_THRESHOLD;

	/* Start off with a threshold of 1. */
	ctl &= ~MC_CTL2_THRESHOLD;
	ctl |= 1;
	wrmsr(MSR_MC_CTL2(i), ctl);

	/* Mark this bank as monitored. */
	PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << i);
}
737
/*
 * For resume, reset the threshold for any banks we monitor back to
 * one and throw away the timestamp of the last interrupt.
 */
static void
cmci_resume(int i)
{
	struct cmc_state *cc;
	uint64_t ctl;

	KASSERT(i < cmc_banks, ("CPU %d has more MC banks", PCPU_GET(cpuid)));

	/* Ignore banks not monitored by this CPU. */
	if (!(PCPU_GET(cmci_mask) & 1 << i))
		return;

	cc = &cmc_state[PCPU_GET(cpuid)][i];
	/* Negate 'ticks' so the first post-resume delta computes large. */
	cc->last_intr = -ticks;
	/* Re-enable CMCI with the initial threshold of one event. */
	ctl = rdmsr(MSR_MC_CTL2(i));
	ctl &= ~MC_CTL2_THRESHOLD;
	ctl |= MC_CTL2_CMCI_EN | 1;
	wrmsr(MSR_MC_CTL2(i), ctl);
}
761 #endif
762
/*
 * Initializes per-CPU machine check registers and enables corrected
 * machine check interrupts.  Runs on every CPU, both at boot
 * ('boot' non-zero) and on resume ('boot' zero).
 */
static void
_mca_init(int boot)
{
	uint64_t mcg_cap;
	uint64_t ctl, mask;
	int i, skip;

	/* MCE is required. */
	if (!mca_enabled || !(cpu_feature & CPUID_MCE))
		return;

	if (cpu_feature & CPUID_MCA) {
		if (boot)
			PCPU_SET(cmci_mask, 0);

		mcg_cap = rdmsr(MSR_MCG_CAP);
		if (mcg_cap & MCG_CAP_CTL_P)
			/* Enable MCA features. */
			wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE);
		/* Global state is set up exactly once, by the BSP at boot. */
		if (PCPU_GET(cpuid) == 0 && boot)
			mca_setup(mcg_cap);

		/*
		 * Disable logging of level one TLB parity (L1TP) errors by
		 * the data cache as an alternative workaround for AMD Family
		 * 10h Erratum 383.  Unlike the recommended workaround, there
		 * is no performance penalty to this workaround.  However,
		 * L1TP errors will go unreported.
		 */
		if (cpu_vendor_id == CPU_VENDOR_AMD &&
		    CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) {
			mask = rdmsr(MSR_MC0_CTL_MASK);
			if ((mask & (1UL << 5)) == 0)
				wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5));
		}
		for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
			/* By default enable logging of all errors. */
			ctl = 0xffffffffffffffffUL;
			skip = 0;

			if (cpu_vendor_id == CPU_VENDOR_INTEL) {
				/*
				 * For P6 models before Nehalem MC0_CTL is
				 * always enabled and reserved.
				 */
				if (i == 0 && CPUID_TO_FAMILY(cpu_id) == 0x6
				    && CPUID_TO_MODEL(cpu_id) < 0x1a)
					skip = 1;
			} else if (cpu_vendor_id == CPU_VENDOR_AMD) {
				/* BKDG for Family 10h: unset GartTblWkEn. */
				if (i == 4 && CPUID_TO_FAMILY(cpu_id) >= 0xf)
					ctl &= ~(1UL << 10);
			}

			if (!skip)
				wrmsr(MSR_MC_CTL(i), ctl);

#ifdef DEV_APIC
			/* Claim or re-enable CMCI monitoring of this bank. */
			if (mcg_cap & MCG_CAP_CMCI_P) {
				if (boot)
					cmci_monitor(i);
				else
					cmci_resume(i);
			}
#endif

			/* Clear all errors. */
			wrmsr(MSR_MC_STATUS(i), 0);
		}

#ifdef DEV_APIC
		/* Enable the CMCI LVT entry if this CPU monitors any banks. */
		if (PCPU_GET(cmci_mask) != 0 && boot)
			lapic_enable_cmc();
#endif
	}

	/* Enable delivery of machine check exceptions (CR4.MCE). */
	load_cr4(rcr4() | CR4_MCE);
}
845
/* Must be executed on each CPU during boot (boot path of _mca_init()). */
void
mca_init(void)
{

	_mca_init(1);
}
853
/* Must be executed on each CPU during resume (non-boot path of _mca_init()). */
void
mca_resume(void)
{

	_mca_init(0);
}
861
/*
 * The machine check registers for the BSP cannot be initialized until
 * the local APIC is initialized.  This happens at SI_SUB_CPU,
 * SI_ORDER_SECOND, so run at SI_SUB_CPU, SI_ORDER_ANY (after it).
 */
static void
mca_init_bsp(void *arg __unused)
{

	mca_init();
}
SYSINIT(mca_init_bsp, SI_SUB_CPU, SI_ORDER_ANY, mca_init_bsp, NULL);
874
/*
 * Called when a machine check exception fires.  Returns non-zero if
 * the system is restartable, zero if it must panic.
 */
int
mca_intr(void)
{
	uint64_t mcg_status;
	int recoverable;

	if (!(cpu_feature & CPUID_MCA)) {
		/*
		 * Just print the values of the old Pentium registers
		 * and panic.
		 */
		printf("MC Type: 0x%llx Address: 0x%llx\n",
		    rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR));
		return (0);
	}

	/* Scan the banks and check for any non-recoverable errors. */
	recoverable = mca_scan(MCE);
	mcg_status = rdmsr(MSR_MCG_STATUS);
	/* Without RIPV the interrupted program cannot be restarted. */
	if (!(mcg_status & MCG_STATUS_RIPV))
		recoverable = 0;

	/* Clear MCIP. */
	wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP);
	return (recoverable);
}
902
903 #ifdef DEV_APIC
/* Called for a CMCI (correctable machine check interrupt). */
void
cmc_intr(void)
{
	struct mca_internal *mca;
	int count;

	/*
	 * Serialize MCA bank scanning to prevent collisions from
	 * sibling threads.
	 *
	 * NOTE(review): no explicit lock is taken here; the per-CPU
	 * cmci_mask bank ownership established in cmci_monitor()
	 * appears to be what keeps two CPUs from scanning the same
	 * bank — confirm.
	 */
	count = mca_scan(CMCI);

	/* If we found anything, log them to the console. */
	if (count != 0) {
		mtx_lock_spin(&mca_lock);
		STAILQ_FOREACH(mca, &mca_records, link) {
			if (!mca->logged) {
				mca->logged = 1;
				/* Drop the spin lock around printf. */
				mtx_unlock_spin(&mca_lock);
				mca_log(&mca->rec);
				mtx_lock_spin(&mca_lock);
			}
		}
		mtx_unlock_spin(&mca_lock);
	}
}
931 #endif
Cache object: eb5d185aedd0af5a679968dba24db712
|