mce.c revision 0937195715713b37ec843f28d99930dd7b1e8fbe
1/* 2 * Machine check handler. 3 * 4 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. 5 * Rest from unknown author(s). 6 * 2004 Andi Kleen. Rewrote most of it. 7 * Copyright 2008 Intel Corporation 8 * Author: Andi Kleen 9 */ 10#include <linux/thread_info.h> 11#include <linux/capability.h> 12#include <linux/miscdevice.h> 13#include <linux/ratelimit.h> 14#include <linux/kallsyms.h> 15#include <linux/rcupdate.h> 16#include <linux/kobject.h> 17#include <linux/uaccess.h> 18#include <linux/kdebug.h> 19#include <linux/kernel.h> 20#include <linux/percpu.h> 21#include <linux/string.h> 22#include <linux/sysdev.h> 23#include <linux/syscore_ops.h> 24#include <linux/delay.h> 25#include <linux/ctype.h> 26#include <linux/sched.h> 27#include <linux/sysfs.h> 28#include <linux/types.h> 29#include <linux/slab.h> 30#include <linux/init.h> 31#include <linux/kmod.h> 32#include <linux/poll.h> 33#include <linux/nmi.h> 34#include <linux/cpu.h> 35#include <linux/smp.h> 36#include <linux/fs.h> 37#include <linux/mm.h> 38#include <linux/debugfs.h> 39#include <linux/irq_work.h> 40#include <linux/export.h> 41 42#include <asm/processor.h> 43#include <asm/mce.h> 44#include <asm/msr.h> 45 46#include "mce-internal.h" 47 48static DEFINE_MUTEX(mce_chrdev_read_mutex); 49 50#define rcu_dereference_check_mce(p) \ 51 rcu_dereference_index_check((p), \ 52 rcu_read_lock_sched_held() || \ 53 lockdep_is_held(&mce_chrdev_read_mutex)) 54 55#define CREATE_TRACE_POINTS 56#include <trace/events/mce.h> 57 58int mce_disabled __read_mostly; 59 60#define MISC_MCELOG_MINOR 227 61 62#define SPINUNIT 100 /* 100ns */ 63 64atomic_t mce_entry; 65 66DEFINE_PER_CPU(unsigned, mce_exception_count); 67 68/* 69 * Tolerant levels: 70 * 0: always panic on uncorrected errors, log corrected errors 71 * 1: panic or SIGBUS on uncorrected errors, log corrected errors 72 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors 73 * 3: never panic or SIGBUS, log all errors (for testing only) 74 */ 75static int tolerant __read_mostly = 1; 76static int banks __read_mostly; 77static int rip_msr __read_mostly; 78static int mce_bootlog __read_mostly = -1; 79static int monarch_timeout __read_mostly = -1; 80static int mce_panic_timeout __read_mostly; 81static int mce_dont_log_ce __read_mostly; 82int mce_cmci_disabled __read_mostly; 83int mce_ignore_ce __read_mostly; 84int mce_ser __read_mostly; 85 86struct mce_bank *mce_banks __read_mostly; 87 88/* User mode helper program triggered by machine check event */ 89static unsigned long mce_need_notify; 90static char mce_helper[128]; 91static char *mce_helper_argv[2] = { mce_helper, NULL }; 92 93static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); 94 95static DEFINE_PER_CPU(struct mce, mces_seen); 96static int cpu_missing; 97 98/* MCA banks polled by the period polling timer for corrected events */ 99DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 100 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 101}; 102 103static DEFINE_PER_CPU(struct work_struct, mce_work); 104 105/* 106 * CPU/chipset specific EDAC code can register a notifier call here to print 107 * MCE errors in a human-readable form. 108 */ 109ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); 110 111/* Do initial initialization of a struct mce */ 112void mce_setup(struct mce *m) 113{ 114 memset(m, 0, sizeof(struct mce)); 115 m->cpu = m->extcpu = smp_processor_id(); 116 rdtscll(m->tsc); 117 /* We hope get_seconds stays lockless */ 118 m->time = get_seconds(); 119 m->cpuvendor = boot_cpu_data.x86_vendor; 120 m->cpuid = cpuid_eax(1); 121#ifdef CONFIG_SMP 122 m->socketid = cpu_data(m->extcpu).phys_proc_id; 123#endif 124 m->apicid = cpu_data(m->extcpu).initial_apicid; 125 rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); 126} 127 128DEFINE_PER_CPU(struct mce, injectm); 129EXPORT_PER_CPU_SYMBOL_GPL(injectm); 130 131/* 132 * Lockless MCE logging infrastructure. 133 * This avoids deadlocks on printk locks without having to break locks. Also 134 * separate MCEs from kernel messages to avoid bogus bug reports. 135 */ 136 137static struct mce_log mcelog = { 138 .signature = MCE_LOG_SIGNATURE, 139 .len = MCE_LOG_LEN, 140 .recordlen = sizeof(struct mce), 141}; 142 143void mce_log(struct mce *mce) 144{ 145 unsigned next, entry; 146 int ret = 0; 147 148 /* Emit the trace record: */ 149 trace_mce_record(mce); 150 151 ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); 152 if (ret == NOTIFY_STOP) 153 return; 154 155 mce->finished = 0; 156 wmb(); 157 for (;;) { 158 entry = rcu_dereference_check_mce(mcelog.next); 159 for (;;) { 160 161 /* 162 * When the buffer fills up discard new entries. 163 * Assume that the earlier errors are the more 164 * interesting ones: 165 */ 166 if (entry >= MCE_LOG_LEN) { 167 set_bit(MCE_OVERFLOW, 168 (unsigned long *)&mcelog.flags); 169 return; 170 } 171 /* Old left over entry. Skip: */ 172 if (mcelog.entry[entry].finished) { 173 entry++; 174 continue; 175 } 176 break; 177 } 178 smp_rmb(); 179 next = entry + 1; 180 if (cmpxchg(&mcelog.next, entry, next) == entry) 181 break; 182 } 183 memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); 184 wmb(); 185 mcelog.entry[entry].finished = 1; 186 wmb(); 187 188 mce->finished = 1; 189 set_bit(0, &mce_need_notify); 190} 191 192static void drain_mcelog_buffer(void) 193{ 194 unsigned int next, i, prev = 0; 195 196 next = rcu_dereference_check_mce(mcelog.next); 197 198 do { 199 struct mce *m; 200 201 /* drain what was logged during boot */ 202 for (i = prev; i < next; i++) { 203 unsigned long start = jiffies; 204 unsigned retries = 1; 205 206 m = &mcelog.entry[i]; 207 208 while (!m->finished) { 209 if (time_after_eq(jiffies, start + 2*retries)) 210 retries++; 211 212 cpu_relax(); 213 214 if (!m->finished && retries >= 4) { 215 pr_err("MCE: skipping error being logged currently!\n"); 216 break; 217 } 218 } 219 smp_rmb(); 220 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); 221 } 222 223 memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); 224 prev = next; 225 next = cmpxchg(&mcelog.next, prev, 0); 226 } while (next != prev); 227} 228 229 230void mce_register_decode_chain(struct notifier_block *nb) 231{ 232 atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); 233 drain_mcelog_buffer(); 234} 235EXPORT_SYMBOL_GPL(mce_register_decode_chain); 236 237void mce_unregister_decode_chain(struct notifier_block *nb) 238{ 239 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); 240} 241EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); 242 243static void print_mce(struct mce *m) 244{ 245 int ret = 0; 246 247 pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", 248 m->extcpu, m->mcgstatus, m->bank, m->status); 249 250 if (m->ip) { 251 pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", 252 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", 253 m->cs, m->ip); 254 255 if (m->cs == __KERNEL_CS) 256 print_symbol("{%s}", m->ip); 257 pr_cont("\n"); 258 } 259 260 pr_emerg(HW_ERR "TSC %llx ", m->tsc); 261 if (m->addr) 262 pr_cont("ADDR %llx ", m->addr); 263 if (m->misc) 264 pr_cont("MISC %llx ", m->misc); 265 266 pr_cont("\n"); 267 /* 268 * Note this output is parsed by external tools and old fields 269 * should not be changed. 270 */ 271 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", 272 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, 273 cpu_data(m->extcpu).microcode); 274 275 /* 276 * Print out human-readable details about the MCE error, 277 * (if the CPU has an implementation for that) 278 */ 279 ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); 280 if (ret == NOTIFY_STOP) 281 return; 282 283 pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); 284} 285 286#define PANIC_TIMEOUT 5 /* 5 seconds */ 287 288static atomic_t mce_paniced; 289 290static int fake_panic; 291static atomic_t mce_fake_paniced; 292 293/* Panic in progress. Enable interrupts and wait for final IPI */ 294static void wait_for_panic(void) 295{ 296 long timeout = PANIC_TIMEOUT*USEC_PER_SEC; 297 298 preempt_disable(); 299 local_irq_enable(); 300 while (timeout-- > 0) 301 udelay(1); 302 if (panic_timeout == 0) 303 panic_timeout = mce_panic_timeout; 304 panic("Panicing machine check CPU died"); 305} 306 307static void mce_panic(char *msg, struct mce *final, char *exp) 308{ 309 int i, apei_err = 0; 310 311 if (!fake_panic) { 312 /* 313 * Make sure only one CPU runs in machine check panic 314 */ 315 if (atomic_inc_return(&mce_paniced) > 1) 316 wait_for_panic(); 317 barrier(); 318 319 bust_spinlocks(1); 320 console_verbose(); 321 } else { 322 /* Don't log too much for fake panic */ 323 if (atomic_inc_return(&mce_fake_paniced) > 1) 324 return; 325 } 326 /* First print corrected ones that are still unlogged */ 327 for (i = 0; i < MCE_LOG_LEN; i++) { 328 struct mce *m = &mcelog.entry[i]; 329 if (!(m->status & MCI_STATUS_VAL)) 330 continue; 331 if (!(m->status & MCI_STATUS_UC)) { 332 print_mce(m); 333 if (!apei_err) 334 apei_err = apei_write_mce(m); 335 } 336 } 337 /* Now print uncorrected but with the final one last */ 338 for (i = 0; i < MCE_LOG_LEN; i++) { 339 struct mce *m = &mcelog.entry[i]; 340 if (!(m->status & MCI_STATUS_VAL)) 341 continue; 342 if (!(m->status & MCI_STATUS_UC)) 343 continue; 344 if (!final || memcmp(m, final, sizeof(struct mce))) { 345 print_mce(m); 346 if (!apei_err) 347 apei_err = apei_write_mce(m); 348 } 349 } 350 if (final) { 351 print_mce(final); 352 if (!apei_err) 353 apei_err = apei_write_mce(final); 354 } 355 if (cpu_missing) 356 pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); 357 if (exp) 358 pr_emerg(HW_ERR "Machine check: %s\n", exp); 359 if (!fake_panic) { 360 if (panic_timeout == 0) 361 panic_timeout = mce_panic_timeout; 362 panic(msg); 363 } else 364 pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); 365} 366 367/* Support code for software error injection */ 368 369static int msr_to_offset(u32 msr) 370{ 371 unsigned bank = __this_cpu_read(injectm.bank); 372 373 if (msr == rip_msr) 374 return offsetof(struct mce, ip); 375 if (msr == MSR_IA32_MCx_STATUS(bank)) 376 return offsetof(struct mce, status); 377 if (msr == MSR_IA32_MCx_ADDR(bank)) 378 return offsetof(struct mce, addr); 379 if (msr == MSR_IA32_MCx_MISC(bank)) 380 return offsetof(struct mce, misc); 381 if (msr == MSR_IA32_MCG_STATUS) 382 return offsetof(struct mce, mcgstatus); 383 return -1; 384} 385 386/* MSR access wrappers used for error injection */ 387static u64 mce_rdmsrl(u32 msr) 388{ 389 u64 v; 390 391 if (__this_cpu_read(injectm.finished)) { 392 int offset = msr_to_offset(msr); 393 394 if (offset < 0) 395 return 0; 396 return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); 397 } 398 399 if (rdmsrl_safe(msr, &v)) { 400 WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); 401 /* 402 * Return zero in case the access faulted. This should 403 * not happen normally but can happen if the CPU does 404 * something weird, or if the code is buggy. 405 */ 406 v = 0; 407 } 408 409 return v; 410} 411 412static void mce_wrmsrl(u32 msr, u64 v) 413{ 414 if (__this_cpu_read(injectm.finished)) { 415 int offset = msr_to_offset(msr); 416 417 if (offset >= 0) 418 *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; 419 return; 420 } 421 wrmsrl(msr, v); 422} 423 424/* 425 * Collect all global (w.r.t. this processor) status about this machine 426 * check into our "mce" struct so that we can use it later to assess 427 * the severity of the problem as we read per-bank specific details. 428 */ 429static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) 430{ 431 mce_setup(m); 432 433 m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 434 if (regs) { 435 /* 436 * Get the address of the instruction at the time of 437 * the machine check error. 438 */ 439 if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { 440 m->ip = regs->ip; 441 m->cs = regs->cs; 442 } 443 /* Use accurate RIP reporting if available. */ 444 if (rip_msr) 445 m->ip = mce_rdmsrl(rip_msr); 446 } 447} 448 449/* 450 * Simple lockless ring to communicate PFNs from the exception handler with the 451 * process context work function. This is vastly simplified because there's 452 * only a single reader and a single writer. 453 */ 454#define MCE_RING_SIZE 16 /* we use one entry less */ 455 456struct mce_ring { 457 unsigned short start; 458 unsigned short end; 459 unsigned long ring[MCE_RING_SIZE]; 460}; 461static DEFINE_PER_CPU(struct mce_ring, mce_ring); 462 463/* Runs with CPU affinity in workqueue */ 464static int mce_ring_empty(void) 465{ 466 struct mce_ring *r = &__get_cpu_var(mce_ring); 467 468 return r->start == r->end; 469} 470 471static int mce_ring_get(unsigned long *pfn) 472{ 473 struct mce_ring *r; 474 int ret = 0; 475 476 *pfn = 0; 477 get_cpu(); 478 r = &__get_cpu_var(mce_ring); 479 if (r->start == r->end) 480 goto out; 481 *pfn = r->ring[r->start]; 482 r->start = (r->start + 1) % MCE_RING_SIZE; 483 ret = 1; 484out: 485 put_cpu(); 486 return ret; 487} 488 489/* Always runs in MCE context with preempt off */ 490static int mce_ring_add(unsigned long pfn) 491{ 492 struct mce_ring *r = &__get_cpu_var(mce_ring); 493 unsigned next; 494 495 next = (r->end + 1) % MCE_RING_SIZE; 496 if (next == r->start) 497 return -1; 498 r->ring[r->end] = pfn; 499 wmb(); 500 r->end = next; 501 return 0; 502} 503 504int mce_available(struct cpuinfo_x86 *c) 505{ 506 if (mce_disabled) 507 return 0; 508 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); 509} 510 511static void mce_schedule_work(void) 512{ 513 if (!mce_ring_empty()) { 514 struct work_struct *work = &__get_cpu_var(mce_work); 515 if (!work_pending(work)) 516 schedule_work(work); 517 } 518} 519 520DEFINE_PER_CPU(struct irq_work, mce_irq_work); 521 522static void mce_irq_work_cb(struct irq_work *entry) 523{ 524 mce_notify_irq(); 525 mce_schedule_work(); 526} 527 528static void mce_report_event(struct pt_regs *regs) 529{ 530 if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { 531 mce_notify_irq(); 532 /* 533 * Triggering the work queue here is just an insurance 534 * policy in case the syscall exit notify handler 535 * doesn't run soon enough or ends up running on the 536 * wrong CPU (can happen when audit sleeps) 537 */ 538 mce_schedule_work(); 539 return; 540 } 541 542 irq_work_queue(&__get_cpu_var(mce_irq_work)); 543} 544 545DEFINE_PER_CPU(unsigned, mce_poll_count); 546 547/* 548 * Poll for corrected events or events that happened before reset. 549 * Those are just logged through /dev/mcelog. 550 * 551 * This is executed in standard interrupt context. 552 * 553 * Note: spec recommends to panic for fatal unsignalled 554 * errors here. However this would be quite problematic -- 555 * we would need to reimplement the Monarch handling and 556 * it would mess up the exclusion between exception handler 557 * and poll hander -- * so we skip this for now. 558 * These cases should not happen anyways, or only when the CPU 559 * is already totally * confused. In this case it's likely it will 560 * not fully execute the machine check handler either. 561 */ 562void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) 563{ 564 struct mce m; 565 int i; 566 567 percpu_inc(mce_poll_count); 568 569 mce_gather_info(&m, NULL); 570 571 for (i = 0; i < banks; i++) { 572 if (!mce_banks[i].ctl || !test_bit(i, *b)) 573 continue; 574 575 m.misc = 0; 576 m.addr = 0; 577 m.bank = i; 578 m.tsc = 0; 579 580 barrier(); 581 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 582 if (!(m.status & MCI_STATUS_VAL)) 583 continue; 584 585 /* 586 * Uncorrected or signalled events are handled by the exception 587 * handler when it is enabled, so don't process those here. 588 * 589 * TBD do the same check for MCI_STATUS_EN here? 590 */ 591 if (!(flags & MCP_UC) && 592 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) 593 continue; 594 595 if (m.status & MCI_STATUS_MISCV) 596 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); 597 if (m.status & MCI_STATUS_ADDRV) 598 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); 599 600 if (!(flags & MCP_TIMESTAMP)) 601 m.tsc = 0; 602 /* 603 * Don't get the IP here because it's unlikely to 604 * have anything to do with the actual error location. 605 */ 606 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) 607 mce_log(&m); 608 609 /* 610 * Clear state for this bank. 611 */ 612 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 613 } 614 615 /* 616 * Don't clear MCG_STATUS here because it's only defined for 617 * exceptions. 618 */ 619 620 sync_core(); 621} 622EXPORT_SYMBOL_GPL(machine_check_poll); 623 624/* 625 * Do a quick check if any of the events requires a panic. 626 * This decides if we keep the events around or clear them. 627 */ 628static int mce_no_way_out(struct mce *m, char **msg) 629{ 630 int i; 631 632 for (i = 0; i < banks; i++) { 633 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 634 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) 635 return 1; 636 } 637 return 0; 638} 639 640/* 641 * Variable to establish order between CPUs while scanning. 642 * Each CPU spins initially until executing is equal its number. 643 */ 644static atomic_t mce_executing; 645 646/* 647 * Defines order of CPUs on entry. First CPU becomes Monarch. 648 */ 649static atomic_t mce_callin; 650 651/* 652 * Check if a timeout waiting for other CPUs happened. 653 */ 654static int mce_timed_out(u64 *t) 655{ 656 /* 657 * The others already did panic for some reason. 658 * Bail out like in a timeout. 659 * rmb() to tell the compiler that system_state 660 * might have been modified by someone else. 661 */ 662 rmb(); 663 if (atomic_read(&mce_paniced)) 664 wait_for_panic(); 665 if (!monarch_timeout) 666 goto out; 667 if ((s64)*t < SPINUNIT) { 668 /* CHECKME: Make panic default for 1 too? */ 669 if (tolerant < 1) 670 mce_panic("Timeout synchronizing machine check over CPUs", 671 NULL, NULL); 672 cpu_missing = 1; 673 return 1; 674 } 675 *t -= SPINUNIT; 676out: 677 touch_nmi_watchdog(); 678 return 0; 679} 680 681/* 682 * The Monarch's reign. The Monarch is the CPU who entered 683 * the machine check handler first. It waits for the others to 684 * raise the exception too and then grades them. When any 685 * error is fatal panic. Only then let the others continue. 686 * 687 * The other CPUs entering the MCE handler will be controlled by the 688 * Monarch. They are called Subjects. 689 * 690 * This way we prevent any potential data corruption in a unrecoverable case 691 * and also makes sure always all CPU's errors are examined. 692 * 693 * Also this detects the case of a machine check event coming from outer 694 * space (not detected by any CPUs) In this case some external agent wants 695 * us to shut down, so panic too. 696 * 697 * The other CPUs might still decide to panic if the handler happens 698 * in a unrecoverable place, but in this case the system is in a semi-stable 699 * state and won't corrupt anything by itself. It's ok to let the others 700 * continue for a bit first. 701 * 702 * All the spin loops have timeouts; when a timeout happens a CPU 703 * typically elects itself to be Monarch. 704 */ 705static void mce_reign(void) 706{ 707 int cpu; 708 struct mce *m = NULL; 709 int global_worst = 0; 710 char *msg = NULL; 711 char *nmsg = NULL; 712 713 /* 714 * This CPU is the Monarch and the other CPUs have run 715 * through their handlers. 716 * Grade the severity of the errors of all the CPUs. 717 */ 718 for_each_possible_cpu(cpu) { 719 int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, 720 &nmsg); 721 if (severity > global_worst) { 722 msg = nmsg; 723 global_worst = severity; 724 m = &per_cpu(mces_seen, cpu); 725 } 726 } 727 728 /* 729 * Cannot recover? Panic here then. 730 * This dumps all the mces in the log buffer and stops the 731 * other CPUs. 732 */ 733 if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) 734 mce_panic("Fatal Machine check", m, msg); 735 736 /* 737 * For UC somewhere we let the CPU who detects it handle it. 738 * Also must let continue the others, otherwise the handling 739 * CPU could deadlock on a lock. 740 */ 741 742 /* 743 * No machine check event found. Must be some external 744 * source or one CPU is hung. Panic. 745 */ 746 if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) 747 mce_panic("Machine check from unknown source", NULL, NULL); 748 749 /* 750 * Now clear all the mces_seen so that they don't reappear on 751 * the next mce. 752 */ 753 for_each_possible_cpu(cpu) 754 memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); 755} 756 757static atomic_t global_nwo; 758 759/* 760 * Start of Monarch synchronization. This waits until all CPUs have 761 * entered the exception handler and then determines if any of them 762 * saw a fatal event that requires panic. Then it executes them 763 * in the entry order. 764 * TBD double check parallel CPU hotunplug 765 */ 766static int mce_start(int *no_way_out) 767{ 768 int order; 769 int cpus = num_online_cpus(); 770 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 771 772 if (!timeout) 773 return -1; 774 775 atomic_add(*no_way_out, &global_nwo); 776 /* 777 * global_nwo should be updated before mce_callin 778 */ 779 smp_wmb(); 780 order = atomic_inc_return(&mce_callin); 781 782 /* 783 * Wait for everyone. 784 */ 785 while (atomic_read(&mce_callin) != cpus) { 786 if (mce_timed_out(&timeout)) { 787 atomic_set(&global_nwo, 0); 788 return -1; 789 } 790 ndelay(SPINUNIT); 791 } 792 793 /* 794 * mce_callin should be read before global_nwo 795 */ 796 smp_rmb(); 797 798 if (order == 1) { 799 /* 800 * Monarch: Starts executing now, the others wait. 801 */ 802 atomic_set(&mce_executing, 1); 803 } else { 804 /* 805 * Subject: Now start the scanning loop one by one in 806 * the original callin order. 807 * This way when there are any shared banks it will be 808 * only seen by one CPU before cleared, avoiding duplicates. 809 */ 810 while (atomic_read(&mce_executing) < order) { 811 if (mce_timed_out(&timeout)) { 812 atomic_set(&global_nwo, 0); 813 return -1; 814 } 815 ndelay(SPINUNIT); 816 } 817 } 818 819 /* 820 * Cache the global no_way_out state. 821 */ 822 *no_way_out = atomic_read(&global_nwo); 823 824 return order; 825} 826 827/* 828 * Synchronize between CPUs after main scanning loop. 829 * This invokes the bulk of the Monarch processing. 830 */ 831static int mce_end(int order) 832{ 833 int ret = -1; 834 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 835 836 if (!timeout) 837 goto reset; 838 if (order < 0) 839 goto reset; 840 841 /* 842 * Allow others to run. 843 */ 844 atomic_inc(&mce_executing); 845 846 if (order == 1) { 847 /* CHECKME: Can this race with a parallel hotplug? */ 848 int cpus = num_online_cpus(); 849 850 /* 851 * Monarch: Wait for everyone to go through their scanning 852 * loops. 853 */ 854 while (atomic_read(&mce_executing) <= cpus) { 855 if (mce_timed_out(&timeout)) 856 goto reset; 857 ndelay(SPINUNIT); 858 } 859 860 mce_reign(); 861 barrier(); 862 ret = 0; 863 } else { 864 /* 865 * Subject: Wait for Monarch to finish. 866 */ 867 while (atomic_read(&mce_executing) != 0) { 868 if (mce_timed_out(&timeout)) 869 goto reset; 870 ndelay(SPINUNIT); 871 } 872 873 /* 874 * Don't reset anything. That's done by the Monarch. 875 */ 876 return 0; 877 } 878 879 /* 880 * Reset all global state. 881 */ 882reset: 883 atomic_set(&global_nwo, 0); 884 atomic_set(&mce_callin, 0); 885 barrier(); 886 887 /* 888 * Let others run again. 889 */ 890 atomic_set(&mce_executing, 0); 891 return ret; 892} 893 894/* 895 * Check if the address reported by the CPU is in a format we can parse. 896 * It would be possible to add code for most other cases, but all would 897 * be somewhat complicated (e.g. segment offset would require an instruction 898 * parser). So only support physical addresses up to page granuality for now. 899 */ 900static int mce_usable_address(struct mce *m) 901{ 902 if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) 903 return 0; 904 if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) 905 return 0; 906 if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) 907 return 0; 908 return 1; 909} 910 911static void mce_clear_state(unsigned long *toclear) 912{ 913 int i; 914 915 for (i = 0; i < banks; i++) { 916 if (test_bit(i, toclear)) 917 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 918 } 919} 920 921/* 922 * The actual machine check handler. This only handles real 923 * exceptions when something got corrupted coming in through int 18. 924 * 925 * This is executed in NMI context not subject to normal locking rules. This 926 * implies that most kernel services cannot be safely used. Don't even 927 * think about putting a printk in there! 928 * 929 * On Intel systems this is entered on all CPUs in parallel through 930 * MCE broadcast. However some CPUs might be broken beyond repair, 931 * so be always careful when synchronizing with others. 932 */ 933void do_machine_check(struct pt_regs *regs, long error_code) 934{ 935 struct mce m, *final; 936 int i; 937 int worst = 0; 938 int severity; 939 /* 940 * Establish sequential order between the CPUs entering the machine 941 * check handler. 942 */ 943 int order; 944 /* 945 * If no_way_out gets set, there is no safe way to recover from this 946 * MCE. If tolerant is cranked up, we'll try anyway. 947 */ 948 int no_way_out = 0; 949 /* 950 * If kill_it gets set, there might be a way to recover from this 951 * error. 952 */ 953 int kill_it = 0; 954 DECLARE_BITMAP(toclear, MAX_NR_BANKS); 955 char *msg = "Unknown"; 956 957 atomic_inc(&mce_entry); 958 959 percpu_inc(mce_exception_count); 960 961 if (!banks) 962 goto out; 963 964 mce_gather_info(&m, regs); 965 966 final = &__get_cpu_var(mces_seen); 967 *final = m; 968 969 no_way_out = mce_no_way_out(&m, &msg); 970 971 barrier(); 972 973 /* 974 * When no restart IP must always kill or panic. 975 */ 976 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 977 kill_it = 1; 978 979 /* 980 * Go through all the banks in exclusion of the other CPUs. 981 * This way we don't report duplicated events on shared banks 982 * because the first one to see it will clear it. 983 */ 984 order = mce_start(&no_way_out); 985 for (i = 0; i < banks; i++) { 986 __clear_bit(i, toclear); 987 if (!mce_banks[i].ctl) 988 continue; 989 990 m.misc = 0; 991 m.addr = 0; 992 m.bank = i; 993 994 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 995 if ((m.status & MCI_STATUS_VAL) == 0) 996 continue; 997 998 /* 999 * Non uncorrected or non signaled errors are handled by 1000 * machine_check_poll. Leave them alone, unless this panics. 1001 */ 1002 if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && 1003 !no_way_out) 1004 continue; 1005 1006 /* 1007 * Set taint even when machine check was not enabled. 1008 */ 1009 add_taint(TAINT_MACHINE_CHECK); 1010 1011 severity = mce_severity(&m, tolerant, NULL); 1012 1013 /* 1014 * When machine check was for corrected handler don't touch, 1015 * unless we're panicing. 1016 */ 1017 if (severity == MCE_KEEP_SEVERITY && !no_way_out) 1018 continue; 1019 __set_bit(i, toclear); 1020 if (severity == MCE_NO_SEVERITY) { 1021 /* 1022 * Machine check event was not enabled. Clear, but 1023 * ignore. 1024 */ 1025 continue; 1026 } 1027 1028 /* 1029 * Kill on action required. 1030 */ 1031 if (severity == MCE_AR_SEVERITY) 1032 kill_it = 1; 1033 1034 if (m.status & MCI_STATUS_MISCV) 1035 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); 1036 if (m.status & MCI_STATUS_ADDRV) 1037 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); 1038 1039 /* 1040 * Action optional error. Queue address for later processing. 1041 * When the ring overflows we just ignore the AO error. 1042 * RED-PEN add some logging mechanism when 1043 * usable_address or mce_add_ring fails. 1044 * RED-PEN don't ignore overflow for tolerant == 0 1045 */ 1046 if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) 1047 mce_ring_add(m.addr >> PAGE_SHIFT); 1048 1049 mce_log(&m); 1050 1051 if (severity > worst) { 1052 *final = m; 1053 worst = severity; 1054 } 1055 } 1056 1057 if (!no_way_out) 1058 mce_clear_state(toclear); 1059 1060 /* 1061 * Do most of the synchronization with other CPUs. 1062 * When there's any problem use only local no_way_out state. 1063 */ 1064 if (mce_end(order) < 0) 1065 no_way_out = worst >= MCE_PANIC_SEVERITY; 1066 1067 /* 1068 * If we have decided that we just CAN'T continue, and the user 1069 * has not set tolerant to an insane level, give up and die. 1070 * 1071 * This is mainly used in the case when the system doesn't 1072 * support MCE broadcasting or it has been disabled. 1073 */ 1074 if (no_way_out && tolerant < 3) 1075 mce_panic("Fatal machine check on current CPU", final, msg); 1076 1077 /* 1078 * If the error seems to be unrecoverable, something should be 1079 * done. Try to kill as little as possible. If we can kill just 1080 * one task, do that. If the user has set the tolerance very 1081 * high, don't try to do anything at all. 1082 */ 1083 1084 if (kill_it && tolerant < 3) 1085 force_sig(SIGBUS, current); 1086 1087 /* notify userspace ASAP */ 1088 set_thread_flag(TIF_MCE_NOTIFY); 1089 1090 if (worst > 0) 1091 mce_report_event(regs); 1092 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); 1093out: 1094 atomic_dec(&mce_entry); 1095 sync_core(); 1096} 1097EXPORT_SYMBOL_GPL(do_machine_check); 1098 1099/* dummy to break dependency. actual code is in mm/memory-failure.c */ 1100void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) 1101{ 1102 printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); 1103} 1104 1105/* 1106 * Called after mce notification in process context. This code 1107 * is allowed to sleep. Call the high level VM handler to process 1108 * any corrupted pages. 1109 * Assume that the work queue code only calls this one at a time 1110 * per CPU. 1111 * Note we don't disable preemption, so this code might run on the wrong 1112 * CPU. In this case the event is picked up by the scheduled work queue. 1113 * This is merely a fast path to expedite processing in some common 1114 * cases. 1115 */ 1116void mce_notify_process(void) 1117{ 1118 unsigned long pfn; 1119 mce_notify_irq(); 1120 while (mce_ring_get(&pfn)) 1121 memory_failure(pfn, MCE_VECTOR); 1122} 1123 1124static void mce_process_work(struct work_struct *dummy) 1125{ 1126 mce_notify_process(); 1127} 1128 1129#ifdef CONFIG_X86_MCE_INTEL 1130/*** 1131 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog 1132 * @cpu: The CPU on which the event occurred. 1133 * @status: Event status information 1134 * 1135 * This function should be called by the thermal interrupt after the 1136 * event has been processed and the decision was made to log the event 1137 * further. 1138 * 1139 * The status parameter will be saved to the 'status' field of 'struct mce' 1140 * and historically has been the register value of the 1141 * MSR_IA32_THERMAL_STATUS (Intel) msr. 1142 */ 1143void mce_log_therm_throt_event(__u64 status) 1144{ 1145 struct mce m; 1146 1147 mce_setup(&m); 1148 m.bank = MCE_THERMAL_BANK; 1149 m.status = status; 1150 mce_log(&m); 1151} 1152#endif /* CONFIG_X86_MCE_INTEL */ 1153 1154/* 1155 * Periodic polling timer for "silent" machine check errors. If the 1156 * poller finds an MCE, poll 2x faster. When the poller finds no more 1157 * errors, poll 2x slower (up to check_interval seconds). 1158 */ 1159static int check_interval = 5 * 60; /* 5 minutes */ 1160 1161static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ 1162static DEFINE_PER_CPU(struct timer_list, mce_timer); 1163 1164static void mce_start_timer(unsigned long data) 1165{ 1166 struct timer_list *t = &per_cpu(mce_timer, data); 1167 int *n; 1168 1169 WARN_ON(smp_processor_id() != data); 1170 1171 if (mce_available(__this_cpu_ptr(&cpu_info))) { 1172 machine_check_poll(MCP_TIMESTAMP, 1173 &__get_cpu_var(mce_poll_banks)); 1174 } 1175 1176 /* 1177 * Alert userspace if needed. If we logged an MCE, reduce the 1178 * polling interval, otherwise increase the polling interval. 1179 */ 1180 n = &__get_cpu_var(mce_next_interval); 1181 if (mce_notify_irq()) 1182 *n = max(*n/2, HZ/100); 1183 else 1184 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); 1185 1186 t->expires = jiffies + *n; 1187 add_timer_on(t, smp_processor_id()); 1188} 1189 1190/* Must not be called in IRQ context where del_timer_sync() can deadlock */ 1191static void mce_timer_delete_all(void) 1192{ 1193 int cpu; 1194 1195 for_each_online_cpu(cpu) 1196 del_timer_sync(&per_cpu(mce_timer, cpu)); 1197} 1198 1199static void mce_do_trigger(struct work_struct *work) 1200{ 1201 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); 1202} 1203 1204static DECLARE_WORK(mce_trigger_work, mce_do_trigger); 1205 1206/* 1207 * Notify the user(s) about new machine check events. 1208 * Can be called from interrupt context, but not from machine check/NMI 1209 * context. 1210 */ 1211int mce_notify_irq(void) 1212{ 1213 /* Not more than two messages every minute */ 1214 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); 1215 1216 clear_thread_flag(TIF_MCE_NOTIFY); 1217 1218 if (test_and_clear_bit(0, &mce_need_notify)) { 1219 /* wake processes polling /dev/mcelog */ 1220 wake_up_interruptible(&mce_chrdev_wait); 1221 1222 /* 1223 * There is no risk of missing notifications because 1224 * work_pending is always cleared before the function is 1225 * executed. 1226 */ 1227 if (mce_helper[0] && !work_pending(&mce_trigger_work)) 1228 schedule_work(&mce_trigger_work); 1229 1230 if (__ratelimit(&ratelimit)) 1231 pr_info(HW_ERR "Machine check events logged\n"); 1232 1233 return 1; 1234 } 1235 return 0; 1236} 1237EXPORT_SYMBOL_GPL(mce_notify_irq); 1238 1239static int __cpuinit __mcheck_cpu_mce_banks_init(void) 1240{ 1241 int i; 1242 1243 mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); 1244 if (!mce_banks) 1245 return -ENOMEM; 1246 for (i = 0; i < banks; i++) { 1247 struct mce_bank *b = &mce_banks[i]; 1248 1249 b->ctl = -1ULL; 1250 b->init = 1; 1251 } 1252 return 0; 1253} 1254 1255/* 1256 * Initialize Machine Checks for a CPU. 1257 */ 1258static int __cpuinit __mcheck_cpu_cap_init(void) 1259{ 1260 unsigned b; 1261 u64 cap; 1262 1263 rdmsrl(MSR_IA32_MCG_CAP, cap); 1264 1265 b = cap & MCG_BANKCNT_MASK; 1266 if (!banks) 1267 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); 1268 1269 if (b > MAX_NR_BANKS) { 1270 printk(KERN_WARNING 1271 "MCE: Using only %u machine check banks out of %u\n", 1272 MAX_NR_BANKS, b); 1273 b = MAX_NR_BANKS; 1274 } 1275 1276 /* Don't support asymmetric configurations today */ 1277 WARN_ON(banks != 0 && b != banks); 1278 banks = b; 1279 if (!mce_banks) { 1280 int err = __mcheck_cpu_mce_banks_init(); 1281 1282 if (err) 1283 return err; 1284 } 1285 1286 /* Use accurate RIP reporting if available. */ 1287 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) 1288 rip_msr = MSR_IA32_MCG_EIP; 1289 1290 if (cap & MCG_SER_P) 1291 mce_ser = 1; 1292 1293 return 0; 1294} 1295 1296static void __mcheck_cpu_init_generic(void) 1297{ 1298 mce_banks_t all_banks; 1299 u64 cap; 1300 int i; 1301 1302 /* 1303 * Log the machine checks left over from the previous reset. 1304 */ 1305 bitmap_fill(all_banks, MAX_NR_BANKS); 1306 machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); 1307 1308 set_in_cr4(X86_CR4_MCE); 1309 1310 rdmsrl(MSR_IA32_MCG_CAP, cap); 1311 if (cap & MCG_CTL_P) 1312 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 1313 1314 for (i = 0; i < banks; i++) { 1315 struct mce_bank *b = &mce_banks[i]; 1316 1317 if (!b->init) 1318 continue; 1319 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); 1320 wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 1321 } 1322} 1323 1324/* Add per CPU specific workarounds here */ 1325static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) 1326{ 1327 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1328 pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); 1329 return -EOPNOTSUPP; 1330 } 1331 1332 /* This should be disabled by the BIOS, but isn't always */ 1333 if (c->x86_vendor == X86_VENDOR_AMD) { 1334 if (c->x86 == 15 && banks > 4) { 1335 /* 1336 * disable GART TBL walk error reporting, which 1337 * trips off incorrectly with the IOMMU & 3ware 1338 * & Cerberus: 1339 */ 1340 clear_bit(10, (unsigned long *)&mce_banks[4].ctl); 1341 } 1342 if (c->x86 <= 17 && mce_bootlog < 0) { 1343 /* 1344 * Lots of broken BIOS around that don't clear them 1345 * by default and leave crap in there. Don't log: 1346 */ 1347 mce_bootlog = 0; 1348 } 1349 /* 1350 * Various K7s with broken bank 0 around. Always disable 1351 * by default. 1352 */ 1353 if (c->x86 == 6 && banks > 0) 1354 mce_banks[0].ctl = 0; 1355 } 1356 1357 if (c->x86_vendor == X86_VENDOR_INTEL) { 1358 /* 1359 * SDM documents that on family 6 bank 0 should not be written 1360 * because it aliases to another special BIOS controlled 1361 * register. 1362 * But it's not aliased anymore on model 0x1a+ 1363 * Don't ignore bank 0 completely because there could be a 1364 * valid event later, merely don't write CTL0. 1365 */ 1366 1367 if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) 1368 mce_banks[0].init = 0; 1369 1370 /* 1371 * All newer Intel systems support MCE broadcasting. Enable 1372 * synchronization with a one second timeout. 1373 */ 1374 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && 1375 monarch_timeout < 0) 1376 monarch_timeout = USEC_PER_SEC; 1377 1378 /* 1379 * There are also broken BIOSes on some Pentium M and 1380 * earlier systems: 1381 */ 1382 if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) 1383 mce_bootlog = 0; 1384 } 1385 if (monarch_timeout < 0) 1386 monarch_timeout = 0; 1387 if (mce_bootlog != 0) 1388 mce_panic_timeout = 30; 1389 1390 return 0; 1391} 1392 1393static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) 1394{ 1395 if (c->x86 != 5) 1396 return 0; 1397 1398 switch (c->x86_vendor) { 1399 case X86_VENDOR_INTEL: 1400 intel_p5_mcheck_init(c); 1401 return 1; 1402 break; 1403 case X86_VENDOR_CENTAUR: 1404 winchip_mcheck_init(c); 1405 return 1; 1406 break; 1407 } 1408 1409 return 0; 1410} 1411 1412static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) 1413{ 1414 switch (c->x86_vendor) { 1415 case X86_VENDOR_INTEL: 1416 mce_intel_feature_init(c); 1417 break; 1418 case X86_VENDOR_AMD: 1419 mce_amd_feature_init(c); 1420 break; 1421 default: 1422 break; 1423 } 1424} 1425 1426static void __mcheck_cpu_init_timer(void) 1427{ 1428 struct timer_list *t = &__get_cpu_var(mce_timer); 1429 int *n = &__get_cpu_var(mce_next_interval); 1430 1431 setup_timer(t, mce_start_timer, smp_processor_id()); 1432 1433 if (mce_ignore_ce) 1434 return; 1435 1436 *n = check_interval * HZ; 1437 if (!*n) 1438 return; 1439 t->expires = round_jiffies(jiffies + *n); 1440 add_timer_on(t, smp_processor_id()); 1441} 1442 1443/* Handle unconfigured int18 (should never happen) */ 1444static void unexpected_machine_check(struct pt_regs *regs, long error_code) 1445{ 1446 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", 1447 smp_processor_id()); 1448} 1449 1450/* Call the installed machine check handler for this CPU setup. */ 1451void (*machine_check_vector)(struct pt_regs *, long error_code) = 1452 unexpected_machine_check; 1453 1454/* 1455 * Called for each booted CPU to set up machine checks. 1456 * Must be called with preempt off: 1457 */ 1458void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) 1459{ 1460 if (mce_disabled) 1461 return; 1462 1463 if (__mcheck_cpu_ancient_init(c)) 1464 return; 1465 1466 if (!mce_available(c)) 1467 return; 1468 1469 if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { 1470 mce_disabled = 1; 1471 return; 1472 } 1473 1474 machine_check_vector = do_machine_check; 1475 1476 __mcheck_cpu_init_generic(); 1477 __mcheck_cpu_init_vendor(c); 1478 __mcheck_cpu_init_timer(); 1479 INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); 1480 init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); 1481} 1482 1483/* 1484 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. 1485 */ 1486 1487static DEFINE_SPINLOCK(mce_chrdev_state_lock); 1488static int mce_chrdev_open_count; /* #times opened */ 1489static int mce_chrdev_open_exclu; /* already open exclusive? */ 1490 1491static int mce_chrdev_open(struct inode *inode, struct file *file) 1492{ 1493 spin_lock(&mce_chrdev_state_lock); 1494 1495 if (mce_chrdev_open_exclu || 1496 (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 1497 spin_unlock(&mce_chrdev_state_lock); 1498 1499 return -EBUSY; 1500 } 1501 1502 if (file->f_flags & O_EXCL) 1503 mce_chrdev_open_exclu = 1; 1504 mce_chrdev_open_count++; 1505 1506 spin_unlock(&mce_chrdev_state_lock); 1507 1508 return nonseekable_open(inode, file); 1509} 1510 1511static int mce_chrdev_release(struct inode *inode, struct file *file) 1512{ 1513 spin_lock(&mce_chrdev_state_lock); 1514 1515 mce_chrdev_open_count--; 1516 mce_chrdev_open_exclu = 0; 1517 1518 spin_unlock(&mce_chrdev_state_lock); 1519 1520 return 0; 1521} 1522 1523static void collect_tscs(void *data) 1524{ 1525 unsigned long *cpu_tsc = (unsigned long *)data; 1526 1527 rdtscll(cpu_tsc[smp_processor_id()]); 1528} 1529 1530static int mce_apei_read_done; 1531 1532/* Collect MCE record of previous boot in persistent storage via APEI ERST. */ 1533static int __mce_read_apei(char __user **ubuf, size_t usize) 1534{ 1535 int rc; 1536 u64 record_id; 1537 struct mce m; 1538 1539 if (usize < sizeof(struct mce)) 1540 return -EINVAL; 1541 1542 rc = apei_read_mce(&m, &record_id); 1543 /* Error or no more MCE record */ 1544 if (rc <= 0) { 1545 mce_apei_read_done = 1; 1546 return rc; 1547 } 1548 rc = -EFAULT; 1549 if (copy_to_user(*ubuf, &m, sizeof(struct mce))) 1550 return rc; 1551 /* 1552 * In fact, we should have cleared the record after that has 1553 * been flushed to the disk or sent to network in 1554 * /sbin/mcelog, but we have no interface to support that now, 1555 * so just clear it to avoid duplication. 1556 */ 1557 rc = apei_clear_mce(record_id); 1558 if (rc) { 1559 mce_apei_read_done = 1; 1560 return rc; 1561 } 1562 *ubuf += sizeof(struct mce); 1563 1564 return 0; 1565} 1566 1567static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, 1568 size_t usize, loff_t *off) 1569{ 1570 char __user *buf = ubuf; 1571 unsigned long *cpu_tsc; 1572 unsigned prev, next; 1573 int i, err; 1574 1575 cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); 1576 if (!cpu_tsc) 1577 return -ENOMEM; 1578 1579 mutex_lock(&mce_chrdev_read_mutex); 1580 1581 if (!mce_apei_read_done) { 1582 err = __mce_read_apei(&buf, usize); 1583 if (err || buf != ubuf) 1584 goto out; 1585 } 1586 1587 next = rcu_dereference_check_mce(mcelog.next); 1588 1589 /* Only supports full reads right now */ 1590 err = -EINVAL; 1591 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) 1592 goto out; 1593 1594 err = 0; 1595 prev = 0; 1596 do { 1597 for (i = prev; i < next; i++) { 1598 unsigned long start = jiffies; 1599 struct mce *m = &mcelog.entry[i]; 1600 1601 while (!m->finished) { 1602 if (time_after_eq(jiffies, start + 2)) { 1603 memset(m, 0, sizeof(*m)); 1604 goto timeout; 1605 } 1606 cpu_relax(); 1607 } 1608 smp_rmb(); 1609 err |= copy_to_user(buf, m, sizeof(*m)); 1610 buf += sizeof(*m); 1611timeout: 1612 ; 1613 } 1614 1615 memset(mcelog.entry + prev, 0, 1616 (next - prev) * sizeof(struct mce)); 1617 prev = next; 1618 next = cmpxchg(&mcelog.next, prev, 0); 1619 } while (next != prev); 1620 1621 synchronize_sched(); 1622 1623 /* 1624 * Collect entries that were still getting written before the 1625 * synchronize. 1626 */ 1627 on_each_cpu(collect_tscs, cpu_tsc, 1); 1628 1629 for (i = next; i < MCE_LOG_LEN; i++) { 1630 struct mce *m = &mcelog.entry[i]; 1631 1632 if (m->finished && m->tsc < cpu_tsc[m->cpu]) { 1633 err |= copy_to_user(buf, m, sizeof(*m)); 1634 smp_rmb(); 1635 buf += sizeof(*m); 1636 memset(m, 0, sizeof(*m)); 1637 } 1638 } 1639 1640 if (err) 1641 err = -EFAULT; 1642 1643out: 1644 mutex_unlock(&mce_chrdev_read_mutex); 1645 kfree(cpu_tsc); 1646 1647 return err ? err : buf - ubuf; 1648} 1649 1650static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait) 1651{ 1652 poll_wait(file, &mce_chrdev_wait, wait); 1653 if (rcu_access_index(mcelog.next)) 1654 return POLLIN | POLLRDNORM; 1655 if (!mce_apei_read_done && apei_check_mce()) 1656 return POLLIN | POLLRDNORM; 1657 return 0; 1658} 1659 1660static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, 1661 unsigned long arg) 1662{ 1663 int __user *p = (int __user *)arg; 1664 1665 if (!capable(CAP_SYS_ADMIN)) 1666 return -EPERM; 1667 1668 switch (cmd) { 1669 case MCE_GET_RECORD_LEN: 1670 return put_user(sizeof(struct mce), p); 1671 case MCE_GET_LOG_LEN: 1672 return put_user(MCE_LOG_LEN, p); 1673 case MCE_GETCLEAR_FLAGS: { 1674 unsigned flags; 1675 1676 do { 1677 flags = mcelog.flags; 1678 } while (cmpxchg(&mcelog.flags, flags, 0) != flags); 1679 1680 return put_user(flags, p); 1681 } 1682 default: 1683 return -ENOTTY; 1684 } 1685} 1686 1687static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf, 1688 size_t usize, loff_t *off); 1689 1690void register_mce_write_callback(ssize_t (*fn)(struct file *filp, 1691 const char __user *ubuf, 1692 size_t usize, loff_t *off)) 1693{ 1694 mce_write = fn; 1695} 1696EXPORT_SYMBOL_GPL(register_mce_write_callback); 1697 1698ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, 1699 size_t usize, loff_t *off) 1700{ 1701 if (mce_write) 1702 return mce_write(filp, ubuf, usize, off); 1703 else 1704 return -EINVAL; 1705} 1706 1707static const struct file_operations mce_chrdev_ops = { 1708 .open = mce_chrdev_open, 1709 .release = mce_chrdev_release, 1710 .read = mce_chrdev_read, 1711 .write = mce_chrdev_write, 1712 .poll = mce_chrdev_poll, 1713 .unlocked_ioctl = mce_chrdev_ioctl, 1714 .llseek = no_llseek, 1715}; 1716 1717static struct miscdevice mce_chrdev_device = { 1718 MISC_MCELOG_MINOR, 1719 "mcelog", 1720 &mce_chrdev_ops, 1721}; 1722 1723/* 1724 * mce=off Disables machine check 1725 * mce=no_cmci Disables CMCI 1726 * mce=dont_log_ce Clears corrected events silently, no log created for CEs. 1727 * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. 1728 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) 1729 * monarchtimeout is how long to wait for other CPUs on machine 1730 * check, or 0 to not wait 1731 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 1732 * mce=nobootlog Don't log MCEs from before booting. 1733 */ 1734static int __init mcheck_enable(char *str) 1735{ 1736 if (*str == 0) { 1737 enable_p5_mce(); 1738 return 1; 1739 } 1740 if (*str == '=') 1741 str++; 1742 if (!strcmp(str, "off")) 1743 mce_disabled = 1; 1744 else if (!strcmp(str, "no_cmci")) 1745 mce_cmci_disabled = 1; 1746 else if (!strcmp(str, "dont_log_ce")) 1747 mce_dont_log_ce = 1; 1748 else if (!strcmp(str, "ignore_ce")) 1749 mce_ignore_ce = 1; 1750 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) 1751 mce_bootlog = (str[0] == 'b'); 1752 else if (isdigit(str[0])) { 1753 get_option(&str, &tolerant); 1754 if (*str == ',') { 1755 ++str; 1756 get_option(&str, &monarch_timeout); 1757 } 1758 } else { 1759 printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", 1760 str); 1761 return 0; 1762 } 1763 return 1; 1764} 1765__setup("mce", mcheck_enable); 1766 1767int __init mcheck_init(void) 1768{ 1769 mcheck_intel_therm_init(); 1770 1771 return 0; 1772} 1773 1774/* 1775 * mce_syscore: PM support 1776 */ 1777 1778/* 1779 * Disable machine checks on suspend and shutdown. We can't really handle 1780 * them later. 1781 */ 1782static int mce_disable_error_reporting(void) 1783{ 1784 int i; 1785 1786 for (i = 0; i < banks; i++) { 1787 struct mce_bank *b = &mce_banks[i]; 1788 1789 if (b->init) 1790 wrmsrl(MSR_IA32_MCx_CTL(i), 0); 1791 } 1792 return 0; 1793} 1794 1795static int mce_syscore_suspend(void) 1796{ 1797 return mce_disable_error_reporting(); 1798} 1799 1800static void mce_syscore_shutdown(void) 1801{ 1802 mce_disable_error_reporting(); 1803} 1804 1805/* 1806 * On resume clear all MCE state. Don't want to see leftovers from the BIOS. 1807 * Only one CPU is active at this time, the others get re-added later using 1808 * CPU hotplug: 1809 */ 1810static void mce_syscore_resume(void) 1811{ 1812 __mcheck_cpu_init_generic(); 1813 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); 1814} 1815 1816static struct syscore_ops mce_syscore_ops = { 1817 .suspend = mce_syscore_suspend, 1818 .shutdown = mce_syscore_shutdown, 1819 .resume = mce_syscore_resume, 1820}; 1821 1822/* 1823 * mce_sysdev: Sysfs support 1824 */ 1825 1826static void mce_cpu_restart(void *data) 1827{ 1828 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1829 return; 1830 __mcheck_cpu_init_generic(); 1831 __mcheck_cpu_init_timer(); 1832} 1833 1834/* Reinit MCEs after user configuration changes */ 1835static void mce_restart(void) 1836{ 1837 mce_timer_delete_all(); 1838 on_each_cpu(mce_cpu_restart, NULL, 1); 1839} 1840 1841/* Toggle features for corrected errors */ 1842static void mce_disable_cmci(void *data) 1843{ 1844 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1845 return; 1846 cmci_clear(); 1847} 1848 1849static void mce_enable_ce(void *all) 1850{ 1851 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1852 return; 1853 cmci_reenable(); 1854 cmci_recheck(); 1855 if (all) 1856 __mcheck_cpu_init_timer(); 1857} 1858 1859static struct sysdev_class mce_sysdev_class = { 1860 .name = "machinecheck", 1861}; 1862 1863DEFINE_PER_CPU(struct sys_device, mce_sysdev); 1864 1865__cpuinitdata 1866void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1867 1868static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) 1869{ 1870 return container_of(attr, struct mce_bank, attr); 1871} 1872 1873static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, 1874 char *buf) 1875{ 1876 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); 1877} 1878 1879static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, 1880 const char *buf, size_t size) 1881{ 1882 u64 new; 1883 1884 if (strict_strtoull(buf, 0, &new) < 0) 1885 return -EINVAL; 1886 1887 attr_to_bank(attr)->ctl = new; 1888 mce_restart(); 1889 1890 return size; 1891} 1892 1893static ssize_t 1894show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) 1895{ 1896 strcpy(buf, mce_helper); 1897 strcat(buf, "\n"); 1898 return strlen(mce_helper) + 1; 1899} 1900 1901static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, 1902 const char *buf, size_t siz) 1903{ 1904 char *p; 1905 1906 strncpy(mce_helper, buf, sizeof(mce_helper)); 1907 mce_helper[sizeof(mce_helper)-1] = 0; 1908 p = strchr(mce_helper, '\n'); 1909 1910 if (p) 1911 *p = 0; 1912 1913 return strlen(mce_helper) + !!p; 1914} 1915 1916static ssize_t set_ignore_ce(struct sys_device *s, 1917 struct sysdev_attribute *attr, 1918 const char *buf, size_t size) 1919{ 1920 u64 new; 1921 1922 if (strict_strtoull(buf, 0, &new) < 0) 1923 return -EINVAL; 1924 1925 if (mce_ignore_ce ^ !!new) { 1926 if (new) { 1927 /* disable ce features */ 1928 mce_timer_delete_all(); 1929 on_each_cpu(mce_disable_cmci, NULL, 1); 1930 mce_ignore_ce = 1; 1931 } else { 1932 /* enable ce features */ 1933 mce_ignore_ce = 0; 1934 on_each_cpu(mce_enable_ce, (void *)1, 1); 1935 } 1936 } 1937 return size; 1938} 1939 1940static ssize_t set_cmci_disabled(struct sys_device *s, 1941 struct sysdev_attribute *attr, 1942 const char *buf, size_t size) 1943{ 1944 u64 new; 1945 1946 if (strict_strtoull(buf, 0, &new) < 0) 1947 return -EINVAL; 1948 1949 if (mce_cmci_disabled ^ !!new) { 1950 if (new) { 1951 /* disable cmci */ 1952 on_each_cpu(mce_disable_cmci, NULL, 1); 1953 mce_cmci_disabled = 1; 1954 } else { 1955 /* enable cmci */ 1956 mce_cmci_disabled = 0; 1957 on_each_cpu(mce_enable_ce, NULL, 1); 1958 } 1959 } 1960 return size; 1961} 1962 1963static ssize_t store_int_with_restart(struct sys_device *s, 1964 struct sysdev_attribute *attr, 1965 const char *buf, size_t size) 1966{ 1967 ssize_t ret = sysdev_store_int(s, attr, buf, size); 1968 mce_restart(); 1969 return ret; 1970} 1971 1972static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); 1973static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 1974static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); 1975static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); 1976 1977static struct sysdev_ext_attribute attr_check_interval = { 1978 _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, 1979 store_int_with_restart), 1980 &check_interval 1981}; 1982 1983static struct sysdev_ext_attribute attr_ignore_ce = { 1984 _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), 1985 &mce_ignore_ce 1986}; 1987 1988static struct sysdev_ext_attribute attr_cmci_disabled = { 1989 _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), 1990 &mce_cmci_disabled 1991}; 1992 1993static struct sysdev_attribute *mce_sysdev_attrs[] = { 1994 &attr_tolerant.attr, 1995 &attr_check_interval.attr, 1996 &attr_trigger, 1997 &attr_monarch_timeout.attr, 1998 &attr_dont_log_ce.attr, 1999 &attr_ignore_ce.attr, 2000 &attr_cmci_disabled.attr, 2001 NULL 2002}; 2003 2004static cpumask_var_t mce_sysdev_initialized; 2005 2006/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ 2007static __cpuinit int mce_sysdev_create(unsigned int cpu) 2008{ 2009 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); 2010 int err; 2011 int i, j; 2012 2013 if (!mce_available(&boot_cpu_data)) 2014 return -EIO; 2015 2016 memset(&sysdev->kobj, 0, sizeof(struct kobject)); 2017 sysdev->id = cpu; 2018 sysdev->cls = &mce_sysdev_class; 2019 2020 err = sysdev_register(sysdev); 2021 if (err) 2022 return err; 2023 2024 for (i = 0; mce_sysdev_attrs[i]; i++) { 2025 err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]); 2026 if (err) 2027 goto error; 2028 } 2029 for (j = 0; j < banks; j++) { 2030 err = sysdev_create_file(sysdev, &mce_banks[j].attr); 2031 if (err) 2032 goto error2; 2033 } 2034 cpumask_set_cpu(cpu, mce_sysdev_initialized); 2035 2036 return 0; 2037error2: 2038 while (--j >= 0) 2039 sysdev_remove_file(sysdev, &mce_banks[j].attr); 2040error: 2041 while (--i >= 0) 2042 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); 2043 2044 sysdev_unregister(sysdev); 2045 2046 return err; 2047} 2048 2049static __cpuinit void mce_sysdev_remove(unsigned int cpu) 2050{ 2051 struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu); 2052 int i; 2053 2054 if (!cpumask_test_cpu(cpu, mce_sysdev_initialized)) 2055 return; 2056 2057 for (i = 0; mce_sysdev_attrs[i]; i++) 2058 sysdev_remove_file(sysdev, mce_sysdev_attrs[i]); 2059 2060 for (i = 0; i < banks; i++) 2061 sysdev_remove_file(sysdev, &mce_banks[i].attr); 2062 2063 sysdev_unregister(sysdev); 2064 cpumask_clear_cpu(cpu, mce_sysdev_initialized); 2065} 2066 2067/* Make sure there are no machine checks on offlined CPUs. */ 2068static void __cpuinit mce_disable_cpu(void *h) 2069{ 2070 unsigned long action = *(unsigned long *)h; 2071 int i; 2072 2073 if (!mce_available(__this_cpu_ptr(&cpu_info))) 2074 return; 2075 2076 if (!(action & CPU_TASKS_FROZEN)) 2077 cmci_clear(); 2078 for (i = 0; i < banks; i++) { 2079 struct mce_bank *b = &mce_banks[i]; 2080 2081 if (b->init) 2082 wrmsrl(MSR_IA32_MCx_CTL(i), 0); 2083 } 2084} 2085 2086static void __cpuinit mce_reenable_cpu(void *h) 2087{ 2088 unsigned long action = *(unsigned long *)h; 2089 int i; 2090 2091 if (!mce_available(__this_cpu_ptr(&cpu_info))) 2092 return; 2093 2094 if (!(action & CPU_TASKS_FROZEN)) 2095 cmci_reenable(); 2096 for (i = 0; i < banks; i++) { 2097 struct mce_bank *b = &mce_banks[i]; 2098 2099 if (b->init) 2100 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); 2101 } 2102} 2103 2104/* Get notified when a cpu comes on/off. Be hotplug friendly. */ 2105static int __cpuinit 2106mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 2107{ 2108 unsigned int cpu = (unsigned long)hcpu; 2109 struct timer_list *t = &per_cpu(mce_timer, cpu); 2110 2111 switch (action) { 2112 case CPU_ONLINE: 2113 case CPU_ONLINE_FROZEN: 2114 mce_sysdev_create(cpu); 2115 if (threshold_cpu_callback) 2116 threshold_cpu_callback(action, cpu); 2117 break; 2118 case CPU_DEAD: 2119 case CPU_DEAD_FROZEN: 2120 if (threshold_cpu_callback) 2121 threshold_cpu_callback(action, cpu); 2122 mce_sysdev_remove(cpu); 2123 break; 2124 case CPU_DOWN_PREPARE: 2125 case CPU_DOWN_PREPARE_FROZEN: 2126 del_timer_sync(t); 2127 smp_call_function_single(cpu, mce_disable_cpu, &action, 1); 2128 break; 2129 case CPU_DOWN_FAILED: 2130 case CPU_DOWN_FAILED_FROZEN: 2131 if (!mce_ignore_ce && check_interval) { 2132 t->expires = round_jiffies(jiffies + 2133 __get_cpu_var(mce_next_interval)); 2134 add_timer_on(t, cpu); 2135 } 2136 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 2137 break; 2138 case CPU_POST_DEAD: 2139 /* intentionally ignoring frozen here */ 2140 cmci_rediscover(cpu); 2141 break; 2142 } 2143 return NOTIFY_OK; 2144} 2145 2146static struct notifier_block mce_cpu_notifier __cpuinitdata = { 2147 .notifier_call = mce_cpu_callback, 2148}; 2149 2150static __init void mce_init_banks(void) 2151{ 2152 int i; 2153 2154 for (i = 0; i < banks; i++) { 2155 struct mce_bank *b = &mce_banks[i]; 2156 struct sysdev_attribute *a = &b->attr; 2157 2158 sysfs_attr_init(&a->attr); 2159 a->attr.name = b->attrname; 2160 snprintf(b->attrname, ATTR_LEN, "bank%d", i); 2161 2162 a->attr.mode = 0644; 2163 a->show = show_bank; 2164 a->store = set_bank; 2165 } 2166} 2167 2168static __init int mcheck_init_device(void) 2169{ 2170 int err; 2171 int i = 0; 2172 2173 if (!mce_available(&boot_cpu_data)) 2174 return -EIO; 2175 2176 zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL); 2177 2178 mce_init_banks(); 2179 2180 err = sysdev_class_register(&mce_sysdev_class); 2181 if (err) 2182 return err; 2183 2184 for_each_online_cpu(i) { 2185 err = mce_sysdev_create(i); 2186 if (err) 2187 return err; 2188 } 2189 2190 register_syscore_ops(&mce_syscore_ops); 2191 register_hotcpu_notifier(&mce_cpu_notifier); 2192 2193 /* register character device /dev/mcelog */ 2194 misc_register(&mce_chrdev_device); 2195 2196 return err; 2197} 2198device_initcall(mcheck_init_device); 2199 2200/* 2201 * Old style boot options parsing. Only for compatibility. 2202 */ 2203static int __init mcheck_disable(char *str) 2204{ 2205 mce_disabled = 1; 2206 return 1; 2207} 2208__setup("nomce", mcheck_disable); 2209 2210#ifdef CONFIG_DEBUG_FS 2211struct dentry *mce_get_debugfs_dir(void) 2212{ 2213 static struct dentry *dmce; 2214 2215 if (!dmce) 2216 dmce = debugfs_create_dir("mce", NULL); 2217 2218 return dmce; 2219} 2220 2221static void mce_reset(void) 2222{ 2223 cpu_missing = 0; 2224 atomic_set(&mce_fake_paniced, 0); 2225 atomic_set(&mce_executing, 0); 2226 atomic_set(&mce_callin, 0); 2227 atomic_set(&global_nwo, 0); 2228} 2229 2230static int fake_panic_get(void *data, u64 *val) 2231{ 2232 *val = fake_panic; 2233 return 0; 2234} 2235 2236static int fake_panic_set(void *data, u64 val) 2237{ 2238 mce_reset(); 2239 fake_panic = val; 2240 return 0; 2241} 2242 2243DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, 2244 fake_panic_set, "%llu\n"); 2245 2246static int __init mcheck_debugfs_init(void) 2247{ 2248 struct dentry *dmce, *ffake_panic; 2249 2250 dmce = mce_get_debugfs_dir(); 2251 if (!dmce) 2252 return -ENOMEM; 2253 ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, 2254 &fake_panic_fops); 2255 if (!ffake_panic) 2256 return -ENOMEM; 2257 2258 return 0; 2259} 2260late_initcall(mcheck_debugfs_init); 2261#endif 2262