mce.c revision fadd85f16a8ec3fee8af599e79a209682dc52348
1/* 2 * Machine check handler. 3 * 4 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. 5 * Rest from unknown author(s). 6 * 2004 Andi Kleen. Rewrote most of it. 7 * Copyright 2008 Intel Corporation 8 * Author: Andi Kleen 9 */ 10#include <linux/thread_info.h> 11#include <linux/capability.h> 12#include <linux/miscdevice.h> 13#include <linux/ratelimit.h> 14#include <linux/kallsyms.h> 15#include <linux/rcupdate.h> 16#include <linux/kobject.h> 17#include <linux/uaccess.h> 18#include <linux/kdebug.h> 19#include <linux/kernel.h> 20#include <linux/percpu.h> 21#include <linux/string.h> 22#include <linux/device.h> 23#include <linux/syscore_ops.h> 24#include <linux/delay.h> 25#include <linux/ctype.h> 26#include <linux/sched.h> 27#include <linux/sysfs.h> 28#include <linux/types.h> 29#include <linux/slab.h> 30#include <linux/init.h> 31#include <linux/kmod.h> 32#include <linux/poll.h> 33#include <linux/nmi.h> 34#include <linux/cpu.h> 35#include <linux/smp.h> 36#include <linux/fs.h> 37#include <linux/mm.h> 38#include <linux/debugfs.h> 39#include <linux/irq_work.h> 40#include <linux/export.h> 41 42#include <asm/processor.h> 43#include <asm/mce.h> 44#include <asm/msr.h> 45 46#include "mce-internal.h" 47 48static DEFINE_MUTEX(mce_chrdev_read_mutex); 49 50#define rcu_dereference_check_mce(p) \ 51 rcu_dereference_index_check((p), \ 52 rcu_read_lock_sched_held() || \ 53 lockdep_is_held(&mce_chrdev_read_mutex)) 54 55#define CREATE_TRACE_POINTS 56#include <trace/events/mce.h> 57 58int mce_disabled __read_mostly; 59 60#define MISC_MCELOG_MINOR 227 61 62#define SPINUNIT 100 /* 100ns */ 63 64atomic_t mce_entry; 65 66DEFINE_PER_CPU(unsigned, mce_exception_count); 67 68/* 69 * Tolerant levels: 70 * 0: always panic on uncorrected errors, log corrected errors 71 * 1: panic or SIGBUS on uncorrected errors, log corrected errors 72 * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors 73 * 3: never panic or SIGBUS, log all errors (for testing only) 74 */ 75static int tolerant __read_mostly = 1; 76static int banks __read_mostly; 77static int rip_msr __read_mostly; 78static int mce_bootlog __read_mostly = -1; 79static int monarch_timeout __read_mostly = -1; 80static int mce_panic_timeout __read_mostly; 81static int mce_dont_log_ce __read_mostly; 82int mce_cmci_disabled __read_mostly; 83int mce_ignore_ce __read_mostly; 84int mce_ser __read_mostly; 85 86struct mce_bank *mce_banks __read_mostly; 87 88/* User mode helper program triggered by machine check event */ 89static unsigned long mce_need_notify; 90static char mce_helper[128]; 91static char *mce_helper_argv[2] = { mce_helper, NULL }; 92 93static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); 94 95static DEFINE_PER_CPU(struct mce, mces_seen); 96static int cpu_missing; 97 98/* MCA banks polled by the period polling timer for corrected events */ 99DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { 100 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 101}; 102 103static DEFINE_PER_CPU(struct work_struct, mce_work); 104 105/* 106 * CPU/chipset specific EDAC code can register a notifier call here to print 107 * MCE errors in a human-readable form. 
108 */ 109ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); 110 111/* Do initial initialization of a struct mce */ 112void mce_setup(struct mce *m) 113{ 114 memset(m, 0, sizeof(struct mce)); 115 m->cpu = m->extcpu = smp_processor_id(); 116 rdtscll(m->tsc); 117 /* We hope get_seconds stays lockless */ 118 m->time = get_seconds(); 119 m->cpuvendor = boot_cpu_data.x86_vendor; 120 m->cpuid = cpuid_eax(1); 121 m->socketid = cpu_data(m->extcpu).phys_proc_id; 122 m->apicid = cpu_data(m->extcpu).initial_apicid; 123 rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); 124} 125 126DEFINE_PER_CPU(struct mce, injectm); 127EXPORT_PER_CPU_SYMBOL_GPL(injectm); 128 129/* 130 * Lockless MCE logging infrastructure. 131 * This avoids deadlocks on printk locks without having to break locks. Also 132 * separate MCEs from kernel messages to avoid bogus bug reports. 133 */ 134 135static struct mce_log mcelog = { 136 .signature = MCE_LOG_SIGNATURE, 137 .len = MCE_LOG_LEN, 138 .recordlen = sizeof(struct mce), 139}; 140 141void mce_log(struct mce *mce) 142{ 143 unsigned next, entry; 144 int ret = 0; 145 146 /* Emit the trace record: */ 147 trace_mce_record(mce); 148 149 ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); 150 if (ret == NOTIFY_STOP) 151 return; 152 153 mce->finished = 0; 154 wmb(); 155 for (;;) { 156 entry = rcu_dereference_check_mce(mcelog.next); 157 for (;;) { 158 159 /* 160 * When the buffer fills up discard new entries. 161 * Assume that the earlier errors are the more 162 * interesting ones: 163 */ 164 if (entry >= MCE_LOG_LEN) { 165 set_bit(MCE_OVERFLOW, 166 (unsigned long *)&mcelog.flags); 167 return; 168 } 169 /* Old left over entry. Skip: */ 170 if (mcelog.entry[entry].finished) { 171 entry++; 172 continue; 173 } 174 break; 175 } 176 smp_rmb(); 177 next = entry + 1; 178 if (cmpxchg(&mcelog.next, entry, next) == entry) 179 break; 180 } 181 memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); 182 wmb(); 183 mcelog.entry[entry].finished = 1; 184 wmb(); 185 186 mce->finished = 1; 187 set_bit(0, &mce_need_notify); 188} 189 190static void drain_mcelog_buffer(void) 191{ 192 unsigned int next, i, prev = 0; 193 194 next = rcu_dereference_check_mce(mcelog.next); 195 196 do { 197 struct mce *m; 198 199 /* drain what was logged during boot */ 200 for (i = prev; i < next; i++) { 201 unsigned long start = jiffies; 202 unsigned retries = 1; 203 204 m = &mcelog.entry[i]; 205 206 while (!m->finished) { 207 if (time_after_eq(jiffies, start + 2*retries)) 208 retries++; 209 210 cpu_relax(); 211 212 if (!m->finished && retries >= 4) { 213 pr_err("MCE: skipping error being logged currently!\n"); 214 break; 215 } 216 } 217 smp_rmb(); 218 atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); 219 } 220 221 memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); 222 prev = next; 223 next = cmpxchg(&mcelog.next, prev, 0); 224 } while (next != prev); 225} 226 227 228void mce_register_decode_chain(struct notifier_block *nb) 229{ 230 atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); 231 drain_mcelog_buffer(); 232} 233EXPORT_SYMBOL_GPL(mce_register_decode_chain); 234 235void mce_unregister_decode_chain(struct notifier_block *nb) 236{ 237 atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); 238} 239EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); 240 241static void print_mce(struct mce *m) 242{ 243 int ret = 0; 244 245 pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", 246 m->extcpu, m->mcgstatus, m->bank, m->status); 247 248 if (m->ip) { 249 pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> 
", 250 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", 251 m->cs, m->ip); 252 253 if (m->cs == __KERNEL_CS) 254 print_symbol("{%s}", m->ip); 255 pr_cont("\n"); 256 } 257 258 pr_emerg(HW_ERR "TSC %llx ", m->tsc); 259 if (m->addr) 260 pr_cont("ADDR %llx ", m->addr); 261 if (m->misc) 262 pr_cont("MISC %llx ", m->misc); 263 264 pr_cont("\n"); 265 /* 266 * Note this output is parsed by external tools and old fields 267 * should not be changed. 268 */ 269 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", 270 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, 271 cpu_data(m->extcpu).microcode); 272 273 /* 274 * Print out human-readable details about the MCE error, 275 * (if the CPU has an implementation for that) 276 */ 277 ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); 278 if (ret == NOTIFY_STOP) 279 return; 280 281 pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); 282} 283 284#define PANIC_TIMEOUT 5 /* 5 seconds */ 285 286static atomic_t mce_paniced; 287 288static int fake_panic; 289static atomic_t mce_fake_paniced; 290 291/* Panic in progress. Enable interrupts and wait for final IPI */ 292static void wait_for_panic(void) 293{ 294 long timeout = PANIC_TIMEOUT*USEC_PER_SEC; 295 296 preempt_disable(); 297 local_irq_enable(); 298 while (timeout-- > 0) 299 udelay(1); 300 if (panic_timeout == 0) 301 panic_timeout = mce_panic_timeout; 302 panic("Panicing machine check CPU died"); 303} 304 305static void mce_panic(char *msg, struct mce *final, char *exp) 306{ 307 int i, apei_err = 0; 308 309 if (!fake_panic) { 310 /* 311 * Make sure only one CPU runs in machine check panic 312 */ 313 if (atomic_inc_return(&mce_paniced) > 1) 314 wait_for_panic(); 315 barrier(); 316 317 bust_spinlocks(1); 318 console_verbose(); 319 } else { 320 /* Don't log too much for fake panic */ 321 if (atomic_inc_return(&mce_fake_paniced) > 1) 322 return; 323 } 324 /* First print corrected ones that are still unlogged */ 325 for (i = 0; i < MCE_LOG_LEN; i++) { 326 struct mce *m = &mcelog.entry[i]; 327 if (!(m->status & MCI_STATUS_VAL)) 328 continue; 329 if (!(m->status & MCI_STATUS_UC)) { 330 print_mce(m); 331 if (!apei_err) 332 apei_err = apei_write_mce(m); 333 } 334 } 335 /* Now print uncorrected but with the final one last */ 336 for (i = 0; i < MCE_LOG_LEN; i++) { 337 struct mce *m = &mcelog.entry[i]; 338 if (!(m->status & MCI_STATUS_VAL)) 339 continue; 340 if (!(m->status & MCI_STATUS_UC)) 341 continue; 342 if (!final || memcmp(m, final, sizeof(struct mce))) { 343 print_mce(m); 344 if (!apei_err) 345 apei_err = apei_write_mce(m); 346 } 347 } 348 if (final) { 349 print_mce(final); 350 if (!apei_err) 351 apei_err = apei_write_mce(final); 352 } 353 if (cpu_missing) 354 pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n"); 355 if (exp) 356 pr_emerg(HW_ERR "Machine check: %s\n", exp); 357 if (!fake_panic) { 358 if (panic_timeout == 0) 359 panic_timeout = mce_panic_timeout; 360 panic(msg); 361 } else 362 pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); 363} 364 365/* Support code for software error injection */ 366 367static int msr_to_offset(u32 msr) 368{ 369 unsigned bank = __this_cpu_read(injectm.bank); 370 371 if (msr == rip_msr) 372 return offsetof(struct mce, ip); 373 if (msr == MSR_IA32_MCx_STATUS(bank)) 374 return offsetof(struct mce, status); 375 if (msr == MSR_IA32_MCx_ADDR(bank)) 376 return offsetof(struct mce, addr); 377 if (msr == MSR_IA32_MCx_MISC(bank)) 378 return offsetof(struct mce, misc); 379 if (msr == 
MSR_IA32_MCG_STATUS) 380 return offsetof(struct mce, mcgstatus); 381 return -1; 382} 383 384/* MSR access wrappers used for error injection */ 385static u64 mce_rdmsrl(u32 msr) 386{ 387 u64 v; 388 389 if (__this_cpu_read(injectm.finished)) { 390 int offset = msr_to_offset(msr); 391 392 if (offset < 0) 393 return 0; 394 return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); 395 } 396 397 if (rdmsrl_safe(msr, &v)) { 398 WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); 399 /* 400 * Return zero in case the access faulted. This should 401 * not happen normally but can happen if the CPU does 402 * something weird, or if the code is buggy. 403 */ 404 v = 0; 405 } 406 407 return v; 408} 409 410static void mce_wrmsrl(u32 msr, u64 v) 411{ 412 if (__this_cpu_read(injectm.finished)) { 413 int offset = msr_to_offset(msr); 414 415 if (offset >= 0) 416 *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; 417 return; 418 } 419 wrmsrl(msr, v); 420} 421 422/* 423 * Collect all global (w.r.t. this processor) status about this machine 424 * check into our "mce" struct so that we can use it later to assess 425 * the severity of the problem as we read per-bank specific details. 426 */ 427static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) 428{ 429 mce_setup(m); 430 431 m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 432 if (regs) { 433 /* 434 * Get the address of the instruction at the time of 435 * the machine check error. 436 */ 437 if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) { 438 m->ip = regs->ip; 439 m->cs = regs->cs; 440 } 441 /* Use accurate RIP reporting if available. */ 442 if (rip_msr) 443 m->ip = mce_rdmsrl(rip_msr); 444 } 445} 446 447/* 448 * Simple lockless ring to communicate PFNs from the exception handler with the 449 * process context work function. This is vastly simplified because there's 450 * only a single reader and a single writer. 
451 */ 452#define MCE_RING_SIZE 16 /* we use one entry less */ 453 454struct mce_ring { 455 unsigned short start; 456 unsigned short end; 457 unsigned long ring[MCE_RING_SIZE]; 458}; 459static DEFINE_PER_CPU(struct mce_ring, mce_ring); 460 461/* Runs with CPU affinity in workqueue */ 462static int mce_ring_empty(void) 463{ 464 struct mce_ring *r = &__get_cpu_var(mce_ring); 465 466 return r->start == r->end; 467} 468 469static int mce_ring_get(unsigned long *pfn) 470{ 471 struct mce_ring *r; 472 int ret = 0; 473 474 *pfn = 0; 475 get_cpu(); 476 r = &__get_cpu_var(mce_ring); 477 if (r->start == r->end) 478 goto out; 479 *pfn = r->ring[r->start]; 480 r->start = (r->start + 1) % MCE_RING_SIZE; 481 ret = 1; 482out: 483 put_cpu(); 484 return ret; 485} 486 487/* Always runs in MCE context with preempt off */ 488static int mce_ring_add(unsigned long pfn) 489{ 490 struct mce_ring *r = &__get_cpu_var(mce_ring); 491 unsigned next; 492 493 next = (r->end + 1) % MCE_RING_SIZE; 494 if (next == r->start) 495 return -1; 496 r->ring[r->end] = pfn; 497 wmb(); 498 r->end = next; 499 return 0; 500} 501 502int mce_available(struct cpuinfo_x86 *c) 503{ 504 if (mce_disabled) 505 return 0; 506 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); 507} 508 509static void mce_schedule_work(void) 510{ 511 if (!mce_ring_empty()) { 512 struct work_struct *work = &__get_cpu_var(mce_work); 513 if (!work_pending(work)) 514 schedule_work(work); 515 } 516} 517 518DEFINE_PER_CPU(struct irq_work, mce_irq_work); 519 520static void mce_irq_work_cb(struct irq_work *entry) 521{ 522 mce_notify_irq(); 523 mce_schedule_work(); 524} 525 526static void mce_report_event(struct pt_regs *regs) 527{ 528 if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { 529 mce_notify_irq(); 530 /* 531 * Triggering the work queue here is just an insurance 532 * policy in case the syscall exit notify handler 533 * doesn't run soon enough or ends up running on the 534 * wrong CPU (can happen when audit sleeps) 535 */ 536 mce_schedule_work(); 537 return; 538 } 539 540 irq_work_queue(&__get_cpu_var(mce_irq_work)); 541} 542 543DEFINE_PER_CPU(unsigned, mce_poll_count); 544 545/* 546 * Poll for corrected events or events that happened before reset. 547 * Those are just logged through /dev/mcelog. 548 * 549 * This is executed in standard interrupt context. 550 * 551 * Note: spec recommends to panic for fatal unsignalled 552 * errors here. However this would be quite problematic -- 553 * we would need to reimplement the Monarch handling and 554 * it would mess up the exclusion between exception handler 555 * and poll hander -- * so we skip this for now. 556 * These cases should not happen anyways, or only when the CPU 557 * is already totally * confused. In this case it's likely it will 558 * not fully execute the machine check handler either. 559 */ 560void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) 561{ 562 struct mce m; 563 int i; 564 565 percpu_inc(mce_poll_count); 566 567 mce_gather_info(&m, NULL); 568 569 for (i = 0; i < banks; i++) { 570 if (!mce_banks[i].ctl || !test_bit(i, *b)) 571 continue; 572 573 m.misc = 0; 574 m.addr = 0; 575 m.bank = i; 576 m.tsc = 0; 577 578 barrier(); 579 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 580 if (!(m.status & MCI_STATUS_VAL)) 581 continue; 582 583 /* 584 * Uncorrected or signalled events are handled by the exception 585 * handler when it is enabled, so don't process those here. 586 * 587 * TBD do the same check for MCI_STATUS_EN here? 
588 */ 589 if (!(flags & MCP_UC) && 590 (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) 591 continue; 592 593 if (m.status & MCI_STATUS_MISCV) 594 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); 595 if (m.status & MCI_STATUS_ADDRV) 596 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); 597 598 if (!(flags & MCP_TIMESTAMP)) 599 m.tsc = 0; 600 /* 601 * Don't get the IP here because it's unlikely to 602 * have anything to do with the actual error location. 603 */ 604 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) 605 mce_log(&m); 606 607 /* 608 * Clear state for this bank. 609 */ 610 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 611 } 612 613 /* 614 * Don't clear MCG_STATUS here because it's only defined for 615 * exceptions. 616 */ 617 618 sync_core(); 619} 620EXPORT_SYMBOL_GPL(machine_check_poll); 621 622/* 623 * Do a quick check if any of the events requires a panic. 624 * This decides if we keep the events around or clear them. 625 */ 626static int mce_no_way_out(struct mce *m, char **msg) 627{ 628 int i; 629 630 for (i = 0; i < banks; i++) { 631 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 632 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) 633 return 1; 634 } 635 return 0; 636} 637 638/* 639 * Variable to establish order between CPUs while scanning. 640 * Each CPU spins initially until executing is equal its number. 641 */ 642static atomic_t mce_executing; 643 644/* 645 * Defines order of CPUs on entry. First CPU becomes Monarch. 646 */ 647static atomic_t mce_callin; 648 649/* 650 * Check if a timeout waiting for other CPUs happened. 651 */ 652static int mce_timed_out(u64 *t) 653{ 654 /* 655 * The others already did panic for some reason. 656 * Bail out like in a timeout. 657 * rmb() to tell the compiler that system_state 658 * might have been modified by someone else. 659 */ 660 rmb(); 661 if (atomic_read(&mce_paniced)) 662 wait_for_panic(); 663 if (!monarch_timeout) 664 goto out; 665 if ((s64)*t < SPINUNIT) { 666 /* CHECKME: Make panic default for 1 too? */ 667 if (tolerant < 1) 668 mce_panic("Timeout synchronizing machine check over CPUs", 669 NULL, NULL); 670 cpu_missing = 1; 671 return 1; 672 } 673 *t -= SPINUNIT; 674out: 675 touch_nmi_watchdog(); 676 return 0; 677} 678 679/* 680 * The Monarch's reign. The Monarch is the CPU who entered 681 * the machine check handler first. It waits for the others to 682 * raise the exception too and then grades them. When any 683 * error is fatal panic. Only then let the others continue. 684 * 685 * The other CPUs entering the MCE handler will be controlled by the 686 * Monarch. They are called Subjects. 687 * 688 * This way we prevent any potential data corruption in a unrecoverable case 689 * and also makes sure always all CPU's errors are examined. 690 * 691 * Also this detects the case of a machine check event coming from outer 692 * space (not detected by any CPUs) In this case some external agent wants 693 * us to shut down, so panic too. 694 * 695 * The other CPUs might still decide to panic if the handler happens 696 * in a unrecoverable place, but in this case the system is in a semi-stable 697 * state and won't corrupt anything by itself. It's ok to let the others 698 * continue for a bit first. 699 * 700 * All the spin loops have timeouts; when a timeout happens a CPU 701 * typically elects itself to be Monarch. 
702 */ 703static void mce_reign(void) 704{ 705 int cpu; 706 struct mce *m = NULL; 707 int global_worst = 0; 708 char *msg = NULL; 709 char *nmsg = NULL; 710 711 /* 712 * This CPU is the Monarch and the other CPUs have run 713 * through their handlers. 714 * Grade the severity of the errors of all the CPUs. 715 */ 716 for_each_possible_cpu(cpu) { 717 int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, 718 &nmsg); 719 if (severity > global_worst) { 720 msg = nmsg; 721 global_worst = severity; 722 m = &per_cpu(mces_seen, cpu); 723 } 724 } 725 726 /* 727 * Cannot recover? Panic here then. 728 * This dumps all the mces in the log buffer and stops the 729 * other CPUs. 730 */ 731 if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) 732 mce_panic("Fatal Machine check", m, msg); 733 734 /* 735 * For UC somewhere we let the CPU who detects it handle it. 736 * Also must let continue the others, otherwise the handling 737 * CPU could deadlock on a lock. 738 */ 739 740 /* 741 * No machine check event found. Must be some external 742 * source or one CPU is hung. Panic. 743 */ 744 if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) 745 mce_panic("Machine check from unknown source", NULL, NULL); 746 747 /* 748 * Now clear all the mces_seen so that they don't reappear on 749 * the next mce. 750 */ 751 for_each_possible_cpu(cpu) 752 memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); 753} 754 755static atomic_t global_nwo; 756 757/* 758 * Start of Monarch synchronization. This waits until all CPUs have 759 * entered the exception handler and then determines if any of them 760 * saw a fatal event that requires panic. Then it executes them 761 * in the entry order. 762 * TBD double check parallel CPU hotunplug 763 */ 764static int mce_start(int *no_way_out) 765{ 766 int order; 767 int cpus = num_online_cpus(); 768 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 769 770 if (!timeout) 771 return -1; 772 773 atomic_add(*no_way_out, &global_nwo); 774 /* 775 * global_nwo should be updated before mce_callin 776 */ 777 smp_wmb(); 778 order = atomic_inc_return(&mce_callin); 779 780 /* 781 * Wait for everyone. 782 */ 783 while (atomic_read(&mce_callin) != cpus) { 784 if (mce_timed_out(&timeout)) { 785 atomic_set(&global_nwo, 0); 786 return -1; 787 } 788 ndelay(SPINUNIT); 789 } 790 791 /* 792 * mce_callin should be read before global_nwo 793 */ 794 smp_rmb(); 795 796 if (order == 1) { 797 /* 798 * Monarch: Starts executing now, the others wait. 799 */ 800 atomic_set(&mce_executing, 1); 801 } else { 802 /* 803 * Subject: Now start the scanning loop one by one in 804 * the original callin order. 805 * This way when there are any shared banks it will be 806 * only seen by one CPU before cleared, avoiding duplicates. 807 */ 808 while (atomic_read(&mce_executing) < order) { 809 if (mce_timed_out(&timeout)) { 810 atomic_set(&global_nwo, 0); 811 return -1; 812 } 813 ndelay(SPINUNIT); 814 } 815 } 816 817 /* 818 * Cache the global no_way_out state. 819 */ 820 *no_way_out = atomic_read(&global_nwo); 821 822 return order; 823} 824 825/* 826 * Synchronize between CPUs after main scanning loop. 827 * This invokes the bulk of the Monarch processing. 828 */ 829static int mce_end(int order) 830{ 831 int ret = -1; 832 u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; 833 834 if (!timeout) 835 goto reset; 836 if (order < 0) 837 goto reset; 838 839 /* 840 * Allow others to run. 841 */ 842 atomic_inc(&mce_executing); 843 844 if (order == 1) { 845 /* CHECKME: Can this race with a parallel hotplug? 
*/ 846 int cpus = num_online_cpus(); 847 848 /* 849 * Monarch: Wait for everyone to go through their scanning 850 * loops. 851 */ 852 while (atomic_read(&mce_executing) <= cpus) { 853 if (mce_timed_out(&timeout)) 854 goto reset; 855 ndelay(SPINUNIT); 856 } 857 858 mce_reign(); 859 barrier(); 860 ret = 0; 861 } else { 862 /* 863 * Subject: Wait for Monarch to finish. 864 */ 865 while (atomic_read(&mce_executing) != 0) { 866 if (mce_timed_out(&timeout)) 867 goto reset; 868 ndelay(SPINUNIT); 869 } 870 871 /* 872 * Don't reset anything. That's done by the Monarch. 873 */ 874 return 0; 875 } 876 877 /* 878 * Reset all global state. 879 */ 880reset: 881 atomic_set(&global_nwo, 0); 882 atomic_set(&mce_callin, 0); 883 barrier(); 884 885 /* 886 * Let others run again. 887 */ 888 atomic_set(&mce_executing, 0); 889 return ret; 890} 891 892/* 893 * Check if the address reported by the CPU is in a format we can parse. 894 * It would be possible to add code for most other cases, but all would 895 * be somewhat complicated (e.g. segment offset would require an instruction 896 * parser). So only support physical addresses up to page granuality for now. 897 */ 898static int mce_usable_address(struct mce *m) 899{ 900 if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) 901 return 0; 902 if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT) 903 return 0; 904 if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS) 905 return 0; 906 return 1; 907} 908 909static void mce_clear_state(unsigned long *toclear) 910{ 911 int i; 912 913 for (i = 0; i < banks; i++) { 914 if (test_bit(i, toclear)) 915 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 916 } 917} 918 919/* 920 * The actual machine check handler. This only handles real 921 * exceptions when something got corrupted coming in through int 18. 922 * 923 * This is executed in NMI context not subject to normal locking rules. This 924 * implies that most kernel services cannot be safely used. Don't even 925 * think about putting a printk in there! 926 * 927 * On Intel systems this is entered on all CPUs in parallel through 928 * MCE broadcast. However some CPUs might be broken beyond repair, 929 * so be always careful when synchronizing with others. 930 */ 931void do_machine_check(struct pt_regs *regs, long error_code) 932{ 933 struct mce m, *final; 934 int i; 935 int worst = 0; 936 int severity; 937 /* 938 * Establish sequential order between the CPUs entering the machine 939 * check handler. 940 */ 941 int order; 942 /* 943 * If no_way_out gets set, there is no safe way to recover from this 944 * MCE. If tolerant is cranked up, we'll try anyway. 945 */ 946 int no_way_out = 0; 947 /* 948 * If kill_it gets set, there might be a way to recover from this 949 * error. 950 */ 951 int kill_it = 0; 952 DECLARE_BITMAP(toclear, MAX_NR_BANKS); 953 char *msg = "Unknown"; 954 955 atomic_inc(&mce_entry); 956 957 percpu_inc(mce_exception_count); 958 959 if (!banks) 960 goto out; 961 962 mce_gather_info(&m, regs); 963 964 final = &__get_cpu_var(mces_seen); 965 *final = m; 966 967 no_way_out = mce_no_way_out(&m, &msg); 968 969 barrier(); 970 971 /* 972 * When no restart IP must always kill or panic. 973 */ 974 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 975 kill_it = 1; 976 977 /* 978 * Go through all the banks in exclusion of the other CPUs. 979 * This way we don't report duplicated events on shared banks 980 * because the first one to see it will clear it. 
981 */ 982 order = mce_start(&no_way_out); 983 for (i = 0; i < banks; i++) { 984 __clear_bit(i, toclear); 985 if (!mce_banks[i].ctl) 986 continue; 987 988 m.misc = 0; 989 m.addr = 0; 990 m.bank = i; 991 992 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); 993 if ((m.status & MCI_STATUS_VAL) == 0) 994 continue; 995 996 /* 997 * Non uncorrected or non signaled errors are handled by 998 * machine_check_poll. Leave them alone, unless this panics. 999 */ 1000 if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && 1001 !no_way_out) 1002 continue; 1003 1004 /* 1005 * Set taint even when machine check was not enabled. 1006 */ 1007 add_taint(TAINT_MACHINE_CHECK); 1008 1009 severity = mce_severity(&m, tolerant, NULL); 1010 1011 /* 1012 * When machine check was for corrected handler don't touch, 1013 * unless we're panicing. 1014 */ 1015 if (severity == MCE_KEEP_SEVERITY && !no_way_out) 1016 continue; 1017 __set_bit(i, toclear); 1018 if (severity == MCE_NO_SEVERITY) { 1019 /* 1020 * Machine check event was not enabled. Clear, but 1021 * ignore. 1022 */ 1023 continue; 1024 } 1025 1026 /* 1027 * Kill on action required. 1028 */ 1029 if (severity == MCE_AR_SEVERITY) 1030 kill_it = 1; 1031 1032 if (m.status & MCI_STATUS_MISCV) 1033 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); 1034 if (m.status & MCI_STATUS_ADDRV) 1035 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); 1036 1037 /* 1038 * Action optional error. Queue address for later processing. 1039 * When the ring overflows we just ignore the AO error. 1040 * RED-PEN add some logging mechanism when 1041 * usable_address or mce_add_ring fails. 1042 * RED-PEN don't ignore overflow for tolerant == 0 1043 */ 1044 if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) 1045 mce_ring_add(m.addr >> PAGE_SHIFT); 1046 1047 mce_log(&m); 1048 1049 if (severity > worst) { 1050 *final = m; 1051 worst = severity; 1052 } 1053 } 1054 1055 if (!no_way_out) 1056 mce_clear_state(toclear); 1057 1058 /* 1059 * Do most of the synchronization with other CPUs. 1060 * When there's any problem use only local no_way_out state. 1061 */ 1062 if (mce_end(order) < 0) 1063 no_way_out = worst >= MCE_PANIC_SEVERITY; 1064 1065 /* 1066 * If we have decided that we just CAN'T continue, and the user 1067 * has not set tolerant to an insane level, give up and die. 1068 * 1069 * This is mainly used in the case when the system doesn't 1070 * support MCE broadcasting or it has been disabled. 1071 */ 1072 if (no_way_out && tolerant < 3) 1073 mce_panic("Fatal machine check on current CPU", final, msg); 1074 1075 /* 1076 * If the error seems to be unrecoverable, something should be 1077 * done. Try to kill as little as possible. If we can kill just 1078 * one task, do that. If the user has set the tolerance very 1079 * high, don't try to do anything at all. 1080 */ 1081 1082 if (kill_it && tolerant < 3) 1083 force_sig(SIGBUS, current); 1084 1085 /* notify userspace ASAP */ 1086 set_thread_flag(TIF_MCE_NOTIFY); 1087 1088 if (worst > 0) 1089 mce_report_event(regs); 1090 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); 1091out: 1092 atomic_dec(&mce_entry); 1093 sync_core(); 1094} 1095EXPORT_SYMBOL_GPL(do_machine_check); 1096 1097/* dummy to break dependency. actual code is in mm/memory-failure.c */ 1098void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) 1099{ 1100 printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); 1101} 1102 1103/* 1104 * Called after mce notification in process context. This code 1105 * is allowed to sleep. 
Call the high level VM handler to process 1106 * any corrupted pages. 1107 * Assume that the work queue code only calls this one at a time 1108 * per CPU. 1109 * Note we don't disable preemption, so this code might run on the wrong 1110 * CPU. In this case the event is picked up by the scheduled work queue. 1111 * This is merely a fast path to expedite processing in some common 1112 * cases. 1113 */ 1114void mce_notify_process(void) 1115{ 1116 unsigned long pfn; 1117 mce_notify_irq(); 1118 while (mce_ring_get(&pfn)) 1119 memory_failure(pfn, MCE_VECTOR); 1120} 1121 1122static void mce_process_work(struct work_struct *dummy) 1123{ 1124 mce_notify_process(); 1125} 1126 1127#ifdef CONFIG_X86_MCE_INTEL 1128/*** 1129 * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog 1130 * @cpu: The CPU on which the event occurred. 1131 * @status: Event status information 1132 * 1133 * This function should be called by the thermal interrupt after the 1134 * event has been processed and the decision was made to log the event 1135 * further. 1136 * 1137 * The status parameter will be saved to the 'status' field of 'struct mce' 1138 * and historically has been the register value of the 1139 * MSR_IA32_THERMAL_STATUS (Intel) msr. 1140 */ 1141void mce_log_therm_throt_event(__u64 status) 1142{ 1143 struct mce m; 1144 1145 mce_setup(&m); 1146 m.bank = MCE_THERMAL_BANK; 1147 m.status = status; 1148 mce_log(&m); 1149} 1150#endif /* CONFIG_X86_MCE_INTEL */ 1151 1152/* 1153 * Periodic polling timer for "silent" machine check errors. If the 1154 * poller finds an MCE, poll 2x faster. When the poller finds no more 1155 * errors, poll 2x slower (up to check_interval seconds). 1156 */ 1157static int check_interval = 5 * 60; /* 5 minutes */ 1158 1159static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ 1160static DEFINE_PER_CPU(struct timer_list, mce_timer); 1161 1162static void mce_start_timer(unsigned long data) 1163{ 1164 struct timer_list *t = &per_cpu(mce_timer, data); 1165 int *n; 1166 1167 WARN_ON(smp_processor_id() != data); 1168 1169 if (mce_available(__this_cpu_ptr(&cpu_info))) { 1170 machine_check_poll(MCP_TIMESTAMP, 1171 &__get_cpu_var(mce_poll_banks)); 1172 } 1173 1174 /* 1175 * Alert userspace if needed. If we logged an MCE, reduce the 1176 * polling interval, otherwise increase the polling interval. 1177 */ 1178 n = &__get_cpu_var(mce_next_interval); 1179 if (mce_notify_irq()) 1180 *n = max(*n/2, HZ/100); 1181 else 1182 *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); 1183 1184 t->expires = jiffies + *n; 1185 add_timer_on(t, smp_processor_id()); 1186} 1187 1188/* Must not be called in IRQ context where del_timer_sync() can deadlock */ 1189static void mce_timer_delete_all(void) 1190{ 1191 int cpu; 1192 1193 for_each_online_cpu(cpu) 1194 del_timer_sync(&per_cpu(mce_timer, cpu)); 1195} 1196 1197static void mce_do_trigger(struct work_struct *work) 1198{ 1199 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); 1200} 1201 1202static DECLARE_WORK(mce_trigger_work, mce_do_trigger); 1203 1204/* 1205 * Notify the user(s) about new machine check events. 1206 * Can be called from interrupt context, but not from machine check/NMI 1207 * context. 
1208 */ 1209int mce_notify_irq(void) 1210{ 1211 /* Not more than two messages every minute */ 1212 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); 1213 1214 clear_thread_flag(TIF_MCE_NOTIFY); 1215 1216 if (test_and_clear_bit(0, &mce_need_notify)) { 1217 /* wake processes polling /dev/mcelog */ 1218 wake_up_interruptible(&mce_chrdev_wait); 1219 1220 /* 1221 * There is no risk of missing notifications because 1222 * work_pending is always cleared before the function is 1223 * executed. 1224 */ 1225 if (mce_helper[0] && !work_pending(&mce_trigger_work)) 1226 schedule_work(&mce_trigger_work); 1227 1228 if (__ratelimit(&ratelimit)) 1229 pr_info(HW_ERR "Machine check events logged\n"); 1230 1231 return 1; 1232 } 1233 return 0; 1234} 1235EXPORT_SYMBOL_GPL(mce_notify_irq); 1236 1237static int __cpuinit __mcheck_cpu_mce_banks_init(void) 1238{ 1239 int i; 1240 1241 mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); 1242 if (!mce_banks) 1243 return -ENOMEM; 1244 for (i = 0; i < banks; i++) { 1245 struct mce_bank *b = &mce_banks[i]; 1246 1247 b->ctl = -1ULL; 1248 b->init = 1; 1249 } 1250 return 0; 1251} 1252 1253/* 1254 * Initialize Machine Checks for a CPU. 1255 */ 1256static int __cpuinit __mcheck_cpu_cap_init(void) 1257{ 1258 unsigned b; 1259 u64 cap; 1260 1261 rdmsrl(MSR_IA32_MCG_CAP, cap); 1262 1263 b = cap & MCG_BANKCNT_MASK; 1264 if (!banks) 1265 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); 1266 1267 if (b > MAX_NR_BANKS) { 1268 printk(KERN_WARNING 1269 "MCE: Using only %u machine check banks out of %u\n", 1270 MAX_NR_BANKS, b); 1271 b = MAX_NR_BANKS; 1272 } 1273 1274 /* Don't support asymmetric configurations today */ 1275 WARN_ON(banks != 0 && b != banks); 1276 banks = b; 1277 if (!mce_banks) { 1278 int err = __mcheck_cpu_mce_banks_init(); 1279 1280 if (err) 1281 return err; 1282 } 1283 1284 /* Use accurate RIP reporting if available. */ 1285 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) 1286 rip_msr = MSR_IA32_MCG_EIP; 1287 1288 if (cap & MCG_SER_P) 1289 mce_ser = 1; 1290 1291 return 0; 1292} 1293 1294static void __mcheck_cpu_init_generic(void) 1295{ 1296 mce_banks_t all_banks; 1297 u64 cap; 1298 int i; 1299 1300 /* 1301 * Log the machine checks left over from the previous reset. 1302 */ 1303 bitmap_fill(all_banks, MAX_NR_BANKS); 1304 machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); 1305 1306 set_in_cr4(X86_CR4_MCE); 1307 1308 rdmsrl(MSR_IA32_MCG_CAP, cap); 1309 if (cap & MCG_CTL_P) 1310 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 1311 1312 for (i = 0; i < banks; i++) { 1313 struct mce_bank *b = &mce_banks[i]; 1314 1315 if (!b->init) 1316 continue; 1317 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); 1318 wrmsrl(MSR_IA32_MCx_STATUS(i), 0); 1319 } 1320} 1321 1322/* Add per CPU specific workarounds here */ 1323static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) 1324{ 1325 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1326 pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); 1327 return -EOPNOTSUPP; 1328 } 1329 1330 /* This should be disabled by the BIOS, but isn't always */ 1331 if (c->x86_vendor == X86_VENDOR_AMD) { 1332 if (c->x86 == 15 && banks > 4) { 1333 /* 1334 * disable GART TBL walk error reporting, which 1335 * trips off incorrectly with the IOMMU & 3ware 1336 * & Cerberus: 1337 */ 1338 clear_bit(10, (unsigned long *)&mce_banks[4].ctl); 1339 } 1340 if (c->x86 <= 17 && mce_bootlog < 0) { 1341 /* 1342 * Lots of broken BIOS around that don't clear them 1343 * by default and leave crap in there. 
Don't log: 1344 */ 1345 mce_bootlog = 0; 1346 } 1347 /* 1348 * Various K7s with broken bank 0 around. Always disable 1349 * by default. 1350 */ 1351 if (c->x86 == 6 && banks > 0) 1352 mce_banks[0].ctl = 0; 1353 } 1354 1355 if (c->x86_vendor == X86_VENDOR_INTEL) { 1356 /* 1357 * SDM documents that on family 6 bank 0 should not be written 1358 * because it aliases to another special BIOS controlled 1359 * register. 1360 * But it's not aliased anymore on model 0x1a+ 1361 * Don't ignore bank 0 completely because there could be a 1362 * valid event later, merely don't write CTL0. 1363 */ 1364 1365 if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) 1366 mce_banks[0].init = 0; 1367 1368 /* 1369 * All newer Intel systems support MCE broadcasting. Enable 1370 * synchronization with a one second timeout. 1371 */ 1372 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && 1373 monarch_timeout < 0) 1374 monarch_timeout = USEC_PER_SEC; 1375 1376 /* 1377 * There are also broken BIOSes on some Pentium M and 1378 * earlier systems: 1379 */ 1380 if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) 1381 mce_bootlog = 0; 1382 } 1383 if (monarch_timeout < 0) 1384 monarch_timeout = 0; 1385 if (mce_bootlog != 0) 1386 mce_panic_timeout = 30; 1387 1388 return 0; 1389} 1390 1391static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) 1392{ 1393 if (c->x86 != 5) 1394 return 0; 1395 1396 switch (c->x86_vendor) { 1397 case X86_VENDOR_INTEL: 1398 intel_p5_mcheck_init(c); 1399 return 1; 1400 break; 1401 case X86_VENDOR_CENTAUR: 1402 winchip_mcheck_init(c); 1403 return 1; 1404 break; 1405 } 1406 1407 return 0; 1408} 1409 1410static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) 1411{ 1412 switch (c->x86_vendor) { 1413 case X86_VENDOR_INTEL: 1414 mce_intel_feature_init(c); 1415 break; 1416 case X86_VENDOR_AMD: 1417 mce_amd_feature_init(c); 1418 break; 1419 default: 1420 break; 1421 } 1422} 1423 1424static void __mcheck_cpu_init_timer(void) 1425{ 1426 struct timer_list *t = &__get_cpu_var(mce_timer); 1427 int *n = &__get_cpu_var(mce_next_interval); 1428 1429 setup_timer(t, mce_start_timer, smp_processor_id()); 1430 1431 if (mce_ignore_ce) 1432 return; 1433 1434 *n = check_interval * HZ; 1435 if (!*n) 1436 return; 1437 t->expires = round_jiffies(jiffies + *n); 1438 add_timer_on(t, smp_processor_id()); 1439} 1440 1441/* Handle unconfigured int18 (should never happen) */ 1442static void unexpected_machine_check(struct pt_regs *regs, long error_code) 1443{ 1444 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", 1445 smp_processor_id()); 1446} 1447 1448/* Call the installed machine check handler for this CPU setup. */ 1449void (*machine_check_vector)(struct pt_regs *, long error_code) = 1450 unexpected_machine_check; 1451 1452/* 1453 * Called for each booted CPU to set up machine checks. 
1454 * Must be called with preempt off: 1455 */ 1456void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) 1457{ 1458 if (mce_disabled) 1459 return; 1460 1461 if (__mcheck_cpu_ancient_init(c)) 1462 return; 1463 1464 if (!mce_available(c)) 1465 return; 1466 1467 if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { 1468 mce_disabled = 1; 1469 return; 1470 } 1471 1472 machine_check_vector = do_machine_check; 1473 1474 __mcheck_cpu_init_generic(); 1475 __mcheck_cpu_init_vendor(c); 1476 __mcheck_cpu_init_timer(); 1477 INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); 1478 init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb); 1479} 1480 1481/* 1482 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. 1483 */ 1484 1485static DEFINE_SPINLOCK(mce_chrdev_state_lock); 1486static int mce_chrdev_open_count; /* #times opened */ 1487static int mce_chrdev_open_exclu; /* already open exclusive? */ 1488 1489static int mce_chrdev_open(struct inode *inode, struct file *file) 1490{ 1491 spin_lock(&mce_chrdev_state_lock); 1492 1493 if (mce_chrdev_open_exclu || 1494 (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 1495 spin_unlock(&mce_chrdev_state_lock); 1496 1497 return -EBUSY; 1498 } 1499 1500 if (file->f_flags & O_EXCL) 1501 mce_chrdev_open_exclu = 1; 1502 mce_chrdev_open_count++; 1503 1504 spin_unlock(&mce_chrdev_state_lock); 1505 1506 return nonseekable_open(inode, file); 1507} 1508 1509static int mce_chrdev_release(struct inode *inode, struct file *file) 1510{ 1511 spin_lock(&mce_chrdev_state_lock); 1512 1513 mce_chrdev_open_count--; 1514 mce_chrdev_open_exclu = 0; 1515 1516 spin_unlock(&mce_chrdev_state_lock); 1517 1518 return 0; 1519} 1520 1521static void collect_tscs(void *data) 1522{ 1523 unsigned long *cpu_tsc = (unsigned long *)data; 1524 1525 rdtscll(cpu_tsc[smp_processor_id()]); 1526} 1527 1528static int mce_apei_read_done; 1529 1530/* Collect MCE record of previous boot in persistent storage via APEI ERST. */ 1531static int __mce_read_apei(char __user **ubuf, size_t usize) 1532{ 1533 int rc; 1534 u64 record_id; 1535 struct mce m; 1536 1537 if (usize < sizeof(struct mce)) 1538 return -EINVAL; 1539 1540 rc = apei_read_mce(&m, &record_id); 1541 /* Error or no more MCE record */ 1542 if (rc <= 0) { 1543 mce_apei_read_done = 1; 1544 /* 1545 * When ERST is disabled, mce_chrdev_read() should return 1546 * "no record" instead of "no device." 1547 */ 1548 if (rc == -ENODEV) 1549 return 0; 1550 return rc; 1551 } 1552 rc = -EFAULT; 1553 if (copy_to_user(*ubuf, &m, sizeof(struct mce))) 1554 return rc; 1555 /* 1556 * In fact, we should have cleared the record after that has 1557 * been flushed to the disk or sent to network in 1558 * /sbin/mcelog, but we have no interface to support that now, 1559 * so just clear it to avoid duplication. 
1560 */ 1561 rc = apei_clear_mce(record_id); 1562 if (rc) { 1563 mce_apei_read_done = 1; 1564 return rc; 1565 } 1566 *ubuf += sizeof(struct mce); 1567 1568 return 0; 1569} 1570 1571static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, 1572 size_t usize, loff_t *off) 1573{ 1574 char __user *buf = ubuf; 1575 unsigned long *cpu_tsc; 1576 unsigned prev, next; 1577 int i, err; 1578 1579 cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); 1580 if (!cpu_tsc) 1581 return -ENOMEM; 1582 1583 mutex_lock(&mce_chrdev_read_mutex); 1584 1585 if (!mce_apei_read_done) { 1586 err = __mce_read_apei(&buf, usize); 1587 if (err || buf != ubuf) 1588 goto out; 1589 } 1590 1591 next = rcu_dereference_check_mce(mcelog.next); 1592 1593 /* Only supports full reads right now */ 1594 err = -EINVAL; 1595 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) 1596 goto out; 1597 1598 err = 0; 1599 prev = 0; 1600 do { 1601 for (i = prev; i < next; i++) { 1602 unsigned long start = jiffies; 1603 struct mce *m = &mcelog.entry[i]; 1604 1605 while (!m->finished) { 1606 if (time_after_eq(jiffies, start + 2)) { 1607 memset(m, 0, sizeof(*m)); 1608 goto timeout; 1609 } 1610 cpu_relax(); 1611 } 1612 smp_rmb(); 1613 err |= copy_to_user(buf, m, sizeof(*m)); 1614 buf += sizeof(*m); 1615timeout: 1616 ; 1617 } 1618 1619 memset(mcelog.entry + prev, 0, 1620 (next - prev) * sizeof(struct mce)); 1621 prev = next; 1622 next = cmpxchg(&mcelog.next, prev, 0); 1623 } while (next != prev); 1624 1625 synchronize_sched(); 1626 1627 /* 1628 * Collect entries that were still getting written before the 1629 * synchronize. 1630 */ 1631 on_each_cpu(collect_tscs, cpu_tsc, 1); 1632 1633 for (i = next; i < MCE_LOG_LEN; i++) { 1634 struct mce *m = &mcelog.entry[i]; 1635 1636 if (m->finished && m->tsc < cpu_tsc[m->cpu]) { 1637 err |= copy_to_user(buf, m, sizeof(*m)); 1638 smp_rmb(); 1639 buf += sizeof(*m); 1640 memset(m, 0, sizeof(*m)); 1641 } 1642 } 1643 1644 if (err) 1645 err = -EFAULT; 1646 1647out: 1648 mutex_unlock(&mce_chrdev_read_mutex); 1649 kfree(cpu_tsc); 1650 1651 return err ? 
err : buf - ubuf; 1652} 1653 1654static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait) 1655{ 1656 poll_wait(file, &mce_chrdev_wait, wait); 1657 if (rcu_access_index(mcelog.next)) 1658 return POLLIN | POLLRDNORM; 1659 if (!mce_apei_read_done && apei_check_mce()) 1660 return POLLIN | POLLRDNORM; 1661 return 0; 1662} 1663 1664static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, 1665 unsigned long arg) 1666{ 1667 int __user *p = (int __user *)arg; 1668 1669 if (!capable(CAP_SYS_ADMIN)) 1670 return -EPERM; 1671 1672 switch (cmd) { 1673 case MCE_GET_RECORD_LEN: 1674 return put_user(sizeof(struct mce), p); 1675 case MCE_GET_LOG_LEN: 1676 return put_user(MCE_LOG_LEN, p); 1677 case MCE_GETCLEAR_FLAGS: { 1678 unsigned flags; 1679 1680 do { 1681 flags = mcelog.flags; 1682 } while (cmpxchg(&mcelog.flags, flags, 0) != flags); 1683 1684 return put_user(flags, p); 1685 } 1686 default: 1687 return -ENOTTY; 1688 } 1689} 1690 1691static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf, 1692 size_t usize, loff_t *off); 1693 1694void register_mce_write_callback(ssize_t (*fn)(struct file *filp, 1695 const char __user *ubuf, 1696 size_t usize, loff_t *off)) 1697{ 1698 mce_write = fn; 1699} 1700EXPORT_SYMBOL_GPL(register_mce_write_callback); 1701 1702ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, 1703 size_t usize, loff_t *off) 1704{ 1705 if (mce_write) 1706 return mce_write(filp, ubuf, usize, off); 1707 else 1708 return -EINVAL; 1709} 1710 1711static const struct file_operations mce_chrdev_ops = { 1712 .open = mce_chrdev_open, 1713 .release = mce_chrdev_release, 1714 .read = mce_chrdev_read, 1715 .write = mce_chrdev_write, 1716 .poll = mce_chrdev_poll, 1717 .unlocked_ioctl = mce_chrdev_ioctl, 1718 .llseek = no_llseek, 1719}; 1720 1721static struct miscdevice mce_chrdev_device = { 1722 MISC_MCELOG_MINOR, 1723 "mcelog", 1724 &mce_chrdev_ops, 1725}; 1726 1727/* 1728 * mce=off Disables machine check 1729 * mce=no_cmci Disables CMCI 1730 * mce=dont_log_ce Clears corrected events silently, no log created for CEs. 1731 * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. 1732 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) 1733 * monarchtimeout is how long to wait for other CPUs on machine 1734 * check, or 0 to not wait 1735 * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. 1736 * mce=nobootlog Don't log MCEs from before booting. 1737 */ 1738static int __init mcheck_enable(char *str) 1739{ 1740 if (*str == 0) { 1741 enable_p5_mce(); 1742 return 1; 1743 } 1744 if (*str == '=') 1745 str++; 1746 if (!strcmp(str, "off")) 1747 mce_disabled = 1; 1748 else if (!strcmp(str, "no_cmci")) 1749 mce_cmci_disabled = 1; 1750 else if (!strcmp(str, "dont_log_ce")) 1751 mce_dont_log_ce = 1; 1752 else if (!strcmp(str, "ignore_ce")) 1753 mce_ignore_ce = 1; 1754 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) 1755 mce_bootlog = (str[0] == 'b'); 1756 else if (isdigit(str[0])) { 1757 get_option(&str, &tolerant); 1758 if (*str == ',') { 1759 ++str; 1760 get_option(&str, &monarch_timeout); 1761 } 1762 } else { 1763 printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", 1764 str); 1765 return 0; 1766 } 1767 return 1; 1768} 1769__setup("mce", mcheck_enable); 1770 1771int __init mcheck_init(void) 1772{ 1773 mcheck_intel_therm_init(); 1774 1775 return 0; 1776} 1777 1778/* 1779 * mce_syscore: PM support 1780 */ 1781 1782/* 1783 * Disable machine checks on suspend and shutdown. 
We can't really handle 1784 * them later. 1785 */ 1786static int mce_disable_error_reporting(void) 1787{ 1788 int i; 1789 1790 for (i = 0; i < banks; i++) { 1791 struct mce_bank *b = &mce_banks[i]; 1792 1793 if (b->init) 1794 wrmsrl(MSR_IA32_MCx_CTL(i), 0); 1795 } 1796 return 0; 1797} 1798 1799static int mce_syscore_suspend(void) 1800{ 1801 return mce_disable_error_reporting(); 1802} 1803 1804static void mce_syscore_shutdown(void) 1805{ 1806 mce_disable_error_reporting(); 1807} 1808 1809/* 1810 * On resume clear all MCE state. Don't want to see leftovers from the BIOS. 1811 * Only one CPU is active at this time, the others get re-added later using 1812 * CPU hotplug: 1813 */ 1814static void mce_syscore_resume(void) 1815{ 1816 __mcheck_cpu_init_generic(); 1817 __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info)); 1818} 1819 1820static struct syscore_ops mce_syscore_ops = { 1821 .suspend = mce_syscore_suspend, 1822 .shutdown = mce_syscore_shutdown, 1823 .resume = mce_syscore_resume, 1824}; 1825 1826/* 1827 * mce_device: Sysfs support 1828 */ 1829 1830static void mce_cpu_restart(void *data) 1831{ 1832 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1833 return; 1834 __mcheck_cpu_init_generic(); 1835 __mcheck_cpu_init_timer(); 1836} 1837 1838/* Reinit MCEs after user configuration changes */ 1839static void mce_restart(void) 1840{ 1841 mce_timer_delete_all(); 1842 on_each_cpu(mce_cpu_restart, NULL, 1); 1843} 1844 1845/* Toggle features for corrected errors */ 1846static void mce_disable_cmci(void *data) 1847{ 1848 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1849 return; 1850 cmci_clear(); 1851} 1852 1853static void mce_enable_ce(void *all) 1854{ 1855 if (!mce_available(__this_cpu_ptr(&cpu_info))) 1856 return; 1857 cmci_reenable(); 1858 cmci_recheck(); 1859 if (all) 1860 __mcheck_cpu_init_timer(); 1861} 1862 1863static struct bus_type mce_subsys = { 1864 .name = "machinecheck", 1865 .dev_name = "machinecheck", 1866}; 1867 1868DEFINE_PER_CPU(struct device *, mce_device); 1869 1870__cpuinitdata 1871void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1872 1873static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) 1874{ 1875 return container_of(attr, struct mce_bank, attr); 1876} 1877 1878static ssize_t show_bank(struct device *s, struct device_attribute *attr, 1879 char *buf) 1880{ 1881 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); 1882} 1883 1884static ssize_t set_bank(struct device *s, struct device_attribute *attr, 1885 const char *buf, size_t size) 1886{ 1887 u64 new; 1888 1889 if (strict_strtoull(buf, 0, &new) < 0) 1890 return -EINVAL; 1891 1892 attr_to_bank(attr)->ctl = new; 1893 mce_restart(); 1894 1895 return size; 1896} 1897 1898static ssize_t 1899show_trigger(struct device *s, struct device_attribute *attr, char *buf) 1900{ 1901 strcpy(buf, mce_helper); 1902 strcat(buf, "\n"); 1903 return strlen(mce_helper) + 1; 1904} 1905 1906static ssize_t set_trigger(struct device *s, struct device_attribute *attr, 1907 const char *buf, size_t siz) 1908{ 1909 char *p; 1910 1911 strncpy(mce_helper, buf, sizeof(mce_helper)); 1912 mce_helper[sizeof(mce_helper)-1] = 0; 1913 p = strchr(mce_helper, '\n'); 1914 1915 if (p) 1916 *p = 0; 1917 1918 return strlen(mce_helper) + !!p; 1919} 1920 1921static ssize_t set_ignore_ce(struct device *s, 1922 struct device_attribute *attr, 1923 const char *buf, size_t size) 1924{ 1925 u64 new; 1926 1927 if (strict_strtoull(buf, 0, &new) < 0) 1928 return -EINVAL; 1929 1930 if (mce_ignore_ce ^ !!new) { 1931 if (new) { 1932 /* 
disable ce features */ 1933 mce_timer_delete_all(); 1934 on_each_cpu(mce_disable_cmci, NULL, 1); 1935 mce_ignore_ce = 1; 1936 } else { 1937 /* enable ce features */ 1938 mce_ignore_ce = 0; 1939 on_each_cpu(mce_enable_ce, (void *)1, 1); 1940 } 1941 } 1942 return size; 1943} 1944 1945static ssize_t set_cmci_disabled(struct device *s, 1946 struct device_attribute *attr, 1947 const char *buf, size_t size) 1948{ 1949 u64 new; 1950 1951 if (strict_strtoull(buf, 0, &new) < 0) 1952 return -EINVAL; 1953 1954 if (mce_cmci_disabled ^ !!new) { 1955 if (new) { 1956 /* disable cmci */ 1957 on_each_cpu(mce_disable_cmci, NULL, 1); 1958 mce_cmci_disabled = 1; 1959 } else { 1960 /* enable cmci */ 1961 mce_cmci_disabled = 0; 1962 on_each_cpu(mce_enable_ce, NULL, 1); 1963 } 1964 } 1965 return size; 1966} 1967 1968static ssize_t store_int_with_restart(struct device *s, 1969 struct device_attribute *attr, 1970 const char *buf, size_t size) 1971{ 1972 ssize_t ret = device_store_int(s, attr, buf, size); 1973 mce_restart(); 1974 return ret; 1975} 1976 1977static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); 1978static DEVICE_INT_ATTR(tolerant, 0644, tolerant); 1979static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout); 1980static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); 1981 1982static struct dev_ext_attribute dev_attr_check_interval = { 1983 __ATTR(check_interval, 0644, device_show_int, store_int_with_restart), 1984 &check_interval 1985}; 1986 1987static struct dev_ext_attribute dev_attr_ignore_ce = { 1988 __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce), 1989 &mce_ignore_ce 1990}; 1991 1992static struct dev_ext_attribute dev_attr_cmci_disabled = { 1993 __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled), 1994 &mce_cmci_disabled 1995}; 1996 1997static struct device_attribute *mce_device_attrs[] = { 1998 &dev_attr_tolerant.attr, 1999 &dev_attr_check_interval.attr, 2000 &dev_attr_trigger, 2001 &dev_attr_monarch_timeout.attr, 2002 &dev_attr_dont_log_ce.attr, 2003 &dev_attr_ignore_ce.attr, 2004 &dev_attr_cmci_disabled.attr, 2005 NULL 2006}; 2007 2008static cpumask_var_t mce_device_initialized; 2009 2010static void mce_device_release(struct device *dev) 2011{ 2012 kfree(dev); 2013} 2014 2015/* Per cpu device init. 
All of the cpus still share the same ctrl bank: */ 2016static __cpuinit int mce_device_create(unsigned int cpu) 2017{ 2018 struct device *dev; 2019 int err; 2020 int i, j; 2021 2022 if (!mce_available(&boot_cpu_data)) 2023 return -EIO; 2024 2025 dev = kzalloc(sizeof *dev, GFP_KERNEL); 2026 if (!dev) 2027 return -ENOMEM; 2028 dev->id = cpu; 2029 dev->bus = &mce_subsys; 2030 dev->release = &mce_device_release; 2031 2032 err = device_register(dev); 2033 if (err) 2034 return err; 2035 2036 for (i = 0; mce_device_attrs[i]; i++) { 2037 err = device_create_file(dev, mce_device_attrs[i]); 2038 if (err) 2039 goto error; 2040 } 2041 for (j = 0; j < banks; j++) { 2042 err = device_create_file(dev, &mce_banks[j].attr); 2043 if (err) 2044 goto error2; 2045 } 2046 cpumask_set_cpu(cpu, mce_device_initialized); 2047 per_cpu(mce_device, cpu) = dev; 2048 2049 return 0; 2050error2: 2051 while (--j >= 0) 2052 device_remove_file(dev, &mce_banks[j].attr); 2053error: 2054 while (--i >= 0) 2055 device_remove_file(dev, mce_device_attrs[i]); 2056 2057 device_unregister(dev); 2058 2059 return err; 2060} 2061 2062static __cpuinit void mce_device_remove(unsigned int cpu) 2063{ 2064 struct device *dev = per_cpu(mce_device, cpu); 2065 int i; 2066 2067 if (!cpumask_test_cpu(cpu, mce_device_initialized)) 2068 return; 2069 2070 for (i = 0; mce_device_attrs[i]; i++) 2071 device_remove_file(dev, mce_device_attrs[i]); 2072 2073 for (i = 0; i < banks; i++) 2074 device_remove_file(dev, &mce_banks[i].attr); 2075 2076 device_unregister(dev); 2077 cpumask_clear_cpu(cpu, mce_device_initialized); 2078 per_cpu(mce_device, cpu) = NULL; 2079} 2080 2081/* Make sure there are no machine checks on offlined CPUs. */ 2082static void __cpuinit mce_disable_cpu(void *h) 2083{ 2084 unsigned long action = *(unsigned long *)h; 2085 int i; 2086 2087 if (!mce_available(__this_cpu_ptr(&cpu_info))) 2088 return; 2089 2090 if (!(action & CPU_TASKS_FROZEN)) 2091 cmci_clear(); 2092 for (i = 0; i < banks; i++) { 2093 struct mce_bank *b = &mce_banks[i]; 2094 2095 if (b->init) 2096 wrmsrl(MSR_IA32_MCx_CTL(i), 0); 2097 } 2098} 2099 2100static void __cpuinit mce_reenable_cpu(void *h) 2101{ 2102 unsigned long action = *(unsigned long *)h; 2103 int i; 2104 2105 if (!mce_available(__this_cpu_ptr(&cpu_info))) 2106 return; 2107 2108 if (!(action & CPU_TASKS_FROZEN)) 2109 cmci_reenable(); 2110 for (i = 0; i < banks; i++) { 2111 struct mce_bank *b = &mce_banks[i]; 2112 2113 if (b->init) 2114 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); 2115 } 2116} 2117 2118/* Get notified when a cpu comes on/off. Be hotplug friendly. 
*/ 2119static int __cpuinit 2120mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 2121{ 2122 unsigned int cpu = (unsigned long)hcpu; 2123 struct timer_list *t = &per_cpu(mce_timer, cpu); 2124 2125 switch (action) { 2126 case CPU_ONLINE: 2127 case CPU_ONLINE_FROZEN: 2128 mce_device_create(cpu); 2129 if (threshold_cpu_callback) 2130 threshold_cpu_callback(action, cpu); 2131 break; 2132 case CPU_DEAD: 2133 case CPU_DEAD_FROZEN: 2134 if (threshold_cpu_callback) 2135 threshold_cpu_callback(action, cpu); 2136 mce_device_remove(cpu); 2137 break; 2138 case CPU_DOWN_PREPARE: 2139 case CPU_DOWN_PREPARE_FROZEN: 2140 del_timer_sync(t); 2141 smp_call_function_single(cpu, mce_disable_cpu, &action, 1); 2142 break; 2143 case CPU_DOWN_FAILED: 2144 case CPU_DOWN_FAILED_FROZEN: 2145 if (!mce_ignore_ce && check_interval) { 2146 t->expires = round_jiffies(jiffies + 2147 __get_cpu_var(mce_next_interval)); 2148 add_timer_on(t, cpu); 2149 } 2150 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 2151 break; 2152 case CPU_POST_DEAD: 2153 /* intentionally ignoring frozen here */ 2154 cmci_rediscover(cpu); 2155 break; 2156 } 2157 return NOTIFY_OK; 2158} 2159 2160static struct notifier_block mce_cpu_notifier __cpuinitdata = { 2161 .notifier_call = mce_cpu_callback, 2162}; 2163 2164static __init void mce_init_banks(void) 2165{ 2166 int i; 2167 2168 for (i = 0; i < banks; i++) { 2169 struct mce_bank *b = &mce_banks[i]; 2170 struct device_attribute *a = &b->attr; 2171 2172 sysfs_attr_init(&a->attr); 2173 a->attr.name = b->attrname; 2174 snprintf(b->attrname, ATTR_LEN, "bank%d", i); 2175 2176 a->attr.mode = 0644; 2177 a->show = show_bank; 2178 a->store = set_bank; 2179 } 2180} 2181 2182static __init int mcheck_init_device(void) 2183{ 2184 int err; 2185 int i = 0; 2186 2187 if (!mce_available(&boot_cpu_data)) 2188 return -EIO; 2189 2190 zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); 2191 2192 mce_init_banks(); 2193 2194 err = subsys_system_register(&mce_subsys, NULL); 2195 if (err) 2196 return err; 2197 2198 for_each_online_cpu(i) { 2199 err = mce_device_create(i); 2200 if (err) 2201 return err; 2202 } 2203 2204 register_syscore_ops(&mce_syscore_ops); 2205 register_hotcpu_notifier(&mce_cpu_notifier); 2206 2207 /* register character device /dev/mcelog */ 2208 misc_register(&mce_chrdev_device); 2209 2210 return err; 2211} 2212device_initcall(mcheck_init_device); 2213 2214/* 2215 * Old style boot options parsing. Only for compatibility. 
2216 */ 2217static int __init mcheck_disable(char *str) 2218{ 2219 mce_disabled = 1; 2220 return 1; 2221} 2222__setup("nomce", mcheck_disable); 2223 2224#ifdef CONFIG_DEBUG_FS 2225struct dentry *mce_get_debugfs_dir(void) 2226{ 2227 static struct dentry *dmce; 2228 2229 if (!dmce) 2230 dmce = debugfs_create_dir("mce", NULL); 2231 2232 return dmce; 2233} 2234 2235static void mce_reset(void) 2236{ 2237 cpu_missing = 0; 2238 atomic_set(&mce_fake_paniced, 0); 2239 atomic_set(&mce_executing, 0); 2240 atomic_set(&mce_callin, 0); 2241 atomic_set(&global_nwo, 0); 2242} 2243 2244static int fake_panic_get(void *data, u64 *val) 2245{ 2246 *val = fake_panic; 2247 return 0; 2248} 2249 2250static int fake_panic_set(void *data, u64 val) 2251{ 2252 mce_reset(); 2253 fake_panic = val; 2254 return 0; 2255} 2256 2257DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, 2258 fake_panic_set, "%llu\n"); 2259 2260static int __init mcheck_debugfs_init(void) 2261{ 2262 struct dentry *dmce, *ffake_panic; 2263 2264 dmce = mce_get_debugfs_dir(); 2265 if (!dmce) 2266 return -ENOMEM; 2267 ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, 2268 &fake_panic_fops); 2269 if (!ffake_panic) 2270 return -ENOMEM; 2271 2272 return 0; 2273} 2274late_initcall(mcheck_debugfs_init); 2275#endif 2276
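
The character-device interface implemented above by mce_chrdev_open(), mce_chrdev_read() and mce_chrdev_ioctl() is what user space (historically the mcelog tool) consumes through /dev/mcelog. Below is a minimal sketch of such a reader, not part of this file: it assumes the MCE_GET_RECORD_LEN and MCE_GET_LOG_LEN request codes match the _IOR('M', 1, int) / _IOR('M', 2, int) definitions in <asm/mce.h>, and it abbreviates error handling. Note that mce_chrdev_read() only accepts a full-log read at offset zero (usize >= MCE_LOG_LEN * sizeof(struct mce)) and clears each record once it has been copied out, so reads are destructive; the device node is normally only accessible to root.

/* Sketch of a /dev/mcelog reader; request codes assumed to mirror <asm/mce.h>. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>

#define MCE_GET_RECORD_LEN	_IOR('M', 1, int)	/* sizeof(struct mce) */
#define MCE_GET_LOG_LEN		_IOR('M', 2, int)	/* MCE_LOG_LEN */

int main(void)
{
	int fd, recordlen, loglen;
	char *buf;
	ssize_t len;

	fd = open("/dev/mcelog", O_RDONLY);
	if (fd < 0) {
		perror("open /dev/mcelog");
		return 1;
	}

	/* The kernel reports the record size and log depth via ioctl. */
	if (ioctl(fd, MCE_GET_RECORD_LEN, &recordlen) < 0 ||
	    ioctl(fd, MCE_GET_LOG_LEN, &loglen) < 0) {
		perror("ioctl");
		return 1;
	}

	/* mce_chrdev_read() only supports reading the whole log at offset 0. */
	buf = malloc((size_t)recordlen * loglen);
	if (!buf)
		return 1;

	len = read(fd, buf, (size_t)recordlen * loglen);
	if (len < 0)
		perror("read");
	else
		printf("read %zd bytes (%d records of %d bytes)\n",
		       len, (int)(len / recordlen), recordlen);

	free(buf);
	close(fd);
	return 0;
}

Decoding the returned records requires a user-space copy of struct mce matching the kernel's recordlen, which is why the tool is expected to query MCE_GET_RECORD_LEN rather than hard-code a size.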