ghes.c revision 90ab5ee94171b3e28de6bb42ee30b527014e0be7
1/* 2 * APEI Generic Hardware Error Source support 3 * 4 * Generic Hardware Error Source provides a way to report platform 5 * hardware errors (such as that from chipset). It works in so called 6 * "Firmware First" mode, that is, hardware errors are reported to 7 * firmware firstly, then reported to Linux by firmware. This way, 8 * some non-standard hardware error registers or non-standard hardware 9 * link can be checked by firmware to produce more hardware error 10 * information for Linux. 11 * 12 * For more information about Generic Hardware Error Source, please 13 * refer to ACPI Specification version 4.0, section 17.3.2.6 14 * 15 * Copyright 2010,2011 Intel Corp. 16 * Author: Huang Ying <ying.huang@intel.com> 17 * 18 * This program is free software; you can redistribute it and/or 19 * modify it under the terms of the GNU General Public License version 20 * 2 as published by the Free Software Foundation; 21 * 22 * This program is distributed in the hope that it will be useful, 23 * but WITHOUT ANY WARRANTY; without even the implied warranty of 24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 25 * GNU General Public License for more details. 26 * 27 * You should have received a copy of the GNU General Public License 28 * along with this program; if not, write to the Free Software 29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 30 */ 31 32#include <linux/kernel.h> 33#include <linux/module.h> 34#include <linux/init.h> 35#include <linux/acpi.h> 36#include <linux/io.h> 37#include <linux/interrupt.h> 38#include <linux/timer.h> 39#include <linux/cper.h> 40#include <linux/kdebug.h> 41#include <linux/platform_device.h> 42#include <linux/mutex.h> 43#include <linux/ratelimit.h> 44#include <linux/vmalloc.h> 45#include <linux/irq_work.h> 46#include <linux/llist.h> 47#include <linux/genalloc.h> 48#include <acpi/apei.h> 49#include <acpi/atomicio.h> 50#include <acpi/hed.h> 51#include <asm/mce.h> 52#include <asm/tlbflush.h> 53#include <asm/nmi.h> 54 55#include "apei-internal.h" 56 57#define GHES_PFX "GHES: " 58 59#define GHES_ESTATUS_MAX_SIZE 65536 60#define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536 61 62#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3 63 64/* This is just an estimation for memory pool allocation */ 65#define GHES_ESTATUS_CACHE_AVG_SIZE 512 66 67#define GHES_ESTATUS_CACHES_SIZE 4 68 69#define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL 70/* Prevent too many caches are allocated because of RCU */ 71#define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2) 72 73#define GHES_ESTATUS_CACHE_LEN(estatus_len) \ 74 (sizeof(struct ghes_estatus_cache) + (estatus_len)) 75#define GHES_ESTATUS_FROM_CACHE(estatus_cache) \ 76 ((struct acpi_hest_generic_status *) \ 77 ((struct ghes_estatus_cache *)(estatus_cache) + 1)) 78 79#define GHES_ESTATUS_NODE_LEN(estatus_len) \ 80 (sizeof(struct ghes_estatus_node) + (estatus_len)) 81#define GHES_ESTATUS_FROM_NODE(estatus_node) \ 82 ((struct acpi_hest_generic_status *) \ 83 ((struct ghes_estatus_node *)(estatus_node) + 1)) 84 85/* 86 * One struct ghes is created for each generic hardware error source. 87 * It provides the context for APEI hardware error timer/IRQ/SCI/NMI 88 * handler. 89 * 90 * estatus: memory buffer for error status block, allocated during 91 * HEST parsing. 92 */ 93#define GHES_TO_CLEAR 0x0001 94#define GHES_EXITING 0x0002 95 96struct ghes { 97 struct acpi_hest_generic *generic; 98 struct acpi_hest_generic_status *estatus; 99 u64 buffer_paddr; 100 unsigned long flags; 101 union { 102 struct list_head list; 103 struct timer_list timer; 104 unsigned int irq; 105 }; 106}; 107 108struct ghes_estatus_node { 109 struct llist_node llnode; 110 struct acpi_hest_generic *generic; 111}; 112 113struct ghes_estatus_cache { 114 u32 estatus_len; 115 atomic_t count; 116 struct acpi_hest_generic *generic; 117 unsigned long long time_in; 118 struct rcu_head rcu; 119}; 120 121bool ghes_disable; 122module_param_named(disable, ghes_disable, bool, 0); 123 124static int ghes_panic_timeout __read_mostly = 30; 125 126/* 127 * All error sources notified with SCI shares one notifier function, 128 * so they need to be linked and checked one by one. This is applied 129 * to NMI too. 130 * 131 * RCU is used for these lists, so ghes_list_mutex is only used for 132 * list changing, not for traversing. 133 */ 134static LIST_HEAD(ghes_sci); 135static LIST_HEAD(ghes_nmi); 136static DEFINE_MUTEX(ghes_list_mutex); 137 138/* 139 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for 140 * mutual exclusion. 141 */ 142static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); 143 144/* 145 * Because the memory area used to transfer hardware error information 146 * from BIOS to Linux can be determined only in NMI, IRQ or timer 147 * handler, but general ioremap can not be used in atomic context, so 148 * a special version of atomic ioremap is implemented for that. 149 */ 150 151/* 152 * Two virtual pages are used, one for NMI context, the other for 153 * IRQ/PROCESS context 154 */ 155#define GHES_IOREMAP_PAGES 2 156#define GHES_IOREMAP_NMI_PAGE(base) (base) 157#define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE) 158 159/* virtual memory area for atomic ioremap */ 160static struct vm_struct *ghes_ioremap_area; 161/* 162 * These 2 spinlock is used to prevent atomic ioremap virtual memory 163 * area from being mapped simultaneously. 164 */ 165static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); 166static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); 167 168/* 169 * printk is not safe in NMI context. So in NMI handler, we allocate 170 * required memory from lock-less memory allocator 171 * (ghes_estatus_pool), save estatus into it, put them into lock-less 172 * list (ghes_estatus_llist), then delay printk into IRQ context via 173 * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record 174 * required pool size by all NMI error source. 175 */ 176static struct gen_pool *ghes_estatus_pool; 177static unsigned long ghes_estatus_pool_size_request; 178static struct llist_head ghes_estatus_llist; 179static struct irq_work ghes_proc_irq_work; 180 181struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; 182static atomic_t ghes_estatus_cache_alloced; 183 184static int ghes_ioremap_init(void) 185{ 186 ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, 187 VM_IOREMAP, VMALLOC_START, VMALLOC_END); 188 if (!ghes_ioremap_area) { 189 pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n"); 190 return -ENOMEM; 191 } 192 193 return 0; 194} 195 196static void ghes_ioremap_exit(void) 197{ 198 free_vm_area(ghes_ioremap_area); 199} 200 201static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) 202{ 203 unsigned long vaddr; 204 205 vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); 206 ioremap_page_range(vaddr, vaddr + PAGE_SIZE, 207 pfn << PAGE_SHIFT, PAGE_KERNEL); 208 209 return (void __iomem *)vaddr; 210} 211 212static void __iomem *ghes_ioremap_pfn_irq(u64 pfn) 213{ 214 unsigned long vaddr; 215 216 vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr); 217 ioremap_page_range(vaddr, vaddr + PAGE_SIZE, 218 pfn << PAGE_SHIFT, PAGE_KERNEL); 219 220 return (void __iomem *)vaddr; 221} 222 223static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) 224{ 225 unsigned long vaddr = (unsigned long __force)vaddr_ptr; 226 void *base = ghes_ioremap_area->addr; 227 228 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); 229 unmap_kernel_range_noflush(vaddr, PAGE_SIZE); 230 __flush_tlb_one(vaddr); 231} 232 233static void ghes_iounmap_irq(void __iomem *vaddr_ptr) 234{ 235 unsigned long vaddr = (unsigned long __force)vaddr_ptr; 236 void *base = ghes_ioremap_area->addr; 237 238 BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); 239 unmap_kernel_range_noflush(vaddr, PAGE_SIZE); 240 __flush_tlb_one(vaddr); 241} 242 243static int ghes_estatus_pool_init(void) 244{ 245 ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1); 246 if (!ghes_estatus_pool) 247 return -ENOMEM; 248 return 0; 249} 250 251static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, 252 struct gen_pool_chunk *chunk, 253 void *data) 254{ 255 free_page(chunk->start_addr); 256} 257 258static void ghes_estatus_pool_exit(void) 259{ 260 gen_pool_for_each_chunk(ghes_estatus_pool, 261 ghes_estatus_pool_free_chunk_page, NULL); 262 gen_pool_destroy(ghes_estatus_pool); 263} 264 265static int ghes_estatus_pool_expand(unsigned long len) 266{ 267 unsigned long i, pages, size, addr; 268 int ret; 269 270 ghes_estatus_pool_size_request += PAGE_ALIGN(len); 271 size = gen_pool_size(ghes_estatus_pool); 272 if (size >= ghes_estatus_pool_size_request) 273 return 0; 274 pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; 275 for (i = 0; i < pages; i++) { 276 addr = __get_free_page(GFP_KERNEL); 277 if (!addr) 278 return -ENOMEM; 279 ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); 280 if (ret) 281 return ret; 282 } 283 284 return 0; 285} 286 287static void ghes_estatus_pool_shrink(unsigned long len) 288{ 289 ghes_estatus_pool_size_request -= PAGE_ALIGN(len); 290} 291 292static struct ghes *ghes_new(struct acpi_hest_generic *generic) 293{ 294 struct ghes *ghes; 295 unsigned int error_block_length; 296 int rc; 297 298 ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); 299 if (!ghes) 300 return ERR_PTR(-ENOMEM); 301 ghes->generic = generic; 302 rc = acpi_pre_map_gar(&generic->error_status_address); 303 if (rc) 304 goto err_free; 305 error_block_length = generic->error_block_length; 306 if (error_block_length > GHES_ESTATUS_MAX_SIZE) { 307 pr_warning(FW_WARN GHES_PFX 308 "Error status block length is too long: %u for " 309 "generic hardware error source: %d.\n", 310 error_block_length, generic->header.source_id); 311 error_block_length = GHES_ESTATUS_MAX_SIZE; 312 } 313 ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); 314 if (!ghes->estatus) { 315 rc = -ENOMEM; 316 goto err_unmap; 317 } 318 319 return ghes; 320 321err_unmap: 322 acpi_post_unmap_gar(&generic->error_status_address); 323err_free: 324 kfree(ghes); 325 return ERR_PTR(rc); 326} 327 328static void ghes_fini(struct ghes *ghes) 329{ 330 kfree(ghes->estatus); 331 acpi_post_unmap_gar(&ghes->generic->error_status_address); 332} 333 334enum { 335 GHES_SEV_NO = 0x0, 336 GHES_SEV_CORRECTED = 0x1, 337 GHES_SEV_RECOVERABLE = 0x2, 338 GHES_SEV_PANIC = 0x3, 339}; 340 341static inline int ghes_severity(int severity) 342{ 343 switch (severity) { 344 case CPER_SEV_INFORMATIONAL: 345 return GHES_SEV_NO; 346 case CPER_SEV_CORRECTED: 347 return GHES_SEV_CORRECTED; 348 case CPER_SEV_RECOVERABLE: 349 return GHES_SEV_RECOVERABLE; 350 case CPER_SEV_FATAL: 351 return GHES_SEV_PANIC; 352 default: 353 /* Unknown, go panic */ 354 return GHES_SEV_PANIC; 355 } 356} 357 358static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len, 359 int from_phys) 360{ 361 void __iomem *vaddr; 362 unsigned long flags = 0; 363 int in_nmi = in_nmi(); 364 u64 offset; 365 u32 trunk; 366 367 while (len > 0) { 368 offset = paddr - (paddr & PAGE_MASK); 369 if (in_nmi) { 370 raw_spin_lock(&ghes_ioremap_lock_nmi); 371 vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT); 372 } else { 373 spin_lock_irqsave(&ghes_ioremap_lock_irq, flags); 374 vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT); 375 } 376 trunk = PAGE_SIZE - offset; 377 trunk = min(trunk, len); 378 if (from_phys) 379 memcpy_fromio(buffer, vaddr + offset, trunk); 380 else 381 memcpy_toio(vaddr + offset, buffer, trunk); 382 len -= trunk; 383 paddr += trunk; 384 buffer += trunk; 385 if (in_nmi) { 386 ghes_iounmap_nmi(vaddr); 387 raw_spin_unlock(&ghes_ioremap_lock_nmi); 388 } else { 389 ghes_iounmap_irq(vaddr); 390 spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags); 391 } 392 } 393} 394 395static int ghes_read_estatus(struct ghes *ghes, int silent) 396{ 397 struct acpi_hest_generic *g = ghes->generic; 398 u64 buf_paddr; 399 u32 len; 400 int rc; 401 402 rc = acpi_atomic_read(&buf_paddr, &g->error_status_address); 403 if (rc) { 404 if (!silent && printk_ratelimit()) 405 pr_warning(FW_WARN GHES_PFX 406"Failed to read error status block address for hardware error source: %d.\n", 407 g->header.source_id); 408 return -EIO; 409 } 410 if (!buf_paddr) 411 return -ENOENT; 412 413 ghes_copy_tofrom_phys(ghes->estatus, buf_paddr, 414 sizeof(*ghes->estatus), 1); 415 if (!ghes->estatus->block_status) 416 return -ENOENT; 417 418 ghes->buffer_paddr = buf_paddr; 419 ghes->flags |= GHES_TO_CLEAR; 420 421 rc = -EIO; 422 len = apei_estatus_len(ghes->estatus); 423 if (len < sizeof(*ghes->estatus)) 424 goto err_read_block; 425 if (len > ghes->generic->error_block_length) 426 goto err_read_block; 427 if (apei_estatus_check_header(ghes->estatus)) 428 goto err_read_block; 429 ghes_copy_tofrom_phys(ghes->estatus + 1, 430 buf_paddr + sizeof(*ghes->estatus), 431 len - sizeof(*ghes->estatus), 1); 432 if (apei_estatus_check(ghes->estatus)) 433 goto err_read_block; 434 rc = 0; 435 436err_read_block: 437 if (rc && !silent && printk_ratelimit()) 438 pr_warning(FW_WARN GHES_PFX 439 "Failed to read error status block!\n"); 440 return rc; 441} 442 443static void ghes_clear_estatus(struct ghes *ghes) 444{ 445 ghes->estatus->block_status = 0; 446 if (!(ghes->flags & GHES_TO_CLEAR)) 447 return; 448 ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr, 449 sizeof(ghes->estatus->block_status), 0); 450 ghes->flags &= ~GHES_TO_CLEAR; 451} 452 453static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) 454{ 455 int sev, sec_sev; 456 struct acpi_hest_generic_data *gdata; 457 458 sev = ghes_severity(estatus->error_severity); 459 apei_estatus_for_each_section(estatus, gdata) { 460 sec_sev = ghes_severity(gdata->error_severity); 461 if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, 462 CPER_SEC_PLATFORM_MEM)) { 463 struct cper_sec_mem_err *mem_err; 464 mem_err = (struct cper_sec_mem_err *)(gdata+1); 465#ifdef CONFIG_X86_MCE 466 apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, 467 mem_err); 468#endif 469#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE 470 if (sev == GHES_SEV_RECOVERABLE && 471 sec_sev == GHES_SEV_RECOVERABLE && 472 mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { 473 unsigned long pfn; 474 pfn = mem_err->physical_addr >> PAGE_SHIFT; 475 memory_failure_queue(pfn, 0, 0); 476 } 477#endif 478 } 479 } 480} 481 482static void __ghes_print_estatus(const char *pfx, 483 const struct acpi_hest_generic *generic, 484 const struct acpi_hest_generic_status *estatus) 485{ 486 if (pfx == NULL) { 487 if (ghes_severity(estatus->error_severity) <= 488 GHES_SEV_CORRECTED) 489 pfx = KERN_WARNING HW_ERR; 490 else 491 pfx = KERN_ERR HW_ERR; 492 } 493 printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n", 494 pfx, generic->header.source_id); 495 apei_estatus_print(pfx, estatus); 496} 497 498static int ghes_print_estatus(const char *pfx, 499 const struct acpi_hest_generic *generic, 500 const struct acpi_hest_generic_status *estatus) 501{ 502 /* Not more than 2 messages every 5 seconds */ 503 static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2); 504 static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2); 505 struct ratelimit_state *ratelimit; 506 507 if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED) 508 ratelimit = &ratelimit_corrected; 509 else 510 ratelimit = &ratelimit_uncorrected; 511 if (__ratelimit(ratelimit)) { 512 __ghes_print_estatus(pfx, generic, estatus); 513 return 1; 514 } 515 return 0; 516} 517 518/* 519 * GHES error status reporting throttle, to report more kinds of 520 * errors, instead of just most frequently occurred errors. 521 */ 522static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus) 523{ 524 u32 len; 525 int i, cached = 0; 526 unsigned long long now; 527 struct ghes_estatus_cache *cache; 528 struct acpi_hest_generic_status *cache_estatus; 529 530 len = apei_estatus_len(estatus); 531 rcu_read_lock(); 532 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { 533 cache = rcu_dereference(ghes_estatus_caches[i]); 534 if (cache == NULL) 535 continue; 536 if (len != cache->estatus_len) 537 continue; 538 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); 539 if (memcmp(estatus, cache_estatus, len)) 540 continue; 541 atomic_inc(&cache->count); 542 now = sched_clock(); 543 if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC) 544 cached = 1; 545 break; 546 } 547 rcu_read_unlock(); 548 return cached; 549} 550 551static struct ghes_estatus_cache *ghes_estatus_cache_alloc( 552 struct acpi_hest_generic *generic, 553 struct acpi_hest_generic_status *estatus) 554{ 555 int alloced; 556 u32 len, cache_len; 557 struct ghes_estatus_cache *cache; 558 struct acpi_hest_generic_status *cache_estatus; 559 560 alloced = atomic_add_return(1, &ghes_estatus_cache_alloced); 561 if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) { 562 atomic_dec(&ghes_estatus_cache_alloced); 563 return NULL; 564 } 565 len = apei_estatus_len(estatus); 566 cache_len = GHES_ESTATUS_CACHE_LEN(len); 567 cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len); 568 if (!cache) { 569 atomic_dec(&ghes_estatus_cache_alloced); 570 return NULL; 571 } 572 cache_estatus = GHES_ESTATUS_FROM_CACHE(cache); 573 memcpy(cache_estatus, estatus, len); 574 cache->estatus_len = len; 575 atomic_set(&cache->count, 0); 576 cache->generic = generic; 577 cache->time_in = sched_clock(); 578 return cache; 579} 580 581static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache) 582{ 583 u32 len; 584 585 len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache)); 586 len = GHES_ESTATUS_CACHE_LEN(len); 587 gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len); 588 atomic_dec(&ghes_estatus_cache_alloced); 589} 590 591static void ghes_estatus_cache_rcu_free(struct rcu_head *head) 592{ 593 struct ghes_estatus_cache *cache; 594 595 cache = container_of(head, struct ghes_estatus_cache, rcu); 596 ghes_estatus_cache_free(cache); 597} 598 599static void ghes_estatus_cache_add( 600 struct acpi_hest_generic *generic, 601 struct acpi_hest_generic_status *estatus) 602{ 603 int i, slot = -1, count; 604 unsigned long long now, duration, period, max_period = 0; 605 struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache; 606 607 new_cache = ghes_estatus_cache_alloc(generic, estatus); 608 if (new_cache == NULL) 609 return; 610 rcu_read_lock(); 611 now = sched_clock(); 612 for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) { 613 cache = rcu_dereference(ghes_estatus_caches[i]); 614 if (cache == NULL) { 615 slot = i; 616 slot_cache = NULL; 617 break; 618 } 619 duration = now - cache->time_in; 620 if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) { 621 slot = i; 622 slot_cache = cache; 623 break; 624 } 625 count = atomic_read(&cache->count); 626 period = duration; 627 do_div(period, (count + 1)); 628 if (period > max_period) { 629 max_period = period; 630 slot = i; 631 slot_cache = cache; 632 } 633 } 634 /* new_cache must be put into array after its contents are written */ 635 smp_wmb(); 636 if (slot != -1 && cmpxchg(ghes_estatus_caches + slot, 637 slot_cache, new_cache) == slot_cache) { 638 if (slot_cache) 639 call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free); 640 } else 641 ghes_estatus_cache_free(new_cache); 642 rcu_read_unlock(); 643} 644 645static int ghes_proc(struct ghes *ghes) 646{ 647 int rc; 648 649 rc = ghes_read_estatus(ghes, 0); 650 if (rc) 651 goto out; 652 if (!ghes_estatus_cached(ghes->estatus)) { 653 if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) 654 ghes_estatus_cache_add(ghes->generic, ghes->estatus); 655 } 656 ghes_do_proc(ghes->estatus); 657out: 658 ghes_clear_estatus(ghes); 659 return 0; 660} 661 662static void ghes_add_timer(struct ghes *ghes) 663{ 664 struct acpi_hest_generic *g = ghes->generic; 665 unsigned long expire; 666 667 if (!g->notify.poll_interval) { 668 pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n", 669 g->header.source_id); 670 return; 671 } 672 expire = jiffies + msecs_to_jiffies(g->notify.poll_interval); 673 ghes->timer.expires = round_jiffies_relative(expire); 674 add_timer(&ghes->timer); 675} 676 677static void ghes_poll_func(unsigned long data) 678{ 679 struct ghes *ghes = (void *)data; 680 681 ghes_proc(ghes); 682 if (!(ghes->flags & GHES_EXITING)) 683 ghes_add_timer(ghes); 684} 685 686static irqreturn_t ghes_irq_func(int irq, void *data) 687{ 688 struct ghes *ghes = data; 689 int rc; 690 691 rc = ghes_proc(ghes); 692 if (rc) 693 return IRQ_NONE; 694 695 return IRQ_HANDLED; 696} 697 698static int ghes_notify_sci(struct notifier_block *this, 699 unsigned long event, void *data) 700{ 701 struct ghes *ghes; 702 int ret = NOTIFY_DONE; 703 704 rcu_read_lock(); 705 list_for_each_entry_rcu(ghes, &ghes_sci, list) { 706 if (!ghes_proc(ghes)) 707 ret = NOTIFY_OK; 708 } 709 rcu_read_unlock(); 710 711 return ret; 712} 713 714static void ghes_proc_in_irq(struct irq_work *irq_work) 715{ 716 struct llist_node *llnode, *next, *tail = NULL; 717 struct ghes_estatus_node *estatus_node; 718 struct acpi_hest_generic *generic; 719 struct acpi_hest_generic_status *estatus; 720 u32 len, node_len; 721 722 /* 723 * Because the time order of estatus in list is reversed, 724 * revert it back to proper order. 725 */ 726 llnode = llist_del_all(&ghes_estatus_llist); 727 while (llnode) { 728 next = llnode->next; 729 llnode->next = tail; 730 tail = llnode; 731 llnode = next; 732 } 733 llnode = tail; 734 while (llnode) { 735 next = llnode->next; 736 estatus_node = llist_entry(llnode, struct ghes_estatus_node, 737 llnode); 738 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 739 len = apei_estatus_len(estatus); 740 node_len = GHES_ESTATUS_NODE_LEN(len); 741 ghes_do_proc(estatus); 742 if (!ghes_estatus_cached(estatus)) { 743 generic = estatus_node->generic; 744 if (ghes_print_estatus(NULL, generic, estatus)) 745 ghes_estatus_cache_add(generic, estatus); 746 } 747 gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node, 748 node_len); 749 llnode = next; 750 } 751} 752 753static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) 754{ 755 struct ghes *ghes, *ghes_global = NULL; 756 int sev, sev_global = -1; 757 int ret = NMI_DONE; 758 759 raw_spin_lock(&ghes_nmi_lock); 760 list_for_each_entry_rcu(ghes, &ghes_nmi, list) { 761 if (ghes_read_estatus(ghes, 1)) { 762 ghes_clear_estatus(ghes); 763 continue; 764 } 765 sev = ghes_severity(ghes->estatus->error_severity); 766 if (sev > sev_global) { 767 sev_global = sev; 768 ghes_global = ghes; 769 } 770 ret = NMI_HANDLED; 771 } 772 773 if (ret == NMI_DONE) 774 goto out; 775 776 if (sev_global >= GHES_SEV_PANIC) { 777 oops_begin(); 778 __ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic, 779 ghes_global->estatus); 780 /* reboot to log the error! */ 781 if (panic_timeout == 0) 782 panic_timeout = ghes_panic_timeout; 783 panic("Fatal hardware error!"); 784 } 785 786 list_for_each_entry_rcu(ghes, &ghes_nmi, list) { 787#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 788 u32 len, node_len; 789 struct ghes_estatus_node *estatus_node; 790 struct acpi_hest_generic_status *estatus; 791#endif 792 if (!(ghes->flags & GHES_TO_CLEAR)) 793 continue; 794#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 795 if (ghes_estatus_cached(ghes->estatus)) 796 goto next; 797 /* Save estatus for further processing in IRQ context */ 798 len = apei_estatus_len(ghes->estatus); 799 node_len = GHES_ESTATUS_NODE_LEN(len); 800 estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, 801 node_len); 802 if (estatus_node) { 803 estatus_node->generic = ghes->generic; 804 estatus = GHES_ESTATUS_FROM_NODE(estatus_node); 805 memcpy(estatus, ghes->estatus, len); 806 llist_add(&estatus_node->llnode, &ghes_estatus_llist); 807 } 808next: 809#endif 810 ghes_clear_estatus(ghes); 811 } 812#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG 813 irq_work_queue(&ghes_proc_irq_work); 814#endif 815 816out: 817 raw_spin_unlock(&ghes_nmi_lock); 818 return ret; 819} 820 821static struct notifier_block ghes_notifier_sci = { 822 .notifier_call = ghes_notify_sci, 823}; 824 825static unsigned long ghes_esource_prealloc_size( 826 const struct acpi_hest_generic *generic) 827{ 828 unsigned long block_length, prealloc_records, prealloc_size; 829 830 block_length = min_t(unsigned long, generic->error_block_length, 831 GHES_ESTATUS_MAX_SIZE); 832 prealloc_records = max_t(unsigned long, 833 generic->records_to_preallocate, 1); 834 prealloc_size = min_t(unsigned long, block_length * prealloc_records, 835 GHES_ESOURCE_PREALLOC_MAX_SIZE); 836 837 return prealloc_size; 838} 839 840static int __devinit ghes_probe(struct platform_device *ghes_dev) 841{ 842 struct acpi_hest_generic *generic; 843 struct ghes *ghes = NULL; 844 unsigned long len; 845 int rc = -EINVAL; 846 847 generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; 848 if (!generic->enabled) 849 return -ENODEV; 850 851 switch (generic->notify.type) { 852 case ACPI_HEST_NOTIFY_POLLED: 853 case ACPI_HEST_NOTIFY_EXTERNAL: 854 case ACPI_HEST_NOTIFY_SCI: 855 case ACPI_HEST_NOTIFY_NMI: 856 break; 857 case ACPI_HEST_NOTIFY_LOCAL: 858 pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", 859 generic->header.source_id); 860 goto err; 861 default: 862 pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", 863 generic->notify.type, generic->header.source_id); 864 goto err; 865 } 866 867 rc = -EIO; 868 if (generic->error_block_length < 869 sizeof(struct acpi_hest_generic_status)) { 870 pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n", 871 generic->error_block_length, 872 generic->header.source_id); 873 goto err; 874 } 875 ghes = ghes_new(generic); 876 if (IS_ERR(ghes)) { 877 rc = PTR_ERR(ghes); 878 ghes = NULL; 879 goto err; 880 } 881 switch (generic->notify.type) { 882 case ACPI_HEST_NOTIFY_POLLED: 883 ghes->timer.function = ghes_poll_func; 884 ghes->timer.data = (unsigned long)ghes; 885 init_timer_deferrable(&ghes->timer); 886 ghes_add_timer(ghes); 887 break; 888 case ACPI_HEST_NOTIFY_EXTERNAL: 889 /* External interrupt vector is GSI */ 890 if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) { 891 pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", 892 generic->header.source_id); 893 goto err; 894 } 895 if (request_irq(ghes->irq, ghes_irq_func, 896 0, "GHES IRQ", ghes)) { 897 pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", 898 generic->header.source_id); 899 goto err; 900 } 901 break; 902 case ACPI_HEST_NOTIFY_SCI: 903 mutex_lock(&ghes_list_mutex); 904 if (list_empty(&ghes_sci)) 905 register_acpi_hed_notifier(&ghes_notifier_sci); 906 list_add_rcu(&ghes->list, &ghes_sci); 907 mutex_unlock(&ghes_list_mutex); 908 break; 909 case ACPI_HEST_NOTIFY_NMI: 910 len = ghes_esource_prealloc_size(generic); 911 ghes_estatus_pool_expand(len); 912 mutex_lock(&ghes_list_mutex); 913 if (list_empty(&ghes_nmi)) 914 register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, 915 "ghes"); 916 list_add_rcu(&ghes->list, &ghes_nmi); 917 mutex_unlock(&ghes_list_mutex); 918 break; 919 default: 920 BUG(); 921 } 922 platform_set_drvdata(ghes_dev, ghes); 923 924 return 0; 925err: 926 if (ghes) { 927 ghes_fini(ghes); 928 kfree(ghes); 929 } 930 return rc; 931} 932 933static int __devexit ghes_remove(struct platform_device *ghes_dev) 934{ 935 struct ghes *ghes; 936 struct acpi_hest_generic *generic; 937 unsigned long len; 938 939 ghes = platform_get_drvdata(ghes_dev); 940 generic = ghes->generic; 941 942 ghes->flags |= GHES_EXITING; 943 switch (generic->notify.type) { 944 case ACPI_HEST_NOTIFY_POLLED: 945 del_timer_sync(&ghes->timer); 946 break; 947 case ACPI_HEST_NOTIFY_EXTERNAL: 948 free_irq(ghes->irq, ghes); 949 break; 950 case ACPI_HEST_NOTIFY_SCI: 951 mutex_lock(&ghes_list_mutex); 952 list_del_rcu(&ghes->list); 953 if (list_empty(&ghes_sci)) 954 unregister_acpi_hed_notifier(&ghes_notifier_sci); 955 mutex_unlock(&ghes_list_mutex); 956 break; 957 case ACPI_HEST_NOTIFY_NMI: 958 mutex_lock(&ghes_list_mutex); 959 list_del_rcu(&ghes->list); 960 if (list_empty(&ghes_nmi)) 961 unregister_nmi_handler(NMI_LOCAL, "ghes"); 962 mutex_unlock(&ghes_list_mutex); 963 /* 964 * To synchronize with NMI handler, ghes can only be 965 * freed after NMI handler finishes. 966 */ 967 synchronize_rcu(); 968 len = ghes_esource_prealloc_size(generic); 969 ghes_estatus_pool_shrink(len); 970 break; 971 default: 972 BUG(); 973 break; 974 } 975 976 ghes_fini(ghes); 977 kfree(ghes); 978 979 platform_set_drvdata(ghes_dev, NULL); 980 981 return 0; 982} 983 984static struct platform_driver ghes_platform_driver = { 985 .driver = { 986 .name = "GHES", 987 .owner = THIS_MODULE, 988 }, 989 .probe = ghes_probe, 990 .remove = ghes_remove, 991}; 992 993static int __init ghes_init(void) 994{ 995 int rc; 996 997 if (acpi_disabled) 998 return -ENODEV; 999 1000 if (hest_disable) { 1001 pr_info(GHES_PFX "HEST is not enabled!\n"); 1002 return -EINVAL; 1003 } 1004 1005 if (ghes_disable) { 1006 pr_info(GHES_PFX "GHES is not enabled!\n"); 1007 return -EINVAL; 1008 } 1009 1010 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); 1011 1012 rc = ghes_ioremap_init(); 1013 if (rc) 1014 goto err; 1015 1016 rc = ghes_estatus_pool_init(); 1017 if (rc) 1018 goto err_ioremap_exit; 1019 1020 rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE * 1021 GHES_ESTATUS_CACHE_ALLOCED_MAX); 1022 if (rc) 1023 goto err_pool_exit; 1024 1025 rc = platform_driver_register(&ghes_platform_driver); 1026 if (rc) 1027 goto err_pool_exit; 1028 1029 rc = apei_osc_setup(); 1030 if (rc == 0 && osc_sb_apei_support_acked) 1031 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n"); 1032 else if (rc == 0 && !osc_sb_apei_support_acked) 1033 pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n"); 1034 else if (rc && osc_sb_apei_support_acked) 1035 pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); 1036 else 1037 pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); 1038 1039 return 0; 1040err_pool_exit: 1041 ghes_estatus_pool_exit(); 1042err_ioremap_exit: 1043 ghes_ioremap_exit(); 1044err: 1045 return rc; 1046} 1047 1048static void __exit ghes_exit(void) 1049{ 1050 platform_driver_unregister(&ghes_platform_driver); 1051 ghes_estatus_pool_exit(); 1052 ghes_ioremap_exit(); 1053} 1054 1055module_init(ghes_init); 1056module_exit(ghes_exit); 1057 1058MODULE_AUTHOR("Huang Ying"); 1059MODULE_DESCRIPTION("APEI Generic Hardware Error Source support"); 1060MODULE_LICENSE("GPL"); 1061MODULE_ALIAS("platform:GHES"); 1062