ghes.c revision b6a9501658530d8b8374e37f1edb549039a8a260
1/*
2 * APEI Generic Hardware Error Source support
3 *
4 * Generic Hardware Error Source provides a way to report platform
5 * hardware errors (such as that from chipset). It works in so called
6 * "Firmware First" mode, that is, hardware errors are reported to
7 * firmware firstly, then reported to Linux by firmware. This way,
8 * some non-standard hardware error registers or non-standard hardware
9 * link can be checked by firmware to produce more hardware error
10 * information for Linux.
11 *
12 * For more information about Generic Hardware Error Source, please
13 * refer to ACPI Specification version 4.0, section 17.3.2.6
14 *
15 * Copyright 2010 Intel Corp.
16 *   Author: Huang Ying <ying.huang@intel.com>
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License version
20 * 2 as published by the Free Software Foundation;
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30 */
31
32#include <linux/kernel.h>
33#include <linux/module.h>
34#include <linux/init.h>
35#include <linux/acpi.h>
36#include <linux/io.h>
37#include <linux/interrupt.h>
38#include <linux/timer.h>
39#include <linux/cper.h>
40#include <linux/kdebug.h>
41#include <linux/platform_device.h>
42#include <linux/mutex.h>
43#include <linux/ratelimit.h>
44#include <linux/vmalloc.h>
45#include <acpi/apei.h>
46#include <acpi/atomicio.h>
47#include <acpi/hed.h>
48#include <asm/mce.h>
49#include <asm/tlbflush.h>
50
51#include "apei-internal.h"
52
53#define GHES_PFX	"GHES: "
54
55#define GHES_ESTATUS_MAX_SIZE		65536
56
57/*
58 * One struct ghes is created for each generic hardware error source.
59 * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
60 * handler.
61 *
62 * estatus: memory buffer for error status block, allocated during
63 * HEST parsing.
64 */
#define GHES_TO_CLEAR		0x0001	/* estatus was read from firmware and must be acked */
#define GHES_EXITING		0x0002	/* source is being removed; stop rearming the timer */

struct ghes {
	struct acpi_hest_generic *generic;	/* HEST entry describing this error source */
	struct acpi_hest_generic_status *estatus; /* buffer for the error status block */
	u64 buffer_paddr;	/* physical address of the block last read */
	unsigned long flags;	/* GHES_TO_CLEAR | GHES_EXITING */
	union {			/* per-notification-type state; only one is used */
		struct list_head list;	/* SCI/NMI: link on ghes_sci / ghes_nmi */
		struct timer_list timer; /* POLLED: polling timer */
		unsigned int irq;	/* EXTERNAL: Linux IRQ number */
	};
};
79
/*
 * NOTE(review): ghes_disable is declared int but exposed as a "bool"
 * module parameter; later kernels require the backing variable of a
 * bool param to actually be bool -- confirm against the extern
 * declaration in the APEI headers before changing the type.
 */
int ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/* Seconds to wait before reboot when panicking on a fatal error */
static int ghes_panic_timeout	__read_mostly = 30;
84
85/*
86 * All error sources notified with SCI shares one notifier function,
87 * so they need to be linked and checked one by one.  This is applied
88 * to NMI too.
89 *
90 * RCU is used for these lists, so ghes_list_mutex is only used for
91 * list changing, not for traversing.
92 */
93static LIST_HEAD(ghes_sci);
94static LIST_HEAD(ghes_nmi);
95static DEFINE_MUTEX(ghes_list_mutex);
96
97/*
98 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
99 * mutual exclusion.
100 */
101static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
102
103/*
104 * Because the memory area used to transfer hardware error information
105 * from BIOS to Linux can be determined only in NMI, IRQ or timer
106 * handler, but general ioremap can not be used in atomic context, so
107 * a special version of atomic ioremap is implemented for that.
108 */
109
110/*
111 * Two virtual pages are used, one for NMI context, the other for
112 * IRQ/PROCESS context
113 */
114#define GHES_IOREMAP_PAGES		2
115#define GHES_IOREMAP_NMI_PAGE(base)	(base)
116#define GHES_IOREMAP_IRQ_PAGE(base)	((base) + PAGE_SIZE)
117
118/* virtual memory area for atomic ioremap */
119static struct vm_struct *ghes_ioremap_area;
120/*
121 * These 2 spinlock is used to prevent atomic ioremap virtual memory
122 * area from being mapped simultaneously.
123 */
124static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
125static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
126
127static int ghes_ioremap_init(void)
128{
129	ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
130		VM_IOREMAP, VMALLOC_START, VMALLOC_END);
131	if (!ghes_ioremap_area) {
132		pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
133		return -ENOMEM;
134	}
135
136	return 0;
137}
138
/* Release the virtual memory area reserved by ghes_ioremap_init() */
static void ghes_ioremap_exit(void)
{
	free_vm_area(ghes_ioremap_area);
}
143
144static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
145{
146	unsigned long vaddr;
147
148	vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
149	ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
150			   pfn << PAGE_SHIFT, PAGE_KERNEL);
151
152	return (void __iomem *)vaddr;
153}
154
155static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
156{
157	unsigned long vaddr;
158
159	vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
160	ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
161			   pfn << PAGE_SHIFT, PAGE_KERNEL);
162
163	return (void __iomem *)vaddr;
164}
165
166static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
167{
168	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
169	void *base = ghes_ioremap_area->addr;
170
171	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
172	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
173	__flush_tlb_one(vaddr);
174}
175
176static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
177{
178	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
179	void *base = ghes_ioremap_area->addr;
180
181	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
182	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
183	__flush_tlb_one(vaddr);
184}
185
186static struct ghes *ghes_new(struct acpi_hest_generic *generic)
187{
188	struct ghes *ghes;
189	unsigned int error_block_length;
190	int rc;
191
192	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
193	if (!ghes)
194		return ERR_PTR(-ENOMEM);
195	ghes->generic = generic;
196	rc = acpi_pre_map_gar(&generic->error_status_address);
197	if (rc)
198		goto err_free;
199	error_block_length = generic->error_block_length;
200	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
201		pr_warning(FW_WARN GHES_PFX
202			   "Error status block length is too long: %u for "
203			   "generic hardware error source: %d.\n",
204			   error_block_length, generic->header.source_id);
205		error_block_length = GHES_ESTATUS_MAX_SIZE;
206	}
207	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
208	if (!ghes->estatus) {
209		rc = -ENOMEM;
210		goto err_unmap;
211	}
212
213	return ghes;
214
215err_unmap:
216	acpi_post_unmap_gar(&generic->error_status_address);
217err_free:
218	kfree(ghes);
219	return ERR_PTR(rc);
220}
221
/*
 * Counterpart of ghes_new(): free the estatus buffer and unmap the
 * Error Status Address register.  Does not free @ghes itself.
 */
static void ghes_fini(struct ghes *ghes)
{
	kfree(ghes->estatus);
	acpi_post_unmap_gar(&ghes->generic->error_status_address);
}
227
/* Internal severity scale, ordered so that a larger value is worse */
enum {
	GHES_SEV_NO = 0x0,		/* informational only */
	GHES_SEV_CORRECTED = 0x1,	/* corrected error */
	GHES_SEV_RECOVERABLE = 0x2,	/* uncorrected, recoverable */
	GHES_SEV_PANIC = 0x3,		/* fatal: panic the system */
};
234
235static inline int ghes_severity(int severity)
236{
237	switch (severity) {
238	case CPER_SEV_INFORMATIONAL:
239		return GHES_SEV_NO;
240	case CPER_SEV_CORRECTED:
241		return GHES_SEV_CORRECTED;
242	case CPER_SEV_RECOVERABLE:
243		return GHES_SEV_RECOVERABLE;
244	case CPER_SEV_FATAL:
245		return GHES_SEV_PANIC;
246	default:
247		/* Unknown, go panic */
248		return GHES_SEV_PANIC;
249	}
250}
251
252static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
253				  int from_phys)
254{
255	void __iomem *vaddr;
256	unsigned long flags = 0;
257	int in_nmi = in_nmi();
258	u64 offset;
259	u32 trunk;
260
261	while (len > 0) {
262		offset = paddr - (paddr & PAGE_MASK);
263		if (in_nmi) {
264			raw_spin_lock(&ghes_ioremap_lock_nmi);
265			vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
266		} else {
267			spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
268			vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
269		}
270		trunk = PAGE_SIZE - offset;
271		trunk = min(trunk, len);
272		if (from_phys)
273			memcpy_fromio(buffer, vaddr + offset, trunk);
274		else
275			memcpy_toio(vaddr + offset, buffer, trunk);
276		len -= trunk;
277		paddr += trunk;
278		buffer += trunk;
279		if (in_nmi) {
280			ghes_iounmap_nmi(vaddr);
281			raw_spin_unlock(&ghes_ioremap_lock_nmi);
282		} else {
283			ghes_iounmap_irq(vaddr);
284			spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
285		}
286	}
287}
288
/*
 * Read the error status block published by firmware for @ghes into
 * ghes->estatus and validate it.  @silent suppresses warnings (used
 * from NMI context, where printk is unsafe).
 *
 * Returns 0 on success, -ENOENT when no error is pending, -EIO when
 * the block address cannot be read or the block fails validation.
 * On success (and on validation failure after the header was read),
 * GHES_TO_CLEAR is set so the caller must ghes_clear_estatus().
 */
static int ghes_read_estatus(struct ghes *ghes, int silent)
{
	struct acpi_hest_generic *g = ghes->generic;
	u64 buf_paddr;
	u32 len;
	int rc;

	rc = acpi_atomic_read(&buf_paddr, &g->error_status_address);
	if (rc) {
		if (!silent && printk_ratelimit())
			pr_warning(FW_WARN GHES_PFX
"Failed to read error status block address for hardware error source: %d.\n",
				   g->header.source_id);
		return -EIO;
	}
	if (!buf_paddr)
		return -ENOENT;

	/* Read the fixed-size header first to learn the full block length */
	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
			      sizeof(*ghes->estatus), 1);
	if (!ghes->estatus->block_status)
		return -ENOENT;

	ghes->buffer_paddr = buf_paddr;
	ghes->flags |= GHES_TO_CLEAR;

	rc = -EIO;
	len = apei_estatus_len(ghes->estatus);
	if (len < sizeof(*ghes->estatus))
		goto err_read_block;
	if (len > ghes->generic->error_block_length)
		goto err_read_block;
	if (apei_estatus_check_header(ghes->estatus))
		goto err_read_block;
	/* Header is sane: read the section data and validate the whole block */
	ghes_copy_tofrom_phys(ghes->estatus + 1,
			      buf_paddr + sizeof(*ghes->estatus),
			      len - sizeof(*ghes->estatus), 1);
	if (apei_estatus_check(ghes->estatus))
		goto err_read_block;
	rc = 0;

err_read_block:
	if (rc && !silent && printk_ratelimit())
		pr_warning(FW_WARN GHES_PFX
			   "Failed to read error status block!\n");
	return rc;
}
336
/*
 * Acknowledge the current error status block.  The cached copy is
 * always zeroed; the zero block_status is written back to the physical
 * block only when it was actually read from firmware (GHES_TO_CLEAR),
 * telling firmware the block may be reused.
 */
static void ghes_clear_estatus(struct ghes *ghes)
{
	ghes->estatus->block_status = 0;
	if (!(ghes->flags & GHES_TO_CLEAR))
		return;
	ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
			      sizeof(ghes->estatus->block_status), 0);
	ghes->flags &= ~GHES_TO_CLEAR;
}
346
347static void ghes_do_proc(struct ghes *ghes)
348{
349	int sev, processed = 0;
350	struct acpi_hest_generic_data *gdata;
351
352	sev = ghes_severity(ghes->estatus->error_severity);
353	apei_estatus_for_each_section(ghes->estatus, gdata) {
354#ifdef CONFIG_X86_MCE
355		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
356				 CPER_SEC_PLATFORM_MEM)) {
357			apei_mce_report_mem_error(
358				sev == GHES_SEV_CORRECTED,
359				(struct cper_sec_mem_err *)(gdata+1));
360			processed = 1;
361		}
362#endif
363	}
364}
365
366static void __ghes_print_estatus(const char *pfx, struct ghes *ghes)
367{
368	if (pfx == NULL) {
369		if (ghes_severity(ghes->estatus->error_severity) <=
370		    GHES_SEV_CORRECTED)
371			pfx = KERN_WARNING HW_ERR;
372		else
373			pfx = KERN_ERR HW_ERR;
374	}
375	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
376	       pfx, ghes->generic->header.source_id);
377	apei_estatus_print(pfx, ghes->estatus);
378}
379
380static void ghes_print_estatus(const char *pfx, struct ghes *ghes)
381{
382	/* Not more than 2 messages every 5 seconds */
383	static DEFINE_RATELIMIT_STATE(ratelimit, 5*HZ, 2);
384
385	if (__ratelimit(&ratelimit))
386		__ghes_print_estatus(pfx, ghes);
387}
388
389static int ghes_proc(struct ghes *ghes)
390{
391	int rc;
392
393	rc = ghes_read_estatus(ghes, 0);
394	if (rc)
395		goto out;
396	ghes_print_estatus(NULL, ghes);
397	ghes_do_proc(ghes);
398
399out:
400	ghes_clear_estatus(ghes);
401	return 0;
402}
403
404static void ghes_add_timer(struct ghes *ghes)
405{
406	struct acpi_hest_generic *g = ghes->generic;
407	unsigned long expire;
408
409	if (!g->notify.poll_interval) {
410		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
411			   g->header.source_id);
412		return;
413	}
414	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
415	ghes->timer.expires = round_jiffies_relative(expire);
416	add_timer(&ghes->timer);
417}
418
419static void ghes_poll_func(unsigned long data)
420{
421	struct ghes *ghes = (void *)data;
422
423	ghes_proc(ghes);
424	if (!(ghes->flags & GHES_EXITING))
425		ghes_add_timer(ghes);
426}
427
428static irqreturn_t ghes_irq_func(int irq, void *data)
429{
430	struct ghes *ghes = data;
431	int rc;
432
433	rc = ghes_proc(ghes);
434	if (rc)
435		return IRQ_NONE;
436
437	return IRQ_HANDLED;
438}
439
440static int ghes_notify_sci(struct notifier_block *this,
441				  unsigned long event, void *data)
442{
443	struct ghes *ghes;
444	int ret = NOTIFY_DONE;
445
446	rcu_read_lock();
447	list_for_each_entry_rcu(ghes, &ghes_sci, list) {
448		if (!ghes_proc(ghes))
449			ret = NOTIFY_OK;
450	}
451	rcu_read_unlock();
452
453	return ret;
454}
455
/*
 * NMI die notifier shared by all NMI-notified sources.  Runs entirely
 * under ghes_nmi_lock since an NMI may fire on any CPU.  First pass
 * reads every source's status block and tracks the worst severity; if
 * any source panics the system is taken down immediately (printing is
 * allowed there because we are already dying).  Otherwise a second
 * pass processes and acknowledges the blocks without printing, since
 * printk is not NMI safe.
 */
static int ghes_notify_nmi(struct notifier_block *this,
				  unsigned long cmd, void *data)
{
	struct ghes *ghes, *ghes_global = NULL;
	int sev, sev_global = -1;
	int ret = NOTIFY_DONE;

	if (cmd != DIE_NMI)
		return ret;

	raw_spin_lock(&ghes_nmi_lock);
	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
		if (ghes_read_estatus(ghes, 1)) {
			/* Nothing valid pending; ack and move on */
			ghes_clear_estatus(ghes);
			continue;
		}
		sev = ghes_severity(ghes->estatus->error_severity);
		if (sev > sev_global) {
			sev_global = sev;
			ghes_global = ghes;
		}
		ret = NOTIFY_STOP;
	}

	if (ret == NOTIFY_DONE)
		goto out;

	if (sev_global >= GHES_SEV_PANIC) {
		oops_begin();
		__ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global);
		/* reboot to log the error! */
		if (panic_timeout == 0)
			panic_timeout = ghes_panic_timeout;
		panic("Fatal hardware error!");
	}

	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
		if (!(ghes->flags & GHES_TO_CLEAR))
			continue;
		/* Do not print estatus because printk is not NMI safe */
		ghes_do_proc(ghes);
		ghes_clear_estatus(ghes);
	}

out:
	raw_spin_unlock(&ghes_nmi_lock);
	return ret;
}
504
/* Registered with the ACPI HED driver while ghes_sci is non-empty */
static struct notifier_block ghes_notifier_sci = {
	.notifier_call = ghes_notify_sci,
};

/* Registered on the die notifier chain while ghes_nmi is non-empty */
static struct notifier_block ghes_notifier_nmi = {
	.notifier_call = ghes_notify_nmi,
};
512
/*
 * Probe one GHES platform device (created by the HEST code, one per
 * generic hardware error source).  Validates the HEST entry, allocates
 * the struct ghes and hooks up the notification mechanism the entry
 * asks for: a deferrable timer (POLLED), a GSI-mapped interrupt
 * (EXTERNAL), or registration on the shared SCI/NMI notifier lists.
 * Undone by ghes_remove().
 */
static int __devinit ghes_probe(struct platform_device *ghes_dev)
{
	struct acpi_hest_generic *generic;
	struct ghes *ghes = NULL;
	int rc = -EINVAL;

	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
	if (!generic->enabled)
		return -ENODEV;

	/* Reject notification types we cannot service before allocating */
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
	case ACPI_HEST_NOTIFY_EXTERNAL:
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_NMI:
		break;
	case ACPI_HEST_NOTIFY_LOCAL:
		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
			   generic->header.source_id);
		goto err;
	default:
		pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
			   generic->notify.type, generic->header.source_id);
		goto err;
	}

	rc = -EIO;
	if (generic->error_block_length <
	    sizeof(struct acpi_hest_generic_status)) {
		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
			   generic->error_block_length,
			   generic->header.source_id);
		goto err;
	}
	ghes = ghes_new(generic);
	if (IS_ERR(ghes)) {
		rc = PTR_ERR(ghes);
		/* NULL so the err path skips ghes_fini()/kfree() */
		ghes = NULL;
		goto err;
	}
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		ghes->timer.function = ghes_poll_func;
		ghes->timer.data = (unsigned long)ghes;
		init_timer_deferrable(&ghes->timer);
		ghes_add_timer(ghes);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		/* External interrupt vector is GSI */
		if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) {
			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err;
		}
		if (request_irq(ghes->irq, ghes_irq_func,
				0, "GHES IRQ", ghes)) {
			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_SCI:
		/* First SCI source registers the shared HED notifier */
		mutex_lock(&ghes_list_mutex);
		if (list_empty(&ghes_sci))
			register_acpi_hed_notifier(&ghes_notifier_sci);
		list_add_rcu(&ghes->list, &ghes_sci);
		mutex_unlock(&ghes_list_mutex);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		/* First NMI source registers the shared die notifier */
		mutex_lock(&ghes_list_mutex);
		if (list_empty(&ghes_nmi))
			register_die_notifier(&ghes_notifier_nmi);
		list_add_rcu(&ghes->list, &ghes_nmi);
		mutex_unlock(&ghes_list_mutex);
		break;
	default:
		BUG();
	}
	platform_set_drvdata(ghes_dev, ghes);

	return 0;
err:
	if (ghes) {
		ghes_fini(ghes);
		kfree(ghes);
	}
	return rc;
}
601
/*
 * Remove one GHES platform device: undo exactly what ghes_probe() set
 * up for its notification type, then free the struct ghes.
 * GHES_EXITING is set first so a concurrently running poll callback
 * will not rearm the timer before del_timer_sync() completes.
 */
static int __devexit ghes_remove(struct platform_device *ghes_dev)
{
	struct ghes *ghes;
	struct acpi_hest_generic *generic;

	ghes = platform_get_drvdata(ghes_dev);
	generic = ghes->generic;

	ghes->flags |= GHES_EXITING;
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		del_timer_sync(&ghes->timer);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		free_irq(ghes->irq, ghes);
		break;
	case ACPI_HEST_NOTIFY_SCI:
		/* Last SCI source unregisters the shared HED notifier */
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		if (list_empty(&ghes_sci))
			unregister_acpi_hed_notifier(&ghes_notifier_sci);
		mutex_unlock(&ghes_list_mutex);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		if (list_empty(&ghes_nmi))
			unregister_die_notifier(&ghes_notifier_nmi);
		mutex_unlock(&ghes_list_mutex);
		/*
		 * To synchronize with NMI handler, ghes can only be
		 * freed after NMI handler finishes.
		 */
		synchronize_rcu();
		break;
	default:
		BUG();
		break;
	}

	ghes_fini(ghes);
	kfree(ghes);

	platform_set_drvdata(ghes_dev, NULL);

	return 0;
}
649
/* Bound to the "GHES" platform devices created by the APEI HEST code,
 * one per generic hardware error source. */
static struct platform_driver ghes_platform_driver = {
	.driver		= {
		.name	= "GHES",
		.owner	= THIS_MODULE,
	},
	.probe		= ghes_probe,
	.remove		= ghes_remove,
};
658
659static int __init ghes_init(void)
660{
661	int rc;
662
663	if (acpi_disabled)
664		return -ENODEV;
665
666	if (hest_disable) {
667		pr_info(GHES_PFX "HEST is not enabled!\n");
668		return -EINVAL;
669	}
670
671	if (ghes_disable) {
672		pr_info(GHES_PFX "GHES is not enabled!\n");
673		return -EINVAL;
674	}
675
676	rc = ghes_ioremap_init();
677	if (rc)
678		goto err;
679
680	rc = platform_driver_register(&ghes_platform_driver);
681	if (rc)
682		goto err_ioremap_exit;
683
684	return 0;
685err_ioremap_exit:
686	ghes_ioremap_exit();
687err:
688	return rc;
689}
690
/* Module unload: unregister the driver, then release the ioremap area */
static void __exit ghes_exit(void)
{
	platform_driver_unregister(&ghes_platform_driver);
	ghes_ioremap_exit();
}
696
697module_init(ghes_init);
698module_exit(ghes_exit);
699
700MODULE_AUTHOR("Huang Ying");
701MODULE_DESCRIPTION("APEI Generic Hardware Error Source support");
702MODULE_LICENSE("GPL");
703MODULE_ALIAS("platform:GHES");
704