ghes.c revision 90ab5ee94171b3e28de6bb42ee30b527014e0be7
1/*
2 * APEI Generic Hardware Error Source support
3 *
4 * Generic Hardware Error Source provides a way to report platform
5 * hardware errors (such as that from chipset). It works in so called
6 * "Firmware First" mode, that is, hardware errors are reported to
7 * firmware firstly, then reported to Linux by firmware. This way,
8 * some non-standard hardware error registers or non-standard hardware
9 * link can be checked by firmware to produce more hardware error
10 * information for Linux.
11 *
12 * For more information about Generic Hardware Error Source, please
13 * refer to ACPI Specification version 4.0, section 17.3.2.6
14 *
15 * Copyright 2010,2011 Intel Corp.
16 *   Author: Huang Ying <ying.huang@intel.com>
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License version
20 * 2 as published by the Free Software Foundation;
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30 */
31
32#include <linux/kernel.h>
33#include <linux/module.h>
34#include <linux/init.h>
35#include <linux/acpi.h>
36#include <linux/io.h>
37#include <linux/interrupt.h>
38#include <linux/timer.h>
39#include <linux/cper.h>
40#include <linux/kdebug.h>
41#include <linux/platform_device.h>
42#include <linux/mutex.h>
43#include <linux/ratelimit.h>
44#include <linux/vmalloc.h>
45#include <linux/irq_work.h>
46#include <linux/llist.h>
47#include <linux/genalloc.h>
48#include <acpi/apei.h>
49#include <acpi/atomicio.h>
50#include <acpi/hed.h>
51#include <asm/mce.h>
52#include <asm/tlbflush.h>
53#include <asm/nmi.h>
54
55#include "apei-internal.h"
56
#define GHES_PFX	"GHES: "

/* Upper bound for a single error status block read from firmware */
#define GHES_ESTATUS_MAX_SIZE		65536
/* Upper bound for memory preallocated per error source (NMI path) */
#define GHES_ESOURCE_PREALLOC_MAX_SIZE	65536

/* Minimum allocation order (2^3 = 8 bytes) for the gen_pool allocator */
#define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3

/* This is just an estimation for memory pool allocation */
#define GHES_ESTATUS_CACHE_AVG_SIZE	512

/* Number of slots in the reporting-throttle cache array */
#define GHES_ESTATUS_CACHES_SIZE	4

/* A cached estatus suppresses duplicate reports for at most 10 seconds */
#define GHES_ESTATUS_IN_CACHE_MAX_NSEC	10000000000ULL
/* Prevent too many caches are allocated because of RCU */
#define GHES_ESTATUS_CACHE_ALLOCED_MAX	(GHES_ESTATUS_CACHES_SIZE * 3 / 2)

/* A cache entry is the struct header immediately followed by the payload */
#define GHES_ESTATUS_CACHE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_cache) + (estatus_len))
#define GHES_ESTATUS_FROM_CACHE(estatus_cache)			\
	((struct acpi_hest_generic_status *)			\
	 ((struct ghes_estatus_cache *)(estatus_cache) + 1))

/* An llist node is the struct header immediately followed by the payload */
#define GHES_ESTATUS_NODE_LEN(estatus_len)			\
	(sizeof(struct ghes_estatus_node) + (estatus_len))
#define GHES_ESTATUS_FROM_NODE(estatus_node)				\
	((struct acpi_hest_generic_status *)				\
	 ((struct ghes_estatus_node *)(estatus_node) + 1))
84
/*
 * One struct ghes is created for each generic hardware error source.
 * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
 * handler.
 *
 * estatus: memory buffer for error status block, allocated during
 * HEST parsing.
 */
#define GHES_TO_CLEAR		0x0001	/* estatus must be acked back to firmware */
#define GHES_EXITING		0x0002	/* source is being torn down; stop rearming */

struct ghes {
	struct acpi_hest_generic *generic;	/* HEST entry describing this source */
	struct acpi_hest_generic_status *estatus; /* buffer for the error status block */
	u64 buffer_paddr;	/* physical address the estatus was read from */
	unsigned long flags;	/* GHES_TO_CLEAR | GHES_EXITING */
	union {			/* per-notification-type state; only one member used */
		struct list_head list;	 /* SCI/NMI: link into ghes_sci/ghes_nmi */
		struct timer_list timer; /* POLLED: polling timer */
		unsigned int irq;	 /* EXTERNAL: mapped IRQ number */
	};
};
107
/* Estatus saved in NMI context, queued for processing in IRQ context */
struct ghes_estatus_node {
	struct llist_node llnode;		/* link in ghes_estatus_llist */
	struct acpi_hest_generic *generic;	/* source the estatus came from */
};
112
/* Recently-reported estatus, used to throttle duplicate reports */
struct ghes_estatus_cache {
	u32 estatus_len;	/* length of the cached estatus payload */
	atomic_t count;		/* hits since the entry was cached */
	struct acpi_hest_generic *generic;
	unsigned long long time_in;	/* sched_clock() time of caching */
	struct rcu_head rcu;	/* deferred free under RCU */
};
120
/* Module parameter: ghes.disable=1 turns the whole driver off */
bool ghes_disable;
module_param_named(disable, ghes_disable, bool, 0);

/* Seconds before reboot when panicking on a fatal hardware error */
static int ghes_panic_timeout	__read_mostly = 30;

/*
 * All error sources notified with SCI shares one notifier function,
 * so they need to be linked and checked one by one.  This is applied
 * to NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
static LIST_HEAD(ghes_sci);
static LIST_HEAD(ghes_nmi);
static DEFINE_MUTEX(ghes_list_mutex);

/*
 * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
 * mutual exclusion.
 */
static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);

/*
 * Because the memory area used to transfer hardware error information
 * from BIOS to Linux can be determined only in NMI, IRQ or timer
 * handler, but general ioremap can not be used in atomic context, so
 * a special version of atomic ioremap is implemented for that.
 */

/*
 * Two virtual pages are used, one for NMI context, the other for
 * IRQ/PROCESS context
 */
#define GHES_IOREMAP_PAGES		2
#define GHES_IOREMAP_NMI_PAGE(base)	(base)
#define GHES_IOREMAP_IRQ_PAGE(base)	((base) + PAGE_SIZE)

/* virtual memory area for atomic ioremap */
static struct vm_struct *ghes_ioremap_area;
/*
 * These 2 spinlock is used to prevent atomic ioremap virtual memory
 * area from being mapped simultaneously.
 */
static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);

/*
 * printk is not safe in NMI context.  So in NMI handler, we allocate
 * required memory from lock-less memory allocator
 * (ghes_estatus_pool), save estatus into it, put them into lock-less
 * list (ghes_estatus_llist), then delay printk into IRQ context via
 * irq_work (ghes_proc_irq_work).  ghes_estatus_size_request record
 * required pool size by all NMI error source.
 */
static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;
static struct llist_head ghes_estatus_llist;
static struct irq_work ghes_proc_irq_work;

/* Throttle cache slots; written with cmpxchg, read under RCU */
struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;
183
184static int ghes_ioremap_init(void)
185{
186	ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
187		VM_IOREMAP, VMALLOC_START, VMALLOC_END);
188	if (!ghes_ioremap_area) {
189		pr_err(GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
190		return -ENOMEM;
191	}
192
193	return 0;
194}
195
/* Release the virtual memory area reserved for atomic ioremap */
static void ghes_ioremap_exit(void)
{
	free_vm_area(ghes_ioremap_area);
}
200
/*
 * Map @pfn into the page reserved for NMI context; caller must hold
 * ghes_ioremap_lock_nmi.  NOTE(review): the ioremap_page_range()
 * return value is not checked — presumably it cannot fail for a
 * pre-reserved single-page range; confirm.
 */
static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
	unsigned long vaddr;

	vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
	ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
			   pfn << PAGE_SHIFT, PAGE_KERNEL);

	return (void __iomem *)vaddr;
}
211
/*
 * Map @pfn into the page reserved for IRQ/process context; caller
 * must hold ghes_ioremap_lock_irq.  NOTE(review): as with the NMI
 * variant, the ioremap_page_range() return value is not checked.
 */
static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
{
	unsigned long vaddr;

	vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
	ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
			   pfn << PAGE_SHIFT, PAGE_KERNEL);

	return (void __iomem *)vaddr;
}
222
/* Unmap the NMI-context page and flush its TLB entry on this CPU */
static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
{
	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
	void *base = ghes_ioremap_area->addr;

	/* only the dedicated NMI page may ever be passed here */
	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
	__flush_tlb_one(vaddr);
}
232
/* Unmap the IRQ/process-context page and flush its TLB entry on this CPU */
static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
{
	unsigned long vaddr = (unsigned long __force)vaddr_ptr;
	void *base = ghes_ioremap_area->addr;

	/* only the dedicated IRQ page may ever be passed here */
	BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
	unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
	__flush_tlb_one(vaddr);
}
242
243static int ghes_estatus_pool_init(void)
244{
245	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
246	if (!ghes_estatus_pool)
247		return -ENOMEM;
248	return 0;
249}
250
/* gen_pool_for_each_chunk() callback: free one backing page */
static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool,
					      struct gen_pool_chunk *chunk,
					      void *data)
{
	free_page(chunk->start_addr);
}
257
/* Free all pages backing the pool, then destroy the pool itself */
static void ghes_estatus_pool_exit(void)
{
	gen_pool_for_each_chunk(ghes_estatus_pool,
				ghes_estatus_pool_free_chunk_page, NULL);
	gen_pool_destroy(ghes_estatus_pool);
}
264
265static int ghes_estatus_pool_expand(unsigned long len)
266{
267	unsigned long i, pages, size, addr;
268	int ret;
269
270	ghes_estatus_pool_size_request += PAGE_ALIGN(len);
271	size = gen_pool_size(ghes_estatus_pool);
272	if (size >= ghes_estatus_pool_size_request)
273		return 0;
274	pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE;
275	for (i = 0; i < pages; i++) {
276		addr = __get_free_page(GFP_KERNEL);
277		if (!addr)
278			return -ENOMEM;
279		ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1);
280		if (ret)
281			return ret;
282	}
283
284	return 0;
285}
286
/*
 * Record that @len bytes of the pool are no longer needed.  Backing
 * pages are not returned here; they are all freed at module exit by
 * ghes_estatus_pool_exit().
 */
static void ghes_estatus_pool_shrink(unsigned long len)
{
	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
}
291
/*
 * Allocate and initialize a struct ghes for the HEST entry @generic:
 * pre-map the error status address register and allocate the estatus
 * buffer.  Returns the new ghes or an ERR_PTR() on failure.
 */
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
	struct ghes *ghes;
	unsigned int error_block_length;
	int rc;

	ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
	if (!ghes)
		return ERR_PTR(-ENOMEM);
	ghes->generic = generic;
	rc = acpi_pre_map_gar(&generic->error_status_address);
	if (rc)
		goto err_free;
	error_block_length = generic->error_block_length;
	/* clamp the firmware-provided length to a sane upper bound */
	if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
		pr_warning(FW_WARN GHES_PFX
			   "Error status block length is too long: %u for "
			   "generic hardware error source: %d.\n",
			   error_block_length, generic->header.source_id);
		error_block_length = GHES_ESTATUS_MAX_SIZE;
	}
	ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
	if (!ghes->estatus) {
		rc = -ENOMEM;
		goto err_unmap;
	}

	return ghes;

err_unmap:
	acpi_post_unmap_gar(&generic->error_status_address);
err_free:
	kfree(ghes);
	return ERR_PTR(rc);
}
327
/* Undo ghes_new(): free the estatus buffer and unmap the status GAR */
static void ghes_fini(struct ghes *ghes)
{
	kfree(ghes->estatus);
	acpi_post_unmap_gar(&ghes->generic->error_status_address);
}
333
/* Internal severity scale, ordered so that a larger value is worse */
enum {
	GHES_SEV_NO = 0x0,
	GHES_SEV_CORRECTED = 0x1,
	GHES_SEV_RECOVERABLE = 0x2,
	GHES_SEV_PANIC = 0x3,
};
340
341static inline int ghes_severity(int severity)
342{
343	switch (severity) {
344	case CPER_SEV_INFORMATIONAL:
345		return GHES_SEV_NO;
346	case CPER_SEV_CORRECTED:
347		return GHES_SEV_CORRECTED;
348	case CPER_SEV_RECOVERABLE:
349		return GHES_SEV_RECOVERABLE;
350	case CPER_SEV_FATAL:
351		return GHES_SEV_PANIC;
352	default:
353		/* Unknown, go panic */
354		return GHES_SEV_PANIC;
355	}
356}
357
/*
 * Copy @len bytes between @buffer and physical address @paddr,
 * page by page, through the atomic ioremap slots.  @from_phys != 0
 * reads from @paddr into @buffer, otherwise writes.  Safe in NMI,
 * IRQ and process context: the NMI slot is serialized by a raw
 * spinlock, the shared IRQ/process slot by an irqsave spinlock.
 */
static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
				  int from_phys)
{
	void __iomem *vaddr;
	unsigned long flags = 0;
	int in_nmi = in_nmi();
	u64 offset;
	u32 trunk;

	while (len > 0) {
		offset = paddr - (paddr & PAGE_MASK);	/* offset within page */
		if (in_nmi) {
			raw_spin_lock(&ghes_ioremap_lock_nmi);
			vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
		} else {
			spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
			vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
		}
		/* copy at most up to the end of the current page */
		trunk = PAGE_SIZE - offset;
		trunk = min(trunk, len);
		if (from_phys)
			memcpy_fromio(buffer, vaddr + offset, trunk);
		else
			memcpy_toio(vaddr + offset, buffer, trunk);
		len -= trunk;
		paddr += trunk;
		buffer += trunk;
		if (in_nmi) {
			ghes_iounmap_nmi(vaddr);
			raw_spin_unlock(&ghes_ioremap_lock_nmi);
		} else {
			ghes_iounmap_irq(vaddr);
			spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
		}
	}
}
394
/*
 * Read the error status block for @ghes from firmware into
 * ghes->estatus and validate it.  With @silent set no warnings are
 * printed (NMI path).  Returns 0 on success, -ENOENT if no error is
 * currently pending, -EIO on a read or validation failure.
 */
static int ghes_read_estatus(struct ghes *ghes, int silent)
{
	struct acpi_hest_generic *g = ghes->generic;
	u64 buf_paddr;
	u32 len;
	int rc;

	/* the GAR register holds the physical address of the block */
	rc = acpi_atomic_read(&buf_paddr, &g->error_status_address);
	if (rc) {
		if (!silent && printk_ratelimit())
			pr_warning(FW_WARN GHES_PFX
"Failed to read error status block address for hardware error source: %d.\n",
				   g->header.source_id);
		return -EIO;
	}
	if (!buf_paddr)
		return -ENOENT;

	/* read the fixed-size header first to learn the total length */
	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
			      sizeof(*ghes->estatus), 1);
	if (!ghes->estatus->block_status)
		return -ENOENT;	/* firmware reports no error right now */

	ghes->buffer_paddr = buf_paddr;
	ghes->flags |= GHES_TO_CLEAR;	/* must ack firmware afterwards */

	rc = -EIO;
	len = apei_estatus_len(ghes->estatus);
	if (len < sizeof(*ghes->estatus))
		goto err_read_block;
	if (len > ghes->generic->error_block_length)
		goto err_read_block;	/* would overflow our buffer */
	if (apei_estatus_check_header(ghes->estatus))
		goto err_read_block;
	/* now read the variable-length remainder of the block */
	ghes_copy_tofrom_phys(ghes->estatus + 1,
			      buf_paddr + sizeof(*ghes->estatus),
			      len - sizeof(*ghes->estatus), 1);
	if (apei_estatus_check(ghes->estatus))
		goto err_read_block;
	rc = 0;

err_read_block:
	if (rc && !silent && printk_ratelimit())
		pr_warning(FW_WARN GHES_PFX
			   "Failed to read error status block!\n");
	return rc;
}
442
/*
 * Acknowledge the error to firmware by writing a zeroed block_status
 * back to the shared buffer, allowing firmware to report the next
 * error.  No-op on firmware unless GHES_TO_CLEAR is pending.
 */
static void ghes_clear_estatus(struct ghes *ghes)
{
	ghes->estatus->block_status = 0;
	if (!(ghes->flags & GHES_TO_CLEAR))
		return;
	/* write back only the (now zero) block_status field */
	ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
			      sizeof(ghes->estatus->block_status), 0);
	ghes->flags &= ~GHES_TO_CLEAR;
}
452
/*
 * Act on a validated estatus: forward platform-memory error sections
 * to the x86 MCE layer and, for recoverable errors carrying a valid
 * physical address, queue the page for memory-failure recovery.
 */
static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
{
	int sev, sec_sev;
	struct acpi_hest_generic_data *gdata;

	sev = ghes_severity(estatus->error_severity);
	apei_estatus_for_each_section(estatus, gdata) {
		sec_sev = ghes_severity(gdata->error_severity);
		/* only platform memory error sections are handled here */
		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
				 CPER_SEC_PLATFORM_MEM)) {
			struct cper_sec_mem_err *mem_err;
			mem_err = (struct cper_sec_mem_err *)(gdata+1);
#ifdef CONFIG_X86_MCE
			apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
						  mem_err);
#endif
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
			/* both block and section must be recoverable */
			if (sev == GHES_SEV_RECOVERABLE &&
			    sec_sev == GHES_SEV_RECOVERABLE &&
			    mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
				unsigned long pfn;
				pfn = mem_err->physical_addr >> PAGE_SHIFT;
				memory_failure_queue(pfn, 0, 0);
			}
#endif
		}
	}
}
481
482static void __ghes_print_estatus(const char *pfx,
483				 const struct acpi_hest_generic *generic,
484				 const struct acpi_hest_generic_status *estatus)
485{
486	if (pfx == NULL) {
487		if (ghes_severity(estatus->error_severity) <=
488		    GHES_SEV_CORRECTED)
489			pfx = KERN_WARNING HW_ERR;
490		else
491			pfx = KERN_ERR HW_ERR;
492	}
493	printk("%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
494	       pfx, generic->header.source_id);
495	apei_estatus_print(pfx, estatus);
496}
497
/*
 * Rate-limited wrapper around __ghes_print_estatus().  Corrected and
 * uncorrected errors are limited independently so a storm of one kind
 * cannot drown out the other.  Returns 1 if the estatus was printed,
 * 0 if it was dropped by the rate limit.
 */
static int ghes_print_estatus(const char *pfx,
			      const struct acpi_hest_generic *generic,
			      const struct acpi_hest_generic_status *estatus)
{
	/* Not more than 2 messages every 5 seconds */
	static DEFINE_RATELIMIT_STATE(ratelimit_corrected, 5*HZ, 2);
	static DEFINE_RATELIMIT_STATE(ratelimit_uncorrected, 5*HZ, 2);
	struct ratelimit_state *ratelimit;

	if (ghes_severity(estatus->error_severity) <= GHES_SEV_CORRECTED)
		ratelimit = &ratelimit_corrected;
	else
		ratelimit = &ratelimit_uncorrected;
	if (__ratelimit(ratelimit)) {
		__ghes_print_estatus(pfx, generic, estatus);
		return 1;
	}
	return 0;
}
517
/*
 * GHES error status reporting throttle, to report more kinds of
 * errors, instead of just most frequently occurred errors.
 */
/*
 * Return 1 if an identical estatus was already reported recently
 * (within GHES_ESTATUS_IN_CACHE_MAX_NSEC), meaning this report may be
 * suppressed.  Lock-less and therefore safe from NMI context.
 */
static int ghes_estatus_cached(struct acpi_hest_generic_status *estatus)
{
	u32 len;
	int i, cached = 0;
	unsigned long long now;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	len = apei_estatus_len(estatus);
	rcu_read_lock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL)
			continue;
		if (len != cache->estatus_len)
			continue;	/* cheap pre-check before memcmp */
		cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
		if (memcmp(estatus, cache_estatus, len))
			continue;
		atomic_inc(&cache->count);	/* count hit even if stale */
		now = sched_clock();
		if (now - cache->time_in < GHES_ESTATUS_IN_CACHE_MAX_NSEC)
			cached = 1;	/* entry still fresh: throttle */
		break;
	}
	rcu_read_unlock();
	return cached;
}
550
/*
 * Allocate a cache entry from the lock-less pool and copy @estatus
 * into it.  Returns NULL when the global live-entry limit is reached
 * or the pool is exhausted.  Safe from NMI context.
 */
static struct ghes_estatus_cache *ghes_estatus_cache_alloc(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int alloced;
	u32 len, cache_len;
	struct ghes_estatus_cache *cache;
	struct acpi_hest_generic_status *cache_estatus;

	/* bound live entries: RCU-deferred frees can lag behind */
	alloced = atomic_add_return(1, &ghes_estatus_cache_alloced);
	if (alloced > GHES_ESTATUS_CACHE_ALLOCED_MAX) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	len = apei_estatus_len(estatus);
	cache_len = GHES_ESTATUS_CACHE_LEN(len);
	cache = (void *)gen_pool_alloc(ghes_estatus_pool, cache_len);
	if (!cache) {
		atomic_dec(&ghes_estatus_cache_alloced);
		return NULL;
	}
	cache_estatus = GHES_ESTATUS_FROM_CACHE(cache);
	memcpy(cache_estatus, estatus, len);
	cache->estatus_len = len;
	atomic_set(&cache->count, 0);
	cache->generic = generic;
	cache->time_in = sched_clock();
	return cache;
}
580
/* Return a cache entry to the pool and drop the live-entry count */
static void ghes_estatus_cache_free(struct ghes_estatus_cache *cache)
{
	u32 len;

	len = apei_estatus_len(GHES_ESTATUS_FROM_CACHE(cache));
	len = GHES_ESTATUS_CACHE_LEN(len);
	gen_pool_free(ghes_estatus_pool, (unsigned long)cache, len);
	atomic_dec(&ghes_estatus_cache_alloced);
}
590
/* call_rcu() callback: free the entry once all RCU readers are done */
static void ghes_estatus_cache_rcu_free(struct rcu_head *head)
{
	struct ghes_estatus_cache *cache;

	cache = container_of(head, struct ghes_estatus_cache, rcu);
	ghes_estatus_cache_free(cache);
}
598
/*
 * Insert @estatus into the throttle cache, evicting the slot that is
 * either empty, expired, or whose entry has the longest average
 * period between duplicate reports (i.e. the least frequent error).
 * The slot is claimed with cmpxchg so a concurrent adder cannot free
 * an entry that was already replaced.
 */
static void ghes_estatus_cache_add(
	struct acpi_hest_generic *generic,
	struct acpi_hest_generic_status *estatus)
{
	int i, slot = -1, count;
	unsigned long long now, duration, period, max_period = 0;
	struct ghes_estatus_cache *cache, *slot_cache = NULL, *new_cache;

	new_cache = ghes_estatus_cache_alloc(generic, estatus);
	if (new_cache == NULL)
		return;
	rcu_read_lock();
	now = sched_clock();
	for (i = 0; i < GHES_ESTATUS_CACHES_SIZE; i++) {
		cache = rcu_dereference(ghes_estatus_caches[i]);
		if (cache == NULL) {
			slot = i;	/* empty slot: best choice */
			slot_cache = NULL;
			break;
		}
		duration = now - cache->time_in;
		if (duration >= GHES_ESTATUS_IN_CACHE_MAX_NSEC) {
			slot = i;	/* expired entry: evict it */
			slot_cache = cache;
			break;
		}
		/* otherwise evict the entry with the longest average
		 * interval between duplicate reports */
		count = atomic_read(&cache->count);
		period = duration;
		do_div(period, (count + 1));
		if (period > max_period) {
			max_period = period;
			slot = i;
			slot_cache = cache;
		}
	}
	/* new_cache must be put into array after its contents are written */
	smp_wmb();
	if (slot != -1 && cmpxchg(ghes_estatus_caches + slot,
				  slot_cache, new_cache) == slot_cache) {
		if (slot_cache)
			call_rcu(&slot_cache->rcu, ghes_estatus_cache_rcu_free);
	} else
		ghes_estatus_cache_free(new_cache);	/* lost the race */
	rcu_read_unlock();
}
644
645static int ghes_proc(struct ghes *ghes)
646{
647	int rc;
648
649	rc = ghes_read_estatus(ghes, 0);
650	if (rc)
651		goto out;
652	if (!ghes_estatus_cached(ghes->estatus)) {
653		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
654			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
655	}
656	ghes_do_proc(ghes->estatus);
657out:
658	ghes_clear_estatus(ghes);
659	return 0;
660}
661
/*
 * (Re)arm the polling timer for a POLLED notification source, using
 * the firmware-specified poll interval (milliseconds).  A zero
 * interval disables polling for the source entirely.
 */
static void ghes_add_timer(struct ghes *ghes)
{
	struct acpi_hest_generic *g = ghes->generic;
	unsigned long expire;

	if (!g->notify.poll_interval) {
		pr_warning(FW_WARN GHES_PFX "Poll interval is 0 for generic hardware error source: %d, disabled.\n",
			   g->header.source_id);
		return;
	}
	expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
	ghes->timer.expires = round_jiffies_relative(expire);
	add_timer(&ghes->timer);
}
676
/* Timer callback for POLLED sources: process, then rearm unless exiting */
static void ghes_poll_func(unsigned long data)
{
	struct ghes *ghes = (void *)data;

	ghes_proc(ghes);
	if (!(ghes->flags & GHES_EXITING))
		ghes_add_timer(ghes);
}
685
686static irqreturn_t ghes_irq_func(int irq, void *data)
687{
688	struct ghes *ghes = data;
689	int rc;
690
691	rc = ghes_proc(ghes);
692	if (rc)
693		return IRQ_NONE;
694
695	return IRQ_HANDLED;
696}
697
/*
 * ACPI HED notifier: walk all SCI-notified sources under RCU and
 * process each.  Returns NOTIFY_OK if any source processed
 * successfully, NOTIFY_DONE otherwise.
 */
static int ghes_notify_sci(struct notifier_block *this,
				  unsigned long event, void *data)
{
	struct ghes *ghes;
	int ret = NOTIFY_DONE;

	rcu_read_lock();
	list_for_each_entry_rcu(ghes, &ghes_sci, list) {
		if (!ghes_proc(ghes))
			ret = NOTIFY_OK;
	}
	rcu_read_unlock();

	return ret;
}
713
/*
 * irq_work callback: drain the lock-less list of estatus saved by the
 * NMI handler, report each one in IRQ context (where printk is safe),
 * and return the nodes to the pool.
 */
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
	struct llist_node *llnode, *next, *tail = NULL;
	struct ghes_estatus_node *estatus_node;
	struct acpi_hest_generic *generic;
	struct acpi_hest_generic_status *estatus;
	u32 len, node_len;

	/*
	 * Because the time order of estatus in list is reversed,
	 * revert it back to proper order.
	 */
	llnode = llist_del_all(&ghes_estatus_llist);
	while (llnode) {
		next = llnode->next;
		llnode->next = tail;
		tail = llnode;
		llnode = next;
	}
	llnode = tail;
	while (llnode) {
		next = llnode->next;
		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
					   llnode);
		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
		len = apei_estatus_len(estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		ghes_do_proc(estatus);
		/* same throttling policy as the process-context path */
		if (!ghes_estatus_cached(estatus)) {
			generic = estatus_node->generic;
			if (ghes_print_estatus(NULL, generic, estatus))
				ghes_estatus_cache_add(generic, estatus);
		}
		gen_pool_free(ghes_estatus_pool, (unsigned long)estatus_node,
			      node_len);
		llnode = next;
	}
}
752
/*
 * NMI handler: read the estatus of every NMI-notified source.  A
 * fatal error panics immediately after printing.  Non-fatal errors
 * are copied into pool-backed nodes and queued, via irq_work, for
 * processing in IRQ context, because printk and most of the reporting
 * path are not NMI-safe.
 */
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
	struct ghes *ghes, *ghes_global = NULL;
	int sev, sev_global = -1;
	int ret = NMI_DONE;

	raw_spin_lock(&ghes_nmi_lock);
	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
		if (ghes_read_estatus(ghes, 1)) {
			ghes_clear_estatus(ghes);
			continue;
		}
		/* remember the worst error seen in this pass */
		sev = ghes_severity(ghes->estatus->error_severity);
		if (sev > sev_global) {
			sev_global = sev;
			ghes_global = ghes;
		}
		ret = NMI_HANDLED;
	}

	if (ret == NMI_DONE)
		goto out;	/* no source had an error pending */

	if (sev_global >= GHES_SEV_PANIC) {
		oops_begin();
		__ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global->generic,
				     ghes_global->estatus);
		/* reboot to log the error! */
		if (panic_timeout == 0)
			panic_timeout = ghes_panic_timeout;
		panic("Fatal hardware error!");
	}

	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
		u32 len, node_len;
		struct ghes_estatus_node *estatus_node;
		struct acpi_hest_generic_status *estatus;
#endif
		if (!(ghes->flags & GHES_TO_CLEAR))
			continue;
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
		if (ghes_estatus_cached(ghes->estatus))
			goto next;	/* duplicate: ack but don't queue */
		/* Save estatus for further processing in IRQ context */
		len = apei_estatus_len(ghes->estatus);
		node_len = GHES_ESTATUS_NODE_LEN(len);
		estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
						      node_len);
		if (estatus_node) {
			estatus_node->generic = ghes->generic;
			estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
			memcpy(estatus, ghes->estatus, len);
			llist_add(&estatus_node->llnode, &ghes_estatus_llist);
		}
next:
#endif
		ghes_clear_estatus(ghes);
	}
#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
	irq_work_queue(&ghes_proc_irq_work);
#endif

out:
	raw_spin_unlock(&ghes_nmi_lock);
	return ret;
}
820
/* Registered with the ACPI HED driver while any SCI source exists */
static struct notifier_block ghes_notifier_sci = {
	.notifier_call = ghes_notify_sci,
};
824
/*
 * Compute how much pool memory an NMI-notified source needs: one
 * error block per record firmware may preallocate, each term clamped
 * to sane bounds.
 */
static unsigned long ghes_esource_prealloc_size(
	const struct acpi_hest_generic *generic)
{
	unsigned long block_length, prealloc_records, prealloc_size;

	block_length = min_t(unsigned long, generic->error_block_length,
			     GHES_ESTATUS_MAX_SIZE);
	prealloc_records = max_t(unsigned long,
				 generic->records_to_preallocate, 1);
	prealloc_size = min_t(unsigned long, block_length * prealloc_records,
			      GHES_ESOURCE_PREALLOC_MAX_SIZE);

	return prealloc_size;
}
839
/*
 * Platform driver probe: set up one generic error source according to
 * its HEST-specified notification type (polled timer, external IRQ,
 * SCI, or NMI).
 */
static int __devinit ghes_probe(struct platform_device *ghes_dev)
{
	struct acpi_hest_generic *generic;
	struct ghes *ghes = NULL;
	unsigned long len;
	int rc = -EINVAL;

	generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data;
	if (!generic->enabled)
		return -ENODEV;

	/* reject unsupported notification types before allocating */
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
	case ACPI_HEST_NOTIFY_EXTERNAL:
	case ACPI_HEST_NOTIFY_SCI:
	case ACPI_HEST_NOTIFY_NMI:
		break;
	case ACPI_HEST_NOTIFY_LOCAL:
		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
			   generic->header.source_id);
		goto err;
	default:
		pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n",
			   generic->notify.type, generic->header.source_id);
		goto err;
	}

	rc = -EIO;
	if (generic->error_block_length <
	    sizeof(struct acpi_hest_generic_status)) {
		pr_warning(FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware error source: %d\n",
			   generic->error_block_length,
			   generic->header.source_id);
		goto err;
	}
	ghes = ghes_new(generic);
	if (IS_ERR(ghes)) {
		rc = PTR_ERR(ghes);
		ghes = NULL;
		goto err;
	}
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		ghes->timer.function = ghes_poll_func;
		ghes->timer.data = (unsigned long)ghes;
		init_timer_deferrable(&ghes->timer);
		ghes_add_timer(ghes);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		/* External interrupt vector is GSI */
		if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) {
			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err;
		}
		if (request_irq(ghes->irq, ghes_irq_func,
				0, "GHES IRQ", ghes)) {
			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
			       generic->header.source_id);
			goto err;
		}
		break;
	case ACPI_HEST_NOTIFY_SCI:
		mutex_lock(&ghes_list_mutex);
		/* first SCI source registers the shared HED notifier */
		if (list_empty(&ghes_sci))
			register_acpi_hed_notifier(&ghes_notifier_sci);
		list_add_rcu(&ghes->list, &ghes_sci);
		mutex_unlock(&ghes_list_mutex);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		/* grow the NMI-safe pool before this source can use it */
		len = ghes_esource_prealloc_size(generic);
		ghes_estatus_pool_expand(len);
		mutex_lock(&ghes_list_mutex);
		/* first NMI source registers the shared NMI handler */
		if (list_empty(&ghes_nmi))
			register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
						"ghes");
		list_add_rcu(&ghes->list, &ghes_nmi);
		mutex_unlock(&ghes_list_mutex);
		break;
	default:
		BUG();
	}
	platform_set_drvdata(ghes_dev, ghes);

	return 0;
err:
	if (ghes) {
		ghes_fini(ghes);
		kfree(ghes);
	}
	return rc;
}
932
/*
 * Platform driver remove: detach the source from its notification
 * mechanism, wait for in-flight handlers where needed, then free its
 * resources.
 */
static int __devexit ghes_remove(struct platform_device *ghes_dev)
{
	struct ghes *ghes;
	struct acpi_hest_generic *generic;
	unsigned long len;

	ghes = platform_get_drvdata(ghes_dev);
	generic = ghes->generic;

	ghes->flags |= GHES_EXITING;	/* stop the poll timer rearming */
	switch (generic->notify.type) {
	case ACPI_HEST_NOTIFY_POLLED:
		del_timer_sync(&ghes->timer);
		break;
	case ACPI_HEST_NOTIFY_EXTERNAL:
		free_irq(ghes->irq, ghes);
		break;
	case ACPI_HEST_NOTIFY_SCI:
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		/* last SCI source unregisters the shared HED notifier */
		if (list_empty(&ghes_sci))
			unregister_acpi_hed_notifier(&ghes_notifier_sci);
		mutex_unlock(&ghes_list_mutex);
		break;
	case ACPI_HEST_NOTIFY_NMI:
		mutex_lock(&ghes_list_mutex);
		list_del_rcu(&ghes->list);
		/* last NMI source unregisters the shared NMI handler */
		if (list_empty(&ghes_nmi))
			unregister_nmi_handler(NMI_LOCAL, "ghes");
		mutex_unlock(&ghes_list_mutex);
		/*
		 * To synchronize with NMI handler, ghes can only be
		 * freed after NMI handler finishes.
		 */
		synchronize_rcu();
		len = ghes_esource_prealloc_size(generic);
		ghes_estatus_pool_shrink(len);
		break;
	default:
		BUG();
		break;
	}

	ghes_fini(ghes);
	kfree(ghes);

	platform_set_drvdata(ghes_dev, NULL);

	return 0;
}
983
/* Bound to the "GHES" platform devices created during HEST parsing */
static struct platform_driver ghes_platform_driver = {
	.driver		= {
		.name	= "GHES",
		.owner	= THIS_MODULE,
	},
	.probe		= ghes_probe,
	.remove		= ghes_remove,
};
992
/*
 * Module init: verify ACPI/HEST/GHES are enabled, set up the atomic
 * ioremap area and the NMI-safe estatus pool, register the platform
 * driver, then negotiate APEI firmware-first mode via _OSC.
 */
static int __init ghes_init(void)
{
	int rc;

	if (acpi_disabled)
		return -ENODEV;

	if (hest_disable) {
		pr_info(GHES_PFX "HEST is not enabled!\n");
		return -EINVAL;
	}

	if (ghes_disable) {
		pr_info(GHES_PFX "GHES is not enabled!\n");
		return -EINVAL;
	}

	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);

	rc = ghes_ioremap_init();
	if (rc)
		goto err;

	rc = ghes_estatus_pool_init();
	if (rc)
		goto err_ioremap_exit;

	/* reserve room for the reporting-throttle cache entries */
	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
	if (rc)
		goto err_pool_exit;

	rc = platform_driver_register(&ghes_platform_driver);
	if (rc)
		goto err_pool_exit;

	rc = apei_osc_setup();
	if (rc == 0 && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
	else if (rc == 0 && !osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
	else if (rc && osc_sb_apei_support_acked)
		pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
	else
		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

	return 0;
err_pool_exit:
	ghes_estatus_pool_exit();
err_ioremap_exit:
	ghes_ioremap_exit();
err:
	return rc;
}
1047
/* Module unload: tear down in reverse order of ghes_init() */
static void __exit ghes_exit(void)
{
	platform_driver_unregister(&ghes_platform_driver);
	ghes_estatus_pool_exit();
	ghes_ioremap_exit();
}
1054
1055module_init(ghes_init);
1056module_exit(ghes_exit);
1057
1058MODULE_AUTHOR("Huang Ying");
1059MODULE_DESCRIPTION("APEI Generic Hardware Error Source support");
1060MODULE_LICENSE("GPL");
1061MODULE_ALIAS("platform:GHES");
1062