/*
 * MCE grading rules.
 * Copyright 2008, 2009 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 * Author: Andi Kleen
 */
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <asm/mce.h>

#include "mce-internal.h"

/*
 * Grade an MCE by severity. In general the most severe ones are processed
 * first. Since there are quite a lot of combinations, test the bits in a
 * table-driven way. The rules are simply processed in order; the first
 * match wins.
 *
 * Note this is only used for machine check exceptions; corrected errors
 * use much simpler rules. The exception handler still checks for corrected
 * errors, but only to leave them alone for the CMCI handler (except in
 * panic situations).
 */

enum context { IN_KERNEL = 1, IN_USER = 2 };
enum ser { SER_REQUIRED = 1, NO_SER = 2 };
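
/*
 * The "ser" and "context" fields of a rule are optional filters: a rule
 * marked SER_REQUIRED only applies on CPUs with software error recovery
 * support (mca_cfg.ser), NO_SER only applies without it, and KERNEL/USER
 * restrict a rule to the context the machine check was taken in.  A value
 * of zero means "don't care".
 */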

static struct severity {
	u64 mask;		/* bits to test in the bank's MCi_STATUS */
	u64 result;		/* required value of those bits */
	unsigned char sev;	/* severity returned when the rule matches */
	unsigned char mcgmask;	/* bits to test in MCG_STATUS */
	unsigned char mcgres;	/* required value of those bits */
	unsigned char ser;	/* SER_REQUIRED/NO_SER, 0 == don't care */
	unsigned char context;	/* IN_KERNEL/IN_USER, 0 == don't care */
	unsigned char covered;	/* rule has matched at least once (debugfs) */
	char *msg;		/* human readable description of the rule */
} severities[] = {
#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
#define  KERNEL		.context = IN_KERNEL
#define  USER		.context = IN_USER
#define  SER		.ser = SER_REQUIRED
#define  NOSER		.ser = NO_SER
#define  BITCLR(x)	.mask = x, .result = 0
#define  BITSET(x)	.mask = x, .result = x
#define  MCGMASK(x, y)	.mcgmask = x, .mcgres = y
#define  MASK(x, y)	.mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
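
	/*
	 * For illustration: the first table entry below,
	 *	MCESEV(NO, "Invalid", BITCLR(MCI_STATUS_VAL)),
	 * expands (roughly) to
	 *	{ .sev = MCE_NO_SEVERITY, .msg = "Invalid",
	 *	  .mask = MCI_STATUS_VAL, .result = 0 },
	 * i.e. it matches any bank whose VAL bit is clear and grades it as
	 * requiring no action.
	 */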

	MCESEV(
		NO, "Invalid",
		BITCLR(MCI_STATUS_VAL)
		),
	MCESEV(
		NO, "Not enabled",
		BITCLR(MCI_STATUS_EN)
		),
	MCESEV(
		PANIC, "Processor context corrupt",
		BITSET(MCI_STATUS_PCC)
		),
	/* When MCIP is not set something is very confused */
	MCESEV(
		PANIC, "MCIP not set in MCA handler",
		MCGMASK(MCG_STATUS_MCIP, 0)
		),
	/* Neither restart nor error IP -- no chance to recover -> PANIC */
	MCESEV(
		PANIC, "Neither restart nor error IP",
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
		),
	MCESEV(
		PANIC, "In kernel and no restart IP",
		KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
		),
	MCESEV(
		KEEP, "Corrected error",
		NOSER, BITCLR(MCI_STATUS_UC)
		),

	/* ignore OVER for UCNA */
	MCESEV(
		KEEP, "Uncorrected no action required",
		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
		),
	MCESEV(
		PANIC, "Illegal combination (UCNA with AR=1)",
		SER,
		MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
		),
	MCESEV(
		KEEP, "Non signalled machine check",
		SER, BITCLR(MCI_STATUS_S)
		),

	MCESEV(
		PANIC, "Action required with lost events",
		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
		),

	/* known AR MCACODs: */
#ifdef	CONFIG_MEMORY_FAILURE
	MCESEV(
		KEEP, "Action required but unaffected thread is continuable",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
		),
	MCESEV(
		AR, "Action required: data load error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
		USER
		),
	MCESEV(
		AR, "Action required: instruction fetch error in a user process",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
		USER
		),
#endif
	MCESEV(
		PANIC, "Action required: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
		),

	/* known AO MCACODs: */
	MCESEV(
		AO, "Action optional: memory scrubbing error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
		),
	MCESEV(
		AO, "Action optional: last level cache writeback error",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
		),
	MCESEV(
		SOME, "Action optional: unknown MCACOD",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
		),
	MCESEV(
		SOME, "Action optional with lost events",
		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
		),

	MCESEV(
		PANIC, "Overflowed uncorrected",
		BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
		),
	MCESEV(
		UC, "Uncorrected",
		BITSET(MCI_STATUS_UC)
		),
	MCESEV(
		SOME, "No match",
		BITSET(0)
		)	/* always matches. keep at end */
};
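
/*
 * A rough example of how the table is used: on an SER-capable CPU, a
 * valid, enabled, uncorrected error that is signalled (S=1) but not
 * action-required (AR=0), has not overflowed and carries a memory
 * scrubbing MCACOD falls through the earlier rules (assuming the
 * processor-context and MCIP/RIPV panic rules above do not fire) and
 * matches "Action optional: memory scrubbing error", so it is graded AO.
 * The final BITSET(0) entry matches anything, so every error gets some
 * grade.
 */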

/*
 * If mcgstatus indicated that ip/cs on the stack were
 * no good, then "m->cs" will be zero and we will have
 * to assume the worst case (IN_KERNEL) as we actually
 * have no idea what we were executing when the machine
 * check hit.
 * If we do have a good "m->cs" (or a faked one in the
 * case we were executing in VM86 mode) we can use it to
 * distinguish an exception taken in user mode from one
 * taken in the kernel.
 */
static int error_context(struct mce *m)
{
	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
}

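/*
 * Grade a single machine check: walk the severities table in order and
 * return the severity of the first rule whose status, mcgstatus, SER and
 * context constraints all match ("first match wins").  The table ends
 * with a catch-all entry, so the loop always terminates.  An uncorrected
 * error hit in kernel context is upgraded to PANIC when panic_on_oops is
 * set or tolerant is below 1.
 */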
int mce_severity(struct mce *m, int tolerant, char **msg)
{
	enum context ctx = error_context(m);
	struct severity *s;

	for (s = severities;; s++) {
		if ((m->status & s->mask) != s->result)
			continue;
		if ((m->mcgstatus & s->mcgmask) != s->mcgres)
			continue;
		if (s->ser == SER_REQUIRED && !mca_cfg.ser)
			continue;
		if (s->ser == NO_SER && mca_cfg.ser)
			continue;
		if (s->context && ctx != s->context)
			continue;
		if (msg)
			*msg = s->msg;
		s->covered = 1;
		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
			if (panic_on_oops || tolerant < 1)
				return MCE_PANIC_SEVERITY;
		}
		return s->sev;
	}
}

#ifdef CONFIG_DEBUG_FS
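/*
 * "severities-coverage" in the mce debugfs directory lists every rule in
 * the table together with its coverage flag, i.e. whether it has matched
 * since boot (or since the file was last written to; any write resets all
 * coverage flags).  Useful for checking that injected errors exercise the
 * intended rules.
 */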
static void *s_start(struct seq_file *f, loff_t *pos)
{
	if (*pos >= ARRAY_SIZE(severities))
		return NULL;
	return &severities[*pos];
}

static void *s_next(struct seq_file *f, void *data, loff_t *pos)
{
	if (++(*pos) >= ARRAY_SIZE(severities))
		return NULL;
	return &severities[*pos];
}

static void s_stop(struct seq_file *f, void *data)
{
}

static int s_show(struct seq_file *f, void *data)
{
	struct severity *ser = data;
	seq_printf(f, "%d\t%s\n", ser->covered, ser->msg);
	return 0;
}

static const struct seq_operations severities_seq_ops = {
	.start	= s_start,
	.next	= s_next,
	.stop	= s_stop,
	.show	= s_show,
};

static int severities_coverage_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &severities_seq_ops);
}

static ssize_t severities_coverage_write(struct file *file,
					 const char __user *ubuf,
					 size_t count, loff_t *ppos)
{
	int i;
	for (i = 0; i < ARRAY_SIZE(severities); i++)
		severities[i].covered = 0;
	return count;
}

static const struct file_operations severities_coverage_fops = {
	.open		= severities_coverage_open,
	.release	= seq_release,
	.read		= seq_read,
	.write		= severities_coverage_write,
	.llseek		= seq_lseek,
};

static int __init severities_debugfs_init(void)
{
	struct dentry *dmce, *fsev;

	dmce = mce_get_debugfs_dir();
	if (!dmce)
		goto err_out;

	fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
				   &severities_coverage_fops);
	if (!fsev)
		goto err_out;

	return 0;

err_out:
	return -ENOMEM;
}
late_initcall(severities_debugfs_init);
#endif /* CONFIG_DEBUG_FS */