edac_mc.c revision d3d09e18203dba16a9dbdb2b4cc673d90748cdd1
1/*
2 * edac_mc kernel module
3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 *	http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/ctype.h>
29#include <linux/edac.h>
30#include <linux/bitops.h>
31#include <asm/uaccess.h>
32#include <asm/page.h>
33#include <asm/edac.h>
34#include "edac_core.h"
35#include "edac_module.h"
36
37#define CREATE_TRACE_POINTS
38#define TRACE_INCLUDE_PATH ../../include/ras
39#include <ras/ras_event.h>
40
41/* lock to memory controller's control array */
42static DEFINE_MUTEX(mem_ctls_mutex);
43static LIST_HEAD(mc_devices);
44
45unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
46			         unsigned len)
47{
48	struct mem_ctl_info *mci = dimm->mci;
49	int i, n, count = 0;
50	char *p = buf;
51
52	for (i = 0; i < mci->n_layers; i++) {
53		n = snprintf(p, len, "%s %d ",
54			      edac_layer_name[mci->layers[i].type],
55			      dimm->location[i]);
56		p += n;
57		len -= n;
58		count += n;
59		if (!len)
60			break;
61	}
62
63	return count;
64}
65
66#ifdef CONFIG_EDAC_DEBUG
67
68static void edac_mc_dump_channel(struct rank_info *chan)
69{
70	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
71	edac_dbg(4, "    channel = %p\n", chan);
72	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
73	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
74}
75
76static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
77{
78	char location[80];
79
80	edac_dimm_info_location(dimm, location, sizeof(location));
81
82	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
83		 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
84		 number, location, dimm->csrow, dimm->cschannel);
85	edac_dbg(4, "  dimm = %p\n", dimm);
86	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
87	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
88	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
89	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
90}
91
92static void edac_mc_dump_csrow(struct csrow_info *csrow)
93{
94	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
95	edac_dbg(4, "  csrow = %p\n", csrow);
96	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
97	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
98	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
99	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
100	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
101	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
102}
103
104static void edac_mc_dump_mci(struct mem_ctl_info *mci)
105{
106	edac_dbg(3, "\tmci = %p\n", mci);
107	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
108	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
109	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
110	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
111	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
112		 mci->nr_csrows, mci->csrows);
113	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
114		 mci->tot_dimms, mci->dimms);
115	edac_dbg(3, "\tdev = %p\n", mci->pdev);
116	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
117		 mci->mod_name, mci->ctl_name);
118	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
119}
120
121#endif				/* CONFIG_EDAC_DEBUG */
122
123/*
124 * keep those in sync with the enum mem_type
125 */
126const char *edac_mem_types[] = {
127	"Empty csrow",
128	"Reserved csrow type",
129	"Unknown csrow type",
130	"Fast page mode RAM",
131	"Extended data out RAM",
132	"Burst Extended data out RAM",
133	"Single data rate SDRAM",
134	"Registered single data rate SDRAM",
135	"Double data rate SDRAM",
136	"Registered Double data rate SDRAM",
137	"Rambus DRAM",
138	"Unbuffered DDR2 RAM",
139	"Fully buffered DDR2",
140	"Registered DDR2 RAM",
141	"Rambus XDR",
142	"Unbuffered DDR3 RAM",
143	"Registered DDR3 RAM",
144};
145EXPORT_SYMBOL_GPL(edac_mem_types);
146
147/**
148 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
149 * @p:		pointer to a pointer with the memory offset to be used. At
150 *		return, this will be incremented to point to the next offset
151 * @size:	Size of the data structure to be reserved
152 * @n_elems:	Number of elements that should be reserved
153 *
154 * If 'size' is a constant, the compiler will optimize this whole function
155 * down to either a no-op or the addition of a constant to the value of '*p'.
156 *
157 * The 'p' pointer is absolutely needed to keep the proper advancing
158 * further in memory to the proper offsets when allocating the struct along
159 * with its embedded structs, as edac_device_alloc_ctl_info() does it
160 * above, for example.
161 *
162 * At return, the pointer 'p' will be incremented to be used on a next call
163 * to this function.
164 */
165void *edac_align_ptr(void **p, unsigned size, int n_elems)
166{
167	unsigned align, r;
168	void *ptr = *p;
169
170	*p += size * n_elems;
171
172	/*
173	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
174	 * 'size'.  Adjust 'p' so that its alignment is at least as
175	 * stringent as what the compiler would provide for X and return
176	 * the aligned result.
177	 * Here we assume that the alignment of a "long long" is the most
178	 * stringent alignment that the compiler will ever provide by default.
179	 * As far as I know, this is a reasonable assumption.
180	 */
181	if (size > sizeof(long))
182		align = sizeof(long long);
183	else if (size > sizeof(int))
184		align = sizeof(long);
185	else if (size > sizeof(short))
186		align = sizeof(int);
187	else if (size > sizeof(char))
188		align = sizeof(short);
189	else
190		return (char *)ptr;
191
192	r = (unsigned long)ptr % align;
193
194	if (r == 0)
195		return (char *)ptr;
196
197	*p += align - r;
198
199	return (void *)(((unsigned long)ptr) + align - r);
200}
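
/*
 * A minimal sketch of the single-shot allocation pattern this helper
 * supports (the names "struct foo", "foo", "counters" and "nr_counters"
 * are hypothetical, used only for illustration): run edac_align_ptr()
 * over a NULL base to compute aligned offsets, kzalloc() the accumulated
 * size once, then rebase every offset onto the real allocation.
 * edac_mc_alloc() below uses exactly this scheme for its layers, error
 * counters and private data.
 *
 *	struct foo *foo;
 *	u32 *counters;
 *	void *ptr = NULL;
 *	unsigned size;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	counters = edac_align_ptr(&ptr, sizeof(u32), nr_counters);
 *	size = (unsigned long)ptr;
 *
 *	foo = kzalloc(size, GFP_KERNEL);
 *	if (!foo)
 *		return NULL;
 *	counters = (u32 *)((char *)foo + (unsigned long)counters);
 */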
201
202static void _edac_mc_free(struct mem_ctl_info *mci)
203{
204	int i, chn, row;
205	struct csrow_info *csr;
206	const unsigned int tot_dimms = mci->tot_dimms;
207	const unsigned int tot_channels = mci->num_cschannel;
208	const unsigned int tot_csrows = mci->nr_csrows;
209
210	if (mci->dimms) {
211		for (i = 0; i < tot_dimms; i++)
212			kfree(mci->dimms[i]);
213		kfree(mci->dimms);
214	}
215	if (mci->csrows) {
216		for (row = 0; row < tot_csrows; row++) {
217			csr = mci->csrows[row];
218			if (csr) {
219				if (csr->channels) {
220					for (chn = 0; chn < tot_channels; chn++)
221						kfree(csr->channels[chn]);
222					kfree(csr->channels);
223				}
224				kfree(csr);
225			}
226		}
227		kfree(mci->csrows);
228	}
229	kfree(mci);
230}
231
232/**
233 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
234 * @mc_num:		Memory controller number
235 * @n_layers:		Number of MC hierarchy layers
236 * @layers:		Describes each layer as seen by the Memory Controller
237 * @sz_pvt:		size of private storage needed
238 *
239 *
240 * Everything is kmalloc'ed as one big chunk - more efficient.
241 * Only can be used if all structures have the same lifetime - otherwise
242 * you have to allocate and initialize your own structures.
243 *
244 * Use edac_mc_free() to free mc structures allocated by this function.
245 *
246 * NOTE: drivers handle multi-rank memories in different ways: in some
247 * drivers, one multi-rank memory stick is mapped as one entry, while, in
248 * others, a single multi-rank memory stick would be mapped into several
249 * entries. Currently, this function will allocate multiple struct dimm_info
250 * in such scenarios, as grouping the multiple ranks would require driver changes.
251 *
252 * Returns:
253 *	On failure: NULL
254 *	On success: struct mem_ctl_info pointer
255 */
256struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
257				   unsigned n_layers,
258				   struct edac_mc_layer *layers,
259				   unsigned sz_pvt)
260{
261	struct mem_ctl_info *mci;
262	struct edac_mc_layer *layer;
263	struct csrow_info *csr;
264	struct rank_info *chan;
265	struct dimm_info *dimm;
266	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
267	unsigned pos[EDAC_MAX_LAYERS];
268	unsigned size, tot_dimms = 1, count = 1;
269	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
270	void *pvt, *p, *ptr = NULL;
271	int i, j, row, chn, n, len, off;
272	bool per_rank = false;
273
274	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
275	/*
276	 * Calculate the total number of dimms and csrows/cschannels while
277	 * in the old API emulation mode
278	 */
279	for (i = 0; i < n_layers; i++) {
280		tot_dimms *= layers[i].size;
281		if (layers[i].is_virt_csrow)
282			tot_csrows *= layers[i].size;
283		else
284			tot_channels *= layers[i].size;
285
286		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
287			per_rank = true;
288	}
289
290	/* Figure out the offsets of the various items from the start of an mc
291	 * structure.  We want the alignment of each item to be at least as
292	 * stringent as what the compiler would provide if we could simply
293	 * hardcode everything into a single struct.
294	 */
295	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
296	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
297	for (i = 0; i < n_layers; i++) {
298		count *= layers[i].size;
299		edac_dbg(4, "errcount layer %d size %d\n", i, count);
300		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
301		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
302		tot_errcount += 2 * count;
303	}
304
305	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
306	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
307	size = ((unsigned long)pvt) + sz_pvt;
308
309	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
310		 size,
311		 tot_dimms,
312		 per_rank ? "ranks" : "dimms",
313		 tot_csrows * tot_channels);
314
315	mci = kzalloc(size, GFP_KERNEL);
316	if (mci == NULL)
317		return NULL;
318
319	/* Adjust pointers so they point within the memory we just allocated
320	 * rather than an imaginary chunk of memory located at address 0.
321	 */
322	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
323	for (i = 0; i < n_layers; i++) {
324		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
325		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
326	}
327	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
328
329	/* setup index and various internal pointers */
330	mci->mc_idx = mc_num;
331	mci->tot_dimms = tot_dimms;
332	mci->pvt_info = pvt;
333	mci->n_layers = n_layers;
334	mci->layers = layer;
335	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
336	mci->nr_csrows = tot_csrows;
337	mci->num_cschannel = tot_channels;
338	mci->mem_is_per_rank = per_rank;
339
340	/*
341	 * Allocate and fill the csrow/channels structs
342	 */
343	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
344	if (!mci->csrows)
345		goto error;
346	for (row = 0; row < tot_csrows; row++) {
347		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
348		if (!csr)
349			goto error;
350		mci->csrows[row] = csr;
351		csr->csrow_idx = row;
352		csr->mci = mci;
353		csr->nr_channels = tot_channels;
354		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
355					GFP_KERNEL);
356		if (!csr->channels)
357			goto error;
358
359		for (chn = 0; chn < tot_channels; chn++) {
360			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
361			if (!chan)
362				goto error;
363			csr->channels[chn] = chan;
364			chan->chan_idx = chn;
365			chan->csrow = csr;
366		}
367	}
368
369	/*
370	 * Allocate and fill the dimm structs
371	 */
372	mci->dimms  = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
373	if (!mci->dimms)
374		goto error;
375
376	memset(&pos, 0, sizeof(pos));
377	row = 0;
378	chn = 0;
379	for (i = 0; i < tot_dimms; i++) {
380		chan = mci->csrows[row]->channels[chn];
381		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
382		if (off < 0 || off >= tot_dimms) {
383			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
384			goto error;
385		}
386
387		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
388		if (!dimm)
389			goto error;
390		mci->dimms[off] = dimm;
391		dimm->mci = mci;
392
393		/*
394		 * Copy DIMM location and initialize it.
395		 */
396		len = sizeof(dimm->label);
397		p = dimm->label;
398		n = snprintf(p, len, "mc#%u", mc_num);
399		p += n;
400		len -= n;
401		for (j = 0; j < n_layers; j++) {
402			n = snprintf(p, len, "%s#%u",
403				     edac_layer_name[layers[j].type],
404				     pos[j]);
405			p += n;
406			len -= n;
407			dimm->location[j] = pos[j];
408
409			if (len <= 0)
410				break;
411		}
412
413		/* Link it to the csrows old API data */
414		chan->dimm = dimm;
415		dimm->csrow = row;
416		dimm->cschannel = chn;
417
418		/* Increment csrow location */
419		if (layers[0].is_virt_csrow) {
420			chn++;
421			if (chn == tot_channels) {
422				chn = 0;
423				row++;
424			}
425		} else {
426			row++;
427			if (row == tot_csrows) {
428				row = 0;
429				chn++;
430			}
431		}
432
433		/* Increment dimm location */
434		for (j = n_layers - 1; j >= 0; j--) {
435			pos[j]++;
436			if (pos[j] < layers[j].size)
437				break;
438			pos[j] = 0;
439		}
440	}
441
442	mci->op_state = OP_ALLOC;
443
444	/* at this point the mci is fully allocated, but not yet registered
445	 * with sysfs; to release it, call edac_mc_free(), which frees it
446	 * directly while still unregistered and, once edac_mc_add_mc() has
447	 * created the sysfs device, defers the actual free to the sysfs
448	 * teardown of that device
449	 */
450
451	return mci;
452
453error:
454	_edac_mc_free(mci);
455
456	return NULL;
457}
458EXPORT_SYMBOL_GPL(edac_mc_alloc);
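
/*
 * A hedged usage sketch (not lifted from any particular driver): a chip
 * driver describing a controller with 4 chip-select rows of 2 channels
 * each, plus some driver-private state, might call edac_mc_alloc() like
 * this ("struct my_pvt" is a hypothetical private type):
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *
 * Everything reachable from mci (csrows, channels, dimms, pvt_info) is
 * released again through edac_mc_free().
 */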
459
460/**
461 * edac_mc_free
462 *	'Free' a previously allocated 'mci' structure
463 * @mci: pointer to a struct mem_ctl_info structure
464 */
465void edac_mc_free(struct mem_ctl_info *mci)
466{
467	edac_dbg(1, "\n");
468
469	/* If we're not yet registered with sysfs free only what was allocated
470	 * in edac_mc_alloc().
471	 */
472	if (!device_is_registered(&mci->dev)) {
473		_edac_mc_free(mci);
474		return;
475	}
476
477	/* the mci instance is freed here, when the sysfs object is dropped */
478	edac_unregister_sysfs(mci);
479}
480EXPORT_SYMBOL_GPL(edac_mc_free);
481
482
483/**
484 * find_mci_by_dev
485 *
486 *	scan list of controllers looking for the one that manages
487 *	the 'dev' device
488 * @dev: pointer to the struct device associated with the MCI
489 */
490struct mem_ctl_info *find_mci_by_dev(struct device *dev)
491{
492	struct mem_ctl_info *mci;
493	struct list_head *item;
494
495	edac_dbg(3, "\n");
496
497	list_for_each(item, &mc_devices) {
498		mci = list_entry(item, struct mem_ctl_info, link);
499
500		if (mci->pdev == dev)
501			return mci;
502	}
503
504	return NULL;
505}
506EXPORT_SYMBOL_GPL(find_mci_by_dev);
507
508/*
509 * handler for EDAC to check if NMI type handler has asserted interrupt
510 */
511static int edac_mc_assert_error_check_and_clear(void)
512{
513	int old_state;
514
515	if (edac_op_state == EDAC_OPSTATE_POLL)
516		return 1;
517
518	old_state = edac_err_assert;
519	edac_err_assert = 0;
520
521	return old_state;
522}
523
524/*
525 * edac_mc_workq_function
526 *	performs the operation scheduled by a workq request
527 */
528static void edac_mc_workq_function(struct work_struct *work_req)
529{
530	struct delayed_work *d_work = to_delayed_work(work_req);
531	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
532
533	mutex_lock(&mem_ctls_mutex);
534
535	/* if this control struct has moved to the offline state, we are done */
536	if (mci->op_state == OP_OFFLINE) {
537		mutex_unlock(&mem_ctls_mutex);
538		return;
539	}
540
541	/* Only poll controllers that are running polled and have a check */
542	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
543		mci->edac_check(mci);
544
545	mutex_unlock(&mem_ctls_mutex);
546
547	/* Reschedule */
548	queue_delayed_work(edac_workqueue, &mci->work,
549			msecs_to_jiffies(edac_mc_get_poll_msec()));
550}
551
552/*
553 * edac_mc_workq_setup
554 *	initialize a workq item for this mci
555 *	passing in the new delay period in msec
556 *
557 *	locking model:
558 *
559 *		called with the mem_ctls_mutex held
560 */
561static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
562{
563	edac_dbg(0, "\n");
564
565	/* if this instance is not in the POLL state, then simply return */
566	if (mci->op_state != OP_RUNNING_POLL)
567		return;
568
569	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
570	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
571}
572
573/*
574 * edac_mc_workq_teardown
575 *	stop the workq processing on this mci
576 *
577 *	locking model:
578 *
579 *		called WITHOUT lock held
580 */
581static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
582{
583	int status;
584
585	if (mci->op_state != OP_RUNNING_POLL)
586		return;
587
588	status = cancel_delayed_work(&mci->work);
589	if (status == 0) {
590		edac_dbg(0, "not canceled, flush the queue\n");
591
592		/* workq instance might be running, wait for it */
593		flush_workqueue(edac_workqueue);
594	}
595}
596
597/*
598 * edac_mc_reset_delay_period(int value)
599 *
600 *	user space has updated our poll period value, need to
601 *	reset our workq delays
602 */
603void edac_mc_reset_delay_period(int value)
604{
605	struct mem_ctl_info *mci;
606	struct list_head *item;
607
608	mutex_lock(&mem_ctls_mutex);
609
610	list_for_each(item, &mc_devices) {
611		mci = list_entry(item, struct mem_ctl_info, link);
612
613		edac_mc_workq_setup(mci, (unsigned long) value);
614	}
615
616	mutex_unlock(&mem_ctls_mutex);
617}
618
619
620
621/* Return 0 on success, 1 on failure.
622 * Before calling this function, caller must
623 * assign a unique value to mci->mc_idx.
624 *
625 *	locking model:
626 *
627 *		called with the mem_ctls_mutex lock held
628 */
629static int add_mc_to_global_list(struct mem_ctl_info *mci)
630{
631	struct list_head *item, *insert_before;
632	struct mem_ctl_info *p;
633
634	insert_before = &mc_devices;
635
636	p = find_mci_by_dev(mci->pdev);
637	if (unlikely(p != NULL))
638		goto fail0;
639
640	list_for_each(item, &mc_devices) {
641		p = list_entry(item, struct mem_ctl_info, link);
642
643		if (p->mc_idx >= mci->mc_idx) {
644			if (unlikely(p->mc_idx == mci->mc_idx))
645				goto fail1;
646
647			insert_before = item;
648			break;
649		}
650	}
651
652	list_add_tail_rcu(&mci->link, insert_before);
653	atomic_inc(&edac_handlers);
654	return 0;
655
656fail0:
657	edac_printk(KERN_WARNING, EDAC_MC,
658		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
659		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
660	return 1;
661
662fail1:
663	edac_printk(KERN_WARNING, EDAC_MC,
664		"bug in low-level driver: attempt to assign\n"
665		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
666	return 1;
667}
668
669static void del_mc_from_global_list(struct mem_ctl_info *mci)
670{
671	atomic_dec(&edac_handlers);
672	list_del_rcu(&mci->link);
673
674	/* these are for safe removal of devices from global list while
675	 * NMI handlers may be traversing list
676	 */
677	synchronize_rcu();
678	INIT_LIST_HEAD(&mci->link);
679}
680
681/**
682 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
683 *
684 * If found, return a pointer to the structure.
685 * Else return NULL.
686 *
687 * Caller must hold mem_ctls_mutex.
688 */
689struct mem_ctl_info *edac_mc_find(int idx)
690{
691	struct list_head *item;
692	struct mem_ctl_info *mci;
693
694	list_for_each(item, &mc_devices) {
695		mci = list_entry(item, struct mem_ctl_info, link);
696
697		if (mci->mc_idx >= idx) {
698			if (mci->mc_idx == idx)
699				return mci;
700
701			break;
702		}
703	}
704
705	return NULL;
706}
707EXPORT_SYMBOL(edac_mc_find);
708
709/**
710 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
711 *                 create sysfs entries associated with mci structure
712 * @mci: pointer to the mci structure to be added to the list
713 *
714 * Return:
715 *	0	Success
716 *	!0	Failure
717 */
718
719/* FIXME - should a warning be printed if no error detection? correction? */
720int edac_mc_add_mc(struct mem_ctl_info *mci)
721{
722	edac_dbg(0, "\n");
723
724#ifdef CONFIG_EDAC_DEBUG
725	if (edac_debug_level >= 3)
726		edac_mc_dump_mci(mci);
727
728	if (edac_debug_level >= 4) {
729		int i;
730
731		for (i = 0; i < mci->nr_csrows; i++) {
732			struct csrow_info *csrow = mci->csrows[i];
733			u32 nr_pages = 0;
734			int j;
735
736			for (j = 0; j < csrow->nr_channels; j++)
737				nr_pages += csrow->channels[j]->dimm->nr_pages;
738			if (!nr_pages)
739				continue;
740			edac_mc_dump_csrow(csrow);
741			for (j = 0; j < csrow->nr_channels; j++)
742				if (csrow->channels[j]->dimm->nr_pages)
743					edac_mc_dump_channel(csrow->channels[j]);
744		}
745		for (i = 0; i < mci->tot_dimms; i++)
746			if (mci->dimms[i]->nr_pages)
747				edac_mc_dump_dimm(mci->dimms[i], i);
748	}
749#endif
750	mutex_lock(&mem_ctls_mutex);
751
752	if (add_mc_to_global_list(mci))
753		goto fail0;
754
755	/* set load time so that error rate can be tracked */
756	mci->start_time = jiffies;
757
758	if (edac_create_sysfs_mci_device(mci)) {
759		edac_mc_printk(mci, KERN_WARNING,
760			"failed to create sysfs device\n");
761		goto fail1;
762	}
763
764	/* If there IS a check routine, then we are running POLLED */
765	if (mci->edac_check != NULL) {
766		/* This instance is NOW RUNNING */
767		mci->op_state = OP_RUNNING_POLL;
768
769		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
770	} else {
771		mci->op_state = OP_RUNNING_INTERRUPT;
772	}
773
774	/* Report action taken */
775	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
776		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
777
778	mutex_unlock(&mem_ctls_mutex);
779	return 0;
780
781fail1:
782	del_mc_from_global_list(mci);
783
784fail0:
785	mutex_unlock(&mem_ctls_mutex);
786	return 1;
787}
788EXPORT_SYMBOL_GPL(edac_mc_add_mc);
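
/*
 * A minimal probe-time sketch of how a driver typically wires this up,
 * reusing the layer setup from the edac_mc_alloc() sketch above; "pdev",
 * "my_check", the module/controller names and the capability flags shown
 * are illustrative, not taken from a real driver.  A non-NULL ->edac_check
 * puts the instance into OP_RUNNING_POLL and the workqueue above calls it
 * periodically; otherwise the instance is marked OP_RUNNING_INTERRUPT.
 *
 *	mci->pdev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "my_edac_driver";
 *	mci->ctl_name = "my_controller";
 *	mci->edac_check = my_check;
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */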
789
790/**
791 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
792 *                 remove mci structure from global list
793 * @dev: Pointer to the 'struct device' representing the mci structure to remove.
794 *
795 * Return pointer to removed mci structure, or NULL if device not found.
796 */
797struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
798{
799	struct mem_ctl_info *mci;
800
801	edac_dbg(0, "\n");
802
803	mutex_lock(&mem_ctls_mutex);
804
805	/* find the requested mci struct in the global list */
806	mci = find_mci_by_dev(dev);
807	if (mci == NULL) {
808		mutex_unlock(&mem_ctls_mutex);
809		return NULL;
810	}
811
812	del_mc_from_global_list(mci);
813	mutex_unlock(&mem_ctls_mutex);
814
815	/* flush workq processes */
816	edac_mc_workq_teardown(mci);
817
818	/* marking MCI offline */
819	mci->op_state = OP_OFFLINE;
820
821	/* remove from sysfs */
822	edac_remove_sysfs_mci_device(mci);
823
824	edac_printk(KERN_INFO, EDAC_MC,
825		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
826		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
827
828	return mci;
829}
830EXPORT_SYMBOL_GPL(edac_mc_del_mc);
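
/*
 * The matching remove-time sketch (again with a hypothetical "pdev"):
 * take the instance off the global list and out of sysfs first, then
 * free it.
 *
 *	struct mem_ctl_info *mci;
 *
 *	mci = edac_mc_del_mc(&pdev->dev);
 *	if (mci)
 *		edac_mc_free(mci);
 */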
831
832static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
833				u32 size)
834{
835	struct page *pg;
836	void *virt_addr;
837	unsigned long flags = 0;
838
839	edac_dbg(3, "\n");
840
841	/* ECC error page was not in our memory. Ignore it. */
842	if (!pfn_valid(page))
843		return;
844
845	/* Find the actual page structure then map it and fix */
846	pg = pfn_to_page(page);
847
848	if (PageHighMem(pg))
849		local_irq_save(flags);
850
851	virt_addr = kmap_atomic(pg);
852
853	/* Perform architecture specific atomic scrub operation */
854	atomic_scrub(virt_addr + offset, size);
855
856	/* Unmap and complete */
857	kunmap_atomic(virt_addr);
858
859	if (PageHighMem(pg))
860		local_irq_restore(flags);
861}
862
863/* FIXME - should return -1 */
864int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
865{
866	struct csrow_info **csrows = mci->csrows;
867	int row, i, j, n;
868
869	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
870	row = -1;
871
872	for (i = 0; i < mci->nr_csrows; i++) {
873		struct csrow_info *csrow = csrows[i];
874		n = 0;
875		for (j = 0; j < csrow->nr_channels; j++) {
876			struct dimm_info *dimm = csrow->channels[j]->dimm;
877			n += dimm->nr_pages;
878		}
879		if (n == 0)
880			continue;
881
882		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
883			 mci->mc_idx,
884			 csrow->first_page, page, csrow->last_page,
885			 csrow->page_mask);
886
887		if ((page >= csrow->first_page) &&
888		    (page <= csrow->last_page) &&
889		    ((page & csrow->page_mask) ==
890		     (csrow->first_page & csrow->page_mask))) {
891			row = i;
892			break;
893		}
894	}
895
896	if (row == -1)
897		edac_mc_printk(mci, KERN_ERR,
898			"could not look up page error address %lx\n",
899			(unsigned long)page);
900
901	return row;
902}
903EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
904
905const char *edac_layer_name[] = {
906	[EDAC_MC_LAYER_BRANCH] = "branch",
907	[EDAC_MC_LAYER_CHANNEL] = "channel",
908	[EDAC_MC_LAYER_SLOT] = "slot",
909	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
910};
911EXPORT_SYMBOL_GPL(edac_layer_name);
912
913static void edac_inc_ce_error(struct mem_ctl_info *mci,
914			      bool enable_per_layer_report,
915			      const int pos[EDAC_MAX_LAYERS],
916			      const u16 count)
917{
918	int i, index = 0;
919
920	mci->ce_mc += count;
921
922	if (!enable_per_layer_report) {
923		mci->ce_noinfo_count += count;
924		return;
925	}
926
927	for (i = 0; i < mci->n_layers; i++) {
928		if (pos[i] < 0)
929			break;
930		index += pos[i];
931		mci->ce_per_layer[i][index] += count;
932
933		if (i < mci->n_layers - 1)
934			index *= mci->layers[i + 1].size;
935	}
936}
937
938static void edac_inc_ue_error(struct mem_ctl_info *mci,
939				    bool enable_per_layer_report,
940				    const int pos[EDAC_MAX_LAYERS],
941				    const u16 count)
942{
943	int i, index = 0;
944
945	mci->ue_mc += count;
946
947	if (!enable_per_layer_report) {
948		mci->ce_noinfo_count += count;
949		return;
950	}
951
952	for (i = 0; i < mci->n_layers; i++) {
953		if (pos[i] < 0)
954			break;
955		index += pos[i];
956		mci->ue_per_layer[i][index] += count;
957
958		if (i < mci->n_layers - 1)
959			index *= mci->layers[i + 1].size;
960	}
961}
962
963static void edac_ce_error(struct mem_ctl_info *mci,
964			  const u16 error_count,
965			  const int pos[EDAC_MAX_LAYERS],
966			  const char *msg,
967			  const char *location,
968			  const char *label,
969			  const char *detail,
970			  const char *other_detail,
971			  const bool enable_per_layer_report,
972			  const unsigned long page_frame_number,
973			  const unsigned long offset_in_page,
974			  long grain)
975{
976	unsigned long remapped_page;
977	char *msg_aux = "";
978
979	if (*msg)
980		msg_aux = " ";
981
982	if (edac_mc_get_log_ce()) {
983		if (other_detail && *other_detail)
984			edac_mc_printk(mci, KERN_WARNING,
985				       "%d CE %s%son %s (%s %s - %s)\n",
986				       error_count, msg, msg_aux, label,
987				       location, detail, other_detail);
988		else
989			edac_mc_printk(mci, KERN_WARNING,
990				       "%d CE %s%son %s (%s %s)\n",
991				       error_count, msg, msg_aux, label,
992				       location, detail);
993	}
994	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
995
996	if (mci->scrub_mode & SCRUB_SW_SRC) {
997		/*
998		 * Some memory controllers (called MCs below) can remap
999		 * memory so that it is still available at a different
1000		 * address when PCI devices map into memory.
1001		 * MCs that can't do this lose the memory where PCI
1002		 * devices are mapped. This mapping is MC-dependent
1003		 * and so we call back into the MC driver for it to
1004		 * map the MC page to a physical (CPU) page which can
1005		 * then be mapped to a virtual page - which can then
1006		 * be scrubbed.
1007		 */
1008		remapped_page = mci->ctl_page_to_phys ?
1009			mci->ctl_page_to_phys(mci, page_frame_number) :
1010			page_frame_number;
1011
1012		edac_mc_scrub_block(remapped_page,
1013					offset_in_page, grain);
1014	}
1015}
1016
1017static void edac_ue_error(struct mem_ctl_info *mci,
1018			  const u16 error_count,
1019			  const int pos[EDAC_MAX_LAYERS],
1020			  const char *msg,
1021			  const char *location,
1022			  const char *label,
1023			  const char *detail,
1024			  const char *other_detail,
1025			  const bool enable_per_layer_report)
1026{
1027	char *msg_aux = "";
1028
1029	if (*msg)
1030		msg_aux = " ";
1031
1032	if (edac_mc_get_log_ue()) {
1033		if (other_detail && *other_detail)
1034			edac_mc_printk(mci, KERN_WARNING,
1035				       "%d UE %s%son %s (%s %s - %s)\n",
1036				       error_count, msg, msg_aux, label,
1037				       location, detail, other_detail);
1038		else
1039			edac_mc_printk(mci, KERN_WARNING,
1040				       "%d UE %s%son %s (%s %s)\n",
1041				       error_count, msg, msg_aux, label,
1042				       location, detail);
1043	}
1044
1045	if (edac_mc_get_panic_on_ue()) {
1046		if (other_detail && *other_detail)
1047			panic("UE %s%son %s (%s%s - %s)\n",
1048			      msg, msg_aux, label, location, detail, other_detail);
1049		else
1050			panic("UE %s%son %s (%s%s)\n",
1051			      msg, msg_aux, label, location, detail);
1052	}
1053
1054	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
1055}
1056
1057#define OTHER_LABEL " or "
1058
1059/**
1060 * edac_mc_handle_error - reports a memory event to userspace
1061 *
1062 * @type:		severity of the error (CE/UE/Fatal)
1063 * @mci:		a struct mem_ctl_info pointer
1064 * @error_count:	Number of errors of the same type
1065 * @page_frame_number:	mem page where the error occurred
1066 * @offset_in_page:	offset of the error inside the page
1067 * @syndrome:		ECC syndrome
1068 * @top_layer:		Memory layer[0] position
1069 * @mid_layer:		Memory layer[1] position
1070 * @low_layer:		Memory layer[2] position
1071 * @msg:		Message meaningful to the end users that
1072 *			explains the event
1073 * @other_detail:	Technical details about the event that
1074 *			may help hardware manufacturers and
1075 *			EDAC developers to analyse the event
1076 */
1077void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1078			  struct mem_ctl_info *mci,
1079			  const u16 error_count,
1080			  const unsigned long page_frame_number,
1081			  const unsigned long offset_in_page,
1082			  const unsigned long syndrome,
1083			  const int top_layer,
1084			  const int mid_layer,
1085			  const int low_layer,
1086			  const char *msg,
1087			  const char *other_detail)
1088{
1089	/* FIXME: too much for stack: move it to some pre-allocated area */
1090	char detail[80], location[80];
1091	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1092	char *p;
1093	int row = -1, chan = -1;
1094	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1095	int i;
1096	long grain;
1097	bool enable_per_layer_report = false;
1098	u8 grain_bits;
1099
1100	edac_dbg(3, "MC%d\n", mci->mc_idx);
1101
1102	/*
1103	 * Check if the event report is consistent and if the memory
1104	 * location is known. If it is known, enable_per_layer_report will be
1105	 * true, the DIMM(s) label info will be filled and the per-layer
1106	 * error counters will be incremented.
1107	 */
1108	for (i = 0; i < mci->n_layers; i++) {
1109		if (pos[i] >= (int)mci->layers[i].size) {
1110
1111			edac_mc_printk(mci, KERN_ERR,
1112				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1113				       edac_layer_name[mci->layers[i].type],
1114				       pos[i], mci->layers[i].size);
1115			/*
1116			 * Instead of just returning, let's use what's known
1117			 * about the error. The increment routines and the
1118			 * DIMM filter logic will do the right thing by
1119			 * pointing to the likely damaged DIMMs.
1120			 */
1121			pos[i] = -1;
1122		}
1123		if (pos[i] >= 0)
1124			enable_per_layer_report = true;
1125	}
1126
1127	/*
1128	 * Get the dimm label/grain that applies to the match criteria.
1129	 * As the error algorithm may not be able to point to just one memory
1130	 * stick, the logic here will get all possible labels that could
1131	 * potentially be affected by the error.
1132	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1133	 * to have only the MC channel and the MC dimm (also called "branch")
1134	 * but the channel is not known, as the memory is arranged in pairs,
1135	 * where each memory stick belongs to a separate channel within the same
1136	 * branch.
1137	 */
1138	grain = 0;
1139	p = label;
1140	*p = '\0';
1141
1142	for (i = 0; i < mci->tot_dimms; i++) {
1143		struct dimm_info *dimm = mci->dimms[i];
1144
1145		if (top_layer >= 0 && top_layer != dimm->location[0])
1146			continue;
1147		if (mid_layer >= 0 && mid_layer != dimm->location[1])
1148			continue;
1149		if (low_layer >= 0 && low_layer != dimm->location[2])
1150			continue;
1151
1152		/* get the max grain, over the error match range */
1153		if (dimm->grain > grain)
1154			grain = dimm->grain;
1155
1156		/*
1157		 * If the error is memory-controller wide, there's no need to
1158		 * seek for the affected DIMMs because the whole
1159		 * channel/memory controller/...  may be affected.
1160		 * Also, don't show errors for empty DIMM slots.
1161		 */
1162		if (enable_per_layer_report && dimm->nr_pages) {
1163			if (p != label) {
1164				strcpy(p, OTHER_LABEL);
1165				p += strlen(OTHER_LABEL);
1166			}
1167			strcpy(p, dimm->label);
1168			p += strlen(p);
1169			*p = '\0';
1170
1171			/*
1172			 * get csrow/channel of the DIMM, in order to allow
1173			 * incrementing the compat API counters
1174			 */
1175			edac_dbg(4, "%s csrows map: (%d,%d)\n",
1176				 mci->mem_is_per_rank ? "rank" : "dimm",
1177				 dimm->csrow, dimm->cschannel);
1178			if (row == -1)
1179				row = dimm->csrow;
1180			else if (row >= 0 && row != dimm->csrow)
1181				row = -2;
1182
1183			if (chan == -1)
1184				chan = dimm->cschannel;
1185			else if (chan >= 0 && chan != dimm->cschannel)
1186				chan = -2;
1187		}
1188	}
1189
1190	if (!enable_per_layer_report) {
1191		strcpy(label, "any memory");
1192	} else {
1193		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1194		if (p == label)
1195			strcpy(label, "unknown memory");
1196		if (type == HW_EVENT_ERR_CORRECTED) {
1197			if (row >= 0) {
1198				mci->csrows[row]->ce_count += error_count;
1199				if (chan >= 0)
1200					mci->csrows[row]->channels[chan]->ce_count += error_count;
1201			}
1202		} else
1203			if (row >= 0)
1204				mci->csrows[row]->ue_count += error_count;
1205	}
1206
1207	/* Fill the RAM location data */
1208	p = location;
1209
1210	for (i = 0; i < mci->n_layers; i++) {
1211		if (pos[i] < 0)
1212			continue;
1213
1214		p += sprintf(p, "%s:%d ",
1215			     edac_layer_name[mci->layers[i].type],
1216			     pos[i]);
1217	}
1218	if (p > location)
1219		*(p - 1) = '\0';
1220
1221	/* Report the error via the trace interface */
1222	grain_bits = fls_long(grain) + 1;
1223	trace_mc_event(type, msg, label, error_count,
1224		       mci->mc_idx, top_layer, mid_layer, low_layer,
1225		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
1226		       grain_bits, syndrome, other_detail);
1227
1228	/* Memory type dependent details about the error */
1229	if (type == HW_EVENT_ERR_CORRECTED) {
1230		snprintf(detail, sizeof(detail),
1231			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1232			page_frame_number, offset_in_page,
1233			grain, syndrome);
1234		edac_ce_error(mci, error_count, pos, msg, location, label,
1235			      detail, other_detail, enable_per_layer_report,
1236			      page_frame_number, offset_in_page, grain);
1237	} else {
1238		snprintf(detail, sizeof(detail),
1239			"page:0x%lx offset:0x%lx grain:%ld",
1240			page_frame_number, offset_in_page, grain);
1241
1242		edac_ue_error(mci, error_count, pos, msg, location, label,
1243			      detail, other_detail, enable_per_layer_report);
1244	}
1245}
1246EXPORT_SYMBOL_GPL(edac_mc_handle_error);
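
/*
 * A hedged example of reporting one corrected error from a driver's check
 * or interrupt path, assuming the two-layer (csrow/channel) setup from the
 * edac_mc_alloc() sketch earlier; "page", "offset", "syndrome", "csrow" and
 * "channel" stand for values the driver has already decoded.  The third
 * argument is the error count; the three layer positions follow the page,
 * offset and syndrome, and -1 in a layer position means "unknown at this
 * layer" (if all positions are unknown, the error is accounted against
 * "any memory" instead of a specific DIMM label):
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *			     page, offset, syndrome,
 *			     csrow, channel, -1,
 *			     "single-bit error", "");
 */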
1247