edac_mc.c revision 956b9ba156dbfdb9cede2b2927ddf8be2233b3a7
1/*
2 * edac_mc kernel module
3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 *	http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/ctype.h>
29#include <linux/edac.h>
30#include <linux/bitops.h>
31#include <asm/uaccess.h>
32#include <asm/page.h>
33#include <asm/edac.h>
34#include "edac_core.h"
35#include "edac_module.h"
36
37#define CREATE_TRACE_POINTS
38#define TRACE_INCLUDE_PATH ../../include/ras
39#include <ras/ras_event.h>
40
41/* lock to memory controller's control array */
42static DEFINE_MUTEX(mem_ctls_mutex);
43static LIST_HEAD(mc_devices);
44
45#ifdef CONFIG_EDAC_DEBUG
46
47static void edac_mc_dump_channel(struct rank_info *chan)
48{
49	edac_dbg(4, "\tchannel = %p\n", chan);
50	edac_dbg(4, "\tchannel->chan_idx = %d\n", chan->chan_idx);
51	edac_dbg(4, "\tchannel->csrow = %p\n", chan->csrow);
52	edac_dbg(4, "\tchannel->dimm = %p\n", chan->dimm);
53}
54
55static void edac_mc_dump_dimm(struct dimm_info *dimm)
56{
57	int i;
58
59	edac_dbg(4, "\tdimm = %p\n", dimm);
60	edac_dbg(4, "\tdimm->label = '%s'\n", dimm->label);
61	edac_dbg(4, "\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
62	edac_dbg(4, "\tdimm location ");
63	for (i = 0; i < dimm->mci->n_layers; i++) {
64		printk(KERN_CONT "%d", dimm->location[i]);
65		if (i < dimm->mci->n_layers - 1)
66			printk(KERN_CONT ".");
67	}
68	printk(KERN_CONT "\n");
69	edac_dbg(4, "\tdimm->grain = %d\n", dimm->grain);
70	edac_dbg(4, "\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
71}
72
73static void edac_mc_dump_csrow(struct csrow_info *csrow)
74{
75	edac_dbg(4, "\tcsrow = %p\n", csrow);
76	edac_dbg(4, "\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
77	edac_dbg(4, "\tcsrow->first_page = 0x%lx\n", csrow->first_page);
78	edac_dbg(4, "\tcsrow->last_page = 0x%lx\n", csrow->last_page);
79	edac_dbg(4, "\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
80	edac_dbg(4, "\tcsrow->nr_channels = %d\n", csrow->nr_channels);
81	edac_dbg(4, "\tcsrow->channels = %p\n", csrow->channels);
82	edac_dbg(4, "\tcsrow->mci = %p\n", csrow->mci);
83}
84
85static void edac_mc_dump_mci(struct mem_ctl_info *mci)
86{
87	edac_dbg(3, "\tmci = %p\n", mci);
88	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
89	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
90	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
91	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
92	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
93		 mci->nr_csrows, mci->csrows);
94	edac_dbg(3, "\tmci->tot_dimms = %d, dimms = %p\n",
95		 mci->tot_dimms, mci->dimms);
96	edac_dbg(3, "\tdev = %p\n", mci->pdev);
97	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
98		 mci->mod_name, mci->ctl_name);
99	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
100}
101
102#endif				/* CONFIG_EDAC_DEBUG */
103
104/*
105 * keep those in sync with the enum mem_type
106 */
107const char *edac_mem_types[] = {
108	"Empty csrow",
109	"Reserved csrow type",
110	"Unknown csrow type",
111	"Fast page mode RAM",
112	"Extended data out RAM",
113	"Burst Extended data out RAM",
114	"Single data rate SDRAM",
115	"Registered single data rate SDRAM",
116	"Double data rate SDRAM",
117	"Registered Double data rate SDRAM",
118	"Rambus DRAM",
119	"Unbuffered DDR2 RAM",
120	"Fully buffered DDR2",
121	"Registered DDR2 RAM",
122	"Rambus XDR",
123	"Unbuffered DDR3 RAM",
124	"Registered DDR3 RAM",
125};
126EXPORT_SYMBOL_GPL(edac_mem_types);
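/*
 * The table above is indexed by enum mem_type, so any new enum entry needs a
 * matching string here.  A minimal, hypothetical driver-side sketch (the
 * MEM_DDR3 value is just an assumed example of a detected DIMM type):
 *
 *	dimm->mtype = MEM_DDR3;
 *	edac_printk(KERN_INFO, EDAC_MC, "DIMM type: %s\n",
 *		    edac_mem_types[dimm->mtype]);
 */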
127
128/**
129 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
130 * @p:		pointer to a pointer with the memory offset to be used. At
131 *		return, this will be incremented to point to the next offset
132 * @size:	Size of the data structure to be reserved
133 * @n_elems:	Number of elements that should be reserved
134 *
135 * If 'size' is a constant, the compiler will optimize this whole function
136 * down to either a no-op or the addition of a constant to the value of '*p'.
137 *
138 * The 'p' pointer is absolutely needed to keep advancing further in
139 * memory to the proper offsets when allocating the struct along with
140 * its embedded structs, as edac_device_alloc_ctl_info() does, for
141 * example.
142 *
143 * On return, the pointer 'p' will have been incremented, ready to be
144 * used on the next call to this function.
145 */
146void *edac_align_ptr(void **p, unsigned size, int n_elems)
147{
148	unsigned align, r;
149	void *ptr = *p;
150
151	*p += size * n_elems;
152
153	/*
154	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
155	 * 'size'.  Adjust 'p' so that its alignment is at least as
156	 * stringent as what the compiler would provide for X and return
157	 * the aligned result.
158	 * Here we assume that the alignment of a "long long" is the most
159	 * stringent alignment that the compiler will ever provide by default.
160	 * As far as I know, this is a reasonable assumption.
161	 */
162	if (size > sizeof(long))
163		align = sizeof(long long);
164	else if (size > sizeof(int))
165		align = sizeof(long);
166	else if (size > sizeof(short))
167		align = sizeof(int);
168	else if (size > sizeof(char))
169		align = sizeof(short);
170	else
171		return (char *)ptr;
172
173	r = size % align;
174
175	if (r == 0)
176		return (char *)ptr;
177
178	*p += align - r;
179
180	return (void *)(((unsigned long)ptr) + align - r);
181}
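/*
 * A condensed sketch of the two-pass pattern built on edac_align_ptr(), as
 * used by edac_mc_alloc() below: a first pass with a NULL base computes the
 * aligned offsets and the total size, then the offsets are re-based into a
 * single kzalloc'ed chunk.  'struct foo' and 'n_bars' are placeholders, not
 * real EDAC names, and error handling is omitted:
 *
 *	void *ptr = NULL, *base;
 *	struct foo *foo;
 *	u32 *bars;
 *	unsigned size;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	bars = edac_align_ptr(&ptr, sizeof(u32), n_bars);
 *	size = (unsigned long)ptr;
 *
 *	base = kzalloc(size, GFP_KERNEL);
 *	foo = (struct foo *)((char *)base + (unsigned long)foo);
 *	bars = (u32 *)((char *)base + (unsigned long)bars);
 */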
182
183/**
184 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
185 * @mc_num:		Memory controller number
186 * @n_layers:		Number of MC hierarchy layers
187 * @layers:		Describes each layer as seen by the Memory Controller
188 * @sz_pvt:		size of private storage needed
189 *
190 *
191 * Everything is kmalloc'ed as one big chunk - more efficient.
192 * Only can be used if all structures have the same lifetime - otherwise
193 * you have to allocate and initialize your own structures.
194 *
195 * Use edac_mc_free() to free mc structures allocated by this function.
196 *
197 * NOTE: drivers handle multi-rank memories in different ways: in some
198 * drivers, one multi-rank memory stick is mapped as one entry, while, in
199 * others, a single multi-rank memory stick would be mapped into several
200 * entries. Currently, this function will allocate multiple struct dimm_info
201 * in such scenarios, as grouping the multiple ranks would require driver changes.
202 *
203 * Returns:
204 *	On failure: NULL
205 *	On success: struct mem_ctl_info pointer
206 */
207struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
208				   unsigned n_layers,
209				   struct edac_mc_layer *layers,
210				   unsigned sz_pvt)
211{
212	struct mem_ctl_info *mci;
213	struct edac_mc_layer *layer;
214	struct csrow_info *csr;
215	struct rank_info *chan;
216	struct dimm_info *dimm;
217	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
218	unsigned pos[EDAC_MAX_LAYERS];
219	unsigned size, tot_dimms = 1, count = 1;
220	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
221	void *pvt, *p, *ptr = NULL;
222	int i, j, row, chn, n, len, off;
223	bool per_rank = false;
224
225	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
226	/*
227	 * Calculate the total number of DIMMs and of csrows/cschannels
228	 * needed for the old (per-csrow) API emulation mode
229	 */
230	for (i = 0; i < n_layers; i++) {
231		tot_dimms *= layers[i].size;
232		if (layers[i].is_virt_csrow)
233			tot_csrows *= layers[i].size;
234		else
235			tot_channels *= layers[i].size;
236
237		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
238			per_rank = true;
239	}
240
241	/* Figure out the offsets of the various items from the start of an mc
242	 * structure.  We want the alignment of each item to be at least as
243	 * stringent as what the compiler would provide if we could simply
244	 * hardcode everything into a single struct.
245	 */
246	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
247	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
248	for (i = 0; i < n_layers; i++) {
249		count *= layers[i].size;
250		edac_dbg(4, "errcount layer %d size %d\n", i, count);
251		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
252		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
253		tot_errcount += 2 * count;
254	}
255
256	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
257	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
258	size = ((unsigned long)pvt) + sz_pvt;
259
260	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
261		 size,
262		 tot_dimms,
263		 per_rank ? "ranks" : "dimms",
264		 tot_csrows * tot_channels);
265
266	mci = kzalloc(size, GFP_KERNEL);
267	if (mci == NULL)
268		return NULL;
269
270	/* Adjust pointers so they point within the memory we just allocated
271	 * rather than an imaginary chunk of memory located at address 0.
272	 */
273	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
274	for (i = 0; i < n_layers; i++) {
275		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
276		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
277	}
278	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
279
280	/* setup index and various internal pointers */
281	mci->mc_idx = mc_num;
282	mci->tot_dimms = tot_dimms;
283	mci->pvt_info = pvt;
284	mci->n_layers = n_layers;
285	mci->layers = layer;
286	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
287	mci->nr_csrows = tot_csrows;
288	mci->num_cschannel = tot_channels;
289	mci->mem_is_per_rank = per_rank;
290
291	/*
292	 * Allocate and fill the csrow/channels structs
293	 */
294	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
295	if (!mci->csrows)
296		goto error;
297	for (row = 0; row < tot_csrows; row++) {
298		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
299		if (!csr)
300			goto error;
301		mci->csrows[row] = csr;
302		csr->csrow_idx = row;
303		csr->mci = mci;
304		csr->nr_channels = tot_channels;
305		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
306					GFP_KERNEL);
307		if (!csr->channels)
308			goto error;
309
310		for (chn = 0; chn < tot_channels; chn++) {
311			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
312			if (!chan)
313				goto error;
314			csr->channels[chn] = chan;
315			chan->chan_idx = chn;
316			chan->csrow = csr;
317		}
318	}
319
320	/*
321	 * Allocate and fill the dimm structs
322	 */
323	mci->dimms  = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
324	if (!mci->dimms)
325		goto error;
326
327	memset(&pos, 0, sizeof(pos));
328	row = 0;
329	chn = 0;
330	edac_dbg(4, "initializing %d %s\n",
331		 tot_dimms, per_rank ? "ranks" : "dimms");
332	for (i = 0; i < tot_dimms; i++) {
333		chan = mci->csrows[row]->channels[chn];
334		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
335		if (off < 0 || off >= tot_dimms) {
336			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
337			goto error;
338		}
339
340		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			goto error;
341		mci->dimms[off] = dimm;
342		dimm->mci = mci;
343
344		edac_dbg(2, "%d: %s%i (%d:%d:%d): row %d, chan %d\n",
345			 i, per_rank ? "rank" : "dimm", off,
346			 pos[0], pos[1], pos[2], row, chn);
347
348		/*
349		 * Copy DIMM location and initialize it.
350		 */
351		len = sizeof(dimm->label);
352		p = dimm->label;
353		n = snprintf(p, len, "mc#%u", mc_num);
354		p += n;
355		len -= n;
356		for (j = 0; j < n_layers; j++) {
357			n = snprintf(p, len, "%s#%u",
358				     edac_layer_name[layers[j].type],
359				     pos[j]);
360			p += n;
361			len -= n;
362			dimm->location[j] = pos[j];
363
364			if (len <= 0)
365				break;
366		}
367
368		/* Link it to the csrows old API data */
369		chan->dimm = dimm;
370		dimm->csrow = row;
371		dimm->cschannel = chn;
372
373		/* Increment csrow location */
374		row++;
375		if (row == tot_csrows) {
376			row = 0;
377			chn++;
378		}
379
380		/* Increment dimm location */
381		for (j = n_layers - 1; j >= 0; j--) {
382			pos[j]++;
383			if (pos[j] < layers[j].size)
384				break;
385			pos[j] = 0;
386		}
387	}
388
389	mci->op_state = OP_ALLOC;
390
391	/* at this point, the root kobj is valid, and in order to
392	 * 'free' the object, then the function:
393	 *      edac_mc_unregister_sysfs_main_kobj() must be called
394	 * which will perform kobj unregistration and the actual free
395	 * will occur during the kobject callback operation
396	 */
397
398	return mci;
399
400error:
401	if (mci->dimms) {
402		for (i = 0; i < tot_dimms; i++)
403			kfree(mci->dimms[i]);
404		kfree(mci->dimms);
405	}
406	if (mci->csrows) {
407		for (row = 0; row < tot_csrows; row++) {
408			csr = mci->csrows[row];
409			if (csr) {
410				for (chn = 0; chn < tot_channels; chn++)
411					kfree(csr->channels[chn]);
412				kfree(csr);
413			}
415		}
416		kfree(mci->csrows);
417	}
418	kfree(mci);
419
420	return NULL;
421}
422EXPORT_SYMBOL_GPL(edac_mc_alloc);
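/*
 * A minimal allocation sketch for a hypothetical driver with two layers
 * (channel x slot); num_channels, dimms_per_channel and struct example_pvt
 * are assumed driver-specific names, not part of the EDAC core:
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[0].size = num_channels;
 *	layers[0].is_virt_csrow = false;
 *	layers[1].type = EDAC_MC_LAYER_SLOT;
 *	layers[1].size = dimms_per_channel;
 *	layers[1].is_virt_csrow = true;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct example_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */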
423
424/**
425 * edac_mc_free
426 *	'Free' a previously allocated 'mci' structure
427 * @mci: pointer to a struct mem_ctl_info structure
428 */
429void edac_mc_free(struct mem_ctl_info *mci)
430{
431	edac_dbg(1, "\n");
432
433	/* the mci instance is freed here, when the sysfs object is dropped */
434	edac_unregister_sysfs(mci);
435}
436EXPORT_SYMBOL_GPL(edac_mc_free);
437
438
439/**
440 * find_mci_by_dev
441 *
442 *	scan list of controllers looking for the one that manages
443 *	the 'dev' device
444 * @dev: pointer to a struct device related with the MCI
445 */
446struct mem_ctl_info *find_mci_by_dev(struct device *dev)
447{
448	struct mem_ctl_info *mci;
449	struct list_head *item;
450
451	edac_dbg(3, "\n");
452
453	list_for_each(item, &mc_devices) {
454		mci = list_entry(item, struct mem_ctl_info, link);
455
456		if (mci->pdev == dev)
457			return mci;
458	}
459
460	return NULL;
461}
462EXPORT_SYMBOL_GPL(find_mci_by_dev);
463
464/*
465 * handler for EDAC to check if NMI type handler has asserted interrupt
466 */
467static int edac_mc_assert_error_check_and_clear(void)
468{
469	int old_state;
470
471	if (edac_op_state == EDAC_OPSTATE_POLL)
472		return 1;
473
474	old_state = edac_err_assert;
475	edac_err_assert = 0;
476
477	return old_state;
478}
479
480/*
481 * edac_mc_workq_function
482 *	performs the operation scheduled by a workq request
483 */
484static void edac_mc_workq_function(struct work_struct *work_req)
485{
486	struct delayed_work *d_work = to_delayed_work(work_req);
487	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
488
489	mutex_lock(&mem_ctls_mutex);
490
491	/* if this control struct has moved to offline state, we are done */
492	if (mci->op_state == OP_OFFLINE) {
493		mutex_unlock(&mem_ctls_mutex);
494		return;
495	}
496
497	/* Only poll controllers that are running polled and have a check */
498	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
499		mci->edac_check(mci);
500
501	mutex_unlock(&mem_ctls_mutex);
502
503	/* Reschedule */
504	queue_delayed_work(edac_workqueue, &mci->work,
505			msecs_to_jiffies(edac_mc_get_poll_msec()));
506}
507
508/*
509 * edac_mc_workq_setup
510 *	initialize a workq item for this mci
511 *	passing in the new delay period in msec
512 *
513 *	locking model:
514 *
515 *		called with the mem_ctls_mutex held
516 */
517static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
518{
519	edac_dbg(0, "\n");
520
521	/* if this instance is not in the POLL state, then simply return */
522	if (mci->op_state != OP_RUNNING_POLL)
523		return;
524
525	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
526	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
527}
528
529/*
530 * edac_mc_workq_teardown
531 *	stop the workq processing on this mci
532 *
533 *	locking model:
534 *
535 *		called WITHOUT lock held
536 */
537static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
538{
539	int status;
540
541	if (mci->op_state != OP_RUNNING_POLL)
542		return;
543
544	status = cancel_delayed_work(&mci->work);
545	if (status == 0) {
546		edac_dbg(0, "not canceled, flush the queue\n");
547
548		/* workq instance might be running, wait for it */
549		flush_workqueue(edac_workqueue);
550	}
551}
552
553/*
554 * edac_mc_reset_delay_period(int value)
555 *
556 *	user space has updated our poll period value, need to
557 *	reset our workq delays
558 */
559void edac_mc_reset_delay_period(int value)
560{
561	struct mem_ctl_info *mci;
562	struct list_head *item;
563
564	mutex_lock(&mem_ctls_mutex);
565
566	/* scan the list and turn off all workq timers, doing so under lock
567	 */
568	list_for_each(item, &mc_devices) {
569		mci = list_entry(item, struct mem_ctl_info, link);
570
571		if (mci->op_state == OP_RUNNING_POLL)
572			cancel_delayed_work(&mci->work);
573	}
574
575	mutex_unlock(&mem_ctls_mutex);
576
577
578	/* re-walk the list, and reset the poll delay */
579	mutex_lock(&mem_ctls_mutex);
580
581	list_for_each(item, &mc_devices) {
582		mci = list_entry(item, struct mem_ctl_info, link);
583
584		edac_mc_workq_setup(mci, (unsigned long) value);
585	}
586
587	mutex_unlock(&mem_ctls_mutex);
588}
589
590
591
592/* Return 0 on success, 1 on failure.
593 * Before calling this function, caller must
594 * assign a unique value to mci->mc_idx.
595 *
596 *	locking model:
597 *
598 *		called with the mem_ctls_mutex lock held
599 */
600static int add_mc_to_global_list(struct mem_ctl_info *mci)
601{
602	struct list_head *item, *insert_before;
603	struct mem_ctl_info *p;
604
605	insert_before = &mc_devices;
606
607	p = find_mci_by_dev(mci->pdev);
608	if (unlikely(p != NULL))
609		goto fail0;
610
611	list_for_each(item, &mc_devices) {
612		p = list_entry(item, struct mem_ctl_info, link);
613
614		if (p->mc_idx >= mci->mc_idx) {
615			if (unlikely(p->mc_idx == mci->mc_idx))
616				goto fail1;
617
618			insert_before = item;
619			break;
620		}
621	}
622
623	list_add_tail_rcu(&mci->link, insert_before);
624	atomic_inc(&edac_handlers);
625	return 0;
626
627fail0:
628	edac_printk(KERN_WARNING, EDAC_MC,
629		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
630		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
631	return 1;
632
633fail1:
634	edac_printk(KERN_WARNING, EDAC_MC,
635		"bug in low-level driver: attempt to assign\n"
636		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
637	return 1;
638}
639
640static void del_mc_from_global_list(struct mem_ctl_info *mci)
641{
642	atomic_dec(&edac_handlers);
643	list_del_rcu(&mci->link);
644
645	/* these are for safe removal of devices from global list while
646	 * NMI handlers may be traversing list
647	 */
648	synchronize_rcu();
649	INIT_LIST_HEAD(&mci->link);
650}
651
652/**
653 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
654 *
655 * If found, return a pointer to the structure.
656 * Else return NULL.
657 *
658 * Caller must hold mem_ctls_mutex.
659 */
660struct mem_ctl_info *edac_mc_find(int idx)
661{
662	struct list_head *item;
663	struct mem_ctl_info *mci;
664
665	list_for_each(item, &mc_devices) {
666		mci = list_entry(item, struct mem_ctl_info, link);
667
668		if (mci->mc_idx >= idx) {
669			if (mci->mc_idx == idx)
670				return mci;
671
672			break;
673		}
674	}
675
676	return NULL;
677}
678EXPORT_SYMBOL(edac_mc_find);
679
680/**
681 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
682 *                 create sysfs entries associated with mci structure
683 * @mci: pointer to the mci structure to be added to the list
684 *
685 * Return:
686 *	0	Success
687 *	!0	Failure
688 */
689
690/* FIXME - should a warning be printed if no error detection? correction? */
691int edac_mc_add_mc(struct mem_ctl_info *mci)
692{
693	edac_dbg(0, "\n");
694
695#ifdef CONFIG_EDAC_DEBUG
696	if (edac_debug_level >= 3)
697		edac_mc_dump_mci(mci);
698
699	if (edac_debug_level >= 4) {
700		int i;
701
702		for (i = 0; i < mci->nr_csrows; i++) {
703			int j;
704
705			edac_mc_dump_csrow(mci->csrows[i]);
706			for (j = 0; j < mci->csrows[i]->nr_channels; j++)
707				edac_mc_dump_channel(mci->csrows[i]->channels[j]);
708		}
709		for (i = 0; i < mci->tot_dimms; i++)
710			edac_mc_dump_dimm(mci->dimms[i]);
711	}
712#endif
713	mutex_lock(&mem_ctls_mutex);
714
715	if (add_mc_to_global_list(mci))
716		goto fail0;
717
718	/* set load time so that error rate can be tracked */
719	mci->start_time = jiffies;
720
721	if (edac_create_sysfs_mci_device(mci)) {
722		edac_mc_printk(mci, KERN_WARNING,
723			"failed to create sysfs device\n");
724		goto fail1;
725	}
726
727	/* If there IS a check routine, then we are running POLLED */
728	if (mci->edac_check != NULL) {
729		/* This instance is NOW RUNNING */
730		mci->op_state = OP_RUNNING_POLL;
731
732		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
733	} else {
734		mci->op_state = OP_RUNNING_INTERRUPT;
735	}
736
737	/* Report action taken */
738	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
739		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
740
741	mutex_unlock(&mem_ctls_mutex);
742	return 0;
743
744fail1:
745	del_mc_from_global_list(mci);
746
747fail0:
748	mutex_unlock(&mem_ctls_mutex);
749	return 1;
750}
751EXPORT_SYMBOL_GPL(edac_mc_add_mc);
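/*
 * A rough registration sketch for a hypothetical driver probe routine; pdev
 * and example_check() are assumed driver names.  Setting ->edac_check makes
 * the instance run in polled mode, as described above:
 *
 *	mci->pdev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->edac_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "example_edac";
 *	mci->ctl_name = "Example MC";
 *	mci->edac_check = example_check;
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */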
752
753/**
754 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
755 *                 remove mci structure from global list
756 * @dev: Pointer to 'struct device' representing the mci structure to remove.
757 *
758 * Return pointer to removed mci structure, or NULL if device not found.
759 */
760struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
761{
762	struct mem_ctl_info *mci;
763
764	edac_dbg(0, "\n");
765
766	mutex_lock(&mem_ctls_mutex);
767
768	/* find the requested mci struct in the global list */
769	mci = find_mci_by_dev(dev);
770	if (mci == NULL) {
771		mutex_unlock(&mem_ctls_mutex);
772		return NULL;
773	}
774
775	del_mc_from_global_list(mci);
776	mutex_unlock(&mem_ctls_mutex);
777
778	/* flush workq processes */
779	edac_mc_workq_teardown(mci);
780
781	/* marking MCI offline */
782	mci->op_state = OP_OFFLINE;
783
784	/* remove from sysfs */
785	edac_remove_sysfs_mci_device(mci);
786
787	edac_printk(KERN_INFO, EDAC_MC,
788		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
789		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
790
791	return mci;
792}
793EXPORT_SYMBOL_GPL(edac_mc_del_mc);
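/*
 * The usual teardown pairing in a driver remove routine (pdev being the same
 * device that was stored in mci->pdev at probe time):
 *
 *	mci = edac_mc_del_mc(&pdev->dev);
 *	if (mci)
 *		edac_mc_free(mci);
 */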
794
795static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
796				u32 size)
797{
798	struct page *pg;
799	void *virt_addr;
800	unsigned long flags = 0;
801
802	edac_dbg(3, "\n");
803
804	/* ECC error page was not in our memory. Ignore it. */
805	if (!pfn_valid(page))
806		return;
807
808	/* Find the actual page structure then map it and fix */
809	pg = pfn_to_page(page);
810
811	if (PageHighMem(pg))
812		local_irq_save(flags);
813
814	virt_addr = kmap_atomic(pg);
815
816	/* Perform architecture specific atomic scrub operation */
817	atomic_scrub(virt_addr + offset, size);
818
819	/* Unmap and complete */
820	kunmap_atomic(virt_addr);
821
822	if (PageHighMem(pg))
823		local_irq_restore(flags);
824}
825
826/* FIXME - should return -1 */
827int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
828{
829	struct csrow_info **csrows = mci->csrows;
830	int row, i, j, n;
831
832	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
833	row = -1;
834
835	for (i = 0; i < mci->nr_csrows; i++) {
836		struct csrow_info *csrow = csrows[i];
837		n = 0;
838		for (j = 0; j < csrow->nr_channels; j++) {
839			struct dimm_info *dimm = csrow->channels[j]->dimm;
840			n += dimm->nr_pages;
841		}
842		if (n == 0)
843			continue;
844
845		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
846			 mci->mc_idx,
847			 csrow->first_page, page, csrow->last_page,
848			 csrow->page_mask);
849
850		if ((page >= csrow->first_page) &&
851		    (page <= csrow->last_page) &&
852		    ((page & csrow->page_mask) ==
853		     (csrow->first_page & csrow->page_mask))) {
854			row = i;
855			break;
856		}
857	}
858
859	if (row == -1)
860		edac_mc_printk(mci, KERN_ERR,
861			"could not look up page error address %lx\n",
862			(unsigned long)page);
863
864	return row;
865}
866EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
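/*
 * A small usage sketch: a driver that has decoded an error address into a
 * page frame number (err_pfn, an assumed driver variable) can map it back to
 * a csrow index; a negative return means no csrow of this MC backs the page:
 *
 *	row = edac_mc_find_csrow_by_page(mci, err_pfn);
 *	if (row < 0)
 *		return;
 */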
867
868const char *edac_layer_name[] = {
869	[EDAC_MC_LAYER_BRANCH] = "branch",
870	[EDAC_MC_LAYER_CHANNEL] = "channel",
871	[EDAC_MC_LAYER_SLOT] = "slot",
872	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
873};
874EXPORT_SYMBOL_GPL(edac_layer_name);
875
876static void edac_inc_ce_error(struct mem_ctl_info *mci,
877				    bool enable_per_layer_report,
878				    const int pos[EDAC_MAX_LAYERS])
879{
880	int i, index = 0;
881
882	mci->ce_mc++;
883
884	if (!enable_per_layer_report) {
885		mci->ce_noinfo_count++;
886		return;
887	}
888
889	for (i = 0; i < mci->n_layers; i++) {
890		if (pos[i] < 0)
891			break;
892		index += pos[i];
893		mci->ce_per_layer[i][index]++;
894
895		if (i < mci->n_layers - 1)
896			index *= mci->layers[i + 1].size;
897	}
898}
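/*
 * The loop above flattens the per-layer position into a row-major index.
 * A worked example with made-up numbers: two layers of sizes {2, 4} and
 * pos = {1, 2} give
 *
 *	layer 0: index = 1              -> ce_per_layer[0][1]++
 *	         index *= 4 (next size) -> index = 4
 *	layer 1: index = 4 + 2 = 6      -> ce_per_layer[1][6]++
 *
 * which matches the 2 and 2*4 counters allocated per layer by
 * edac_mc_alloc().
 */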
899
900static void edac_inc_ue_error(struct mem_ctl_info *mci,
901				    bool enable_per_layer_report,
902				    const int pos[EDAC_MAX_LAYERS])
903{
904	int i, index = 0;
905
906	mci->ue_mc++;
907
908	if (!enable_per_layer_report) {
909		mci->ue_noinfo_count++;
910		return;
911	}
912
913	for (i = 0; i < mci->n_layers; i++) {
914		if (pos[i] < 0)
915			break;
916		index += pos[i];
917		mci->ue_per_layer[i][index]++;
918
919		if (i < mci->n_layers - 1)
920			index *= mci->layers[i + 1].size;
921	}
922}
923
924static void edac_ce_error(struct mem_ctl_info *mci,
925			  const int pos[EDAC_MAX_LAYERS],
926			  const char *msg,
927			  const char *location,
928			  const char *label,
929			  const char *detail,
930			  const char *other_detail,
931			  const bool enable_per_layer_report,
932			  const unsigned long page_frame_number,
933			  const unsigned long offset_in_page,
934			  long grain)
935{
936	unsigned long remapped_page;
937
938	if (edac_mc_get_log_ce()) {
939		if (other_detail && *other_detail)
940			edac_mc_printk(mci, KERN_WARNING,
941				       "CE %s on %s (%s %s - %s)\n",
942				       msg, label, location,
943				       detail, other_detail);
944		else
945			edac_mc_printk(mci, KERN_WARNING,
946				       "CE %s on %s (%s %s)\n",
947				       msg, label, location,
948				       detail);
949	}
950	edac_inc_ce_error(mci, enable_per_layer_report, pos);
951
952	if (mci->scrub_mode & SCRUB_SW_SRC) {
953		/*
954		 * Some memory controllers (called MCs below) can remap
955		 * memory so that it is still available at a different
956		 * address when PCI devices map into memory.
957		 * MCs that can't do this lose the memory where PCI
958		 * devices are mapped. This mapping is MC-dependent
959		 * and so we call back into the MC driver for it to
960		 * map the MC page to a physical (CPU) page which can
961		 * then be mapped to a virtual page - which can then
962		 * be scrubbed.
963		 */
964		remapped_page = mci->ctl_page_to_phys ?
965			mci->ctl_page_to_phys(mci, page_frame_number) :
966			page_frame_number;
967
968		edac_mc_scrub_block(remapped_page,
969					offset_in_page, grain);
970	}
971}
972
973static void edac_ue_error(struct mem_ctl_info *mci,
974			  const int pos[EDAC_MAX_LAYERS],
975			  const char *msg,
976			  const char *location,
977			  const char *label,
978			  const char *detail,
979			  const char *other_detail,
980			  const bool enable_per_layer_report)
981{
982	if (edac_mc_get_log_ue()) {
983		if (other_detail && *other_detail)
984			edac_mc_printk(mci, KERN_WARNING,
985				       "UE %s on %s (%s %s - %s)\n",
986			               msg, label, location, detail,
987				       other_detail);
988		else
989			edac_mc_printk(mci, KERN_WARNING,
990				       "UE %s on %s (%s %s)\n",
991			               msg, label, location, detail);
992	}
993
994	if (edac_mc_get_panic_on_ue()) {
995		if (other_detail && *other_detail)
996			panic("UE %s on %s (%s %s - %s)\n",
997			      msg, label, location, detail, other_detail);
998		else
999			panic("UE %s on %s (%s %s)\n",
1000			      msg, label, location, detail);
1001	}
1002
1003	edac_inc_ue_error(mci, enable_per_layer_report, pos);
1004}
1005
1006#define OTHER_LABEL " or "
1007
1008/**
1009 * edac_mc_handle_error - reports a memory event to userspace
1010 *
1011 * @type:		severity of the error (CE/UE/Fatal)
1012 * @mci:		a struct mem_ctl_info pointer
1013 * @page_frame_number:	mem page where the error occurred
1014 * @offset_in_page:	offset of the error inside the page
1015 * @syndrome:		ECC syndrome
1016 * @top_layer:		Memory layer[0] position
1017 * @mid_layer:		Memory layer[1] position
1018 * @low_layer:		Memory layer[2] position
1019 * @msg:		Message meaningful to the end users that
1020 *			explains the event
1021 * @other_detail:	Technical details about the event that
1022 *			may help hardware manufacturers and
1023 *			EDAC developers to analyse the event
1024 * @arch_log:		Architecture-specific struct that can
1025 *			be used to add extended information to the
1026 *			tracepoint, like dumping MCE registers.
1027 */
1028void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1029			  struct mem_ctl_info *mci,
1030			  const unsigned long page_frame_number,
1031			  const unsigned long offset_in_page,
1032			  const unsigned long syndrome,
1033			  const int top_layer,
1034			  const int mid_layer,
1035			  const int low_layer,
1036			  const char *msg,
1037			  const char *other_detail,
1038			  const void *arch_log)
1039{
1040	/* FIXME: too much for stack: move it to some pre-allocated area */
1041	char detail[80], location[80];
1042	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1043	char *p;
1044	int row = -1, chan = -1;
1045	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1046	int i;
1047	long grain;
1048	bool enable_per_layer_report = false;
1049	u16 error_count;	/* FIXME: make it a parameter */
1050	u8 grain_bits;
1051
1052	edac_dbg(3, "MC%d\n", mci->mc_idx);
1053
1054	/*
1055	 * Check if the event report is consistent and if the memory
1056	 * location is known. If it is known, enable_per_layer_report will be
1057	 * true, the DIMM(s) label info will be filled and the per-layer
1058	 * error counters will be incremented.
1059	 */
1060	for (i = 0; i < mci->n_layers; i++) {
1061		if (pos[i] >= (int)mci->layers[i].size) {
1062			if (type == HW_EVENT_ERR_CORRECTED)
1063				p = "CE";
1064			else
1065				p = "UE";
1066
1067			edac_mc_printk(mci, KERN_ERR,
1068				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1069				       edac_layer_name[mci->layers[i].type],
1070				       pos[i], mci->layers[i].size);
1071			/*
1072			 * Instead of just returning it, let's use what's
1073			 * known about the error. The increment routines and
1074			 * the DIMM filter logic will do the right thing by
1075			 * pointing to the likely damaged DIMMs.
1076			 */
1077			pos[i] = -1;
1078		}
1079		if (pos[i] >= 0)
1080			enable_per_layer_report = true;
1081	}
1082
1083	/*
1084	 * Get the dimm label/grain that applies to the match criteria.
1085	 * As the error algorithm may not be able to point to just one memory
1086	 * stick, the logic here will get all possible labels that could
1087	 * potentially be affected by the error.
1088	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1089	 * to have only the MC channel and the MC dimm (also called "branch")
1090	 * but the channel is not known, as the memory is arranged in pairs,
1091	 * where each memory belongs to a separate channel within the same
1092	 * branch.
1093	 */
1094	grain = 0;
1095	p = label;
1096	*p = '\0';
1097	for (i = 0; i < mci->tot_dimms; i++) {
1098		struct dimm_info *dimm = mci->dimms[i];
1099
1100		if (top_layer >= 0 && top_layer != dimm->location[0])
1101			continue;
1102		if (mid_layer >= 0 && mid_layer != dimm->location[1])
1103			continue;
1104		if (low_layer >= 0 && low_layer != dimm->location[2])
1105			continue;
1106
1107		/* get the max grain, over the error match range */
1108		if (dimm->grain > grain)
1109			grain = dimm->grain;
1110
1111		/*
1112		 * If the error is memory-controller wide, there's no need to
1113		 * seek for the affected DIMMs because the whole
1114		 * channel/memory controller/...  may be affected.
1115		 * Also, don't show errors for empty DIMM slots.
1116		 */
1117		if (enable_per_layer_report && dimm->nr_pages) {
1118			if (p != label) {
1119				strcpy(p, OTHER_LABEL);
1120				p += strlen(OTHER_LABEL);
1121			}
1122			strcpy(p, dimm->label);
1123			p += strlen(p);
1124			*p = '\0';
1125
1126			/*
1127			 * get csrow/channel of the DIMM, in order to allow
1128			 * incrementing the compat API counters
1129			 */
1130			edac_dbg(4, "%s csrows map: (%d,%d)\n",
1131				 mci->mem_is_per_rank ? "rank" : "dimm",
1132				 dimm->csrow, dimm->cschannel);
1133			if (row == -1)
1134				row = dimm->csrow;
1135			else if (row >= 0 && row != dimm->csrow)
1136				row = -2;
1137
1138			if (chan == -1)
1139				chan = dimm->cschannel;
1140			else if (chan >= 0 && chan != dimm->cschannel)
1141				chan = -2;
1142		}
1143	}
1144
1145	if (!enable_per_layer_report) {
1146		strcpy(label, "any memory");
1147	} else {
1148		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1149		if (p == label)
1150			strcpy(label, "unknown memory");
1151		if (type == HW_EVENT_ERR_CORRECTED) {
1152			if (row >= 0) {
1153				mci->csrows[row]->ce_count++;
1154				if (chan >= 0)
1155					mci->csrows[row]->channels[chan]->ce_count++;
1156			}
1157		} else
1158			if (row >= 0)
1159				mci->csrows[row]->ue_count++;
1160	}
1161
1162	/* Fill the RAM location data */
1163	p = location;
1164	for (i = 0; i < mci->n_layers; i++) {
1165		if (pos[i] < 0)
1166			continue;
1167
1168		p += sprintf(p, "%s:%d ",
1169			     edac_layer_name[mci->layers[i].type],
1170			     pos[i]);
1171	}
1172	if (p > location)
1173		*(p - 1) = '\0';
1174
1175	/* Report the error via the trace interface */
1176
1177	error_count = 1;	/* FIXME: allow changing it */
1178	grain_bits = fls_long(grain) + 1;
1179	trace_mc_event(type, msg, label, error_count,
1180		       mci->mc_idx, top_layer, mid_layer, low_layer,
1181		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
1182		       grain_bits, syndrome, other_detail);
1183
1184	/* Memory type dependent details about the error */
1185	if (type == HW_EVENT_ERR_CORRECTED) {
1186		snprintf(detail, sizeof(detail),
1187			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1188			page_frame_number, offset_in_page,
1189			grain, syndrome);
1190		edac_ce_error(mci, pos, msg, location, label, detail,
1191			      other_detail, enable_per_layer_report,
1192			      page_frame_number, offset_in_page, grain);
1193	} else {
1194		snprintf(detail, sizeof(detail),
1195			"page:0x%lx offset:0x%lx grain:%ld",
1196			page_frame_number, offset_in_page, grain);
1197
1198		edac_ue_error(mci, pos, msg, location, label, detail,
1199			      other_detail, enable_per_layer_report);
1200	}
1201}
1202EXPORT_SYMBOL_GPL(edac_mc_handle_error);
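/*
 * A reporting sketch for a hypothetical driver error handler.  The layer
 * positions must follow the same order as the layers passed to
 * edac_mc_alloc(); -1 marks a position the hardware could not resolve.
 * pfn, offset, syndrome, channel and slot are assumed driver-decoded values:
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     pfn, offset, syndrome,
 *			     channel, slot, -1,
 *			     "single-bit error", "", NULL);
 */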
1203