edac_mc.c revision 93e4fe64ece4eccf0ff4ac69bceb389290b8ab7c
1/*
2 * edac_mc kernel module
3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 *	http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/ctype.h>
29#include <linux/edac.h>
30#include <asm/uaccess.h>
31#include <asm/page.h>
32#include <asm/edac.h>
33#include "edac_core.h"
34#include "edac_module.h"
35
36/* lock to memory controller's control array */
37static DEFINE_MUTEX(mem_ctls_mutex);
38static LIST_HEAD(mc_devices);
39
40#ifdef CONFIG_EDAC_DEBUG
41
/* Dump one rank/channel and the DIMM hanging off it (debug level 4). */
static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	/* per-DIMM information is reached through the channel */
	debugf4("\tdimm->ce_count = %d\n", chan->dimm->ce_count);
	debugf4("\tdimm->label = '%s'\n", chan->dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", chan->dimm->nr_pages);
}
51
/* Dump the addressing fields of one chip-select row (debug level 4). */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}
63
/* Dump the top-level fields of a memory controller (debug level 3/4). */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}
77
78#endif				/* CONFIG_EDAC_DEBUG */
79
/*
 * keep those in sync with the enum mem_type
 *
 * Human-readable names indexed by the enum mem_type values (see
 * <linux/edac.h>); any entry added to the enum needs a matching
 * string here, in the same position.
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
103
104/**
105 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
106 * @p:		pointer to a pointer with the memory offset to be used. At
107 *		return, this will be incremented to point to the next offset
108 * @size:	Size of the data structure to be reserved
109 * @n_elems:	Number of elements that should be reserved
110 *
111 * If 'size' is a constant, the compiler will optimize this whole function
112 * down to either a no-op or the addition of a constant to the value of '*p'.
113 *
114 * The 'p' pointer is absolutely needed to keep the proper advancing
115 * further in memory to the proper offsets when allocating the struct along
116 * with its embedded structs, as edac_device_alloc_ctl_info() does it
117 * above, for example.
118 *
119 * At return, the pointer 'p' will be incremented to be used on a next call
120 * to this function.
121 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	/* reserve space for n_elems items at the current running offset;
	 * any alignment padding is added to *p below as well
	 */
	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	/* Alignment depends on where the object lands, i.e. on the current
	 * offset 'ptr' -- NOT on 'size'.  The previous 'size % align' test
	 * returned an unaligned pointer whenever the running offset itself
	 * was not a multiple of 'align' (and inserted useless padding when
	 * it was).
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	/* account for the padding in the running offset too */
	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}
158
159/**
160 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
161 * @size_pvt:	size of private storage needed
162 * @nr_csrows:	Number of CWROWS needed for this MC
163 * @nr_chans:	Number of channels for the MC
164 *
165 * Everything is kmalloc'ed as one big chunk - more efficient.
166 * Only can be used if all structures have the same lifetime - otherwise
167 * you have to allocate and initialize your own structures.
168 *
169 * Use edac_mc_free() to free mc structures allocated by this function.
170 *
171 * Returns:
172 *	NULL allocation failed
173 *	struct mem_ctl_info pointer
174 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	void *ptr = NULL;
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	void *pvt;
	unsigned size;
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 *
	 * With 'ptr' starting at NULL, each edac_align_ptr() call yields
	 * the byte offset of that item (disguised as a pointer) within
	 * the single allocation made below.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	csi = edac_align_ptr(&ptr, sizeof(*csi), nr_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), nr_csrows * nr_chans);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), nr_csrows * nr_chans);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	/* total size = offset of the private area + its length */
	size = ((unsigned long)pvt) + sz_pvt;

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->dimms  = dimm;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	/*
	 * For now, assumes that a per-csrow arrangement for dimms.
	 * This will be latter changed.
	 */
	dimm = mci->dimms;

	/* wire up the csrow -> channel -> dimm cross references; one dimm
	 * per (row, channel) pair in this layout
	 */
	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;

			mci->csrows[row].channels[chn].dimm = dimm;
			dimm->csrow = row;
			dimm->csrow_channel = chn;
			dimm++;
			mci->nr_dimms++;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
266EXPORT_SYMBOL_GPL(edac_mc_alloc);
267
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	/* Drop the main sysfs kobject.
	 * NOTE(review): the comment at the end of edac_mc_alloc() says the
	 * actual free occurs in the kobject release callback; if that
	 * callback really does kfree(mci), the kfree below is a double
	 * free -- verify against edac_mc_sysfs.c.
	 */
	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
282EXPORT_SYMBOL_GPL(edac_mc_free);
283
284
285/**
286 * find_mci_by_dev
287 *
288 *	scan list of controllers looking for the one that manages
289 *	the 'dev' device
290 * @dev: pointer to a struct device related with the MCI
291 */
292struct mem_ctl_info *find_mci_by_dev(struct device *dev)
293{
294	struct mem_ctl_info *mci;
295	struct list_head *item;
296
297	debugf3("%s()\n", __func__);
298
299	list_for_each(item, &mc_devices) {
300		mci = list_entry(item, struct mem_ctl_info, link);
301
302		if (mci->dev == dev)
303			return mci;
304	}
305
306	return NULL;
307}
308EXPORT_SYMBOL_GPL(find_mci_by_dev);
309
310/*
311 * handler for EDAC to check if NMI type handler has asserted interrupt
312 */
313static int edac_mc_assert_error_check_and_clear(void)
314{
315	int old_state;
316
317	if (edac_op_state == EDAC_OPSTATE_POLL)
318		return 1;
319
320	old_state = edac_err_assert;
321	edac_err_assert = 0;
322
323	return old_state;
324}
325
/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 *
 *	Periodic poll body: runs the controller's edac_check() hook when
 *	appropriate, then re-arms itself.
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule: done outside the mutex; an OFFLINE transition is
	 * caught by the test at the top of the next run
	 */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}
353
/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	Only controllers in the OP_RUNNING_POLL state are (re)armed.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	/* (re)initialize the work item and schedule the first poll */
	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}
374
375/*
376 * edac_mc_workq_teardown
377 *	stop the workq processing on this mci
378 *
379 *	locking model:
380 *
381 *		called WITHOUT lock held
382 */
383static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
384{
385	int status;
386
387	if (mci->op_state != OP_RUNNING_POLL)
388		return;
389
390	status = cancel_delayed_work(&mci->work);
391	if (status == 0) {
392		debugf0("%s() not canceled, flush the queue\n",
393			__func__);
394
395		/* workq instance might be running, wait for it */
396		flush_workqueue(edac_workqueue);
397	}
398}
399
400/*
401 * edac_mc_reset_delay_period(unsigned long value)
402 *
403 *	user space has updated our poll period value, need to
404 *	reset our workq delays
405 */
406void edac_mc_reset_delay_period(int value)
407{
408	struct mem_ctl_info *mci;
409	struct list_head *item;
410
411	mutex_lock(&mem_ctls_mutex);
412
413	/* scan the list and turn off all workq timers, doing so under lock
414	 */
415	list_for_each(item, &mc_devices) {
416		mci = list_entry(item, struct mem_ctl_info, link);
417
418		if (mci->op_state == OP_RUNNING_POLL)
419			cancel_delayed_work(&mci->work);
420	}
421
422	mutex_unlock(&mem_ctls_mutex);
423
424
425	/* re-walk the list, and reset the poll delay */
426	mutex_lock(&mem_ctls_mutex);
427
428	list_for_each(item, &mc_devices) {
429		mci = list_entry(item, struct mem_ctl_info, link);
430
431		edac_mc_workq_setup(mci, (unsigned long) value);
432	}
433
434	mutex_unlock(&mem_ctls_mutex);
435}
436
437
438
/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 * The global list is kept sorted by mc_idx, ascending; insertion
 * fails when the device or the index is already registered.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	/* refuse to register the same underlying device twice */
	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	/* find the first entry with a larger index and insert before it,
	 * keeping the list sorted
	 */
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	/* RCU insert: NMI handlers may be traversing the list concurrently */
	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}
486
/* Unlink 'mci' from the global list; pairs with add_mc_to_global_list().
 * Caller holds mem_ctls_mutex.
 */
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}
498
499/**
500 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
501 *
502 * If found, return a pointer to the structure.
503 * Else return NULL.
504 *
505 * Caller must hold mem_ctls_mutex.
506 */
507struct mem_ctl_info *edac_mc_find(int idx)
508{
509	struct list_head *item;
510	struct mem_ctl_info *mci;
511
512	list_for_each(item, &mc_devices) {
513		mci = list_entry(item, struct mem_ctl_info, link);
514
515		if (mci->mc_idx >= idx) {
516			if (mci->mc_idx == idx)
517				return mci;
518
519			break;
520		}
521	}
522
523	return NULL;
524}
525EXPORT_SYMBOL(edac_mc_find);
526
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list; the caller
 *	 must already have assigned a unique value to mci->mc_idx
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	/* at high debug levels, dump the mci and every csrow/channel */
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
598EXPORT_SYMBOL_GPL(edac_mc_add_mc);
599
/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes
	 * NOTE(review): the poll work re-queues itself and OP_OFFLINE is
	 * only set below; a poll running right now could conceivably
	 * re-arm after this teardown -- verify against the workqueue
	 * cancel/flush semantics in use.
	 */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
640EXPORT_SYMBOL_GPL(edac_mc_del_mc);
641
/* Rewrite 'size' bytes at page:offset in place so the memory controller
 * regenerates the ECC for that location (software scrubbing).
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	/* highmem pages: keep IRQs off around the atomic kmap window */
	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}
672
673/* FIXME - should return -1 */
674int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
675{
676	struct csrow_info *csrows = mci->csrows;
677	int row, i, j, n;
678
679	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
680	row = -1;
681
682	for (i = 0; i < mci->nr_csrows; i++) {
683		struct csrow_info *csrow = &csrows[i];
684		n = 0;
685		for (j = 0; j < csrow->nr_channels; j++) {
686			struct dimm_info *dimm = csrow->channels[j].dimm;
687			n += dimm->nr_pages;
688		}
689		if (n == 0)
690			continue;
691
692		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
693			"mask(0x%lx)\n", mci->mc_idx, __func__,
694			csrow->first_page, page, csrow->last_page,
695			csrow->page_mask);
696
697		if ((page >= csrow->first_page) &&
698		    (page <= csrow->last_page) &&
699		    ((page & csrow->page_mask) ==
700		     (csrow->first_page & csrow->page_mask))) {
701			row = i;
702			break;
703		}
704	}
705
706	if (row == -1)
707		edac_mc_printk(mci, KERN_ERR,
708			"could not look up page error address %lx\n",
709			(unsigned long)page);
710
711	return row;
712}
713EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
714
/* FIXME - setable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
/* Report a correctable error at (row, channel): validate the coordinates,
 * optionally log, bump the CE counters at every level, and software-scrub
 * the affected location when SCRUB_SW_SRC is enabled.
 */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
{
	unsigned long remapped_page;
	char *label = NULL;
	u32 grain;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	label = mci->csrows[row].channels[channel].dimm->label;
	grain = mci->csrows[row].channels[channel].dimm->grain;

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			grain, syndrome, row, channel,
			label, msg);

	/* account the event at every level we keep statistics for */
	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].dimm->ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependent and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page, offset_in_page, grain);
	}
}
782EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
783
784void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
785{
786	if (edac_mc_get_log_ce())
787		edac_mc_printk(mci, KERN_WARNING,
788			"CE - no information available: %s\n", msg);
789
790	mci->ce_noinfo_count++;
791	mci->ce_count++;
792}
793EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
794
/* Report an uncorrectable error on 'row': build a colon-separated string
 * of the DIMM labels of every channel in the row, then log and/or panic
 * per module policy and bump the UE counters.
 */
void edac_mc_handle_ue(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;
	char *label = NULL;
	u32 grain;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	/* grain is taken from channel 0; the label string starts there too */
	grain = mci->csrows[row].channels[0].dimm->grain;
	label = mci->csrows[row].channels[0].dimm->label;
	chars = snprintf(pos, len + 1, "%s", label);
	len -= chars;
	pos += chars;

	/* append ":label" for the remaining channels; the 'len > 0' test
	 * stops once the buffer is exhausted (snprintf returns the would-be
	 * length, so 'len' can go negative on truncation)
	 */
	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
		chan++) {
		label = mci->csrows[row].channels[chan].dimm->label;
		chars = snprintf(pos, len + 1, ":%s", label);
		len -= chars;
		pos += chars;
	}

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, grain, row, labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			grain, row, labels, msg);

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
848EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
849
850void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
851{
852	if (edac_mc_get_panic_on_ue())
853		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
854
855	if (edac_mc_get_log_ue())
856		edac_mc_printk(mci, KERN_WARNING,
857			"UE - no information available: %s\n", msg);
858	mci->ue_noinfo_count++;
859	mci->ue_count++;
860}
861EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
862
/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process UE events
 *
 * Validates the row and both channel coordinates, accounts the UE,
 * then builds a "labelA-labelB" string for logging/panic.
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;
	char *label;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	label = mci->csrows[csrow].channels[channela].dimm->label;
	chars = snprintf(pos, len + 1, "%s", label);
	len -= chars;
	pos += chars;

	/* NOTE(review): there is no 'len > 0' check before this second
	 * snprintf; if the first label could ever exceed 'len', the
	 * 'len + 1' size would be <= 0 and wrap when converted to size_t.
	 * Safe only as long as labels are bounded by EDAC_MC_LABEL_LEN --
	 * verify.
	 */
	chars = snprintf(pos, len + 1, "-%s",
			mci->csrows[csrow].channels[channelb].dimm->label);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
}
930EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
931
/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process CE events
 *
 * Validates the coordinates, optionally logs the event and bumps the
 * CE counters at every level.
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
			unsigned int csrow, unsigned int channel, char *msg)
{
	char *label = NULL;

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	label = mci->csrows[csrow].channels[channel].dimm->label;

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel, label, msg);

	/* account the event at every level we keep statistics for */
	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].dimm->ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
972EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
973