edac_mc.c revision 084a4fccef39ac7abb039511f32380f28d0b67e6
1/*
2 * edac_mc kernel module
3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 *	http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/ctype.h>
29#include <linux/edac.h>
30#include <asm/uaccess.h>
31#include <asm/page.h>
32#include <asm/edac.h>
33#include "edac_core.h"
34#include "edac_module.h"
35
36/* lock to memory controller's control array */
37static DEFINE_MUTEX(mem_ctls_mutex);
38static LIST_HEAD(mc_devices);
39
40#ifdef CONFIG_EDAC_DEBUG
41
42static void edac_mc_dump_channel(struct rank_info *chan)
43{
44	debugf4("\tchannel = %p\n", chan);
45	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
46	debugf4("\tchannel->ce_count = %d\n", chan->dimm->ce_count);
47	debugf4("\tchannel->label = '%s'\n", chan->dimm->label);
48	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
49}
50
51static void edac_mc_dump_csrow(struct csrow_info *csrow)
52{
53	debugf4("\tcsrow = %p\n", csrow);
54	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
55	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
56	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
57	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
58	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
59	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
60	debugf4("\tcsrow->channels = %p\n", csrow->channels);
61	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
62}
63
/* Dump the controller-wide fields of an mci.  Most fields print at
 * debug level 3; the edac_check pointer only at the noisier level 4.
 */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}
77
78#endif				/* CONFIG_EDAC_DEBUG */
79
80/*
81 * keep those in sync with the enum mem_type
82 */
/* Human-readable names for memory types, indexed by enum mem_type
 * (declared in linux/edac.h).  The order here IS the contract: entries
 * must stay in sync with that enum.
 * NOTE(review): when a new mem_type is added, a matching string must be
 * appended here in the same position — verify against linux/edac.h.
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
103
104/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
105 * Adjust 'ptr' so that its alignment is at least as stringent as what the
106 * compiler would provide for X and return the aligned result.
107 *
108 * If 'size' is a constant, the compiler will optimize this whole function
109 * down to either a no-op or the addition of a constant to the value of 'ptr'.
110 */
/* Align 'ptr' for an item of 'size' bytes and return the result.
 *
 * BUG FIX: the old code computed the remainder from 'size' instead of
 * from the pointer's address, so whenever 'size' was a multiple of the
 * required alignment (true for every struct), a misaligned 'ptr' was
 * returned unchanged.  Align on the pointer value itself, as the
 * compiler would for a real struct member.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return ptr;		/* byte-sized items need no alignment */

	/* Distance of the pointer itself past the last aligned boundary */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return ptr;

	/* Round up to the next 'align' boundary */
	return (void *)(((unsigned long)ptr) + align - r);
}
137
138/**
139 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
140 * @size_pvt:	size of private storage needed
141 * @nr_csrows:	Number of CWROWS needed for this MC
142 * @nr_chans:	Number of channels for the MC
143 *
144 * Everything is kmalloc'ed as one big chunk - more efficient.
145 * Only can be used if all structures have the same lifetime - otherwise
146 * you have to allocate and initialize your own structures.
147 *
148 * Use edac_mc_free() to free mc structures allocated by this function.
149 *
150 * Returns:
151 *	NULL allocation failed
152 *	struct mem_ctl_info pointer
153 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	void *pvt;
	unsigned size;		/* total bytes for the single allocation */
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 *
	 * Trick: compute each sub-array's position relative to a NULL base,
	 * so the "pointers" below are really byte offsets until kzalloc()
	 * succeeds and they are rebased onto the real allocation.
	 */
	mci = (struct mem_ctl_info *)0;
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
	dimm = edac_align_ptr(&chi[nr_chans * nr_csrows], sizeof(*dimm));
	pvt = edac_align_ptr(&dimm[nr_chans * nr_csrows], sz_pvt);
	size = ((unsigned long)pvt) + sz_pvt;

	/* One zeroed chunk holds mci + csrows + ranks + dimms + pvt */
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	/* pvt stays NULL when the driver asked for no private storage */
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->dimms  = dimm;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	/*
	 * For now, assumes that a per-csrow arrangement for dimms.
	 * This will be latter changed.
	 */
	dimm = mci->dimms;

	/* Wire every csrow to its channel slice, and every channel to
	 * the next dimm_info in the flat (row-major) dimm array.
	 */
	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;

			mci->csrows[row].channels[chn].dimm = dimm;
			dimm->csrow = row;
			dimm->csrow_channel = chn;
			dimm++;
			mci->nr_dimms++;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *      edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
245
246/**
247 * edac_mc_free
248 *	'Free' a previously allocated 'mci' structure
249 * @mci: pointer to a struct mem_ctl_info structure
250 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	/* Drop the root sysfs kobject registered in edac_mc_alloc(),
	 * then release the single chunk that holds the whole mci.
	 */
	edac_mc_unregister_sysfs_main_kobj(mci);
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
261
262
263/**
264 * find_mci_by_dev
265 *
266 *	scan list of controllers looking for the one that manages
267 *	the 'dev' device
268 * @dev: pointer to a struct device related with the MCI
269 */
270struct mem_ctl_info *find_mci_by_dev(struct device *dev)
271{
272	struct mem_ctl_info *mci;
273	struct list_head *item;
274
275	debugf3("%s()\n", __func__);
276
277	list_for_each(item, &mc_devices) {
278		mci = list_entry(item, struct mem_ctl_info, link);
279
280		if (mci->dev == dev)
281			return mci;
282	}
283
284	return NULL;
285}
286EXPORT_SYMBOL_GPL(find_mci_by_dev);
287
288/*
289 * handler for EDAC to check if NMI type handler has asserted interrupt
290 */
291static int edac_mc_assert_error_check_and_clear(void)
292{
293	int old_state;
294
295	if (edac_op_state == EDAC_OPSTATE_POLL)
296		return 1;
297
298	old_state = edac_err_assert;
299	edac_err_assert = 0;
300
301	return old_state;
302}
303
304/*
305 * edac_mc_workq_function
306 *	performs the operation scheduled by a workq request
307 */
/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 *
 *	Runs mci->edac_check() under mem_ctls_mutex, then re-queues
 *	itself for the next poll period.  An OFFLINE mci returns
 *	without rescheduling, which is how polling is stopped.
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done:
	 * do NOT reschedule, letting the work item die out.
	 */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule outside the lock for the next poll period */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}
331
332/*
333 * edac_mc_workq_setup
334 *	initialize a workq item for this mci
335 *	passing in the new delay period in msec
336 *
337 *	locking model:
338 *
339 *		called with the mem_ctls_mutex held
340 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return;
	 * interrupt-driven controllers never get a poll work item.
	 */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	/* Arm the delayed work: it re-queues itself from
	 * edac_mc_workq_function() each period thereafter.
	 */
	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}
352
353/*
354 * edac_mc_workq_teardown
355 *	stop the workq processing on this mci
356 *
357 *	locking model:
358 *
359 *		called WITHOUT lock held
360 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	/* Nothing to stop unless this instance was polling */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	/* cancel_delayed_work() returns 0 when the work was not pending,
	 * i.e. it may currently be executing — drain the queue so it has
	 * finished (and, being OFFLINE, will not re-queue itself).
	 */
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}
377
378/*
379 * edac_mc_reset_delay_period(unsigned long value)
380 *
381 *	user space has updated our poll period value, need to
382 *	reset our workq delays
383 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* NOTE(review): the lock is dropped and re-taken between the two
	 * passes; a still-running work item can re-queue itself in the
	 * window (edac_mc_workq_function re-queues outside the lock).
	 * Presumably harmless since the second pass re-arms anyway —
	 * confirm before merging the two passes into one.
	 */

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}
414
415
416
417/* Return 0 on success, 1 on failure.
418 * Before calling this function, caller must
419 * assign a unique value to mci->mc_idx.
420 *
421 *	locking model:
422 *
423 *		called with the mem_ctls_mutex lock held
424 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	/* Default: append at the tail (insert before the list head) */
	insert_before = &mc_devices;

	/* Reject a second registration for the same underlying device */
	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	/* The list is kept sorted by mc_idx; find the insertion point
	 * and reject a duplicate index on the way.
	 */
	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	/* RCU insert: NMI handlers may traverse concurrently */
	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}
464
/* Unlink 'mci' from the global list.  Caller holds mem_ctls_mutex. */
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list: wait for all current RCU
	 * readers to finish before the node can be reused.
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}
476
477/**
478 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
479 *
480 * If found, return a pointer to the structure.
481 * Else return NULL.
482 *
483 * Caller must hold mem_ctls_mutex.
484 */
485struct mem_ctl_info *edac_mc_find(int idx)
486{
487	struct list_head *item;
488	struct mem_ctl_info *mci;
489
490	list_for_each(item, &mc_devices) {
491		mci = list_entry(item, struct mem_ctl_info, link);
492
493		if (mci->mc_idx >= idx) {
494			if (mci->mc_idx == idx)
495				return mci;
496
497			break;
498		}
499	}
500
501	return NULL;
502}
503EXPORT_SYMBOL(edac_mc_find);
504
505/**
506 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
507 *                 create sysfs entries associated with mci structure
508 * @mci: pointer to the mci structure to be added to the list
509 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
510 *
511 * Return:
512 *	0	Success
513 *	!0	Failure
514 */
515
516/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	/* Dump everything we were handed, gated by verbosity level */
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	/* Insert into the sorted global list; fails on duplicate
	 * device or duplicate mc_idx.
	 */
	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;	/* roll back the global-list insert */
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
577
578/**
579 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
580 *                 remove mci structure from global list
581 * @pdev: Pointer to 'struct device' representing mci structure to remove.
582 *
583 * Return pointer to removed mci structure, or NULL if device not found.
584 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline.
	 * NOTE(review): this happens after the workq teardown; the
	 * OP_OFFLINE check in edac_mc_workq_function() therefore cannot
	 * fire during removal — teardown relies on cancel+flush alone.
	 * Confirm whether OFFLINE should be set before teardown instead.
	 */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	/* Ownership returns to the caller, who must edac_mc_free() it */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
619
/* Software-scrub 'size' bytes at (page, offset): re-read/re-write the
 * data atomically so the memory controller recomputes the ECC bits.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	/* Highmem kmap slots are per-CPU; block interrupts so an IRQ
	 * handler cannot re-enter kmap_atomic on this CPU meanwhile.
	 */
	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}
650
651/* FIXME - should return -1 */
652int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
653{
654	struct csrow_info *csrows = mci->csrows;
655	int row, i;
656
657	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
658	row = -1;
659
660	for (i = 0; i < mci->nr_csrows; i++) {
661		struct csrow_info *csrow = &csrows[i];
662
663		if (csrow->nr_pages == 0)
664			continue;
665
666		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
667			"mask(0x%lx)\n", mci->mc_idx, __func__,
668			csrow->first_page, page, csrow->last_page,
669			csrow->page_mask);
670
671		if ((page >= csrow->first_page) &&
672		    (page <= csrow->last_page) &&
673		    ((page & csrow->page_mask) ==
674		     (csrow->first_page & csrow->page_mask))) {
675			row = i;
676			break;
677		}
678	}
679
680	if (row == -1)
681		edac_mc_printk(mci, KERN_ERR,
682			"could not look up page error address %lx\n",
683			(unsigned long)page);
684
685	return row;
686}
687EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
688
689/* FIXME - setable log (warning/emerg) levels */
690/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
/* Record a correctable error at (row, channel): log it, bump the
 * per-mci/per-row/per-channel/per-dimm counters, and optionally
 * software-scrub the affected memory.
 */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
{
	unsigned long remapped_page;
	char *label = NULL;
	u32 grain;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong: driver passed a bogus row; count it
		 * as a no-info CE rather than touching invalid memory.
		 */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	label = mci->csrows[row].channels[channel].dimm->label;
	grain = mci->csrows[row].channels[channel].dimm->grain;

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			grain, syndrome, row, channel,
			label, msg);

	/* Account at every level of the hierarchy */
	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].dimm->ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependent and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page, offset_in_page, grain);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
757
758void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
759{
760	if (edac_mc_get_log_ce())
761		edac_mc_printk(mci, KERN_WARNING,
762			"CE - no information available: %s\n", msg);
763
764	mci->ce_noinfo_count++;
765	mci->ce_count++;
766}
767EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
768
/* Record an uncorrectable error on 'row': build a ':'-joined label list
 * from every channel's DIMM, log and/or panic per module policy, then
 * bump the counters.
 */
void edac_mc_handle_ue(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;
	char *label = NULL;
	u32 grain;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	/* Channel 0 seeds the label string; grain is taken from it too */
	grain = mci->csrows[row].channels[0].dimm->grain;
	label = mci->csrows[row].channels[0].dimm->label;
	chars = snprintf(pos, len + 1, "%s", label);
	/* NOTE(review): snprintf returns the would-be length, so on
	 * truncation 'chars' can exceed 'len', driving 'len' negative and
	 * 'pos' past the terminator.  The 'len > 0' loop guard stops
	 * further writes, so this looks safe — confirm no other use of
	 * pos/len follows truncation.
	 */
	len -= chars;
	pos += chars;

	/* Append ":label" for each remaining channel while room remains */
	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
		chan++) {
		label = mci->csrows[row].channels[chan].dimm->label;
		chars = snprintf(pos, len + 1, ":%s", label);
		len -= chars;
		pos += chars;
	}

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, grain, row, labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			grain, row, labels, msg);

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
823
824void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
825{
826	if (edac_mc_get_panic_on_ue())
827		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
828
829	if (edac_mc_get_log_ue())
830		edac_mc_printk(mci, KERN_WARNING,
831			"UE - no information available: %s\n", msg);
832	mci->ue_noinfo_count++;
833	mci->ue_count++;
834}
835EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
836
837/*************************************************************
838 * On Fully Buffered DIMM modules, this help function is
839 * called to process UE events
840 */
/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process UE events.  The error is attributed to a
 * pair of channels (a and b) on one csrow; the two DIMM labels
 * are joined with '-' for the report.
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;
	char *label;

	if (csrow >= mci->nr_csrows) {
		/* something is wrong: bogus row from the driver */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	/* Counters first: log/panic below cannot lose the event */
	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	label = mci->csrows[csrow].channels[channela].dimm->label;
	chars = snprintf(pos, len + 1, "%s", label);
	len -= chars;
	pos += chars;

	/* NOTE(review): unlike edac_mc_handle_ue(), this second snprintf
	 * is not guarded by 'len > 0' — safe only while two labels fit in
	 * EDAC_MC_LABEL_LEN * 4 bytes; confirm label length bounds.
	 */
	chars = snprintf(pos, len + 1, "-%s",
			mci->csrows[csrow].channels[channelb].dimm->label);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
905
906/*************************************************************
907 * On Fully Buffered DIMM modules, this help function is
908 * called to process CE events
909 */
910void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
911			unsigned int csrow, unsigned int channel, char *msg)
912{
913	char *label = NULL;
914
915	/* Ensure boundary values */
916	if (csrow >= mci->nr_csrows) {
917		/* something is wrong */
918		edac_mc_printk(mci, KERN_ERR,
919			"INTERNAL ERROR: row out of range (%d >= %d)\n",
920			csrow, mci->nr_csrows);
921		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
922		return;
923	}
924	if (channel >= mci->csrows[csrow].nr_channels) {
925		/* something is wrong */
926		edac_mc_printk(mci, KERN_ERR,
927			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
928			channel, mci->csrows[csrow].nr_channels);
929		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
930		return;
931	}
932
933	label = mci->csrows[csrow].channels[channel].dimm->label;
934
935	if (edac_mc_get_log_ce())
936		/* FIXME - put in DIMM location */
937		edac_mc_printk(mci, KERN_WARNING,
938			"CE row %d, channel %d, label \"%s\": %s\n",
939			csrow, channel, label, msg);
940
941	mci->ce_count++;
942	mci->csrows[csrow].ce_count++;
943	mci->csrows[csrow].channels[channel].dimm->ce_count++;
944	mci->csrows[csrow].channels[channel].ce_count++;
945}
946EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
947