edac_mc.c revision 8096cfafbb7ad3cb1a286ae7e8086167f4ebb4b6
1/*
2 * edac_mc kernel module
3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 *	http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/sysdev.h>
29#include <linux/ctype.h>
30#include <linux/edac.h>
31#include <asm/uaccess.h>
32#include <asm/page.h>
33#include <asm/edac.h>
34#include "edac_core.h"
35#include "edac_module.h"
36
/* mutex taken around accesses to the mc_devices list below */
static DEFINE_MUTEX(mem_ctls_mutex);

/* global list of memory controller instances, linked through mci->link */
static LIST_HEAD(mc_devices);
40
41#ifdef CONFIG_EDAC_DEBUG
42
/*
 * Debug helper (only built with CONFIG_EDAC_DEBUG): print the address and
 * the fields of one channel_info through debugf4().
 */
static void edac_mc_dump_channel(struct channel_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	/* back pointer to the csrow this channel belongs to */
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}
51
/*
 * Debug helper (only built with CONFIG_EDAC_DEBUG): print the address and
 * the fields of one csrow_info through debugf4().  The channels themselves
 * are not dumped here, only the pointer to the channel array.
 */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	/* back pointer to the owning memory controller */
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}
64
/*
 * Debug helper (only built with CONFIG_EDAC_DEBUG): print the top-level
 * fields of a mem_ctl_info.  Everything goes through debugf3() except the
 * edac_check pointer, which is printed through debugf4().
 */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	/* note: this one line uses debugf4, unlike the rest of the function */
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}
78
79#endif				/* CONFIG_EDAC_DEBUG */
80
/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * The required alignment is picked from 'size'; the amount to add is then
 * derived from the current value of 'ptr' itself.  A pointer that already
 * satisfies the alignment is returned unchanged, otherwise it is rounded up
 * to the next multiple of the alignment.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	/* How far 'ptr' sits past the previous 'align' boundary.  This has to
	 * be computed from the address, not from 'size': the size of the item
	 * says nothing about where the pointer currently is.
	 */
	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}
114
115/**
116 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
117 * @size_pvt:	size of private storage needed
118 * @nr_csrows:	Number of CWROWS needed for this MC
119 * @nr_chans:	Number of channels for the MC
120 *
121 * Everything is kmalloc'ed as one big chunk - more efficient.
122 * Only can be used if all structures have the same lifetime - otherwise
123 * you have to allocate and initialize your own structures.
124 *
125 * Use edac_mc_free() to free mc structures allocated by this function.
126 *
127 * Returns:
128 *	NULL allocation failed
129 *	struct mem_ctl_info pointer
130 */
131struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
132				unsigned nr_chans, int edac_index)
133{
134	struct mem_ctl_info *mci;
135	struct csrow_info *csi, *csrow;
136	struct channel_info *chi, *chp, *chan;
137	void *pvt;
138	unsigned size;
139	int row, chn;
140	int err;
141
142	/* Figure out the offsets of the various items from the start of an mc
143	 * structure.  We want the alignment of each item to be at least as
144	 * stringent as what the compiler would provide if we could simply
145	 * hardcode everything into a single struct.
146	 */
147	mci = (struct mem_ctl_info *)0;
148	csi = edac_align_ptr(&mci[1], sizeof(*csi));
149	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
150	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
151	size = ((unsigned long)pvt) + sz_pvt;
152
153	mci = kzalloc(size, GFP_KERNEL);
154	if (mci == NULL)
155		return NULL;
156
157	/* Adjust pointers so they point within the memory we just allocated
158	 * rather than an imaginary chunk of memory located at address 0.
159	 */
160	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
161	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
162	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
163
164	/* setup index and various internal pointers */
165	mci->mc_idx = edac_index;
166	mci->csrows = csi;
167	mci->pvt_info = pvt;
168	mci->nr_csrows = nr_csrows;
169
170	for (row = 0; row < nr_csrows; row++) {
171		csrow = &csi[row];
172		csrow->csrow_idx = row;
173		csrow->mci = mci;
174		csrow->nr_channels = nr_chans;
175		chp = &chi[row * nr_chans];
176		csrow->channels = chp;
177
178		for (chn = 0; chn < nr_chans; chn++) {
179			chan = &chp[chn];
180			chan->chan_idx = chn;
181			chan->csrow = csrow;
182		}
183	}
184
185	mci->op_state = OP_ALLOC;
186
187	/*
188	 * Initialize the 'root' kobj for the edac_mc controller
189	 */
190	err = edac_mc_register_sysfs_main_kobj(mci);
191	if (err) {
192		kfree(mci);
193		return NULL;
194	}
195
196	/* at this point, the root kobj is valid, and in order to
197	 * 'free' the object, then the function:
198	 *      edac_mc_unregister_sysfs_main_kobj() must be called
199	 * which will perform kobj unregistration and the actual free
200	 * will occur during the kobject callback operation
201	 */
202	return mci;
203}
204EXPORT_SYMBOL_GPL(edac_mc_alloc);
205
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * The structure is not released here directly: it is handed to
 * edac_mc_unregister_sysfs_main_kobj().  According to the note at the end
 * of edac_mc_alloc(), the actual free happens in the kobject callback.
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_mc_unregister_sysfs_main_kobj(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);
216
217static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
218{
219	struct mem_ctl_info *mci;
220	struct list_head *item;
221
222	debugf3("%s()\n", __func__);
223
224	list_for_each(item, &mc_devices) {
225		mci = list_entry(item, struct mem_ctl_info, link);
226
227		if (mci->dev == dev)
228			return mci;
229	}
230
231	return NULL;
232}
233
234/*
235 * handler for EDAC to check if NMI type handler has asserted interrupt
236 */
237static int edac_mc_assert_error_check_and_clear(void)
238{
239	int old_state;
240
241	if (edac_op_state == EDAC_OPSTATE_POLL)
242		return 1;
243
244	old_state = edac_err_assert;
245	edac_err_assert = 0;
246
247	return old_state;
248}
249
250/*
251 * edac_mc_workq_function
252 *	performs the operation scheduled by a workq request
253 */
254static void edac_mc_workq_function(struct work_struct *work_req)
255{
256	struct delayed_work *d_work = (struct delayed_work *)work_req;
257	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
258
259	mutex_lock(&mem_ctls_mutex);
260
261	/* Only poll controllers that are running polled and have a check */
262	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
263		mci->edac_check(mci);
264
265	/*
266	 * FIXME: temp place holder for PCI checks,
267	 * goes away when we break out PCI
268	 */
269	edac_pci_do_parity_check();
270
271	mutex_unlock(&mem_ctls_mutex);
272
273	/* Reschedule */
274	queue_delayed_work(edac_workqueue, &mci->work,
275			msecs_to_jiffies(edac_mc_get_poll_msec()));
276}
277
278/*
279 * edac_mc_workq_setup
280 *	initialize a workq item for this mci
281 *	passing in the new delay period in msec
282 */
283void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
284{
285	debugf0("%s()\n", __func__);
286
287	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
288	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
289}
290
291/*
292 * edac_mc_workq_teardown
293 *	stop the workq processing on this mci
294 */
295void edac_mc_workq_teardown(struct mem_ctl_info *mci)
296{
297	int status;
298
299	status = cancel_delayed_work(&mci->work);
300	if (status == 0) {
301		/* workq instance might be running, wait for it */
302		flush_workqueue(edac_workqueue);
303	}
304}
305
/*
 * edac_reset_delay_period
 *	restart the polling work of 'mci' so that its next run happens
 *	'value' milliseconds from now ('value' is passed on as the msec
 *	argument of edac_mc_workq_setup())
 *
 * NOTE(review): edac_mc_workq_teardown() can end up in flush_workqueue(),
 * and edac_mc_workq_function() acquires mem_ctls_mutex.  Flushing while the
 * mutex is held here therefore looks like it can deadlock when the work
 * item is executing at that moment - confirm and rework if so.
 */

void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value)
{
	mutex_lock(&mem_ctls_mutex);

	/* cancel the current workq request */
	edac_mc_workq_teardown(mci);

	/* restart the workq request, with new delay value */
	edac_mc_workq_setup(mci, value);

	mutex_unlock(&mem_ctls_mutex);
}
322
323/* Return 0 on success, 1 on failure.
324 * Before calling this function, caller must
325 * assign a unique value to mci->mc_idx.
326 */
327static int add_mc_to_global_list(struct mem_ctl_info *mci)
328{
329	struct list_head *item, *insert_before;
330	struct mem_ctl_info *p;
331
332	insert_before = &mc_devices;
333
334	if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL))
335		goto fail0;
336
337	list_for_each(item, &mc_devices) {
338		p = list_entry(item, struct mem_ctl_info, link);
339
340		if (p->mc_idx >= mci->mc_idx) {
341			if (unlikely(p->mc_idx == mci->mc_idx))
342				goto fail1;
343
344			insert_before = item;
345			break;
346		}
347	}
348
349	list_add_tail_rcu(&mci->link, insert_before);
350	atomic_inc(&edac_handlers);
351	return 0;
352
353fail0:
354	edac_printk(KERN_WARNING, EDAC_MC,
355		"%s (%s) %s %s already assigned %d\n", p->dev->bus_id,
356		dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
357	return 1;
358
359fail1:
360	edac_printk(KERN_WARNING, EDAC_MC,
361		"bug in low-level driver: attempt to assign\n"
362		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
363	return 1;
364}
365
366static void complete_mc_list_del(struct rcu_head *head)
367{
368	struct mem_ctl_info *mci;
369
370	mci = container_of(head, struct mem_ctl_info, rcu);
371	INIT_LIST_HEAD(&mci->link);
372	complete(&mci->complete);
373}
374
/*
 * Unlink 'mci' from the mc_devices list, drop the edac_handlers count and
 * block until the RCU callback complete_mc_list_del() has run for it.
 */
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);
	/* the completion is signalled from complete_mc_list_del() */
	init_completion(&mci->complete);
	call_rcu(&mci->rcu, complete_mc_list_del);
	wait_for_completion(&mci->complete);
}
383
384/**
385 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
386 *
387 * If found, return a pointer to the structure.
388 * Else return NULL.
389 *
390 * Caller must hold mem_ctls_mutex.
391 */
392struct mem_ctl_info *edac_mc_find(int idx)
393{
394	struct list_head *item;
395	struct mem_ctl_info *mci;
396
397	list_for_each(item, &mc_devices) {
398		mci = list_entry(item, struct mem_ctl_info, link);
399
400		if (mci->mc_idx >= idx) {
401			if (mci->mc_idx == idx)
402				return mci;
403
404			break;
405		}
406	}
407
408	return NULL;
409}
410EXPORT_SYMBOL(edac_mc_find);
411
412/**
413 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
414 *                 create sysfs entries associated with mci structure
415 * @mci: pointer to the mci structure to be added to the list
416 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
417 *
418 * Return:
419 *	0	Success
420 *	!0	Failure
421 */
422
423/* FIXME - should a warning be printed if no error detection? correction? */
424int edac_mc_add_mc(struct mem_ctl_info *mci)
425{
426	debugf0("%s()\n", __func__);
427
428#ifdef CONFIG_EDAC_DEBUG
429	if (edac_debug_level >= 3)
430		edac_mc_dump_mci(mci);
431
432	if (edac_debug_level >= 4) {
433		int i;
434
435		for (i = 0; i < mci->nr_csrows; i++) {
436			int j;
437
438			edac_mc_dump_csrow(&mci->csrows[i]);
439			for (j = 0; j < mci->csrows[i].nr_channels; j++)
440				edac_mc_dump_channel(&mci->csrows[i].
441						channels[j]);
442		}
443	}
444#endif
445	mutex_lock(&mem_ctls_mutex);
446
447	if (add_mc_to_global_list(mci))
448		goto fail0;
449
450	/* set load time so that error rate can be tracked */
451	mci->start_time = jiffies;
452
453	if (edac_create_sysfs_mci_device(mci)) {
454		edac_mc_printk(mci, KERN_WARNING,
455			"failed to create sysfs device\n");
456		goto fail1;
457	}
458
459	/* If there IS a check routine, then we are running POLLED */
460	if (mci->edac_check != NULL) {
461		/* This instance is NOW RUNNING */
462		mci->op_state = OP_RUNNING_POLL;
463
464		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
465	} else {
466		mci->op_state = OP_RUNNING_INTERRUPT;
467	}
468
469	/* Report action taken */
470	edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n",
471		mci->mod_name, mci->ctl_name, dev_name(mci));
472
473	mutex_unlock(&mem_ctls_mutex);
474	return 0;
475
476fail1:
477	del_mc_from_global_list(mci);
478
479fail0:
480	mutex_unlock(&mem_ctls_mutex);
481	return 1;
482}
483EXPORT_SYMBOL_GPL(edac_mc_add_mc);
484
485/**
486 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
487 *                 remove mci structure from global list
488 * @pdev: Pointer to 'struct device' representing mci structure to remove.
489 *
490 * Return pointer to removed mci structure, or NULL if device not found.
491 */
492struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
493{
494	struct mem_ctl_info *mci;
495
496	debugf0("MC: %s()\n", __func__);
497	mutex_lock(&mem_ctls_mutex);
498
499	if ((mci = find_mci_by_dev(dev)) == NULL) {
500		mutex_unlock(&mem_ctls_mutex);
501		return NULL;
502	}
503
504	/* marking MCI offline */
505	mci->op_state = OP_OFFLINE;
506
507	/* flush workq processes */
508	edac_mc_workq_teardown(mci);
509
510	edac_remove_sysfs_mci_device(mci);
511	del_mc_from_global_list(mci);
512	mutex_unlock(&mem_ctls_mutex);
513	edac_printk(KERN_INFO, EDAC_MC,
514		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
515		mci->mod_name, mci->ctl_name, dev_name(mci));
516	return mci;
517}
518EXPORT_SYMBOL_GPL(edac_mc_del_mc);
519
520static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
521				u32 size)
522{
523	struct page *pg;
524	void *virt_addr;
525	unsigned long flags = 0;
526
527	debugf3("%s()\n", __func__);
528
529	/* ECC error page was not in our memory. Ignore it. */
530	if (!pfn_valid(page))
531		return;
532
533	/* Find the actual page structure then map it and fix */
534	pg = pfn_to_page(page);
535
536	if (PageHighMem(pg))
537		local_irq_save(flags);
538
539	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
540
541	/* Perform architecture specific atomic scrub operation */
542	atomic_scrub(virt_addr + offset, size);
543
544	/* Unmap and complete */
545	kunmap_atomic(virt_addr, KM_BOUNCE_READ);
546
547	if (PageHighMem(pg))
548		local_irq_restore(flags);
549}
550
551/* FIXME - should return -1 */
552int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
553{
554	struct csrow_info *csrows = mci->csrows;
555	int row, i;
556
557	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
558	row = -1;
559
560	for (i = 0; i < mci->nr_csrows; i++) {
561		struct csrow_info *csrow = &csrows[i];
562
563		if (csrow->nr_pages == 0)
564			continue;
565
566		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
567			"mask(0x%lx)\n", mci->mc_idx, __func__,
568			csrow->first_page, page, csrow->last_page,
569			csrow->page_mask);
570
571		if ((page >= csrow->first_page) &&
572		    (page <= csrow->last_page) &&
573		    ((page & csrow->page_mask) ==
574		     (csrow->first_page & csrow->page_mask))) {
575			row = i;
576			break;
577		}
578	}
579
580	if (row == -1)
581		edac_mc_printk(mci, KERN_ERR,
582			"could not look up page error address %lx\n",
583			(unsigned long)page);
584
585	return row;
586}
587EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
588
589/* FIXME - setable log (warning/emerg) levels */
590/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
591void edac_mc_handle_ce(struct mem_ctl_info *mci,
592		unsigned long page_frame_number,
593		unsigned long offset_in_page, unsigned long syndrome,
594		int row, int channel, const char *msg)
595{
596	unsigned long remapped_page;
597
598	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
599
600	/* FIXME - maybe make panic on INTERNAL ERROR an option */
601	if (row >= mci->nr_csrows || row < 0) {
602		/* something is wrong */
603		edac_mc_printk(mci, KERN_ERR,
604			"INTERNAL ERROR: row out of range "
605			"(%d >= %d)\n", row, mci->nr_csrows);
606		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
607		return;
608	}
609
610	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
611		/* something is wrong */
612		edac_mc_printk(mci, KERN_ERR,
613			"INTERNAL ERROR: channel out of range "
614			"(%d >= %d)\n", channel,
615			mci->csrows[row].nr_channels);
616		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
617		return;
618	}
619
620	if (edac_mc_get_log_ce())
621		/* FIXME - put in DIMM location */
622		edac_mc_printk(mci, KERN_WARNING,
623			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
624			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
625			page_frame_number, offset_in_page,
626			mci->csrows[row].grain, syndrome, row, channel,
627			mci->csrows[row].channels[channel].label, msg);
628
629	mci->ce_count++;
630	mci->csrows[row].ce_count++;
631	mci->csrows[row].channels[channel].ce_count++;
632
633	if (mci->scrub_mode & SCRUB_SW_SRC) {
634		/*
635		 * Some MC's can remap memory so that it is still available
636		 * at a different address when PCI devices map into memory.
637		 * MC's that can't do this lose the memory where PCI devices
638		 * are mapped.  This mapping is MC dependant and so we call
639		 * back into the MC driver for it to map the MC page to
640		 * a physical (CPU) page which can then be mapped to a virtual
641		 * page - which can then be scrubbed.
642		 */
643		remapped_page = mci->ctl_page_to_phys ?
644			mci->ctl_page_to_phys(mci, page_frame_number) :
645			page_frame_number;
646
647		edac_mc_scrub_block(remapped_page, offset_in_page,
648				mci->csrows[row].grain);
649	}
650}
651EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
652
653void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
654{
655	if (edac_mc_get_log_ce())
656		edac_mc_printk(mci, KERN_WARNING,
657			"CE - no information available: %s\n", msg);
658
659	mci->ce_noinfo_count++;
660	mci->ce_count++;
661}
662EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
663
664void edac_mc_handle_ue(struct mem_ctl_info *mci,
665		unsigned long page_frame_number,
666		unsigned long offset_in_page, int row, const char *msg)
667{
668	int len = EDAC_MC_LABEL_LEN * 4;
669	char labels[len + 1];
670	char *pos = labels;
671	int chan;
672	int chars;
673
674	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
675
676	/* FIXME - maybe make panic on INTERNAL ERROR an option */
677	if (row >= mci->nr_csrows || row < 0) {
678		/* something is wrong */
679		edac_mc_printk(mci, KERN_ERR,
680			"INTERNAL ERROR: row out of range "
681			"(%d >= %d)\n", row, mci->nr_csrows);
682		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
683		return;
684	}
685
686	chars = snprintf(pos, len + 1, "%s",
687			 mci->csrows[row].channels[0].label);
688	len -= chars;
689	pos += chars;
690
691	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
692		chan++) {
693		chars = snprintf(pos, len + 1, ":%s",
694				 mci->csrows[row].channels[chan].label);
695		len -= chars;
696		pos += chars;
697	}
698
699	if (edac_mc_get_log_ue())
700		edac_mc_printk(mci, KERN_EMERG,
701			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
702			"labels \"%s\": %s\n", page_frame_number,
703			offset_in_page, mci->csrows[row].grain, row,
704			labels, msg);
705
706	if (edac_mc_get_panic_on_ue())
707		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
708			"row %d, labels \"%s\": %s\n", mci->mc_idx,
709			page_frame_number, offset_in_page,
710			mci->csrows[row].grain, row, labels, msg);
711
712	mci->ue_count++;
713	mci->csrows[row].ue_count++;
714}
715EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
716
717void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
718{
719	if (edac_mc_get_panic_on_ue())
720		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
721
722	if (edac_mc_get_log_ue())
723		edac_mc_printk(mci, KERN_WARNING,
724			"UE - no information available: %s\n", msg);
725	mci->ue_noinfo_count++;
726	mci->ue_count++;
727}
728EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
729
730/*************************************************************
731 * On Fully Buffered DIMM modules, this help function is
732 * called to process UE events
733 */
734void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
735			unsigned int csrow,
736			unsigned int channela,
737			unsigned int channelb, char *msg)
738{
739	int len = EDAC_MC_LABEL_LEN * 4;
740	char labels[len + 1];
741	char *pos = labels;
742	int chars;
743
744	if (csrow >= mci->nr_csrows) {
745		/* something is wrong */
746		edac_mc_printk(mci, KERN_ERR,
747			"INTERNAL ERROR: row out of range (%d >= %d)\n",
748			csrow, mci->nr_csrows);
749		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
750		return;
751	}
752
753	if (channela >= mci->csrows[csrow].nr_channels) {
754		/* something is wrong */
755		edac_mc_printk(mci, KERN_ERR,
756			"INTERNAL ERROR: channel-a out of range "
757			"(%d >= %d)\n",
758			channela, mci->csrows[csrow].nr_channels);
759		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
760		return;
761	}
762
763	if (channelb >= mci->csrows[csrow].nr_channels) {
764		/* something is wrong */
765		edac_mc_printk(mci, KERN_ERR,
766			"INTERNAL ERROR: channel-b out of range "
767			"(%d >= %d)\n",
768			channelb, mci->csrows[csrow].nr_channels);
769		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
770		return;
771	}
772
773	mci->ue_count++;
774	mci->csrows[csrow].ue_count++;
775
776	/* Generate the DIMM labels from the specified channels */
777	chars = snprintf(pos, len + 1, "%s",
778			 mci->csrows[csrow].channels[channela].label);
779	len -= chars;
780	pos += chars;
781	chars = snprintf(pos, len + 1, "-%s",
782			 mci->csrows[csrow].channels[channelb].label);
783
784	if (edac_mc_get_log_ue())
785		edac_mc_printk(mci, KERN_EMERG,
786			"UE row %d, channel-a= %d channel-b= %d "
787			"labels \"%s\": %s\n", csrow, channela, channelb,
788			labels, msg);
789
790	if (edac_mc_get_panic_on_ue())
791		panic("UE row %d, channel-a= %d channel-b= %d "
792			"labels \"%s\": %s\n", csrow, channela,
793			channelb, labels, msg);
794}
795EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
796
797/*************************************************************
798 * On Fully Buffered DIMM modules, this help function is
799 * called to process CE events
800 */
801void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
802			unsigned int csrow, unsigned int channel, char *msg)
803{
804
805	/* Ensure boundary values */
806	if (csrow >= mci->nr_csrows) {
807		/* something is wrong */
808		edac_mc_printk(mci, KERN_ERR,
809			"INTERNAL ERROR: row out of range (%d >= %d)\n",
810			csrow, mci->nr_csrows);
811		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
812		return;
813	}
814	if (channel >= mci->csrows[csrow].nr_channels) {
815		/* something is wrong */
816		edac_mc_printk(mci, KERN_ERR,
817			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
818			channel, mci->csrows[csrow].nr_channels);
819		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
820		return;
821	}
822
823	if (edac_mc_get_log_ce())
824		/* FIXME - put in DIMM location */
825		edac_mc_printk(mci, KERN_WARNING,
826			"CE row %d, channel %d, label \"%s\": %s\n",
827			csrow, channel,
828			mci->csrows[csrow].channels[channel].label, msg);
829
830	mci->ce_count++;
831	mci->csrows[csrow].ce_count++;
832	mci->csrows[csrow].channels[channel].ce_count++;
833}
834EXPORT_SYMBOL(edac_mc_handle_fbd_ce);
835
836/*
837 * Iterate over all MC instances and check for ECC, et al, errors
838 */
839void edac_check_mc_devices(void)
840{
841	struct list_head *item;
842	struct mem_ctl_info *mci;
843
844	debugf3("%s()\n", __func__);
845	mutex_lock(&mem_ctls_mutex);
846
847	list_for_each(item, &mc_devices) {
848		mci = list_entry(item, struct mem_ctl_info, link);
849
850		if (mci->edac_check != NULL)
851			mci->edac_check(mci);
852	}
853
854	mutex_unlock(&mem_ctls_mutex);
855}
856