edac_mc.c revision 4275be63559719c3149b19751029f1b0f1b26775
1/*
2 * edac_mc kernel module
3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 *	http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/ctype.h>
29#include <linux/edac.h>
30#include <asm/uaccess.h>
31#include <asm/page.h>
32#include <asm/edac.h>
33#include "edac_core.h"
34#include "edac_module.h"
35
36/* lock to memory controller's control array */
37static DEFINE_MUTEX(mem_ctls_mutex);
38static LIST_HEAD(mc_devices);
39
40#ifdef CONFIG_EDAC_DEBUG
41
42static void edac_mc_dump_channel(struct rank_info *chan)
43{
44	debugf4("\tchannel = %p\n", chan);
45	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
46	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
47	debugf4("\tchannel->dimm = %p\n", chan->dimm);
48}
49
50static void edac_mc_dump_dimm(struct dimm_info *dimm)
51{
52	int i;
53
54	debugf4("\tdimm = %p\n", dimm);
55	debugf4("\tdimm->label = '%s'\n", dimm->label);
56	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
57	debugf4("\tdimm location ");
58	for (i = 0; i < dimm->mci->n_layers; i++) {
59		printk(KERN_CONT "%d", dimm->location[i]);
60		if (i < dimm->mci->n_layers - 1)
61			printk(KERN_CONT ".");
62	}
63	printk(KERN_CONT "\n");
64	debugf4("\tdimm->grain = %d\n", dimm->grain);
65	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
66}
67
68static void edac_mc_dump_csrow(struct csrow_info *csrow)
69{
70	debugf4("\tcsrow = %p\n", csrow);
71	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
72	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
73	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
74	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
75	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
76	debugf4("\tcsrow->channels = %p\n", csrow->channels);
77	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
78}
79
80static void edac_mc_dump_mci(struct mem_ctl_info *mci)
81{
82	debugf3("\tmci = %p\n", mci);
83	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
84	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
85	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
86	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
87	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
88		mci->nr_csrows, mci->csrows);
89	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
90		mci->tot_dimms, mci->dimms);
91	debugf3("\tdev = %p\n", mci->dev);
92	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
93	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
94}
95
96#endif				/* CONFIG_EDAC_DEBUG */
97
98/*
99 * keep these in sync with the enum mem_type
100 */
101const char *edac_mem_types[] = {
102	"Empty csrow",
103	"Reserved csrow type",
104	"Unknown csrow type",
105	"Fast page mode RAM",
106	"Extended data out RAM",
107	"Burst Extended data out RAM",
108	"Single data rate SDRAM",
109	"Registered single data rate SDRAM",
110	"Double data rate SDRAM",
111	"Registered Double data rate SDRAM",
112	"Rambus DRAM",
113	"Unbuffered DDR2 RAM",
114	"Fully buffered DDR2",
115	"Registered DDR2 RAM",
116	"Rambus XDR",
117	"Unbuffered DDR3 RAM",
118	"Registered DDR3 RAM",
119};
120EXPORT_SYMBOL_GPL(edac_mem_types);
121
122/**
123 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
124 * @p:		pointer to a pointer with the memory offset to be used. At
125 *		return, this will be incremented to point to the next offset
126 * @size:	Size of the data structure to be reserved
127 * @n_elems:	Number of elements that should be reserved
128 *
129 * If 'size' is a constant, the compiler will optimize this whole function
130 * down to either a no-op or the addition of a constant to the value of '*p'.
131 *
132 * The 'p' pointer is absolutely needed to keep the proper advancing
133 * further in memory to the proper offsets when allocating the struct along
134 * with its embedded structs, as new_edac_mc_alloc() below and
135 * edac_device_alloc_ctl_info() do, for example.
136 *
137 * At return, the pointer 'p' will be incremented to be used on a next call
138 * to this function.
139 */
140void *edac_align_ptr(void **p, unsigned size, int n_elems)
141{
142	unsigned align, r;
143	void *ptr = *p;
144
145	*p += size * n_elems;
146
147	/*
148	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
149	 * 'size'.  Adjust 'p' so that its alignment is at least as
150	 * stringent as what the compiler would provide for X and return
151	 * the aligned result.
152	 * Here we assume that the alignment of a "long long" is the most
153	 * stringent alignment that the compiler will ever provide by default.
154	 * As far as I know, this is a reasonable assumption.
155	 */
156	if (size > sizeof(long))
157		align = sizeof(long long);
158	else if (size > sizeof(int))
159		align = sizeof(long);
160	else if (size > sizeof(short))
161		align = sizeof(int);
162	else if (size > sizeof(char))
163		align = sizeof(short);
164	else
165		return (char *)ptr;
166
167	r = size % align;
168
169	if (r == 0)
170		return (char *)ptr;
171
172	*p += align - r;
173
174	return (void *)(((unsigned long)ptr) + align - r);
175}
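
/*
 * edac_align_ptr() is meant to be called twice per embedded field: a
 * first pass with a NULL base computes the aligned offset of each
 * sub-structure (and thus the total size to allocate); after kzalloc(),
 * a second step turns those offsets into real pointers.  A minimal
 * sketch of that pattern, with made-up field names (see
 * new_edac_mc_alloc() below for the real thing):
 *
 *	void *ptr = NULL, *base;
 *	struct foo *foo;
 *	u32 *counters;
 *	unsigned size;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	counters = edac_align_ptr(&ptr, sizeof(u32), n_counters);
 *	size = ((unsigned long)counters) + n_counters * sizeof(u32);
 *
 *	base = kzalloc(size, GFP_KERNEL);
 *	foo = (struct foo *)(((char *)base) + ((unsigned long)foo));
 *	counters = (u32 *)(((char *)base) + ((unsigned long)counters));
 */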
176
177/**
178 * new_edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
179 * @mc_num:		Memory controller number
180 * @n_layers:		Number of MC hierarchy layers
181 * @layers:		Describes each layer as seen by the Memory Controller
182 * @sz_pvt:		size of private storage needed
183 *
184 *
185 * Everything is kmalloc'ed as one big chunk - more efficient.
186 * It can only be used if all structures have the same lifetime - otherwise
187 * you have to allocate and initialize your own structures.
188 *
189 * Use edac_mc_free() to free mc structures allocated by this function.
190 *
191 * NOTE: drivers handle multi-rank memories in different ways: in some
192 * drivers, one multi-rank memory stick is mapped as one entry, while, in
193 * others, a single multi-rank memory stick would be mapped into several
194 * entries. Currently, this function will allocate multiple struct dimm_info
195 * in such scenarios, as grouping the multiple ranks would require driver changes.
196 *
197 * Returns:
198 *	NULL allocation failed
199 *	struct mem_ctl_info pointer
200 */
201struct mem_ctl_info *new_edac_mc_alloc(unsigned mc_num,
202				       unsigned n_layers,
203				       struct edac_mc_layer *layers,
204				       unsigned sz_pvt)
205{
206	struct mem_ctl_info *mci;
207	struct edac_mc_layer *layer;
208	struct csrow_info *csi, *csr;
209	struct rank_info *chi, *chp, *chan;
210	struct dimm_info *dimm;
211	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
212	unsigned pos[EDAC_MAX_LAYERS];
213	void *pvt, *ptr = NULL;
214	unsigned size, tot_dimms = 1, count = 1;
215	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
216	int i, j, err, row, chn;
217	bool per_rank = false;
218
219	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
220	/*
221	 * Calculate the total number of dimms and csrows/cschannels while
222	 * in the old API emulation mode
223	 */
224	for (i = 0; i < n_layers; i++) {
225		tot_dimms *= layers[i].size;
226		if (layers[i].is_virt_csrow)
227			tot_csrows *= layers[i].size;
228		else
229			tot_channels *= layers[i].size;
230
231		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
232			per_rank = true;
233	}
234
235	/* Figure out the offsets of the various items from the start of an mc
236	 * structure.  We want the alignment of each item to be at least as
237	 * stringent as what the compiler would provide if we could simply
238	 * hardcode everything into a single struct.
239	 */
240	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
241	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
242	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
243	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
244	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
245	for (i = 0; i < n_layers; i++) {
246		count *= layers[i].size;
247		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
248		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
249		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
250		tot_errcount += 2 * count;
251	}
252
253	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
254	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
255	size = ((unsigned long)pvt) + sz_pvt;
256
257	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
258		__func__, size,
259		tot_dimms,
260		per_rank ? "ranks" : "dimms",
261		tot_csrows * tot_channels);
262	mci = kzalloc(size, GFP_KERNEL);
263	if (mci == NULL)
264		return NULL;
265
266	/* Adjust pointers so they point within the memory we just allocated
267	 * rather than an imaginary chunk of memory located at address 0.
268	 */
269	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
270	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
271	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
272	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
273	for (i = 0; i < n_layers; i++) {
274		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
275		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
276	}
277	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
278
279	/* setup index and various internal pointers */
280	mci->mc_idx = mc_num;
281	mci->csrows = csi;
282	mci->dimms  = dimm;
283	mci->tot_dimms = tot_dimms;
284	mci->pvt_info = pvt;
285	mci->n_layers = n_layers;
286	mci->layers = layer;
287	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
288	mci->nr_csrows = tot_csrows;
289	mci->num_cschannel = tot_channels;
290	mci->mem_is_per_rank = per_rank;
291
292	/*
293	 * Fill the csrow struct
294	 */
295	for (row = 0; row < tot_csrows; row++) {
296		csr = &csi[row];
297		csr->csrow_idx = row;
298		csr->mci = mci;
299		csr->nr_channels = tot_channels;
300		chp = &chi[row * tot_channels];
301		csr->channels = chp;
302
303		for (chn = 0; chn < tot_channels; chn++) {
304			chan = &chp[chn];
305			chan->chan_idx = chn;
306			chan->csrow = csr;
307		}
308	}
309
310	/*
311	 * Fill the dimm struct
312	 */
313	memset(&pos, 0, sizeof(pos));
314	row = 0;
315	chn = 0;
316	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
317		per_rank ? "ranks" : "dimms");
318	for (i = 0; i < tot_dimms; i++) {
319		chan = &csi[row].channels[chn];
320		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
321			       pos[0], pos[1], pos[2]);
322		dimm->mci = mci;
323
324		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
325			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
326			pos[0], pos[1], pos[2], row, chn);
327
328		/* Copy DIMM location */
329		for (j = 0; j < n_layers; j++)
330			dimm->location[j] = pos[j];
331
332		/* Link it to the csrows old API data */
333		chan->dimm = dimm;
334		dimm->csrow = row;
335		dimm->cschannel = chn;
336
337		/* Increment csrow location */
338		row++;
339		if (row == tot_csrows) {
340			row = 0;
341			chn++;
342		}
343
344		/* Increment dimm location */
345		for (j = n_layers - 1; j >= 0; j--) {
346			pos[j]++;
347			if (pos[j] < layers[j].size)
348				break;
349			pos[j] = 0;
350		}
351	}
352
353	mci->op_state = OP_ALLOC;
354	INIT_LIST_HEAD(&mci->grp_kobj_list);
355
356	/*
357	 * Initialize the 'root' kobj for the edac_mc controller
358	 */
359	err = edac_mc_register_sysfs_main_kobj(mci);
360	if (err) {
361		kfree(mci);
362		return NULL;
363	}
364
365	/* At this point the root kobj is valid.  In order to 'free' the
366	 * object, the function
367	 *      edac_mc_unregister_sysfs_main_kobj() must be called;
368	 * it performs the kobj unregistration, and the actual free
369	 * happens later, in the kobject release callback.
370	 */
371	return mci;
372}
373EXPORT_SYMBOL_GPL(new_edac_mc_alloc);
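
/*
 * A rough sketch of how a driver might describe its hierarchy to
 * new_edac_mc_alloc().  The layer types and sizes below are illustrative
 * only, and 'struct my_pvt' is a hypothetical driver-private struct:
 *
 *	struct edac_mc_layer layers[2];
 *
 *	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[0].size = nr_channels;
 *	layers[0].is_virt_csrow = false;
 *	layers[1].type = EDAC_MC_LAYER_SLOT;
 *	layers[1].size = nr_slots;
 *	layers[1].is_virt_csrow = true;
 *	mci = new_edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *				sizeof(struct my_pvt));
 */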
374
375/**
376 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
377 * @sz_pvt:		Size of private storage needed
378 * @nr_csrows:		Number of chip-select rows seen by the Memory Controller
379 * @nr_chans:		Number of channels per chip-select row
380 * @mc_num:		Memory controller number
381 *
382 *
383 * FIXME: drivers handle multi-rank memories in different ways: some
384 * drivers map multi-ranked DIMMs as one DIMM while others
385 * as several DIMMs.
386 *
387 * Everything is kmalloc'ed as one big chunk - more efficient.
388 * It can only be used if all structures have the same lifetime - otherwise
389 * you have to allocate and initialize your own structures.
390 *
391 * Use edac_mc_free() to free mc structures allocated by this function.
392 *
393 * Returns:
394 *	On failure: NULL
395 *	On success: struct mem_ctl_info pointer
396 */
397
398struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
399				   unsigned nr_chans, int mc_num)
400{
401	unsigned n_layers = 2;
402	struct edac_mc_layer layers[n_layers];
403
404	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
405	layers[0].size = nr_csrows;
406	layers[0].is_virt_csrow = true;
407	layers[1].type = EDAC_MC_LAYER_CHANNEL;
408	layers[1].size = nr_chans;
409	layers[1].is_virt_csrow = false;
410
411	return new_edac_mc_alloc(mc_num, ARRAY_SIZE(layers), layers, sz_pvt);
412}
413EXPORT_SYMBOL_GPL(edac_mc_alloc);
414
415/**
416 * edac_mc_free
417 *	'Free' a previously allocated 'mci' structure
418 * @mci: pointer to a struct mem_ctl_info structure
419 */
420void edac_mc_free(struct mem_ctl_info *mci)
421{
422	debugf1("%s()\n", __func__);
423
424	edac_mc_unregister_sysfs_main_kobj(mci);
425
426	/* free the mci instance memory here */
427	kfree(mci);
428}
429EXPORT_SYMBOL_GPL(edac_mc_free);
430
431
432/**
433 * find_mci_by_dev
434 *
435 *	scan list of controllers looking for the one that manages
436 *	the 'dev' device
437 * @dev: pointer to the struct device associated with the MCI
438 */
439struct mem_ctl_info *find_mci_by_dev(struct device *dev)
440{
441	struct mem_ctl_info *mci;
442	struct list_head *item;
443
444	debugf3("%s()\n", __func__);
445
446	list_for_each(item, &mc_devices) {
447		mci = list_entry(item, struct mem_ctl_info, link);
448
449		if (mci->dev == dev)
450			return mci;
451	}
452
453	return NULL;
454}
455EXPORT_SYMBOL_GPL(find_mci_by_dev);
456
457/*
458 * handler for EDAC to check if NMI type handler has asserted interrupt
459 */
460static int edac_mc_assert_error_check_and_clear(void)
461{
462	int old_state;
463
464	if (edac_op_state == EDAC_OPSTATE_POLL)
465		return 1;
466
467	old_state = edac_err_assert;
468	edac_err_assert = 0;
469
470	return old_state;
471}
472
473/*
474 * edac_mc_workq_function
475 *	performs the operation scheduled by a workq request
476 */
477static void edac_mc_workq_function(struct work_struct *work_req)
478{
479	struct delayed_work *d_work = to_delayed_work(work_req);
480	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
481
482	mutex_lock(&mem_ctls_mutex);
483
484	/* if this control struct has moved to offline state, we are done */
485	if (mci->op_state == OP_OFFLINE) {
486		mutex_unlock(&mem_ctls_mutex);
487		return;
488	}
489
490	/* Only poll controllers that are running polled and have a check */
491	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
492		mci->edac_check(mci);
493
494	mutex_unlock(&mem_ctls_mutex);
495
496	/* Reschedule */
497	queue_delayed_work(edac_workqueue, &mci->work,
498			msecs_to_jiffies(edac_mc_get_poll_msec()));
499}
500
501/*
502 * edac_mc_workq_setup
503 *	initialize a workq item for this mci
504 *	passing in the new delay period in msec
505 *
506 *	locking model:
507 *
508 *		called with the mem_ctls_mutex held
509 */
510static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
511{
512	debugf0("%s()\n", __func__);
513
514	/* if this instance is not in the POLL state, then simply return */
515	if (mci->op_state != OP_RUNNING_POLL)
516		return;
517
518	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
519	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
520}
521
522/*
523 * edac_mc_workq_teardown
524 *	stop the workq processing on this mci
525 *
526 *	locking model:
527 *
528 *		called WITHOUT lock held
529 */
530static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
531{
532	int status;
533
534	if (mci->op_state != OP_RUNNING_POLL)
535		return;
536
537	status = cancel_delayed_work(&mci->work);
538	if (status == 0) {
539		debugf0("%s() not canceled, flush the queue\n",
540			__func__);
541
542		/* workq instance might be running, wait for it */
543		flush_workqueue(edac_workqueue);
544	}
545}
546
547/*
548 * edac_mc_reset_delay_period(int value)
549 *
550 *	user space has updated our poll period value, need to
551 *	reset our workq delays
552 */
553void edac_mc_reset_delay_period(int value)
554{
555	struct mem_ctl_info *mci;
556	struct list_head *item;
557
558	mutex_lock(&mem_ctls_mutex);
559
560	/* scan the list and turn off all workq timers, doing so under lock
561	 */
562	list_for_each(item, &mc_devices) {
563		mci = list_entry(item, struct mem_ctl_info, link);
564
565		if (mci->op_state == OP_RUNNING_POLL)
566			cancel_delayed_work(&mci->work);
567	}
568
569	mutex_unlock(&mem_ctls_mutex);
570
571
572	/* re-walk the list, and reset the poll delay */
573	mutex_lock(&mem_ctls_mutex);
574
575	list_for_each(item, &mc_devices) {
576		mci = list_entry(item, struct mem_ctl_info, link);
577
578		edac_mc_workq_setup(mci, (unsigned long) value);
579	}
580
581	mutex_unlock(&mem_ctls_mutex);
582}
583
584
585
586/* Return 0 on success, 1 on failure.
587 * Before calling this function, caller must
588 * assign a unique value to mci->mc_idx.
589 *
590 *	locking model:
591 *
592 *		called with the mem_ctls_mutex lock held
593 */
594static int add_mc_to_global_list(struct mem_ctl_info *mci)
595{
596	struct list_head *item, *insert_before;
597	struct mem_ctl_info *p;
598
599	insert_before = &mc_devices;
600
601	p = find_mci_by_dev(mci->dev);
602	if (unlikely(p != NULL))
603		goto fail0;
604
605	list_for_each(item, &mc_devices) {
606		p = list_entry(item, struct mem_ctl_info, link);
607
608		if (p->mc_idx >= mci->mc_idx) {
609			if (unlikely(p->mc_idx == mci->mc_idx))
610				goto fail1;
611
612			insert_before = item;
613			break;
614		}
615	}
616
617	list_add_tail_rcu(&mci->link, insert_before);
618	atomic_inc(&edac_handlers);
619	return 0;
620
621fail0:
622	edac_printk(KERN_WARNING, EDAC_MC,
623		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
624		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
625	return 1;
626
627fail1:
628	edac_printk(KERN_WARNING, EDAC_MC,
629		"bug in low-level driver: attempt to assign\n"
630		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
631	return 1;
632}
633
634static void del_mc_from_global_list(struct mem_ctl_info *mci)
635{
636	atomic_dec(&edac_handlers);
637	list_del_rcu(&mci->link);
638
639	/* these are for safe removal of devices from global list while
640	 * NMI handlers may be traversing list
641	 */
642	synchronize_rcu();
643	INIT_LIST_HEAD(&mci->link);
644}
645
646/**
647 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
648 *
649 * If found, return a pointer to the structure.
650 * Else return NULL.
651 *
652 * Caller must hold mem_ctls_mutex.
653 */
654struct mem_ctl_info *edac_mc_find(int idx)
655{
656	struct list_head *item;
657	struct mem_ctl_info *mci;
658
659	list_for_each(item, &mc_devices) {
660		mci = list_entry(item, struct mem_ctl_info, link);
661
662		if (mci->mc_idx >= idx) {
663			if (mci->mc_idx == idx)
664				return mci;
665
666			break;
667		}
668	}
669
670	return NULL;
671}
672EXPORT_SYMBOL(edac_mc_find);
673
674/**
675 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
676 *                 create sysfs entries associated with mci structure
677 * @mci: pointer to the mci structure to be added to the list
678 *
679 * Return:
680 *	0	Success
681 *	!0	Failure
682 */
683
684/* FIXME - should a warning be printed if no error detection? correction? */
685int edac_mc_add_mc(struct mem_ctl_info *mci)
686{
687	debugf0("%s()\n", __func__);
688
689#ifdef CONFIG_EDAC_DEBUG
690	if (edac_debug_level >= 3)
691		edac_mc_dump_mci(mci);
692
693	if (edac_debug_level >= 4) {
694		int i;
695
696		for (i = 0; i < mci->nr_csrows; i++) {
697			int j;
698
699			edac_mc_dump_csrow(&mci->csrows[i]);
700			for (j = 0; j < mci->csrows[i].nr_channels; j++)
701				edac_mc_dump_channel(&mci->csrows[i].
702						channels[j]);
703		}
704		for (i = 0; i < mci->tot_dimms; i++)
705			edac_mc_dump_dimm(&mci->dimms[i]);
706	}
707#endif
708	mutex_lock(&mem_ctls_mutex);
709
710	if (add_mc_to_global_list(mci))
711		goto fail0;
712
713	/* set load time so that error rate can be tracked */
714	mci->start_time = jiffies;
715
716	if (edac_create_sysfs_mci_device(mci)) {
717		edac_mc_printk(mci, KERN_WARNING,
718			"failed to create sysfs device\n");
719		goto fail1;
720	}
721
722	/* If there IS a check routine, then we are running POLLED */
723	if (mci->edac_check != NULL) {
724		/* This instance is NOW RUNNING */
725		mci->op_state = OP_RUNNING_POLL;
726
727		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
728	} else {
729		mci->op_state = OP_RUNNING_INTERRUPT;
730	}
731
732	/* Report action taken */
733	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
734		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
735
736	mutex_unlock(&mem_ctls_mutex);
737	return 0;
738
739fail1:
740	del_mc_from_global_list(mci);
741
742fail0:
743	mutex_unlock(&mem_ctls_mutex);
744	return 1;
745}
746EXPORT_SYMBOL_GPL(edac_mc_add_mc);
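
/*
 * A rough sketch of the probe-time sequence a driver typically follows
 * around edac_mc_add_mc().  'struct my_pvt' and my_edac_check() are
 * hypothetical names; error handling is omitted:
 *
 *	mci = edac_mc_alloc(sizeof(struct my_pvt), nr_csrows, nr_chans, 0);
 *	mci->dev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "my_edac";
 *	mci->ctl_name = "my_ctl";
 *	mci->edac_check = my_edac_check;
 *	(fill csrow/dimm information from hardware registers)
 *	if (edac_mc_add_mc(mci))
 *		goto fail;
 *
 * Setting ->edac_check selects polled operation (OP_RUNNING_POLL);
 * leaving it NULL selects OP_RUNNING_INTERRUPT, as the code above shows.
 */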
747
748/**
749 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
750 *                 remove mci structure from global list
751 * @dev: Pointer to 'struct device' representing the mci structure to remove.
752 *
753 * Return pointer to removed mci structure, or NULL if device not found.
754 */
755struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
756{
757	struct mem_ctl_info *mci;
758
759	debugf0("%s()\n", __func__);
760
761	mutex_lock(&mem_ctls_mutex);
762
763	/* find the requested mci struct in the global list */
764	mci = find_mci_by_dev(dev);
765	if (mci == NULL) {
766		mutex_unlock(&mem_ctls_mutex);
767		return NULL;
768	}
769
770	del_mc_from_global_list(mci);
771	mutex_unlock(&mem_ctls_mutex);
772
773	/* flush workq processes */
774	edac_mc_workq_teardown(mci);
775
776	/* marking MCI offline */
777	mci->op_state = OP_OFFLINE;
778
779	/* remove from sysfs */
780	edac_remove_sysfs_mci_device(mci);
781
782	edac_printk(KERN_INFO, EDAC_MC,
783		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
784		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
785
786	return mci;
787}
788EXPORT_SYMBOL_GPL(edac_mc_del_mc);
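
/*
 * The teardown counterpart of the above, as a driver's remove routine
 * would typically use it (sketch only):
 *
 *	mci = edac_mc_del_mc(&pdev->dev);
 *	if (mci)
 *		edac_mc_free(mci);
 */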
789
790static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
791				u32 size)
792{
793	struct page *pg;
794	void *virt_addr;
795	unsigned long flags = 0;
796
797	debugf3("%s()\n", __func__);
798
799	/* ECC error page was not in our memory. Ignore it. */
800	if (!pfn_valid(page))
801		return;
802
803	/* Find the actual page structure then map it and fix */
804	pg = pfn_to_page(page);
805
806	if (PageHighMem(pg))
807		local_irq_save(flags);
808
809	virt_addr = kmap_atomic(pg);
810
811	/* Perform architecture specific atomic scrub operation */
812	atomic_scrub(virt_addr + offset, size);
813
814	/* Unmap and complete */
815	kunmap_atomic(virt_addr);
816
817	if (PageHighMem(pg))
818		local_irq_restore(flags);
819}
820
821/* FIXME - should return -1 */
822int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
823{
824	struct csrow_info *csrows = mci->csrows;
825	int row, i, j, n;
826
827	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
828	row = -1;
829
830	for (i = 0; i < mci->nr_csrows; i++) {
831		struct csrow_info *csrow = &csrows[i];
832		n = 0;
833		for (j = 0; j < csrow->nr_channels; j++) {
834			struct dimm_info *dimm = csrow->channels[j].dimm;
835			n += dimm->nr_pages;
836		}
837		if (n == 0)
838			continue;
839
840		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
841			"mask(0x%lx)\n", mci->mc_idx, __func__,
842			csrow->first_page, page, csrow->last_page,
843			csrow->page_mask);
844
845		if ((page >= csrow->first_page) &&
846		    (page <= csrow->last_page) &&
847		    ((page & csrow->page_mask) ==
848		     (csrow->first_page & csrow->page_mask))) {
849			row = i;
850			break;
851		}
852	}
853
854	if (row == -1)
855		edac_mc_printk(mci, KERN_ERR,
856			"could not look up page error address %lx\n",
857			(unsigned long)page);
858
859	return row;
860}
861EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
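
/*
 * A small worked example of the matching above (made-up numbers): a
 * csrow with first_page = 0x100, last_page = 0x1ff and page_mask = 0
 * claims any page in [0x100, 0x1ff].  A non-zero page_mask additionally
 * requires the masked bits of the page to match those of first_page,
 * which is what lets drivers describe csrows whose pages are
 * interleaved within the same address range.
 */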
862
863const char *edac_layer_name[] = {
864	[EDAC_MC_LAYER_BRANCH] = "branch",
865	[EDAC_MC_LAYER_CHANNEL] = "channel",
866	[EDAC_MC_LAYER_SLOT] = "slot",
867	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
868};
869EXPORT_SYMBOL_GPL(edac_layer_name);
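
/*
 * These strings, indexed by enum edac_mc_layer_type, are what
 * edac_mc_handle_error() below uses to build the human readable
 * location of an error, e.g. "csrow:2 channel:1 " for a two layer
 * csrow/channel controller.
 */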
870
871static void edac_inc_ce_error(struct mem_ctl_info *mci,
872				    bool enable_per_layer_report,
873				    const int pos[EDAC_MAX_LAYERS])
874{
875	int i, index = 0;
876
877	mci->ce_count++;
878
879	if (!enable_per_layer_report) {
880		mci->ce_noinfo_count++;
881		return;
882	}
883
884	for (i = 0; i < mci->n_layers; i++) {
885		if (pos[i] < 0)
886			break;
887		index += pos[i];
888		mci->ce_per_layer[i][index]++;
889
890		if (i < mci->n_layers - 1)
891			index *= mci->layers[i + 1].size;
892	}
893}
894
895static void edac_inc_ue_error(struct mem_ctl_info *mci,
896				    bool enable_per_layer_report,
897				    const int pos[EDAC_MAX_LAYERS])
898{
899	int i, index = 0;
900
901	mci->ue_count++;
902
903	if (!enable_per_layer_report) {
904		mci->ue_noinfo_count++;
905		return;
906	}
907
908	for (i = 0; i < mci->n_layers; i++) {
909		if (pos[i] < 0)
910			break;
911		index += pos[i];
912		mci->ue_per_layer[i][index]++;
913
914		if (i < mci->n_layers - 1)
915			index *= mci->layers[i + 1].size;
916	}
917}
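
/*
 * A worked example of the per-layer indexing done by the two helpers
 * above, for a hypothetical controller with two layers of sizes
 * { 2, 4 } and an error at pos = { 1, 2 }:
 *
 *	layer 0: index = 1		-> ce_per_layer[0][1]++
 *	layer 1: index = 1 * 4 + 2 = 6	-> ce_per_layer[1][6]++
 *
 * i.e. each layer's counter array is indexed by the position flattened
 * across that layer and all earlier layers, matching the counter sizes
 * allocated in new_edac_mc_alloc().
 */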
918
919static void edac_ce_error(struct mem_ctl_info *mci,
920			  const int pos[EDAC_MAX_LAYERS],
921			  const char *msg,
922			  const char *location,
923			  const char *label,
924			  const char *detail,
925			  const char *other_detail,
926			  const bool enable_per_layer_report,
927			  const unsigned long page_frame_number,
928			  const unsigned long offset_in_page,
929			  u32 grain)
930{
931	unsigned long remapped_page;
932
933	if (edac_mc_get_log_ce()) {
934		if (other_detail && *other_detail)
935			edac_mc_printk(mci, KERN_WARNING,
936				       "CE %s on %s (%s%s - %s)\n",
937				       msg, label, location,
938				       detail, other_detail);
939		else
940			edac_mc_printk(mci, KERN_WARNING,
941				       "CE %s on %s (%s%s)\n",
942				       msg, label, location,
943				       detail);
944	}
945	edac_inc_ce_error(mci, enable_per_layer_report, pos);
946
947	if (mci->scrub_mode & SCRUB_SW_SRC) {
948		/*
949		 * Some memory controllers (called MCs below) can remap
950		 * memory so that it is still available at a different
951		 * address when PCI devices map into memory.
952		 * MCs that can't do this lose the memory where PCI
953		 * devices are mapped. This mapping is MC-dependent
954		 * and so we call back into the MC driver for it to
955		 * map the MC page to a physical (CPU) page which can
956		 * then be mapped to a virtual page - which can then
957		 * be scrubbed.
958		 */
959		remapped_page = mci->ctl_page_to_phys ?
960			mci->ctl_page_to_phys(mci, page_frame_number) :
961			page_frame_number;
962
963		edac_mc_scrub_block(remapped_page,
964					offset_in_page, grain);
965	}
966}
967
968static void edac_ue_error(struct mem_ctl_info *mci,
969			  const int pos[EDAC_MAX_LAYERS],
970			  const char *msg,
971			  const char *location,
972			  const char *label,
973			  const char *detail,
974			  const char *other_detail,
975			  const bool enable_per_layer_report)
976{
977	if (edac_mc_get_log_ue()) {
978		if (other_detail && *other_detail)
979			edac_mc_printk(mci, KERN_WARNING,
980				       "UE %s on %s (%s%s - %s)\n",
981			               msg, label, location, detail,
982				       other_detail);
983		else
984			edac_mc_printk(mci, KERN_WARNING,
985				       "UE %s on %s (%s%s)\n",
986			               msg, label, location, detail);
987	}
988
989	if (edac_mc_get_panic_on_ue()) {
990		if (other_detail && *other_detail)
991			panic("UE %s on %s (%s%s - %s)\n",
992			      msg, label, location, detail, other_detail);
993		else
994			panic("UE %s on %s (%s%s)\n",
995			      msg, label, location, detail);
996	}
997
998	edac_inc_ue_error(mci, enable_per_layer_report, pos);
999}
1000
1001#define OTHER_LABEL " or "
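
/*
 * A hedged example of how a driver might report a corrected error on a
 * csrow/channel-organized controller (all values illustrative):
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     pfn, offset, syndrome,
 *			     csrow, channel, -1,
 *			     "read error", "", NULL);
 *
 * Layers the hardware cannot resolve are passed as -1, which keeps the
 * DIMM-matching loop below from constraining that layer.
 */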
1002void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1003			  struct mem_ctl_info *mci,
1004			  const unsigned long page_frame_number,
1005			  const unsigned long offset_in_page,
1006			  const unsigned long syndrome,
1007			  const int layer0,
1008			  const int layer1,
1009			  const int layer2,
1010			  const char *msg,
1011			  const char *other_detail,
1012			  const void *mcelog)
1013{
1014	/* FIXME: too much for stack: move it to some pre-allocated area */
1015	char detail[80], location[80];
1016	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1017	char *p;
1018	int row = -1, chan = -1;
1019	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
1020	int i;
1021	u32 grain;
1022	bool enable_per_layer_report = false;
1023
1024	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1025
1026	/*
1027	 * Check if the event report is consistent and if the memory
1028	 * location is known. If it is known, enable_per_layer_report will be
1029	 * true, the DIMM(s) label info will be filled and the per-layer
1030	 * error counters will be incremented.
1031	 */
1032	for (i = 0; i < mci->n_layers; i++) {
1033		if (pos[i] >= (int)mci->layers[i].size) {
1034			if (type == HW_EVENT_ERR_CORRECTED)
1035				p = "CE";
1036			else
1037				p = "UE";
1038
1039			edac_mc_printk(mci, KERN_ERR,
1040				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1041				       edac_layer_name[mci->layers[i].type],
1042				       pos[i], mci->layers[i].size);
1043			/*
1044			 * Instead of just returning, let's use what's
1045			 * known about the error. The increment routines and
1046			 * the DIMM filter logic will do the right thing by
1047			 * pointing at the likely damaged DIMMs.
1048			 */
1049			pos[i] = -1;
1050		}
1051		if (pos[i] >= 0)
1052			enable_per_layer_report = true;
1053	}
1054
1055	/*
1056	 * Get the dimm label/grain that applies to the match criteria.
1057	 * As the error algorithm may not be able to point to just one memory
1058	 * stick, the logic here will get all possible labels that could
1059	 * potentially be affected by the error.
1060	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1061	 * to have only the MC channel and the MC dimm (also called "branch")
1062	 * but the channel is not known, as the memory is arranged in pairs,
1063	 * where each memory belongs to a separate channel within the same
1064	 * branch.
1065	 */
1066	grain = 0;
1067	p = label;
1068	*p = '\0';
1069	for (i = 0; i < mci->tot_dimms; i++) {
1070		struct dimm_info *dimm = &mci->dimms[i];
1071
1072		if (layer0 >= 0 && layer0 != dimm->location[0])
1073			continue;
1074		if (layer1 >= 0 && layer1 != dimm->location[1])
1075			continue;
1076		if (layer2 >= 0 && layer2 != dimm->location[2])
1077			continue;
1078
1079		/* get the max grain, over the error match range */
1080		if (dimm->grain > grain)
1081			grain = dimm->grain;
1082
1083		/*
1084		 * If the error is memory-controller wide, there's no need to
1085		 * search for the affected DIMMs because the whole
1086		 * channel/memory controller/...  may be affected.
1087		 * Also, don't show errors for empty DIMM slots.
1088		 */
1089		if (enable_per_layer_report && dimm->nr_pages) {
1090			if (p != label) {
1091				strcpy(p, OTHER_LABEL);
1092				p += strlen(OTHER_LABEL);
1093			}
1094			strcpy(p, dimm->label);
1095			p += strlen(p);
1096			*p = '\0';
1097
1098			/*
1099			 * get csrow/channel of the DIMM, in order to allow
1100			 * incrementing the compat API counters
1101			 */
1102			debugf4("%s: %s csrows map: (%d,%d)\n",
1103				__func__,
1104				mci->mem_is_per_rank ? "rank" : "dimm",
1105				dimm->csrow, dimm->cschannel);
1106
1107			if (row == -1)
1108				row = dimm->csrow;
1109			else if (row >= 0 && row != dimm->csrow)
1110				row = -2;
1111
1112			if (chan == -1)
1113				chan = dimm->cschannel;
1114			else if (chan >= 0 && chan != dimm->cschannel)
1115				chan = -2;
1116		}
1117	}
1118
1119	if (!enable_per_layer_report) {
1120		strcpy(label, "any memory");
1121	} else {
1122		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
1123			__func__, row, chan);
1124		if (p == label)
1125			strcpy(label, "unknown memory");
1126		if (type == HW_EVENT_ERR_CORRECTED) {
1127			if (row >= 0) {
1128				mci->csrows[row].ce_count++;
1129				if (chan >= 0)
1130					mci->csrows[row].channels[chan].ce_count++;
1131			}
1132		} else
1133			if (row >= 0)
1134				mci->csrows[row].ue_count++;
1135	}
1136
1137	/* Fill the RAM location data */
1138	p = location;
1139	for (i = 0; i < mci->n_layers; i++) {
1140		if (pos[i] < 0)
1141			continue;
1142
1143		p += sprintf(p, "%s:%d ",
1144			     edac_layer_name[mci->layers[i].type],
1145			     pos[i]);
1146	}
1147
1148	/* Memory type dependent details about the error */
1149	if (type == HW_EVENT_ERR_CORRECTED) {
1150		snprintf(detail, sizeof(detail),
1151			"page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
1152			page_frame_number, offset_in_page,
1153			grain, syndrome);
1154		edac_ce_error(mci, pos, msg, location, label, detail,
1155			      other_detail, enable_per_layer_report,
1156			      page_frame_number, offset_in_page, grain);
1157	} else {
1158		snprintf(detail, sizeof(detail),
1159			"page:0x%lx offset:0x%lx grain:%d",
1160			page_frame_number, offset_in_page, grain);
1161
1162		edac_ue_error(mci, pos, msg, location, label, detail,
1163			      other_detail, enable_per_layer_report);
1164	}
1165}
1166EXPORT_SYMBOL_GPL(edac_mc_handle_error);
1167