edac_mc.c revision d90c008963ef638cb7ab7d5eb76362b3c2d379bc
1/*
2 * edac_mc kernel module
3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 *	http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/ctype.h>
29#include <linux/edac.h>
30#include <linux/bitops.h>
31#include <asm/uaccess.h>
32#include <asm/page.h>
33#include <asm/edac.h>
34#include "edac_core.h"
35#include "edac_module.h"
36
37#define CREATE_TRACE_POINTS
38#define TRACE_INCLUDE_PATH ../../include/ras
39#include <ras/ras_event.h>
40
41/* lock to memory controller's control array */
42static DEFINE_MUTEX(mem_ctls_mutex);
43static LIST_HEAD(mc_devices);
44
45#ifdef CONFIG_EDAC_DEBUG
46
47static void edac_mc_dump_channel(struct rank_info *chan)
48{
49	debugf4("\tchannel = %p\n", chan);
50	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
51	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
52	debugf4("\tchannel->dimm = %p\n", chan->dimm);
53}
54
55static void edac_mc_dump_dimm(struct dimm_info *dimm)
56{
57	int i;
58
59	debugf4("\tdimm = %p\n", dimm);
60	debugf4("\tdimm->label = '%s'\n", dimm->label);
61	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
62	debugf4("\tdimm location ");
63	for (i = 0; i < dimm->mci->n_layers; i++) {
64		printk(KERN_CONT "%d", dimm->location[i]);
65		if (i < dimm->mci->n_layers - 1)
66			printk(KERN_CONT ".");
67	}
68	printk(KERN_CONT "\n");
69	debugf4("\tdimm->grain = %d\n", dimm->grain);
70	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
71}
72
73static void edac_mc_dump_csrow(struct csrow_info *csrow)
74{
75	debugf4("\tcsrow = %p\n", csrow);
76	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
77	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
78	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
79	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
80	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
81	debugf4("\tcsrow->channels = %p\n", csrow->channels);
82	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
83}
84
85static void edac_mc_dump_mci(struct mem_ctl_info *mci)
86{
87	debugf3("\tmci = %p\n", mci);
88	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
89	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
90	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
91	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
92	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
93		mci->nr_csrows, mci->csrows);
 94	debugf3("\tmci->tot_dimms = %d, dimms = %p\n",
95		mci->tot_dimms, mci->dimms);
96	debugf3("\tdev = %p\n", mci->pdev);
97	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
98	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
99}
100
101#endif				/* CONFIG_EDAC_DEBUG */
102
103/*
 104 * keep this table in sync with enum mem_type in <linux/edac.h>
105 */
106const char *edac_mem_types[] = {
107	"Empty csrow",
108	"Reserved csrow type",
109	"Unknown csrow type",
110	"Fast page mode RAM",
111	"Extended data out RAM",
112	"Burst Extended data out RAM",
113	"Single data rate SDRAM",
114	"Registered single data rate SDRAM",
115	"Double data rate SDRAM",
116	"Registered Double data rate SDRAM",
117	"Rambus DRAM",
118	"Unbuffered DDR2 RAM",
119	"Fully buffered DDR2",
120	"Registered DDR2 RAM",
121	"Rambus XDR",
122	"Unbuffered DDR3 RAM",
123	"Registered DDR3 RAM",
124};
125EXPORT_SYMBOL_GPL(edac_mem_types);
126
127/**
128 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
129 * @p:		pointer to a pointer with the memory offset to be used. At
130 *		return, this will be incremented to point to the next offset
131 * @size:	Size of the data structure to be reserved
132 * @n_elems:	Number of elements that should be reserved
133 *
134 * If 'size' is a constant, the compiler will optimize this whole function
135 * down to either a no-op or the addition of a constant to the value of '*p'.
136 *
 137 * The 'p' pointer is needed to keep track of the advancing offset in
 138 * memory, so that the proper offsets are used when allocating a struct
 139 * together with its embedded structs, as edac_device_alloc_ctl_info()
 140 * does, for example.
141 *
142 * At return, the pointer 'p' will be incremented to be used on a next call
143 * to this function.
144 */
145void *edac_align_ptr(void **p, unsigned size, int n_elems)
146{
147	unsigned align, r;
148	void *ptr = *p;
149
150	*p += size * n_elems;
151
152	/*
153	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
154	 * 'size'.  Adjust 'p' so that its alignment is at least as
155	 * stringent as what the compiler would provide for X and return
156	 * the aligned result.
157	 * Here we assume that the alignment of a "long long" is the most
158	 * stringent alignment that the compiler will ever provide by default.
159	 * As far as I know, this is a reasonable assumption.
160	 */
161	if (size > sizeof(long))
162		align = sizeof(long long);
163	else if (size > sizeof(int))
164		align = sizeof(long);
165	else if (size > sizeof(short))
166		align = sizeof(int);
167	else if (size > sizeof(char))
168		align = sizeof(short);
169	else
170		return (char *)ptr;
171
 172	r = (unsigned long)ptr % align;
173
174	if (r == 0)
175		return (char *)ptr;
176
177	*p += align - r;
178
179	return (void *)(((unsigned long)ptr) + align - r);
180}
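/*
 * A minimal usage sketch of the single-shot allocation pattern that
 * edac_align_ptr() supports (the structs 'foo'/'foo_item' and the count 'n'
 * are hypothetical, not EDAC types): a first pass computes offsets against a
 * NULL base, the final offset becomes the size of one kzalloc(), and the
 * offsets are then rebased into the allocated chunk.  edac_mc_alloc() below
 * does exactly this for the mci, layers, csrows, channels and dimms.
 *
 *	void *ptr = NULL, *base;
 *	struct foo *f;
 *	struct foo_item *items;
 *	unsigned size;
 *
 *	f     = edac_align_ptr(&ptr, sizeof(*f), 1);
 *	items = edac_align_ptr(&ptr, sizeof(*items), n);
 *	size  = (unsigned long)ptr;
 *
 *	base  = kzalloc(size, GFP_KERNEL);
 *	if (!base)
 *		return NULL;
 *	f     = (struct foo *)((char *)base + (unsigned long)f);
 *	items = (struct foo_item *)((char *)base + (unsigned long)items);
 */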
181
182/**
183 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
184 * @mc_num:		Memory controller number
185 * @n_layers:		Number of MC hierarchy layers
 186 * @layers:		Describes each layer as seen by the Memory Controller
 187 * @sz_pvt:		size of private storage needed
188 *
189 *
190 * Everything is kmalloc'ed as one big chunk - more efficient.
191 * Only can be used if all structures have the same lifetime - otherwise
192 * you have to allocate and initialize your own structures.
193 *
194 * Use edac_mc_free() to free mc structures allocated by this function.
195 *
196 * NOTE: drivers handle multi-rank memories in different ways: in some
197 * drivers, one multi-rank memory stick is mapped as one entry, while, in
198 * others, a single multi-rank memory stick would be mapped into several
199 * entries. Currently, this function will allocate multiple struct dimm_info
 200 * in such scenarios, as grouping the multiple ranks would require driver changes.
201 *
202 * Returns:
203 *	On failure: NULL
204 *	On success: struct mem_ctl_info pointer
205 */
206struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
207				   unsigned n_layers,
208				   struct edac_mc_layer *layers,
209				   unsigned sz_pvt)
210{
211	struct mem_ctl_info *mci;
212	struct edac_mc_layer *layer;
213	struct csrow_info *csi, *csr;
214	struct rank_info *chi, *chp, *chan;
215	struct dimm_info *dimm;
216	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
217	unsigned pos[EDAC_MAX_LAYERS];
218	unsigned size, tot_dimms = 1, count = 1;
219	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
220	void *pvt, *p, *ptr = NULL;
221	int i, j, row, chn, n, len;
222	bool per_rank = false;
223
224	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
225	/*
226	 * Calculate the total amount of dimms and csrows/cschannels while
227	 * in the old API emulation mode
228	 */
229	for (i = 0; i < n_layers; i++) {
230		tot_dimms *= layers[i].size;
231		if (layers[i].is_virt_csrow)
232			tot_csrows *= layers[i].size;
233		else
234			tot_channels *= layers[i].size;
235
236		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
237			per_rank = true;
238	}
239
240	/* Figure out the offsets of the various items from the start of an mc
241	 * structure.  We want the alignment of each item to be at least as
242	 * stringent as what the compiler would provide if we could simply
243	 * hardcode everything into a single struct.
244	 */
245	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
246	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
247	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
248	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
249	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
250	for (i = 0; i < n_layers; i++) {
251		count *= layers[i].size;
252		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
253		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
254		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
255		tot_errcount += 2 * count;
256	}
257
258	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
259	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
260	size = ((unsigned long)pvt) + sz_pvt;
261
262	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
263		__func__, size,
264		tot_dimms,
265		per_rank ? "ranks" : "dimms",
266		tot_csrows * tot_channels);
267	mci = kzalloc(size, GFP_KERNEL);
268	if (mci == NULL)
269		return NULL;
270
271	/* Adjust pointers so they point within the memory we just allocated
272	 * rather than an imaginary chunk of memory located at address 0.
273	 */
274	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
275	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
276	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
277	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
278	for (i = 0; i < n_layers; i++) {
279		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
280		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
281	}
282	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
283
284	/* setup index and various internal pointers */
285	mci->mc_idx = mc_num;
286	mci->csrows = csi;
287	mci->dimms  = dimm;
288	mci->tot_dimms = tot_dimms;
289	mci->pvt_info = pvt;
290	mci->n_layers = n_layers;
291	mci->layers = layer;
292	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
293	mci->nr_csrows = tot_csrows;
294	mci->num_cschannel = tot_channels;
295	mci->mem_is_per_rank = per_rank;
296
297	/*
298	 * Fill the csrow struct
299	 */
300	for (row = 0; row < tot_csrows; row++) {
301		csr = &csi[row];
302		csr->csrow_idx = row;
303		csr->mci = mci;
304		csr->nr_channels = tot_channels;
305		chp = &chi[row * tot_channels];
306		csr->channels = chp;
307
308		for (chn = 0; chn < tot_channels; chn++) {
309			chan = &chp[chn];
310			chan->chan_idx = chn;
311			chan->csrow = csr;
312		}
313	}
314
315	/*
316	 * Fill the dimm struct
317	 */
318	memset(&pos, 0, sizeof(pos));
319	row = 0;
320	chn = 0;
321	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
322		per_rank ? "ranks" : "dimms");
323	for (i = 0; i < tot_dimms; i++) {
324		chan = &csi[row].channels[chn];
325		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
326			       pos[0], pos[1], pos[2]);
327		dimm->mci = mci;
328
329		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
330			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
331			pos[0], pos[1], pos[2], row, chn);
332
333		/*
334		 * Copy DIMM location and initialize it.
335		 */
336		len = sizeof(dimm->label);
337		p = dimm->label;
338		n = snprintf(p, len, "mc#%u", mc_num);
339		p += n;
340		len -= n;
341		for (j = 0; j < n_layers; j++) {
342			n = snprintf(p, len, "%s#%u",
343				     edac_layer_name[layers[j].type],
344				     pos[j]);
345			p += n;
346			len -= n;
347			dimm->location[j] = pos[j];
348
349			if (len <= 0)
350				break;
351		}
352
353		/* Link it to the csrows old API data */
354		chan->dimm = dimm;
355		dimm->csrow = row;
356		dimm->cschannel = chn;
357
358		/* Increment csrow location */
359		row++;
360		if (row == tot_csrows) {
361			row = 0;
362			chn++;
363		}
364
365		/* Increment dimm location */
366		for (j = n_layers - 1; j >= 0; j--) {
367			pos[j]++;
368			if (pos[j] < layers[j].size)
369				break;
370			pos[j] = 0;
371		}
372	}
373
374	mci->op_state = OP_ALLOC;
375
 376	/* at this point, the root kobj is valid, and in order to
 377	 * 'free' the object, the function
 378	 *      edac_mc_unregister_sysfs_main_kobj() must be called,
 379	 * which will perform the kobj unregistration; the actual free
 380	 * will occur during the kobject callback operation
 381	 */
382
383	return mci;
384}
385EXPORT_SYMBOL_GPL(edac_mc_alloc);
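/*
 * A usage sketch, assuming a hypothetical driver for a controller with four
 * chip-select rows and two channels (the private struct 'foo_pvt' is not an
 * EDAC type):
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct foo_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *
 * The driver then fills the mci fields and the dimms[]/csrows[] data from
 * its hardware registers before calling edac_mc_add_mc().
 */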
386
387/**
388 * edac_mc_free
389 *	'Free' a previously allocated 'mci' structure
390 * @mci: pointer to a struct mem_ctl_info structure
391 */
392void edac_mc_free(struct mem_ctl_info *mci)
393{
394	debugf1("%s()\n", __func__);
395
396	edac_unregister_sysfs(mci);
397
398	/* free the mci instance memory here */
399	kfree(mci);
400}
401EXPORT_SYMBOL_GPL(edac_mc_free);
402
403
404/**
405 * find_mci_by_dev
406 *
407 *	scan list of controllers looking for the one that manages
408 *	the 'dev' device
409 * @dev: pointer to a struct device related with the MCI
410 */
411struct mem_ctl_info *find_mci_by_dev(struct device *dev)
412{
413	struct mem_ctl_info *mci;
414	struct list_head *item;
415
416	debugf3("%s()\n", __func__);
417
418	list_for_each(item, &mc_devices) {
419		mci = list_entry(item, struct mem_ctl_info, link);
420
421		if (mci->pdev == dev)
422			return mci;
423	}
424
425	return NULL;
426}
427EXPORT_SYMBOL_GPL(find_mci_by_dev);
428
429/*
430 * handler for EDAC to check if NMI type handler has asserted interrupt
431 */
432static int edac_mc_assert_error_check_and_clear(void)
433{
434	int old_state;
435
436	if (edac_op_state == EDAC_OPSTATE_POLL)
437		return 1;
438
439	old_state = edac_err_assert;
440	edac_err_assert = 0;
441
442	return old_state;
443}
444
445/*
446 * edac_mc_workq_function
447 *	performs the operation scheduled by a workq request
448 */
449static void edac_mc_workq_function(struct work_struct *work_req)
450{
451	struct delayed_work *d_work = to_delayed_work(work_req);
452	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
453
454	mutex_lock(&mem_ctls_mutex);
455
 456	/* if this control struct has moved to offline state, we are done */
457	if (mci->op_state == OP_OFFLINE) {
458		mutex_unlock(&mem_ctls_mutex);
459		return;
460	}
461
462	/* Only poll controllers that are running polled and have a check */
463	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
464		mci->edac_check(mci);
465
466	mutex_unlock(&mem_ctls_mutex);
467
468	/* Reschedule */
469	queue_delayed_work(edac_workqueue, &mci->work,
470			msecs_to_jiffies(edac_mc_get_poll_msec()));
471}
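/*
 * The mci->edac_check() callback invoked above is supplied by the driver.
 * A minimal sketch of such a handler (foo_read_ecc_status() is a
 * hypothetical register helper, not an EDAC function):
 *
 *	static void foo_check(struct mem_ctl_info *mci)
 *	{
 *		u32 status = foo_read_ecc_status(mci);
 *
 *		if (!status)
 *			return;
 *		... decode csrow/channel/syndrome and report the event
 *		    via edac_mc_handle_error() ...
 *	}
 *
 * A driver sets mci->edac_check before edac_mc_add_mc(); this workqueue then
 * calls it every edac_mc_get_poll_msec() milliseconds.
 */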
472
473/*
474 * edac_mc_workq_setup
475 *	initialize a workq item for this mci
476 *	passing in the new delay period in msec
477 *
478 *	locking model:
479 *
480 *		called with the mem_ctls_mutex held
481 */
482static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
483{
484	debugf0("%s()\n", __func__);
485
486	/* if this instance is not in the POLL state, then simply return */
487	if (mci->op_state != OP_RUNNING_POLL)
488		return;
489
490	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
491	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
492}
493
494/*
495 * edac_mc_workq_teardown
496 *	stop the workq processing on this mci
497 *
498 *	locking model:
499 *
500 *		called WITHOUT lock held
501 */
502static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
503{
504	int status;
505
506	if (mci->op_state != OP_RUNNING_POLL)
507		return;
508
509	status = cancel_delayed_work(&mci->work);
510	if (status == 0) {
511		debugf0("%s() not canceled, flush the queue\n",
512			__func__);
513
514		/* workq instance might be running, wait for it */
515		flush_workqueue(edac_workqueue);
516	}
517}
518
519/*
 520 * edac_mc_reset_delay_period(int value)
521 *
522 *	user space has updated our poll period value, need to
523 *	reset our workq delays
524 */
525void edac_mc_reset_delay_period(int value)
526{
527	struct mem_ctl_info *mci;
528	struct list_head *item;
529
530	mutex_lock(&mem_ctls_mutex);
531
532	/* scan the list and turn off all workq timers, doing so under lock
533	 */
534	list_for_each(item, &mc_devices) {
535		mci = list_entry(item, struct mem_ctl_info, link);
536
537		if (mci->op_state == OP_RUNNING_POLL)
538			cancel_delayed_work(&mci->work);
539	}
540
541	mutex_unlock(&mem_ctls_mutex);
542
543
544	/* re-walk the list, and reset the poll delay */
545	mutex_lock(&mem_ctls_mutex);
546
547	list_for_each(item, &mc_devices) {
548		mci = list_entry(item, struct mem_ctl_info, link);
549
550		edac_mc_workq_setup(mci, (unsigned long) value);
551	}
552
553	mutex_unlock(&mem_ctls_mutex);
554}
555
556
557
558/* Return 0 on success, 1 on failure.
559 * Before calling this function, caller must
560 * assign a unique value to mci->mc_idx.
561 *
562 *	locking model:
563 *
564 *		called with the mem_ctls_mutex lock held
565 */
566static int add_mc_to_global_list(struct mem_ctl_info *mci)
567{
568	struct list_head *item, *insert_before;
569	struct mem_ctl_info *p;
570
571	insert_before = &mc_devices;
572
573	p = find_mci_by_dev(mci->pdev);
574	if (unlikely(p != NULL))
575		goto fail0;
576
577	list_for_each(item, &mc_devices) {
578		p = list_entry(item, struct mem_ctl_info, link);
579
580		if (p->mc_idx >= mci->mc_idx) {
581			if (unlikely(p->mc_idx == mci->mc_idx))
582				goto fail1;
583
584			insert_before = item;
585			break;
586		}
587	}
588
589	list_add_tail_rcu(&mci->link, insert_before);
590	atomic_inc(&edac_handlers);
591	return 0;
592
593fail0:
594	edac_printk(KERN_WARNING, EDAC_MC,
595		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
596		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
597	return 1;
598
599fail1:
600	edac_printk(KERN_WARNING, EDAC_MC,
601		"bug in low-level driver: attempt to assign\n"
602		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
603	return 1;
604}
605
606static void del_mc_from_global_list(struct mem_ctl_info *mci)
607{
608	atomic_dec(&edac_handlers);
609	list_del_rcu(&mci->link);
610
611	/* these are for safe removal of devices from global list while
612	 * NMI handlers may be traversing list
613	 */
614	synchronize_rcu();
615	INIT_LIST_HEAD(&mci->link);
616}
617
618/**
619 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
620 *
621 * If found, return a pointer to the structure.
622 * Else return NULL.
623 *
624 * Caller must hold mem_ctls_mutex.
625 */
626struct mem_ctl_info *edac_mc_find(int idx)
627{
628	struct list_head *item;
629	struct mem_ctl_info *mci;
630
631	list_for_each(item, &mc_devices) {
632		mci = list_entry(item, struct mem_ctl_info, link);
633
634		if (mci->mc_idx >= idx) {
635			if (mci->mc_idx == idx)
636				return mci;
637
638			break;
639		}
640	}
641
642	return NULL;
643}
644EXPORT_SYMBOL(edac_mc_find);
645
646/**
647 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
648 *                 create sysfs entries associated with mci structure
649 * @mci: pointer to the mci structure to be added to the list
650 *
651 * Return:
652 *	0	Success
653 *	!0	Failure
654 */
655
656/* FIXME - should a warning be printed if no error detection? correction? */
657int edac_mc_add_mc(struct mem_ctl_info *mci)
658{
659	debugf0("%s()\n", __func__);
660
661#ifdef CONFIG_EDAC_DEBUG
662	if (edac_debug_level >= 3)
663		edac_mc_dump_mci(mci);
664
665	if (edac_debug_level >= 4) {
666		int i;
667
668		for (i = 0; i < mci->nr_csrows; i++) {
669			int j;
670
671			edac_mc_dump_csrow(&mci->csrows[i]);
672			for (j = 0; j < mci->csrows[i].nr_channels; j++)
673				edac_mc_dump_channel(&mci->csrows[i].
674						channels[j]);
675		}
676		for (i = 0; i < mci->tot_dimms; i++)
677			edac_mc_dump_dimm(&mci->dimms[i]);
678	}
679#endif
680	mutex_lock(&mem_ctls_mutex);
681
682	if (add_mc_to_global_list(mci))
683		goto fail0;
684
685	/* set load time so that error rate can be tracked */
686	mci->start_time = jiffies;
687
688	if (edac_create_sysfs_mci_device(mci)) {
689		edac_mc_printk(mci, KERN_WARNING,
690			"failed to create sysfs device\n");
691		goto fail1;
692	}
693
694	/* If there IS a check routine, then we are running POLLED */
695	if (mci->edac_check != NULL) {
696		/* This instance is NOW RUNNING */
697		mci->op_state = OP_RUNNING_POLL;
698
699		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
700	} else {
701		mci->op_state = OP_RUNNING_INTERRUPT;
702	}
703
704	/* Report action taken */
705	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
706		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
707
708	mutex_unlock(&mem_ctls_mutex);
709	return 0;
710
711fail1:
712	del_mc_from_global_list(mci);
713
714fail0:
715	mutex_unlock(&mem_ctls_mutex);
716	return 1;
717}
718EXPORT_SYMBOL_GPL(edac_mc_add_mc);
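/*
 * Typical call sequence in a driver's probe routine (a sketch with error
 * handling trimmed; the 'foo' names are hypothetical):
 *
 *	mci = edac_mc_alloc(0, n_layers, layers, sizeof(struct foo_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *	mci->pdev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "foo_edac";
 *	mci->ctl_name = "foo";
 *	mci->edac_check = foo_check;	(only for polled operation)
 *	... fill mci->dimms[i].nr_pages, grain, mtype, etc. ...
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */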
719
720/**
721 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
722 *                 remove mci structure from global list
 723 * @dev: Pointer to the 'struct device' representing the mci structure to remove.
724 *
725 * Return pointer to removed mci structure, or NULL if device not found.
726 */
727struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
728{
729	struct mem_ctl_info *mci;
730
731	debugf0("%s()\n", __func__);
732
733	mutex_lock(&mem_ctls_mutex);
734
735	/* find the requested mci struct in the global list */
736	mci = find_mci_by_dev(dev);
737	if (mci == NULL) {
738		mutex_unlock(&mem_ctls_mutex);
739		return NULL;
740	}
741
742	del_mc_from_global_list(mci);
743	mutex_unlock(&mem_ctls_mutex);
744
745	/* flush workq processes */
746	edac_mc_workq_teardown(mci);
747
748	/* marking MCI offline */
749	mci->op_state = OP_OFFLINE;
750
751	/* remove from sysfs */
752	edac_remove_sysfs_mci_device(mci);
753
754	edac_printk(KERN_INFO, EDAC_MC,
755		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
756		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
757
758	return mci;
759}
760EXPORT_SYMBOL_GPL(edac_mc_del_mc);
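/*
 * The matching teardown in a driver's remove routine is normally:
 *
 *	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *	if (mci)
 *		edac_mc_free(mci);
 *
 * i.e. edac_mc_del_mc() unhooks the controller (global list, poller, sysfs)
 * and edac_mc_free() then releases the memory from edac_mc_alloc().
 */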
761
762static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
763				u32 size)
764{
765	struct page *pg;
766	void *virt_addr;
767	unsigned long flags = 0;
768
769	debugf3("%s()\n", __func__);
770
771	/* ECC error page was not in our memory. Ignore it. */
772	if (!pfn_valid(page))
773		return;
774
775	/* Find the actual page structure then map it and fix */
776	pg = pfn_to_page(page);
777
778	if (PageHighMem(pg))
779		local_irq_save(flags);
780
781	virt_addr = kmap_atomic(pg);
782
783	/* Perform architecture specific atomic scrub operation */
784	atomic_scrub(virt_addr + offset, size);
785
786	/* Unmap and complete */
787	kunmap_atomic(virt_addr);
788
789	if (PageHighMem(pg))
790		local_irq_restore(flags);
791}
792
793/* FIXME - should return -1 */
794int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
795{
796	struct csrow_info *csrows = mci->csrows;
797	int row, i, j, n;
798
799	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
800	row = -1;
801
802	for (i = 0; i < mci->nr_csrows; i++) {
803		struct csrow_info *csrow = &csrows[i];
804		n = 0;
805		for (j = 0; j < csrow->nr_channels; j++) {
806			struct dimm_info *dimm = csrow->channels[j].dimm;
807			n += dimm->nr_pages;
808		}
809		if (n == 0)
810			continue;
811
812		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
813			"mask(0x%lx)\n", mci->mc_idx, __func__,
814			csrow->first_page, page, csrow->last_page,
815			csrow->page_mask);
816
817		if ((page >= csrow->first_page) &&
818		    (page <= csrow->last_page) &&
819		    ((page & csrow->page_mask) ==
820		     (csrow->first_page & csrow->page_mask))) {
821			row = i;
822			break;
823		}
824	}
825
826	if (row == -1)
827		edac_mc_printk(mci, KERN_ERR,
828			"could not look up page error address %lx\n",
829			(unsigned long)page);
830
831	return row;
832}
833EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
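/*
 * Drivers that only know a physical error address usually convert it to a
 * page number before calling this helper, e.g. (sketch; 'error_address'
 * would come from the controller's error registers):
 *
 *	unsigned long page = error_address >> PAGE_SHIFT;
 *	int row = edac_mc_find_csrow_by_page(mci, page);
 */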
834
835const char *edac_layer_name[] = {
836	[EDAC_MC_LAYER_BRANCH] = "branch",
837	[EDAC_MC_LAYER_CHANNEL] = "channel",
838	[EDAC_MC_LAYER_SLOT] = "slot",
839	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
840};
841EXPORT_SYMBOL_GPL(edac_layer_name);
842
843static void edac_inc_ce_error(struct mem_ctl_info *mci,
844				    bool enable_per_layer_report,
845				    const int pos[EDAC_MAX_LAYERS])
846{
847	int i, index = 0;
848
849	mci->ce_mc++;
850
851	if (!enable_per_layer_report) {
852		mci->ce_noinfo_count++;
853		return;
854	}
855
856	for (i = 0; i < mci->n_layers; i++) {
857		if (pos[i] < 0)
858			break;
859		index += pos[i];
860		mci->ce_per_layer[i][index]++;
861
862		if (i < mci->n_layers - 1)
863			index *= mci->layers[i + 1].size;
864	}
865}
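/*
 * The index computed above is a row-major position across the layers.  For
 * example, with two layers sized {4 csrows, 2 channels}, an error at
 * pos = {2, 1} increments:
 *
 *	ce_per_layer[0][2]	(index = pos[0] = 2)
 *	ce_per_layer[1][5]	(index = pos[0] * 2 + pos[1] = 5)
 *
 * which matches the per-layer counter sizes allocated in edac_mc_alloc().
 * edac_inc_ue_error() below uses the same indexing for the UE counters.
 */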
866
867static void edac_inc_ue_error(struct mem_ctl_info *mci,
868				    bool enable_per_layer_report,
869				    const int pos[EDAC_MAX_LAYERS])
870{
871	int i, index = 0;
872
873	mci->ue_mc++;
874
875	if (!enable_per_layer_report) {
 876		mci->ue_noinfo_count++;
877		return;
878	}
879
880	for (i = 0; i < mci->n_layers; i++) {
881		if (pos[i] < 0)
882			break;
883		index += pos[i];
884		mci->ue_per_layer[i][index]++;
885
886		if (i < mci->n_layers - 1)
887			index *= mci->layers[i + 1].size;
888	}
889}
890
891static void edac_ce_error(struct mem_ctl_info *mci,
892			  const int pos[EDAC_MAX_LAYERS],
893			  const char *msg,
894			  const char *location,
895			  const char *label,
896			  const char *detail,
897			  const char *other_detail,
898			  const bool enable_per_layer_report,
899			  const unsigned long page_frame_number,
900			  const unsigned long offset_in_page,
901			  long grain)
902{
903	unsigned long remapped_page;
904
905	if (edac_mc_get_log_ce()) {
906		if (other_detail && *other_detail)
907			edac_mc_printk(mci, KERN_WARNING,
908				       "CE %s on %s (%s %s - %s)\n",
909				       msg, label, location,
910				       detail, other_detail);
911		else
912			edac_mc_printk(mci, KERN_WARNING,
913				       "CE %s on %s (%s %s)\n",
914				       msg, label, location,
915				       detail);
916	}
917	edac_inc_ce_error(mci, enable_per_layer_report, pos);
918
919	if (mci->scrub_mode & SCRUB_SW_SRC) {
920		/*
 921		 * Some memory controllers (called MCs below) can remap
 922		 * memory so that it is still available at a different
 923		 * address when PCI devices map into memory.
 924		 * MCs that can't do this lose the memory where PCI
 925		 * devices are mapped. This mapping is MC-dependent
 926		 * and so we call back into the MC driver for it to
 927		 * map the MC page to a physical (CPU) page which can
 928		 * then be mapped to a virtual page - which can then
 929		 * be scrubbed.
 930		 */
931		remapped_page = mci->ctl_page_to_phys ?
932			mci->ctl_page_to_phys(mci, page_frame_number) :
933			page_frame_number;
934
935		edac_mc_scrub_block(remapped_page,
936					offset_in_page, grain);
937	}
938}
939
940static void edac_ue_error(struct mem_ctl_info *mci,
941			  const int pos[EDAC_MAX_LAYERS],
942			  const char *msg,
943			  const char *location,
944			  const char *label,
945			  const char *detail,
946			  const char *other_detail,
947			  const bool enable_per_layer_report)
948{
949	if (edac_mc_get_log_ue()) {
950		if (other_detail && *other_detail)
951			edac_mc_printk(mci, KERN_WARNING,
952				       "UE %s on %s (%s %s - %s)\n",
953			               msg, label, location, detail,
954				       other_detail);
955		else
956			edac_mc_printk(mci, KERN_WARNING,
957				       "UE %s on %s (%s %s)\n",
958			               msg, label, location, detail);
959	}
960
961	if (edac_mc_get_panic_on_ue()) {
962		if (other_detail && *other_detail)
963			panic("UE %s on %s (%s%s - %s)\n",
964			      msg, label, location, detail, other_detail);
965		else
966			panic("UE %s on %s (%s%s)\n",
967			      msg, label, location, detail);
968	}
969
970	edac_inc_ue_error(mci, enable_per_layer_report, pos);
971}
972
973#define OTHER_LABEL " or "
974
975/**
976 * edac_mc_handle_error - reports a memory event to userspace
977 *
978 * @type:		severity of the error (CE/UE/Fatal)
979 * @mci:		a struct mem_ctl_info pointer
980 * @page_frame_number:	mem page where the error occurred
981 * @offset_in_page:	offset of the error inside the page
982 * @syndrome:		ECC syndrome
983 * @top_layer:		Memory layer[0] position
984 * @mid_layer:		Memory layer[1] position
985 * @low_layer:		Memory layer[2] position
986 * @msg:		Message meaningful to the end users that
987 *			explains the event
988 * @other_detail:	Technical details about the event that
989 *			may help hardware manufacturers and
990 *			EDAC developers to analyse the event
991 * @arch_log:		Architecture-specific struct that can
992 *			be used to add extended information to the
993 *			tracepoint, like dumping MCE registers.
994 */
995void edac_mc_handle_error(const enum hw_event_mc_err_type type,
996			  struct mem_ctl_info *mci,
997			  const unsigned long page_frame_number,
998			  const unsigned long offset_in_page,
999			  const unsigned long syndrome,
1000			  const int top_layer,
1001			  const int mid_layer,
1002			  const int low_layer,
1003			  const char *msg,
1004			  const char *other_detail,
1005			  const void *arch_log)
1006{
 1007	/* FIXME: too much for stack: move it to some pre-allocated area */
1008	char detail[80], location[80];
1009	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
1010	char *p;
1011	int row = -1, chan = -1;
1012	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1013	int i;
1014	long grain;
1015	bool enable_per_layer_report = false;
1016	u16 error_count;	/* FIXME: make it a parameter */
1017	u8 grain_bits;
1018
1019	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
1020
1021	/*
1022	 * Check if the event report is consistent and if the memory
1023	 * location is known. If it is known, enable_per_layer_report will be
1024	 * true, the DIMM(s) label info will be filled and the per-layer
1025	 * error counters will be incremented.
1026	 */
1027	for (i = 0; i < mci->n_layers; i++) {
1028		if (pos[i] >= (int)mci->layers[i].size) {
1029			if (type == HW_EVENT_ERR_CORRECTED)
1030				p = "CE";
1031			else
1032				p = "UE";
1033
1034			edac_mc_printk(mci, KERN_ERR,
1035				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1036				       edac_layer_name[mci->layers[i].type],
1037				       pos[i], mci->layers[i].size);
1038			/*
 1039			 * Instead of just returning, let's use what's
 1040			 * known about the error. The increment routines and
 1041			 * the DIMM filter logic will do the right thing by
 1042			 * pointing to the likely damaged DIMMs.
1043			 */
1044			pos[i] = -1;
1045		}
1046		if (pos[i] >= 0)
1047			enable_per_layer_report = true;
1048	}
1049
1050	/*
1051	 * Get the dimm label/grain that applies to the match criteria.
1052	 * As the error algorithm may not be able to point to just one memory
1053	 * stick, the logic here will get all possible labels that could
 1054	 * potentially be affected by the error.
1055	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1056	 * to have only the MC channel and the MC dimm (also called "branch")
1057	 * but the channel is not known, as the memory is arranged in pairs,
1058	 * where each memory belongs to a separate channel within the same
1059	 * branch.
1060	 */
1061	grain = 0;
1062	p = label;
1063	*p = '\0';
1064	for (i = 0; i < mci->tot_dimms; i++) {
1065		struct dimm_info *dimm = &mci->dimms[i];
1066
1067		if (top_layer >= 0 && top_layer != dimm->location[0])
1068			continue;
1069		if (mid_layer >= 0 && mid_layer != dimm->location[1])
1070			continue;
1071		if (low_layer >= 0 && low_layer != dimm->location[2])
1072			continue;
1073
1074		/* get the max grain, over the error match range */
1075		if (dimm->grain > grain)
1076			grain = dimm->grain;
1077
1078		/*
1079		 * If the error is memory-controller wide, there's no need to
 1080		 * search for the affected DIMMs because the whole
1081		 * channel/memory controller/...  may be affected.
1082		 * Also, don't show errors for empty DIMM slots.
1083		 */
1084		if (enable_per_layer_report && dimm->nr_pages) {
1085			if (p != label) {
1086				strcpy(p, OTHER_LABEL);
1087				p += strlen(OTHER_LABEL);
1088			}
1089			strcpy(p, dimm->label);
1090			p += strlen(p);
1091			*p = '\0';
1092
1093			/*
1094			 * get csrow/channel of the DIMM, in order to allow
1095			 * incrementing the compat API counters
1096			 */
1097			debugf4("%s: %s csrows map: (%d,%d)\n",
1098				__func__,
1099				mci->mem_is_per_rank ? "rank" : "dimm",
1100				dimm->csrow, dimm->cschannel);
1101
1102			if (row == -1)
1103				row = dimm->csrow;
1104			else if (row >= 0 && row != dimm->csrow)
1105				row = -2;
1106
1107			if (chan == -1)
1108				chan = dimm->cschannel;
1109			else if (chan >= 0 && chan != dimm->cschannel)
1110				chan = -2;
1111		}
1112	}
1113
1114	if (!enable_per_layer_report) {
1115		strcpy(label, "any memory");
1116	} else {
1117		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
1118			__func__, row, chan);
1119		if (p == label)
1120			strcpy(label, "unknown memory");
1121		if (type == HW_EVENT_ERR_CORRECTED) {
1122			if (row >= 0) {
1123				mci->csrows[row].ce_count++;
1124				if (chan >= 0)
1125					mci->csrows[row].channels[chan].ce_count++;
1126			}
1127		} else
1128			if (row >= 0)
1129				mci->csrows[row].ue_count++;
1130	}
1131
1132	/* Fill the RAM location data */
1133	p = location;
1134	for (i = 0; i < mci->n_layers; i++) {
1135		if (pos[i] < 0)
1136			continue;
1137
1138		p += sprintf(p, "%s:%d ",
1139			     edac_layer_name[mci->layers[i].type],
1140			     pos[i]);
1141	}
1142	if (p > location)
1143		*(p - 1) = '\0';
1144
1145	/* Report the error via the trace interface */
1146
 1147	error_count = 1;	/* FIXME: allow changing it */
1148	grain_bits = fls_long(grain) + 1;
1149	trace_mc_event(type, msg, label, error_count,
1150		       mci->mc_idx, top_layer, mid_layer, low_layer,
1151		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
1152		       grain_bits, syndrome, other_detail);
1153
1154	/* Memory type dependent details about the error */
1155	if (type == HW_EVENT_ERR_CORRECTED) {
1156		snprintf(detail, sizeof(detail),
1157			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1158			page_frame_number, offset_in_page,
1159			grain, syndrome);
1160		edac_ce_error(mci, pos, msg, location, label, detail,
1161			      other_detail, enable_per_layer_report,
1162			      page_frame_number, offset_in_page, grain);
1163	} else {
1164		snprintf(detail, sizeof(detail),
1165			"page:0x%lx offset:0x%lx grain:%ld",
1166			page_frame_number, offset_in_page, grain);
1167
1168		edac_ue_error(mci, pos, msg, location, label, detail,
1169			      other_detail, enable_per_layer_report);
1170	}
1171}
1172EXPORT_SYMBOL_GPL(edac_mc_handle_error);
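/*
 * A sketch of how a csrow/channel-organized driver would report a corrected
 * error (the values are arbitrary, only the argument order matters here):
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     err_page, err_offset, syndrome,
 *			     csrow, channel, -1,
 *			     "read error", "", NULL);
 *
 * Any layer position the hardware cannot resolve is passed as -1; the label
 * and per-layer accounting above then fall back to the coarser
 * "unknown/any memory" reporting.
 */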
1173