edac_mc.c revision e7e248304c8ccf02b89e04c3b3b66006b993b5a7
1/*
2 * edac_mc kernel module
3 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
4 * This file may be distributed under the terms of the
5 * GNU General Public License.
6 *
7 * Written by Thayne Harbaugh
8 * Based on work by Dan Hollis <goemon at anime dot net> and others.
9 *	http://www.anime.net/~goemon/linux-ecc/
10 *
11 * Modified by Dave Peterson and Doug Thompson
12 *
13 */
14
15#include <linux/module.h>
16#include <linux/proc_fs.h>
17#include <linux/kernel.h>
18#include <linux/types.h>
19#include <linux/smp.h>
20#include <linux/init.h>
21#include <linux/sysctl.h>
22#include <linux/highmem.h>
23#include <linux/timer.h>
24#include <linux/slab.h>
25#include <linux/jiffies.h>
26#include <linux/spinlock.h>
27#include <linux/list.h>
28#include <linux/ctype.h>
29#include <linux/edac.h>
30#include <linux/bitops.h>
31#include <asm/uaccess.h>
32#include <asm/page.h>
33#include <asm/edac.h>
34#include "edac_core.h"
35#include "edac_module.h"
36
37#define CREATE_TRACE_POINTS
38#define TRACE_INCLUDE_PATH ../../include/ras
39#include <ras/ras_event.h>
40
41/* lock to memory controller's control array */
42static DEFINE_MUTEX(mem_ctls_mutex);
43static LIST_HEAD(mc_devices);
44
45/*
46 * Used to lock EDAC MC to just one module, avoiding the use of two drivers
47 *	(e.g. apei/ghes and i7core_edac) at the same time.
48 */
49static void const *edac_mc_owner;
50
51unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
52			         unsigned len)
53{
54	struct mem_ctl_info *mci = dimm->mci;
55	int i, n, count = 0;
56	char *p = buf;
57
58	for (i = 0; i < mci->n_layers; i++) {
59		n = snprintf(p, len, "%s %d ",
60			      edac_layer_name[mci->layers[i].type],
61			      dimm->location[i]);
62		p += n;
63		len -= n;
64		count += n;
65		if (!len)
66			break;
67	}
68
69	return count;
70}
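
/*
 * For instance, on a controller declared with a csrow layer followed by a
 * channel layer, a DIMM whose location is {2, 1} would be rendered by the
 * loop above as "csrow 2 channel 1 " (note the trailing space), using the
 * edac_layer_name[] table defined further down in this file.
 */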
71
72#ifdef CONFIG_EDAC_DEBUG
73
74static void edac_mc_dump_channel(struct rank_info *chan)
75{
76	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
77	edac_dbg(4, "    channel = %p\n", chan);
78	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
79	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
80}
81
82static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
83{
84	char location[80];
85
86	edac_dimm_info_location(dimm, location, sizeof(location));
87
88	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
89		 dimm->mci->mem_is_per_rank ? "rank" : "dimm",
90		 number, location, dimm->csrow, dimm->cschannel);
91	edac_dbg(4, "  dimm = %p\n", dimm);
92	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
93	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
94	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
96}
97
98static void edac_mc_dump_csrow(struct csrow_info *csrow)
99{
100	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
101	edac_dbg(4, "  csrow = %p\n", csrow);
102	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
103	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
104	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
105	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
106	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
107	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
108}
109
110static void edac_mc_dump_mci(struct mem_ctl_info *mci)
111{
112	edac_dbg(3, "\tmci = %p\n", mci);
113	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
114	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
115	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
116	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
117	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
118		 mci->nr_csrows, mci->csrows);
119	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
120		 mci->tot_dimms, mci->dimms);
121	edac_dbg(3, "\tdev = %p\n", mci->pdev);
122	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
123		 mci->mod_name, mci->ctl_name);
124	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
125}
126
127#endif				/* CONFIG_EDAC_DEBUG */
128
129/*
130 * keep those in sync with the enum mem_type
131 */
132const char *edac_mem_types[] = {
133	"Empty csrow",
134	"Reserved csrow type",
135	"Unknown csrow type",
136	"Fast page mode RAM",
137	"Extended data out RAM",
138	"Burst Extended data out RAM",
139	"Single data rate SDRAM",
140	"Registered single data rate SDRAM",
141	"Double data rate SDRAM",
142	"Registered Double data rate SDRAM",
143	"Rambus DRAM",
144	"Unbuffered DDR2 RAM",
145	"Fully buffered DDR2",
146	"Registered DDR2 RAM",
147	"Rambus XDR",
148	"Unbuffered DDR3 RAM",
149	"Registered DDR3 RAM",
150};
151EXPORT_SYMBOL_GPL(edac_mem_types);
152
153/**
154 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
155 * @p:		pointer to a pointer with the memory offset to be used. At
156 *		return, this will be incremented to point to the next offset
157 * @size:	Size of the data structure to be reserved
158 * @n_elems:	Number of elements that should be reserved
159 *
160 * If 'size' is a constant, the compiler will optimize this whole function
161 * down to either a no-op or the addition of a constant to the value of '*p'.
162 *
163 * The 'p' pointer is needed so that successive calls keep advancing
164 * further in memory to the proper offsets when a structure is allocated
165 * together with its embedded structures, as edac_device_alloc_ctl_info()
166 * does, for example.
167 *
168 * On return, the pointer 'p' will have been incremented so it can be used
169 * on the next call to this function.
170 */
171void *edac_align_ptr(void **p, unsigned size, int n_elems)
172{
173	unsigned align, r;
174	void *ptr = *p;
175
176	*p += size * n_elems;
177
178	/*
179	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
180	 * 'size'.  Adjust 'p' so that its alignment is at least as
181	 * stringent as what the compiler would provide for X and return
182	 * the aligned result.
183	 * Here we assume that the alignment of a "long long" is the most
184	 * stringent alignment that the compiler will ever provide by default.
185	 * As far as I know, this is a reasonable assumption.
186	 */
187	if (size > sizeof(long))
188		align = sizeof(long long);
189	else if (size > sizeof(int))
190		align = sizeof(long);
191	else if (size > sizeof(short))
192		align = sizeof(int);
193	else if (size > sizeof(char))
194		align = sizeof(short);
195	else
196		return (char *)ptr;
197
198	r = (unsigned long)ptr % align;
199
200	if (r == 0)
201		return (char *)ptr;
202
203	*p += align - r;
204
205	return (void *)(((unsigned long)ptr) + align - r);
206}
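
/*
 * A minimal sketch of the single-shot allocation pattern this helper
 * supports (the struct names "foo"/"bar" and nr_bars below are made up for
 * illustration): a first pass accumulates offsets starting from a NULL base,
 * then a single kzalloc() is done and the offsets are converted into real
 * pointers inside that block, just as edac_mc_alloc() does below:
 *
 *	void *ptr = NULL, *base;
 *	struct foo *foo;
 *	struct bar *bar;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	bar = edac_align_ptr(&ptr, sizeof(*bar), nr_bars);
 *
 *	base = kzalloc((unsigned long)ptr, GFP_KERNEL);
 *	foo = (struct foo *)((char *)base + (unsigned long)foo);
 *	bar = (struct bar *)((char *)base + (unsigned long)bar);
 */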
207
208static void _edac_mc_free(struct mem_ctl_info *mci)
209{
210	int i, chn, row;
211	struct csrow_info *csr;
212	const unsigned int tot_dimms = mci->tot_dimms;
213	const unsigned int tot_channels = mci->num_cschannel;
214	const unsigned int tot_csrows = mci->nr_csrows;
215
216	if (mci->dimms) {
217		for (i = 0; i < tot_dimms; i++)
218			kfree(mci->dimms[i]);
219		kfree(mci->dimms);
220	}
221	if (mci->csrows) {
222		for (row = 0; row < tot_csrows; row++) {
223			csr = mci->csrows[row];
224			if (csr) {
225				if (csr->channels) {
226					for (chn = 0; chn < tot_channels; chn++)
227						kfree(csr->channels[chn]);
228					kfree(csr->channels);
229				}
230				kfree(csr);
231			}
232		}
233		kfree(mci->csrows);
234	}
235	kfree(mci);
236}
237
238/**
239 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
240 * @mc_num:		Memory controller number
241 * @n_layers:		Number of MC hierarchy layers
242 * @layers:		Describes each layer as seen by the Memory Controller
243 * @sz_pvt:		size of private storage needed
244 *
245 *
246 * Everything is kmalloc'ed as one big chunk - more efficient.
247 * It can only be used if all structures have the same lifetime - otherwise
248 * you have to allocate and initialize your own structures.
249 *
250 * Use edac_mc_free() to free mc structures allocated by this function.
251 *
252 * NOTE: drivers handle multi-rank memories in different ways: in some
253 * drivers, one multi-rank memory stick is mapped as one entry, while, in
254 * others, a single multi-rank memory stick would be mapped into several
255 * entries. Currently, this function will allocate multiple struct dimm_info
256 * in such scenarios, as grouping the multiple ranks would require driver changes.
257 *
258 * Returns:
259 *	On failure: NULL
260 *	On success: struct mem_ctl_info pointer
261 */
262struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
263				   unsigned n_layers,
264				   struct edac_mc_layer *layers,
265				   unsigned sz_pvt)
266{
267	struct mem_ctl_info *mci;
268	struct edac_mc_layer *layer;
269	struct csrow_info *csr;
270	struct rank_info *chan;
271	struct dimm_info *dimm;
272	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
273	unsigned pos[EDAC_MAX_LAYERS];
274	unsigned size, tot_dimms = 1, count = 1;
275	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
276	void *pvt, *p, *ptr = NULL;
277	int i, j, row, chn, n, len, off;
278	bool per_rank = false;
279
280	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
281	/*
282	 * Calculate the total number of dimms and csrows/cschannels while
283	 * in the old API emulation mode
284	 */
285	for (i = 0; i < n_layers; i++) {
286		tot_dimms *= layers[i].size;
287		if (layers[i].is_virt_csrow)
288			tot_csrows *= layers[i].size;
289		else
290			tot_channels *= layers[i].size;
291
292		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
293			per_rank = true;
294	}
295
296	/* Figure out the offsets of the various items from the start of an mc
297	 * structure.  We want the alignment of each item to be at least as
298	 * stringent as what the compiler would provide if we could simply
299	 * hardcode everything into a single struct.
300	 */
301	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
302	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
303	for (i = 0; i < n_layers; i++) {
304		count *= layers[i].size;
305		edac_dbg(4, "errcount layer %d size %d\n", i, count);
306		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
307		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
308		tot_errcount += 2 * count;
309	}
310
311	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
312	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
313	size = ((unsigned long)pvt) + sz_pvt;
314
315	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
316		 size,
317		 tot_dimms,
318		 per_rank ? "ranks" : "dimms",
319		 tot_csrows * tot_channels);
320
321	mci = kzalloc(size, GFP_KERNEL);
322	if (mci == NULL)
323		return NULL;
324
325	/* Adjust pointers so they point within the memory we just allocated
326	 * rather than an imaginary chunk of memory located at address 0.
327	 */
328	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
329	for (i = 0; i < n_layers; i++) {
330		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
331		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
332	}
333	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
334
335	/* setup index and various internal pointers */
336	mci->mc_idx = mc_num;
337	mci->tot_dimms = tot_dimms;
338	mci->pvt_info = pvt;
339	mci->n_layers = n_layers;
340	mci->layers = layer;
341	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
342	mci->nr_csrows = tot_csrows;
343	mci->num_cschannel = tot_channels;
344	mci->mem_is_per_rank = per_rank;
345
346	/*
347	 * Allocate and fill the csrow/channels structs
348	 */
349	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
350	if (!mci->csrows)
351		goto error;
352	for (row = 0; row < tot_csrows; row++) {
353		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
354		if (!csr)
355			goto error;
356		mci->csrows[row] = csr;
357		csr->csrow_idx = row;
358		csr->mci = mci;
359		csr->nr_channels = tot_channels;
360		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
361					GFP_KERNEL);
362		if (!csr->channels)
363			goto error;
364
365		for (chn = 0; chn < tot_channels; chn++) {
366			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
367			if (!chan)
368				goto error;
369			csr->channels[chn] = chan;
370			chan->chan_idx = chn;
371			chan->csrow = csr;
372		}
373	}
374
375	/*
376	 * Allocate and fill the dimm structs
377	 */
378	mci->dimms  = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
379	if (!mci->dimms)
380		goto error;
381
382	memset(&pos, 0, sizeof(pos));
383	row = 0;
384	chn = 0;
385	for (i = 0; i < tot_dimms; i++) {
386		chan = mci->csrows[row]->channels[chn];
387		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
388		if (off < 0 || off >= tot_dimms) {
389			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
390			goto error;
391		}
392
393		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
394		if (!dimm)
395			goto error;
396		mci->dimms[off] = dimm;
397		dimm->mci = mci;
398
399		/*
400		 * Copy DIMM location and initialize it.
401		 */
402		len = sizeof(dimm->label);
403		p = dimm->label;
404		n = snprintf(p, len, "mc#%u", mc_num);
405		p += n;
406		len -= n;
407		for (j = 0; j < n_layers; j++) {
408			n = snprintf(p, len, "%s#%u",
409				     edac_layer_name[layers[j].type],
410				     pos[j]);
411			p += n;
412			len -= n;
413			dimm->location[j] = pos[j];
414
415			if (len <= 0)
416				break;
417		}
418
419		/* Link it to the csrows old API data */
420		chan->dimm = dimm;
421		dimm->csrow = row;
422		dimm->cschannel = chn;
423
424		/* Increment csrow location */
425		if (layers[0].is_virt_csrow) {
426			chn++;
427			if (chn == tot_channels) {
428				chn = 0;
429				row++;
430			}
431		} else {
432			row++;
433			if (row == tot_csrows) {
434				row = 0;
435				chn++;
436			}
437		}
438
439		/* Increment dimm location */
440		for (j = n_layers - 1; j >= 0; j--) {
441			pos[j]++;
442			if (pos[j] < layers[j].size)
443				break;
444			pos[j] = 0;
445		}
446	}
447
448	mci->op_state = OP_ALLOC;
449
450	return mci;
451
452error:
453	_edac_mc_free(mci);
454
455	return NULL;
456}
457EXPORT_SYMBOL_GPL(edac_mc_alloc);
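
/*
 * Illustrative use only (the layer sizes and "struct example_pvt" below are
 * hypothetical): a driver whose controller has 4 chip-select rows with 2
 * channels each would typically allocate its mci like this:
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct example_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */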
458
459/**
460 * edac_mc_free
461 *	'Free' a previously allocated 'mci' structure
462 * @mci: pointer to a struct mem_ctl_info structure
463 */
464void edac_mc_free(struct mem_ctl_info *mci)
465{
466	edac_dbg(1, "\n");
467
468	/* If we're not yet registered with sysfs free only what was allocated
469	 * in edac_mc_alloc().
470	 */
471	if (!device_is_registered(&mci->dev)) {
472		_edac_mc_free(mci);
473		return;
474	}
475
476	/* the mci instance is freed here, when the sysfs object is dropped */
477	edac_unregister_sysfs(mci);
478}
479EXPORT_SYMBOL_GPL(edac_mc_free);
480
481
482/**
483 * find_mci_by_dev
484 *
485 *	scan list of controllers looking for the one that manages
486 *	the 'dev' device
487 * @dev: pointer to a struct device related with the MCI
488 */
489struct mem_ctl_info *find_mci_by_dev(struct device *dev)
490{
491	struct mem_ctl_info *mci;
492	struct list_head *item;
493
494	edac_dbg(3, "\n");
495
496	list_for_each(item, &mc_devices) {
497		mci = list_entry(item, struct mem_ctl_info, link);
498
499		if (mci->pdev == dev)
500			return mci;
501	}
502
503	return NULL;
504}
505EXPORT_SYMBOL_GPL(find_mci_by_dev);
506
507/*
508 * handler for EDAC to check if NMI type handler has asserted interrupt
509 */
510static int edac_mc_assert_error_check_and_clear(void)
511{
512	int old_state;
513
514	if (edac_op_state == EDAC_OPSTATE_POLL)
515		return 1;
516
517	old_state = edac_err_assert;
518	edac_err_assert = 0;
519
520	return old_state;
521}
522
523/*
524 * edac_mc_workq_function
525 *	performs the operation scheduled by a workq request
526 */
527static void edac_mc_workq_function(struct work_struct *work_req)
528{
529	struct delayed_work *d_work = to_delayed_work(work_req);
530	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
531
532	mutex_lock(&mem_ctls_mutex);
533
534	/* if this control struct has moved to offline state, we are done */
535	if (mci->op_state == OP_OFFLINE) {
536		mutex_unlock(&mem_ctls_mutex);
537		return;
538	}
539
540	/* Only poll controllers that are running polled and have a check */
541	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
542		mci->edac_check(mci);
543
544	mutex_unlock(&mem_ctls_mutex);
545
546	/* Reschedule */
547	queue_delayed_work(edac_workqueue, &mci->work,
548			msecs_to_jiffies(edac_mc_get_poll_msec()));
549}
550
551/*
552 * edac_mc_workq_setup
553 *	initialize a workq item for this mci
554 *	passing in the new delay period in msec
555 *
556 *	locking model:
557 *
558 *		called with the mem_ctls_mutex held
559 */
560static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
561{
562	edac_dbg(0, "\n");
563
564	/* if this instance is not in the POLL state, then simply return */
565	if (mci->op_state != OP_RUNNING_POLL)
566		return;
567
568	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
569	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
570}
571
572/*
573 * edac_mc_workq_teardown
574 *	stop the workq processing on this mci
575 *
576 *	locking model:
577 *
578 *		called WITHOUT lock held
579 */
580static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
581{
582	int status;
583
584	if (mci->op_state != OP_RUNNING_POLL)
585		return;
586
587	status = cancel_delayed_work(&mci->work);
588	if (status == 0) {
589		edac_dbg(0, "not canceled, flush the queue\n");
590
591		/* workq instance might be running, wait for it */
592		flush_workqueue(edac_workqueue);
593	}
594}
595
596/*
597 * edac_mc_reset_delay_period(int value)
598 *
599 *	user space has updated our poll period value, need to
600 *	reset our workq delays
601 */
602void edac_mc_reset_delay_period(int value)
603{
604	struct mem_ctl_info *mci;
605	struct list_head *item;
606
607	mutex_lock(&mem_ctls_mutex);
608
609	list_for_each(item, &mc_devices) {
610		mci = list_entry(item, struct mem_ctl_info, link);
611
612		edac_mc_workq_setup(mci, (unsigned long) value);
613	}
614
615	mutex_unlock(&mem_ctls_mutex);
616}
617
618
619
620/* Return 0 on success, 1 on failure.
621 * Before calling this function, caller must
622 * assign a unique value to mci->mc_idx.
623 *
624 *	locking model:
625 *
626 *		called with the mem_ctls_mutex lock held
627 */
628static int add_mc_to_global_list(struct mem_ctl_info *mci)
629{
630	struct list_head *item, *insert_before;
631	struct mem_ctl_info *p;
632
633	insert_before = &mc_devices;
634
635	p = find_mci_by_dev(mci->pdev);
636	if (unlikely(p != NULL))
637		goto fail0;
638
639	list_for_each(item, &mc_devices) {
640		p = list_entry(item, struct mem_ctl_info, link);
641
642		if (p->mc_idx >= mci->mc_idx) {
643			if (unlikely(p->mc_idx == mci->mc_idx))
644				goto fail1;
645
646			insert_before = item;
647			break;
648		}
649	}
650
651	list_add_tail_rcu(&mci->link, insert_before);
652	atomic_inc(&edac_handlers);
653	return 0;
654
655fail0:
656	edac_printk(KERN_WARNING, EDAC_MC,
657		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
658		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
659	return 1;
660
661fail1:
662	edac_printk(KERN_WARNING, EDAC_MC,
663		"bug in low-level driver: attempt to assign\n"
664		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
665	return 1;
666}
667
668static int del_mc_from_global_list(struct mem_ctl_info *mci)
669{
670	int handlers = atomic_dec_return(&edac_handlers);
671	list_del_rcu(&mci->link);
672
673	/* these are for safe removal of devices from global list while
674	 * NMI handlers may be traversing list
675	 */
676	synchronize_rcu();
677	INIT_LIST_HEAD(&mci->link);
678
679	return handlers;
680}
681
682/**
683 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
684 *
685 * If found, return a pointer to the structure.
686 * Else return NULL.
687 *
688 * Caller must hold mem_ctls_mutex.
689 */
690struct mem_ctl_info *edac_mc_find(int idx)
691{
692	struct list_head *item;
693	struct mem_ctl_info *mci;
694
695	list_for_each(item, &mc_devices) {
696		mci = list_entry(item, struct mem_ctl_info, link);
697
698		if (mci->mc_idx >= idx) {
699			if (mci->mc_idx == idx)
700				return mci;
701
702			break;
703		}
704	}
705
706	return NULL;
707}
708EXPORT_SYMBOL(edac_mc_find);
709
710/**
711 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
712 *                 create sysfs entries associated with mci structure
713 * @mci: pointer to the mci structure to be added to the list
714 *
715 * Return:
716 *	0	Success
717 *	!0	Failure
718 */
719
720/* FIXME - should a warning be printed if no error detection? correction? */
721int edac_mc_add_mc(struct mem_ctl_info *mci)
722{
723	int ret = -EINVAL;
724	edac_dbg(0, "\n");
725
726#ifdef CONFIG_EDAC_DEBUG
727	if (edac_debug_level >= 3)
728		edac_mc_dump_mci(mci);
729
730	if (edac_debug_level >= 4) {
731		int i;
732
733		for (i = 0; i < mci->nr_csrows; i++) {
734			struct csrow_info *csrow = mci->csrows[i];
735			u32 nr_pages = 0;
736			int j;
737
738			for (j = 0; j < csrow->nr_channels; j++)
739				nr_pages += csrow->channels[j]->dimm->nr_pages;
740			if (!nr_pages)
741				continue;
742			edac_mc_dump_csrow(csrow);
743			for (j = 0; j < csrow->nr_channels; j++)
744				if (csrow->channels[j]->dimm->nr_pages)
745					edac_mc_dump_channel(csrow->channels[j]);
746		}
747		for (i = 0; i < mci->tot_dimms; i++)
748			if (mci->dimms[i]->nr_pages)
749				edac_mc_dump_dimm(mci->dimms[i], i);
750	}
751#endif
752	mutex_lock(&mem_ctls_mutex);
753
754	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
755		ret = -EPERM;
756		goto fail0;
757	}
758
759	if (add_mc_to_global_list(mci))
760		goto fail0;
761
762	/* set load time so that error rate can be tracked */
763	mci->start_time = jiffies;
764
765	if (edac_create_sysfs_mci_device(mci)) {
766		edac_mc_printk(mci, KERN_WARNING,
767			"failed to create sysfs device\n");
768		goto fail1;
769	}
770
771	/* If there IS a check routine, then we are running POLLED */
772	if (mci->edac_check != NULL) {
773		/* This instance is NOW RUNNING */
774		mci->op_state = OP_RUNNING_POLL;
775
776		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
777	} else {
778		mci->op_state = OP_RUNNING_INTERRUPT;
779	}
780
781	/* Report action taken */
782	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
783		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
784
785	edac_mc_owner = mci->mod_name;
786
787	mutex_unlock(&mem_ctls_mutex);
788	return 0;
789
790fail1:
791	del_mc_from_global_list(mci);
792
793fail0:
794	mutex_unlock(&mem_ctls_mutex);
795	return ret;
796}
797EXPORT_SYMBOL_GPL(edac_mc_add_mc);
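
/*
 * A hedged sketch of the registration step in a driver's probe routine
 * ("example_edac", "Example MC", example_check() and pdev are placeholders;
 * real drivers also fill the capability fields and the csrow/dimm
 * information before registering):
 *
 *	mci->pdev = &pdev->dev;
 *	mci->mod_name = "example_edac";
 *	mci->ctl_name = "Example MC";
 *	mci->edac_check = example_check;	(only for polled operation)
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */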
798
799/**
800 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
801 *                 remove mci structure from global list
802 * @dev: Pointer to 'struct device' representing the mci structure to remove.
803 *
804 * Return pointer to removed mci structure, or NULL if device not found.
805 */
806struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
807{
808	struct mem_ctl_info *mci;
809
810	edac_dbg(0, "\n");
811
812	mutex_lock(&mem_ctls_mutex);
813
814	/* find the requested mci struct in the global list */
815	mci = find_mci_by_dev(dev);
816	if (mci == NULL) {
817		mutex_unlock(&mem_ctls_mutex);
818		return NULL;
819	}
820
821	if (!del_mc_from_global_list(mci))
822		edac_mc_owner = NULL;
823	mutex_unlock(&mem_ctls_mutex);
824
825	/* flush workq processes */
826	edac_mc_workq_teardown(mci);
827
828	/* marking MCI offline */
829	mci->op_state = OP_OFFLINE;
830
831	/* remove from sysfs */
832	edac_remove_sysfs_mci_device(mci);
833
834	edac_printk(KERN_INFO, EDAC_MC,
835		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
836		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
837
838	return mci;
839}
840EXPORT_SYMBOL_GPL(edac_mc_del_mc);
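
/*
 * The matching teardown in a driver's remove routine would then be (a sketch,
 * using the same hypothetical pdev as in the probe example above):
 *
 *	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *	if (mci)
 *		edac_mc_free(mci);
 */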
841
842static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
843				u32 size)
844{
845	struct page *pg;
846	void *virt_addr;
847	unsigned long flags = 0;
848
849	edac_dbg(3, "\n");
850
851	/* ECC error page was not in our memory. Ignore it. */
852	if (!pfn_valid(page))
853		return;
854
855	/* Find the actual page structure then map it and fix */
856	pg = pfn_to_page(page);
857
858	if (PageHighMem(pg))
859		local_irq_save(flags);
860
861	virt_addr = kmap_atomic(pg);
862
863	/* Perform architecture specific atomic scrub operation */
864	atomic_scrub(virt_addr + offset, size);
865
866	/* Unmap and complete */
867	kunmap_atomic(virt_addr);
868
869	if (PageHighMem(pg))
870		local_irq_restore(flags);
871}
872
873/* FIXME - should return -1 */
874int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
875{
876	struct csrow_info **csrows = mci->csrows;
877	int row, i, j, n;
878
879	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
880	row = -1;
881
882	for (i = 0; i < mci->nr_csrows; i++) {
883		struct csrow_info *csrow = csrows[i];
884		n = 0;
885		for (j = 0; j < csrow->nr_channels; j++) {
886			struct dimm_info *dimm = csrow->channels[j]->dimm;
887			n += dimm->nr_pages;
888		}
889		if (n == 0)
890			continue;
891
892		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
893			 mci->mc_idx,
894			 csrow->first_page, page, csrow->last_page,
895			 csrow->page_mask);
896
897		if ((page >= csrow->first_page) &&
898		    (page <= csrow->last_page) &&
899		    ((page & csrow->page_mask) ==
900		     (csrow->first_page & csrow->page_mask))) {
901			row = i;
902			break;
903		}
904	}
905
906	if (row == -1)
907		edac_mc_printk(mci, KERN_ERR,
908			"could not look up page error address %lx\n",
909			(unsigned long)page);
910
911	return row;
912}
913EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
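
/*
 * For example, a driver that has already decoded an error to a physical
 * address could translate it back to a csrow index like this (error_address
 * is a hypothetical variable):
 *
 *	row = edac_mc_find_csrow_by_page(mci, error_address >> PAGE_SHIFT);
 *	if (row >= 0)
 *		csrow = mci->csrows[row];
 */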
914
915const char *edac_layer_name[] = {
916	[EDAC_MC_LAYER_BRANCH] = "branch",
917	[EDAC_MC_LAYER_CHANNEL] = "channel",
918	[EDAC_MC_LAYER_SLOT] = "slot",
919	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
920	[EDAC_MC_LAYER_ALL_MEM] = "memory",
921};
922EXPORT_SYMBOL_GPL(edac_layer_name);
923
924static void edac_inc_ce_error(struct mem_ctl_info *mci,
925			      bool enable_per_layer_report,
926			      const int pos[EDAC_MAX_LAYERS],
927			      const u16 count)
928{
929	int i, index = 0;
930
931	mci->ce_mc += count;
932
933	if (!enable_per_layer_report) {
934		mci->ce_noinfo_count += count;
935		return;
936	}
937
938	for (i = 0; i < mci->n_layers; i++) {
939		if (pos[i] < 0)
940			break;
941		index += pos[i];
942		mci->ce_per_layer[i][index] += count;
943
944		if (i < mci->n_layers - 1)
945			index *= mci->layers[i + 1].size;
946	}
947}
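
/*
 * Worked example of the index computed above (the sizes are illustrative):
 * with three layers of sizes {2, 3, 4} and an error at pos = {1, 2, 3},
 * the loop bumps:
 *
 *	ce_per_layer[0][1]			(index = 1)
 *	ce_per_layer[1][1 * 3 + 2]		(index = 5)
 *	ce_per_layer[2][(1 * 3 + 2) * 4 + 3]	(index = 23)
 *
 * i.e. each deeper layer is indexed as if the layers above it were flattened
 * row-major. edac_inc_ue_error() below uses the same scheme for UEs.
 */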
948
949static void edac_inc_ue_error(struct mem_ctl_info *mci,
950				    bool enable_per_layer_report,
951				    const int pos[EDAC_MAX_LAYERS],
952				    const u16 count)
953{
954	int i, index = 0;
955
956	mci->ue_mc += count;
957
958	if (!enable_per_layer_report) {
959		mci->ue_noinfo_count += count;
960		return;
961	}
962
963	for (i = 0; i < mci->n_layers; i++) {
964		if (pos[i] < 0)
965			break;
966		index += pos[i];
967		mci->ue_per_layer[i][index] += count;
968
969		if (i < mci->n_layers - 1)
970			index *= mci->layers[i + 1].size;
971	}
972}
973
974static void edac_ce_error(struct mem_ctl_info *mci,
975			  const u16 error_count,
976			  const int pos[EDAC_MAX_LAYERS],
977			  const char *msg,
978			  const char *location,
979			  const char *label,
980			  const char *detail,
981			  const char *other_detail,
982			  const bool enable_per_layer_report,
983			  const unsigned long page_frame_number,
984			  const unsigned long offset_in_page,
985			  long grain)
986{
987	unsigned long remapped_page;
988	char *msg_aux = "";
989
990	if (*msg)
991		msg_aux = " ";
992
993	if (edac_mc_get_log_ce()) {
994		if (other_detail && *other_detail)
995			edac_mc_printk(mci, KERN_WARNING,
996				       "%d CE %s%son %s (%s %s - %s)\n",
997				       error_count, msg, msg_aux, label,
998				       location, detail, other_detail);
999		else
1000			edac_mc_printk(mci, KERN_WARNING,
1001				       "%d CE %s%son %s (%s %s)\n",
1002				       error_count, msg, msg_aux, label,
1003				       location, detail);
1004	}
1005	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);
1006
1007	if (mci->scrub_mode & SCRUB_SW_SRC) {
1008		/*
1009		 * Some memory controllers (called MCs below) can remap
1010		 * memory so that it is still available at a different
1011		 * address when PCI devices map into memory.
1012		 * MCs that can't do this lose the memory where PCI
1013		 * devices are mapped. This mapping is MC-dependent
1014		 * and so we call back into the MC driver for it to
1015		 * map the MC page to a physical (CPU) page which can
1016		 * then be mapped to a virtual page - which can then
1017		 * be scrubbed.
1018		 */
1019		remapped_page = mci->ctl_page_to_phys ?
1020			mci->ctl_page_to_phys(mci, page_frame_number) :
1021			page_frame_number;
1022
1023		edac_mc_scrub_block(remapped_page,
1024					offset_in_page, grain);
1025	}
1026}
1027
1028static void edac_ue_error(struct mem_ctl_info *mci,
1029			  const u16 error_count,
1030			  const int pos[EDAC_MAX_LAYERS],
1031			  const char *msg,
1032			  const char *location,
1033			  const char *label,
1034			  const char *detail,
1035			  const char *other_detail,
1036			  const bool enable_per_layer_report)
1037{
1038	char *msg_aux = "";
1039
1040	if (*msg)
1041		msg_aux = " ";
1042
1043	if (edac_mc_get_log_ue()) {
1044		if (other_detail && *other_detail)
1045			edac_mc_printk(mci, KERN_WARNING,
1046				       "%d UE %s%son %s (%s %s - %s)\n",
1047				       error_count, msg, msg_aux, label,
1048				       location, detail, other_detail);
1049		else
1050			edac_mc_printk(mci, KERN_WARNING,
1051				       "%d UE %s%son %s (%s %s)\n",
1052				       error_count, msg, msg_aux, label,
1053				       location, detail);
1054	}
1055
1056	if (edac_mc_get_panic_on_ue()) {
1057		if (other_detail && *other_detail)
1058			panic("UE %s%son %s (%s %s - %s)\n",
1059			      msg, msg_aux, label, location, detail, other_detail);
1060		else
1061			panic("UE %s%son %s (%s %s)\n",
1062			      msg, msg_aux, label, location, detail);
1063	}
1064
1065	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
1066}
1067
1068/**
1069 * edac_raw_mc_handle_error - reports a memory event to userspace without doing
1070 *			      anything to discover the error location
1071 *
1072 * @type:		severity of the error (CE/UE/Fatal)
1073 * @mci:		a struct mem_ctl_info pointer
1074 * @e:			error description
1075 *
1076 * This raw function is used internally by edac_mc_handle_error(). It should
1077 * only be called directly when the hardware error comes directly from the
1078 * BIOS, as in the case of the APEI GHES driver.
1079 */
1080void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
1081			      struct mem_ctl_info *mci,
1082			      struct edac_raw_error_desc *e)
1083{
1084	char detail[80];
1085	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
1086
1087	/* Memory type dependent details about the error */
1088	if (type == HW_EVENT_ERR_CORRECTED) {
1089		snprintf(detail, sizeof(detail),
1090			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
1091			e->page_frame_number, e->offset_in_page,
1092			e->grain, e->syndrome);
1093		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
1094			      detail, e->other_detail, e->enable_per_layer_report,
1095			      e->page_frame_number, e->offset_in_page, e->grain);
1096	} else {
1097		snprintf(detail, sizeof(detail),
1098			"page:0x%lx offset:0x%lx grain:%ld",
1099			e->page_frame_number, e->offset_in_page, e->grain);
1100
1101		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
1102			      detail, e->other_detail, e->enable_per_layer_report);
1103	}
1104
1105
1106}
1107EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
1108
1109/**
1110 * edac_mc_handle_error - reports a memory event to userspace
1111 *
1112 * @type:		severity of the error (CE/UE/Fatal)
1113 * @mci:		a struct mem_ctl_info pointer
1114 * @error_count:	Number of errors of the same type
1115 * @page_frame_number:	mem page where the error occurred
1116 * @offset_in_page:	offset of the error inside the page
1117 * @syndrome:		ECC syndrome
1118 * @top_layer:		Memory layer[0] position
1119 * @mid_layer:		Memory layer[1] position
1120 * @low_layer:		Memory layer[2] position
1121 * @msg:		Message meaningful to the end users that
1122 *			explains the event
1123 * @other_detail:	Technical details about the event that
1124 *			may help hardware manufacturers and
1125 *			EDAC developers to analyse the event
1126 */
1127void edac_mc_handle_error(const enum hw_event_mc_err_type type,
1128			  struct mem_ctl_info *mci,
1129			  const u16 error_count,
1130			  const unsigned long page_frame_number,
1131			  const unsigned long offset_in_page,
1132			  const unsigned long syndrome,
1133			  const int top_layer,
1134			  const int mid_layer,
1135			  const int low_layer,
1136			  const char *msg,
1137			  const char *other_detail)
1138{
1139	char *p;
1140	int row = -1, chan = -1;
1141	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
1142	int i, n_labels = 0;
1143	u8 grain_bits;
1144	struct edac_raw_error_desc *e = &mci->error_desc;
1145
1146	edac_dbg(3, "MC%d\n", mci->mc_idx);
1147
1148	/* Fills the error report buffer */
1149	memset(e, 0, sizeof (*e));
1150	e->error_count = error_count;
1151	e->top_layer = top_layer;
1152	e->mid_layer = mid_layer;
1153	e->low_layer = low_layer;
1154	e->page_frame_number = page_frame_number;
1155	e->offset_in_page = offset_in_page;
1156	e->syndrome = syndrome;
1157	e->msg = msg;
1158	e->other_detail = other_detail;
1159
1160	/*
1161	 * Check if the event report is consistent and if the memory
1162	 * location is known. If it is known, enable_per_layer_report will be
1163	 * true, the DIMM(s) label info will be filled and the per-layer
1164	 * error counters will be incremented.
1165	 */
1166	for (i = 0; i < mci->n_layers; i++) {
1167		if (pos[i] >= (int)mci->layers[i].size) {
1168
1169			edac_mc_printk(mci, KERN_ERR,
1170				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
1171				       edac_layer_name[mci->layers[i].type],
1172				       pos[i], mci->layers[i].size);
1173			/*
1174			 * Instead of just returning it, let's use what's
1175			 * known about the error. The increment routines and
1176			 * the DIMM filter logic will do the right thing by
1177			 * pointing the likely damaged DIMMs.
1178			 */
1179			pos[i] = -1;
1180		}
1181		if (pos[i] >= 0)
1182			e->enable_per_layer_report = true;
1183	}
1184
1185	/*
1186	 * Get the dimm label/grain that applies to the match criteria.
1187	 * As the error algorithm may not be able to point to just one memory
1188	 * stick, the logic here will get all possible labels that could
1189 * potentially be affected by the error.
1190	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
1191	 * to have only the MC channel and the MC dimm (also called "branch")
1192	 * but the channel is not known, as the memory is arranged in pairs,
1193	 * where each memory belongs to a separate channel within the same
1194	 * branch.
1195	 */
1196	p = e->label;
1197	*p = '\0';
1198
1199	for (i = 0; i < mci->tot_dimms; i++) {
1200		struct dimm_info *dimm = mci->dimms[i];
1201
1202		if (top_layer >= 0 && top_layer != dimm->location[0])
1203			continue;
1204		if (mid_layer >= 0 && mid_layer != dimm->location[1])
1205			continue;
1206		if (low_layer >= 0 && low_layer != dimm->location[2])
1207			continue;
1208
1209		/* get the max grain, over the error match range */
1210		if (dimm->grain > e->grain)
1211			e->grain = dimm->grain;
1212
1213		/*
1214		 * If the error is memory-controller wide, there's no need to
1215		 * seek for the affected DIMMs because the whole
1216		 * channel/memory controller/...  may be affected.
1217		 * Also, don't show errors for empty DIMM slots.
1218		 */
1219		if (e->enable_per_layer_report && dimm->nr_pages) {
1220			if (n_labels >= EDAC_MAX_LABELS) {
1221				e->enable_per_layer_report = false;
1222				break;
1223			}
1224			n_labels++;
1225			if (p != e->label) {
1226				strcpy(p, OTHER_LABEL);
1227				p += strlen(OTHER_LABEL);
1228			}
1229			strcpy(p, dimm->label);
1230			p += strlen(p);
1231			*p = '\0';
1232
1233			/*
1234			 * get csrow/channel of the DIMM, in order to allow
1235			 * incrementing the compat API counters
1236			 */
1237			edac_dbg(4, "%s csrows map: (%d,%d)\n",
1238				 mci->mem_is_per_rank ? "rank" : "dimm",
1239				 dimm->csrow, dimm->cschannel);
1240			if (row == -1)
1241				row = dimm->csrow;
1242			else if (row >= 0 && row != dimm->csrow)
1243				row = -2;
1244
1245			if (chan == -1)
1246				chan = dimm->cschannel;
1247			else if (chan >= 0 && chan != dimm->cschannel)
1248				chan = -2;
1249		}
1250	}
1251
1252	if (!e->enable_per_layer_report) {
1253		strcpy(e->label, "any memory");
1254	} else {
1255		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
1256		if (p == e->label)
1257			strcpy(e->label, "unknown memory");
1258		if (type == HW_EVENT_ERR_CORRECTED) {
1259			if (row >= 0) {
1260				mci->csrows[row]->ce_count += error_count;
1261				if (chan >= 0)
1262					mci->csrows[row]->channels[chan]->ce_count += error_count;
1263			}
1264		} else
1265			if (row >= 0)
1266				mci->csrows[row]->ue_count += error_count;
1267	}
1268
1269	/* Fill the RAM location data */
1270	p = e->location;
1271
1272	for (i = 0; i < mci->n_layers; i++) {
1273		if (pos[i] < 0)
1274			continue;
1275
1276		p += sprintf(p, "%s:%d ",
1277			     edac_layer_name[mci->layers[i].type],
1278			     pos[i]);
1279	}
1280	if (p > e->location)
1281		*(p - 1) = '\0';
1282
1283	/* Report the error via the trace interface */
1284	grain_bits = fls_long(e->grain) + 1;
1285	trace_mc_event(type, e->msg, e->label, e->error_count,
1286		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
1287		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
1288		       grain_bits, e->syndrome, e->other_detail);
1289
1290	edac_raw_mc_handle_error(type, mci, e);
1291}
1292EXPORT_SYMBOL_GPL(edac_mc_handle_error);
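
/*
 * Example call from a hypothetical polling driver that decoded one corrected
 * error to csrow 2, channel 1 (what each layer position means depends on the
 * layers the driver declared in edac_mc_alloc(); err_addr and syndrome are
 * placeholder variables):
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *			     err_addr >> PAGE_SHIFT,
 *			     err_addr & ~PAGE_MASK,
 *			     syndrome,
 *			     2, 1, -1,
 *			     "read error", "");
 */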
1293