/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

/*
 * Used to lock EDAC MC to just one module, avoiding two drivers (e.g.
 * apei/ghes and i7core_edac) being used at the same time.
 */
static void const *edac_mc_owner;

static struct bus_type mc_bus[EDAC_MAX_MCS];

unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
				 unsigned len)
{
	struct mem_ctl_info *mci = dimm->mci;
	int i, n, count = 0;
	char *p = buf;

	for (i = 0; i < mci->n_layers; i++) {
		n = snprintf(p, len, "%s %d ",
			     edac_layer_name[mci->layers[i].type],
			     dimm->location[i]);
		p += n;
		len -= n;
		count += n;
		if (!len)
			break;
	}

	return count;
}

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	edac_dbg(4, "  channel->chan_idx = %d\n", chan->chan_idx);
	edac_dbg(4, "    channel = %p\n", chan);
	edac_dbg(4, "    channel->csrow = %p\n", chan->csrow);
	edac_dbg(4, "    channel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm, int number)
{
	char location[80];

	edac_dimm_info_location(dimm, location, sizeof(location));

	edac_dbg(4, "%s%i: %smapped as virtual row %d, chan %d\n",
		 dimm->mci->csbased ? "rank" : "dimm",
		 number, location, dimm->csrow, dimm->cschannel);
	edac_dbg(4, "  dimm = %p\n", dimm);
	edac_dbg(4, "  dimm->label = '%s'\n", dimm->label);
	edac_dbg(4, "  dimm->nr_pages = 0x%x\n", dimm->nr_pages);
	edac_dbg(4, "  dimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	edac_dbg(4, "csrow->csrow_idx = %d\n", csrow->csrow_idx);
	edac_dbg(4, "  csrow = %p\n", csrow);
	edac_dbg(4, "  csrow->first_page = 0x%lx\n", csrow->first_page);
	edac_dbg(4, "  csrow->last_page = 0x%lx\n", csrow->last_page);
	edac_dbg(4, "  csrow->page_mask = 0x%lx\n", csrow->page_mask);
	edac_dbg(4, "  csrow->nr_channels = %d\n", csrow->nr_channels);
	edac_dbg(4, "  csrow->channels = %p\n", csrow->channels);
	edac_dbg(4, "  csrow->mci = %p\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	edac_dbg(3, "\tmci = %p\n", mci);
	edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap);
	edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check);
	edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n",
		 mci->nr_csrows, mci->csrows);
	edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n",
		 mci->tot_dimms, mci->dimms);
	edac_dbg(3, "\tdev = %p\n", mci->pdev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 mci->mod_name, mci->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char * const edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p:		pointer to a pointer with the memory offset to be used. At
 *		return, this will be incremented to point to the next offset
 * @size:	Size of the data structure to be reserved
 * @n_elems:	Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed so that consecutive calls keep advancing to the
 * proper offsets in memory when allocating a struct along with its embedded
 * structs, as edac_device_alloc_ctl_info() does, for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'ptr' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}
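
/*
 * Illustrative sketch (not taken from a real driver) of how the offsets
 * produced by edac_align_ptr() are meant to be used, mirroring what
 * edac_mc_alloc() does below: walk the layout once with a NULL base to
 * compute aligned offsets, take the final offset as the total size,
 * kzalloc() a single chunk, then rebase every offset into that chunk.
 * 'struct foo', 'counters' and 'n_counters' are hypothetical placeholders.
 *
 *	void *ptr = NULL, *base;
 *	struct foo *foo;
 *	u32 *counters;
 *	unsigned size;
 *
 *	foo = edac_align_ptr(&ptr, sizeof(*foo), 1);
 *	counters = edac_align_ptr(&ptr, sizeof(u32), n_counters);
 *	size = (unsigned long)ptr;
 *
 *	base = kzalloc(size, GFP_KERNEL);
 *	foo = (struct foo *)((char *)base + (unsigned long)foo);
 *	counters = (u32 *)((char *)base + (unsigned long)counters);
 */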

static void _edac_mc_free(struct mem_ctl_info *mci)
{
	int i, chn, row;
	struct csrow_info *csr;
	const unsigned int tot_dimms = mci->tot_dimms;
	const unsigned int tot_channels = mci->num_cschannel;
	const unsigned int tot_csrows = mci->nr_csrows;

	if (mci->dimms) {
		for (i = 0; i < tot_dimms; i++)
			kfree(mci->dimms[i]);
		kfree(mci->dimms);
	}
	if (mci->csrows) {
		for (row = 0; row < tot_csrows; row++) {
			csr = mci->csrows[row];
			if (csr) {
				if (csr->channels) {
					for (chn = 0; chn < tot_channels; chn++)
						kfree(csr->channels[chn]);
					kfree(csr->channels);
				}
				kfree(csr);
			}
		}
		kfree(mci->csrows);
	}
	kfree(mci);
}

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks would require changes
 * to the drivers.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csr;
	struct rank_info *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, row, chn, n, len, off;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		edac_dbg(4, "errcount layer %d size %d\n", i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	edac_dbg(4, "allocating %d error counters\n", tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		 size,
		 tot_dimms,
		 per_rank ? "ranks" : "dimms",
		 tot_csrows * tot_channels);

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->csbased = per_rank;

	/*
	 * Allocate and fill the csrow/channels structs
	 */
	mci->csrows = kcalloc(tot_csrows, sizeof(*mci->csrows), GFP_KERNEL);
	if (!mci->csrows)
		goto error;
	for (row = 0; row < tot_csrows; row++) {
		csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL);
		if (!csr)
			goto error;
		mci->csrows[row] = csr;
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		csr->channels = kcalloc(tot_channels, sizeof(*csr->channels),
					GFP_KERNEL);
		if (!csr->channels)
			goto error;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL);
			if (!chan)
				goto error;
			csr->channels[chn] = chan;
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Allocate and fill the dimm structs
	 */
	mci->dimms = kcalloc(tot_dimms, sizeof(*mci->dimms), GFP_KERNEL);
	if (!mci->dimms)
		goto error;

	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	for (i = 0; i < tot_dimms; i++) {
		chan = mci->csrows[row]->channels[chn];
		off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]);
		if (off < 0 || off >= tot_dimms) {
			edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n");
			goto error;
		}

		dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL);
		if (!dimm)
			goto error;
		mci->dimms[off] = dimm;
		dimm->mci = mci;

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		if (layers[0].is_virt_csrow) {
			chn++;
			if (chn == tot_channels) {
				chn = 0;
				row++;
			}
		} else {
			row++;
			if (row == tot_csrows) {
				row = 0;
				chn++;
			}
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;

	return mci;

error:
	_edac_mc_free(mci);

	return NULL;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
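
/*
 * Illustrative sketch of a typical edac_mc_alloc() call: a hypothetical
 * controller with 4 chip-select rows of 2 channels each and a made-up
 * driver-private struct my_pvt (none of these names come from a real
 * driver):
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */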

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_dbg(1, "\n");

	/* If we're not yet registered with sysfs free only what was allocated
	 * in edac_mc_alloc().
	 */
	if (!device_is_registered(&mci->dev)) {
		_edac_mc_free(mci);
		return;
	}

	/* the mci instance is freed here, when the sysfs object is dropped */
	edac_unregister_sysfs(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related to the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	edac_dbg(3, "\n");

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->pdev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec,
				bool init)
{
	edac_dbg(0, "\n");

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	if (init)
		INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);

	mod_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		edac_dbg(0, "not canceled, flush the queue\n");

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(unsigned long value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, value, false);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static int del_mc_from_global_list(struct mem_ctl_info *mci)
{
	int handlers = atomic_dec_return(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);

	return handlers;
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *                 create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	int ret = -EINVAL;
	edac_dbg(0, "\n");

	if (mci->mc_idx >= EDAC_MAX_MCS) {
		pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
		return -ENODEV;
	}

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			struct csrow_info *csrow = mci->csrows[i];
			u32 nr_pages = 0;
			int j;

			for (j = 0; j < csrow->nr_channels; j++)
				nr_pages += csrow->channels[j]->dimm->nr_pages;
			if (!nr_pages)
				continue;
			edac_mc_dump_csrow(csrow);
			for (j = 0; j < csrow->nr_channels; j++)
				if (csrow->channels[j]->dimm->nr_pages)
					edac_mc_dump_channel(csrow->channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			if (mci->dimms[i]->nr_pages)
				edac_mc_dump_dimm(mci->dimms[i], i);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
		ret = -EPERM;
		goto fail0;
	}

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	mci->bus = &mc_bus[mci->mc_idx];

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec(), true);
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO,
		"Giving out device to module %s controller %s: DEV %s (%s)\n",
		mci->mod_name, mci->ctl_name, mci->dev_name,
		edac_op_state_to_string(mci->op_state));

	edac_mc_owner = mci->mod_name;

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
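
/*
 * Illustrative sketch of the usual probe-time sequence, assuming a
 * hypothetical driver (the layer setup is the edac_mc_alloc() example
 * above; pdev, my_pvt and the capability values are placeholders):
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 *
 *	mci->pdev = &pdev->dev;
 *	mci->mtype_cap = MEM_FLAG_DDR3;
 *	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
 *	mci->mod_name = "my_edac";
 *	mci->ctl_name = "my_controller";
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */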

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	edac_dbg(0, "\n");

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	if (!del_mc_from_global_list(mci))
		edac_mc_owner = NULL;
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
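
/*
 * Illustrative sketch of the matching remove-time sequence for the probe
 * example above (pdev is again a placeholder device):
 *
 *	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *	if (mci)
 *		edac_mc_free(mci);
 */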

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	edac_dbg(3, "\n");

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info **csrows = mci->csrows;
	int row, i, j, n;

	edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j]->dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n",
			 mci->mc_idx,
			 csrow->first_page, page, csrow->last_page,
			 csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
	[EDAC_MC_LAYER_ALL_MEM] = "memory",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS],
			      const u16 count)
{
	int i, index = 0;

	mci->ce_mc += count;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count += count;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index] += count;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
				    bool enable_per_layer_report,
				    const int pos[EDAC_MAX_LAYERS],
				    const u16 count)
{
	int i, index = 0;

	mci->ue_mc += count;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count += count;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index] += count;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_ce_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  long grain)
{
	unsigned long remapped_page;
	char *msg_aux = "";

	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d CE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos, error_count);

	if (mci->scrub_mode == SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MCs that can't do this lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
					offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const u16 error_count,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	char *msg_aux = "";

	if (*msg)
		msg_aux = " ";

	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s - %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "%d UE %s%son %s (%s %s)\n",
				       error_count, msg, msg_aux, label,
				       location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s%son %s (%s%s - %s)\n",
			      msg, msg_aux, label, location, detail, other_detail);
		else
			panic("UE %s%son %s (%s%s)\n",
			      msg, msg_aux, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
}

/**
 * edac_raw_mc_handle_error - reports a memory event to userspace without doing
 *			      anything to discover the error location
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
 * @e:			error description
 *
 * This raw function is used internally by edac_mc_handle_error(). It should
 * only be called directly when the hardware error comes directly from the
 * BIOS, like in the case of the APEI GHES driver.
 */
void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
			      struct mem_ctl_info *mci,
			      struct edac_raw_error_desc *e)
{
	char detail[80];
	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
			e->page_frame_number, e->offset_in_page,
			e->grain, e->syndrome);
		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report,
			      e->page_frame_number, e->offset_in_page, e->grain);
	} else {
		snprintf(detail, sizeof(detail),
			"page:0x%lx offset:0x%lx grain:%ld",
			e->page_frame_number, e->offset_in_page, e->grain);

		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
			      detail, e->other_detail, e->enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);

/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:		severity of the error (CE/UE/Fatal)
 * @mci:		a struct mem_ctl_info pointer
 * @error_count:	Number of errors of the same type
 * @page_frame_number:	mem page where the error occurred
 * @offset_in_page:	offset of the error inside the page
 * @syndrome:		ECC syndrome
 * @top_layer:		Memory layer[0] position
 * @mid_layer:		Memory layer[1] position
 * @low_layer:		Memory layer[2] position
 * @msg:		Message meaningful to the end users that
 *			explains the event
 * @other_detail:	Technical details about the event that
 *			may help hardware manufacturers and
 *			EDAC developers to analyse the event
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const u16 error_count,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail)
{
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i, n_labels = 0;
	u8 grain_bits;
	struct edac_raw_error_desc *e = &mci->error_desc;

	edac_dbg(3, "MC%d\n", mci->mc_idx);

	/* Fills the error report buffer */
	memset(e, 0, sizeof (*e));
	e->error_count = error_count;
	e->top_layer = top_layer;
	e->mid_layer = mid_layer;
	e->low_layer = low_layer;
	e->page_frame_number = page_frame_number;
	e->offset_in_page = offset_in_page;
	e->syndrome = syndrome;
	e->msg = msg;
	e->other_detail = other_detail;

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing to the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			e->enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	p = e->label;
	*p = '\0';

	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = mci->dimms[i];

		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > e->grain)
			e->grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * search for the affected DIMMs because the whole
		 * channel/memory controller/...  may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (e->enable_per_layer_report && dimm->nr_pages) {
			if (n_labels >= EDAC_MAX_LABELS) {
				e->enable_per_layer_report = false;
				break;
			}
			n_labels++;
			if (p != e->label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			edac_dbg(4, "%s csrows map: (%d,%d)\n",
				 mci->csbased ? "rank" : "dimm",
				 dimm->csrow, dimm->cschannel);
			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!e->enable_per_layer_report) {
		strcpy(e->label, "any memory");
	} else {
		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
		if (p == e->label)
			strcpy(e->label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row]->ce_count += error_count;
				if (chan >= 0)
					mci->csrows[row]->channels[chan]->ce_count += error_count;
			}
		} else
			if (row >= 0)
				mci->csrows[row]->ue_count += error_count;
	}

	/* Fill the RAM location data */
	p = e->location;

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}
	if (p > e->location)
		*(p - 1) = '\0';

	/* Report the error via the trace interface */
	grain_bits = fls_long(e->grain) + 1;
	trace_mc_event(type, e->msg, e->label, e->error_count,
		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
		       grain_bits, e->syndrome, e->other_detail);

	edac_raw_mc_handle_error(type, mci, e);
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
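
/*
 * Illustrative sketch of a driver reporting one corrected error whose
 * location is only known down to the first layer (mci, page, offset,
 * syndrome and csrow are placeholder variables):
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
 *			     page, offset, syndrome,
 *			     csrow, -1, -1,
 *			     "read error", "");
 */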