edac_mc.c revision 956b9ba156dbfdb9cede2b2927ddf8be2233b3a7
1/* 2 * edac_mc kernel module 3 * (C) 2005, 2006 Linux Networx (http://lnxi.com) 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Written by Thayne Harbaugh 8 * Based on work by Dan Hollis <goemon at anime dot net> and others. 9 * http://www.anime.net/~goemon/linux-ecc/ 10 * 11 * Modified by Dave Peterson and Doug Thompson 12 * 13 */ 14 15#include <linux/module.h> 16#include <linux/proc_fs.h> 17#include <linux/kernel.h> 18#include <linux/types.h> 19#include <linux/smp.h> 20#include <linux/init.h> 21#include <linux/sysctl.h> 22#include <linux/highmem.h> 23#include <linux/timer.h> 24#include <linux/slab.h> 25#include <linux/jiffies.h> 26#include <linux/spinlock.h> 27#include <linux/list.h> 28#include <linux/ctype.h> 29#include <linux/edac.h> 30#include <linux/bitops.h> 31#include <asm/uaccess.h> 32#include <asm/page.h> 33#include <asm/edac.h> 34#include "edac_core.h" 35#include "edac_module.h" 36 37#define CREATE_TRACE_POINTS 38#define TRACE_INCLUDE_PATH ../../include/ras 39#include <ras/ras_event.h> 40 41/* lock to memory controller's control array */ 42static DEFINE_MUTEX(mem_ctls_mutex); 43static LIST_HEAD(mc_devices); 44 45#ifdef CONFIG_EDAC_DEBUG 46 47static void edac_mc_dump_channel(struct rank_info *chan) 48{ 49 edac_dbg(4, "\tchannel = %p\n", chan); 50 edac_dbg(4, "\tchannel->chan_idx = %d\n", chan->chan_idx); 51 edac_dbg(4, "\tchannel->csrow = %p\n", chan->csrow); 52 edac_dbg(4, "\tchannel->dimm = %p\n", chan->dimm); 53} 54 55static void edac_mc_dump_dimm(struct dimm_info *dimm) 56{ 57 int i; 58 59 edac_dbg(4, "\tdimm = %p\n", dimm); 60 edac_dbg(4, "\tdimm->label = '%s'\n", dimm->label); 61 edac_dbg(4, "\tdimm->nr_pages = 0x%x\n", dimm->nr_pages); 62 edac_dbg(4, "\tdimm location "); 63 for (i = 0; i < dimm->mci->n_layers; i++) { 64 printk(KERN_CONT "%d", dimm->location[i]); 65 if (i < dimm->mci->n_layers - 1) 66 printk(KERN_CONT "."); 67 } 68 printk(KERN_CONT "\n"); 69 edac_dbg(4, "\tdimm->grain = %d\n", dimm->grain); 70 edac_dbg(4, "\tdimm->nr_pages = 0x%x\n", dimm->nr_pages); 71} 72 73static void edac_mc_dump_csrow(struct csrow_info *csrow) 74{ 75 edac_dbg(4, "\tcsrow = %p\n", csrow); 76 edac_dbg(4, "\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); 77 edac_dbg(4, "\tcsrow->first_page = 0x%lx\n", csrow->first_page); 78 edac_dbg(4, "\tcsrow->last_page = 0x%lx\n", csrow->last_page); 79 edac_dbg(4, "\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); 80 edac_dbg(4, "\tcsrow->nr_channels = %d\n", csrow->nr_channels); 81 edac_dbg(4, "\tcsrow->channels = %p\n", csrow->channels); 82 edac_dbg(4, "\tcsrow->mci = %p\n", csrow->mci); 83} 84 85static void edac_mc_dump_mci(struct mem_ctl_info *mci) 86{ 87 edac_dbg(3, "\tmci = %p\n", mci); 88 edac_dbg(3, "\tmci->mtype_cap = %lx\n", mci->mtype_cap); 89 edac_dbg(3, "\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); 90 edac_dbg(3, "\tmci->edac_cap = %lx\n", mci->edac_cap); 91 edac_dbg(4, "\tmci->edac_check = %p\n", mci->edac_check); 92 edac_dbg(3, "\tmci->nr_csrows = %d, csrows = %p\n", 93 mci->nr_csrows, mci->csrows); 94 edac_dbg(3, "\tmci->nr_dimms = %d, dimms = %p\n", 95 mci->tot_dimms, mci->dimms); 96 edac_dbg(3, "\tdev = %p\n", mci->pdev); 97 edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n", 98 mci->mod_name, mci->ctl_name); 99 edac_dbg(3, "\tpvt_info = %p\n\n", mci->pvt_info); 100} 101 102#endif /* CONFIG_EDAC_DEBUG */ 103 104/* 105 * keep those in sync with the enum mem_type 106 */ 107const char *edac_mem_types[] = { 108 "Empty csrow", 109 "Reserved csrow type", 110 "Unknown csrow type", 111 "Fast page mode RAM", 112 "Extended data out RAM", 113 "Burst Extended data out RAM", 114 "Single data rate SDRAM", 115 "Registered single data rate SDRAM", 116 "Double data rate SDRAM", 117 "Registered Double data rate SDRAM", 118 "Rambus DRAM", 119 "Unbuffered DDR2 RAM", 120 "Fully buffered DDR2", 121 "Registered DDR2 RAM", 122 "Rambus XDR", 123 "Unbuffered DDR3 RAM", 124 "Registered DDR3 RAM", 125}; 126EXPORT_SYMBOL_GPL(edac_mem_types); 127 128/** 129 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation 130 * @p: pointer to a pointer with the memory offset to be used. At 131 * return, this will be incremented to point to the next offset 132 * @size: Size of the data structure to be reserved 133 * @n_elems: Number of elements that should be reserved 134 * 135 * If 'size' is a constant, the compiler will optimize this whole function 136 * down to either a no-op or the addition of a constant to the value of '*p'. 137 * 138 * The 'p' pointer is absolutely needed to keep the proper advancing 139 * further in memory to the proper offsets when allocating the struct along 140 * with its embedded structs, as edac_device_alloc_ctl_info() does it 141 * above, for example. 142 * 143 * At return, the pointer 'p' will be incremented to be used on a next call 144 * to this function. 145 */ 146void *edac_align_ptr(void **p, unsigned size, int n_elems) 147{ 148 unsigned align, r; 149 void *ptr = *p; 150 151 *p += size * n_elems; 152 153 /* 154 * 'p' can possibly be an unaligned item X such that sizeof(X) is 155 * 'size'. Adjust 'p' so that its alignment is at least as 156 * stringent as what the compiler would provide for X and return 157 * the aligned result. 158 * Here we assume that the alignment of a "long long" is the most 159 * stringent alignment that the compiler will ever provide by default. 160 * As far as I know, this is a reasonable assumption. 161 */ 162 if (size > sizeof(long)) 163 align = sizeof(long long); 164 else if (size > sizeof(int)) 165 align = sizeof(long); 166 else if (size > sizeof(short)) 167 align = sizeof(int); 168 else if (size > sizeof(char)) 169 align = sizeof(short); 170 else 171 return (char *)ptr; 172 173 r = size % align; 174 175 if (r == 0) 176 return (char *)ptr; 177 178 *p += align - r; 179 180 return (void *)(((unsigned long)ptr) + align - r); 181} 182 183/** 184 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure 185 * @mc_num: Memory controller number 186 * @n_layers: Number of MC hierarchy layers 187 * layers: Describes each layer as seen by the Memory Controller 188 * @size_pvt: size of private storage needed 189 * 190 * 191 * Everything is kmalloc'ed as one big chunk - more efficient. 192 * Only can be used if all structures have the same lifetime - otherwise 193 * you have to allocate and initialize your own structures. 194 * 195 * Use edac_mc_free() to free mc structures allocated by this function. 196 * 197 * NOTE: drivers handle multi-rank memories in different ways: in some 198 * drivers, one multi-rank memory stick is mapped as one entry, while, in 199 * others, a single multi-rank memory stick would be mapped into several 200 * entries. Currently, this function will allocate multiple struct dimm_info 201 * on such scenarios, as grouping the multiple ranks require drivers change. 202 * 203 * Returns: 204 * On failure: NULL 205 * On success: struct mem_ctl_info pointer 206 */ 207struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, 208 unsigned n_layers, 209 struct edac_mc_layer *layers, 210 unsigned sz_pvt) 211{ 212 struct mem_ctl_info *mci; 213 struct edac_mc_layer *layer; 214 struct csrow_info *csr; 215 struct rank_info *chan; 216 struct dimm_info *dimm; 217 u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS]; 218 unsigned pos[EDAC_MAX_LAYERS]; 219 unsigned size, tot_dimms = 1, count = 1; 220 unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0; 221 void *pvt, *p, *ptr = NULL; 222 int i, j, row, chn, n, len, off; 223 bool per_rank = false; 224 225 BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0); 226 /* 227 * Calculate the total amount of dimms and csrows/cschannels while 228 * in the old API emulation mode 229 */ 230 for (i = 0; i < n_layers; i++) { 231 tot_dimms *= layers[i].size; 232 if (layers[i].is_virt_csrow) 233 tot_csrows *= layers[i].size; 234 else 235 tot_channels *= layers[i].size; 236 237 if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT) 238 per_rank = true; 239 } 240 241 /* Figure out the offsets of the various items from the start of an mc 242 * structure. We want the alignment of each item to be at least as 243 * stringent as what the compiler would provide if we could simply 244 * hardcode everything into a single struct. 245 */ 246 mci = edac_align_ptr(&ptr, sizeof(*mci), 1); 247 layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers); 248 for (i = 0; i < n_layers; i++) { 249 count *= layers[i].size; 250 edac_dbg(4, "errcount layer %d size %d\n", i, count); 251 ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); 252 ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count); 253 tot_errcount += 2 * count; 254 } 255 256 edac_dbg(4, "allocating %d error counters\n", tot_errcount); 257 pvt = edac_align_ptr(&ptr, sz_pvt, 1); 258 size = ((unsigned long)pvt) + sz_pvt; 259 260 edac_dbg(1, "allocating %u bytes for mci data (%d %s, %d csrows/channels)\n", 261 size, 262 tot_dimms, 263 per_rank ? "ranks" : "dimms", 264 tot_csrows * tot_channels); 265 266 mci = kzalloc(size, GFP_KERNEL); 267 if (mci == NULL) 268 return NULL; 269 270 /* Adjust pointers so they point within the memory we just allocated 271 * rather than an imaginary chunk of memory located at address 0. 272 */ 273 layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer)); 274 for (i = 0; i < n_layers; i++) { 275 mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i])); 276 mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i])); 277 } 278 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; 279 280 /* setup index and various internal pointers */ 281 mci->mc_idx = mc_num; 282 mci->tot_dimms = tot_dimms; 283 mci->pvt_info = pvt; 284 mci->n_layers = n_layers; 285 mci->layers = layer; 286 memcpy(mci->layers, layers, sizeof(*layer) * n_layers); 287 mci->nr_csrows = tot_csrows; 288 mci->num_cschannel = tot_channels; 289 mci->mem_is_per_rank = per_rank; 290 291 /* 292 * Alocate and fill the csrow/channels structs 293 */ 294 mci->csrows = kcalloc(sizeof(*mci->csrows), tot_csrows, GFP_KERNEL); 295 if (!mci->csrows) 296 goto error; 297 for (row = 0; row < tot_csrows; row++) { 298 csr = kzalloc(sizeof(**mci->csrows), GFP_KERNEL); 299 if (!csr) 300 goto error; 301 mci->csrows[row] = csr; 302 csr->csrow_idx = row; 303 csr->mci = mci; 304 csr->nr_channels = tot_channels; 305 csr->channels = kcalloc(sizeof(*csr->channels), tot_channels, 306 GFP_KERNEL); 307 if (!csr->channels) 308 goto error; 309 310 for (chn = 0; chn < tot_channels; chn++) { 311 chan = kzalloc(sizeof(**csr->channels), GFP_KERNEL); 312 if (!chan) 313 goto error; 314 csr->channels[chn] = chan; 315 chan->chan_idx = chn; 316 chan->csrow = csr; 317 } 318 } 319 320 /* 321 * Allocate and fill the dimm structs 322 */ 323 mci->dimms = kcalloc(sizeof(*mci->dimms), tot_dimms, GFP_KERNEL); 324 if (!mci->dimms) 325 goto error; 326 327 memset(&pos, 0, sizeof(pos)); 328 row = 0; 329 chn = 0; 330 edac_dbg(4, "initializing %d %s\n", 331 tot_dimms, per_rank ? "ranks" : "dimms"); 332 for (i = 0; i < tot_dimms; i++) { 333 chan = mci->csrows[row]->channels[chn]; 334 off = EDAC_DIMM_OFF(layer, n_layers, pos[0], pos[1], pos[2]); 335 if (off < 0 || off >= tot_dimms) { 336 edac_mc_printk(mci, KERN_ERR, "EDAC core bug: EDAC_DIMM_OFF is trying to do an illegal data access\n"); 337 goto error; 338 } 339 340 dimm = kzalloc(sizeof(**mci->dimms), GFP_KERNEL); 341 mci->dimms[off] = dimm; 342 dimm->mci = mci; 343 344 edac_dbg(2, "%d: %s%i (%d:%d:%d): row %d, chan %d\n", 345 i, per_rank ? "rank" : "dimm", off, 346 pos[0], pos[1], pos[2], row, chn); 347 348 /* 349 * Copy DIMM location and initialize it. 350 */ 351 len = sizeof(dimm->label); 352 p = dimm->label; 353 n = snprintf(p, len, "mc#%u", mc_num); 354 p += n; 355 len -= n; 356 for (j = 0; j < n_layers; j++) { 357 n = snprintf(p, len, "%s#%u", 358 edac_layer_name[layers[j].type], 359 pos[j]); 360 p += n; 361 len -= n; 362 dimm->location[j] = pos[j]; 363 364 if (len <= 0) 365 break; 366 } 367 368 /* Link it to the csrows old API data */ 369 chan->dimm = dimm; 370 dimm->csrow = row; 371 dimm->cschannel = chn; 372 373 /* Increment csrow location */ 374 row++; 375 if (row == tot_csrows) { 376 row = 0; 377 chn++; 378 } 379 380 /* Increment dimm location */ 381 for (j = n_layers - 1; j >= 0; j--) { 382 pos[j]++; 383 if (pos[j] < layers[j].size) 384 break; 385 pos[j] = 0; 386 } 387 } 388 389 mci->op_state = OP_ALLOC; 390 391 /* at this point, the root kobj is valid, and in order to 392 * 'free' the object, then the function: 393 * edac_mc_unregister_sysfs_main_kobj() must be called 394 * which will perform kobj unregistration and the actual free 395 * will occur during the kobject callback operation 396 */ 397 398 return mci; 399 400error: 401 if (mci->dimms) { 402 for (i = 0; i < tot_dimms; i++) 403 kfree(mci->dimms[i]); 404 kfree(mci->dimms); 405 } 406 if (mci->csrows) { 407 for (chn = 0; chn < tot_channels; chn++) { 408 csr = mci->csrows[chn]; 409 if (csr) { 410 for (chn = 0; chn < tot_channels; chn++) 411 kfree(csr->channels[chn]); 412 kfree(csr); 413 } 414 kfree(mci->csrows[i]); 415 } 416 kfree(mci->csrows); 417 } 418 kfree(mci); 419 420 return NULL; 421} 422EXPORT_SYMBOL_GPL(edac_mc_alloc); 423 424/** 425 * edac_mc_free 426 * 'Free' a previously allocated 'mci' structure 427 * @mci: pointer to a struct mem_ctl_info structure 428 */ 429void edac_mc_free(struct mem_ctl_info *mci) 430{ 431 edac_dbg(1, "\n"); 432 433 /* the mci instance is freed here, when the sysfs object is dropped */ 434 edac_unregister_sysfs(mci); 435} 436EXPORT_SYMBOL_GPL(edac_mc_free); 437 438 439/** 440 * find_mci_by_dev 441 * 442 * scan list of controllers looking for the one that manages 443 * the 'dev' device 444 * @dev: pointer to a struct device related with the MCI 445 */ 446struct mem_ctl_info *find_mci_by_dev(struct device *dev) 447{ 448 struct mem_ctl_info *mci; 449 struct list_head *item; 450 451 edac_dbg(3, "\n"); 452 453 list_for_each(item, &mc_devices) { 454 mci = list_entry(item, struct mem_ctl_info, link); 455 456 if (mci->pdev == dev) 457 return mci; 458 } 459 460 return NULL; 461} 462EXPORT_SYMBOL_GPL(find_mci_by_dev); 463 464/* 465 * handler for EDAC to check if NMI type handler has asserted interrupt 466 */ 467static int edac_mc_assert_error_check_and_clear(void) 468{ 469 int old_state; 470 471 if (edac_op_state == EDAC_OPSTATE_POLL) 472 return 1; 473 474 old_state = edac_err_assert; 475 edac_err_assert = 0; 476 477 return old_state; 478} 479 480/* 481 * edac_mc_workq_function 482 * performs the operation scheduled by a workq request 483 */ 484static void edac_mc_workq_function(struct work_struct *work_req) 485{ 486 struct delayed_work *d_work = to_delayed_work(work_req); 487 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); 488 489 mutex_lock(&mem_ctls_mutex); 490 491 /* if this control struct has movd to offline state, we are done */ 492 if (mci->op_state == OP_OFFLINE) { 493 mutex_unlock(&mem_ctls_mutex); 494 return; 495 } 496 497 /* Only poll controllers that are running polled and have a check */ 498 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) 499 mci->edac_check(mci); 500 501 mutex_unlock(&mem_ctls_mutex); 502 503 /* Reschedule */ 504 queue_delayed_work(edac_workqueue, &mci->work, 505 msecs_to_jiffies(edac_mc_get_poll_msec())); 506} 507 508/* 509 * edac_mc_workq_setup 510 * initialize a workq item for this mci 511 * passing in the new delay period in msec 512 * 513 * locking model: 514 * 515 * called with the mem_ctls_mutex held 516 */ 517static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) 518{ 519 edac_dbg(0, "\n"); 520 521 /* if this instance is not in the POLL state, then simply return */ 522 if (mci->op_state != OP_RUNNING_POLL) 523 return; 524 525 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); 526 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); 527} 528 529/* 530 * edac_mc_workq_teardown 531 * stop the workq processing on this mci 532 * 533 * locking model: 534 * 535 * called WITHOUT lock held 536 */ 537static void edac_mc_workq_teardown(struct mem_ctl_info *mci) 538{ 539 int status; 540 541 if (mci->op_state != OP_RUNNING_POLL) 542 return; 543 544 status = cancel_delayed_work(&mci->work); 545 if (status == 0) { 546 edac_dbg(0, "not canceled, flush the queue\n"); 547 548 /* workq instance might be running, wait for it */ 549 flush_workqueue(edac_workqueue); 550 } 551} 552 553/* 554 * edac_mc_reset_delay_period(unsigned long value) 555 * 556 * user space has updated our poll period value, need to 557 * reset our workq delays 558 */ 559void edac_mc_reset_delay_period(int value) 560{ 561 struct mem_ctl_info *mci; 562 struct list_head *item; 563 564 mutex_lock(&mem_ctls_mutex); 565 566 /* scan the list and turn off all workq timers, doing so under lock 567 */ 568 list_for_each(item, &mc_devices) { 569 mci = list_entry(item, struct mem_ctl_info, link); 570 571 if (mci->op_state == OP_RUNNING_POLL) 572 cancel_delayed_work(&mci->work); 573 } 574 575 mutex_unlock(&mem_ctls_mutex); 576 577 578 /* re-walk the list, and reset the poll delay */ 579 mutex_lock(&mem_ctls_mutex); 580 581 list_for_each(item, &mc_devices) { 582 mci = list_entry(item, struct mem_ctl_info, link); 583 584 edac_mc_workq_setup(mci, (unsigned long) value); 585 } 586 587 mutex_unlock(&mem_ctls_mutex); 588} 589 590 591 592/* Return 0 on success, 1 on failure. 593 * Before calling this function, caller must 594 * assign a unique value to mci->mc_idx. 595 * 596 * locking model: 597 * 598 * called with the mem_ctls_mutex lock held 599 */ 600static int add_mc_to_global_list(struct mem_ctl_info *mci) 601{ 602 struct list_head *item, *insert_before; 603 struct mem_ctl_info *p; 604 605 insert_before = &mc_devices; 606 607 p = find_mci_by_dev(mci->pdev); 608 if (unlikely(p != NULL)) 609 goto fail0; 610 611 list_for_each(item, &mc_devices) { 612 p = list_entry(item, struct mem_ctl_info, link); 613 614 if (p->mc_idx >= mci->mc_idx) { 615 if (unlikely(p->mc_idx == mci->mc_idx)) 616 goto fail1; 617 618 insert_before = item; 619 break; 620 } 621 } 622 623 list_add_tail_rcu(&mci->link, insert_before); 624 atomic_inc(&edac_handlers); 625 return 0; 626 627fail0: 628 edac_printk(KERN_WARNING, EDAC_MC, 629 "%s (%s) %s %s already assigned %d\n", dev_name(p->pdev), 630 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); 631 return 1; 632 633fail1: 634 edac_printk(KERN_WARNING, EDAC_MC, 635 "bug in low-level driver: attempt to assign\n" 636 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); 637 return 1; 638} 639 640static void del_mc_from_global_list(struct mem_ctl_info *mci) 641{ 642 atomic_dec(&edac_handlers); 643 list_del_rcu(&mci->link); 644 645 /* these are for safe removal of devices from global list while 646 * NMI handlers may be traversing list 647 */ 648 synchronize_rcu(); 649 INIT_LIST_HEAD(&mci->link); 650} 651 652/** 653 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'. 654 * 655 * If found, return a pointer to the structure. 656 * Else return NULL. 657 * 658 * Caller must hold mem_ctls_mutex. 659 */ 660struct mem_ctl_info *edac_mc_find(int idx) 661{ 662 struct list_head *item; 663 struct mem_ctl_info *mci; 664 665 list_for_each(item, &mc_devices) { 666 mci = list_entry(item, struct mem_ctl_info, link); 667 668 if (mci->mc_idx >= idx) { 669 if (mci->mc_idx == idx) 670 return mci; 671 672 break; 673 } 674 } 675 676 return NULL; 677} 678EXPORT_SYMBOL(edac_mc_find); 679 680/** 681 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and 682 * create sysfs entries associated with mci structure 683 * @mci: pointer to the mci structure to be added to the list 684 * 685 * Return: 686 * 0 Success 687 * !0 Failure 688 */ 689 690/* FIXME - should a warning be printed if no error detection? correction? */ 691int edac_mc_add_mc(struct mem_ctl_info *mci) 692{ 693 edac_dbg(0, "\n"); 694 695#ifdef CONFIG_EDAC_DEBUG 696 if (edac_debug_level >= 3) 697 edac_mc_dump_mci(mci); 698 699 if (edac_debug_level >= 4) { 700 int i; 701 702 for (i = 0; i < mci->nr_csrows; i++) { 703 int j; 704 705 edac_mc_dump_csrow(mci->csrows[i]); 706 for (j = 0; j < mci->csrows[i]->nr_channels; j++) 707 edac_mc_dump_channel(mci->csrows[i]->channels[j]); 708 } 709 for (i = 0; i < mci->tot_dimms; i++) 710 edac_mc_dump_dimm(mci->dimms[i]); 711 } 712#endif 713 mutex_lock(&mem_ctls_mutex); 714 715 if (add_mc_to_global_list(mci)) 716 goto fail0; 717 718 /* set load time so that error rate can be tracked */ 719 mci->start_time = jiffies; 720 721 if (edac_create_sysfs_mci_device(mci)) { 722 edac_mc_printk(mci, KERN_WARNING, 723 "failed to create sysfs device\n"); 724 goto fail1; 725 } 726 727 /* If there IS a check routine, then we are running POLLED */ 728 if (mci->edac_check != NULL) { 729 /* This instance is NOW RUNNING */ 730 mci->op_state = OP_RUNNING_POLL; 731 732 edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); 733 } else { 734 mci->op_state = OP_RUNNING_INTERRUPT; 735 } 736 737 /* Report action taken */ 738 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" 739 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 740 741 mutex_unlock(&mem_ctls_mutex); 742 return 0; 743 744fail1: 745 del_mc_from_global_list(mci); 746 747fail0: 748 mutex_unlock(&mem_ctls_mutex); 749 return 1; 750} 751EXPORT_SYMBOL_GPL(edac_mc_add_mc); 752 753/** 754 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and 755 * remove mci structure from global list 756 * @pdev: Pointer to 'struct device' representing mci structure to remove. 757 * 758 * Return pointer to removed mci structure, or NULL if device not found. 759 */ 760struct mem_ctl_info *edac_mc_del_mc(struct device *dev) 761{ 762 struct mem_ctl_info *mci; 763 764 edac_dbg(0, "\n"); 765 766 mutex_lock(&mem_ctls_mutex); 767 768 /* find the requested mci struct in the global list */ 769 mci = find_mci_by_dev(dev); 770 if (mci == NULL) { 771 mutex_unlock(&mem_ctls_mutex); 772 return NULL; 773 } 774 775 del_mc_from_global_list(mci); 776 mutex_unlock(&mem_ctls_mutex); 777 778 /* flush workq processes */ 779 edac_mc_workq_teardown(mci); 780 781 /* marking MCI offline */ 782 mci->op_state = OP_OFFLINE; 783 784 /* remove from sysfs */ 785 edac_remove_sysfs_mci_device(mci); 786 787 edac_printk(KERN_INFO, EDAC_MC, 788 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, 789 mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 790 791 return mci; 792} 793EXPORT_SYMBOL_GPL(edac_mc_del_mc); 794 795static void edac_mc_scrub_block(unsigned long page, unsigned long offset, 796 u32 size) 797{ 798 struct page *pg; 799 void *virt_addr; 800 unsigned long flags = 0; 801 802 edac_dbg(3, "\n"); 803 804 /* ECC error page was not in our memory. Ignore it. */ 805 if (!pfn_valid(page)) 806 return; 807 808 /* Find the actual page structure then map it and fix */ 809 pg = pfn_to_page(page); 810 811 if (PageHighMem(pg)) 812 local_irq_save(flags); 813 814 virt_addr = kmap_atomic(pg); 815 816 /* Perform architecture specific atomic scrub operation */ 817 atomic_scrub(virt_addr + offset, size); 818 819 /* Unmap and complete */ 820 kunmap_atomic(virt_addr); 821 822 if (PageHighMem(pg)) 823 local_irq_restore(flags); 824} 825 826/* FIXME - should return -1 */ 827int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) 828{ 829 struct csrow_info **csrows = mci->csrows; 830 int row, i, j, n; 831 832 edac_dbg(1, "MC%d: 0x%lx\n", mci->mc_idx, page); 833 row = -1; 834 835 for (i = 0; i < mci->nr_csrows; i++) { 836 struct csrow_info *csrow = csrows[i]; 837 n = 0; 838 for (j = 0; j < csrow->nr_channels; j++) { 839 struct dimm_info *dimm = csrow->channels[j]->dimm; 840 n += dimm->nr_pages; 841 } 842 if (n == 0) 843 continue; 844 845 edac_dbg(3, "MC%d: first(0x%lx) page(0x%lx) last(0x%lx) mask(0x%lx)\n", 846 mci->mc_idx, 847 csrow->first_page, page, csrow->last_page, 848 csrow->page_mask); 849 850 if ((page >= csrow->first_page) && 851 (page <= csrow->last_page) && 852 ((page & csrow->page_mask) == 853 (csrow->first_page & csrow->page_mask))) { 854 row = i; 855 break; 856 } 857 } 858 859 if (row == -1) 860 edac_mc_printk(mci, KERN_ERR, 861 "could not look up page error address %lx\n", 862 (unsigned long)page); 863 864 return row; 865} 866EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); 867 868const char *edac_layer_name[] = { 869 [EDAC_MC_LAYER_BRANCH] = "branch", 870 [EDAC_MC_LAYER_CHANNEL] = "channel", 871 [EDAC_MC_LAYER_SLOT] = "slot", 872 [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", 873}; 874EXPORT_SYMBOL_GPL(edac_layer_name); 875 876static void edac_inc_ce_error(struct mem_ctl_info *mci, 877 bool enable_per_layer_report, 878 const int pos[EDAC_MAX_LAYERS]) 879{ 880 int i, index = 0; 881 882 mci->ce_mc++; 883 884 if (!enable_per_layer_report) { 885 mci->ce_noinfo_count++; 886 return; 887 } 888 889 for (i = 0; i < mci->n_layers; i++) { 890 if (pos[i] < 0) 891 break; 892 index += pos[i]; 893 mci->ce_per_layer[i][index]++; 894 895 if (i < mci->n_layers - 1) 896 index *= mci->layers[i + 1].size; 897 } 898} 899 900static void edac_inc_ue_error(struct mem_ctl_info *mci, 901 bool enable_per_layer_report, 902 const int pos[EDAC_MAX_LAYERS]) 903{ 904 int i, index = 0; 905 906 mci->ue_mc++; 907 908 if (!enable_per_layer_report) { 909 mci->ce_noinfo_count++; 910 return; 911 } 912 913 for (i = 0; i < mci->n_layers; i++) { 914 if (pos[i] < 0) 915 break; 916 index += pos[i]; 917 mci->ue_per_layer[i][index]++; 918 919 if (i < mci->n_layers - 1) 920 index *= mci->layers[i + 1].size; 921 } 922} 923 924static void edac_ce_error(struct mem_ctl_info *mci, 925 const int pos[EDAC_MAX_LAYERS], 926 const char *msg, 927 const char *location, 928 const char *label, 929 const char *detail, 930 const char *other_detail, 931 const bool enable_per_layer_report, 932 const unsigned long page_frame_number, 933 const unsigned long offset_in_page, 934 long grain) 935{ 936 unsigned long remapped_page; 937 938 if (edac_mc_get_log_ce()) { 939 if (other_detail && *other_detail) 940 edac_mc_printk(mci, KERN_WARNING, 941 "CE %s on %s (%s %s - %s)\n", 942 msg, label, location, 943 detail, other_detail); 944 else 945 edac_mc_printk(mci, KERN_WARNING, 946 "CE %s on %s (%s %s)\n", 947 msg, label, location, 948 detail); 949 } 950 edac_inc_ce_error(mci, enable_per_layer_report, pos); 951 952 if (mci->scrub_mode & SCRUB_SW_SRC) { 953 /* 954 * Some memory controllers (called MCs below) can remap 955 * memory so that it is still available at a different 956 * address when PCI devices map into memory. 957 * MC's that can't do this, lose the memory where PCI 958 * devices are mapped. This mapping is MC-dependent 959 * and so we call back into the MC driver for it to 960 * map the MC page to a physical (CPU) page which can 961 * then be mapped to a virtual page - which can then 962 * be scrubbed. 963 */ 964 remapped_page = mci->ctl_page_to_phys ? 965 mci->ctl_page_to_phys(mci, page_frame_number) : 966 page_frame_number; 967 968 edac_mc_scrub_block(remapped_page, 969 offset_in_page, grain); 970 } 971} 972 973static void edac_ue_error(struct mem_ctl_info *mci, 974 const int pos[EDAC_MAX_LAYERS], 975 const char *msg, 976 const char *location, 977 const char *label, 978 const char *detail, 979 const char *other_detail, 980 const bool enable_per_layer_report) 981{ 982 if (edac_mc_get_log_ue()) { 983 if (other_detail && *other_detail) 984 edac_mc_printk(mci, KERN_WARNING, 985 "UE %s on %s (%s %s - %s)\n", 986 msg, label, location, detail, 987 other_detail); 988 else 989 edac_mc_printk(mci, KERN_WARNING, 990 "UE %s on %s (%s %s)\n", 991 msg, label, location, detail); 992 } 993 994 if (edac_mc_get_panic_on_ue()) { 995 if (other_detail && *other_detail) 996 panic("UE %s on %s (%s%s - %s)\n", 997 msg, label, location, detail, other_detail); 998 else 999 panic("UE %s on %s (%s%s)\n", 1000 msg, label, location, detail); 1001 } 1002 1003 edac_inc_ue_error(mci, enable_per_layer_report, pos); 1004} 1005 1006#define OTHER_LABEL " or " 1007 1008/** 1009 * edac_mc_handle_error - reports a memory event to userspace 1010 * 1011 * @type: severity of the error (CE/UE/Fatal) 1012 * @mci: a struct mem_ctl_info pointer 1013 * @page_frame_number: mem page where the error occurred 1014 * @offset_in_page: offset of the error inside the page 1015 * @syndrome: ECC syndrome 1016 * @top_layer: Memory layer[0] position 1017 * @mid_layer: Memory layer[1] position 1018 * @low_layer: Memory layer[2] position 1019 * @msg: Message meaningful to the end users that 1020 * explains the event 1021 * @other_detail: Technical details about the event that 1022 * may help hardware manufacturers and 1023 * EDAC developers to analyse the event 1024 * @arch_log: Architecture-specific struct that can 1025 * be used to add extended information to the 1026 * tracepoint, like dumping MCE registers. 1027 */ 1028void edac_mc_handle_error(const enum hw_event_mc_err_type type, 1029 struct mem_ctl_info *mci, 1030 const unsigned long page_frame_number, 1031 const unsigned long offset_in_page, 1032 const unsigned long syndrome, 1033 const int top_layer, 1034 const int mid_layer, 1035 const int low_layer, 1036 const char *msg, 1037 const char *other_detail, 1038 const void *arch_log) 1039{ 1040 /* FIXME: too much for stack: move it to some pre-alocated area */ 1041 char detail[80], location[80]; 1042 char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; 1043 char *p; 1044 int row = -1, chan = -1; 1045 int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; 1046 int i; 1047 long grain; 1048 bool enable_per_layer_report = false; 1049 u16 error_count; /* FIXME: make it a parameter */ 1050 u8 grain_bits; 1051 1052 edac_dbg(3, "MC%d\n", mci->mc_idx); 1053 1054 /* 1055 * Check if the event report is consistent and if the memory 1056 * location is known. If it is known, enable_per_layer_report will be 1057 * true, the DIMM(s) label info will be filled and the per-layer 1058 * error counters will be incremented. 1059 */ 1060 for (i = 0; i < mci->n_layers; i++) { 1061 if (pos[i] >= (int)mci->layers[i].size) { 1062 if (type == HW_EVENT_ERR_CORRECTED) 1063 p = "CE"; 1064 else 1065 p = "UE"; 1066 1067 edac_mc_printk(mci, KERN_ERR, 1068 "INTERNAL ERROR: %s value is out of range (%d >= %d)\n", 1069 edac_layer_name[mci->layers[i].type], 1070 pos[i], mci->layers[i].size); 1071 /* 1072 * Instead of just returning it, let's use what's 1073 * known about the error. The increment routines and 1074 * the DIMM filter logic will do the right thing by 1075 * pointing the likely damaged DIMMs. 1076 */ 1077 pos[i] = -1; 1078 } 1079 if (pos[i] >= 0) 1080 enable_per_layer_report = true; 1081 } 1082 1083 /* 1084 * Get the dimm label/grain that applies to the match criteria. 1085 * As the error algorithm may not be able to point to just one memory 1086 * stick, the logic here will get all possible labels that could 1087 * pottentially be affected by the error. 1088 * On FB-DIMM memory controllers, for uncorrected errors, it is common 1089 * to have only the MC channel and the MC dimm (also called "branch") 1090 * but the channel is not known, as the memory is arranged in pairs, 1091 * where each memory belongs to a separate channel within the same 1092 * branch. 1093 */ 1094 grain = 0; 1095 p = label; 1096 *p = '\0'; 1097 for (i = 0; i < mci->tot_dimms; i++) { 1098 struct dimm_info *dimm = mci->dimms[i]; 1099 1100 if (top_layer >= 0 && top_layer != dimm->location[0]) 1101 continue; 1102 if (mid_layer >= 0 && mid_layer != dimm->location[1]) 1103 continue; 1104 if (low_layer >= 0 && low_layer != dimm->location[2]) 1105 continue; 1106 1107 /* get the max grain, over the error match range */ 1108 if (dimm->grain > grain) 1109 grain = dimm->grain; 1110 1111 /* 1112 * If the error is memory-controller wide, there's no need to 1113 * seek for the affected DIMMs because the whole 1114 * channel/memory controller/... may be affected. 1115 * Also, don't show errors for empty DIMM slots. 1116 */ 1117 if (enable_per_layer_report && dimm->nr_pages) { 1118 if (p != label) { 1119 strcpy(p, OTHER_LABEL); 1120 p += strlen(OTHER_LABEL); 1121 } 1122 strcpy(p, dimm->label); 1123 p += strlen(p); 1124 *p = '\0'; 1125 1126 /* 1127 * get csrow/channel of the DIMM, in order to allow 1128 * incrementing the compat API counters 1129 */ 1130 edac_dbg(4, "%s csrows map: (%d,%d)\n", 1131 mci->mem_is_per_rank ? "rank" : "dimm", 1132 dimm->csrow, dimm->cschannel); 1133 if (row == -1) 1134 row = dimm->csrow; 1135 else if (row >= 0 && row != dimm->csrow) 1136 row = -2; 1137 1138 if (chan == -1) 1139 chan = dimm->cschannel; 1140 else if (chan >= 0 && chan != dimm->cschannel) 1141 chan = -2; 1142 } 1143 } 1144 1145 if (!enable_per_layer_report) { 1146 strcpy(label, "any memory"); 1147 } else { 1148 edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); 1149 if (p == label) 1150 strcpy(label, "unknown memory"); 1151 if (type == HW_EVENT_ERR_CORRECTED) { 1152 if (row >= 0) { 1153 mci->csrows[row]->ce_count++; 1154 if (chan >= 0) 1155 mci->csrows[row]->channels[chan]->ce_count++; 1156 } 1157 } else 1158 if (row >= 0) 1159 mci->csrows[row]->ue_count++; 1160 } 1161 1162 /* Fill the RAM location data */ 1163 p = location; 1164 for (i = 0; i < mci->n_layers; i++) { 1165 if (pos[i] < 0) 1166 continue; 1167 1168 p += sprintf(p, "%s:%d ", 1169 edac_layer_name[mci->layers[i].type], 1170 pos[i]); 1171 } 1172 if (p > location) 1173 *(p - 1) = '\0'; 1174 1175 /* Report the error via the trace interface */ 1176 1177 error_count = 1; /* FIXME: allow change it */ 1178 grain_bits = fls_long(grain) + 1; 1179 trace_mc_event(type, msg, label, error_count, 1180 mci->mc_idx, top_layer, mid_layer, low_layer, 1181 PAGES_TO_MiB(page_frame_number) | offset_in_page, 1182 grain_bits, syndrome, other_detail); 1183 1184 /* Memory type dependent details about the error */ 1185 if (type == HW_EVENT_ERR_CORRECTED) { 1186 snprintf(detail, sizeof(detail), 1187 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", 1188 page_frame_number, offset_in_page, 1189 grain, syndrome); 1190 edac_ce_error(mci, pos, msg, location, label, detail, 1191 other_detail, enable_per_layer_report, 1192 page_frame_number, offset_in_page, grain); 1193 } else { 1194 snprintf(detail, sizeof(detail), 1195 "page:0x%lx offset:0x%lx grain:%ld", 1196 page_frame_number, offset_in_page, grain); 1197 1198 edac_ue_error(mci, pos, msg, location, label, detail, 1199 other_detail, enable_per_layer_report); 1200 } 1201} 1202EXPORT_SYMBOL_GPL(edac_mc_handle_error); 1203