edac_mc.c revision d90c008963ef638cb7ab7d5eb76362b3c2d379bc
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	int i;

	debugf4("\tdimm = %p\n", dimm);
	debugf4("\tdimm->label = '%s'\n", dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
	debugf4("\tdimm location ");
	for (i = 0; i < dimm->mci->n_layers; i++) {
		printk(KERN_CONT "%d", dimm->location[i]);
		if (i < dimm->mci->n_layers - 1)
			printk(KERN_CONT ".");
	}
	printk(KERN_CONT "\n");
	debugf4("\tdimm->grain = %d\n", dimm->grain);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->pdev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif				/* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);
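/*
 * Example: a driver can print a human-readable name for a detected memory
 * type by indexing the array with enum mem_type from <linux/edac.h>;
 * assuming that enum's ordering, edac_mem_types[MEM_DDR3] would be
 * "Unbuffered DDR3 RAM".
 */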
"Registered single data rate SDRAM", 115 "Double data rate SDRAM", 116 "Registered Double data rate SDRAM", 117 "Rambus DRAM", 118 "Unbuffered DDR2 RAM", 119 "Fully buffered DDR2", 120 "Registered DDR2 RAM", 121 "Rambus XDR", 122 "Unbuffered DDR3 RAM", 123 "Registered DDR3 RAM", 124}; 125EXPORT_SYMBOL_GPL(edac_mem_types); 126 127/** 128 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation 129 * @p: pointer to a pointer with the memory offset to be used. At 130 * return, this will be incremented to point to the next offset 131 * @size: Size of the data structure to be reserved 132 * @n_elems: Number of elements that should be reserved 133 * 134 * If 'size' is a constant, the compiler will optimize this whole function 135 * down to either a no-op or the addition of a constant to the value of '*p'. 136 * 137 * The 'p' pointer is absolutely needed to keep the proper advancing 138 * further in memory to the proper offsets when allocating the struct along 139 * with its embedded structs, as edac_device_alloc_ctl_info() does it 140 * above, for example. 141 * 142 * At return, the pointer 'p' will be incremented to be used on a next call 143 * to this function. 144 */ 145void *edac_align_ptr(void **p, unsigned size, int n_elems) 146{ 147 unsigned align, r; 148 void *ptr = *p; 149 150 *p += size * n_elems; 151 152 /* 153 * 'p' can possibly be an unaligned item X such that sizeof(X) is 154 * 'size'. Adjust 'p' so that its alignment is at least as 155 * stringent as what the compiler would provide for X and return 156 * the aligned result. 157 * Here we assume that the alignment of a "long long" is the most 158 * stringent alignment that the compiler will ever provide by default. 159 * As far as I know, this is a reasonable assumption. 160 */ 161 if (size > sizeof(long)) 162 align = sizeof(long long); 163 else if (size > sizeof(int)) 164 align = sizeof(long); 165 else if (size > sizeof(short)) 166 align = sizeof(int); 167 else if (size > sizeof(char)) 168 align = sizeof(short); 169 else 170 return (char *)ptr; 171 172 r = size % align; 173 174 if (r == 0) 175 return (char *)ptr; 176 177 *p += align - r; 178 179 return (void *)(((unsigned long)ptr) + align - r); 180} 181 182/** 183 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure 184 * @mc_num: Memory controller number 185 * @n_layers: Number of MC hierarchy layers 186 * layers: Describes each layer as seen by the Memory Controller 187 * @size_pvt: size of private storage needed 188 * 189 * 190 * Everything is kmalloc'ed as one big chunk - more efficient. 191 * Only can be used if all structures have the same lifetime - otherwise 192 * you have to allocate and initialize your own structures. 193 * 194 * Use edac_mc_free() to free mc structures allocated by this function. 195 * 196 * NOTE: drivers handle multi-rank memories in different ways: in some 197 * drivers, one multi-rank memory stick is mapped as one entry, while, in 198 * others, a single multi-rank memory stick would be mapped into several 199 * entries. Currently, this function will allocate multiple struct dimm_info 200 * on such scenarios, as grouping the multiple ranks require drivers change. 
/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:	Memory controller number
 * @n_layers:	Number of MC hierarchy layers
 * @layers:	Describes each layer as seen by the Memory Controller
 * @sz_pvt:	size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * in such scenarios, as grouping the multiple ranks would require driver
 * changes.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, row, chn, n, len;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
				     pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/*
		 * Fill the dimm's location and build its label from it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 * edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
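/*
 * A usage sketch for the allocator above, as a hypothetical driver with
 * four csrows of two channels each might call it; the pvt_info size and
 * struct name are illustrative only:
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct mydriver_pvt));
 *	if (!mci)
 *		return -ENOMEM;
 */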
/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_unregister_sysfs(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related to the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->pdev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			   msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);


	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}
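/*
 * Sketch of the polling flow built from the helpers above (the function
 * names are from this file; the module-parameter plumbing that ends up
 * calling edac_mc_reset_delay_period() is assumed to live in
 * edac_module.c):
 *
 *	edac_mc_add_mc()
 *	  -> edac_mc_workq_setup(mci, edac_mc_get_poll_msec())
 *	       -> queues edac_mc_workq_function() on edac_workqueue
 *	            -> mci->edac_check(mci), then requeues itself
 *	new poll period written -> edac_mc_reset_delay_period(value)
 *	edac_mc_del_mc() -> edac_mc_workq_teardown() stops the requeueing
 */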

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->pdev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->pdev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);
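/*
 * Note on the early break above: add_mc_to_global_list() keeps mc_devices
 * sorted by mc_idx, so the walk can stop at the first entry whose index
 * is >= the one searched for. E.g. with indices {0, 1, 3} on the list,
 * edac_mc_find(2) stops at the entry with index 3 and returns NULL.
 */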
/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
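/*
 * Lifecycle sketch for a hypothetical driver pairing the calls above
 * (error handling trimmed to the essentials; &pdev->dev is whatever
 * struct device the driver binds to, used here as the list key):
 *
 *	probe:
 *		mci = edac_mc_alloc(...);
 *		mci->pdev = &pdev->dev;
 *		...fill mci and dimm fields...
 *		if (edac_mc_add_mc(mci)) {
 *			edac_mc_free(mci);
 *			return -ENODEV;
 *		}
 *	remove:
 *		mci = edac_mc_del_mc(&pdev->dev);
 *		if (mci)
 *			edac_mc_free(mci);
 */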
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i, j, n;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j].dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_mc++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_mc++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}
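/*
 * Worked example for the per-layer index arithmetic above: with two
 * layers of sizes {4, 2} and pos = {2, 1}, layer 0 increments
 * ce_per_layer[0][2]; the index is then scaled by the next layer's size
 * (2 * 2 = 4) before adding pos[1], so layer 1 increments
 * ce_per_layer[1][5]. Each level thus flattens its position onto the
 * running index of the levels above it.
 */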
static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  long grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s %s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s %s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MCs that can't do this lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
				    offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s %s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s %s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s %s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s %s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "
/**
 * edac_mc_handle_error - reports a memory event to userspace
 *
 * @type:			severity of the error (CE/UE/Fatal)
 * @mci:			a struct mem_ctl_info pointer
 * @page_frame_number:		mem page where the error occurred
 * @offset_in_page:		offset of the error inside the page
 * @syndrome:			ECC syndrome
 * @top_layer:			Memory layer[0] position
 * @mid_layer:			Memory layer[1] position
 * @low_layer:			Memory layer[2] position
 * @msg:			Message meaningful to the end users that
 *				explains the event
 * @other_detail:		Technical details about the event that
 *				may help hardware manufacturers and
 *				EDAC developers to analyse the event
 * @arch_log:			Architecture-specific struct that can
 *				be used to add extended information to the
 *				tracepoint, like dumping MCE registers.
 */
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int top_layer,
			  const int mid_layer,
			  const int low_layer,
			  const char *msg,
			  const char *other_detail,
			  const void *arch_log)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
	int i;
	long grain;
	bool enable_per_layer_report = false;
	u16 error_count;	/* FIXME: make it a parameter */
	u8 grain_bits;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing to the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = &mci->dimms[i];

		if (top_layer >= 0 && top_layer != dimm->location[0])
			continue;
		if (mid_layer >= 0 && mid_layer != dimm->location[1])
			continue;
		if (low_layer >= 0 && low_layer != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			debugf4("%s: %s csrows map: (%d,%d)\n",
				__func__,
				mci->mem_is_per_rank ? "rank" : "dimm",
				dimm->csrow, dimm->cschannel);
"rank" : "dimm", 1100 dimm->csrow, dimm->cschannel); 1101 1102 if (row == -1) 1103 row = dimm->csrow; 1104 else if (row >= 0 && row != dimm->csrow) 1105 row = -2; 1106 1107 if (chan == -1) 1108 chan = dimm->cschannel; 1109 else if (chan >= 0 && chan != dimm->cschannel) 1110 chan = -2; 1111 } 1112 } 1113 1114 if (!enable_per_layer_report) { 1115 strcpy(label, "any memory"); 1116 } else { 1117 debugf4("%s: csrow/channel to increment: (%d,%d)\n", 1118 __func__, row, chan); 1119 if (p == label) 1120 strcpy(label, "unknown memory"); 1121 if (type == HW_EVENT_ERR_CORRECTED) { 1122 if (row >= 0) { 1123 mci->csrows[row].ce_count++; 1124 if (chan >= 0) 1125 mci->csrows[row].channels[chan].ce_count++; 1126 } 1127 } else 1128 if (row >= 0) 1129 mci->csrows[row].ue_count++; 1130 } 1131 1132 /* Fill the RAM location data */ 1133 p = location; 1134 for (i = 0; i < mci->n_layers; i++) { 1135 if (pos[i] < 0) 1136 continue; 1137 1138 p += sprintf(p, "%s:%d ", 1139 edac_layer_name[mci->layers[i].type], 1140 pos[i]); 1141 } 1142 if (p > location) 1143 *(p - 1) = '\0'; 1144 1145 /* Report the error via the trace interface */ 1146 1147 error_count = 1; /* FIXME: allow change it */ 1148 grain_bits = fls_long(grain) + 1; 1149 trace_mc_event(type, msg, label, error_count, 1150 mci->mc_idx, top_layer, mid_layer, low_layer, 1151 PAGES_TO_MiB(page_frame_number) | offset_in_page, 1152 grain_bits, syndrome, other_detail); 1153 1154 /* Memory type dependent details about the error */ 1155 if (type == HW_EVENT_ERR_CORRECTED) { 1156 snprintf(detail, sizeof(detail), 1157 "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", 1158 page_frame_number, offset_in_page, 1159 grain, syndrome); 1160 edac_ce_error(mci, pos, msg, location, label, detail, 1161 other_detail, enable_per_layer_report, 1162 page_frame_number, offset_in_page, grain); 1163 } else { 1164 snprintf(detail, sizeof(detail), 1165 "page:0x%lx offset:0x%lx grain:%ld", 1166 page_frame_number, offset_in_page, grain); 1167 1168 edac_ue_error(mci, pos, msg, location, label, detail, 1169 other_detail, enable_per_layer_report); 1170 } 1171} 1172EXPORT_SYMBOL_GPL(edac_mc_handle_error); 1173