edac_mc.c revision 4275be63559719c3149b19751029f1b0f1b26775
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	int i;

	debugf4("\tdimm = %p\n", dimm);
	debugf4("\tdimm->label = '%s'\n", dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
	debugf4("\tdimm location ");
	for (i = 0; i < dimm->mci->n_layers; i++) {
		printk(KERN_CONT "%d", dimm->location[i]);
		if (i < dimm->mci->n_layers - 1)
			printk(KERN_CONT ".");
	}
	printk(KERN_CONT "\n");
	debugf4("\tdimm->grain = %d\n", dimm->grain);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->tot_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
RAM", 114 "Fully buffered DDR2", 115 "Registered DDR2 RAM", 116 "Rambus XDR", 117 "Unbuffered DDR3 RAM", 118 "Registered DDR3 RAM", 119}; 120EXPORT_SYMBOL_GPL(edac_mem_types); 121 122/** 123 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation 124 * @p: pointer to a pointer with the memory offset to be used. At 125 * return, this will be incremented to point to the next offset 126 * @size: Size of the data structure to be reserved 127 * @n_elems: Number of elements that should be reserved 128 * 129 * If 'size' is a constant, the compiler will optimize this whole function 130 * down to either a no-op or the addition of a constant to the value of '*p'. 131 * 132 * The 'p' pointer is absolutely needed to keep the proper advancing 133 * further in memory to the proper offsets when allocating the struct along 134 * with its embedded structs, as edac_device_alloc_ctl_info() does it 135 * above, for example. 136 * 137 * At return, the pointer 'p' will be incremented to be used on a next call 138 * to this function. 139 */ 140void *edac_align_ptr(void **p, unsigned size, int n_elems) 141{ 142 unsigned align, r; 143 void *ptr = *p; 144 145 *p += size * n_elems; 146 147 /* 148 * 'p' can possibly be an unaligned item X such that sizeof(X) is 149 * 'size'. Adjust 'p' so that its alignment is at least as 150 * stringent as what the compiler would provide for X and return 151 * the aligned result. 152 * Here we assume that the alignment of a "long long" is the most 153 * stringent alignment that the compiler will ever provide by default. 154 * As far as I know, this is a reasonable assumption. 155 */ 156 if (size > sizeof(long)) 157 align = sizeof(long long); 158 else if (size > sizeof(int)) 159 align = sizeof(long); 160 else if (size > sizeof(short)) 161 align = sizeof(int); 162 else if (size > sizeof(char)) 163 align = sizeof(short); 164 else 165 return (char *)ptr; 166 167 r = size % align; 168 169 if (r == 0) 170 return (char *)ptr; 171 172 *p += align - r; 173 174 return (void *)(((unsigned long)ptr) + align - r); 175} 176 177/** 178 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure 179 * @mc_num: Memory controller number 180 * @n_layers: Number of MC hierarchy layers 181 * layers: Describes each layer as seen by the Memory Controller 182 * @size_pvt: size of private storage needed 183 * 184 * 185 * Everything is kmalloc'ed as one big chunk - more efficient. 186 * Only can be used if all structures have the same lifetime - otherwise 187 * you have to allocate and initialize your own structures. 188 * 189 * Use edac_mc_free() to free mc structures allocated by this function. 190 * 191 * NOTE: drivers handle multi-rank memories in different ways: in some 192 * drivers, one multi-rank memory stick is mapped as one entry, while, in 193 * others, a single multi-rank memory stick would be mapped into several 194 * entries. Currently, this function will allocate multiple struct dimm_info 195 * on such scenarios, as grouping the multiple ranks require drivers change. 

/**
 * new_edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info
 * @mc_num:		Memory controller number
 * @n_layers:		Number of MC hierarchy layers
 * @layers:		Describes each layer as seen by the Memory Controller
 * @sz_pvt:		size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks requires changes to
 * the drivers.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *new_edac_mc_alloc(unsigned mc_num,
				       unsigned n_layers,
				       struct edac_mc_layer *layers,
				       unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	void *pvt, *ptr = NULL;
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	int i, j, err, row, chn;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an
	 * mc structure.  We want the alignment of each item to be at least
	 * as stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
				     pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/* Copy DIMM location */
		for (j = 0; j < n_layers; j++)
			dimm->location[j] = pos[j];

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *	edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(new_edac_mc_alloc);
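
/*
 * A usage sketch (the sizes and 'struct my_pvt' are hypothetical): a driver
 * whose controller is organized as channels containing DIMM slots could
 * describe that hierarchy directly instead of the legacy csrow/channel view:
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[0].size = 4;
 *	layers[0].is_virt_csrow = false;
 *	layers[1].type = EDAC_MC_LAYER_SLOT;
 *	layers[1].size = 3;
 *	layers[1].is_virt_csrow = true;
 *
 *	mci = new_edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *				sizeof(struct my_pvt));
 *
 * edac_mc_alloc() below performs the equivalent setup for the classic
 * csrow/channel arrangement.
 */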

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @sz_pvt:	Size of private storage needed
 * @nr_csrows:	Number of csrows seen by the Memory Controller
 * @nr_chans:	Number of channels per csrow
 * @mc_num:	Memory controller number
 *
 * Legacy two-layer (csrow/channel) wrapper around new_edac_mc_alloc().
 *
 * FIXME: drivers handle multi-rank memories in different ways: some
 * drivers map multi-ranked DIMMs as one DIMM while others map them
 * as several DIMMs.
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				   unsigned nr_chans, int mc_num)
{
	unsigned n_layers = 2;
	struct edac_mc_layer layers[n_layers];

	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
	layers[0].size = nr_csrows;
	layers[0].is_virt_csrow = true;
	layers[1].type = EDAC_MC_LAYER_CHANNEL;
	layers[1].size = nr_chans;
	layers[1].is_virt_csrow = false;

	return new_edac_mc_alloc(mc_num, ARRAY_SIZE(layers), layers, sz_pvt);
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);

/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related to the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			   msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);

	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}

/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		    "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		    edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		    "bug in low-level driver: attempt to assign\n"
		    "	duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 * @idx: index to be found
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			       "failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		       " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
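
/*
 * A sketch of the typical driver flow around edac_mc_add_mc() ('pdev',
 * 'my_check' and 'struct my_pvt' are hypothetical driver names):
 *
 *	mci = edac_mc_alloc(sizeof(struct my_pvt), nr_csrows, nr_chans, 0);
 *	if (!mci)
 *		return -ENOMEM;
 *	mci->dev = &pdev->dev;		// looked up later by find_mci_by_dev()
 *	mci->edac_check = my_check;	// non-NULL selects OP_RUNNING_POLL
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 *
 * On removal, the driver calls edac_mc_del_mc(&pdev->dev) (below), which
 * hands the mci back for a final edac_mc_free().
 */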

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		    "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		    mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i, j, n;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j].dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			       "could not look up page error address %lx\n",
			       (unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
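
/*
 * A worked example of the matching logic above (numbers are made up): for a
 * csrow with first_page = 0x100, last_page = 0x1ff and page_mask = 0xf00,
 * page 0x1a5 matches because 0x100 <= 0x1a5 <= 0x1ff and
 * (0x1a5 & 0xf00) == (0x100 & 0xf00).  The mask term lets a csrow whose
 * pages are interleaved within the range claim only the pages that actually
 * decode to it.
 */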

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_count++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_count++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}
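
/*
 * A worked example of the index arithmetic above (values are made up): with
 * two layers of sizes {4, 2} and pos = {3, 1}, layer 0 increments
 * ce_per_layer[0][3]; the index is then scaled by layers[1].size
 * (3 * 2 = 6) before pos[1] is added, so layer 1 increments
 * ce_per_layer[1][7].  Each layer's counter array is thus a row-major
 * flattening of the positions down to that layer.
 */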

static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  u32 grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MCs that can't do this lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
				    offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int layer0,
			  const int layer1,
			  const int layer2,
			  const char *msg,
			  const char *other_detail,
			  const void *mcelog)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
	int i;
	u32 grain;
	bool enable_per_layer_report = false;

debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 1025 1026 /* 1027 * Check if the event report is consistent and if the memory 1028 * location is known. If it is known, enable_per_layer_report will be 1029 * true, the DIMM(s) label info will be filled and the per-layer 1030 * error counters will be incremented. 1031 */ 1032 for (i = 0; i < mci->n_layers; i++) { 1033 if (pos[i] >= (int)mci->layers[i].size) { 1034 if (type == HW_EVENT_ERR_CORRECTED) 1035 p = "CE"; 1036 else 1037 p = "UE"; 1038 1039 edac_mc_printk(mci, KERN_ERR, 1040 "INTERNAL ERROR: %s value is out of range (%d >= %d)\n", 1041 edac_layer_name[mci->layers[i].type], 1042 pos[i], mci->layers[i].size); 1043 /* 1044 * Instead of just returning it, let's use what's 1045 * known about the error. The increment routines and 1046 * the DIMM filter logic will do the right thing by 1047 * pointing the likely damaged DIMMs. 1048 */ 1049 pos[i] = -1; 1050 } 1051 if (pos[i] >= 0) 1052 enable_per_layer_report = true; 1053 } 1054 1055 /* 1056 * Get the dimm label/grain that applies to the match criteria. 1057 * As the error algorithm may not be able to point to just one memory 1058 * stick, the logic here will get all possible labels that could 1059 * pottentially be affected by the error. 1060 * On FB-DIMM memory controllers, for uncorrected errors, it is common 1061 * to have only the MC channel and the MC dimm (also called "branch") 1062 * but the channel is not known, as the memory is arranged in pairs, 1063 * where each memory belongs to a separate channel within the same 1064 * branch. 1065 */ 1066 grain = 0; 1067 p = label; 1068 *p = '\0'; 1069 for (i = 0; i < mci->tot_dimms; i++) { 1070 struct dimm_info *dimm = &mci->dimms[i]; 1071 1072 if (layer0 >= 0 && layer0 != dimm->location[0]) 1073 continue; 1074 if (layer1 >= 0 && layer1 != dimm->location[1]) 1075 continue; 1076 if (layer2 >= 0 && layer2 != dimm->location[2]) 1077 continue; 1078 1079 /* get the max grain, over the error match range */ 1080 if (dimm->grain > grain) 1081 grain = dimm->grain; 1082 1083 /* 1084 * If the error is memory-controller wide, there's no need to 1085 * seek for the affected DIMMs because the whole 1086 * channel/memory controller/... may be affected. 1087 * Also, don't show errors for empty DIMM slots. 1088 */ 1089 if (enable_per_layer_report && dimm->nr_pages) { 1090 if (p != label) { 1091 strcpy(p, OTHER_LABEL); 1092 p += strlen(OTHER_LABEL); 1093 } 1094 strcpy(p, dimm->label); 1095 p += strlen(p); 1096 *p = '\0'; 1097 1098 /* 1099 * get csrow/channel of the DIMM, in order to allow 1100 * incrementing the compat API counters 1101 */ 1102 debugf4("%s: %s csrows map: (%d,%d)\n", 1103 __func__, 1104 mci->mem_is_per_rank ? 
"rank" : "dimm", 1105 dimm->csrow, dimm->cschannel); 1106 1107 if (row == -1) 1108 row = dimm->csrow; 1109 else if (row >= 0 && row != dimm->csrow) 1110 row = -2; 1111 1112 if (chan == -1) 1113 chan = dimm->cschannel; 1114 else if (chan >= 0 && chan != dimm->cschannel) 1115 chan = -2; 1116 } 1117 } 1118 1119 if (!enable_per_layer_report) { 1120 strcpy(label, "any memory"); 1121 } else { 1122 debugf4("%s: csrow/channel to increment: (%d,%d)\n", 1123 __func__, row, chan); 1124 if (p == label) 1125 strcpy(label, "unknown memory"); 1126 if (type == HW_EVENT_ERR_CORRECTED) { 1127 if (row >= 0) { 1128 mci->csrows[row].ce_count++; 1129 if (chan >= 0) 1130 mci->csrows[row].channels[chan].ce_count++; 1131 } 1132 } else 1133 if (row >= 0) 1134 mci->csrows[row].ue_count++; 1135 } 1136 1137 /* Fill the RAM location data */ 1138 p = location; 1139 for (i = 0; i < mci->n_layers; i++) { 1140 if (pos[i] < 0) 1141 continue; 1142 1143 p += sprintf(p, "%s:%d ", 1144 edac_layer_name[mci->layers[i].type], 1145 pos[i]); 1146 } 1147 1148 /* Memory type dependent details about the error */ 1149 if (type == HW_EVENT_ERR_CORRECTED) { 1150 snprintf(detail, sizeof(detail), 1151 "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx", 1152 page_frame_number, offset_in_page, 1153 grain, syndrome); 1154 edac_ce_error(mci, pos, msg, location, label, detail, 1155 other_detail, enable_per_layer_report, 1156 page_frame_number, offset_in_page, grain); 1157 } else { 1158 snprintf(detail, sizeof(detail), 1159 "page:0x%lx offset:0x%lx grain:%d", 1160 page_frame_number, offset_in_page, grain); 1161 1162 edac_ue_error(mci, pos, msg, location, label, detail, 1163 other_detail, enable_per_layer_report); 1164 } 1165} 1166EXPORT_SYMBOL_GPL(edac_mc_handle_error); 1167