/* edac_mc.c — kernel revision 084a4fccef39ac7abb039511f32380f28d0b67e6 */
1/* 2 * edac_mc kernel module 3 * (C) 2005, 2006 Linux Networx (http://lnxi.com) 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Written by Thayne Harbaugh 8 * Based on work by Dan Hollis <goemon at anime dot net> and others. 9 * http://www.anime.net/~goemon/linux-ecc/ 10 * 11 * Modified by Dave Peterson and Doug Thompson 12 * 13 */ 14 15#include <linux/module.h> 16#include <linux/proc_fs.h> 17#include <linux/kernel.h> 18#include <linux/types.h> 19#include <linux/smp.h> 20#include <linux/init.h> 21#include <linux/sysctl.h> 22#include <linux/highmem.h> 23#include <linux/timer.h> 24#include <linux/slab.h> 25#include <linux/jiffies.h> 26#include <linux/spinlock.h> 27#include <linux/list.h> 28#include <linux/ctype.h> 29#include <linux/edac.h> 30#include <asm/uaccess.h> 31#include <asm/page.h> 32#include <asm/edac.h> 33#include "edac_core.h" 34#include "edac_module.h" 35 36/* lock to memory controller's control array */ 37static DEFINE_MUTEX(mem_ctls_mutex); 38static LIST_HEAD(mc_devices); 39 40#ifdef CONFIG_EDAC_DEBUG 41 42static void edac_mc_dump_channel(struct rank_info *chan) 43{ 44 debugf4("\tchannel = %p\n", chan); 45 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); 46 debugf4("\tchannel->ce_count = %d\n", chan->dimm->ce_count); 47 debugf4("\tchannel->label = '%s'\n", chan->dimm->label); 48 debugf4("\tchannel->csrow = %p\n\n", chan->csrow); 49} 50 51static void edac_mc_dump_csrow(struct csrow_info *csrow) 52{ 53 debugf4("\tcsrow = %p\n", csrow); 54 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); 55 debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); 56 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); 57 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); 58 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages); 59 debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); 60 debugf4("\tcsrow->channels = %p\n", csrow->channels); 61 debugf4("\tcsrow->mci = %p\n\n", csrow->mci); 
62} 63 64static void edac_mc_dump_mci(struct mem_ctl_info *mci) 65{ 66 debugf3("\tmci = %p\n", mci); 67 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); 68 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); 69 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap); 70 debugf4("\tmci->edac_check = %p\n", mci->edac_check); 71 debugf3("\tmci->nr_csrows = %d, csrows = %p\n", 72 mci->nr_csrows, mci->csrows); 73 debugf3("\tdev = %p\n", mci->dev); 74 debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); 75 debugf3("\tpvt_info = %p\n\n", mci->pvt_info); 76} 77 78#endif /* CONFIG_EDAC_DEBUG */ 79 80/* 81 * keep those in sync with the enum mem_type 82 */ 83const char *edac_mem_types[] = { 84 "Empty csrow", 85 "Reserved csrow type", 86 "Unknown csrow type", 87 "Fast page mode RAM", 88 "Extended data out RAM", 89 "Burst Extended data out RAM", 90 "Single data rate SDRAM", 91 "Registered single data rate SDRAM", 92 "Double data rate SDRAM", 93 "Registered Double data rate SDRAM", 94 "Rambus DRAM", 95 "Unbuffered DDR2 RAM", 96 "Fully buffered DDR2", 97 "Registered DDR2 RAM", 98 "Rambus XDR", 99 "Unbuffered DDR3 RAM", 100 "Registered DDR3 RAM", 101}; 102EXPORT_SYMBOL_GPL(edac_mem_types); 103 104/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. 105 * Adjust 'ptr' so that its alignment is at least as stringent as what the 106 * compiler would provide for X and return the aligned result. 107 * 108 * If 'size' is a constant, the compiler will optimize this whole function 109 * down to either a no-op or the addition of a constant to the value of 'ptr'. 110 */ 111void *edac_align_ptr(void *ptr, unsigned size) 112{ 113 unsigned align, r; 114 115 /* Here we assume that the alignment of a "long long" is the most 116 * stringent alignment that the compiler will ever provide by default. 117 * As far as I know, this is a reasonable assumption. 
118 */ 119 if (size > sizeof(long)) 120 align = sizeof(long long); 121 else if (size > sizeof(int)) 122 align = sizeof(long); 123 else if (size > sizeof(short)) 124 align = sizeof(int); 125 else if (size > sizeof(char)) 126 align = sizeof(short); 127 else 128 return (char *)ptr; 129 130 r = size % align; 131 132 if (r == 0) 133 return (char *)ptr; 134 135 return (void *)(((unsigned long)ptr) + align - r); 136} 137 138/** 139 * edac_mc_alloc: Allocate a struct mem_ctl_info structure 140 * @size_pvt: size of private storage needed 141 * @nr_csrows: Number of CWROWS needed for this MC 142 * @nr_chans: Number of channels for the MC 143 * 144 * Everything is kmalloc'ed as one big chunk - more efficient. 145 * Only can be used if all structures have the same lifetime - otherwise 146 * you have to allocate and initialize your own structures. 147 * 148 * Use edac_mc_free() to free mc structures allocated by this function. 149 * 150 * Returns: 151 * NULL allocation failed 152 * struct mem_ctl_info pointer 153 */ 154struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, 155 unsigned nr_chans, int edac_index) 156{ 157 struct mem_ctl_info *mci; 158 struct csrow_info *csi, *csrow; 159 struct rank_info *chi, *chp, *chan; 160 struct dimm_info *dimm; 161 void *pvt; 162 unsigned size; 163 int row, chn; 164 int err; 165 166 /* Figure out the offsets of the various items from the start of an mc 167 * structure. We want the alignment of each item to be at least as 168 * stringent as what the compiler would provide if we could simply 169 * hardcode everything into a single struct. 
170 */ 171 mci = (struct mem_ctl_info *)0; 172 csi = edac_align_ptr(&mci[1], sizeof(*csi)); 173 chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi)); 174 dimm = edac_align_ptr(&chi[nr_chans * nr_csrows], sizeof(*dimm)); 175 pvt = edac_align_ptr(&dimm[nr_chans * nr_csrows], sz_pvt); 176 size = ((unsigned long)pvt) + sz_pvt; 177 178 mci = kzalloc(size, GFP_KERNEL); 179 if (mci == NULL) 180 return NULL; 181 182 /* Adjust pointers so they point within the memory we just allocated 183 * rather than an imaginary chunk of memory located at address 0. 184 */ 185 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); 186 chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi)); 187 dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm)); 188 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; 189 190 /* setup index and various internal pointers */ 191 mci->mc_idx = edac_index; 192 mci->csrows = csi; 193 mci->dimms = dimm; 194 mci->pvt_info = pvt; 195 mci->nr_csrows = nr_csrows; 196 197 /* 198 * For now, assumes that a per-csrow arrangement for dimms. 199 * This will be latter changed. 
200 */ 201 dimm = mci->dimms; 202 203 for (row = 0; row < nr_csrows; row++) { 204 csrow = &csi[row]; 205 csrow->csrow_idx = row; 206 csrow->mci = mci; 207 csrow->nr_channels = nr_chans; 208 chp = &chi[row * nr_chans]; 209 csrow->channels = chp; 210 211 for (chn = 0; chn < nr_chans; chn++) { 212 chan = &chp[chn]; 213 chan->chan_idx = chn; 214 chan->csrow = csrow; 215 216 mci->csrows[row].channels[chn].dimm = dimm; 217 dimm->csrow = row; 218 dimm->csrow_channel = chn; 219 dimm++; 220 mci->nr_dimms++; 221 } 222 } 223 224 mci->op_state = OP_ALLOC; 225 INIT_LIST_HEAD(&mci->grp_kobj_list); 226 227 /* 228 * Initialize the 'root' kobj for the edac_mc controller 229 */ 230 err = edac_mc_register_sysfs_main_kobj(mci); 231 if (err) { 232 kfree(mci); 233 return NULL; 234 } 235 236 /* at this point, the root kobj is valid, and in order to 237 * 'free' the object, then the function: 238 * edac_mc_unregister_sysfs_main_kobj() must be called 239 * which will perform kobj unregistration and the actual free 240 * will occur during the kobject callback operation 241 */ 242 return mci; 243} 244EXPORT_SYMBOL_GPL(edac_mc_alloc); 245 246/** 247 * edac_mc_free 248 * 'Free' a previously allocated 'mci' structure 249 * @mci: pointer to a struct mem_ctl_info structure 250 */ 251void edac_mc_free(struct mem_ctl_info *mci) 252{ 253 debugf1("%s()\n", __func__); 254 255 edac_mc_unregister_sysfs_main_kobj(mci); 256 257 /* free the mci instance memory here */ 258 kfree(mci); 259} 260EXPORT_SYMBOL_GPL(edac_mc_free); 261 262 263/** 264 * find_mci_by_dev 265 * 266 * scan list of controllers looking for the one that manages 267 * the 'dev' device 268 * @dev: pointer to a struct device related with the MCI 269 */ 270struct mem_ctl_info *find_mci_by_dev(struct device *dev) 271{ 272 struct mem_ctl_info *mci; 273 struct list_head *item; 274 275 debugf3("%s()\n", __func__); 276 277 list_for_each(item, &mc_devices) { 278 mci = list_entry(item, struct mem_ctl_info, link); 279 280 if (mci->dev == dev) 281 
return mci; 282 } 283 284 return NULL; 285} 286EXPORT_SYMBOL_GPL(find_mci_by_dev); 287 288/* 289 * handler for EDAC to check if NMI type handler has asserted interrupt 290 */ 291static int edac_mc_assert_error_check_and_clear(void) 292{ 293 int old_state; 294 295 if (edac_op_state == EDAC_OPSTATE_POLL) 296 return 1; 297 298 old_state = edac_err_assert; 299 edac_err_assert = 0; 300 301 return old_state; 302} 303 304/* 305 * edac_mc_workq_function 306 * performs the operation scheduled by a workq request 307 */ 308static void edac_mc_workq_function(struct work_struct *work_req) 309{ 310 struct delayed_work *d_work = to_delayed_work(work_req); 311 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); 312 313 mutex_lock(&mem_ctls_mutex); 314 315 /* if this control struct has movd to offline state, we are done */ 316 if (mci->op_state == OP_OFFLINE) { 317 mutex_unlock(&mem_ctls_mutex); 318 return; 319 } 320 321 /* Only poll controllers that are running polled and have a check */ 322 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) 323 mci->edac_check(mci); 324 325 mutex_unlock(&mem_ctls_mutex); 326 327 /* Reschedule */ 328 queue_delayed_work(edac_workqueue, &mci->work, 329 msecs_to_jiffies(edac_mc_get_poll_msec())); 330} 331 332/* 333 * edac_mc_workq_setup 334 * initialize a workq item for this mci 335 * passing in the new delay period in msec 336 * 337 * locking model: 338 * 339 * called with the mem_ctls_mutex held 340 */ 341static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) 342{ 343 debugf0("%s()\n", __func__); 344 345 /* if this instance is not in the POLL state, then simply return */ 346 if (mci->op_state != OP_RUNNING_POLL) 347 return; 348 349 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); 350 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); 351} 352 353/* 354 * edac_mc_workq_teardown 355 * stop the workq processing on this mci 356 * 357 * locking model: 358 * 359 * called WITHOUT lock 
held 360 */ 361static void edac_mc_workq_teardown(struct mem_ctl_info *mci) 362{ 363 int status; 364 365 if (mci->op_state != OP_RUNNING_POLL) 366 return; 367 368 status = cancel_delayed_work(&mci->work); 369 if (status == 0) { 370 debugf0("%s() not canceled, flush the queue\n", 371 __func__); 372 373 /* workq instance might be running, wait for it */ 374 flush_workqueue(edac_workqueue); 375 } 376} 377 378/* 379 * edac_mc_reset_delay_period(unsigned long value) 380 * 381 * user space has updated our poll period value, need to 382 * reset our workq delays 383 */ 384void edac_mc_reset_delay_period(int value) 385{ 386 struct mem_ctl_info *mci; 387 struct list_head *item; 388 389 mutex_lock(&mem_ctls_mutex); 390 391 /* scan the list and turn off all workq timers, doing so under lock 392 */ 393 list_for_each(item, &mc_devices) { 394 mci = list_entry(item, struct mem_ctl_info, link); 395 396 if (mci->op_state == OP_RUNNING_POLL) 397 cancel_delayed_work(&mci->work); 398 } 399 400 mutex_unlock(&mem_ctls_mutex); 401 402 403 /* re-walk the list, and reset the poll delay */ 404 mutex_lock(&mem_ctls_mutex); 405 406 list_for_each(item, &mc_devices) { 407 mci = list_entry(item, struct mem_ctl_info, link); 408 409 edac_mc_workq_setup(mci, (unsigned long) value); 410 } 411 412 mutex_unlock(&mem_ctls_mutex); 413} 414 415 416 417/* Return 0 on success, 1 on failure. 418 * Before calling this function, caller must 419 * assign a unique value to mci->mc_idx. 
420 * 421 * locking model: 422 * 423 * called with the mem_ctls_mutex lock held 424 */ 425static int add_mc_to_global_list(struct mem_ctl_info *mci) 426{ 427 struct list_head *item, *insert_before; 428 struct mem_ctl_info *p; 429 430 insert_before = &mc_devices; 431 432 p = find_mci_by_dev(mci->dev); 433 if (unlikely(p != NULL)) 434 goto fail0; 435 436 list_for_each(item, &mc_devices) { 437 p = list_entry(item, struct mem_ctl_info, link); 438 439 if (p->mc_idx >= mci->mc_idx) { 440 if (unlikely(p->mc_idx == mci->mc_idx)) 441 goto fail1; 442 443 insert_before = item; 444 break; 445 } 446 } 447 448 list_add_tail_rcu(&mci->link, insert_before); 449 atomic_inc(&edac_handlers); 450 return 0; 451 452fail0: 453 edac_printk(KERN_WARNING, EDAC_MC, 454 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev), 455 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); 456 return 1; 457 458fail1: 459 edac_printk(KERN_WARNING, EDAC_MC, 460 "bug in low-level driver: attempt to assign\n" 461 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); 462 return 1; 463} 464 465static void del_mc_from_global_list(struct mem_ctl_info *mci) 466{ 467 atomic_dec(&edac_handlers); 468 list_del_rcu(&mci->link); 469 470 /* these are for safe removal of devices from global list while 471 * NMI handlers may be traversing list 472 */ 473 synchronize_rcu(); 474 INIT_LIST_HEAD(&mci->link); 475} 476 477/** 478 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'. 479 * 480 * If found, return a pointer to the structure. 481 * Else return NULL. 482 * 483 * Caller must hold mem_ctls_mutex. 
484 */ 485struct mem_ctl_info *edac_mc_find(int idx) 486{ 487 struct list_head *item; 488 struct mem_ctl_info *mci; 489 490 list_for_each(item, &mc_devices) { 491 mci = list_entry(item, struct mem_ctl_info, link); 492 493 if (mci->mc_idx >= idx) { 494 if (mci->mc_idx == idx) 495 return mci; 496 497 break; 498 } 499 } 500 501 return NULL; 502} 503EXPORT_SYMBOL(edac_mc_find); 504 505/** 506 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and 507 * create sysfs entries associated with mci structure 508 * @mci: pointer to the mci structure to be added to the list 509 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure. 510 * 511 * Return: 512 * 0 Success 513 * !0 Failure 514 */ 515 516/* FIXME - should a warning be printed if no error detection? correction? */ 517int edac_mc_add_mc(struct mem_ctl_info *mci) 518{ 519 debugf0("%s()\n", __func__); 520 521#ifdef CONFIG_EDAC_DEBUG 522 if (edac_debug_level >= 3) 523 edac_mc_dump_mci(mci); 524 525 if (edac_debug_level >= 4) { 526 int i; 527 528 for (i = 0; i < mci->nr_csrows; i++) { 529 int j; 530 531 edac_mc_dump_csrow(&mci->csrows[i]); 532 for (j = 0; j < mci->csrows[i].nr_channels; j++) 533 edac_mc_dump_channel(&mci->csrows[i]. 
534 channels[j]); 535 } 536 } 537#endif 538 mutex_lock(&mem_ctls_mutex); 539 540 if (add_mc_to_global_list(mci)) 541 goto fail0; 542 543 /* set load time so that error rate can be tracked */ 544 mci->start_time = jiffies; 545 546 if (edac_create_sysfs_mci_device(mci)) { 547 edac_mc_printk(mci, KERN_WARNING, 548 "failed to create sysfs device\n"); 549 goto fail1; 550 } 551 552 /* If there IS a check routine, then we are running POLLED */ 553 if (mci->edac_check != NULL) { 554 /* This instance is NOW RUNNING */ 555 mci->op_state = OP_RUNNING_POLL; 556 557 edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); 558 } else { 559 mci->op_state = OP_RUNNING_INTERRUPT; 560 } 561 562 /* Report action taken */ 563 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" 564 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 565 566 mutex_unlock(&mem_ctls_mutex); 567 return 0; 568 569fail1: 570 del_mc_from_global_list(mci); 571 572fail0: 573 mutex_unlock(&mem_ctls_mutex); 574 return 1; 575} 576EXPORT_SYMBOL_GPL(edac_mc_add_mc); 577 578/** 579 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and 580 * remove mci structure from global list 581 * @pdev: Pointer to 'struct device' representing mci structure to remove. 582 * 583 * Return pointer to removed mci structure, or NULL if device not found. 
584 */ 585struct mem_ctl_info *edac_mc_del_mc(struct device *dev) 586{ 587 struct mem_ctl_info *mci; 588 589 debugf0("%s()\n", __func__); 590 591 mutex_lock(&mem_ctls_mutex); 592 593 /* find the requested mci struct in the global list */ 594 mci = find_mci_by_dev(dev); 595 if (mci == NULL) { 596 mutex_unlock(&mem_ctls_mutex); 597 return NULL; 598 } 599 600 del_mc_from_global_list(mci); 601 mutex_unlock(&mem_ctls_mutex); 602 603 /* flush workq processes */ 604 edac_mc_workq_teardown(mci); 605 606 /* marking MCI offline */ 607 mci->op_state = OP_OFFLINE; 608 609 /* remove from sysfs */ 610 edac_remove_sysfs_mci_device(mci); 611 612 edac_printk(KERN_INFO, EDAC_MC, 613 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, 614 mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 615 616 return mci; 617} 618EXPORT_SYMBOL_GPL(edac_mc_del_mc); 619 620static void edac_mc_scrub_block(unsigned long page, unsigned long offset, 621 u32 size) 622{ 623 struct page *pg; 624 void *virt_addr; 625 unsigned long flags = 0; 626 627 debugf3("%s()\n", __func__); 628 629 /* ECC error page was not in our memory. Ignore it. 
*/ 630 if (!pfn_valid(page)) 631 return; 632 633 /* Find the actual page structure then map it and fix */ 634 pg = pfn_to_page(page); 635 636 if (PageHighMem(pg)) 637 local_irq_save(flags); 638 639 virt_addr = kmap_atomic(pg); 640 641 /* Perform architecture specific atomic scrub operation */ 642 atomic_scrub(virt_addr + offset, size); 643 644 /* Unmap and complete */ 645 kunmap_atomic(virt_addr); 646 647 if (PageHighMem(pg)) 648 local_irq_restore(flags); 649} 650 651/* FIXME - should return -1 */ 652int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) 653{ 654 struct csrow_info *csrows = mci->csrows; 655 int row, i; 656 657 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page); 658 row = -1; 659 660 for (i = 0; i < mci->nr_csrows; i++) { 661 struct csrow_info *csrow = &csrows[i]; 662 663 if (csrow->nr_pages == 0) 664 continue; 665 666 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) " 667 "mask(0x%lx)\n", mci->mc_idx, __func__, 668 csrow->first_page, page, csrow->last_page, 669 csrow->page_mask); 670 671 if ((page >= csrow->first_page) && 672 (page <= csrow->last_page) && 673 ((page & csrow->page_mask) == 674 (csrow->first_page & csrow->page_mask))) { 675 row = i; 676 break; 677 } 678 } 679 680 if (row == -1) 681 edac_mc_printk(mci, KERN_ERR, 682 "could not look up page error address %lx\n", 683 (unsigned long)page); 684 685 return row; 686} 687EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); 688 689/* FIXME - setable log (warning/emerg) levels */ 690/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ 691void edac_mc_handle_ce(struct mem_ctl_info *mci, 692 unsigned long page_frame_number, 693 unsigned long offset_in_page, unsigned long syndrome, 694 int row, int channel, const char *msg) 695{ 696 unsigned long remapped_page; 697 char *label = NULL; 698 u32 grain; 699 700 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 701 702 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 703 if (row >= mci->nr_csrows 
|| row < 0) { 704 /* something is wrong */ 705 edac_mc_printk(mci, KERN_ERR, 706 "INTERNAL ERROR: row out of range " 707 "(%d >= %d)\n", row, mci->nr_csrows); 708 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 709 return; 710 } 711 712 if (channel >= mci->csrows[row].nr_channels || channel < 0) { 713 /* something is wrong */ 714 edac_mc_printk(mci, KERN_ERR, 715 "INTERNAL ERROR: channel out of range " 716 "(%d >= %d)\n", channel, 717 mci->csrows[row].nr_channels); 718 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 719 return; 720 } 721 722 label = mci->csrows[row].channels[channel].dimm->label; 723 grain = mci->csrows[row].channels[channel].dimm->grain; 724 725 if (edac_mc_get_log_ce()) 726 /* FIXME - put in DIMM location */ 727 edac_mc_printk(mci, KERN_WARNING, 728 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome " 729 "0x%lx, row %d, channel %d, label \"%s\": %s\n", 730 page_frame_number, offset_in_page, 731 grain, syndrome, row, channel, 732 label, msg); 733 734 mci->ce_count++; 735 mci->csrows[row].ce_count++; 736 mci->csrows[row].channels[channel].dimm->ce_count++; 737 mci->csrows[row].channels[channel].ce_count++; 738 739 if (mci->scrub_mode & SCRUB_SW_SRC) { 740 /* 741 * Some MC's can remap memory so that it is still available 742 * at a different address when PCI devices map into memory. 743 * MC's that can't do this lose the memory where PCI devices 744 * are mapped. This mapping is MC dependent and so we call 745 * back into the MC driver for it to map the MC page to 746 * a physical (CPU) page which can then be mapped to a virtual 747 * page - which can then be scrubbed. 748 */ 749 remapped_page = mci->ctl_page_to_phys ? 
750 mci->ctl_page_to_phys(mci, page_frame_number) : 751 page_frame_number; 752 753 edac_mc_scrub_block(remapped_page, offset_in_page, grain); 754 } 755} 756EXPORT_SYMBOL_GPL(edac_mc_handle_ce); 757 758void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) 759{ 760 if (edac_mc_get_log_ce()) 761 edac_mc_printk(mci, KERN_WARNING, 762 "CE - no information available: %s\n", msg); 763 764 mci->ce_noinfo_count++; 765 mci->ce_count++; 766} 767EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info); 768 769void edac_mc_handle_ue(struct mem_ctl_info *mci, 770 unsigned long page_frame_number, 771 unsigned long offset_in_page, int row, const char *msg) 772{ 773 int len = EDAC_MC_LABEL_LEN * 4; 774 char labels[len + 1]; 775 char *pos = labels; 776 int chan; 777 int chars; 778 char *label = NULL; 779 u32 grain; 780 781 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 782 783 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 784 if (row >= mci->nr_csrows || row < 0) { 785 /* something is wrong */ 786 edac_mc_printk(mci, KERN_ERR, 787 "INTERNAL ERROR: row out of range " 788 "(%d >= %d)\n", row, mci->nr_csrows); 789 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 790 return; 791 } 792 793 grain = mci->csrows[row].channels[0].dimm->grain; 794 label = mci->csrows[row].channels[0].dimm->label; 795 chars = snprintf(pos, len + 1, "%s", label); 796 len -= chars; 797 pos += chars; 798 799 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0); 800 chan++) { 801 label = mci->csrows[row].channels[chan].dimm->label; 802 chars = snprintf(pos, len + 1, ":%s", label); 803 len -= chars; 804 pos += chars; 805 } 806 807 if (edac_mc_get_log_ue()) 808 edac_mc_printk(mci, KERN_EMERG, 809 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, " 810 "labels \"%s\": %s\n", page_frame_number, 811 offset_in_page, grain, row, labels, msg); 812 813 if (edac_mc_get_panic_on_ue()) 814 panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, " 815 "row %d, labels \"%s\": %s\n", 
mci->mc_idx, 816 page_frame_number, offset_in_page, 817 grain, row, labels, msg); 818 819 mci->ue_count++; 820 mci->csrows[row].ue_count++; 821} 822EXPORT_SYMBOL_GPL(edac_mc_handle_ue); 823 824void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) 825{ 826 if (edac_mc_get_panic_on_ue()) 827 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx); 828 829 if (edac_mc_get_log_ue()) 830 edac_mc_printk(mci, KERN_WARNING, 831 "UE - no information available: %s\n", msg); 832 mci->ue_noinfo_count++; 833 mci->ue_count++; 834} 835EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); 836 837/************************************************************* 838 * On Fully Buffered DIMM modules, this help function is 839 * called to process UE events 840 */ 841void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, 842 unsigned int csrow, 843 unsigned int channela, 844 unsigned int channelb, char *msg) 845{ 846 int len = EDAC_MC_LABEL_LEN * 4; 847 char labels[len + 1]; 848 char *pos = labels; 849 int chars; 850 char *label; 851 852 if (csrow >= mci->nr_csrows) { 853 /* something is wrong */ 854 edac_mc_printk(mci, KERN_ERR, 855 "INTERNAL ERROR: row out of range (%d >= %d)\n", 856 csrow, mci->nr_csrows); 857 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 858 return; 859 } 860 861 if (channela >= mci->csrows[csrow].nr_channels) { 862 /* something is wrong */ 863 edac_mc_printk(mci, KERN_ERR, 864 "INTERNAL ERROR: channel-a out of range " 865 "(%d >= %d)\n", 866 channela, mci->csrows[csrow].nr_channels); 867 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 868 return; 869 } 870 871 if (channelb >= mci->csrows[csrow].nr_channels) { 872 /* something is wrong */ 873 edac_mc_printk(mci, KERN_ERR, 874 "INTERNAL ERROR: channel-b out of range " 875 "(%d >= %d)\n", 876 channelb, mci->csrows[csrow].nr_channels); 877 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 878 return; 879 } 880 881 mci->ue_count++; 882 mci->csrows[csrow].ue_count++; 883 884 /* Generate the DIMM labels from the 
specified channels */ 885 label = mci->csrows[csrow].channels[channela].dimm->label; 886 chars = snprintf(pos, len + 1, "%s", label); 887 len -= chars; 888 pos += chars; 889 890 chars = snprintf(pos, len + 1, "-%s", 891 mci->csrows[csrow].channels[channelb].dimm->label); 892 893 if (edac_mc_get_log_ue()) 894 edac_mc_printk(mci, KERN_EMERG, 895 "UE row %d, channel-a= %d channel-b= %d " 896 "labels \"%s\": %s\n", csrow, channela, channelb, 897 labels, msg); 898 899 if (edac_mc_get_panic_on_ue()) 900 panic("UE row %d, channel-a= %d channel-b= %d " 901 "labels \"%s\": %s\n", csrow, channela, 902 channelb, labels, msg); 903} 904EXPORT_SYMBOL(edac_mc_handle_fbd_ue); 905 906/************************************************************* 907 * On Fully Buffered DIMM modules, this help function is 908 * called to process CE events 909 */ 910void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, 911 unsigned int csrow, unsigned int channel, char *msg) 912{ 913 char *label = NULL; 914 915 /* Ensure boundary values */ 916 if (csrow >= mci->nr_csrows) { 917 /* something is wrong */ 918 edac_mc_printk(mci, KERN_ERR, 919 "INTERNAL ERROR: row out of range (%d >= %d)\n", 920 csrow, mci->nr_csrows); 921 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 922 return; 923 } 924 if (channel >= mci->csrows[csrow].nr_channels) { 925 /* something is wrong */ 926 edac_mc_printk(mci, KERN_ERR, 927 "INTERNAL ERROR: channel out of range (%d >= %d)\n", 928 channel, mci->csrows[csrow].nr_channels); 929 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 930 return; 931 } 932 933 label = mci->csrows[csrow].channels[channel].dimm->label; 934 935 if (edac_mc_get_log_ce()) 936 /* FIXME - put in DIMM location */ 937 edac_mc_printk(mci, KERN_WARNING, 938 "CE row %d, channel %d, label \"%s\": %s\n", 939 csrow, channel, label, msg); 940 941 mci->ce_count++; 942 mci->csrows[csrow].ce_count++; 943 mci->csrows[csrow].channels[channel].dimm->ce_count++; 944 mci->csrows[csrow].channels[channel].ce_count++; 945} 
946EXPORT_SYMBOL(edac_mc_handle_fbd_ce); 947