edac_mc.c revision 6fe1108f14f4f9581af97cab752f37dc8fa9fdec
/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 * http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sysdev.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array (the mc_devices list below) */
static DEFINE_MUTEX(mem_ctls_mutex);
/* global list of all registered mem_ctl_info instances, sorted by mc_idx */
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

/* Dump one channel's fields at debug level 4. */
static void edac_mc_dump_channel(struct channel_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
	debugf4("\tchannel->label = '%s'\n", chan->label);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
}

/* Dump one csrow's fields at debug level 4. */
static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

/* Dump top-level mem_ctl_info fields (levels 3 and 4). */
static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

/*
 * Human-readable names for memory types.
 * Keep these in sync with the enum mem_type (indexed by its values).
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

#endif				/* CONFIG_EDAC_DEBUG */

/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
 * Adjust 'ptr' so that its alignment is at least as stringent as what the
 * compiler would provide for X and return the aligned result.
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of 'ptr'.
 */
void *edac_align_ptr(void *ptr, unsigned size)
{
	unsigned align, r;

	/* Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;	/* byte-sized items need no alignment */

	/* round 'ptr' up to the next multiple of 'align' (no-op if aligned) */
	r = size % align;

	if (r == 0)
		return (char *)ptr;

	return (void *)(((unsigned long)ptr) + align - r);
}

/**
 * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 * @sz_pvt: size of private storage needed
 * @nr_csrows: Number of CSROWS needed for this MC
 * @nr_chans: Number of channels for the MC
 * @edac_index: unique index to be assigned as mci->mc_idx
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * Only can be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * Returns:
 *	NULL	allocation failed
 *	struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
				unsigned nr_chans, int edac_index)
{
	struct mem_ctl_info *mci;
	struct csrow_info *csi, *csrow;
	struct channel_info *chi, *chp, *chan;
	void *pvt;
	unsigned size;
	int row, chn;
	int err;

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 *
	 * The pointer arithmetic is done relative to address 0; the results
	 * are therefore offsets, which are rebased after the kzalloc below.
	 */
	mci = (struct mem_ctl_info *)0;
	csi = edac_align_ptr(&mci[1], sizeof(*csi));
	chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
	pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
	size = ((unsigned long)pvt) + sz_pvt;

	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = edac_index;
	mci->csrows = csi;
	mci->pvt_info = pvt;
	mci->nr_csrows = nr_csrows;

	/* cross-link every csrow with its channels and owning mci */
	for (row = 0; row < nr_csrows; row++) {
		csrow = &csi[row];
		csrow->csrow_idx = row;
		csrow->mci = mci;
		csrow->nr_channels = nr_chans;
		chp = &chi[row * nr_chans];
		csrow->channels = chp;

		for (chn = 0; chn < nr_chans; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csrow;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* at this point, the root kobj is valid, and in order to
	 * 'free' the object, then the function:
	 *	edac_mc_unregister_sysfs_main_kobj() must be called
	 * which will perform kobj unregistration and the actual free
	 * will occur during the kobject callback operation
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 *
 * Note: this only unregisters the main kobject; the actual kfree of the
 * mci chunk happens later in the kobject release callback (see the
 * comment at the end of edac_mc_alloc()).
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	edac_mc_unregister_sysfs_main_kobj(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 *
 * NOTE(review): walks mc_devices without taking mem_ctls_mutex itself;
 * presumably callers hold the lock (as add_mc_to_global_list and
 * edac_mc_del_mc do) — external callers must do the same.
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 *
 * In POLL mode this always reports "check needed" (returns 1); otherwise
 * it test-and-clears the global edac_err_assert flag.
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 *
 * Runs the controller's edac_check under mem_ctls_mutex, then
 * unconditionally re-queues itself with the current poll period.
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(unsigned long value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);


	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}



/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 * The list is kept sorted by mc_idx; the insertion point is the first
 * entry with a larger index.  Duplicate devices or indexes are rejected.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

/* RCU callback: runs after a grace period following list_del_rcu() */
static void complete_mc_list_del(struct rcu_head *head)
{
	struct mem_ctl_info *mci;

	mci = container_of(head, struct mem_ctl_info, rcu);
	INIT_LIST_HEAD(&mci->link);
}

/* Remove mci from the global list; rcu_barrier() guarantees the RCU
 * callback above has finished before we return, so the caller may
 * safely tear the mci down afterwards.
 */
static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);
	call_rcu(&mci->rcu, complete_mc_list_del);
	rcu_barrier();
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		/* list is sorted by mc_idx, so we can stop early */
		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *	(caller must have assigned a unique mci->mc_idx beforehand)
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *                 remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	/* marking MCI offline (stops the poll work from rescheduling) */
	mci->op_state = OP_OFFLINE;

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes and remove sysfs
	 * (must be done after dropping the mutex: the work function and
	 * teardown both need it / may sleep)
	 */
	edac_mc_workq_teardown(mci);
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);

/* Re-write ('scrub') a block of memory in place so the hardware can
 * correct a transient (correctable) error.  Uses the arch-provided
 * atomic_scrub() on a temporarily kmapped page.
 */
static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr, KM_BOUNCE_READ);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* Map a physical page number to the csrow index covering it, or -1 if no
 * csrow matches (FIXME in original: should return -1 on bad input too).
 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];

		/* skip unpopulated rows */
		if (csrow->nr_pages == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

/* Report a correctable error: log it, bump the per-mci/csrow/channel
 * counters, and optionally software-scrub the affected block.
 */
/* FIXME - setable log (warning/emerg) levels */
/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
void edac_mc_handle_ce(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, unsigned long syndrome,
		int row, int channel, const char *msg)
{
	unsigned long remapped_page;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channel >= mci->csrows[row].nr_channels || channel < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range "
			"(%d >= %d)\n", channel,
			mci->csrows[row].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
			"0x%lx, row %d, channel %d, label \"%s\": %s\n",
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, syndrome, row, channel,
			mci->csrows[row].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[row].ce_count++;
	mci->csrows[row].channels[channel].ce_count++;

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some MC's can remap memory so that it is still available
		 * at a different address when PCI devices map into memory.
		 * MC's that can't do this lose the memory where PCI devices
		 * are mapped.  This mapping is MC dependent and so we call
		 * back into the MC driver for it to map the MC page to
		 * a physical (CPU) page which can then be mapped to a virtual
		 * page - which can then be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page, offset_in_page,
				mci->csrows[row].grain);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce);

/* Correctable error with no address/row information available. */
void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_log_ce())
		edac_mc_printk(mci, KERN_WARNING,
			"CE - no information available: %s\n", msg);

	mci->ce_noinfo_count++;
	mci->ce_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);

/* Report an uncorrectable error on a csrow: builds a ':'-joined list of
 * the row's channel labels, logs and counts it, and may panic depending
 * on the panic_on_ue policy.
 */
void edac_mc_handle_ue(struct mem_ctl_info *mci,
		unsigned long page_frame_number,
		unsigned long offset_in_page, int row, const char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chan;
	int chars;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/* FIXME - maybe make panic on INTERNAL ERROR an option */
	if (row >= mci->nr_csrows || row < 0) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range "
			"(%d >= %d)\n", row, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[row].channels[0].label);
	len -= chars;
	pos += chars;

	/* append remaining channel labels while buffer space lasts */
	for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
	     chan++) {
		chars = snprintf(pos, len + 1, ":%s",
				 mci->csrows[row].channels[chan].label);
		len -= chars;
		pos += chars;
	}

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
			"labels \"%s\": %s\n", page_frame_number,
			offset_in_page, mci->csrows[row].grain, row,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
			"row %d, labels \"%s\": %s\n", mci->mc_idx,
			page_frame_number, offset_in_page,
			mci->csrows[row].grain, row, labels, msg);

	mci->ue_count++;
	mci->csrows[row].ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue);

/* Uncorrectable error with no address/row information available. */
void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
{
	if (edac_mc_get_panic_on_ue())
		panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_WARNING,
			"UE - no information available: %s\n", msg);
	mci->ue_noinfo_count++;
	mci->ue_count++;
}
EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);

/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process UE events
 */
void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
			unsigned int csrow,
			unsigned int channela,
			unsigned int channelb, char *msg)
{
	int len = EDAC_MC_LABEL_LEN * 4;
	char labels[len + 1];
	char *pos = labels;
	int chars;

	/* validate csrow and both channel indices before touching arrays */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channela >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-a out of range "
			"(%d >= %d)\n",
			channela, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (channelb >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel-b out of range "
			"(%d >= %d)\n",
			channelb, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
		return;
	}

	mci->ue_count++;
	mci->csrows[csrow].ue_count++;

	/* Generate the DIMM labels from the specified channels */
	chars = snprintf(pos, len + 1, "%s",
			 mci->csrows[csrow].channels[channela].label);
	len -= chars;
	pos += chars;
	chars = snprintf(pos, len + 1, "-%s",
			 mci->csrows[csrow].channels[channelb].label);

	if (edac_mc_get_log_ue())
		edac_mc_printk(mci, KERN_EMERG,
			"UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela, channelb,
			labels, msg);

	if (edac_mc_get_panic_on_ue())
		panic("UE row %d, channel-a= %d channel-b= %d "
			"labels \"%s\": %s\n", csrow, channela,
			channelb, labels, msg);
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ue);

/*************************************************************
 * On Fully Buffered DIMM modules, this help function is
 * called to process CE events
 */
void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
			unsigned int csrow, unsigned int channel, char *msg)
{

	/* Ensure boundary values */
	if (csrow >= mci->nr_csrows) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: row out of range (%d >= %d)\n",
			csrow, mci->nr_csrows);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}
	if (channel >= mci->csrows[csrow].nr_channels) {
		/* something is wrong */
		edac_mc_printk(mci, KERN_ERR,
			"INTERNAL ERROR: channel out of range (%d >= %d)\n",
			channel, mci->csrows[csrow].nr_channels);
		edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
		return;
	}

	if (edac_mc_get_log_ce())
		/* FIXME - put in DIMM location */
		edac_mc_printk(mci, KERN_WARNING,
			"CE row %d, channel %d, label \"%s\": %s\n",
			csrow, channel,
			mci->csrows[csrow].channels[channel].label, msg);

	mci->ce_count++;
	mci->csrows[csrow].ce_count++;
	mci->csrows[csrow].channels[channel].ce_count++;
}
EXPORT_SYMBOL(edac_mc_handle_fbd_ce);