edac_mc.c revision 052dfb45ccb5ea354a426b52556bcfee75b9d2f5
1/* 2 * edac_mc kernel module 3 * (C) 2005, 2006 Linux Networx (http://lnxi.com) 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Written by Thayne Harbaugh 8 * Based on work by Dan Hollis <goemon at anime dot net> and others. 9 * http://www.anime.net/~goemon/linux-ecc/ 10 * 11 * Modified by Dave Peterson and Doug Thompson 12 * 13 */ 14 15#include <linux/module.h> 16#include <linux/proc_fs.h> 17#include <linux/kernel.h> 18#include <linux/types.h> 19#include <linux/smp.h> 20#include <linux/init.h> 21#include <linux/sysctl.h> 22#include <linux/highmem.h> 23#include <linux/timer.h> 24#include <linux/slab.h> 25#include <linux/jiffies.h> 26#include <linux/spinlock.h> 27#include <linux/list.h> 28#include <linux/sysdev.h> 29#include <linux/ctype.h> 30#include <linux/edac.h> 31#include <asm/uaccess.h> 32#include <asm/page.h> 33#include <asm/edac.h> 34#include "edac_core.h" 35#include "edac_module.h" 36 37/* lock to memory controller's control array */ 38static DEFINE_MUTEX(mem_ctls_mutex); 39static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); 40 41#ifdef CONFIG_EDAC_DEBUG 42 43static void edac_mc_dump_channel(struct channel_info *chan) 44{ 45 debugf4("\tchannel = %p\n", chan); 46 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); 47 debugf4("\tchannel->ce_count = %d\n", chan->ce_count); 48 debugf4("\tchannel->label = '%s'\n", chan->label); 49 debugf4("\tchannel->csrow = %p\n\n", chan->csrow); 50} 51 52static void edac_mc_dump_csrow(struct csrow_info *csrow) 53{ 54 debugf4("\tcsrow = %p\n", csrow); 55 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); 56 debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page); 57 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); 58 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); 59 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages); 60 debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels); 61 debugf4("\tcsrow->channels = %p\n", csrow->channels); 62 debugf4("\tcsrow->mci = %p\n\n", csrow->mci); 63} 64 65static void edac_mc_dump_mci(struct mem_ctl_info *mci) 66{ 67 debugf3("\tmci = %p\n", mci); 68 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); 69 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); 70 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap); 71 debugf4("\tmci->edac_check = %p\n", mci->edac_check); 72 debugf3("\tmci->nr_csrows = %d, csrows = %p\n", 73 mci->nr_csrows, mci->csrows); 74 debugf3("\tdev = %p\n", mci->dev); 75 debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name); 76 debugf3("\tpvt_info = %p\n\n", mci->pvt_info); 77} 78 79#endif /* CONFIG_EDAC_DEBUG */ 80 81/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. 82 * Adjust 'ptr' so that its alignment is at least as stringent as what the 83 * compiler would provide for X and return the aligned result. 84 * 85 * If 'size' is a constant, the compiler will optimize this whole function 86 * down to either a no-op or the addition of a constant to the value of 'ptr'. 87 */ 88char *edac_align_ptr(void *ptr, unsigned size) 89{ 90 unsigned align, r; 91 92 /* Here we assume that the alignment of a "long long" is the most 93 * stringent alignment that the compiler will ever provide by default. 94 * As far as I know, this is a reasonable assumption. 95 */ 96 if (size > sizeof(long)) 97 align = sizeof(long long); 98 else if (size > sizeof(int)) 99 align = sizeof(long); 100 else if (size > sizeof(short)) 101 align = sizeof(int); 102 else if (size > sizeof(char)) 103 align = sizeof(short); 104 else 105 return (char *)ptr; 106 107 r = size % align; 108 109 if (r == 0) 110 return (char *)ptr; 111 112 return (char *)(((unsigned long)ptr) + align - r); 113} 114 115/** 116 * edac_mc_alloc: Allocate a struct mem_ctl_info structure 117 * @size_pvt: size of private storage needed 118 * @nr_csrows: Number of CWROWS needed for this MC 119 * @nr_chans: Number of channels for the MC 120 * 121 * Everything is kmalloc'ed as one big chunk - more efficient. 122 * Only can be used if all structures have the same lifetime - otherwise 123 * you have to allocate and initialize your own structures. 124 * 125 * Use edac_mc_free() to free mc structures allocated by this function. 126 * 127 * Returns: 128 * NULL allocation failed 129 * struct mem_ctl_info pointer 130 */ 131struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, 132 unsigned nr_chans) 133{ 134 struct mem_ctl_info *mci; 135 struct csrow_info *csi, *csrow; 136 struct channel_info *chi, *chp, *chan; 137 void *pvt; 138 unsigned size; 139 int row, chn; 140 141 /* Figure out the offsets of the various items from the start of an mc 142 * structure. We want the alignment of each item to be at least as 143 * stringent as what the compiler would provide if we could simply 144 * hardcode everything into a single struct. 145 */ 146 mci = (struct mem_ctl_info *)0; 147 csi = (struct csrow_info *)edac_align_ptr(&mci[1], sizeof(*csi)); 148 chi = (struct channel_info *) 149 edac_align_ptr(&csi[nr_csrows], sizeof(*chi)); 150 pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); 151 size = ((unsigned long)pvt) + sz_pvt; 152 153 if ((mci = kmalloc(size, GFP_KERNEL)) == NULL) 154 return NULL; 155 156 /* Adjust pointers so they point within the memory we just allocated 157 * rather than an imaginary chunk of memory located at address 0. 158 */ 159 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); 160 chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi)); 161 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; 162 163 memset(mci, 0, size); /* clear all fields */ 164 mci->csrows = csi; 165 mci->pvt_info = pvt; 166 mci->nr_csrows = nr_csrows; 167 168 for (row = 0; row < nr_csrows; row++) { 169 csrow = &csi[row]; 170 csrow->csrow_idx = row; 171 csrow->mci = mci; 172 csrow->nr_channels = nr_chans; 173 chp = &chi[row * nr_chans]; 174 csrow->channels = chp; 175 176 for (chn = 0; chn < nr_chans; chn++) { 177 chan = &chp[chn]; 178 chan->chan_idx = chn; 179 chan->csrow = csrow; 180 } 181 } 182 183 mci->op_state = OP_ALLOC; 184 185 return mci; 186} 187 188EXPORT_SYMBOL_GPL(edac_mc_alloc); 189 190/** 191 * edac_mc_free: Free a previously allocated 'mci' structure 192 * @mci: pointer to a struct mem_ctl_info structure 193 */ 194void edac_mc_free(struct mem_ctl_info *mci) 195{ 196 kfree(mci); 197} 198 199EXPORT_SYMBOL_GPL(edac_mc_free); 200 201static struct mem_ctl_info *find_mci_by_dev(struct device *dev) 202{ 203 struct mem_ctl_info *mci; 204 struct list_head *item; 205 206 debugf3("%s()\n", __func__); 207 208 list_for_each(item, &mc_devices) { 209 mci = list_entry(item, struct mem_ctl_info, link); 210 211 if (mci->dev == dev) 212 return mci; 213 } 214 215 return NULL; 216} 217 218/* 219 * handler for EDAC to check if NMI type handler has asserted interrupt 220 */ 221static int edac_mc_assert_error_check_and_clear(void) 222{ 223 int old_state; 224 225 if (edac_op_state == EDAC_OPSTATE_POLL) 226 return 1; 227 228 old_state = edac_err_assert; 229 edac_err_assert = 0; 230 231 return old_state; 232} 233 234/* 235 * edac_mc_workq_function 236 * performs the operation scheduled by a workq request 237 */ 238static void edac_mc_workq_function(struct work_struct *work_req) 239{ 240 struct delayed_work *d_work = (struct delayed_work *)work_req; 241 struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); 242 243 mutex_lock(&mem_ctls_mutex); 244 245 /* Only poll controllers that are running polled and have a check */ 246 if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) 247 mci->edac_check(mci); 248 249 /* 250 * FIXME: temp place holder for PCI checks, 251 * goes away when we break out PCI 252 */ 253 edac_pci_do_parity_check(); 254 255 mutex_unlock(&mem_ctls_mutex); 256 257 /* Reschedule */ 258 queue_delayed_work(edac_workqueue, &mci->work, 259 msecs_to_jiffies(edac_mc_get_poll_msec())); 260} 261 262/* 263 * edac_mc_workq_setup 264 * initialize a workq item for this mci 265 * passing in the new delay period in msec 266 */ 267void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) 268{ 269 debugf0("%s()\n", __func__); 270 271 INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); 272 queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); 273} 274 275/* 276 * edac_mc_workq_teardown 277 * stop the workq processing on this mci 278 */ 279void edac_mc_workq_teardown(struct mem_ctl_info *mci) 280{ 281 int status; 282 283 status = cancel_delayed_work(&mci->work); 284 if (status == 0) { 285 /* workq instance might be running, wait for it */ 286 flush_workqueue(edac_workqueue); 287 } 288} 289 290/* 291 * edac_reset_delay_period 292 */ 293 294void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value) 295{ 296 mutex_lock(&mem_ctls_mutex); 297 298 /* cancel the current workq request */ 299 edac_mc_workq_teardown(mci); 300 301 /* restart the workq request, with new delay value */ 302 edac_mc_workq_setup(mci, value); 303 304 mutex_unlock(&mem_ctls_mutex); 305} 306 307/* Return 0 on success, 1 on failure. 308 * Before calling this function, caller must 309 * assign a unique value to mci->mc_idx. 310 */ 311static int add_mc_to_global_list(struct mem_ctl_info *mci) 312{ 313 struct list_head *item, *insert_before; 314 struct mem_ctl_info *p; 315 316 insert_before = &mc_devices; 317 318 if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL)) 319 goto fail0; 320 321 list_for_each(item, &mc_devices) { 322 p = list_entry(item, struct mem_ctl_info, link); 323 324 if (p->mc_idx >= mci->mc_idx) { 325 if (unlikely(p->mc_idx == mci->mc_idx)) 326 goto fail1; 327 328 insert_before = item; 329 break; 330 } 331 } 332 333 list_add_tail_rcu(&mci->link, insert_before); 334 atomic_inc(&edac_handlers); 335 return 0; 336 337fail0: 338 edac_printk(KERN_WARNING, EDAC_MC, 339 "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, 340 dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx); 341 return 1; 342 343fail1: 344 edac_printk(KERN_WARNING, EDAC_MC, 345 "bug in low-level driver: attempt to assign\n" 346 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); 347 return 1; 348} 349 350static void complete_mc_list_del(struct rcu_head *head) 351{ 352 struct mem_ctl_info *mci; 353 354 mci = container_of(head, struct mem_ctl_info, rcu); 355 INIT_LIST_HEAD(&mci->link); 356 complete(&mci->complete); 357} 358 359static void del_mc_from_global_list(struct mem_ctl_info *mci) 360{ 361 atomic_dec(&edac_handlers); 362 list_del_rcu(&mci->link); 363 init_completion(&mci->complete); 364 call_rcu(&mci->rcu, complete_mc_list_del); 365 wait_for_completion(&mci->complete); 366} 367 368/** 369 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'. 370 * 371 * If found, return a pointer to the structure. 372 * Else return NULL. 373 * 374 * Caller must hold mem_ctls_mutex. 375 */ 376struct mem_ctl_info *edac_mc_find(int idx) 377{ 378 struct list_head *item; 379 struct mem_ctl_info *mci; 380 381 list_for_each(item, &mc_devices) { 382 mci = list_entry(item, struct mem_ctl_info, link); 383 384 if (mci->mc_idx >= idx) { 385 if (mci->mc_idx == idx) 386 return mci; 387 388 break; 389 } 390 } 391 392 return NULL; 393} 394 395EXPORT_SYMBOL(edac_mc_find); 396 397/** 398 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and 399 * create sysfs entries associated with mci structure 400 * @mci: pointer to the mci structure to be added to the list 401 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure. 402 * 403 * Return: 404 * 0 Success 405 * !0 Failure 406 */ 407 408/* FIXME - should a warning be printed if no error detection? correction? */ 409int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) 410{ 411 debugf0("%s()\n", __func__); 412 mci->mc_idx = mc_idx; 413#ifdef CONFIG_EDAC_DEBUG 414 if (edac_debug_level >= 3) 415 edac_mc_dump_mci(mci); 416 417 if (edac_debug_level >= 4) { 418 int i; 419 420 for (i = 0; i < mci->nr_csrows; i++) { 421 int j; 422 423 edac_mc_dump_csrow(&mci->csrows[i]); 424 for (j = 0; j < mci->csrows[i].nr_channels; j++) 425 edac_mc_dump_channel(&mci->csrows[i]. 426 channels[j]); 427 } 428 } 429#endif 430 mutex_lock(&mem_ctls_mutex); 431 432 if (add_mc_to_global_list(mci)) 433 goto fail0; 434 435 /* set load time so that error rate can be tracked */ 436 mci->start_time = jiffies; 437 438 if (edac_create_sysfs_mci_device(mci)) { 439 edac_mc_printk(mci, KERN_WARNING, 440 "failed to create sysfs device\n"); 441 goto fail1; 442 } 443 444 /* If there IS a check routine, then we are running POLLED */ 445 if (mci->edac_check != NULL) { 446 /* This instance is NOW RUNNING */ 447 mci->op_state = OP_RUNNING_POLL; 448 449 edac_mc_workq_setup(mci, edac_mc_get_poll_msec()); 450 } else { 451 mci->op_state = OP_RUNNING_INTERRUPT; 452 } 453 454 /* Report action taken */ 455 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n", 456 mci->mod_name, mci->ctl_name, dev_name(mci)); 457 458 mutex_unlock(&mem_ctls_mutex); 459 return 0; 460 461fail1: 462 del_mc_from_global_list(mci); 463 464fail0: 465 mutex_unlock(&mem_ctls_mutex); 466 return 1; 467} 468 469EXPORT_SYMBOL_GPL(edac_mc_add_mc); 470 471/** 472 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and 473 * remove mci structure from global list 474 * @pdev: Pointer to 'struct device' representing mci structure to remove. 475 * 476 * Return pointer to removed mci structure, or NULL if device not found. 477 */ 478struct mem_ctl_info *edac_mc_del_mc(struct device *dev) 479{ 480 struct mem_ctl_info *mci; 481 482 debugf0("MC: %s()\n", __func__); 483 mutex_lock(&mem_ctls_mutex); 484 485 if ((mci = find_mci_by_dev(dev)) == NULL) { 486 mutex_unlock(&mem_ctls_mutex); 487 return NULL; 488 } 489 490 /* marking MCI offline */ 491 mci->op_state = OP_OFFLINE; 492 493 /* flush workq processes */ 494 edac_mc_workq_teardown(mci); 495 496 edac_remove_sysfs_mci_device(mci); 497 del_mc_from_global_list(mci); 498 mutex_unlock(&mem_ctls_mutex); 499 edac_printk(KERN_INFO, EDAC_MC, 500 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, 501 mci->mod_name, mci->ctl_name, dev_name(mci)); 502 return mci; 503} 504 505EXPORT_SYMBOL_GPL(edac_mc_del_mc); 506 507static void edac_mc_scrub_block(unsigned long page, unsigned long offset, 508 u32 size) 509{ 510 struct page *pg; 511 void *virt_addr; 512 unsigned long flags = 0; 513 514 debugf3("%s()\n", __func__); 515 516 /* ECC error page was not in our memory. Ignore it. */ 517 if (!pfn_valid(page)) 518 return; 519 520 /* Find the actual page structure then map it and fix */ 521 pg = pfn_to_page(page); 522 523 if (PageHighMem(pg)) 524 local_irq_save(flags); 525 526 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ); 527 528 /* Perform architecture specific atomic scrub operation */ 529 atomic_scrub(virt_addr + offset, size); 530 531 /* Unmap and complete */ 532 kunmap_atomic(virt_addr, KM_BOUNCE_READ); 533 534 if (PageHighMem(pg)) 535 local_irq_restore(flags); 536} 537 538/* FIXME - should return -1 */ 539int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) 540{ 541 struct csrow_info *csrows = mci->csrows; 542 int row, i; 543 544 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page); 545 row = -1; 546 547 for (i = 0; i < mci->nr_csrows; i++) { 548 struct csrow_info *csrow = &csrows[i]; 549 550 if (csrow->nr_pages == 0) 551 continue; 552 553 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) " 554 "mask(0x%lx)\n", mci->mc_idx, __func__, 555 csrow->first_page, page, csrow->last_page, 556 csrow->page_mask); 557 558 if ((page >= csrow->first_page) && 559 (page <= csrow->last_page) && 560 ((page & csrow->page_mask) == 561 (csrow->first_page & csrow->page_mask))) { 562 row = i; 563 break; 564 } 565 } 566 567 if (row == -1) 568 edac_mc_printk(mci, KERN_ERR, 569 "could not look up page error address %lx\n", 570 (unsigned long)page); 571 572 return row; 573} 574 575EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); 576 577/* FIXME - setable log (warning/emerg) levels */ 578/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ 579void edac_mc_handle_ce(struct mem_ctl_info *mci, 580 unsigned long page_frame_number, 581 unsigned long offset_in_page, unsigned long syndrome, 582 int row, int channel, const char *msg) 583{ 584 unsigned long remapped_page; 585 586 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 587 588 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 589 if (row >= mci->nr_csrows || row < 0) { 590 /* something is wrong */ 591 edac_mc_printk(mci, KERN_ERR, 592 "INTERNAL ERROR: row out of range " 593 "(%d >= %d)\n", row, mci->nr_csrows); 594 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 595 return; 596 } 597 598 if (channel >= mci->csrows[row].nr_channels || channel < 0) { 599 /* something is wrong */ 600 edac_mc_printk(mci, KERN_ERR, 601 "INTERNAL ERROR: channel out of range " 602 "(%d >= %d)\n", channel, 603 mci->csrows[row].nr_channels); 604 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 605 return; 606 } 607 608 if (edac_mc_get_log_ce()) 609 /* FIXME - put in DIMM location */ 610 edac_mc_printk(mci, KERN_WARNING, 611 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome " 612 "0x%lx, row %d, channel %d, label \"%s\": %s\n", 613 page_frame_number, offset_in_page, 614 mci->csrows[row].grain, syndrome, row, channel, 615 mci->csrows[row].channels[channel].label, msg); 616 617 mci->ce_count++; 618 mci->csrows[row].ce_count++; 619 mci->csrows[row].channels[channel].ce_count++; 620 621 if (mci->scrub_mode & SCRUB_SW_SRC) { 622 /* 623 * Some MC's can remap memory so that it is still available 624 * at a different address when PCI devices map into memory. 625 * MC's that can't do this lose the memory where PCI devices 626 * are mapped. This mapping is MC dependant and so we call 627 * back into the MC driver for it to map the MC page to 628 * a physical (CPU) page which can then be mapped to a virtual 629 * page - which can then be scrubbed. 630 */ 631 remapped_page = mci->ctl_page_to_phys ? 632 mci->ctl_page_to_phys(mci, page_frame_number) : 633 page_frame_number; 634 635 edac_mc_scrub_block(remapped_page, offset_in_page, 636 mci->csrows[row].grain); 637 } 638} 639 640EXPORT_SYMBOL_GPL(edac_mc_handle_ce); 641 642void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) 643{ 644 if (edac_mc_get_log_ce()) 645 edac_mc_printk(mci, KERN_WARNING, 646 "CE - no information available: %s\n", msg); 647 648 mci->ce_noinfo_count++; 649 mci->ce_count++; 650} 651 652EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info); 653 654void edac_mc_handle_ue(struct mem_ctl_info *mci, 655 unsigned long page_frame_number, 656 unsigned long offset_in_page, int row, const char *msg) 657{ 658 int len = EDAC_MC_LABEL_LEN * 4; 659 char labels[len + 1]; 660 char *pos = labels; 661 int chan; 662 int chars; 663 664 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 665 666 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 667 if (row >= mci->nr_csrows || row < 0) { 668 /* something is wrong */ 669 edac_mc_printk(mci, KERN_ERR, 670 "INTERNAL ERROR: row out of range " 671 "(%d >= %d)\n", row, mci->nr_csrows); 672 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 673 return; 674 } 675 676 chars = snprintf(pos, len + 1, "%s", 677 mci->csrows[row].channels[0].label); 678 len -= chars; 679 pos += chars; 680 681 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0); 682 chan++) { 683 chars = snprintf(pos, len + 1, ":%s", 684 mci->csrows[row].channels[chan].label); 685 len -= chars; 686 pos += chars; 687 } 688 689 if (edac_mc_get_log_ue()) 690 edac_mc_printk(mci, KERN_EMERG, 691 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, " 692 "labels \"%s\": %s\n", page_frame_number, 693 offset_in_page, mci->csrows[row].grain, row, 694 labels, msg); 695 696 if (edac_mc_get_panic_on_ue()) 697 panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, " 698 "row %d, labels \"%s\": %s\n", mci->mc_idx, 699 page_frame_number, offset_in_page, 700 mci->csrows[row].grain, row, labels, msg); 701 702 mci->ue_count++; 703 mci->csrows[row].ue_count++; 704} 705 706EXPORT_SYMBOL_GPL(edac_mc_handle_ue); 707 708void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) 709{ 710 if (edac_mc_get_panic_on_ue()) 711 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx); 712 713 if (edac_mc_get_log_ue()) 714 edac_mc_printk(mci, KERN_WARNING, 715 "UE - no information available: %s\n", msg); 716 mci->ue_noinfo_count++; 717 mci->ue_count++; 718} 719 720EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); 721 722/************************************************************* 723 * On Fully Buffered DIMM modules, this help function is 724 * called to process UE events 725 */ 726void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci, 727 unsigned int csrow, 728 unsigned int channela, 729 unsigned int channelb, char *msg) 730{ 731 int len = EDAC_MC_LABEL_LEN * 4; 732 char labels[len + 1]; 733 char *pos = labels; 734 int chars; 735 736 if (csrow >= mci->nr_csrows) { 737 /* something is wrong */ 738 edac_mc_printk(mci, KERN_ERR, 739 "INTERNAL ERROR: row out of range (%d >= %d)\n", 740 csrow, mci->nr_csrows); 741 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 742 return; 743 } 744 745 if (channela >= mci->csrows[csrow].nr_channels) { 746 /* something is wrong */ 747 edac_mc_printk(mci, KERN_ERR, 748 "INTERNAL ERROR: channel-a out of range " 749 "(%d >= %d)\n", 750 channela, mci->csrows[csrow].nr_channels); 751 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 752 return; 753 } 754 755 if (channelb >= mci->csrows[csrow].nr_channels) { 756 /* something is wrong */ 757 edac_mc_printk(mci, KERN_ERR, 758 "INTERNAL ERROR: channel-b out of range " 759 "(%d >= %d)\n", 760 channelb, mci->csrows[csrow].nr_channels); 761 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 762 return; 763 } 764 765 mci->ue_count++; 766 mci->csrows[csrow].ue_count++; 767 768 /* Generate the DIMM labels from the specified channels */ 769 chars = snprintf(pos, len + 1, "%s", 770 mci->csrows[csrow].channels[channela].label); 771 len -= chars; 772 pos += chars; 773 chars = snprintf(pos, len + 1, "-%s", 774 mci->csrows[csrow].channels[channelb].label); 775 776 if (edac_mc_get_log_ue()) 777 edac_mc_printk(mci, KERN_EMERG, 778 "UE row %d, channel-a= %d channel-b= %d " 779 "labels \"%s\": %s\n", csrow, channela, channelb, 780 labels, msg); 781 782 if (edac_mc_get_panic_on_ue()) 783 panic("UE row %d, channel-a= %d channel-b= %d " 784 "labels \"%s\": %s\n", csrow, channela, 785 channelb, labels, msg); 786} 787 788EXPORT_SYMBOL(edac_mc_handle_fbd_ue); 789 790/************************************************************* 791 * On Fully Buffered DIMM modules, this help function is 792 * called to process CE events 793 */ 794void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci, 795 unsigned int csrow, unsigned int channel, char *msg) 796{ 797 798 /* Ensure boundary values */ 799 if (csrow >= mci->nr_csrows) { 800 /* something is wrong */ 801 edac_mc_printk(mci, KERN_ERR, 802 "INTERNAL ERROR: row out of range (%d >= %d)\n", 803 csrow, mci->nr_csrows); 804 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 805 return; 806 } 807 if (channel >= mci->csrows[csrow].nr_channels) { 808 /* something is wrong */ 809 edac_mc_printk(mci, KERN_ERR, 810 "INTERNAL ERROR: channel out of range (%d >= %d)\n", 811 channel, mci->csrows[csrow].nr_channels); 812 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 813 return; 814 } 815 816 if (edac_mc_get_log_ce()) 817 /* FIXME - put in DIMM location */ 818 edac_mc_printk(mci, KERN_WARNING, 819 "CE row %d, channel %d, label \"%s\": %s\n", 820 csrow, channel, 821 mci->csrows[csrow].channels[channel].label, msg); 822 823 mci->ce_count++; 824 mci->csrows[csrow].ce_count++; 825 mci->csrows[csrow].channels[channel].ce_count++; 826} 827 828EXPORT_SYMBOL(edac_mc_handle_fbd_ce); 829 830/* 831 * Iterate over all MC instances and check for ECC, et al, errors 832 */ 833void edac_check_mc_devices(void) 834{ 835 struct list_head *item; 836 struct mem_ctl_info *mci; 837 838 debugf3("%s()\n", __func__); 839 mutex_lock(&mem_ctls_mutex); 840 841 list_for_each(item, &mc_devices) { 842 mci = list_entry(item, struct mem_ctl_info, link); 843 844 if (mci->edac_check != NULL) 845 mci->edac_check(mci); 846 } 847 848 mutex_unlock(&mem_ctls_mutex); 849} 850