edac_mc.c revision 7dfb71030f7636a0d65200158113c37764552f93
1/* 2 * edac_mc kernel module 3 * (C) 2005, 2006 Linux Networx (http://lnxi.com) 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Written by Thayne Harbaugh 8 * Based on work by Dan Hollis <goemon at anime dot net> and others. 9 * http://www.anime.net/~goemon/linux-ecc/ 10 * 11 * Modified by Dave Peterson and Doug Thompson 12 * 13 */ 14 15#include <linux/module.h> 16#include <linux/proc_fs.h> 17#include <linux/kernel.h> 18#include <linux/types.h> 19#include <linux/smp.h> 20#include <linux/init.h> 21#include <linux/sysctl.h> 22#include <linux/highmem.h> 23#include <linux/timer.h> 24#include <linux/slab.h> 25#include <linux/jiffies.h> 26#include <linux/spinlock.h> 27#include <linux/list.h> 28#include <linux/sysdev.h> 29#include <linux/ctype.h> 30#include <linux/kthread.h> 31#include <linux/freezer.h> 32#include <asm/uaccess.h> 33#include <asm/page.h> 34#include <asm/edac.h> 35#include "edac_mc.h" 36 37#define EDAC_MC_VERSION "Ver: 2.0.1 " __DATE__ 38 39 40#ifdef CONFIG_EDAC_DEBUG 41/* Values of 0 to 4 will generate output */ 42int edac_debug_level = 1; 43EXPORT_SYMBOL_GPL(edac_debug_level); 44#endif 45 46/* EDAC Controls, setable by module parameter, and sysfs */ 47static int log_ue = 1; 48static int log_ce = 1; 49static int panic_on_ue; 50static int poll_msec = 1000; 51 52/* lock to memory controller's control array */ 53static DECLARE_MUTEX(mem_ctls_mutex); 54static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); 55 56static struct task_struct *edac_thread; 57 58#ifdef CONFIG_PCI 59static int check_pci_parity = 0; /* default YES check PCI parity */ 60static int panic_on_pci_parity; /* default no panic on PCI Parity */ 61static atomic_t pci_parity_count = ATOMIC_INIT(0); 62 63static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ 64static struct completion edac_pci_kobj_complete; 65#endif /* CONFIG_PCI */ 66 67/* START sysfs data and methods */ 68 69 70static const char *mem_types[] = { 71 [MEM_EMPTY] = "Empty", 72 [MEM_RESERVED] = "Reserved", 73 [MEM_UNKNOWN] = "Unknown", 74 [MEM_FPM] = "FPM", 75 [MEM_EDO] = "EDO", 76 [MEM_BEDO] = "BEDO", 77 [MEM_SDR] = "Unbuffered-SDR", 78 [MEM_RDR] = "Registered-SDR", 79 [MEM_DDR] = "Unbuffered-DDR", 80 [MEM_RDDR] = "Registered-DDR", 81 [MEM_RMBS] = "RMBS" 82}; 83 84static const char *dev_types[] = { 85 [DEV_UNKNOWN] = "Unknown", 86 [DEV_X1] = "x1", 87 [DEV_X2] = "x2", 88 [DEV_X4] = "x4", 89 [DEV_X8] = "x8", 90 [DEV_X16] = "x16", 91 [DEV_X32] = "x32", 92 [DEV_X64] = "x64" 93}; 94 95static const char *edac_caps[] = { 96 [EDAC_UNKNOWN] = "Unknown", 97 [EDAC_NONE] = "None", 98 [EDAC_RESERVED] = "Reserved", 99 [EDAC_PARITY] = "PARITY", 100 [EDAC_EC] = "EC", 101 [EDAC_SECDED] = "SECDED", 102 [EDAC_S2ECD2ED] = "S2ECD2ED", 103 [EDAC_S4ECD4ED] = "S4ECD4ED", 104 [EDAC_S8ECD8ED] = "S8ECD8ED", 105 [EDAC_S16ECD16ED] = "S16ECD16ED" 106}; 107 108/* sysfs object: /sys/devices/system/edac */ 109static struct sysdev_class edac_class = { 110 set_kset_name("edac"), 111}; 112 113/* sysfs object: 114 * /sys/devices/system/edac/mc 115 */ 116static struct kobject edac_memctrl_kobj; 117 118/* We use these to wait for the reference counts on edac_memctrl_kobj and 119 * edac_pci_kobj to reach 0. 120 */ 121static struct completion edac_memctrl_kobj_complete; 122 123/* 124 * /sys/devices/system/edac/mc; 125 * data structures and methods 126 */ 127static ssize_t memctrl_int_show(void *ptr, char *buffer) 128{ 129 int *value = (int*) ptr; 130 return sprintf(buffer, "%u\n", *value); 131} 132 133static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) 134{ 135 int *value = (int*) ptr; 136 137 if (isdigit(*buffer)) 138 *value = simple_strtoul(buffer, NULL, 0); 139 140 return count; 141} 142 143struct memctrl_dev_attribute { 144 struct attribute attr; 145 void *value; 146 ssize_t (*show)(void *,char *); 147 ssize_t (*store)(void *, const char *, size_t); 148}; 149 150/* Set of show/store abstract level functions for memory control object */ 151static ssize_t memctrl_dev_show(struct kobject *kobj, 152 struct attribute *attr, char *buffer) 153{ 154 struct memctrl_dev_attribute *memctrl_dev; 155 memctrl_dev = (struct memctrl_dev_attribute*)attr; 156 157 if (memctrl_dev->show) 158 return memctrl_dev->show(memctrl_dev->value, buffer); 159 160 return -EIO; 161} 162 163static ssize_t memctrl_dev_store(struct kobject *kobj, struct attribute *attr, 164 const char *buffer, size_t count) 165{ 166 struct memctrl_dev_attribute *memctrl_dev; 167 memctrl_dev = (struct memctrl_dev_attribute*)attr; 168 169 if (memctrl_dev->store) 170 return memctrl_dev->store(memctrl_dev->value, buffer, count); 171 172 return -EIO; 173} 174 175static struct sysfs_ops memctrlfs_ops = { 176 .show = memctrl_dev_show, 177 .store = memctrl_dev_store 178}; 179 180#define MEMCTRL_ATTR(_name,_mode,_show,_store) \ 181struct memctrl_dev_attribute attr_##_name = { \ 182 .attr = {.name = __stringify(_name), .mode = _mode }, \ 183 .value = &_name, \ 184 .show = _show, \ 185 .store = _store, \ 186}; 187 188#define MEMCTRL_STRING_ATTR(_name,_data,_mode,_show,_store) \ 189struct memctrl_dev_attribute attr_##_name = { \ 190 .attr = {.name = __stringify(_name), .mode = _mode }, \ 191 .value = _data, \ 192 .show = _show, \ 193 .store = _store, \ 194}; 195 196/* csrow<id> control files */ 197MEMCTRL_ATTR(panic_on_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); 198MEMCTRL_ATTR(log_ue,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); 199MEMCTRL_ATTR(log_ce,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); 200MEMCTRL_ATTR(poll_msec,S_IRUGO|S_IWUSR,memctrl_int_show,memctrl_int_store); 201 202/* Base Attributes of the memory ECC object */ 203static struct memctrl_dev_attribute *memctrl_attr[] = { 204 &attr_panic_on_ue, 205 &attr_log_ue, 206 &attr_log_ce, 207 &attr_poll_msec, 208 NULL, 209}; 210 211/* Main MC kobject release() function */ 212static void edac_memctrl_master_release(struct kobject *kobj) 213{ 214 debugf1("%s()\n", __func__); 215 complete(&edac_memctrl_kobj_complete); 216} 217 218static struct kobj_type ktype_memctrl = { 219 .release = edac_memctrl_master_release, 220 .sysfs_ops = &memctrlfs_ops, 221 .default_attrs = (struct attribute **) memctrl_attr, 222}; 223 224/* Initialize the main sysfs entries for edac: 225 * /sys/devices/system/edac 226 * 227 * and children 228 * 229 * Return: 0 SUCCESS 230 * !0 FAILURE 231 */ 232static int edac_sysfs_memctrl_setup(void) 233{ 234 int err = 0; 235 236 debugf1("%s()\n", __func__); 237 238 /* create the /sys/devices/system/edac directory */ 239 err = sysdev_class_register(&edac_class); 240 241 if (err) { 242 debugf1("%s() error=%d\n", __func__, err); 243 return err; 244 } 245 246 /* Init the MC's kobject */ 247 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj)); 248 edac_memctrl_kobj.parent = &edac_class.kset.kobj; 249 edac_memctrl_kobj.ktype = &ktype_memctrl; 250 251 /* generate sysfs "..../edac/mc" */ 252 err = kobject_set_name(&edac_memctrl_kobj,"mc"); 253 254 if (err) 255 goto fail; 256 257 /* FIXME: maybe new sysdev_create_subdir() */ 258 err = kobject_register(&edac_memctrl_kobj); 259 260 if (err) { 261 debugf1("Failed to register '.../edac/mc'\n"); 262 goto fail; 263 } 264 265 debugf1("Registered '.../edac/mc' kobject\n"); 266 267 return 0; 268 269fail: 270 sysdev_class_unregister(&edac_class); 271 return err; 272} 273 274/* 275 * MC teardown: 276 * the '..../edac/mc' kobject followed by '..../edac' itself 277 */ 278static void edac_sysfs_memctrl_teardown(void) 279{ 280 debugf0("MC: " __FILE__ ": %s()\n", __func__); 281 282 /* Unregister the MC's kobject and wait for reference count to reach 283 * 0. 284 */ 285 init_completion(&edac_memctrl_kobj_complete); 286 kobject_unregister(&edac_memctrl_kobj); 287 wait_for_completion(&edac_memctrl_kobj_complete); 288 289 /* Unregister the 'edac' object */ 290 sysdev_class_unregister(&edac_class); 291} 292 293#ifdef CONFIG_PCI 294static ssize_t edac_pci_int_show(void *ptr, char *buffer) 295{ 296 int *value = ptr; 297 return sprintf(buffer,"%d\n",*value); 298} 299 300static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count) 301{ 302 int *value = ptr; 303 304 if (isdigit(*buffer)) 305 *value = simple_strtoul(buffer,NULL,0); 306 307 return count; 308} 309 310struct edac_pci_dev_attribute { 311 struct attribute attr; 312 void *value; 313 ssize_t (*show)(void *,char *); 314 ssize_t (*store)(void *, const char *,size_t); 315}; 316 317/* Set of show/store abstract level functions for PCI Parity object */ 318static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr, 319 char *buffer) 320{ 321 struct edac_pci_dev_attribute *edac_pci_dev; 322 edac_pci_dev= (struct edac_pci_dev_attribute*)attr; 323 324 if (edac_pci_dev->show) 325 return edac_pci_dev->show(edac_pci_dev->value, buffer); 326 return -EIO; 327} 328 329static ssize_t edac_pci_dev_store(struct kobject *kobj, 330 struct attribute *attr, const char *buffer, size_t count) 331{ 332 struct edac_pci_dev_attribute *edac_pci_dev; 333 edac_pci_dev= (struct edac_pci_dev_attribute*)attr; 334 335 if (edac_pci_dev->show) 336 return edac_pci_dev->store(edac_pci_dev->value, buffer, count); 337 return -EIO; 338} 339 340static struct sysfs_ops edac_pci_sysfs_ops = { 341 .show = edac_pci_dev_show, 342 .store = edac_pci_dev_store 343}; 344 345#define EDAC_PCI_ATTR(_name,_mode,_show,_store) \ 346struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ 347 .attr = {.name = __stringify(_name), .mode = _mode }, \ 348 .value = &_name, \ 349 .show = _show, \ 350 .store = _store, \ 351}; 352 353#define EDAC_PCI_STRING_ATTR(_name,_data,_mode,_show,_store) \ 354struct edac_pci_dev_attribute edac_pci_attr_##_name = { \ 355 .attr = {.name = __stringify(_name), .mode = _mode }, \ 356 .value = _data, \ 357 .show = _show, \ 358 .store = _store, \ 359}; 360 361/* PCI Parity control files */ 362EDAC_PCI_ATTR(check_pci_parity, S_IRUGO|S_IWUSR, edac_pci_int_show, 363 edac_pci_int_store); 364EDAC_PCI_ATTR(panic_on_pci_parity, S_IRUGO|S_IWUSR, edac_pci_int_show, 365 edac_pci_int_store); 366EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); 367 368/* Base Attributes of the memory ECC object */ 369static struct edac_pci_dev_attribute *edac_pci_attr[] = { 370 &edac_pci_attr_check_pci_parity, 371 &edac_pci_attr_panic_on_pci_parity, 372 &edac_pci_attr_pci_parity_count, 373 NULL, 374}; 375 376/* No memory to release */ 377static void edac_pci_release(struct kobject *kobj) 378{ 379 debugf1("%s()\n", __func__); 380 complete(&edac_pci_kobj_complete); 381} 382 383static struct kobj_type ktype_edac_pci = { 384 .release = edac_pci_release, 385 .sysfs_ops = &edac_pci_sysfs_ops, 386 .default_attrs = (struct attribute **) edac_pci_attr, 387}; 388 389/** 390 * edac_sysfs_pci_setup() 391 * 392 */ 393static int edac_sysfs_pci_setup(void) 394{ 395 int err; 396 397 debugf1("%s()\n", __func__); 398 399 memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj)); 400 edac_pci_kobj.parent = &edac_class.kset.kobj; 401 edac_pci_kobj.ktype = &ktype_edac_pci; 402 err = kobject_set_name(&edac_pci_kobj, "pci"); 403 404 if (!err) { 405 /* Instanstiate the csrow object */ 406 /* FIXME: maybe new sysdev_create_subdir() */ 407 err = kobject_register(&edac_pci_kobj); 408 409 if (err) 410 debugf1("Failed to register '.../edac/pci'\n"); 411 else 412 debugf1("Registered '.../edac/pci' kobject\n"); 413 } 414 415 return err; 416} 417 418static void edac_sysfs_pci_teardown(void) 419{ 420 debugf0("%s()\n", __func__); 421 init_completion(&edac_pci_kobj_complete); 422 kobject_unregister(&edac_pci_kobj); 423 wait_for_completion(&edac_pci_kobj_complete); 424} 425 426 427static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) 428{ 429 int where; 430 u16 status; 431 432 where = secondary ? PCI_SEC_STATUS : PCI_STATUS; 433 pci_read_config_word(dev, where, &status); 434 435 /* If we get back 0xFFFF then we must suspect that the card has been 436 * pulled but the Linux PCI layer has not yet finished cleaning up. 437 * We don't want to report on such devices 438 */ 439 440 if (status == 0xFFFF) { 441 u32 sanity; 442 443 pci_read_config_dword(dev, 0, &sanity); 444 445 if (sanity == 0xFFFFFFFF) 446 return 0; 447 } 448 449 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | 450 PCI_STATUS_PARITY; 451 452 if (status) 453 /* reset only the bits we are interested in */ 454 pci_write_config_word(dev, where, status); 455 456 return status; 457} 458 459typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); 460 461/* Clear any PCI parity errors logged by this device. */ 462static void edac_pci_dev_parity_clear(struct pci_dev *dev) 463{ 464 u8 header_type; 465 466 get_pci_parity_status(dev, 0); 467 468 /* read the device TYPE, looking for bridges */ 469 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); 470 471 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) 472 get_pci_parity_status(dev, 1); 473} 474 475/* 476 * PCI Parity polling 477 * 478 */ 479static void edac_pci_dev_parity_test(struct pci_dev *dev) 480{ 481 u16 status; 482 u8 header_type; 483 484 /* read the STATUS register on this device 485 */ 486 status = get_pci_parity_status(dev, 0); 487 488 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id ); 489 490 /* check the status reg for errors */ 491 if (status) { 492 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) 493 edac_printk(KERN_CRIT, EDAC_PCI, 494 "Signaled System Error on %s\n", 495 pci_name(dev)); 496 497 if (status & (PCI_STATUS_PARITY)) { 498 edac_printk(KERN_CRIT, EDAC_PCI, 499 "Master Data Parity Error on %s\n", 500 pci_name(dev)); 501 502 atomic_inc(&pci_parity_count); 503 } 504 505 if (status & (PCI_STATUS_DETECTED_PARITY)) { 506 edac_printk(KERN_CRIT, EDAC_PCI, 507 "Detected Parity Error on %s\n", 508 pci_name(dev)); 509 510 atomic_inc(&pci_parity_count); 511 } 512 } 513 514 /* read the device TYPE, looking for bridges */ 515 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); 516 517 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id ); 518 519 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { 520 /* On bridges, need to examine secondary status register */ 521 status = get_pci_parity_status(dev, 1); 522 523 debugf2("PCI SEC_STATUS= 0x%04x %s\n", 524 status, dev->dev.bus_id ); 525 526 /* check the secondary status reg for errors */ 527 if (status) { 528 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) 529 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " 530 "Signaled System Error on %s\n", 531 pci_name(dev)); 532 533 if (status & (PCI_STATUS_PARITY)) { 534 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " 535 "Master Data Parity Error on " 536 "%s\n", pci_name(dev)); 537 538 atomic_inc(&pci_parity_count); 539 } 540 541 if (status & (PCI_STATUS_DETECTED_PARITY)) { 542 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " 543 "Detected Parity Error on %s\n", 544 pci_name(dev)); 545 546 atomic_inc(&pci_parity_count); 547 } 548 } 549 } 550} 551 552/* 553 * pci_dev parity list iterator 554 * Scan the PCI device list for one iteration, looking for SERRORs 555 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices 556 */ 557static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) 558{ 559 struct pci_dev *dev = NULL; 560 561 /* request for kernel access to the next PCI device, if any, 562 * and while we are looking at it have its reference count 563 * bumped until we are done with it 564 */ 565 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 566 fn(dev); 567 } 568} 569 570static void do_pci_parity_check(void) 571{ 572 unsigned long flags; 573 int before_count; 574 575 debugf3("%s()\n", __func__); 576 577 if (!check_pci_parity) 578 return; 579 580 before_count = atomic_read(&pci_parity_count); 581 582 /* scan all PCI devices looking for a Parity Error on devices and 583 * bridges 584 */ 585 local_irq_save(flags); 586 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); 587 local_irq_restore(flags); 588 589 /* Only if operator has selected panic on PCI Error */ 590 if (panic_on_pci_parity) { 591 /* If the count is different 'after' from 'before' */ 592 if (before_count != atomic_read(&pci_parity_count)) 593 panic("EDAC: PCI Parity Error"); 594 } 595} 596 597static inline void clear_pci_parity_errors(void) 598{ 599 /* Clear any PCI bus parity errors that devices initially have logged 600 * in their registers. 601 */ 602 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); 603} 604 605#else /* CONFIG_PCI */ 606 607/* pre-process these away */ 608#define do_pci_parity_check() 609#define clear_pci_parity_errors() 610#define edac_sysfs_pci_teardown() 611#define edac_sysfs_pci_setup() (0) 612 613#endif /* CONFIG_PCI */ 614 615/* EDAC sysfs CSROW data structures and methods 616 */ 617 618/* Set of more default csrow<id> attribute show/store functions */ 619static ssize_t csrow_ue_count_show(struct csrow_info *csrow, char *data, int private) 620{ 621 return sprintf(data,"%u\n", csrow->ue_count); 622} 623 624static ssize_t csrow_ce_count_show(struct csrow_info *csrow, char *data, int private) 625{ 626 return sprintf(data,"%u\n", csrow->ce_count); 627} 628 629static ssize_t csrow_size_show(struct csrow_info *csrow, char *data, int private) 630{ 631 return sprintf(data,"%u\n", PAGES_TO_MiB(csrow->nr_pages)); 632} 633 634static ssize_t csrow_mem_type_show(struct csrow_info *csrow, char *data, int private) 635{ 636 return sprintf(data,"%s\n", mem_types[csrow->mtype]); 637} 638 639static ssize_t csrow_dev_type_show(struct csrow_info *csrow, char *data, int private) 640{ 641 return sprintf(data,"%s\n", dev_types[csrow->dtype]); 642} 643 644static ssize_t csrow_edac_mode_show(struct csrow_info *csrow, char *data, int private) 645{ 646 return sprintf(data,"%s\n", edac_caps[csrow->edac_mode]); 647} 648 649/* show/store functions for DIMM Label attributes */ 650static ssize_t channel_dimm_label_show(struct csrow_info *csrow, 651 char *data, int channel) 652{ 653 return snprintf(data, EDAC_MC_LABEL_LEN,"%s", 654 csrow->channels[channel].label); 655} 656 657static ssize_t channel_dimm_label_store(struct csrow_info *csrow, 658 const char *data, 659 size_t count, 660 int channel) 661{ 662 ssize_t max_size = 0; 663 664 max_size = min((ssize_t)count,(ssize_t)EDAC_MC_LABEL_LEN-1); 665 strncpy(csrow->channels[channel].label, data, max_size); 666 csrow->channels[channel].label[max_size] = '\0'; 667 668 return max_size; 669} 670 671/* show function for dynamic chX_ce_count attribute */ 672static ssize_t channel_ce_count_show(struct csrow_info *csrow, 673 char *data, 674 int channel) 675{ 676 return sprintf(data, "%u\n", csrow->channels[channel].ce_count); 677} 678 679/* csrow specific attribute structure */ 680struct csrowdev_attribute { 681 struct attribute attr; 682 ssize_t (*show)(struct csrow_info *,char *,int); 683 ssize_t (*store)(struct csrow_info *, const char *,size_t,int); 684 int private; 685}; 686 687#define to_csrow(k) container_of(k, struct csrow_info, kobj) 688#define to_csrowdev_attr(a) container_of(a, struct csrowdev_attribute, attr) 689 690/* Set of show/store higher level functions for default csrow attributes */ 691static ssize_t csrowdev_show(struct kobject *kobj, 692 struct attribute *attr, 693 char *buffer) 694{ 695 struct csrow_info *csrow = to_csrow(kobj); 696 struct csrowdev_attribute *csrowdev_attr = to_csrowdev_attr(attr); 697 698 if (csrowdev_attr->show) 699 return csrowdev_attr->show(csrow, 700 buffer, 701 csrowdev_attr->private); 702 return -EIO; 703} 704 705static ssize_t csrowdev_store(struct kobject *kobj, struct attribute *attr, 706 const char *buffer, size_t count) 707{ 708 struct csrow_info *csrow = to_csrow(kobj); 709 struct csrowdev_attribute * csrowdev_attr = to_csrowdev_attr(attr); 710 711 if (csrowdev_attr->store) 712 return csrowdev_attr->store(csrow, 713 buffer, 714 count, 715 csrowdev_attr->private); 716 return -EIO; 717} 718 719static struct sysfs_ops csrowfs_ops = { 720 .show = csrowdev_show, 721 .store = csrowdev_store 722}; 723 724#define CSROWDEV_ATTR(_name,_mode,_show,_store,_private) \ 725struct csrowdev_attribute attr_##_name = { \ 726 .attr = {.name = __stringify(_name), .mode = _mode }, \ 727 .show = _show, \ 728 .store = _store, \ 729 .private = _private, \ 730}; 731 732/* default cwrow<id>/attribute files */ 733CSROWDEV_ATTR(size_mb,S_IRUGO,csrow_size_show,NULL,0); 734CSROWDEV_ATTR(dev_type,S_IRUGO,csrow_dev_type_show,NULL,0); 735CSROWDEV_ATTR(mem_type,S_IRUGO,csrow_mem_type_show,NULL,0); 736CSROWDEV_ATTR(edac_mode,S_IRUGO,csrow_edac_mode_show,NULL,0); 737CSROWDEV_ATTR(ue_count,S_IRUGO,csrow_ue_count_show,NULL,0); 738CSROWDEV_ATTR(ce_count,S_IRUGO,csrow_ce_count_show,NULL,0); 739 740/* default attributes of the CSROW<id> object */ 741static struct csrowdev_attribute *default_csrow_attr[] = { 742 &attr_dev_type, 743 &attr_mem_type, 744 &attr_edac_mode, 745 &attr_size_mb, 746 &attr_ue_count, 747 &attr_ce_count, 748 NULL, 749}; 750 751 752/* possible dynamic channel DIMM Label attribute files */ 753CSROWDEV_ATTR(ch0_dimm_label,S_IRUGO|S_IWUSR, 754 channel_dimm_label_show, 755 channel_dimm_label_store, 756 0 ); 757CSROWDEV_ATTR(ch1_dimm_label,S_IRUGO|S_IWUSR, 758 channel_dimm_label_show, 759 channel_dimm_label_store, 760 1 ); 761CSROWDEV_ATTR(ch2_dimm_label,S_IRUGO|S_IWUSR, 762 channel_dimm_label_show, 763 channel_dimm_label_store, 764 2 ); 765CSROWDEV_ATTR(ch3_dimm_label,S_IRUGO|S_IWUSR, 766 channel_dimm_label_show, 767 channel_dimm_label_store, 768 3 ); 769CSROWDEV_ATTR(ch4_dimm_label,S_IRUGO|S_IWUSR, 770 channel_dimm_label_show, 771 channel_dimm_label_store, 772 4 ); 773CSROWDEV_ATTR(ch5_dimm_label,S_IRUGO|S_IWUSR, 774 channel_dimm_label_show, 775 channel_dimm_label_store, 776 5 ); 777 778/* Total possible dynamic DIMM Label attribute file table */ 779static struct csrowdev_attribute *dynamic_csrow_dimm_attr[] = { 780 &attr_ch0_dimm_label, 781 &attr_ch1_dimm_label, 782 &attr_ch2_dimm_label, 783 &attr_ch3_dimm_label, 784 &attr_ch4_dimm_label, 785 &attr_ch5_dimm_label 786}; 787 788/* possible dynamic channel ce_count attribute files */ 789CSROWDEV_ATTR(ch0_ce_count,S_IRUGO|S_IWUSR, 790 channel_ce_count_show, 791 NULL, 792 0 ); 793CSROWDEV_ATTR(ch1_ce_count,S_IRUGO|S_IWUSR, 794 channel_ce_count_show, 795 NULL, 796 1 ); 797CSROWDEV_ATTR(ch2_ce_count,S_IRUGO|S_IWUSR, 798 channel_ce_count_show, 799 NULL, 800 2 ); 801CSROWDEV_ATTR(ch3_ce_count,S_IRUGO|S_IWUSR, 802 channel_ce_count_show, 803 NULL, 804 3 ); 805CSROWDEV_ATTR(ch4_ce_count,S_IRUGO|S_IWUSR, 806 channel_ce_count_show, 807 NULL, 808 4 ); 809CSROWDEV_ATTR(ch5_ce_count,S_IRUGO|S_IWUSR, 810 channel_ce_count_show, 811 NULL, 812 5 ); 813 814/* Total possible dynamic ce_count attribute file table */ 815static struct csrowdev_attribute *dynamic_csrow_ce_count_attr[] = { 816 &attr_ch0_ce_count, 817 &attr_ch1_ce_count, 818 &attr_ch2_ce_count, 819 &attr_ch3_ce_count, 820 &attr_ch4_ce_count, 821 &attr_ch5_ce_count 822}; 823 824 825#define EDAC_NR_CHANNELS 6 826 827/* Create dynamic CHANNEL files, indexed by 'chan', under specifed CSROW */ 828static int edac_create_channel_files(struct kobject *kobj, int chan) 829{ 830 int err=-ENODEV; 831 832 if (chan >= EDAC_NR_CHANNELS) 833 return err; 834 835 /* create the DIMM label attribute file */ 836 err = sysfs_create_file(kobj, 837 (struct attribute *) dynamic_csrow_dimm_attr[chan]); 838 839 if (!err) { 840 /* create the CE Count attribute file */ 841 err = sysfs_create_file(kobj, 842 (struct attribute *) dynamic_csrow_ce_count_attr[chan]); 843 } else { 844 debugf1("%s() dimm labels and ce_count files created", __func__); 845 } 846 847 return err; 848} 849 850/* No memory to release for this kobj */ 851static void edac_csrow_instance_release(struct kobject *kobj) 852{ 853 struct csrow_info *cs; 854 855 cs = container_of(kobj, struct csrow_info, kobj); 856 complete(&cs->kobj_complete); 857} 858 859/* the kobj_type instance for a CSROW */ 860static struct kobj_type ktype_csrow = { 861 .release = edac_csrow_instance_release, 862 .sysfs_ops = &csrowfs_ops, 863 .default_attrs = (struct attribute **) default_csrow_attr, 864}; 865 866/* Create a CSROW object under specifed edac_mc_device */ 867static int edac_create_csrow_object( 868 struct kobject *edac_mci_kobj, 869 struct csrow_info *csrow, 870 int index) 871{ 872 int err = 0; 873 int chan; 874 875 memset(&csrow->kobj, 0, sizeof(csrow->kobj)); 876 877 /* generate ..../edac/mc/mc<id>/csrow<index> */ 878 879 csrow->kobj.parent = edac_mci_kobj; 880 csrow->kobj.ktype = &ktype_csrow; 881 882 /* name this instance of csrow<id> */ 883 err = kobject_set_name(&csrow->kobj,"csrow%d",index); 884 if (err) 885 goto error_exit; 886 887 /* Instanstiate the csrow object */ 888 err = kobject_register(&csrow->kobj); 889 if (!err) { 890 /* Create the dyanmic attribute files on this csrow, 891 * namely, the DIMM labels and the channel ce_count 892 */ 893 for (chan = 0; chan < csrow->nr_channels; chan++) { 894 err = edac_create_channel_files(&csrow->kobj,chan); 895 if (err) 896 break; 897 } 898 } 899 900error_exit: 901 return err; 902} 903 904/* default sysfs methods and data structures for the main MCI kobject */ 905 906static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, 907 const char *data, size_t count) 908{ 909 int row, chan; 910 911 mci->ue_noinfo_count = 0; 912 mci->ce_noinfo_count = 0; 913 mci->ue_count = 0; 914 mci->ce_count = 0; 915 916 for (row = 0; row < mci->nr_csrows; row++) { 917 struct csrow_info *ri = &mci->csrows[row]; 918 919 ri->ue_count = 0; 920 ri->ce_count = 0; 921 922 for (chan = 0; chan < ri->nr_channels; chan++) 923 ri->channels[chan].ce_count = 0; 924 } 925 926 mci->start_time = jiffies; 927 return count; 928} 929 930/* default attribute files for the MCI object */ 931static ssize_t mci_ue_count_show(struct mem_ctl_info *mci, char *data) 932{ 933 return sprintf(data,"%d\n", mci->ue_count); 934} 935 936static ssize_t mci_ce_count_show(struct mem_ctl_info *mci, char *data) 937{ 938 return sprintf(data,"%d\n", mci->ce_count); 939} 940 941static ssize_t mci_ce_noinfo_show(struct mem_ctl_info *mci, char *data) 942{ 943 return sprintf(data,"%d\n", mci->ce_noinfo_count); 944} 945 946static ssize_t mci_ue_noinfo_show(struct mem_ctl_info *mci, char *data) 947{ 948 return sprintf(data,"%d\n", mci->ue_noinfo_count); 949} 950 951static ssize_t mci_seconds_show(struct mem_ctl_info *mci, char *data) 952{ 953 return sprintf(data,"%ld\n", (jiffies - mci->start_time) / HZ); 954} 955 956static ssize_t mci_ctl_name_show(struct mem_ctl_info *mci, char *data) 957{ 958 return sprintf(data,"%s\n", mci->ctl_name); 959} 960 961static ssize_t mci_size_mb_show(struct mem_ctl_info *mci, char *data) 962{ 963 int total_pages, csrow_idx; 964 965 for (total_pages = csrow_idx = 0; csrow_idx < mci->nr_csrows; 966 csrow_idx++) { 967 struct csrow_info *csrow = &mci->csrows[csrow_idx]; 968 969 if (!csrow->nr_pages) 970 continue; 971 972 total_pages += csrow->nr_pages; 973 } 974 975 return sprintf(data,"%u\n", PAGES_TO_MiB(total_pages)); 976} 977 978struct mcidev_attribute { 979 struct attribute attr; 980 ssize_t (*show)(struct mem_ctl_info *,char *); 981 ssize_t (*store)(struct mem_ctl_info *, const char *,size_t); 982}; 983 984#define to_mci(k) container_of(k, struct mem_ctl_info, edac_mci_kobj) 985#define to_mcidev_attr(a) container_of(a, struct mcidev_attribute, attr) 986 987/* MCI show/store functions for top most object */ 988static ssize_t mcidev_show(struct kobject *kobj, struct attribute *attr, 989 char *buffer) 990{ 991 struct mem_ctl_info *mem_ctl_info = to_mci(kobj); 992 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr); 993 994 if (mcidev_attr->show) 995 return mcidev_attr->show(mem_ctl_info, buffer); 996 997 return -EIO; 998} 999 1000static ssize_t mcidev_store(struct kobject *kobj, struct attribute *attr, 1001 const char *buffer, size_t count) 1002{ 1003 struct mem_ctl_info *mem_ctl_info = to_mci(kobj); 1004 struct mcidev_attribute * mcidev_attr = to_mcidev_attr(attr); 1005 1006 if (mcidev_attr->store) 1007 return mcidev_attr->store(mem_ctl_info, buffer, count); 1008 1009 return -EIO; 1010} 1011 1012static struct sysfs_ops mci_ops = { 1013 .show = mcidev_show, 1014 .store = mcidev_store 1015}; 1016 1017#define MCIDEV_ATTR(_name,_mode,_show,_store) \ 1018struct mcidev_attribute mci_attr_##_name = { \ 1019 .attr = {.name = __stringify(_name), .mode = _mode }, \ 1020 .show = _show, \ 1021 .store = _store, \ 1022}; 1023 1024/* default Control file */ 1025MCIDEV_ATTR(reset_counters,S_IWUSR,NULL,mci_reset_counters_store); 1026 1027/* default Attribute files */ 1028MCIDEV_ATTR(mc_name,S_IRUGO,mci_ctl_name_show,NULL); 1029MCIDEV_ATTR(size_mb,S_IRUGO,mci_size_mb_show,NULL); 1030MCIDEV_ATTR(seconds_since_reset,S_IRUGO,mci_seconds_show,NULL); 1031MCIDEV_ATTR(ue_noinfo_count,S_IRUGO,mci_ue_noinfo_show,NULL); 1032MCIDEV_ATTR(ce_noinfo_count,S_IRUGO,mci_ce_noinfo_show,NULL); 1033MCIDEV_ATTR(ue_count,S_IRUGO,mci_ue_count_show,NULL); 1034MCIDEV_ATTR(ce_count,S_IRUGO,mci_ce_count_show,NULL); 1035 1036static struct mcidev_attribute *mci_attr[] = { 1037 &mci_attr_reset_counters, 1038 &mci_attr_mc_name, 1039 &mci_attr_size_mb, 1040 &mci_attr_seconds_since_reset, 1041 &mci_attr_ue_noinfo_count, 1042 &mci_attr_ce_noinfo_count, 1043 &mci_attr_ue_count, 1044 &mci_attr_ce_count, 1045 NULL 1046}; 1047 1048/* 1049 * Release of a MC controlling instance 1050 */ 1051static void edac_mci_instance_release(struct kobject *kobj) 1052{ 1053 struct mem_ctl_info *mci; 1054 1055 mci = to_mci(kobj); 1056 debugf0("%s() idx=%d\n", __func__, mci->mc_idx); 1057 complete(&mci->kobj_complete); 1058} 1059 1060static struct kobj_type ktype_mci = { 1061 .release = edac_mci_instance_release, 1062 .sysfs_ops = &mci_ops, 1063 .default_attrs = (struct attribute **) mci_attr, 1064}; 1065 1066 1067#define EDAC_DEVICE_SYMLINK "device" 1068 1069/* 1070 * Create a new Memory Controller kobject instance, 1071 * mc<id> under the 'mc' directory 1072 * 1073 * Return: 1074 * 0 Success 1075 * !0 Failure 1076 */ 1077static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) 1078{ 1079 int i; 1080 int err; 1081 struct csrow_info *csrow; 1082 struct kobject *edac_mci_kobj=&mci->edac_mci_kobj; 1083 1084 debugf0("%s() idx=%d\n", __func__, mci->mc_idx); 1085 memset(edac_mci_kobj, 0, sizeof(*edac_mci_kobj)); 1086 1087 /* set the name of the mc<id> object */ 1088 err = kobject_set_name(edac_mci_kobj,"mc%d",mci->mc_idx); 1089 if (err) 1090 return err; 1091 1092 /* link to our parent the '..../edac/mc' object */ 1093 edac_mci_kobj->parent = &edac_memctrl_kobj; 1094 edac_mci_kobj->ktype = &ktype_mci; 1095 1096 /* register the mc<id> kobject */ 1097 err = kobject_register(edac_mci_kobj); 1098 if (err) 1099 return err; 1100 1101 /* create a symlink for the device */ 1102 err = sysfs_create_link(edac_mci_kobj, &mci->dev->kobj, 1103 EDAC_DEVICE_SYMLINK); 1104 if (err) 1105 goto fail0; 1106 1107 /* Make directories for each CSROW object 1108 * under the mc<id> kobject 1109 */ 1110 for (i = 0; i < mci->nr_csrows; i++) { 1111 csrow = &mci->csrows[i]; 1112 1113 /* Only expose populated CSROWs */ 1114 if (csrow->nr_pages > 0) { 1115 err = edac_create_csrow_object(edac_mci_kobj,csrow,i); 1116 if (err) 1117 goto fail1; 1118 } 1119 } 1120 1121 return 0; 1122 1123 /* CSROW error: backout what has already been registered, */ 1124fail1: 1125 for ( i--; i >= 0; i--) { 1126 if (csrow->nr_pages > 0) { 1127 init_completion(&csrow->kobj_complete); 1128 kobject_unregister(&mci->csrows[i].kobj); 1129 wait_for_completion(&csrow->kobj_complete); 1130 } 1131 } 1132 1133fail0: 1134 init_completion(&mci->kobj_complete); 1135 kobject_unregister(edac_mci_kobj); 1136 wait_for_completion(&mci->kobj_complete); 1137 return err; 1138} 1139 1140/* 1141 * remove a Memory Controller instance 1142 */ 1143static void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) 1144{ 1145 int i; 1146 1147 debugf0("%s()\n", __func__); 1148 1149 /* remove all csrow kobjects */ 1150 for (i = 0; i < mci->nr_csrows; i++) { 1151 if (mci->csrows[i].nr_pages > 0) { 1152 init_completion(&mci->csrows[i].kobj_complete); 1153 kobject_unregister(&mci->csrows[i].kobj); 1154 wait_for_completion(&mci->csrows[i].kobj_complete); 1155 } 1156 } 1157 1158 sysfs_remove_link(&mci->edac_mci_kobj, EDAC_DEVICE_SYMLINK); 1159 init_completion(&mci->kobj_complete); 1160 kobject_unregister(&mci->edac_mci_kobj); 1161 wait_for_completion(&mci->kobj_complete); 1162} 1163 1164/* END OF sysfs data and methods */ 1165 1166#ifdef CONFIG_EDAC_DEBUG 1167 1168void edac_mc_dump_channel(struct channel_info *chan) 1169{ 1170 debugf4("\tchannel = %p\n", chan); 1171 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); 1172 debugf4("\tchannel->ce_count = %d\n", chan->ce_count); 1173 debugf4("\tchannel->label = '%s'\n", chan->label); 1174 debugf4("\tchannel->csrow = %p\n\n", chan->csrow); 1175} 1176EXPORT_SYMBOL_GPL(edac_mc_dump_channel); 1177 1178void edac_mc_dump_csrow(struct csrow_info *csrow) 1179{ 1180 debugf4("\tcsrow = %p\n", csrow); 1181 debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx); 1182 debugf4("\tcsrow->first_page = 0x%lx\n", 1183 csrow->first_page); 1184 debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page); 1185 debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask); 1186 debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages); 1187 debugf4("\tcsrow->nr_channels = %d\n", 1188 csrow->nr_channels); 1189 debugf4("\tcsrow->channels = %p\n", csrow->channels); 1190 debugf4("\tcsrow->mci = %p\n\n", csrow->mci); 1191} 1192EXPORT_SYMBOL_GPL(edac_mc_dump_csrow); 1193 1194void edac_mc_dump_mci(struct mem_ctl_info *mci) 1195{ 1196 debugf3("\tmci = %p\n", mci); 1197 debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap); 1198 debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap); 1199 debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap); 1200 debugf4("\tmci->edac_check = %p\n", mci->edac_check); 1201 debugf3("\tmci->nr_csrows = %d, csrows = %p\n", 1202 mci->nr_csrows, mci->csrows); 1203 debugf3("\tdev = %p\n", mci->dev); 1204 debugf3("\tmod_name:ctl_name = %s:%s\n", 1205 mci->mod_name, mci->ctl_name); 1206 debugf3("\tpvt_info = %p\n\n", mci->pvt_info); 1207} 1208EXPORT_SYMBOL_GPL(edac_mc_dump_mci); 1209 1210#endif /* CONFIG_EDAC_DEBUG */ 1211 1212/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'. 1213 * Adjust 'ptr' so that its alignment is at least as stringent as what the 1214 * compiler would provide for X and return the aligned result. 1215 * 1216 * If 'size' is a constant, the compiler will optimize this whole function 1217 * down to either a no-op or the addition of a constant to the value of 'ptr'. 1218 */ 1219static inline char * align_ptr(void *ptr, unsigned size) 1220{ 1221 unsigned align, r; 1222 1223 /* Here we assume that the alignment of a "long long" is the most 1224 * stringent alignment that the compiler will ever provide by default. 1225 * As far as I know, this is a reasonable assumption. 1226 */ 1227 if (size > sizeof(long)) 1228 align = sizeof(long long); 1229 else if (size > sizeof(int)) 1230 align = sizeof(long); 1231 else if (size > sizeof(short)) 1232 align = sizeof(int); 1233 else if (size > sizeof(char)) 1234 align = sizeof(short); 1235 else 1236 return (char *) ptr; 1237 1238 r = size % align; 1239 1240 if (r == 0) 1241 return (char *) ptr; 1242 1243 return (char *) (((unsigned long) ptr) + align - r); 1244} 1245 1246/** 1247 * edac_mc_alloc: Allocate a struct mem_ctl_info structure 1248 * @size_pvt: size of private storage needed 1249 * @nr_csrows: Number of CWROWS needed for this MC 1250 * @nr_chans: Number of channels for the MC 1251 * 1252 * Everything is kmalloc'ed as one big chunk - more efficient. 1253 * Only can be used if all structures have the same lifetime - otherwise 1254 * you have to allocate and initialize your own structures. 1255 * 1256 * Use edac_mc_free() to free mc structures allocated by this function. 1257 * 1258 * Returns: 1259 * NULL allocation failed 1260 * struct mem_ctl_info pointer 1261 */ 1262struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows, 1263 unsigned nr_chans) 1264{ 1265 struct mem_ctl_info *mci; 1266 struct csrow_info *csi, *csrow; 1267 struct channel_info *chi, *chp, *chan; 1268 void *pvt; 1269 unsigned size; 1270 int row, chn; 1271 1272 /* Figure out the offsets of the various items from the start of an mc 1273 * structure. We want the alignment of each item to be at least as 1274 * stringent as what the compiler would provide if we could simply 1275 * hardcode everything into a single struct. 1276 */ 1277 mci = (struct mem_ctl_info *) 0; 1278 csi = (struct csrow_info *)align_ptr(&mci[1], sizeof(*csi)); 1279 chi = (struct channel_info *) 1280 align_ptr(&csi[nr_csrows], sizeof(*chi)); 1281 pvt = align_ptr(&chi[nr_chans * nr_csrows], sz_pvt); 1282 size = ((unsigned long) pvt) + sz_pvt; 1283 1284 if ((mci = kmalloc(size, GFP_KERNEL)) == NULL) 1285 return NULL; 1286 1287 /* Adjust pointers so they point within the memory we just allocated 1288 * rather than an imaginary chunk of memory located at address 0. 1289 */ 1290 csi = (struct csrow_info *) (((char *) mci) + ((unsigned long) csi)); 1291 chi = (struct channel_info *) (((char *) mci) + ((unsigned long) chi)); 1292 pvt = sz_pvt ? (((char *) mci) + ((unsigned long) pvt)) : NULL; 1293 1294 memset(mci, 0, size); /* clear all fields */ 1295 mci->csrows = csi; 1296 mci->pvt_info = pvt; 1297 mci->nr_csrows = nr_csrows; 1298 1299 for (row = 0; row < nr_csrows; row++) { 1300 csrow = &csi[row]; 1301 csrow->csrow_idx = row; 1302 csrow->mci = mci; 1303 csrow->nr_channels = nr_chans; 1304 chp = &chi[row * nr_chans]; 1305 csrow->channels = chp; 1306 1307 for (chn = 0; chn < nr_chans; chn++) { 1308 chan = &chp[chn]; 1309 chan->chan_idx = chn; 1310 chan->csrow = csrow; 1311 } 1312 } 1313 1314 return mci; 1315} 1316EXPORT_SYMBOL_GPL(edac_mc_alloc); 1317 1318/** 1319 * edac_mc_free: Free a previously allocated 'mci' structure 1320 * @mci: pointer to a struct mem_ctl_info structure 1321 */ 1322void edac_mc_free(struct mem_ctl_info *mci) 1323{ 1324 kfree(mci); 1325} 1326EXPORT_SYMBOL_GPL(edac_mc_free); 1327 1328static struct mem_ctl_info *find_mci_by_dev(struct device *dev) 1329{ 1330 struct mem_ctl_info *mci; 1331 struct list_head *item; 1332 1333 debugf3("%s()\n", __func__); 1334 1335 list_for_each(item, &mc_devices) { 1336 mci = list_entry(item, struct mem_ctl_info, link); 1337 1338 if (mci->dev == dev) 1339 return mci; 1340 } 1341 1342 return NULL; 1343} 1344 1345/* Return 0 on success, 1 on failure. 1346 * Before calling this function, caller must 1347 * assign a unique value to mci->mc_idx. 1348 */ 1349static int add_mc_to_global_list (struct mem_ctl_info *mci) 1350{ 1351 struct list_head *item, *insert_before; 1352 struct mem_ctl_info *p; 1353 1354 insert_before = &mc_devices; 1355 1356 if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL)) 1357 goto fail0; 1358 1359 list_for_each(item, &mc_devices) { 1360 p = list_entry(item, struct mem_ctl_info, link); 1361 1362 if (p->mc_idx >= mci->mc_idx) { 1363 if (unlikely(p->mc_idx == mci->mc_idx)) 1364 goto fail1; 1365 1366 insert_before = item; 1367 break; 1368 } 1369 } 1370 1371 list_add_tail_rcu(&mci->link, insert_before); 1372 return 0; 1373 1374fail0: 1375 edac_printk(KERN_WARNING, EDAC_MC, 1376 "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, 1377 dev_name(p->dev), p->mod_name, p->ctl_name, p->mc_idx); 1378 return 1; 1379 1380fail1: 1381 edac_printk(KERN_WARNING, EDAC_MC, 1382 "bug in low-level driver: attempt to assign\n" 1383 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); 1384 return 1; 1385} 1386 1387static void complete_mc_list_del(struct rcu_head *head) 1388{ 1389 struct mem_ctl_info *mci; 1390 1391 mci = container_of(head, struct mem_ctl_info, rcu); 1392 INIT_LIST_HEAD(&mci->link); 1393 complete(&mci->complete); 1394} 1395 1396static void del_mc_from_global_list(struct mem_ctl_info *mci) 1397{ 1398 list_del_rcu(&mci->link); 1399 init_completion(&mci->complete); 1400 call_rcu(&mci->rcu, complete_mc_list_del); 1401 wait_for_completion(&mci->complete); 1402} 1403 1404/** 1405 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and 1406 * create sysfs entries associated with mci structure 1407 * @mci: pointer to the mci structure to be added to the list 1408 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure. 1409 * 1410 * Return: 1411 * 0 Success 1412 * !0 Failure 1413 */ 1414 1415/* FIXME - should a warning be printed if no error detection? correction? */ 1416int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) 1417{ 1418 debugf0("%s()\n", __func__); 1419 mci->mc_idx = mc_idx; 1420#ifdef CONFIG_EDAC_DEBUG 1421 if (edac_debug_level >= 3) 1422 edac_mc_dump_mci(mci); 1423 1424 if (edac_debug_level >= 4) { 1425 int i; 1426 1427 for (i = 0; i < mci->nr_csrows; i++) { 1428 int j; 1429 1430 edac_mc_dump_csrow(&mci->csrows[i]); 1431 for (j = 0; j < mci->csrows[i].nr_channels; j++) 1432 edac_mc_dump_channel( 1433 &mci->csrows[i].channels[j]); 1434 } 1435 } 1436#endif 1437 down(&mem_ctls_mutex); 1438 1439 if (add_mc_to_global_list(mci)) 1440 goto fail0; 1441 1442 /* set load time so that error rate can be tracked */ 1443 mci->start_time = jiffies; 1444 1445 if (edac_create_sysfs_mci_device(mci)) { 1446 edac_mc_printk(mci, KERN_WARNING, 1447 "failed to create sysfs device\n"); 1448 goto fail1; 1449 } 1450 1451 /* Report action taken */ 1452 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n", 1453 mci->mod_name, mci->ctl_name, dev_name(mci->dev)); 1454 1455 up(&mem_ctls_mutex); 1456 return 0; 1457 1458fail1: 1459 del_mc_from_global_list(mci); 1460 1461fail0: 1462 up(&mem_ctls_mutex); 1463 return 1; 1464} 1465EXPORT_SYMBOL_GPL(edac_mc_add_mc); 1466 1467/** 1468 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and 1469 * remove mci structure from global list 1470 * @pdev: Pointer to 'struct device' representing mci structure to remove. 1471 * 1472 * Return pointer to removed mci structure, or NULL if device not found. 1473 */ 1474struct mem_ctl_info * edac_mc_del_mc(struct device *dev) 1475{ 1476 struct mem_ctl_info *mci; 1477 1478 debugf0("MC: %s()\n", __func__); 1479 down(&mem_ctls_mutex); 1480 1481 if ((mci = find_mci_by_dev(dev)) == NULL) { 1482 up(&mem_ctls_mutex); 1483 return NULL; 1484 } 1485 1486 edac_remove_sysfs_mci_device(mci); 1487 del_mc_from_global_list(mci); 1488 up(&mem_ctls_mutex); 1489 edac_printk(KERN_INFO, EDAC_MC, 1490 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, 1491 mci->mod_name, mci->ctl_name, dev_name(mci->dev)); 1492 return mci; 1493} 1494EXPORT_SYMBOL_GPL(edac_mc_del_mc); 1495 1496void edac_mc_scrub_block(unsigned long page, unsigned long offset, u32 size) 1497{ 1498 struct page *pg; 1499 void *virt_addr; 1500 unsigned long flags = 0; 1501 1502 debugf3("%s()\n", __func__); 1503 1504 /* ECC error page was not in our memory. Ignore it. */ 1505 if(!pfn_valid(page)) 1506 return; 1507 1508 /* Find the actual page structure then map it and fix */ 1509 pg = pfn_to_page(page); 1510 1511 if (PageHighMem(pg)) 1512 local_irq_save(flags); 1513 1514 virt_addr = kmap_atomic(pg, KM_BOUNCE_READ); 1515 1516 /* Perform architecture specific atomic scrub operation */ 1517 atomic_scrub(virt_addr + offset, size); 1518 1519 /* Unmap and complete */ 1520 kunmap_atomic(virt_addr, KM_BOUNCE_READ); 1521 1522 if (PageHighMem(pg)) 1523 local_irq_restore(flags); 1524} 1525EXPORT_SYMBOL_GPL(edac_mc_scrub_block); 1526 1527/* FIXME - should return -1 */ 1528int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page) 1529{ 1530 struct csrow_info *csrows = mci->csrows; 1531 int row, i; 1532 1533 debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page); 1534 row = -1; 1535 1536 for (i = 0; i < mci->nr_csrows; i++) { 1537 struct csrow_info *csrow = &csrows[i]; 1538 1539 if (csrow->nr_pages == 0) 1540 continue; 1541 1542 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) " 1543 "mask(0x%lx)\n", mci->mc_idx, __func__, 1544 csrow->first_page, page, csrow->last_page, 1545 csrow->page_mask); 1546 1547 if ((page >= csrow->first_page) && 1548 (page <= csrow->last_page) && 1549 ((page & csrow->page_mask) == 1550 (csrow->first_page & csrow->page_mask))) { 1551 row = i; 1552 break; 1553 } 1554 } 1555 1556 if (row == -1) 1557 edac_mc_printk(mci, KERN_ERR, 1558 "could not look up page error address %lx\n", 1559 (unsigned long) page); 1560 1561 return row; 1562} 1563EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page); 1564 1565/* FIXME - setable log (warning/emerg) levels */ 1566/* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */ 1567void edac_mc_handle_ce(struct mem_ctl_info *mci, 1568 unsigned long page_frame_number, unsigned long offset_in_page, 1569 unsigned long syndrome, int row, int channel, const char *msg) 1570{ 1571 unsigned long remapped_page; 1572 1573 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 1574 1575 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 1576 if (row >= mci->nr_csrows || row < 0) { 1577 /* something is wrong */ 1578 edac_mc_printk(mci, KERN_ERR, 1579 "INTERNAL ERROR: row out of range " 1580 "(%d >= %d)\n", row, mci->nr_csrows); 1581 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 1582 return; 1583 } 1584 1585 if (channel >= mci->csrows[row].nr_channels || channel < 0) { 1586 /* something is wrong */ 1587 edac_mc_printk(mci, KERN_ERR, 1588 "INTERNAL ERROR: channel out of range " 1589 "(%d >= %d)\n", channel, 1590 mci->csrows[row].nr_channels); 1591 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR"); 1592 return; 1593 } 1594 1595 if (log_ce) 1596 /* FIXME - put in DIMM location */ 1597 edac_mc_printk(mci, KERN_WARNING, 1598 "CE page 0x%lx, offset 0x%lx, grain %d, syndrome " 1599 "0x%lx, row %d, channel %d, label \"%s\": %s\n", 1600 page_frame_number, offset_in_page, 1601 mci->csrows[row].grain, syndrome, row, channel, 1602 mci->csrows[row].channels[channel].label, msg); 1603 1604 mci->ce_count++; 1605 mci->csrows[row].ce_count++; 1606 mci->csrows[row].channels[channel].ce_count++; 1607 1608 if (mci->scrub_mode & SCRUB_SW_SRC) { 1609 /* 1610 * Some MC's can remap memory so that it is still available 1611 * at a different address when PCI devices map into memory. 1612 * MC's that can't do this lose the memory where PCI devices 1613 * are mapped. This mapping is MC dependant and so we call 1614 * back into the MC driver for it to map the MC page to 1615 * a physical (CPU) page which can then be mapped to a virtual 1616 * page - which can then be scrubbed. 1617 */ 1618 remapped_page = mci->ctl_page_to_phys ? 1619 mci->ctl_page_to_phys(mci, page_frame_number) : 1620 page_frame_number; 1621 1622 edac_mc_scrub_block(remapped_page, offset_in_page, 1623 mci->csrows[row].grain); 1624 } 1625} 1626EXPORT_SYMBOL_GPL(edac_mc_handle_ce); 1627 1628void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg) 1629{ 1630 if (log_ce) 1631 edac_mc_printk(mci, KERN_WARNING, 1632 "CE - no information available: %s\n", msg); 1633 1634 mci->ce_noinfo_count++; 1635 mci->ce_count++; 1636} 1637EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info); 1638 1639void edac_mc_handle_ue(struct mem_ctl_info *mci, 1640 unsigned long page_frame_number, unsigned long offset_in_page, 1641 int row, const char *msg) 1642{ 1643 int len = EDAC_MC_LABEL_LEN * 4; 1644 char labels[len + 1]; 1645 char *pos = labels; 1646 int chan; 1647 int chars; 1648 1649 debugf3("MC%d: %s()\n", mci->mc_idx, __func__); 1650 1651 /* FIXME - maybe make panic on INTERNAL ERROR an option */ 1652 if (row >= mci->nr_csrows || row < 0) { 1653 /* something is wrong */ 1654 edac_mc_printk(mci, KERN_ERR, 1655 "INTERNAL ERROR: row out of range " 1656 "(%d >= %d)\n", row, mci->nr_csrows); 1657 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR"); 1658 return; 1659 } 1660 1661 chars = snprintf(pos, len + 1, "%s", 1662 mci->csrows[row].channels[0].label); 1663 len -= chars; 1664 pos += chars; 1665 1666 for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0); 1667 chan++) { 1668 chars = snprintf(pos, len + 1, ":%s", 1669 mci->csrows[row].channels[chan].label); 1670 len -= chars; 1671 pos += chars; 1672 } 1673 1674 if (log_ue) 1675 edac_mc_printk(mci, KERN_EMERG, 1676 "UE page 0x%lx, offset 0x%lx, grain %d, row %d, " 1677 "labels \"%s\": %s\n", page_frame_number, 1678 offset_in_page, mci->csrows[row].grain, row, labels, 1679 msg); 1680 1681 if (panic_on_ue) 1682 panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, " 1683 "row %d, labels \"%s\": %s\n", mci->mc_idx, 1684 page_frame_number, offset_in_page, 1685 mci->csrows[row].grain, row, labels, msg); 1686 1687 mci->ue_count++; 1688 mci->csrows[row].ue_count++; 1689} 1690EXPORT_SYMBOL_GPL(edac_mc_handle_ue); 1691 1692void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) 1693{ 1694 if (panic_on_ue) 1695 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx); 1696 1697 if (log_ue) 1698 edac_mc_printk(mci, KERN_WARNING, 1699 "UE - no information available: %s\n", msg); 1700 mci->ue_noinfo_count++; 1701 mci->ue_count++; 1702} 1703EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); 1704 1705 1706/* 1707 * Iterate over all MC instances and check for ECC, et al, errors 1708 */ 1709static inline void check_mc_devices(void) 1710{ 1711 struct list_head *item; 1712 struct mem_ctl_info *mci; 1713 1714 debugf3("%s()\n", __func__); 1715 down(&mem_ctls_mutex); 1716 1717 list_for_each(item, &mc_devices) { 1718 mci = list_entry(item, struct mem_ctl_info, link); 1719 1720 if (mci->edac_check != NULL) 1721 mci->edac_check(mci); 1722 } 1723 1724 up(&mem_ctls_mutex); 1725} 1726 1727/* 1728 * Check MC status every poll_msec. 1729 * Check PCI status every poll_msec as well. 1730 * 1731 * This where the work gets done for edac. 1732 * 1733 * SMP safe, doesn't use NMI, and auto-rate-limits. 1734 */ 1735static void do_edac_check(void) 1736{ 1737 debugf3("%s()\n", __func__); 1738 check_mc_devices(); 1739 do_pci_parity_check(); 1740} 1741 1742static int edac_kernel_thread(void *arg) 1743{ 1744 while (!kthread_should_stop()) { 1745 do_edac_check(); 1746 1747 /* goto sleep for the interval */ 1748 schedule_timeout_interruptible((HZ * poll_msec) / 1000); 1749 try_to_freeze(); 1750 } 1751 1752 return 0; 1753} 1754 1755/* 1756 * edac_mc_init 1757 * module initialization entry point 1758 */ 1759static int __init edac_mc_init(void) 1760{ 1761 edac_printk(KERN_INFO, EDAC_MC, EDAC_MC_VERSION "\n"); 1762 1763 /* 1764 * Harvest and clear any boot/initialization PCI parity errors 1765 * 1766 * FIXME: This only clears errors logged by devices present at time of 1767 * module initialization. We should also do an initial clear 1768 * of each newly hotplugged device. 1769 */ 1770 clear_pci_parity_errors(); 1771 1772 /* Create the MC sysfs entries */ 1773 if (edac_sysfs_memctrl_setup()) { 1774 edac_printk(KERN_ERR, EDAC_MC, 1775 "Error initializing sysfs code\n"); 1776 return -ENODEV; 1777 } 1778 1779 /* Create the PCI parity sysfs entries */ 1780 if (edac_sysfs_pci_setup()) { 1781 edac_sysfs_memctrl_teardown(); 1782 edac_printk(KERN_ERR, EDAC_MC, 1783 "EDAC PCI: Error initializing sysfs code\n"); 1784 return -ENODEV; 1785 } 1786 1787 /* create our kernel thread */ 1788 edac_thread = kthread_run(edac_kernel_thread, NULL, "kedac"); 1789 1790 if (IS_ERR(edac_thread)) { 1791 /* remove the sysfs entries */ 1792 edac_sysfs_memctrl_teardown(); 1793 edac_sysfs_pci_teardown(); 1794 return PTR_ERR(edac_thread); 1795 } 1796 1797 return 0; 1798} 1799 1800/* 1801 * edac_mc_exit() 1802 * module exit/termination functioni 1803 */ 1804static void __exit edac_mc_exit(void) 1805{ 1806 debugf0("%s()\n", __func__); 1807 kthread_stop(edac_thread); 1808 1809 /* tear down the sysfs device */ 1810 edac_sysfs_memctrl_teardown(); 1811 edac_sysfs_pci_teardown(); 1812} 1813 1814module_init(edac_mc_init); 1815module_exit(edac_mc_exit); 1816 1817MODULE_LICENSE("GPL"); 1818MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" 1819 "Based on work by Dan Hollis et al"); 1820MODULE_DESCRIPTION("Core library routines for MC reporting"); 1821 1822module_param(panic_on_ue, int, 0644); 1823MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); 1824#ifdef CONFIG_PCI 1825module_param(check_pci_parity, int, 0644); 1826MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); 1827module_param(panic_on_pci_parity, int, 0644); 1828MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); 1829#endif 1830module_param(log_ue, int, 0644); 1831MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); 1832module_param(log_ce, int, 0644); 1833MODULE_PARM_DESC(log_ce, "Log correctable error to console: 0=off 1=on"); 1834module_param(poll_msec, int, 0644); 1835MODULE_PARM_DESC(poll_msec, "Polling period in milliseconds"); 1836#ifdef CONFIG_EDAC_DEBUG 1837module_param(edac_debug_level, int, 0644); 1838MODULE_PARM_DESC(edac_debug_level, "Debug level"); 1839#endif 1840