/* r82600_edac.c revision 680cbbbb0e336b04b74be48b8ddd870537f1e226 */
1/* 2 * Radisys 82600 Embedded chipset Memory Controller kernel module 3 * (C) 2005 EADS Astrium 4 * This file may be distributed under the terms of the 5 * GNU General Public License. 6 * 7 * Written by Tim Small <tim@buttersideup.com>, based on work by Thayne 8 * Harbaugh, Dan Hollis <goemon at anime dot net> and others. 9 * 10 * $Id: edac_r82600.c,v 1.1.2.6 2005/10/05 00:43:44 dsp_llnl Exp $ 11 * 12 * Written with reference to 82600 High Integration Dual PCI System 13 * Controller Data Book: 14 * http://www.radisys.com/files/support_downloads/007-01277-0002.82600DataBook.pdf 15 * references to this document given in [] 16 */ 17 18#include <linux/config.h> 19#include <linux/module.h> 20#include <linux/init.h> 21 22#include <linux/pci.h> 23#include <linux/pci_ids.h> 24 25#include <linux/slab.h> 26 27#include "edac_mc.h" 28 29#define r82600_printk(level, fmt, arg...) \ 30 edac_printk(level, "r82600", fmt, ##arg) 31 32#define r82600_mc_printk(mci, level, fmt, arg...) \ 33 edac_mc_chipset_printk(mci, level, "r82600", fmt, ##arg) 34 35/* Radisys say "The 82600 integrates a main memory SDRAM controller that 36 * supports up to four banks of memory. The four banks can support a mix of 37 * sizes of 64 bit wide (72 bits with ECC) Synchronous DRAM (SDRAM) DIMMs, 38 * each of which can be any size from 16MB to 512MB. Both registered (control 39 * signals buffered) and unbuffered DIMM types are supported. Mixing of 40 * registered and unbuffered DIMMs as well as mixing of ECC and non-ECC DIMMs 41 * is not allowed. The 82600 SDRAM interface operates at the same frequency as 42 * the CPU bus, 66MHz, 100MHz or 133MHz." 
43 */ 44 45#define R82600_NR_CSROWS 4 46#define R82600_NR_CHANS 1 47#define R82600_NR_DIMMS 4 48 49#define R82600_BRIDGE_ID 0x8200 50 51/* Radisys 82600 register addresses - device 0 function 0 - PCI bridge */ 52#define R82600_DRAMC 0x57 /* Various SDRAM related control bits 53 * all bits are R/W 54 * 55 * 7 SDRAM ISA Hole Enable 56 * 6 Flash Page Mode Enable 57 * 5 ECC Enable: 1=ECC 0=noECC 58 * 4 DRAM DIMM Type: 1= 59 * 3 BIOS Alias Disable 60 * 2 SDRAM BIOS Flash Write Enable 61 * 1:0 SDRAM Refresh Rate: 00=Disabled 62 * 01=7.8usec (256Mbit SDRAMs) 63 * 10=15.6us 11=125usec 64 */ 65 66#define R82600_SDRAMC 0x76 /* "SDRAM Control Register" 67 * More SDRAM related control bits 68 * all bits are R/W 69 * 70 * 15:8 Reserved. 71 * 72 * 7:5 Special SDRAM Mode Select 73 * 74 * 4 Force ECC 75 * 76 * 1=Drive ECC bits to 0 during 77 * write cycles (i.e. ECC test mode) 78 * 79 * 0=Normal ECC functioning 80 * 81 * 3 Enhanced Paging Enable 82 * 83 * 2 CAS# Latency 0=3clks 1=2clks 84 * 85 * 1 RAS# to CAS# Delay 0=3 1=2 86 * 87 * 0 RAS# Precharge 0=3 1=2 88 */ 89 90#define R82600_EAP 0x80 /* ECC Error Address Pointer Register 91 * 92 * 31 Disable Hardware Scrubbing (RW) 93 * 0=Scrub on corrected read 94 * 1=Don't scrub on corrected read 95 * 96 * 30:12 Error Address Pointer (RO) 97 * Upper 19 bits of error address 98 * 99 * 11:4 Syndrome Bits (RO) 100 * 101 * 3 BSERR# on multibit error (RW) 102 * 1=enable 0=disable 103 * 104 * 2 NMI on Single Bit Eror (RW) 105 * 1=NMI triggered by SBE n.b. 
other 106 * prerequeists 107 * 0=NMI not triggered 108 * 109 * 1 MBE (R/WC) 110 * read 1=MBE at EAP (see above) 111 * read 0=no MBE, or SBE occurred first 112 * write 1=Clear MBE status (must also 113 * clear SBE) 114 * write 0=NOP 115 * 116 * 1 SBE (R/WC) 117 * read 1=SBE at EAP (see above) 118 * read 0=no SBE, or MBE occurred first 119 * write 1=Clear SBE status (must also 120 * clear MBE) 121 * write 0=NOP 122 */ 123 124#define R82600_DRBA 0x60 /* + 0x60..0x63 SDRAM Row Boundry Address 125 * Registers 126 * 127 * 7:0 Address lines 30:24 - upper limit of 128 * each row [p57] 129 */ 130 131struct r82600_error_info { 132 u32 eapr; 133}; 134 135 136static unsigned int disable_hardware_scrub = 0; 137 138 139static void r82600_get_error_info (struct mem_ctl_info *mci, 140 struct r82600_error_info *info) 141{ 142 pci_read_config_dword(mci->pdev, R82600_EAP, &info->eapr); 143 144 if (info->eapr & BIT(0)) 145 /* Clear error to allow next error to be reported [p.62] */ 146 pci_write_bits32(mci->pdev, R82600_EAP, 147 ((u32) BIT(0) & (u32) BIT(1)), 148 ((u32) BIT(0) & (u32) BIT(1))); 149 150 if (info->eapr & BIT(1)) 151 /* Clear error to allow next error to be reported [p.62] */ 152 pci_write_bits32(mci->pdev, R82600_EAP, 153 ((u32) BIT(0) & (u32) BIT(1)), 154 ((u32) BIT(0) & (u32) BIT(1))); 155} 156 157 158static int r82600_process_error_info (struct mem_ctl_info *mci, 159 struct r82600_error_info *info, int handle_errors) 160{ 161 int error_found; 162 u32 eapaddr, page; 163 u32 syndrome; 164 165 error_found = 0; 166 167 /* bits 30:12 store the upper 19 bits of the 32 bit error address */ 168 eapaddr = ((info->eapr >> 12) & 0x7FFF) << 13; 169 /* Syndrome in bits 11:4 [p.62] */ 170 syndrome = (info->eapr >> 4) & 0xFF; 171 172 /* the R82600 reports at less than page * 173 * granularity (upper 19 bits only) */ 174 page = eapaddr >> PAGE_SHIFT; 175 176 if (info->eapr & BIT(0)) { /* CE? 
*/ 177 error_found = 1; 178 179 if (handle_errors) 180 edac_mc_handle_ce( 181 mci, page, 0, /* not avail */ 182 syndrome, 183 edac_mc_find_csrow_by_page(mci, page), 184 0, /* channel */ 185 mci->ctl_name); 186 } 187 188 if (info->eapr & BIT(1)) { /* UE? */ 189 error_found = 1; 190 191 if (handle_errors) 192 /* 82600 doesn't give enough info */ 193 edac_mc_handle_ue(mci, page, 0, 194 edac_mc_find_csrow_by_page(mci, page), 195 mci->ctl_name); 196 } 197 198 return error_found; 199} 200 201static void r82600_check(struct mem_ctl_info *mci) 202{ 203 struct r82600_error_info info; 204 205 debugf1("MC%d: %s()\n", mci->mc_idx, __func__); 206 r82600_get_error_info(mci, &info); 207 r82600_process_error_info(mci, &info, 1); 208} 209 210static int r82600_probe1(struct pci_dev *pdev, int dev_idx) 211{ 212 int rc = -ENODEV; 213 int index; 214 struct mem_ctl_info *mci = NULL; 215 u8 dramcr; 216 u32 ecc_on; 217 u32 reg_sdram; 218 u32 eapr; 219 u32 scrub_disabled; 220 u32 sdram_refresh_rate; 221 u32 row_high_limit_last = 0; 222 u32 eap_init_bits; 223 224 debugf0("%s()\n", __func__); 225 226 227 pci_read_config_byte(pdev, R82600_DRAMC, &dramcr); 228 pci_read_config_dword(pdev, R82600_EAP, &eapr); 229 230 ecc_on = dramcr & BIT(5); 231 reg_sdram = dramcr & BIT(4); 232 scrub_disabled = eapr & BIT(31); 233 sdram_refresh_rate = dramcr & (BIT(0) | BIT(1)); 234 235 debugf2("%s(): sdram refresh rate = %#0x\n", __func__, 236 sdram_refresh_rate); 237 238 debugf2("%s(): DRAMC register = %#0x\n", __func__, dramcr); 239 240 mci = edac_mc_alloc(0, R82600_NR_CSROWS, R82600_NR_CHANS); 241 242 if (mci == NULL) { 243 rc = -ENOMEM; 244 goto fail; 245 } 246 247 debugf0("%s(): mci = %p\n", __func__, mci); 248 249 mci->pdev = pdev; 250 mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_DDR; 251 252 mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; 253 /* FIXME try to work out if the chip leads have been * 254 * used for COM2 instead on this board? [MA6?] 
MAYBE: */ 255 256 /* On the R82600, the pins for memory bits 72:65 - i.e. the * 257 * EC bits are shared with the pins for COM2 (!), so if COM2 * 258 * is enabled, we assume COM2 is wired up, and thus no EDAC * 259 * is possible. */ 260 mci->edac_cap = EDAC_FLAG_NONE | EDAC_FLAG_EC | EDAC_FLAG_SECDED; 261 if (ecc_on) { 262 if (scrub_disabled) 263 debugf3("%s(): mci = %p - Scrubbing disabled! EAP: " 264 "%#0x\n", __func__, mci, eapr); 265 } else 266 mci->edac_cap = EDAC_FLAG_NONE; 267 268 mci->mod_name = EDAC_MOD_STR; 269 mci->mod_ver = "$Revision: 1.1.2.6 $"; 270 mci->ctl_name = "R82600"; 271 mci->edac_check = r82600_check; 272 mci->ctl_page_to_phys = NULL; 273 274 for (index = 0; index < mci->nr_csrows; index++) { 275 struct csrow_info *csrow = &mci->csrows[index]; 276 u8 drbar; /* sDram Row Boundry Address Register */ 277 u32 row_high_limit; 278 u32 row_base; 279 280 /* find the DRAM Chip Select Base address and mask */ 281 pci_read_config_byte(mci->pdev, R82600_DRBA + index, &drbar); 282 283 debugf1("MC%d: %s() Row=%d DRBA = %#0x\n", mci->mc_idx, 284 __func__, index, drbar); 285 286 row_high_limit = ((u32) drbar << 24); 287/* row_high_limit = ((u32)drbar << 24) | 0xffffffUL; */ 288 289 debugf1("MC%d: %s() Row=%d, Boundry Address=%#0x, Last = " 290 "%#0x \n", mci->mc_idx, __func__, index, 291 row_high_limit, row_high_limit_last); 292 293 /* Empty row [p.57] */ 294 if (row_high_limit == row_high_limit_last) 295 continue; 296 297 row_base = row_high_limit_last; 298 299 csrow->first_page = row_base >> PAGE_SHIFT; 300 csrow->last_page = (row_high_limit >> PAGE_SHIFT) - 1; 301 csrow->nr_pages = csrow->last_page - csrow->first_page + 1; 302 /* Error address is top 19 bits - so granularity is * 303 * 14 bits */ 304 csrow->grain = 1 << 14; 305 csrow->mtype = reg_sdram ? MEM_RDDR : MEM_DDR; 306 /* FIXME - check that this is unknowable with this chipset */ 307 csrow->dtype = DEV_UNKNOWN; 308 309 /* Mode is global on 82600 */ 310 csrow->edac_mode = ecc_on ? 
EDAC_SECDED : EDAC_NONE; 311 row_high_limit_last = row_high_limit; 312 } 313 314 /* clear counters */ 315 /* FIXME should we? */ 316 317 if (edac_mc_add_mc(mci)) { 318 debugf3("%s(): failed edac_mc_add_mc()\n", __func__); 319 goto fail; 320 } 321 322 /* get this far and it's successful */ 323 324 /* Clear error flags to allow next error to be reported [p.62] */ 325 /* Test systems seem to always have the UE flag raised on boot */ 326 327 eap_init_bits = BIT(0) & BIT(1); 328 if (disable_hardware_scrub) { 329 eap_init_bits |= BIT(31); 330 debugf3("%s(): Disabling Hardware Scrub (scrub on error)\n", 331 __func__); 332 } 333 334 pci_write_bits32(mci->pdev, R82600_EAP, eap_init_bits, 335 eap_init_bits); 336 337 debugf3("%s(): success\n", __func__); 338 return 0; 339 340fail: 341 if (mci) 342 edac_mc_free(mci); 343 344 return rc; 345} 346 347/* returns count (>= 0), or negative on error */ 348static int __devinit r82600_init_one(struct pci_dev *pdev, 349 const struct pci_device_id *ent) 350{ 351 debugf0("%s()\n", __func__); 352 353 /* don't need to call pci_device_enable() */ 354 return r82600_probe1(pdev, ent->driver_data); 355} 356 357 358static void __devexit r82600_remove_one(struct pci_dev *pdev) 359{ 360 struct mem_ctl_info *mci; 361 362 debugf0("%s()\n", __func__); 363 364 if (((mci = edac_mc_find_mci_by_pdev(pdev)) != NULL) && 365 !edac_mc_del_mc(mci)) 366 edac_mc_free(mci); 367} 368 369 370static const struct pci_device_id r82600_pci_tbl[] __devinitdata = { 371 {PCI_DEVICE(PCI_VENDOR_ID_RADISYS, R82600_BRIDGE_ID)}, 372 {0,} /* 0 terminated list. 
*/ 373}; 374 375MODULE_DEVICE_TABLE(pci, r82600_pci_tbl); 376 377 378static struct pci_driver r82600_driver = { 379 .name = EDAC_MOD_STR, 380 .probe = r82600_init_one, 381 .remove = __devexit_p(r82600_remove_one), 382 .id_table = r82600_pci_tbl, 383}; 384 385 386static int __init r82600_init(void) 387{ 388 return pci_register_driver(&r82600_driver); 389} 390 391 392static void __exit r82600_exit(void) 393{ 394 pci_unregister_driver(&r82600_driver); 395} 396 397 398module_init(r82600_init); 399module_exit(r82600_exit); 400 401 402MODULE_LICENSE("GPL"); 403MODULE_AUTHOR("Tim Small <tim@buttersideup.com> - WPAD Ltd. " 404 "on behalf of EADS Astrium"); 405MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers"); 406 407module_param(disable_hardware_scrub, bool, 0644); 408MODULE_PARM_DESC(disable_hardware_scrub, 409 "If set, disable the chipset's automatic scrub for CEs"); 410