1/* 2 * PCIe AER software error injection support. 3 * 4 * Debuging PCIe AER code is quite difficult because it is hard to 5 * trigger various real hardware errors. Software based error 6 * injection can fake almost all kinds of errors with the help of a 7 * user space helper tool aer-inject, which can be gotten from: 8 * http://www.kernel.org/pub/linux/utils/pci/aer-inject/ 9 * 10 * Copyright 2009 Intel Corporation. 11 * Huang Ying <ying.huang@intel.com> 12 * 13 * This program is free software; you can redistribute it and/or 14 * modify it under the terms of the GNU General Public License 15 * as published by the Free Software Foundation; version 2 16 * of the License. 17 * 18 */ 19 20#include <linux/module.h> 21#include <linux/init.h> 22#include <linux/miscdevice.h> 23#include <linux/pci.h> 24#include <linux/slab.h> 25#include <linux/fs.h> 26#include <linux/uaccess.h> 27#include <linux/stddef.h> 28#include "aerdrv.h" 29 30/* Override the existing corrected and uncorrected error masks */ 31static bool aer_mask_override; 32module_param(aer_mask_override, bool, 0); 33 34struct aer_error_inj { 35 u8 bus; 36 u8 dev; 37 u8 fn; 38 u32 uncor_status; 39 u32 cor_status; 40 u32 header_log0; 41 u32 header_log1; 42 u32 header_log2; 43 u32 header_log3; 44 u16 domain; 45}; 46 47struct aer_error { 48 struct list_head list; 49 u16 domain; 50 unsigned int bus; 51 unsigned int devfn; 52 int pos_cap_err; 53 54 u32 uncor_status; 55 u32 cor_status; 56 u32 header_log0; 57 u32 header_log1; 58 u32 header_log2; 59 u32 header_log3; 60 u32 root_status; 61 u32 source_id; 62}; 63 64struct pci_bus_ops { 65 struct list_head list; 66 struct pci_bus *bus; 67 struct pci_ops *ops; 68}; 69 70static LIST_HEAD(einjected); 71 72static LIST_HEAD(pci_bus_ops_list); 73 74/* Protect einjected and pci_bus_ops_list */ 75static DEFINE_SPINLOCK(inject_lock); 76 77static void aer_error_init(struct aer_error *err, u16 domain, 78 unsigned int bus, unsigned int devfn, 79 int pos_cap_err) 80{ 81 INIT_LIST_HEAD(&err->list); 82 err->domain = domain; 83 err->bus = bus; 84 err->devfn = devfn; 85 err->pos_cap_err = pos_cap_err; 86} 87 88/* inject_lock must be held before calling */ 89static struct aer_error *__find_aer_error(u16 domain, unsigned int bus, 90 unsigned int devfn) 91{ 92 struct aer_error *err; 93 94 list_for_each_entry(err, &einjected, list) { 95 if (domain == err->domain && 96 bus == err->bus && 97 devfn == err->devfn) 98 return err; 99 } 100 return NULL; 101} 102 103/* inject_lock must be held before calling */ 104static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) 105{ 106 int domain = pci_domain_nr(dev->bus); 107 if (domain < 0) 108 return NULL; 109 return __find_aer_error((u16)domain, dev->bus->number, dev->devfn); 110} 111 112/* inject_lock must be held before calling */ 113static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus) 114{ 115 struct pci_bus_ops *bus_ops; 116 117 list_for_each_entry(bus_ops, &pci_bus_ops_list, list) { 118 if (bus_ops->bus == bus) 119 return bus_ops->ops; 120 } 121 return NULL; 122} 123 124static struct pci_bus_ops *pci_bus_ops_pop(void) 125{ 126 unsigned long flags; 127 struct pci_bus_ops *bus_ops = NULL; 128 129 spin_lock_irqsave(&inject_lock, flags); 130 if (list_empty(&pci_bus_ops_list)) 131 bus_ops = NULL; 132 else { 133 struct list_head *lh = pci_bus_ops_list.next; 134 list_del(lh); 135 bus_ops = list_entry(lh, struct pci_bus_ops, list); 136 } 137 spin_unlock_irqrestore(&inject_lock, flags); 138 return bus_ops; 139} 140 141static u32 *find_pci_config_dword(struct aer_error *err, int where, 142 int *prw1cs) 143{ 144 int rw1cs = 0; 145 u32 *target = NULL; 146 147 if (err->pos_cap_err == -1) 148 return NULL; 149 150 switch (where - err->pos_cap_err) { 151 case PCI_ERR_UNCOR_STATUS: 152 target = &err->uncor_status; 153 rw1cs = 1; 154 break; 155 case PCI_ERR_COR_STATUS: 156 target = &err->cor_status; 157 rw1cs = 1; 158 break; 159 case PCI_ERR_HEADER_LOG: 160 target = &err->header_log0; 161 break; 162 case PCI_ERR_HEADER_LOG+4: 163 target = &err->header_log1; 164 break; 165 case PCI_ERR_HEADER_LOG+8: 166 target = &err->header_log2; 167 break; 168 case PCI_ERR_HEADER_LOG+12: 169 target = &err->header_log3; 170 break; 171 case PCI_ERR_ROOT_STATUS: 172 target = &err->root_status; 173 rw1cs = 1; 174 break; 175 case PCI_ERR_ROOT_ERR_SRC: 176 target = &err->source_id; 177 break; 178 } 179 if (prw1cs) 180 *prw1cs = rw1cs; 181 return target; 182} 183 184static int pci_read_aer(struct pci_bus *bus, unsigned int devfn, int where, 185 int size, u32 *val) 186{ 187 u32 *sim; 188 struct aer_error *err; 189 unsigned long flags; 190 struct pci_ops *ops; 191 int domain; 192 193 spin_lock_irqsave(&inject_lock, flags); 194 if (size != sizeof(u32)) 195 goto out; 196 domain = pci_domain_nr(bus); 197 if (domain < 0) 198 goto out; 199 err = __find_aer_error((u16)domain, bus->number, devfn); 200 if (!err) 201 goto out; 202 203 sim = find_pci_config_dword(err, where, NULL); 204 if (sim) { 205 *val = *sim; 206 spin_unlock_irqrestore(&inject_lock, flags); 207 return 0; 208 } 209out: 210 ops = __find_pci_bus_ops(bus); 211 spin_unlock_irqrestore(&inject_lock, flags); 212 return ops->read(bus, devfn, where, size, val); 213} 214 215int pci_write_aer(struct pci_bus *bus, unsigned int devfn, int where, int size, 216 u32 val) 217{ 218 u32 *sim; 219 struct aer_error *err; 220 unsigned long flags; 221 int rw1cs; 222 struct pci_ops *ops; 223 int domain; 224 225 spin_lock_irqsave(&inject_lock, flags); 226 if (size != sizeof(u32)) 227 goto out; 228 domain = pci_domain_nr(bus); 229 if (domain < 0) 230 goto out; 231 err = __find_aer_error((u16)domain, bus->number, devfn); 232 if (!err) 233 goto out; 234 235 sim = find_pci_config_dword(err, where, &rw1cs); 236 if (sim) { 237 if (rw1cs) 238 *sim ^= val; 239 else 240 *sim = val; 241 spin_unlock_irqrestore(&inject_lock, flags); 242 return 0; 243 } 244out: 245 ops = __find_pci_bus_ops(bus); 246 spin_unlock_irqrestore(&inject_lock, flags); 247 return ops->write(bus, devfn, where, size, val); 248} 249 250static struct pci_ops pci_ops_aer = { 251 .read = pci_read_aer, 252 .write = pci_write_aer, 253}; 254 255static void pci_bus_ops_init(struct pci_bus_ops *bus_ops, 256 struct pci_bus *bus, 257 struct pci_ops *ops) 258{ 259 INIT_LIST_HEAD(&bus_ops->list); 260 bus_ops->bus = bus; 261 bus_ops->ops = ops; 262} 263 264static int pci_bus_set_aer_ops(struct pci_bus *bus) 265{ 266 struct pci_ops *ops; 267 struct pci_bus_ops *bus_ops; 268 unsigned long flags; 269 270 bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL); 271 if (!bus_ops) 272 return -ENOMEM; 273 ops = pci_bus_set_ops(bus, &pci_ops_aer); 274 spin_lock_irqsave(&inject_lock, flags); 275 if (ops == &pci_ops_aer) 276 goto out; 277 pci_bus_ops_init(bus_ops, bus, ops); 278 list_add(&bus_ops->list, &pci_bus_ops_list); 279 bus_ops = NULL; 280out: 281 spin_unlock_irqrestore(&inject_lock, flags); 282 kfree(bus_ops); 283 return 0; 284} 285 286static struct pci_dev *pcie_find_root_port(struct pci_dev *dev) 287{ 288 while (1) { 289 if (!pci_is_pcie(dev)) 290 break; 291 if (dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) 292 return dev; 293 if (!dev->bus->self) 294 break; 295 dev = dev->bus->self; 296 } 297 return NULL; 298} 299 300static int find_aer_device_iter(struct device *device, void *data) 301{ 302 struct pcie_device **result = data; 303 struct pcie_device *pcie_dev; 304 305 if (device->bus == &pcie_port_bus_type) { 306 pcie_dev = to_pcie_device(device); 307 if (pcie_dev->service & PCIE_PORT_SERVICE_AER) { 308 *result = pcie_dev; 309 return 1; 310 } 311 } 312 return 0; 313} 314 315static int find_aer_device(struct pci_dev *dev, struct pcie_device **result) 316{ 317 return device_for_each_child(&dev->dev, result, find_aer_device_iter); 318} 319 320static int aer_inject(struct aer_error_inj *einj) 321{ 322 struct aer_error *err, *rperr; 323 struct aer_error *err_alloc = NULL, *rperr_alloc = NULL; 324 struct pci_dev *dev, *rpdev; 325 struct pcie_device *edev; 326 unsigned long flags; 327 unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); 328 int pos_cap_err, rp_pos_cap_err; 329 u32 sever, cor_mask, uncor_mask, cor_mask_orig = 0, uncor_mask_orig = 0; 330 int ret = 0; 331 332 dev = pci_get_domain_bus_and_slot((int)einj->domain, einj->bus, devfn); 333 if (!dev) 334 return -ENODEV; 335 rpdev = pcie_find_root_port(dev); 336 if (!rpdev) { 337 ret = -ENOTTY; 338 goto out_put; 339 } 340 341 pos_cap_err = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); 342 if (!pos_cap_err) { 343 ret = -ENOTTY; 344 goto out_put; 345 } 346 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever); 347 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, &cor_mask); 348 pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 349 &uncor_mask); 350 351 rp_pos_cap_err = pci_find_ext_capability(rpdev, PCI_EXT_CAP_ID_ERR); 352 if (!rp_pos_cap_err) { 353 ret = -ENOTTY; 354 goto out_put; 355 } 356 357 err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); 358 if (!err_alloc) { 359 ret = -ENOMEM; 360 goto out_put; 361 } 362 rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); 363 if (!rperr_alloc) { 364 ret = -ENOMEM; 365 goto out_put; 366 } 367 368 if (aer_mask_override) { 369 cor_mask_orig = cor_mask; 370 cor_mask &= !(einj->cor_status); 371 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, 372 cor_mask); 373 374 uncor_mask_orig = uncor_mask; 375 uncor_mask &= !(einj->uncor_status); 376 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 377 uncor_mask); 378 } 379 380 spin_lock_irqsave(&inject_lock, flags); 381 382 err = __find_aer_error_by_dev(dev); 383 if (!err) { 384 err = err_alloc; 385 err_alloc = NULL; 386 aer_error_init(err, einj->domain, einj->bus, devfn, 387 pos_cap_err); 388 list_add(&err->list, &einjected); 389 } 390 err->uncor_status |= einj->uncor_status; 391 err->cor_status |= einj->cor_status; 392 err->header_log0 = einj->header_log0; 393 err->header_log1 = einj->header_log1; 394 err->header_log2 = einj->header_log2; 395 err->header_log3 = einj->header_log3; 396 397 if (!aer_mask_override && einj->cor_status && 398 !(einj->cor_status & ~cor_mask)) { 399 ret = -EINVAL; 400 printk(KERN_WARNING "The correctable error(s) is masked " 401 "by device\n"); 402 spin_unlock_irqrestore(&inject_lock, flags); 403 goto out_put; 404 } 405 if (!aer_mask_override && einj->uncor_status && 406 !(einj->uncor_status & ~uncor_mask)) { 407 ret = -EINVAL; 408 printk(KERN_WARNING "The uncorrectable error(s) is masked " 409 "by device\n"); 410 spin_unlock_irqrestore(&inject_lock, flags); 411 goto out_put; 412 } 413 414 rperr = __find_aer_error_by_dev(rpdev); 415 if (!rperr) { 416 rperr = rperr_alloc; 417 rperr_alloc = NULL; 418 aer_error_init(rperr, pci_domain_nr(rpdev->bus), 419 rpdev->bus->number, rpdev->devfn, 420 rp_pos_cap_err); 421 list_add(&rperr->list, &einjected); 422 } 423 if (einj->cor_status) { 424 if (rperr->root_status & PCI_ERR_ROOT_COR_RCV) 425 rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; 426 else 427 rperr->root_status |= PCI_ERR_ROOT_COR_RCV; 428 rperr->source_id &= 0xffff0000; 429 rperr->source_id |= (einj->bus << 8) | devfn; 430 } 431 if (einj->uncor_status) { 432 if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) 433 rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; 434 if (sever & einj->uncor_status) { 435 rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV; 436 if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)) 437 rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL; 438 } else 439 rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; 440 rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; 441 rperr->source_id &= 0x0000ffff; 442 rperr->source_id |= ((einj->bus << 8) | devfn) << 16; 443 } 444 spin_unlock_irqrestore(&inject_lock, flags); 445 446 if (aer_mask_override) { 447 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_COR_MASK, 448 cor_mask_orig); 449 pci_write_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_MASK, 450 uncor_mask_orig); 451 } 452 453 ret = pci_bus_set_aer_ops(dev->bus); 454 if (ret) 455 goto out_put; 456 ret = pci_bus_set_aer_ops(rpdev->bus); 457 if (ret) 458 goto out_put; 459 460 if (find_aer_device(rpdev, &edev)) { 461 if (!get_service_data(edev)) { 462 printk(KERN_WARNING "AER service is not initialized\n"); 463 ret = -EINVAL; 464 goto out_put; 465 } 466 aer_irq(-1, edev); 467 } 468 else 469 ret = -EINVAL; 470out_put: 471 kfree(err_alloc); 472 kfree(rperr_alloc); 473 pci_dev_put(dev); 474 return ret; 475} 476 477static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, 478 size_t usize, loff_t *off) 479{ 480 struct aer_error_inj einj; 481 int ret; 482 483 if (!capable(CAP_SYS_ADMIN)) 484 return -EPERM; 485 if (usize < offsetof(struct aer_error_inj, domain) || 486 usize > sizeof(einj)) 487 return -EINVAL; 488 489 memset(&einj, 0, sizeof(einj)); 490 if (copy_from_user(&einj, ubuf, usize)) 491 return -EFAULT; 492 493 ret = aer_inject(&einj); 494 return ret ? ret : usize; 495} 496 497static const struct file_operations aer_inject_fops = { 498 .write = aer_inject_write, 499 .owner = THIS_MODULE, 500 .llseek = noop_llseek, 501}; 502 503static struct miscdevice aer_inject_device = { 504 .minor = MISC_DYNAMIC_MINOR, 505 .name = "aer_inject", 506 .fops = &aer_inject_fops, 507}; 508 509static int __init aer_inject_init(void) 510{ 511 return misc_register(&aer_inject_device); 512} 513 514static void __exit aer_inject_exit(void) 515{ 516 struct aer_error *err, *err_next; 517 unsigned long flags; 518 struct pci_bus_ops *bus_ops; 519 520 misc_deregister(&aer_inject_device); 521 522 while ((bus_ops = pci_bus_ops_pop())) { 523 pci_bus_set_ops(bus_ops->bus, bus_ops->ops); 524 kfree(bus_ops); 525 } 526 527 spin_lock_irqsave(&inject_lock, flags); 528 list_for_each_entry_safe(err, err_next, &einjected, list) { 529 list_del(&err->list); 530 kfree(err); 531 } 532 spin_unlock_irqrestore(&inject_lock, flags); 533} 534 535module_init(aer_inject_init); 536module_exit(aer_inject_exit); 537 538MODULE_DESCRIPTION("PCIe AER software error injector"); 539MODULE_LICENSE("GPL"); 540