/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
#include <linux/module.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"

static void ipath_update_pio_bufs(struct ipath_devdata *);

const char *ipath_get_unit_name(int unit)
{
	static char iname[16];
	snprintf(iname, sizeof iname, "infinipath%u", unit);
	return iname;
}

#define DRIVER_LOAD_MSG	"QLogic " IPATH_DRV_NAME " loaded: "
#define PFX IPATH_DRV_NAME ": "

/*
 * The size has to be longer than this string, so we can append
 * board/chip information to it in the init code.
 */
const char ib_ipath_version[] = IPATH_IDSTR "\n";

static struct idr unit_table;
DEFINE_SPINLOCK(ipath_devs_lock);
LIST_HEAD(ipath_dev_list);

wait_queue_head_t ipath_state_wait;

unsigned ipath_debug = __IPATH_INFO;

module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(debug, "mask for debug prints");
EXPORT_SYMBOL_GPL(ipath_debug);

unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");

static unsigned ipath_hol_timeout_ms = 13000;
module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
MODULE_PARM_DESC(hol_timeout_ms,
	"duration of user app suspension after link failure");

unsigned ipath_linkrecovery = 1;
module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue");

MODULE_LICENSE("GPL");
MODULE_AUTHOR("QLogic <support@qlogic.com>");
MODULE_DESCRIPTION("QLogic InfiniPath driver");

/*
 * Table to translate the LINKTRAININGSTATE portion of
 * IBCStatus to a human-readable form.
 */
const char *ipath_ibcstatus_str[] = {
	"Disabled",
	"LinkUp",
	"PollActive",
	"PollQuiet",
	"SleepDelay",
	"SleepQuiet",
	"LState6",		/* unused */
	"LState7",		/* unused */
	"CfgDebounce",
	"CfgRcvfCfg",
	"CfgWaitRmt",
	"CfgIdle",
	"RecovRetrain",
	"CfgTxRevLane",		/* unused before IBA7220 */
	"RecovWaitRmt",
	"RecovIdle",
	/* below were added for IBA7220 */
	"CfgEnhanced",
	"CfgTest",
	"CfgWaitRmtTest",
	"CfgWaitCfgEnhanced",
	"SendTS_T",
	"SendTstIdles",
	"RcvTS_T",
	"SendTst_TS1s",
	"LTState18", "LTState19", "LTState1A", "LTState1B",
	"LTState1C", "LTState1D", "LTState1E", "LTState1F"
};

static void __devexit ipath_remove_one(struct pci_dev *);
static int __devinit ipath_init_one(struct pci_dev *,
				    const struct pci_device_id *);

/* Only needed for registration, nothing else needs this info */
#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
#define PCI_DEVICE_ID_INFINIPATH_HT 0xd

/* Number of seconds before our card status check... */
#define STATUS_TIMEOUT 60

static const struct pci_device_id ipath_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);

static struct pci_driver ipath_driver = {
	.name = IPATH_DRV_NAME,
	.probe = ipath_init_one,
	.remove = __devexit_p(ipath_remove_one),
	.id_table = ipath_pci_tbl,
	.driver = {
		.groups = ipath_driver_attr_groups,
	},
};

static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
			     u32 *bar0, u32 *bar1)
{
	int ret;

	ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
	if (ret)
		ipath_dev_err(dd, "failed to read bar0 before enable: "
			      "error %d\n", -ret);

	ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
	if (ret)
		ipath_dev_err(dd, "failed to read bar1 before enable: "
			      "error %d\n", -ret);

	ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
}

static void ipath_free_devdata(struct pci_dev *pdev,
			       struct ipath_devdata *dd)
{
	unsigned long flags;

	pci_set_drvdata(pdev, NULL);

	if (dd->ipath_unit != -1) {
		spin_lock_irqsave(&ipath_devs_lock, flags);
		idr_remove(&unit_table, dd->ipath_unit);
		list_del(&dd->ipath_list);
		spin_unlock_irqrestore(&ipath_devs_lock, flags);
	}
	vfree(dd);
}

static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
{
	unsigned long flags;
	struct ipath_devdata *dd;
	int ret;

	if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dd = vzalloc(sizeof(*dd));
	if (!dd) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}
	dd->ipath_unit = -1;

	spin_lock_irqsave(&ipath_devs_lock, flags);

	ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not allocate unit ID: error %d\n", -ret);
		ipath_free_devdata(pdev, dd);
		dd = ERR_PTR(ret);
		goto bail_unlock;
	}

	dd->pcidev = pdev;
	pci_set_drvdata(pdev, dd);

	list_add(&dd->ipath_list, &ipath_dev_list);

bail_unlock:
	spin_unlock_irqrestore(&ipath_devs_lock, flags);

bail:
	return dd;
}

static inline struct ipath_devdata *__ipath_lookup(int unit)
{
	return idr_find(&unit_table, unit);
}

struct ipath_devdata *ipath_lookup(int unit)
{
	struct ipath_devdata *dd;
	unsigned long flags;

	spin_lock_irqsave(&ipath_devs_lock, flags);
	dd = __ipath_lookup(unit);
	spin_unlock_irqrestore(&ipath_devs_lock, flags);

	return dd;
}

int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
{
	int nunits, npresent, nup;
	struct ipath_devdata *dd;
	unsigned long flags;
	int maxports;

	nunits = npresent = nup = maxports = 0;

	spin_lock_irqsave(&ipath_devs_lock, flags);

	list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
		nunits++;
		if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
			npresent++;
		if (dd->ipath_lid &&
		    !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
					 | IPATH_LINKUNK)))
			nup++;
		if (dd->ipath_cfgports > maxports)
			maxports = dd->ipath_cfgports;
	}

	spin_unlock_irqrestore(&ipath_devs_lock, flags);

	if (npresentp)
		*npresentp = npresent;
	if (nupp)
		*nupp = nup;
	if (maxportsp)
		*maxportsp = maxports;

	return nunits;
}

/*
 * These next two routines are placeholders in case we don't have per-arch
 * code for controlling write combining. If explicit control of write
 * combining is not available, performance will probably be awful.
 */

int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
{
	return -EOPNOTSUPP;
}

void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
{
}

/*
 * Perform a PIO buffer bandwidth write test, to verify proper system
 * configuration. Even when all the setup calls work, occasionally
 * BIOS or other issues can prevent write combining from working, or
 * can cause other bandwidth problems to the chip.
 *
 * This test simply writes the same buffer over and over again, and
 * measures close to the peak bandwidth to the chip (not testing
 * data bandwidth to the wire). On chips that use an address-based
 * trigger to send packets to the wire, this is easy. On chips that
 * use a count to trigger, we want to make sure that the packet doesn't
 * go out on the wire, or trigger flow control checks.
 */
static void ipath_verify_pioperf(struct ipath_devdata *dd)
{
	u32 pbnum, cnt, lcnt;
	u32 __iomem *piobuf;
	u32 *addr;
	u64 msecs, emsecs;

	piobuf = ipath_getpiobuf(dd, 0, &pbnum);
	if (!piobuf) {
		dev_info(&dd->pcidev->dev,
			 "No PIObufs for checking perf, skipping\n");
		return;
	}

	/*
	 * Enough to give us a reasonable test, less than piobuf size, and
	 * likely multiple of store buffer length.
	 */
	cnt = 1024;

	addr = vmalloc(cnt);
	if (!addr) {
		dev_info(&dd->pcidev->dev,
			 "Couldn't get memory for checking PIO perf,"
			 " skipping\n");
		goto done;
	}

	preempt_disable();  /* we want reasonably accurate elapsed time */
	msecs = 1 + jiffies_to_msecs(jiffies);
	for (lcnt = 0; lcnt < 10000U; lcnt++) {
		/* wait until we cross msec boundary */
		if (jiffies_to_msecs(jiffies) >= msecs)
			break;
		udelay(1);
	}

	ipath_disable_armlaunch(dd);

	/*
	 * length 0, no dwords actually sent, and mark as VL15
	 * on chips where that may matter (due to IB flowcontrol)
	 */
	if ((dd->ipath_flags & IPATH_HAS_PBC_CNT))
		writeq(1UL << 63, piobuf);
	else
		writeq(0, piobuf);
	ipath_flush_wc();

	/*
	 * this is only roughly accurate, since even with preempt we
	 * still take interrupts that could take a while.
	 * Running for >= 5 msec seems to get us "close enough" to accurate
	 * values.
	 */
	msecs = jiffies_to_msecs(jiffies);
	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
		__iowrite32_copy(piobuf + 64, addr, cnt >> 2);
		emsecs = jiffies_to_msecs(jiffies) - msecs;
	}

	/* 1 GiB/sec, slightly over IB SDR line rate */
	if (lcnt < (emsecs * 1024U))
		ipath_dev_err(dd,
			"Performance problem: bandwidth to PIO buffers is "
			"only %u MiB/sec\n",
			lcnt / (u32) emsecs);
	else
		ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
			lcnt / (u32) emsecs);

	preempt_enable();

	vfree(addr);

done:
	/* disarm piobuf, so it's available again */
	ipath_disarm_piobufs(dd, pbnum, 1);
	ipath_enable_armlaunch(dd);
}

static void cleanup_device(struct ipath_devdata *dd);

static int __devinit ipath_init_one(struct pci_dev *pdev,
				    const struct pci_device_id *ent)
{
	int ret, len, j;
	struct ipath_devdata *dd;
	unsigned long long addr;
	u32 bar0 = 0, bar1 = 0;

	dd = ipath_alloc_devdata(pdev);
	if (IS_ERR(dd)) {
		ret = PTR_ERR(dd);
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not allocate devdata: error %d\n", -ret);
		goto bail;
	}

	ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);

	ret = pci_enable_device(pdev);
	if (ret) {
		/* This can happen iff:
		 *
		 * We did a chip reset, and then failed to reprogram the
		 * BAR, or the chip reset due to an internal error.  We then
		 * unloaded the driver and reloaded it.
		 *
		 * Both reset cases set the BAR back to initial state.  For
		 * the latter case, the AER sticky error bit at offset 0x718
		 * should be set, but the Linux kernel doesn't yet know
		 * about that, it appears.  If the original BAR was retained
		 * in the kernel data structures, this may be OK.
		 */
		ipath_dev_err(dd, "enable unit %d failed: error %d\n",
			      dd->ipath_unit, -ret);
		goto bail_devdata;
	}
	addr = pci_resource_start(pdev, 0);
	len = pci_resource_len(pdev, 0);
	ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x "
		   "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
		   ent->device, ent->driver_data);

	read_bars(dd, pdev, &bar0, &bar1);

	if (!bar1 && !(bar0 & ~0xf)) {
		if (addr) {
			dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
				 "rewriting as %llx\n", addr);
			ret = pci_write_config_dword(
				pdev, PCI_BASE_ADDRESS_0, addr);
			if (ret) {
				ipath_dev_err(dd, "rewrite of BAR0 "
					      "failed: err %d\n", -ret);
				goto bail_disable;
			}
			ret = pci_write_config_dword(
				pdev, PCI_BASE_ADDRESS_1, addr >> 32);
			if (ret) {
				ipath_dev_err(dd, "rewrite of BAR1 "
					      "failed: err %d\n", -ret);
				goto bail_disable;
			}
		} else {
			ipath_dev_err(dd, "BAR is 0 (probable RESET), "
				      "not usable until reboot\n");
			ret = -ENODEV;
			goto bail_disable;
		}
	}

	ret = pci_request_regions(pdev, IPATH_DRV_NAME);
	if (ret) {
		dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
			 "err %d\n", dd->ipath_unit, -ret);
		goto bail_disable;
	}

	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (ret) {
		/*
		 * if the 64 bit setup fails, try 32 bit.  Some systems
		 * do not setup 64 bit maps on systems with 2GB or less
		 * memory installed.
		 */
		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (ret) {
			dev_info(&pdev->dev,
				 "Unable to set DMA mask for unit %u: %d\n",
				 dd->ipath_unit, ret);
			goto bail_regions;
		} else {
			ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
			ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
			if (ret)
				dev_info(&pdev->dev,
					 "Unable to set DMA consistent mask "
					 "for unit %u: %d\n",
					 dd->ipath_unit, ret);
		}
	} else {
		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
		if (ret)
			dev_info(&pdev->dev,
				 "Unable to set DMA consistent mask "
				 "for unit %u: %d\n",
				 dd->ipath_unit, ret);
	}

	pci_set_master(pdev);

	/*
	 * Save BARs to rewrite after device reset.  Save all 64 bits of
	 * BAR, just in case.
	 */
	dd->ipath_pcibar0 = addr;
	dd->ipath_pcibar1 = addr >> 32;
	dd->ipath_deviceid = ent->device;	/* save for later use */
	dd->ipath_vendorid = ent->vendor;

	/* setup the chip-specific functions, as early as possible. */
	switch (ent->device) {
	case PCI_DEVICE_ID_INFINIPATH_HT:
		ipath_init_iba6110_funcs(dd);
		break;

	default:
		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
			      "failing\n", ent->device);
		return -ENODEV;
	}

	for (j = 0; j < 6; j++) {
		if (!pdev->resource[j].start)
			continue;
		ipath_cdbg(VERBOSE, "BAR %d %pR, len %llx\n",
			   j, &pdev->resource[j],
			   (unsigned long long)pci_resource_len(pdev, j));
	}

	if (!addr) {
		ipath_dev_err(dd, "No valid address in BAR 0!\n");
		ret = -ENODEV;
		goto bail_regions;
	}

	dd->ipath_pcirev = pdev->revision;

#if defined(__powerpc__)
	/* There isn't a generic way to specify writethrough mappings */
	dd->ipath_kregbase = __ioremap(addr, len,
		(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
#else
	dd->ipath_kregbase = ioremap_nocache(addr, len);
#endif

	if (!dd->ipath_kregbase) {
		ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
			  addr);
		ret = -ENOMEM;
		goto bail_iounmap;
	}
	dd->ipath_kregend = (u64 __iomem *)
		((void __iomem *)dd->ipath_kregbase + len);
	dd->ipath_physaddr = addr;	/* used for io_remap, etc. */
	/* for user mmap */
	ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
		   addr, dd->ipath_kregbase);

	if (dd->ipath_f_bus(dd, pdev))
		ipath_dev_err(dd, "Failed to setup config space; "
			      "continuing anyway\n");

	/*
	 * set up our interrupt handler; IRQF_SHARED probably not needed,
	 * since MSI interrupts shouldn't be shared but won't hurt for now.
	 * check 0 irq after we return from chip-specific bus setup, since
	 * that can affect this due to setup
	 */
	if (!dd->ipath_irq)
		ipath_dev_err(dd, "irq is 0, BIOS error? "
			      "Interrupts won't work\n");
	else {
		ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
				  IPATH_DRV_NAME, dd);
		if (ret) {
			ipath_dev_err(dd, "Couldn't setup irq handler, "
				      "irq=%d: %d\n", dd->ipath_irq, ret);
			goto bail_iounmap;
		}
	}

	ret = ipath_init_chip(dd, 0);	/* do the chip-specific init */
	if (ret)
		goto bail_irqsetup;

	ret = ipath_enable_wc(dd);

	if (ret) {
		ipath_dev_err(dd, "Write combining not enabled "
			      "(err %d): performance may be poor\n",
			      -ret);
		ret = 0;
	}

	ipath_verify_pioperf(dd);

	ipath_device_create_group(&pdev->dev, dd);
	ipathfs_add_device(dd);
	ipath_user_add(dd);
	ipath_diag_add(dd);
	ipath_register_ib_device(dd);

	goto bail;

bail_irqsetup:
	cleanup_device(dd);

	if (dd->ipath_irq)
		dd->ipath_f_free_irq(dd);

	if (dd->ipath_f_cleanup)
		dd->ipath_f_cleanup(dd);

bail_iounmap:
	iounmap((volatile void __iomem *) dd->ipath_kregbase);

bail_regions:
	pci_release_regions(pdev);

bail_disable:
	pci_disable_device(pdev);

bail_devdata:
	ipath_free_devdata(pdev, dd);

bail:
	return ret;
}

static void cleanup_device(struct ipath_devdata *dd)
{
	int port;
	struct ipath_portdata **tmp;
	unsigned long flags;

	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
		/* can't do anything more with chip; needs re-init */
		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
		if (dd->ipath_kregbase) {
			/*
			 * if we haven't already cleaned up before these are
			 * to ensure any register reads/writes "fail" until
			 * re-init
			 */
			dd->ipath_kregbase = NULL;
			dd->ipath_uregbase = 0;
			dd->ipath_sregbase = 0;
			dd->ipath_cregbase = 0;
			dd->ipath_kregsize = 0;
		}
		ipath_disable_wc(dd);
	}

	if (dd->ipath_spectriggerhit)
		dev_info(&dd->pcidev->dev, "%lu special trigger hits\n",
			 dd->ipath_spectriggerhit);

	if (dd->ipath_pioavailregs_dma) {
		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
				  (void *) dd->ipath_pioavailregs_dma,
				  dd->ipath_pioavailregs_phys);
		dd->ipath_pioavailregs_dma = NULL;
	}
	if (dd->ipath_dummy_hdrq) {
		dma_free_coherent(&dd->pcidev->dev,
			dd->ipath_pd[0]->port_rcvhdrq_size,
			dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
		dd->ipath_dummy_hdrq = NULL;
	}

	if (dd->ipath_pageshadow) {
		struct page **tmpp = dd->ipath_pageshadow;
		dma_addr_t *tmpd = dd->ipath_physshadow;
		int i, cnt = 0;

		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
			   "locked\n");
		for (port = 0; port < dd->ipath_cfgports; port++) {
			int port_tidbase = port * dd->ipath_rcvtidcnt;
			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
			for (i = port_tidbase; i < maxtid; i++) {
				if (!tmpp[i])
					continue;
				pci_unmap_page(dd->pcidev, tmpd[i],
					PAGE_SIZE, PCI_DMA_FROMDEVICE);
				ipath_release_user_pages(&tmpp[i], 1);
				tmpp[i] = NULL;
				cnt++;
			}
		}
		if (cnt) {
			ipath_stats.sps_pageunlocks += cnt;
			ipath_cdbg(VERBOSE, "There were still %u expTID "
				   "entries locked\n", cnt);
		}
		if (ipath_stats.sps_pagelocks ||
		    ipath_stats.sps_pageunlocks)
			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
				   "unlocked via ipath_m{un}lock\n",
				   (unsigned long long)
				   ipath_stats.sps_pagelocks,
				   (unsigned long long)
				   ipath_stats.sps_pageunlocks);

		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
			   dd->ipath_pageshadow);
		tmpp = dd->ipath_pageshadow;
		dd->ipath_pageshadow = NULL;
		vfree(tmpp);

		dd->ipath_egrtidbase = NULL;
	}

	/*
	 * free any resources still in use (usually just kernel ports)
	 * at unload; we do for portcnt, because that's what we allocate.
	 * We acquire lock to be really paranoid that ipath_pd isn't being
	 * accessed from some interrupt-related code (that should not happen,
	 * but best to be sure).
	 */
	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
	tmp = dd->ipath_pd;
	dd->ipath_pd = NULL;
	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
	for (port = 0; port < dd->ipath_portcnt; port++) {
		struct ipath_portdata *pd = tmp[port];
		tmp[port] = NULL;	/* debugging paranoia */
		ipath_free_pddata(dd, pd);
	}
	kfree(tmp);
}

static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
	struct ipath_devdata *dd = pci_get_drvdata(pdev);

	ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);

	/*
	 * disable the IB link early, to be sure no new packets arrive, which
	 * complicates the shutdown process
	 */
	ipath_shutdown_device(dd);

	flush_workqueue(ib_wq);

	if (dd->verbs_dev)
		ipath_unregister_ib_device(dd->verbs_dev);

	ipath_diag_remove(dd);
	ipath_user_remove(dd);
	ipathfs_remove_device(dd);
	ipath_device_remove_group(&pdev->dev, dd);

	ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
		   "unit %u\n", dd, (u32) dd->ipath_unit);

	cleanup_device(dd);

	/*
	 * turn off rcv, send, and interrupts for all ports, all drivers
	 * should also hard reset the chip here?
	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
	 * for all versions of the driver, if they were allocated
	 */
	if (dd->ipath_irq) {
		ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
			   dd->ipath_unit, dd->ipath_irq);
		dd->ipath_f_free_irq(dd);
	} else
		ipath_dbg("irq is 0, not doing free_irq "
			  "for unit %u\n", dd->ipath_unit);
	/*
	 * we check for NULL here, because it's outside
	 * the kregbase check, and we need to call it
	 * after the free_irq.  Thus it's possible that
	 * the function pointers were never initialized.
	 */
	if (dd->ipath_f_cleanup)
		/* clean up chip-specific stuff */
		dd->ipath_f_cleanup(dd);

	ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
	iounmap((volatile void __iomem *) dd->ipath_kregbase);
	pci_release_regions(pdev);
	ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
	pci_disable_device(pdev);

	ipath_free_devdata(pdev, dd);
}

/* general driver use */
DEFINE_MUTEX(ipath_mutex);

static DEFINE_SPINLOCK(ipath_pioavail_lock);

/**
 * ipath_disarm_piobufs - cancel a range of PIO buffers
 * @dd: the infinipath device
 * @first: the first PIO buffer to cancel
 * @cnt: the number of PIO buffers to cancel
 *
 * cancel a range of PIO buffers, used when they might be armed, but
 * not triggered.  Used at init to ensure buffer state, and also user
 * process close, in case it died while writing to a PIO buffer.
 * Also after errors.
 */
void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
			  unsigned cnt)
{
	unsigned i, last = first + cnt;
	unsigned long flags;

	ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
	for (i = first; i < last; i++) {
		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		/*
		 * The disarm-related bits are write-only, so it
		 * is ok to OR them in with our copy of sendctrl
		 * while we hold the lock.
		 */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			dd->ipath_sendctrl | INFINIPATH_S_DISARM |
			(i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
		/* can't disarm bufs back-to-back per iba7220 spec */
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
	}
	/* on some older chips, update may not happen after cancel */
	ipath_force_pio_avail_update(dd);
}

/**
 * ipath_wait_linkstate - wait for an IB link state change to occur
 * @dd: the infinipath device
 * @state: the state to wait for
 * @msecs: the number of milliseconds to wait
 *
 * wait up to msecs milliseconds for IB link state change to occur for
 * now, take the easy polling route.  Currently used only by
 * ipath_set_linkstate.  Returns 0 if state reached, otherwise
 * -ETIMEDOUT state can have multiple states set, for any of several
 * transitions.
 */
int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
{
	dd->ipath_state_wanted = state;
	wait_event_interruptible_timeout(ipath_state_wait,
					 (dd->ipath_flags & state),
					 msecs_to_jiffies(msecs));
	dd->ipath_state_wanted = 0;

	if (!(dd->ipath_flags & state)) {
		u64 val;
		ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
			   " ms\n",
			   /* test INIT ahead of DOWN, both can be set */
			   (state & IPATH_LINKINIT) ? "INIT" :
			   ((state & IPATH_LINKDOWN) ? "DOWN" :
			    ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
			   msecs);
		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
		ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
			   (unsigned long long) ipath_read_kreg64(
				   dd, dd->ipath_kregs->kr_ibcctrl),
			   (unsigned long long) val,
			   ipath_ibcstatus_str[val & dd->ibcs_lts_mask]);
	}
	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
}

static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
	char *buf, size_t blen)
{
	static const struct {
		ipath_err_t err;
		const char *msg;
	} errs[] = {
		{ INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
		{ INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
		{ INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
		{ INFINIPATH_E_SDMABASE, "SDmaBase" },
		{ INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
		{ INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
		{ INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
		{ INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
		{ INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
		{ INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
		{ INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
		{ INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
	};
	int i;
	int expected;
	size_t bidx = 0;

	for (i = 0; i < ARRAY_SIZE(errs); i++) {
		expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ?
			0 : test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
		if ((err & errs[i].err) && !expected)
			bidx += snprintf(buf + bidx, blen - bidx,
					 "%s ", errs[i].msg);
	}
}

/*
 * Decode the error status into strings, deciding whether to always
 * print it or not depending on "normal packet errors" vs everything
 * else.  Return 1 if "real" errors, otherwise 0 if only packet
 * errors, so caller can decide what to print with the string.
 */
int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
	ipath_err_t err)
{
	int iserr = 1;
	*buf = '\0';
	if (err & INFINIPATH_E_PKTERRS) {
		if (!(err & ~INFINIPATH_E_PKTERRS))
			iserr = 0; /* if only packet errors. */
		if (ipath_debug & __IPATH_ERRPKTDBG) {
			if (err & INFINIPATH_E_REBP)
				strlcat(buf, "EBP ", blen);
			if (err & INFINIPATH_E_RVCRC)
				strlcat(buf, "VCRC ", blen);
			if (err & INFINIPATH_E_RICRC) {
				strlcat(buf, "CRC ", blen);
				/* clear for check below, so only once */
				err &= INFINIPATH_E_RICRC;
			}
			if (err & INFINIPATH_E_RSHORTPKTLEN)
				strlcat(buf, "rshortpktlen ", blen);
			if (err & INFINIPATH_E_SDROPPEDDATAPKT)
				strlcat(buf, "sdroppeddatapkt ", blen);
			if (err & INFINIPATH_E_SPKTLEN)
				strlcat(buf, "spktlen ", blen);
		}
		if ((err & INFINIPATH_E_RICRC) &&
			!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
			strlcat(buf, "CRC ", blen);
		if (!iserr)
			goto done;
	}
	if (err & INFINIPATH_E_RHDRLEN)
		strlcat(buf, "rhdrlen ", blen);
	if (err & INFINIPATH_E_RBADTID)
		strlcat(buf, "rbadtid ", blen);
	if (err & INFINIPATH_E_RBADVERSION)
		strlcat(buf, "rbadversion ", blen);
	if (err & INFINIPATH_E_RHDR)
		strlcat(buf, "rhdr ", blen);
	if (err & INFINIPATH_E_SENDSPECIALTRIGGER)
		strlcat(buf, "sendspecialtrigger ", blen);
	if (err & INFINIPATH_E_RLONGPKTLEN)
		strlcat(buf, "rlongpktlen ", blen);
	if (err & INFINIPATH_E_RMAXPKTLEN)
		strlcat(buf, "rmaxpktlen ", blen);
	if (err & INFINIPATH_E_RMINPKTLEN)
		strlcat(buf, "rminpktlen ", blen);
	if (err & INFINIPATH_E_SMINPKTLEN)
		strlcat(buf, "sminpktlen ", blen);
	if (err & INFINIPATH_E_RFORMATERR)
		strlcat(buf, "rformaterr ", blen);
	if (err & INFINIPATH_E_RUNSUPVL)
		strlcat(buf, "runsupvl ", blen);
	if (err & INFINIPATH_E_RUNEXPCHAR)
		strlcat(buf, "runexpchar ", blen);
	if (err & INFINIPATH_E_RIBFLOW)
		strlcat(buf, "ribflow ", blen);
	if (err & INFINIPATH_E_SUNDERRUN)
		strlcat(buf, "sunderrun ", blen);
	if (err & INFINIPATH_E_SPIOARMLAUNCH)
		strlcat(buf, "spioarmlaunch ", blen);
	if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
		strlcat(buf, "sunexperrpktnum ", blen);
	if (err & INFINIPATH_E_SDROPPEDSMPPKT)
		strlcat(buf, "sdroppedsmppkt ", blen);
	if (err & INFINIPATH_E_SMAXPKTLEN)
		strlcat(buf, "smaxpktlen ", blen);
	if (err & INFINIPATH_E_SUNSUPVL)
		strlcat(buf, "sunsupVL ", blen);
	if (err & INFINIPATH_E_INVALIDADDR)
		strlcat(buf, "invalidaddr ", blen);
	if (err & INFINIPATH_E_RRCVEGRFULL)
		strlcat(buf, "rcvegrfull ", blen);
	if (err & INFINIPATH_E_RRCVHDRFULL)
		strlcat(buf, "rcvhdrfull ", blen);
	if (err & INFINIPATH_E_IBSTATUSCHANGED)
		strlcat(buf, "ibcstatuschg ", blen);
	if (err & INFINIPATH_E_RIBLOSTLINK)
		strlcat(buf, "riblostlink ", blen);
	if (err & INFINIPATH_E_HARDWARE)
		strlcat(buf, "hardware ", blen);
	if (err & INFINIPATH_E_RESET)
		strlcat(buf, "reset ", blen);
	if (err & INFINIPATH_E_SDMAERRS)
		decode_sdma_errs(dd, err, buf, blen);
	if (err & INFINIPATH_E_INVALIDEEPCMD)
		strlcat(buf, "invalideepromcmd ", blen);
done:
	return iserr;
}

/**
 * get_rhf_errstring - decode RHF errors
 * @err: the err number
 * @msg: the output buffer
 * @len: the length of the output buffer
 *
 * only used one place now, may want more later
 */
static void get_rhf_errstring(u32 err, char *msg, size_t len)
{
	/* if no errors, and so don't need to check what's first */
	*msg = '\0';

	if (err & INFINIPATH_RHF_H_ICRCERR)
		strlcat(msg, "icrcerr ", len);
	if (err & INFINIPATH_RHF_H_VCRCERR)
		strlcat(msg, "vcrcerr ", len);
	if (err & INFINIPATH_RHF_H_PARITYERR)
		strlcat(msg, "parityerr ", len);
	if (err & INFINIPATH_RHF_H_LENERR)
		strlcat(msg, "lenerr ", len);
	if (err & INFINIPATH_RHF_H_MTUERR)
		strlcat(msg, "mtuerr ", len);
	if (err & INFINIPATH_RHF_H_IHDRERR)
		/* infinipath hdr checksum error */
		strlcat(msg, "ipathhdrerr ", len);
	if (err & INFINIPATH_RHF_H_TIDERR)
		strlcat(msg, "tiderr ", len);
	if (err & INFINIPATH_RHF_H_MKERR)
		/* bad port, offset, etc. */
		strlcat(msg, "invalid ipathhdr ", len);
	if (err & INFINIPATH_RHF_H_IBERR)
		strlcat(msg, "iberr ", len);
	if (err & INFINIPATH_RHF_L_SWA)
		strlcat(msg, "swA ", len);
	if (err & INFINIPATH_RHF_L_SWB)
		strlcat(msg, "swB ", len);
}

/**
 * ipath_get_egrbuf - get an eager buffer
 * @dd: the infinipath device
 * @bufnum: the eager buffer to get
 *
 * must only be called if ipath_pd[port] is known to be allocated
 */
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
{
	return dd->ipath_port0_skbinfo ?
		(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}

/**
 * ipath_alloc_skb - allocate an skb and buffer with possible constraints
 * @dd: the infinipath device
 * @gfp_mask: the sk_buff GFP mask
 */
struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
				gfp_t gfp_mask)
{
	struct sk_buff *skb;
	u32 len;

	/*
	 * Only fully supported way to handle this is to allocate lots
	 * extra, align as needed, and then do skb_reserve().  That wastes
	 * a lot of memory...  I'll have to hack this into infinipath_copy
	 * also.
	 */

	/*
	 * We need 2 extra bytes for ipath_ether data sent in the
	 * key header.  In order to keep everything dword aligned,
	 * we'll reserve 4 bytes.
	 */
	len = dd->ipath_ibmaxlen + 4;

	if (dd->ipath_flags & IPATH_4BYTE_TID) {
		/* We need a 2KB multiple alignment, and there is no way
		 * to do it except to allocate extra and then skb_reserve
		 * enough to bring it up to the right alignment.
		 */
		len += 2047;
	}

	skb = __dev_alloc_skb(len, gfp_mask);
	if (!skb) {
		ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
			      len);
		goto bail;
	}

	skb_reserve(skb, 4);

	if (dd->ipath_flags & IPATH_4BYTE_TID) {
		u32 una = (unsigned long)skb->data & 2047;
		if (una)
			skb_reserve(skb, 2048 - una);
	}

bail:
	return skb;
}

static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
			     u32 eflags,
			     u32 l,
			     u32 etail,
			     __le32 *rhf_addr,
			     struct ipath_message_header *hdr)
{
	char emsg[128];

	get_rhf_errstring(eflags, emsg, sizeof emsg);
	ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
		   "tlen=%x opcode=%x egridx=%x: %s\n",
		   eflags, l,
		   ipath_hdrget_rcv_type(rhf_addr),
		   ipath_hdrget_length_in_bytes(rhf_addr),
		   be32_to_cpu(hdr->bth[0]) >> 24,
		   etail, emsg);

	/* Count local link integrity errors. */
	if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
		u8 n = (dd->ipath_ibcctrl >>
			INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
			INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;

		if (++dd->ipath_lli_counter > n) {
			dd->ipath_lli_counter = 0;
			dd->ipath_lli_errors++;
		}
	}
}

/*
 * ipath_kreceive - receive a packet
 * @pd: the infinipath port
 *
 * called from interrupt handler for errors or receive interrupt
 */
void ipath_kreceive(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	__le32 *rhf_addr;
	void *ebuf;
	const u32 rsize = dd->ipath_rcvhdrentsize;	/* words */
	const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize;	/* words */
	u32 etail = -1, l, hdrqtail;
	struct ipath_message_header *hdr;
	u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0;
	static u64 totcalls;	/* stats, may eventually remove */
	int last;

	l = pd->port_head;
	rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset;
	if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
		u32 seq = ipath_hdrget_seq(rhf_addr);

		if (seq != pd->port_seq_cnt)
			goto bail;
		hdrqtail = 0;
	} else {
		hdrqtail = ipath_get_rcvhdrtail(pd);
		if (l == hdrqtail)
			goto bail;
		smp_rmb();
	}

reloop:
	for (last = 0, i = 1; !last; i += !last) {
		hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
		eflags = ipath_hdrget_err_flags(rhf_addr);
		etype = ipath_hdrget_rcv_type(rhf_addr);
		/* total length */
		tlen = ipath_hdrget_length_in_bytes(rhf_addr);
		ebuf = NULL;
		if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ?
		    ipath_hdrget_use_egr_buf(rhf_addr) :
		    (etype != RCVHQ_RCV_TYPE_EXPECTED)) {
			/*
			 * It turns out that the chip uses an eager buffer
			 * for all non-expected packets, whether it "needs"
			 * one or not.  So always get the index, but don't
			 * set ebuf (so we try to copy data) unless the
			 * length requires it.
			 */
			etail = ipath_hdrget_index(rhf_addr);
			updegr = 1;
			if (tlen > sizeof(*hdr) ||
			    etype == RCVHQ_RCV_TYPE_NON_KD)
				ebuf = ipath_get_egrbuf(dd, etail);
		}

		/*
		 * both tiderr and ipathhdrerr are set for all plain IB
		 * packets; only ipathhdrerr should be set.
		 */

		if (etype != RCVHQ_RCV_TYPE_NON_KD &&
		    etype != RCVHQ_RCV_TYPE_ERROR &&
		    ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) !=
		    IPS_PROTO_VERSION)
			ipath_cdbg(PKT, "Bad InfiniPath protocol version "
				   "%x\n", etype);

		if (unlikely(eflags))
			ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr);
		else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
			ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen);
			if (dd->ipath_lli_counter)
				dd->ipath_lli_counter--;
		} else if (etype == RCVHQ_RCV_TYPE_EAGER) {
			u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24;
			u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff;
			ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
				   "qp=%x), len %x; ignored\n",
				   etype, opcode, qp, tlen);
		} else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
			ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
				  be32_to_cpu(hdr->bth[0]) >> 24);
		else {
			/*
			 * error packet, type of error unknown.
			 * Probably type 3, but we don't know, so don't
			 * even try to print the opcode, etc.
			 * Usually caused by a "bad packet", that has no
			 * BTH, when the LRH says it should.
			 */
			ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf"
				   " %x, len %x hdrq+%x rhf: %Lx\n",
				   etail, tlen, l, (unsigned long long)
				   le64_to_cpu(*(__le64 *) rhf_addr));
			if (ipath_debug & __IPATH_ERRPKTDBG) {
				u32 j, *d, dw = rsize-2;
				if (rsize > (tlen>>2))
					dw = tlen>>2;
				d = (u32 *)hdr;
				printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n",
				       dw);
				for (j = 0; j < dw; j++)
					printk(KERN_DEBUG "%8x%s", d[j],
					       (j%8) == 7 ? "\n" : " ");
				printk(KERN_DEBUG ".\n");
			}
		}
		l += rsize;
		if (l >= maxcnt)
			l = 0;
		rhf_addr = (__le32 *) pd->port_rcvhdrq +
			l + dd->ipath_rhf_offset;
		if (dd->ipath_flags & IPATH_NODMA_RTAIL) {
			u32 seq = ipath_hdrget_seq(rhf_addr);

			if (++pd->port_seq_cnt > 13)
				pd->port_seq_cnt = 1;
			if (seq != pd->port_seq_cnt)
				last = 1;
		} else if (l == hdrqtail)
			last = 1;
		/*
		 * update head regs on last packet, and every 16 packets.
		 * Reduce bus traffic, while still trying to prevent
		 * rcvhdrq overflows, for when the queue is nearly full
		 */
		if (last || !(i & 0xf)) {
			u64 lval = l;

			/* request IBA6120 and 7220 interrupt only on last */
			if (last)
				lval |= dd->ipath_rhdrhead_intr_off;
			ipath_write_ureg(dd, ur_rcvhdrhead, lval,
					 pd->port_port);
			if (updegr) {
				ipath_write_ureg(dd, ur_rcvegrindexhead,
						 etail, pd->port_port);
				updegr = 0;
			}
		}
	}

	if (!dd->ipath_rhdrhead_intr_off && !reloop &&
	    !(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
		/* IBA6110 workaround; we can have a race clearing chip
		 * interrupt with another interrupt about to be delivered,
		 * and can clear it before it is delivered on the GPIO
		 * workaround.  By doing the extra check here for the
		 * in-memory tail register updating while we were doing
		 * earlier packets, we "almost" guarantee we have covered
		 * that case.
		 */
		u32 hqtail = ipath_get_rcvhdrtail(pd);
		if (hqtail != hdrqtail) {
			hdrqtail = hqtail;
			reloop = 1; /* loop 1 extra time at most */
			goto reloop;
		}
	}

	pkttot += i;

	pd->port_head = l;

	if (pkttot > ipath_stats.sps_maxpkts_call)
		ipath_stats.sps_maxpkts_call = pkttot;
	ipath_stats.sps_port0pkts += pkttot;
	ipath_stats.sps_avgpkts_call =
		ipath_stats.sps_port0pkts / ++totcalls;

bail:;
}

/**
 * ipath_update_pio_bufs - update shadow copy of the PIO availability map
 * @dd: the infinipath device
 *
 * called whenever our local copy indicates we have run out of send buffers
 * NOTE: This can be called from interrupt context by some code
 * and from non-interrupt context by ipath_getpiobuf().
 */
static void ipath_update_pio_bufs(struct ipath_devdata *dd)
{
	unsigned long flags;
	int i;
	const unsigned piobregs = (unsigned)dd->ipath_pioavregs;

	/* If the generation (check) bits have changed, then we update the
	 * busy bit for the corresponding PIO buffer.  This algorithm will
	 * modify positions to the value they already have in some cases
	 * (i.e., no change), but it's faster than changing only the bits
	 * that have changed.
	 *
	 * We would like to do this atomically, to avoid spinlocks in the
	 * critical send path, but that's not really possible, given the
	 * type of changes, and that this routine could be called on
	 * multiple cpu's simultaneously, so we lock in this routine only,
	 * to avoid conflicting updates; all we change is the shadow, and
	 * it's a single 64 bit memory location, so by definition the update
	 * is atomic in terms of what other cpu's can see in testing the
	 * bits.  The spin_lock overhead isn't too bad, since it only
	 * happens when all buffers are in use, so only cpu overhead, not
	 * latency or bandwidth is affected.
	 */
	if (!dd->ipath_pioavailregs_dma) {
		ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
		return;
	}
	if (ipath_debug & __IPATH_VERBDBG) {
		/* only if packet debug and verbose */
		volatile __le64 *dma = dd->ipath_pioavailregs_dma;
		unsigned long *shadow = dd->ipath_pioavailshadow;

		ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
			   "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
			   "s3=%lx\n",
			   (unsigned long long) le64_to_cpu(dma[0]),
			   shadow[0],
			   (unsigned long long) le64_to_cpu(dma[1]),
			   shadow[1],
			   (unsigned long long) le64_to_cpu(dma[2]),
			   shadow[2],
			   (unsigned long long) le64_to_cpu(dma[3]),
			   shadow[3]);
		if (piobregs > 4)
			ipath_cdbg(
				PKT, "2nd group, dma4=%llx shad4=%lx, "
				"d5=%llx s5=%lx, d6=%llx s6=%lx, "
				"d7=%llx s7=%lx\n",
				(unsigned long long) le64_to_cpu(dma[4]),
				shadow[4],
				(unsigned long long) le64_to_cpu(dma[5]),
				shadow[5],
				(unsigned long long) le64_to_cpu(dma[6]),
				shadow[6],
				(unsigned long long) le64_to_cpu(dma[7]),
				shadow[7]);
	}
	spin_lock_irqsave(&ipath_pioavail_lock, flags);
	for (i = 0; i < piobregs; i++) {
		u64 pchbusy, pchg, piov, pnew;
		/*
		 * Chip Errata: bug 6641; even and odd qwords>3 are swapped
		 */
		if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
		else
			piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
		pchg = dd->ipath_pioavailkernel[i] &
			~(dd->ipath_pioavailshadow[i] ^ piov);
		pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
		if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
			pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
			pnew |= piov & pchbusy;
			dd->ipath_pioavailshadow[i] = pnew;
		}
	}
	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}

/*
 * used to force update of pioavailshadow if we can't get a pio buffer.
 * Needed primarily due to exiting freeze mode after recovering
 * from errors.  Done lazily, because it's safer (known to not
 * be writing pio buffers).
 */
static void ipath_reset_availshadow(struct ipath_devdata *dd)
{
	int i, im;
	unsigned long flags;

	spin_lock_irqsave(&ipath_pioavail_lock, flags);
	for (i = 0; i < dd->ipath_pioavregs; i++) {
		u64 val, oldval;
		/* deal with 6110 chip bug on high register #s */
		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
			i ^ 1 : i;
		val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
		/*
		 * busy out the buffers not in the kernel avail list,
		 * without changing the generation bits.
		 */
		oldval = dd->ipath_pioavailshadow[i];
		dd->ipath_pioavailshadow[i] = val |
			((~dd->ipath_pioavailkernel[i] <<
			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
			0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
		if (oldval != dd->ipath_pioavailshadow[i])
			ipath_dbg("shadow[%d] was %Lx, now %lx\n",
				i, (unsigned long long) oldval,
				dd->ipath_pioavailshadow[i]);
	}
	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}

/**
 * ipath_setrcvhdrsize - set the receive header size
 * @dd: the infinipath device
 * @rhdrsize: the receive header size
 *
 * called from user init code, and also layered driver init
 */
int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
{
	int ret = 0;

	if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
		if (dd->ipath_rcvhdrsize != rhdrsize) {
			dev_info(&dd->pcidev->dev,
				 "Error: can't set protocol header "
				 "size %u, already %u\n",
				 rhdrsize, dd->ipath_rcvhdrsize);
			ret = -EAGAIN;
		} else
			ipath_cdbg(VERBOSE, "Reuse same protocol header "
				   "size %u\n", dd->ipath_rcvhdrsize);
	} else if (rhdrsize > (dd->ipath_rcvhdrentsize -
			       (sizeof(u64) / sizeof(u32)))) {
		ipath_dbg("Error: can't set protocol header size %u "
			  "(> max %u)\n", rhdrsize,
			  dd->ipath_rcvhdrentsize -
			  (u32) (sizeof(u64) / sizeof(u32)));
		ret = -EOVERFLOW;
	} else {
		dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
		dd->ipath_rcvhdrsize = rhdrsize;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
				 dd->ipath_rcvhdrsize);
		ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
			   dd->ipath_rcvhdrsize);
	}
	return ret;
}

/*
 * debugging code and stats updates if no pio buffers available.
 */
static noinline void no_pio_bufs(struct ipath_devdata *dd)
{
	unsigned long *shadow = dd->ipath_pioavailshadow;
	__le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;

	dd->ipath_upd_pio_shadow = 1;

	/*
	 * not atomic, but if we lose a stat count in a while, that's OK
	 */
	ipath_stats.sps_nopiobufs++;
	if (!(++dd->ipath_consec_nopiobuf % 100000)) {
		ipath_force_pio_avail_update(dd); /* at start */
		ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
			"%llx %llx %llx %llx\n"
			"ipath shadow: %lx %lx %lx %lx\n",
			dd->ipath_consec_nopiobuf,
			(unsigned long)get_cycles(),
			(unsigned long long) le64_to_cpu(dma[0]),
			(unsigned long long) le64_to_cpu(dma[1]),
			(unsigned long long) le64_to_cpu(dma[2]),
			(unsigned long long) le64_to_cpu(dma[3]),
			shadow[0], shadow[1], shadow[2], shadow[3]);
		/*
		 * 4 buffers per byte, 4 registers above, cover rest
		 * below
		 */
		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
			(sizeof(shadow[0]) * 4 * 4))
			ipath_dbg("2nd group: dmacopy: "
				"%llx %llx %llx %llx\n"
				"ipath shadow: %lx %lx %lx %lx\n",
				(unsigned long long)le64_to_cpu(dma[4]),
				(unsigned long long)le64_to_cpu(dma[5]),
				(unsigned long long)le64_to_cpu(dma[6]),
				(unsigned long long)le64_to_cpu(dma[7]),
				shadow[4], shadow[5], shadow[6], shadow[7]);

		/* at end, so update likely happened */
		ipath_reset_availshadow(dd);
	}
}

/*
 * common code for normal driver pio buffer allocation, and reserved
 * allocation.
 *
 * do appropriate marking as busy, etc.
 * returns buffer number if one found (>=0), negative number is error.
 */
static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
	u32 *pbufnum, u32 first, u32 last, u32 firsti)
{
	int i, j, updated = 0;
	unsigned piobcnt;
	unsigned long flags;
	unsigned long *shadow = dd->ipath_pioavailshadow;
	u32 __iomem *buf;

	piobcnt = last - first;
	if (dd->ipath_upd_pio_shadow) {
		/*
		 * Minor optimization.  If we had no buffers on last call,
		 * start out by doing the update; continue and do scan even
		 * if no buffers were updated, to be paranoid
		 */
		ipath_update_pio_bufs(dd);
		updated++;
		i = first;
	} else
		i = firsti;
rescan:
	/*
	 * while test_and_set_bit() is atomic, we do that and then the
	 * change_bit(), and the pair is not.  See if this is the cause
	 * of the remaining armlaunch errors.
	 */
	spin_lock_irqsave(&ipath_pioavail_lock, flags);
	for (j = 0; j < piobcnt; j++, i++) {
		if (i >= last)
			i = first;
		if (__test_and_set_bit((2 * i) + 1, shadow))
			continue;
		/* flip generation bit */
		__change_bit(2 * i, shadow);
		break;
	}
	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);

	if (j == piobcnt) {
		if (!updated) {
			/*
			 * first time through; shadow exhausted, but may be
			 * buffers available, try an update and then rescan.
			 */
			ipath_update_pio_bufs(dd);
			updated++;
			i = first;
			goto rescan;
		} else if (updated == 1 && piobcnt <=
			((dd->ipath_sendctrl
			>> INFINIPATH_S_UPDTHRESH_SHIFT) &
			INFINIPATH_S_UPDTHRESH_MASK)) {
			/*
			 * for chips supporting and using the update
			 * threshold we need to force an update of the
			 * in-memory copy if the count is less than the
			 * threshold, then check one more time.
			 */
			ipath_force_pio_avail_update(dd);
			ipath_update_pio_bufs(dd);
			updated++;
			i = first;
			goto rescan;
		}

		no_pio_bufs(dd);
		buf = NULL;
	} else {
		if (i < dd->ipath_piobcnt2k)
			buf = (u32 __iomem *) (dd->ipath_pio2kbase +
					       i * dd->ipath_palign);
		else
			buf = (u32 __iomem *)
				(dd->ipath_pio4kbase +
				 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
		if (pbufnum)
			*pbufnum = i;
	}

	return buf;
}

/**
 * ipath_getpiobuf - find an available pio buffer
 * @dd: the infinipath device
 * @plen: the size of the PIO buffer needed in 32-bit words
 * @pbufnum: the buffer number is placed here
 */
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
{
	u32 __iomem *buf;
	u32 pnum, nbufs;
	u32 first, lasti;

	if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
		first = dd->ipath_piobcnt2k;
		lasti = dd->ipath_lastpioindexl;
	} else {
		first = 0;
		lasti = dd->ipath_lastpioindex;
	}
	nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
	buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);

	if (buf) {
		/*
		 * Set next starting place.  It's just an optimization,
		 * it doesn't matter who wins on this, so no locking
		 */
		if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
			dd->ipath_lastpioindexl = pnum + 1;
		else
			dd->ipath_lastpioindex = pnum + 1;
		if (dd->ipath_upd_pio_shadow)
			dd->ipath_upd_pio_shadow = 0;
		if (dd->ipath_consec_nopiobuf)
			dd->ipath_consec_nopiobuf = 0;
		ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
			   pnum, (pnum < dd->ipath_piobcnt2k) ?
			   2 : 4, buf);
		if (pbufnum)
			*pbufnum = pnum;
	}
	return buf;
}

/**
 * ipath_chg_pioavailkernel - change which send buffers are available for kernel
 * @dd: the infinipath device
 * @start: the starting send buffer number
 * @len: the number of send buffers
 * @avail: true if the buffers are available for kernel use, false otherwise
 */
void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
			      unsigned len, int avail)
{
	unsigned long flags;
	unsigned end, cnt = 0;

	/* There are two bits per send buffer (busy and generation) */
	start *= 2;
	end = start + len * 2;

	spin_lock_irqsave(&ipath_pioavail_lock, flags);
	/* Set or clear the busy bit in the shadow. */
	while (start < end) {
		if (avail) {
			unsigned long dma;
			int i, im;
			/*
			 * the BUSY bit will never be set, because we disarm
			 * the user buffers before we hand them back to the
			 * kernel.  We do have to make sure the generation
			 * bit is set correctly in shadow, since it could
			 * have changed many times while allocated to user.
			 * We can't use the bitmap functions on the full
			 * dma array because it is always little-endian, so
			 * we have to flip to host-order first.
			 * BITS_PER_LONG is slightly wrong, since it's
			 * always 64 bits per register in chip...
			 * We only work on 64 bit kernels, so that's OK.
			 */
			/* deal with 6110 chip bug on high register #s */
			i = start / BITS_PER_LONG;
			im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
				i ^ 1 : i;
			__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
				+ start, dd->ipath_pioavailshadow);
			dma = (unsigned long) le64_to_cpu(
				dd->ipath_pioavailregs_dma[im]);
			if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
				+ start) % BITS_PER_LONG, &dma))
				__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
					+ start, dd->ipath_pioavailshadow);
			else
				__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
					+ start, dd->ipath_pioavailshadow);
			__set_bit(start, dd->ipath_pioavailkernel);
		} else {
			__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
				dd->ipath_pioavailshadow);
			__clear_bit(start, dd->ipath_pioavailkernel);
		}
		start += 2;
	}

	if (dd->ipath_pioupd_thresh) {
		end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
		cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
	}
	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);

	/*
	 * When moving buffers from kernel to user, if number assigned to
	 * the user is less than the pio update threshold, and threshold
	 * is supported (cnt was computed > 0), drop the update threshold
	 * so we update at least once per allocated number of buffers.
	 * In any case, if the kernel buffers are less than the threshold,
	 * drop the threshold.  We don't bother increasing it, having once
	 * decreased it, since it would typically just cycle back and forth.
	 * If we don't decrease below buffers in use, we can wait a long
	 * time for an update, until some other context uses PIO buffers.
	 */
	if (!avail && len < cnt)
		cnt = len;
	if (cnt < dd->ipath_pioupd_thresh) {
		dd->ipath_pioupd_thresh = cnt;
		ipath_dbg("Decreased pio update threshold to %u\n",
			dd->ipath_pioupd_thresh);
		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
			<< INFINIPATH_S_UPDTHRESH_SHIFT);
		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
			<< INFINIPATH_S_UPDTHRESH_SHIFT;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			dd->ipath_sendctrl);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
	}
}

/**
 * ipath_create_rcvhdrq - create a receive header queue
 * @dd: the infinipath device
 * @pd: the port data
 *
 * this must be contiguous memory (from an i/o perspective), and must be
 * DMA'able (which means for some systems, it will go through an IOMMU,
 * or be forced into a low address range).
 */
int ipath_create_rcvhdrq(struct ipath_devdata *dd,
			 struct ipath_portdata *pd)
{
	int ret = 0;

	if (!pd->port_rcvhdrq) {
		dma_addr_t phys_hdrqtail;
		gfp_t gfp_flags = GFP_USER | __GFP_COMP;
		int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
				sizeof(u32), PAGE_SIZE);

		pd->port_rcvhdrq = dma_alloc_coherent(
			&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
			gfp_flags);

		if (!pd->port_rcvhdrq) {
			ipath_dev_err(dd, "attempt to allocate %d bytes "
				      "for port %u rcvhdrq failed\n",
				      amt, pd->port_port);
			ret = -ENOMEM;
			goto bail;
		}

		if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
			pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
				GFP_KERNEL);
			if (!pd->port_rcvhdrtail_kvaddr) {
				ipath_dev_err(dd, "attempt to allocate 1 page "
					      "for port %u rcvhdrqtailaddr "
					      "failed\n", pd->port_port);
				ret = -ENOMEM;
				dma_free_coherent(&dd->pcidev->dev, amt,
					pd->port_rcvhdrq,
					pd->port_rcvhdrq_phys);
				pd->port_rcvhdrq = NULL;
				goto bail;
			}
			pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
			ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx "
				   "physical\n", pd->port_port,
				   (unsigned long long) phys_hdrqtail);
		}

		pd->port_rcvhdrq_size = amt;

		ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
			   "for port %u rcvhdr Q\n",
			   amt >> PAGE_SHIFT, pd->port_rcvhdrq,
			   (unsigned long) pd->port_rcvhdrq_phys,
			   (unsigned long) pd->port_rcvhdrq_size,
			   pd->port_port);
	} else
		ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
			   "hdrtailaddr@%p %llx physical\n",
			   pd->port_port, pd->port_rcvhdrq,
			   (unsigned long long) pd->port_rcvhdrq_phys,
			   pd->port_rcvhdrtail_kvaddr, (unsigned long long)
			   pd->port_rcvhdrqtailaddr_phys);

	/* clear for security and sanity on each use */
	memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
	if (pd->port_rcvhdrtail_kvaddr)
		memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);

	/*
	 * tell chip each time we init it, even if we are re-using previous
	 * memory (we zero the register at process close)
	 */
	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
			      pd->port_port, pd->port_rcvhdrqtailaddr_phys);
	ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
			      pd->port_port, pd->port_rcvhdrq_phys);

bail:
	return ret;
}


/*
 * Flush all sends that
might be in the ready to send state, as well as any 1849 * that are in the process of being sent. Used whenever we need to be 1850 * sure the send side is idle. Cleans up all buffer state by canceling 1851 * all pio buffers, and issuing an abort, which cleans up anything in the 1852 * launch fifo. The cancel is superfluous on some chip versions, but 1853 * it's safer to always do it. 1854 * PIOAvail bits are updated by the chip as if normal send had happened. 1855 */ 1856void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) 1857{ 1858 unsigned long flags; 1859 1860 if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { 1861 ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); 1862 goto bail; 1863 } 1864 /* 1865 * If we have SDMA, and it's not disabled, we have to kick off the 1866 * abort state machine, provided we aren't already aborting. 1867 * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), 1868 * we skip the rest of this routine. It is already "in progress" 1869 */ 1870 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { 1871 int skip_cancel; 1872 unsigned long *statp = &dd->ipath_sdma_status; 1873 1874 spin_lock_irqsave(&dd->ipath_sdma_lock, flags); 1875 skip_cancel = 1876 test_and_set_bit(IPATH_SDMA_ABORTING, statp) 1877 && !test_bit(IPATH_SDMA_DISABLED, statp); 1878 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 1879 if (skip_cancel) 1880 goto bail; 1881 } 1882 1883 ipath_dbg("Cancelling all in-progress send buffers\n"); 1884 1885 /* skip armlaunch errs for a while */ 1886 dd->ipath_lastcancel = jiffies + HZ / 2; 1887 1888 /* 1889 * The abort bit is auto-clearing. We also don't want pioavail 1890 * update happening during this, and we don't want any other 1891 * sends going out, so turn those off for the duration. We read 1892 * the scratch register to be sure that cancels and the abort 1893 * have taken effect in the chip. 
Otherwise, the two parts are the same
	 * as in ipath_force_pio_avail_update().
	 */
	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
	dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
				| INFINIPATH_S_PIOENABLE);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			 dd->ipath_sendctrl | INFINIPATH_S_ABORT);
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);

	/* disarm all send buffers */
	ipath_disarm_piobufs(dd, 0,
			     dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);

	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
		set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);

	if (restore_sendctrl) {
		/* else done by caller later if needed */
		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
			INFINIPATH_S_PIOENABLE;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
				 dd->ipath_sendctrl);
		/* and again, be sure all have hit the chip */
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
	}

	if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
	    !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
	    test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
		/* only wait so long for intr */
		dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
		dd->ipath_sdma_reset_wait = 200;
		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
	}
bail:;
}

/*
 * Force an update of the in-memory copy of the pioavail registers, when
 * needed for any of a variety of reasons.  We read the scratch register
 * to make it highly likely that the update will have happened by the
 * time we return.  If already off (as in cancel_sends above), this
 * routine is a nop, on the assumption that the caller will "do the
 * right thing".
 */
void ipath_force_pio_avail_update(struct ipath_devdata *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
	if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			dd->ipath_sendctrl);
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	}
	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}

static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
				int linitcmd)
{
	u64 mod_wd;
	static const char *what[4] = {
		[0] = "NOP",
		[INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
		[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
		[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
	};

	if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) {
		/*
		 * If we are told to disable, note that so link-recovery
		 * code does not attempt to bring us back up.
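		 * (The flag is cleared again below for any other nonzero
		 * linkinitcmd, so normal link recovery resumes.)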
1976 */ 1977 preempt_disable(); 1978 dd->ipath_flags |= IPATH_IB_LINK_DISABLED; 1979 preempt_enable(); 1980 } else if (linitcmd) { 1981 /* 1982 * Any other linkinitcmd will lead to LINKDOWN and then 1983 * to INIT (if all is well), so clear flag to let 1984 * link-recovery code attempt to bring us back up. 1985 */ 1986 preempt_disable(); 1987 dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; 1988 preempt_enable(); 1989 } 1990 1991 mod_wd = (linkcmd << dd->ibcc_lc_shift) | 1992 (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT); 1993 ipath_cdbg(VERBOSE, 1994 "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n", 1995 dd->ipath_unit, what[linkcmd], linitcmd, 1996 ipath_ibcstatus_str[ipath_ib_linktrstate(dd, 1997 ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]); 1998 1999 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2000 dd->ipath_ibcctrl | mod_wd); 2001 /* read from chip so write is flushed */ 2002 (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 2003} 2004 2005int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) 2006{ 2007 u32 lstate; 2008 int ret; 2009 2010 switch (newstate) { 2011 case IPATH_IB_LINKDOWN_ONLY: 2012 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0); 2013 /* don't wait */ 2014 ret = 0; 2015 goto bail; 2016 2017 case IPATH_IB_LINKDOWN: 2018 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2019 INFINIPATH_IBCC_LINKINITCMD_POLL); 2020 /* don't wait */ 2021 ret = 0; 2022 goto bail; 2023 2024 case IPATH_IB_LINKDOWN_SLEEP: 2025 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2026 INFINIPATH_IBCC_LINKINITCMD_SLEEP); 2027 /* don't wait */ 2028 ret = 0; 2029 goto bail; 2030 2031 case IPATH_IB_LINKDOWN_DISABLE: 2032 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2033 INFINIPATH_IBCC_LINKINITCMD_DISABLE); 2034 /* don't wait */ 2035 ret = 0; 2036 goto bail; 2037 2038 case IPATH_IB_LINKARM: 2039 if (dd->ipath_flags & IPATH_LINKARMED) { 2040 ret = 0; 2041 goto bail; 2042 } 2043 if (!(dd->ipath_flags & 2044 (IPATH_LINKINIT | IPATH_LINKACTIVE))) { 2045 ret = -EINVAL; 2046 goto bail; 2047 } 2048 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0); 2049 2050 /* 2051 * Since the port can transition to ACTIVE by receiving 2052 * a non VL 15 packet, wait for either state. 
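		 * ipath_wait_linkstate() below treats the state argument
		 * as a mask, so either ARMED or ACTIVE satisfies the wait.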
2053 */ 2054 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; 2055 break; 2056 2057 case IPATH_IB_LINKACTIVE: 2058 if (dd->ipath_flags & IPATH_LINKACTIVE) { 2059 ret = 0; 2060 goto bail; 2061 } 2062 if (!(dd->ipath_flags & IPATH_LINKARMED)) { 2063 ret = -EINVAL; 2064 goto bail; 2065 } 2066 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0); 2067 lstate = IPATH_LINKACTIVE; 2068 break; 2069 2070 case IPATH_IB_LINK_LOOPBACK: 2071 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); 2072 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; 2073 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2074 dd->ipath_ibcctrl); 2075 2076 /* turn heartbeat off, as it causes loopback to fail */ 2077 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2078 IPATH_IB_HRTBT_OFF); 2079 /* don't wait */ 2080 ret = 0; 2081 goto bail; 2082 2083 case IPATH_IB_LINK_EXTERNAL: 2084 dev_info(&dd->pcidev->dev, 2085 "Disabling IB local loopback (normal)\n"); 2086 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2087 IPATH_IB_HRTBT_ON); 2088 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; 2089 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2090 dd->ipath_ibcctrl); 2091 /* don't wait */ 2092 ret = 0; 2093 goto bail; 2094 2095 /* 2096 * Heartbeat can be explicitly enabled by the user via 2097 * "hrtbt_enable" "file", and if disabled, trying to enable here 2098 * will have no effect. Implicit changes (heartbeat off when 2099 * loopback on, and vice versa) are included to ease testing. 2100 */ 2101 case IPATH_IB_LINK_HRTBT: 2102 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2103 IPATH_IB_HRTBT_ON); 2104 goto bail; 2105 2106 case IPATH_IB_LINK_NO_HRTBT: 2107 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2108 IPATH_IB_HRTBT_OFF); 2109 goto bail; 2110 2111 default: 2112 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); 2113 ret = -EINVAL; 2114 goto bail; 2115 } 2116 ret = ipath_wait_linkstate(dd, lstate, 2000); 2117 2118bail: 2119 return ret; 2120} 2121 2122/** 2123 * ipath_set_mtu - set the MTU 2124 * @dd: the infinipath device 2125 * @arg: the new MTU 2126 * 2127 * we can handle "any" incoming size, the issue here is whether we 2128 * need to restrict our outgoing size. For now, we don't do any 2129 * sanity checking on this, and we don't deal with what happens to 2130 * programs that are already running when the size changes. 2131 * NOTE: changing the MTU will usually cause the IBC to go back to 2132 * link INIT state... 2133 */ 2134int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) 2135{ 2136 u32 piosize; 2137 int changed = 0; 2138 int ret; 2139 2140 /* 2141 * mtu is IB data payload max. It's the largest power of 2 less 2142 * than piosize (or even larger, since it only really controls the 2143 * largest we can receive; we can send the max of the mtu and 2144 * piosize). We check that it's one of the valid IB sizes. 
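	 * Accepted values are 256, 512, 1024, 2048, and (only when the
	 * mtu4096 module parameter is set) 4096.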
2145 */ 2146 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && 2147 (arg != 4096 || !ipath_mtu4096)) { 2148 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); 2149 ret = -EINVAL; 2150 goto bail; 2151 } 2152 if (dd->ipath_ibmtu == arg) { 2153 ret = 0; /* same as current */ 2154 goto bail; 2155 } 2156 2157 piosize = dd->ipath_ibmaxlen; 2158 dd->ipath_ibmtu = arg; 2159 2160 if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { 2161 /* Only if it's not the initial value (or reset to it) */ 2162 if (piosize != dd->ipath_init_ibmaxlen) { 2163 if (arg > piosize && arg <= dd->ipath_init_ibmaxlen) 2164 piosize = dd->ipath_init_ibmaxlen; 2165 dd->ipath_ibmaxlen = piosize; 2166 changed = 1; 2167 } 2168 } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { 2169 piosize = arg + IPATH_PIO_MAXIBHDR; 2170 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " 2171 "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, 2172 arg); 2173 dd->ipath_ibmaxlen = piosize; 2174 changed = 1; 2175 } 2176 2177 if (changed) { 2178 u64 ibc = dd->ipath_ibcctrl, ibdw; 2179 /* 2180 * update our housekeeping variables, and set IBC max 2181 * size, same as init code; max IBC is max we allow in 2182 * buffer, less the qword pbc, plus 1 for ICRC, in dwords 2183 */ 2184 dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32); 2185 ibdw = (dd->ipath_ibmaxlen >> 2) + 1; 2186 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << 2187 dd->ibcc_mpl_shift); 2188 ibc |= ibdw << dd->ibcc_mpl_shift; 2189 dd->ipath_ibcctrl = ibc; 2190 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2191 dd->ipath_ibcctrl); 2192 dd->ipath_f_tidtemplate(dd); 2193 } 2194 2195 ret = 0; 2196 2197bail: 2198 return ret; 2199} 2200 2201int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc) 2202{ 2203 dd->ipath_lid = lid; 2204 dd->ipath_lmc = lmc; 2205 2206 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid | 2207 (~((1U << lmc) - 1)) << 16); 2208 2209 dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid); 2210 2211 return 0; 2212} 2213 2214 2215/** 2216 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register 2217 * @dd: the infinipath device 2218 * @regno: the register number to write 2219 * @port: the port containing the register 2220 * @value: the value to write 2221 * 2222 * Registers that vary with the chip implementation constants (port) 2223 * use this routine. 2224 */ 2225void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, 2226 unsigned port, u64 value) 2227{ 2228 u16 where; 2229 2230 if (port < dd->ipath_portcnt && 2231 (regno == dd->ipath_kregs->kr_rcvhdraddr || 2232 regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) 2233 where = regno + port; 2234 else 2235 where = -1; 2236 2237 ipath_write_kreg(dd, where, value); 2238} 2239 2240/* 2241 * Following deal with the "obviously simple" task of overriding the state 2242 * of the LEDS, which normally indicate link physical and logical status. 2243 * The complications arise in dealing with different hardware mappings 2244 * and the board-dependent routine being called from interrupts. 2245 * and then there's the requirement to _flash_ them. 
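 * In the override word, bits 3:0 give the LED state for phase 0 of the
 * blink, bits 7:4 the state for phase 1, and bits 15:8 the flash
 * frequency; a zero frequency means no blink (both phases use bits 3:0,
 * and the timer just repolls at 1 Hz).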
2246 */ 2247#define LED_OVER_FREQ_SHIFT 8 2248#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) 2249/* Below is "non-zero" to force override, but both actual LEDs are off */ 2250#define LED_OVER_BOTH_OFF (8) 2251 2252static void ipath_run_led_override(unsigned long opaque) 2253{ 2254 struct ipath_devdata *dd = (struct ipath_devdata *)opaque; 2255 int timeoff; 2256 int pidx; 2257 u64 lstate, ltstate, val; 2258 2259 if (!(dd->ipath_flags & IPATH_INITTED)) 2260 return; 2261 2262 pidx = dd->ipath_led_override_phase++ & 1; 2263 dd->ipath_led_override = dd->ipath_led_override_vals[pidx]; 2264 timeoff = dd->ipath_led_override_timeoff; 2265 2266 /* 2267 * below potentially restores the LED values per current status, 2268 * should also possibly setup the traffic-blink register, 2269 * but leave that to per-chip functions. 2270 */ 2271 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 2272 ltstate = ipath_ib_linktrstate(dd, val); 2273 lstate = ipath_ib_linkstate(dd, val); 2274 2275 dd->ipath_f_setextled(dd, lstate, ltstate); 2276 mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); 2277} 2278 2279void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) 2280{ 2281 int timeoff, freq; 2282 2283 if (!(dd->ipath_flags & IPATH_INITTED)) 2284 return; 2285 2286 /* First check if we are blinking. If not, use 1HZ polling */ 2287 timeoff = HZ; 2288 freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; 2289 2290 if (freq) { 2291 /* For blink, set each phase from one nybble of val */ 2292 dd->ipath_led_override_vals[0] = val & 0xF; 2293 dd->ipath_led_override_vals[1] = (val >> 4) & 0xF; 2294 timeoff = (HZ << 4)/freq; 2295 } else { 2296 /* Non-blink set both phases the same. */ 2297 dd->ipath_led_override_vals[0] = val & 0xF; 2298 dd->ipath_led_override_vals[1] = val & 0xF; 2299 } 2300 dd->ipath_led_override_timeoff = timeoff; 2301 2302 /* 2303 * If the timer has not already been started, do so. Use a "quick" 2304 * timeout so the function will be called soon, to look at our request. 2305 */ 2306 if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) { 2307 /* Need to start timer */ 2308 init_timer(&dd->ipath_led_override_timer); 2309 dd->ipath_led_override_timer.function = 2310 ipath_run_led_override; 2311 dd->ipath_led_override_timer.data = (unsigned long) dd; 2312 dd->ipath_led_override_timer.expires = jiffies + 1; 2313 add_timer(&dd->ipath_led_override_timer); 2314 } else 2315 atomic_dec(&dd->ipath_led_override_timer_active); 2316} 2317 2318/** 2319 * ipath_shutdown_device - shut down a device 2320 * @dd: the infinipath device 2321 * 2322 * This is called to make the device quiet when we are about to 2323 * unload the driver, and also when the device is administratively 2324 * disabled. It does not free any data structures. 
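 * (Per-port memory is released separately, via ipath_free_pddata().)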
 * Everything it does has to be set up again by ipath_init_chip(dd, 1).
 */
void ipath_shutdown_device(struct ipath_devdata *dd)
{
	unsigned long flags;

	ipath_dbg("Shutting down the device\n");

	ipath_hol_up(dd);	/* make sure user processes aren't suspended */

	dd->ipath_flags |= IPATH_LINKUNK;
	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
			     IPATH_LINKINIT | IPATH_LINKARMED |
			     IPATH_LINKACTIVE);
	*dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
				IPATH_STATUS_IB_READY);

	/* mask interrupts, but not errors */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);

	dd->ipath_rcvctrl = 0;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);

	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
		teardown_sdma(dd);

	/*
	 * Gracefully stop all sends, allowing any in progress to trickle
	 * out first.
	 */
	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
	dd->ipath_sendctrl = 0;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
	/* flush it */
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);

	/*
	 * Wait long enough for anything that's going to trickle out to
	 * have actually done so.
	 */
	udelay(5);

	dd->ipath_f_setextled(dd, 0, 0);	/* make sure LEDs are off */

	ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
	ipath_cancel_sends(dd, 0);

	/*
	 * We are shutting down, so tell components that care.  We don't
	 * do this on just a link state change; much like ethernet, a
	 * cable unplug, etc. doesn't change driver state.
	 */
	signal_ib_event(dd, IB_EVENT_PORT_ERR);

	/* disable IBC */
	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
			 dd->ipath_control | INFINIPATH_C_FREEZEMODE);

	/*
	 * Clear SerdesEnable and turn the LEDs off; do this here because
	 * we are unloading, so don't count on interrupts to move along.
	 * Turn the LEDs off explicitly for the same reason.
2390 */ 2391 dd->ipath_f_quiet_serdes(dd); 2392 2393 /* stop all the timers that might still be running */ 2394 del_timer_sync(&dd->ipath_hol_timer); 2395 if (dd->ipath_stats_timer_active) { 2396 del_timer_sync(&dd->ipath_stats_timer); 2397 dd->ipath_stats_timer_active = 0; 2398 } 2399 if (dd->ipath_intrchk_timer.data) { 2400 del_timer_sync(&dd->ipath_intrchk_timer); 2401 dd->ipath_intrchk_timer.data = 0; 2402 } 2403 if (atomic_read(&dd->ipath_led_override_timer_active)) { 2404 del_timer_sync(&dd->ipath_led_override_timer); 2405 atomic_set(&dd->ipath_led_override_timer_active, 0); 2406 } 2407 2408 /* 2409 * clear all interrupts and errors, so that the next time the driver 2410 * is loaded or device is enabled, we know that whatever is set 2411 * happened while we were unloaded 2412 */ 2413 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 2414 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); 2415 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); 2416 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); 2417 2418 ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); 2419 ipath_update_eeprom_log(dd); 2420} 2421 2422/** 2423 * ipath_free_pddata - free a port's allocated data 2424 * @dd: the infinipath device 2425 * @pd: the portdata structure 2426 * 2427 * free up any allocated data for a port 2428 * This should not touch anything that would affect a simultaneous 2429 * re-allocation of port data, because it is called after ipath_mutex 2430 * is released (and can be called from reinit as well). 2431 * It should never change any chip state, or global driver state. 2432 * (The only exception to global state is freeing the port0 port0_skbs.) 2433 */ 2434void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) 2435{ 2436 if (!pd) 2437 return; 2438 2439 if (pd->port_rcvhdrq) { 2440 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " 2441 "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, 2442 (unsigned long) pd->port_rcvhdrq_size); 2443 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, 2444 pd->port_rcvhdrq, pd->port_rcvhdrq_phys); 2445 pd->port_rcvhdrq = NULL; 2446 if (pd->port_rcvhdrtail_kvaddr) { 2447 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, 2448 pd->port_rcvhdrtail_kvaddr, 2449 pd->port_rcvhdrqtailaddr_phys); 2450 pd->port_rcvhdrtail_kvaddr = NULL; 2451 } 2452 } 2453 if (pd->port_port && pd->port_rcvegrbuf) { 2454 unsigned e; 2455 2456 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { 2457 void *base = pd->port_rcvegrbuf[e]; 2458 size_t size = pd->port_rcvegrbuf_size; 2459 2460 ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " 2461 "chunk %u/%u\n", base, 2462 (unsigned long) size, 2463 e, pd->port_rcvegrbuf_chunks); 2464 dma_free_coherent(&dd->pcidev->dev, size, 2465 base, pd->port_rcvegrbuf_phys[e]); 2466 } 2467 kfree(pd->port_rcvegrbuf); 2468 pd->port_rcvegrbuf = NULL; 2469 kfree(pd->port_rcvegrbuf_phys); 2470 pd->port_rcvegrbuf_phys = NULL; 2471 pd->port_rcvegrbuf_chunks = 0; 2472 } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { 2473 unsigned e; 2474 struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; 2475 2476 dd->ipath_port0_skbinfo = NULL; 2477 ipath_cdbg(VERBOSE, "free closed port %d " 2478 "ipath_port0_skbinfo @ %p\n", pd->port_port, 2479 skbinfo); 2480 for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++) 2481 if (skbinfo[e].skb) { 2482 pci_unmap_single(dd->pcidev, skbinfo[e].phys, 2483 dd->ipath_ibmaxlen, 2484 PCI_DMA_FROMDEVICE); 2485 dev_kfree_skb(skbinfo[e].skb); 2486 } 2487 vfree(skbinfo); 2488 } 2489 kfree(pd->port_tid_pg_list); 
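	/* the subport regions are vmalloc'ed, so they go back via vfree */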
2490 vfree(pd->subport_uregbase); 2491 vfree(pd->subport_rcvegrbuf); 2492 vfree(pd->subport_rcvhdr_base); 2493 kfree(pd); 2494} 2495 2496static int __init infinipath_init(void) 2497{ 2498 int ret; 2499 2500 if (ipath_debug & __IPATH_DBG) 2501 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); 2502 2503 /* 2504 * These must be called before the driver is registered with 2505 * the PCI subsystem. 2506 */ 2507 idr_init(&unit_table); 2508 if (!idr_pre_get(&unit_table, GFP_KERNEL)) { 2509 printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n"); 2510 ret = -ENOMEM; 2511 goto bail; 2512 } 2513 2514 ret = pci_register_driver(&ipath_driver); 2515 if (ret < 0) { 2516 printk(KERN_ERR IPATH_DRV_NAME 2517 ": Unable to register driver: error %d\n", -ret); 2518 goto bail_unit; 2519 } 2520 2521 ret = ipath_init_ipathfs(); 2522 if (ret < 0) { 2523 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " 2524 "ipathfs: error %d\n", -ret); 2525 goto bail_pci; 2526 } 2527 2528 goto bail; 2529 2530bail_pci: 2531 pci_unregister_driver(&ipath_driver); 2532 2533bail_unit: 2534 idr_destroy(&unit_table); 2535 2536bail: 2537 return ret; 2538} 2539 2540static void __exit infinipath_cleanup(void) 2541{ 2542 ipath_exit_ipathfs(); 2543 2544 ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); 2545 pci_unregister_driver(&ipath_driver); 2546 2547 idr_destroy(&unit_table); 2548} 2549 2550/** 2551 * ipath_reset_device - reset the chip if possible 2552 * @unit: the device to reset 2553 * 2554 * Whether or not reset is successful, we attempt to re-initialize the chip 2555 * (that is, much like a driver unload/reload). We clear the INITTED flag 2556 * so that the various entry points will fail until we reinitialize. For 2557 * now, we only allow this if no user ports are open that use chip resources 2558 */ 2559int ipath_reset_device(int unit) 2560{ 2561 int ret, i; 2562 struct ipath_devdata *dd = ipath_lookup(unit); 2563 unsigned long flags; 2564 2565 if (!dd) { 2566 ret = -ENODEV; 2567 goto bail; 2568 } 2569 2570 if (atomic_read(&dd->ipath_led_override_timer_active)) { 2571 /* Need to stop LED timer, _then_ shut off LEDs */ 2572 del_timer_sync(&dd->ipath_led_override_timer); 2573 atomic_set(&dd->ipath_led_override_timer_active, 0); 2574 } 2575 2576 /* Shut off LEDs after we are sure timer is not running */ 2577 dd->ipath_led_override = LED_OVER_BOTH_OFF; 2578 dd->ipath_f_setextled(dd, 0, 0); 2579 2580 dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); 2581 2582 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { 2583 dev_info(&dd->pcidev->dev, "Invalid unit number %u or " 2584 "not initialized or not present\n", unit); 2585 ret = -ENXIO; 2586 goto bail; 2587 } 2588 2589 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); 2590 if (dd->ipath_pd) 2591 for (i = 1; i < dd->ipath_cfgports; i++) { 2592 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) 2593 continue; 2594 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2595 ipath_dbg("unit %u port %d is in use " 2596 "(PID %u cmd %s), can't reset\n", 2597 unit, i, 2598 pid_nr(dd->ipath_pd[i]->port_pid), 2599 dd->ipath_pd[i]->port_comm); 2600 ret = -EBUSY; 2601 goto bail; 2602 } 2603 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2604 2605 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) 2606 teardown_sdma(dd); 2607 2608 dd->ipath_flags &= ~IPATH_INITTED; 2609 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); 2610 ret = dd->ipath_f_reset(dd); 2611 if (ret == 1) { 2612 ipath_dbg("Reinitializing unit %u after reset attempt\n", 2613 unit); 
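		/* second argument 1 => re-init after reset, not first-time init */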
2614 ret = ipath_init_chip(dd, 1); 2615 } else 2616 ret = -EAGAIN; 2617 if (ret) 2618 ipath_dev_err(dd, "Reinitialize unit %u after " 2619 "reset failed with %d\n", unit, ret); 2620 else 2621 dev_info(&dd->pcidev->dev, "Reinitialized unit %u after " 2622 "resetting\n", unit); 2623 2624bail: 2625 return ret; 2626} 2627 2628/* 2629 * send a signal to all the processes that have the driver open 2630 * through the normal interfaces (i.e., everything other than diags 2631 * interface). Returns number of signalled processes. 2632 */ 2633static int ipath_signal_procs(struct ipath_devdata *dd, int sig) 2634{ 2635 int i, sub, any = 0; 2636 struct pid *pid; 2637 unsigned long flags; 2638 2639 if (!dd->ipath_pd) 2640 return 0; 2641 2642 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); 2643 for (i = 1; i < dd->ipath_cfgports; i++) { 2644 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) 2645 continue; 2646 pid = dd->ipath_pd[i]->port_pid; 2647 if (!pid) 2648 continue; 2649 2650 dev_info(&dd->pcidev->dev, "context %d in use " 2651 "(PID %u), sending signal %d\n", 2652 i, pid_nr(pid), sig); 2653 kill_pid(pid, sig, 1); 2654 any++; 2655 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { 2656 pid = dd->ipath_pd[i]->port_subpid[sub]; 2657 if (!pid) 2658 continue; 2659 dev_info(&dd->pcidev->dev, "sub-context " 2660 "%d:%d in use (PID %u), sending " 2661 "signal %d\n", i, sub, pid_nr(pid), sig); 2662 kill_pid(pid, sig, 1); 2663 any++; 2664 } 2665 } 2666 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2667 return any; 2668} 2669 2670static void ipath_hol_signal_down(struct ipath_devdata *dd) 2671{ 2672 if (ipath_signal_procs(dd, SIGSTOP)) 2673 ipath_dbg("Stopped some processes\n"); 2674 ipath_cancel_sends(dd, 1); 2675} 2676 2677 2678static void ipath_hol_signal_up(struct ipath_devdata *dd) 2679{ 2680 if (ipath_signal_procs(dd, SIGCONT)) 2681 ipath_dbg("Continued some processes\n"); 2682} 2683 2684/* 2685 * link is down, stop any users processes, and flush pending sends 2686 * to prevent HoL blocking, then start the HoL timer that 2687 * periodically continues, then stop procs, so they can detect 2688 * link down if they want, and do something about it. 2689 * Timer may already be running, so use mod_timer, not add_timer. 2690 */ 2691void ipath_hol_down(struct ipath_devdata *dd) 2692{ 2693 dd->ipath_hol_state = IPATH_HOL_DOWN; 2694 ipath_hol_signal_down(dd); 2695 dd->ipath_hol_next = IPATH_HOL_DOWNCONT; 2696 dd->ipath_hol_timer.expires = jiffies + 2697 msecs_to_jiffies(ipath_hol_timeout_ms); 2698 mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); 2699} 2700 2701/* 2702 * link is up, continue any user processes, and ensure timer 2703 * is a nop, if running. Let timer keep running, if set; it 2704 * will nop when it sees the link is up 2705 */ 2706void ipath_hol_up(struct ipath_devdata *dd) 2707{ 2708 ipath_hol_signal_up(dd); 2709 dd->ipath_hol_state = IPATH_HOL_UP; 2710} 2711 2712/* 2713 * toggle the running/not running state of user proceses 2714 * to prevent HoL blocking on chip resources, but still allow 2715 * user processes to do link down special case handling. 
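 * Each timer expiration alternates between SIGSTOP and SIGCONT of the
 * user processes (ipath_hol_signal_down()/ipath_hol_signal_up()).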
2716 * Should only be called via the timer 2717 */ 2718void ipath_hol_event(unsigned long opaque) 2719{ 2720 struct ipath_devdata *dd = (struct ipath_devdata *)opaque; 2721 2722 if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP 2723 && dd->ipath_hol_state != IPATH_HOL_UP) { 2724 dd->ipath_hol_next = IPATH_HOL_DOWNCONT; 2725 ipath_dbg("Stopping processes\n"); 2726 ipath_hol_signal_down(dd); 2727 } else { /* may do "extra" if also in ipath_hol_up() */ 2728 dd->ipath_hol_next = IPATH_HOL_DOWNSTOP; 2729 ipath_dbg("Continuing processes\n"); 2730 ipath_hol_signal_up(dd); 2731 } 2732 if (dd->ipath_hol_state == IPATH_HOL_UP) 2733 ipath_dbg("link's up, don't resched timer\n"); 2734 else { 2735 dd->ipath_hol_timer.expires = jiffies + 2736 msecs_to_jiffies(ipath_hol_timeout_ms); 2737 mod_timer(&dd->ipath_hol_timer, 2738 dd->ipath_hol_timer.expires); 2739 } 2740} 2741 2742int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) 2743{ 2744 u64 val; 2745 2746 if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK) 2747 return -1; 2748 if (dd->ipath_rx_pol_inv != new_pol_inv) { 2749 dd->ipath_rx_pol_inv = new_pol_inv; 2750 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); 2751 val &= ~(INFINIPATH_XGXS_RX_POL_MASK << 2752 INFINIPATH_XGXS_RX_POL_SHIFT); 2753 val |= ((u64)dd->ipath_rx_pol_inv) << 2754 INFINIPATH_XGXS_RX_POL_SHIFT; 2755 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); 2756 } 2757 return 0; 2758} 2759 2760/* 2761 * Disable and enable the armlaunch error. Used for PIO bandwidth testing on 2762 * the 7220, which is count-based, rather than trigger-based. Safe for the 2763 * driver check, since it's at init. Not completely safe when used for 2764 * user-mode checking, since some error checking can be lost, but not 2765 * particularly risky, and only has problematic side-effects in the face of 2766 * very buggy user code. There is no reference counting, but that's also 2767 * fine, given the intended use. 2768 */ 2769void ipath_enable_armlaunch(struct ipath_devdata *dd) 2770{ 2771 dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH; 2772 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 2773 INFINIPATH_E_SPIOARMLAUNCH); 2774 dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH; 2775 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 2776 dd->ipath_errormask); 2777} 2778 2779void ipath_disable_armlaunch(struct ipath_devdata *dd) 2780{ 2781 /* so don't re-enable if already set */ 2782 dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH; 2783 dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH; 2784 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 2785 dd->ipath_errormask); 2786} 2787 2788module_init(infinipath_init); 2789module_exit(infinipath_cleanup); 2790