/*
 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
 * All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include <linux/delay.h>
#include <linux/idr.h>
#include <linux/module.h>

#include "qib.h"
#include "qib_common.h"

/*
 * min buffers we want to have per context, after driver
 */
#define QIB_MIN_USER_CTXT_BUFCNT 7

#define QLOGIC_IB_R_SOFTWARE_MASK 0xFF
#define QLOGIC_IB_R_SOFTWARE_SHIFT 24
#define QLOGIC_IB_R_EMULATOR_MASK (1ULL<<62)

/*
 * Number of ctxts we are configured to use (to allow for more pio
 * buffers per ctxt, etc.)  Zero means use chip value.
 */
ushort qib_cfgctxts;
module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");

/*
 * If set, do not write to any regs if avoidable, hack to allow
 * check for deranged default register values.
 */
ushort qib_mini_init;
module_param_named(mini_init, qib_mini_init, ushort, S_IRUGO);
MODULE_PARM_DESC(mini_init, "If set, do minimal diag init");

unsigned qib_n_krcv_queues;
module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");

/*
 * qib_wc_pat parameter:
 *      0 is WC via MTRR
 *      1 is WC via PAT
 *      If PAT initialization fails, code reverts back to MTRR
 */
unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");

struct workqueue_struct *qib_cq_wq;

static void verify_interrupt(unsigned long);

static struct idr qib_unit_table;
u32 qib_cpulist_count;
unsigned long *qib_cpulist;

/* set number of contexts we'll actually use */
void qib_set_ctxtcnt(struct qib_devdata *dd)
{
	if (!qib_cfgctxts) {
		dd->cfgctxts = dd->first_user_ctxt + num_online_cpus();
		if (dd->cfgctxts > dd->ctxtcnt)
			dd->cfgctxts = dd->ctxtcnt;
	} else if (qib_cfgctxts < dd->num_pports)
		dd->cfgctxts = dd->ctxtcnt;
	else if (qib_cfgctxts <= dd->ctxtcnt)
		dd->cfgctxts = qib_cfgctxts;
	else
		dd->cfgctxts = dd->ctxtcnt;
}

/*
 * Common code for creating the receive context array.
 */
int qib_create_ctxts(struct qib_devdata *dd)
{
	unsigned i;
	int ret;

	/*
	 * Allocate full ctxtcnt array, rather than just cfgctxts, because
	 * cleanup iterates across all possible ctxts.
	 */
	dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL);
	if (!dd->rcd) {
		qib_dev_err(dd, "Unable to allocate ctxtdata array, "
			    "failing\n");
		ret = -ENOMEM;
		goto done;
	}

	/* create (one or more) kctxt */
	for (i = 0; i < dd->first_user_ctxt; ++i) {
		struct qib_pportdata *ppd;
		struct qib_ctxtdata *rcd;

		if (dd->skip_kctxt_mask & (1 << i))
			continue;

		ppd = dd->pport + (i % dd->num_pports);
		rcd = qib_create_ctxtdata(ppd, i);
		if (!rcd) {
			qib_dev_err(dd, "Unable to allocate ctxtdata"
				    " for Kernel ctxt, failing\n");
			ret = -ENOMEM;
			goto done;
		}
		rcd->pkeys[0] = QIB_DEFAULT_P_KEY;
		rcd->seq_cnt = 1;
	}
	ret = 0;
done:
	return ret;
}

/*
 * Common code for user and kernel context setup.
 */
struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
{
	struct qib_devdata *dd = ppd->dd;
	struct qib_ctxtdata *rcd;

	rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
	if (rcd) {
		INIT_LIST_HEAD(&rcd->qp_wait_list);
		rcd->ppd = ppd;
		rcd->dd = dd;
		rcd->cnt = 1;
		rcd->ctxt = ctxt;
		dd->rcd[ctxt] = rcd;

		dd->f_init_ctxt(rcd);

		/*
		 * To avoid wasting a lot of memory, we allocate 32KB chunks
		 * of physically contiguous memory, advance through it until
		 * used up and then allocate more.  Of course, we need
		 * memory to store those extra pointers, now.  32KB seems to
		 * be the most that is "safe" under memory pressure
		 * (creating large files and then copying them over
		 * NFS while doing lots of MPI jobs).  The OOM killer can
		 * get invoked, even though we say we can sleep and this can
		 * cause significant system problems....
		 */
		rcd->rcvegrbuf_size = 0x8000;
		rcd->rcvegrbufs_perchunk =
			rcd->rcvegrbuf_size / dd->rcvegrbufsize;
		rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
			rcd->rcvegrbufs_perchunk - 1) /
			rcd->rcvegrbufs_perchunk;
		BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
		rcd->rcvegrbufs_perchunk_shift =
			ilog2(rcd->rcvegrbufs_perchunk);
	}
	return rcd;
}

/*
 * Common code for initializing the physical port structure.
 */
void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
			u8 hw_pidx, u8 port)
{
	ppd->dd = dd;
	ppd->hw_pidx = hw_pidx;
	ppd->port = port; /* IB port number, not index */

	spin_lock_init(&ppd->sdma_lock);
	spin_lock_init(&ppd->lflags_lock);
	init_waitqueue_head(&ppd->state_wait);

	init_timer(&ppd->symerr_clear_timer);
	ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
	ppd->symerr_clear_timer.data = (unsigned long)ppd;
}

static int init_pioavailregs(struct qib_devdata *dd)
{
	int ret, pidx;
	u64 *status_page;

	dd->pioavailregs_dma = dma_alloc_coherent(
		&dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys,
		GFP_KERNEL);
	if (!dd->pioavailregs_dma) {
		qib_dev_err(dd, "failed to allocate PIOavail reg area "
			    "in memory\n");
		ret = -ENOMEM;
		goto done;
	}

	/*
	 * We really want L2 cache aligned, but for current CPUs of
	 * interest, they are the same.
	 */
	status_page = (u64 *)
		((char *) dd->pioavailregs_dma +
		 ((2 * L1_CACHE_BYTES +
		   dd->pioavregs * sizeof(u64)) & ~L1_CACHE_BYTES));
	/* device status comes first, for backwards compatibility */
	dd->devstatusp = status_page;
	*status_page++ = 0;
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		dd->pport[pidx].statusp = status_page;
		*status_page++ = 0;
	}

	/*
	 * Setup buffer to hold freeze and other messages, accessible to
	 * apps, following statusp.  This is per-unit, not per port.
	 */
	dd->freezemsg = (char *) status_page;
	*dd->freezemsg = 0;
	/* length of msg buffer is "whatever is left" */
	ret = (char *) status_page - (char *) dd->pioavailregs_dma;
	dd->freezelen = PAGE_SIZE - ret;

	ret = 0;

done:
	return ret;
}

/**
 * init_shadow_tids - allocate the shadow TID array
 * @dd: the qlogic_ib device
 *
 * allocate the shadow TID array, so we can qib_munlock previous
 * entries.  It may make more sense to move the pageshadow to the
 * ctxt data structure, so we only allocate memory for ctxts actually
 * in use, since we are at 8k per ctxt now.
 * We don't want failures here to prevent use of the driver/chip,
 * so no return value.
 */
static void init_shadow_tids(struct qib_devdata *dd)
{
	struct page **pages;
	dma_addr_t *addrs;

	pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
	if (!pages) {
		qib_dev_err(dd, "failed to allocate shadow page * "
			    "array, no expected sends!\n");
		goto bail;
	}

	addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
	if (!addrs) {
		qib_dev_err(dd, "failed to allocate shadow dma handle "
			    "array, no expected sends!\n");
		goto bail_free;
	}

	dd->pageshadow = pages;
	dd->physshadow = addrs;
	return;

bail_free:
	vfree(pages);
bail:
	dd->pageshadow = NULL;
}

/*
 * Do initialization for device that is only needed on
 * first detect, not on resets.
 */
static int loadtime_init(struct qib_devdata *dd)
{
	int ret = 0;

	if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) &
	     QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) {
		qib_dev_err(dd, "Driver only handles version %d, "
			    "chip swversion is %d (%llx), failing\n",
			    QIB_CHIP_SWVERSION,
			    (int)(dd->revision >>
				  QLOGIC_IB_R_SOFTWARE_SHIFT) &
			    QLOGIC_IB_R_SOFTWARE_MASK,
			    (unsigned long long) dd->revision);
		ret = -ENOSYS;
		goto done;
	}

	if (dd->revision & QLOGIC_IB_R_EMULATOR_MASK)
		qib_devinfo(dd->pcidev, "%s", dd->boardversion);

	spin_lock_init(&dd->pioavail_lock);
	spin_lock_init(&dd->sendctrl_lock);
	spin_lock_init(&dd->uctxt_lock);
	spin_lock_init(&dd->qib_diag_trans_lock);
	spin_lock_init(&dd->eep_st_lock);
	mutex_init(&dd->eep_lock);

	if (qib_mini_init)
		goto done;

	ret = init_pioavailregs(dd);
	init_shadow_tids(dd);

	qib_get_eeprom_info(dd);

	/* setup time (don't start yet) to verify we got interrupt */
	init_timer(&dd->intrchk_timer);
	dd->intrchk_timer.function = verify_interrupt;
	dd->intrchk_timer.data = (unsigned long) dd;

done:
	return ret;
}

/**
 * init_after_reset - re-initialize after a reset
 * @dd: the qlogic_ib device
 *
 * sanity check at least some of the values after reset, and
 * ensure no receive or transmit (explicitly, in case reset
 * failed)
 */
static int init_after_reset(struct qib_devdata *dd)
{
	int i;

	/*
	 * Ensure chip does no sends or receives, tail updates, or
	 * pioavail updates while we re-initialize.  This is mostly
	 * for the driver data structures, not chip registers.
	 */
	for (i = 0; i < dd->num_pports; ++i) {
		/*
		 * ctxt == -1 means "all contexts". Only really safe for
		 * _dis_abling things, as here.
		 */
		dd->f_rcvctrl(dd->pport + i, QIB_RCVCTRL_CTXT_DIS |
			      QIB_RCVCTRL_INTRAVAIL_DIS |
			      QIB_RCVCTRL_TAILUPD_DIS, -1);
		/* Redundant across ports for some, but no big deal.  */
		dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_DIS |
			       QIB_SENDCTRL_AVAIL_DIS);
	}

	return 0;
}

static void enable_chip(struct qib_devdata *dd)
{
	u64 rcvmask;
	int i;

	/*
	 * Enable PIO send, and update of PIOavail regs to memory.
	 */
	for (i = 0; i < dd->num_pports; ++i)
		dd->f_sendctrl(dd->pport + i, QIB_SENDCTRL_SEND_ENB |
			       QIB_SENDCTRL_AVAIL_ENB);
	/*
	 * Enable kernel ctxts' receive and receive interrupt.
	 * Other ctxts done as user opens and inits them.
	 */
	rcvmask = QIB_RCVCTRL_CTXT_ENB | QIB_RCVCTRL_INTRAVAIL_ENB;
	rcvmask |= (dd->flags & QIB_NODMA_RTAIL) ?
		  QIB_RCVCTRL_TAILUPD_DIS : QIB_RCVCTRL_TAILUPD_ENB;
	for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
		struct qib_ctxtdata *rcd = dd->rcd[i];

		if (rcd)
			dd->f_rcvctrl(rcd->ppd, rcvmask, i);
	}
	dd->freectxts = dd->cfgctxts - dd->first_user_ctxt;
}

static void verify_interrupt(unsigned long opaque)
{
	struct qib_devdata *dd = (struct qib_devdata *) opaque;

	if (!dd)
		return; /* being torn down */

	/*
	 * If we don't have a lid or any interrupts, let the user know and
	 * don't bother checking again.
	 */
	if (dd->int_counter == 0) {
		if (!dd->f_intr_fallback(dd))
			dev_err(&dd->pcidev->dev, "No interrupts detected, "
				"not usable.\n");
		else /* re-arm the timer to see if fallback works */
			mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
	}
}

static void init_piobuf_state(struct qib_devdata *dd)
{
	int i, pidx;
	u32 uctxts;

	/*
	 * Ensure all buffers are free, and fifos empty.  Buffers
	 * are common, so only do once for port 0.
	 *
	 * After enable and qib_chg_pioavailkernel so we can safely
	 * enable pioavail updates and PIOENABLE.  After this, packets
	 * are ready and able to go out.
	 */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_ALL);
	for (pidx = 0; pidx < dd->num_pports; ++pidx)
		dd->f_sendctrl(dd->pport + pidx, QIB_SENDCTRL_FLUSH);

	/*
	 * If the send buffers don't divide evenly among contexts, give
	 * one extra to each of the lower numbered contexts.  pbufsctxt
	 * and lastctxt_piobuf are calculated in chip-specific code
	 * because it may cause some chip-specific adjustments to be made.
	 */
	uctxts = dd->cfgctxts - dd->first_user_ctxt;
	dd->ctxts_extrabuf = dd->pbufsctxt ?
		dd->lastctxt_piobuf - (dd->pbufsctxt * uctxts) : 0;

	/*
	 * Set up the shadow copies of the piobufavail registers,
	 * which we compare against the chip registers for now, and
	 * the in memory DMA'ed copies of the registers.
	 * By now pioavail updates to memory should have occurred, so
	 * copy them into our working/shadow registers; this is in
	 * case something went wrong with abort, but mostly to get the
	 * initial values of the generation bit correct.
	 */
	for (i = 0; i < dd->pioavregs; i++) {
		__le64 tmp;

		tmp = dd->pioavailregs_dma[i];
		/*
		 * Don't need to worry about pioavailkernel here
		 * because we will call qib_chg_pioavailkernel() later
		 * in initialization, to busy out buffers as needed.
		 */
		dd->pioavailshadow[i] = le64_to_cpu(tmp);
	}
	while (i < ARRAY_SIZE(dd->pioavailshadow))
		dd->pioavailshadow[i++] = 0; /* for debugging sanity */

	/* after pioavailshadow is setup */
	qib_chg_pioavailkernel(dd, 0, dd->piobcnt2k + dd->piobcnt4k,
			       TXCHK_CHG_TYPE_KERN, NULL);
	dd->f_initvl15_bufs(dd);
}

/**
 * qib_init - do the actual initialization sequence on the chip
 * @dd: the qlogic_ib device
 * @reinit: reinitializing, so don't allocate new memory
 *
 * Do the actual initialization sequence on the chip.  This is done
 * both from the init routine called from the PCI infrastructure, and
 * when we reset the chip, or detect that it was reset internally,
 * or it's administratively re-enabled.
 *
 * Memory allocation here and in called routines is only done in
 * the first case (reinit == 0).  We have to be careful, because even
 * without memory allocation, we need to re-write all the chip registers
 * TIDs, etc. after the reset or enable has completed.
 */
int qib_init(struct qib_devdata *dd, int reinit)
{
	int ret = 0, pidx, lastfail = 0;
	u32 portok = 0;
	unsigned i;
	struct qib_ctxtdata *rcd;
	struct qib_pportdata *ppd;
	unsigned long flags;

	/* Set linkstate to unknown, so we can watch for a transition. */
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags &= ~(QIBL_LINKACTIVE | QIBL_LINKARMED |
				 QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKV);
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);
	}

	if (reinit)
		ret = init_after_reset(dd);
	else
		ret = loadtime_init(dd);
	if (ret)
		goto done;

	/* Bypass most chip-init, to get to device creation */
	if (qib_mini_init)
		return 0;

	ret = dd->f_late_initreg(dd);
	if (ret)
		goto done;

	/* dd->rcd can be NULL if early init failed */
	for (i = 0; dd->rcd && i < dd->first_user_ctxt; ++i) {
		/*
		 * Set up the (kernel) rcvhdr queue and egr TIDs.  If doing
		 * re-init, the simplest way to handle this is to free
		 * existing, and re-allocate.
		 * Need to re-create rest of ctxt 0 ctxtdata as well.
		 */
		rcd = dd->rcd[i];
		if (!rcd)
			continue;

		lastfail = qib_create_rcvhdrq(dd, rcd);
		if (!lastfail)
			lastfail = qib_setup_eagerbufs(rcd);
		if (lastfail) {
			qib_dev_err(dd, "failed to allocate kernel ctxt's "
				    "rcvhdrq and/or egr bufs\n");
			continue;
		}
	}

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		int mtu;
		if (lastfail)
			ret = lastfail;
		ppd = dd->pport + pidx;
		mtu = ib_mtu_enum_to_int(qib_ibmtu);
		if (mtu == -1) {
			mtu = QIB_DEFAULT_MTU;
			qib_ibmtu = 0; /* don't leave invalid value */
		}
		/* set max we can ever have for this driver load */
		ppd->init_ibmaxlen = min(mtu > 2048 ?
					 dd->piosize4k : dd->piosize2k,
					 dd->rcvegrbufsize +
					 (dd->rcvhdrentsize << 2));
		/*
		 * Have to initialize ibmaxlen, but this will normally
		 * change immediately in qib_set_mtu().
		 */
		ppd->ibmaxlen = ppd->init_ibmaxlen;
		qib_set_mtu(ppd, mtu);

		spin_lock_irqsave(&ppd->lflags_lock, flags);
		ppd->lflags |= QIBL_IB_LINK_DISABLED;
		spin_unlock_irqrestore(&ppd->lflags_lock, flags);

		lastfail = dd->f_bringup_serdes(ppd);
		if (lastfail) {
			qib_devinfo(dd->pcidev,
				    "Failed to bringup IB port %u\n", ppd->port);
			lastfail = -ENETDOWN;
			continue;
		}

		portok++;
	}

	if (!portok) {
		/* none of the ports initialized */
		if (!ret && lastfail)
			ret = lastfail;
		else if (!ret)
			ret = -ENETDOWN;
		/* but continue on, so we can debug cause */
	}

	enable_chip(dd);

	init_piobuf_state(dd);

done:
	if (!ret) {
		/* chip is OK for user apps; mark it as initialized */
		for (pidx = 0; pidx < dd->num_pports; ++pidx) {
			ppd = dd->pport + pidx;
			/*
			 * Set status even if port serdes is not initialized
			 * so that diags will work.
			 */
			*ppd->statusp |= QIB_STATUS_CHIP_PRESENT |
				QIB_STATUS_INITTED;
			if (!ppd->link_speed_enabled)
				continue;
			if (dd->flags & QIB_HAS_SEND_DMA)
				ret = qib_setup_sdma(ppd);
			init_timer(&ppd->hol_timer);
			ppd->hol_timer.function = qib_hol_event;
			ppd->hol_timer.data = (unsigned long)ppd;
			ppd->hol_state = QIB_HOL_UP;
		}

		/* now we can enable all interrupts from the chip */
		dd->f_set_intr_state(dd, 1);

		/*
		 * Setup to verify we get an interrupt, and fallback
		 * to an alternate if necessary and possible.
		 */
		mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
		/* start stats retrieval timer */
		mod_timer(&dd->stats_timer, jiffies + HZ * ACTIVITY_TIMER);
	}

	/* if ret is non-zero, we probably should do some cleanup here... */
	return ret;
}

/*
 * These next two routines are placeholders in case we don't have per-arch
 * code for controlling write combining.  If explicit control of write
 * combining is not available, performance will probably be awful.
 */

int __attribute__((weak)) qib_enable_wc(struct qib_devdata *dd)
{
	return -EOPNOTSUPP;
}

void __attribute__((weak)) qib_disable_wc(struct qib_devdata *dd)
{
}

static inline struct qib_devdata *__qib_lookup(int unit)
{
	return idr_find(&qib_unit_table, unit);
}

struct qib_devdata *qib_lookup(int unit)
{
	struct qib_devdata *dd;
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	dd = __qib_lookup(unit);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	return dd;
}

/*
 * Stop the timers during unit shutdown, or after an error late
 * in initialization.
 */
static void qib_stop_timers(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	int pidx;

	if (dd->stats_timer.data) {
		del_timer_sync(&dd->stats_timer);
		dd->stats_timer.data = 0;
	}
	if (dd->intrchk_timer.data) {
		del_timer_sync(&dd->intrchk_timer);
		dd->intrchk_timer.data = 0;
	}
	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		if (ppd->hol_timer.data)
			del_timer_sync(&ppd->hol_timer);
		if (ppd->led_override_timer.data) {
			del_timer_sync(&ppd->led_override_timer);
			atomic_set(&ppd->led_override_timer_active, 0);
		}
		if (ppd->symerr_clear_timer.data)
			del_timer_sync(&ppd->symerr_clear_timer);
	}
}

/**
 * qib_shutdown_device - shut down a device
 * @dd: the qlogic_ib device
 *
 * This is called to make the device quiet when we are about to
 * unload the driver, and also when the device is administratively
 * disabled.  It does not free any data structures.
 * Everything it does has to be setup again by qib_init(dd, 1)
 */
static void qib_shutdown_device(struct qib_devdata *dd)
{
	struct qib_pportdata *ppd;
	unsigned pidx;

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;

		spin_lock_irq(&ppd->lflags_lock);
		ppd->lflags &= ~(QIBL_LINKDOWN | QIBL_LINKINIT |
				 QIBL_LINKARMED | QIBL_LINKACTIVE |
				 QIBL_LINKV);
		spin_unlock_irq(&ppd->lflags_lock);
		*ppd->statusp &= ~(QIB_STATUS_IB_CONF | QIB_STATUS_IB_READY);
	}
	dd->flags &= ~QIB_INITTED;

	/* mask interrupts, but not errors */
	dd->f_set_intr_state(dd, 0);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_rcvctrl(ppd, QIB_RCVCTRL_TAILUPD_DIS |
				   QIB_RCVCTRL_CTXT_DIS |
				   QIB_RCVCTRL_INTRAVAIL_DIS |
				   QIB_RCVCTRL_PKEY_ENB, -1);
		/*
		 * Gracefully stop all sends allowing any in progress to
		 * trickle out first.
		 */
		dd->f_sendctrl(ppd, QIB_SENDCTRL_CLEAR);
	}

	/*
	 * Enough for anything that's going to trickle out to have actually
	 * done so.
	 */
	udelay(20);

	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
		ppd = dd->pport + pidx;
		dd->f_setextled(ppd, 0); /* make sure LEDs are off */

		if (dd->flags & QIB_HAS_SEND_DMA)
			qib_teardown_sdma(ppd);

		dd->f_sendctrl(ppd, QIB_SENDCTRL_AVAIL_DIS |
				    QIB_SENDCTRL_SEND_DIS);
		/*
		 * Clear SerdesEnable.
		 * We can't count on interrupts since we are stopping.
		 */
		dd->f_quiet_serdes(ppd);
	}

	qib_update_eeprom_log(dd);
}

/**
 * qib_free_ctxtdata - free a context's allocated data
 * @dd: the qlogic_ib device
 * @rcd: the ctxtdata structure
 *
 * free up any allocated data for a context
 * This should not touch anything that would affect a simultaneous
 * re-allocation of context data, because it is called after qib_mutex
 * is released (and can be called from reinit as well).
 * It should never change any chip state, or global driver state.
 */
void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
	if (!rcd)
		return;

	if (rcd->rcvhdrq) {
		dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
				  rcd->rcvhdrq, rcd->rcvhdrq_phys);
		rcd->rcvhdrq = NULL;
		if (rcd->rcvhdrtail_kvaddr) {
			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
					  rcd->rcvhdrtail_kvaddr,
					  rcd->rcvhdrqtailaddr_phys);
			rcd->rcvhdrtail_kvaddr = NULL;
		}
	}
	if (rcd->rcvegrbuf) {
		unsigned e;

		for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
			void *base = rcd->rcvegrbuf[e];
			size_t size = rcd->rcvegrbuf_size;

			dma_free_coherent(&dd->pcidev->dev, size,
					  base, rcd->rcvegrbuf_phys[e]);
		}
		kfree(rcd->rcvegrbuf);
		rcd->rcvegrbuf = NULL;
		kfree(rcd->rcvegrbuf_phys);
		rcd->rcvegrbuf_phys = NULL;
		rcd->rcvegrbuf_chunks = 0;
	}

	kfree(rcd->tid_pg_list);
	vfree(rcd->user_event_mask);
	vfree(rcd->subctxt_uregbase);
	vfree(rcd->subctxt_rcvegrbuf);
	vfree(rcd->subctxt_rcvhdr_base);
	kfree(rcd);
}

/*
 * Perform a PIO buffer bandwidth write test, to verify proper system
 * configuration.  Even when all the setup calls work, occasionally
 * BIOS or other issues can prevent write combining from working, or
 * can cause other bandwidth problems to the chip.
 *
 * This test simply writes the same buffer over and over again, and
 * measures close to the peak bandwidth to the chip (not testing
 * data bandwidth to the wire).  On chips that use an address-based
 * trigger to send packets to the wire, this is easy.  On chips that
 * use a count to trigger, we want to make sure that the packet doesn't
 * go out on the wire, or trigger flow control checks.
 */
static void qib_verify_pioperf(struct qib_devdata *dd)
{
	u32 pbnum, cnt, lcnt;
	u32 __iomem *piobuf;
	u32 *addr;
	u64 msecs, emsecs;

	piobuf = dd->f_getsendbuf(dd->pport, 0ULL, &pbnum);
	if (!piobuf) {
		qib_devinfo(dd->pcidev,
			    "No PIObufs for checking perf, skipping\n");
		return;
	}

	/*
	 * Enough to give us a reasonable test, less than piobuf size, and
	 * likely multiple of store buffer length.
	 */
	cnt = 1024;

	addr = vmalloc(cnt);
	if (!addr) {
		qib_devinfo(dd->pcidev,
			    "Couldn't get memory for checking PIO perf,"
			    " skipping\n");
		goto done;
	}

	preempt_disable();  /* we want reasonably accurate elapsed time */
	msecs = 1 + jiffies_to_msecs(jiffies);
	for (lcnt = 0; lcnt < 10000U; lcnt++) {
		/* wait until we cross msec boundary */
		if (jiffies_to_msecs(jiffies) >= msecs)
			break;
		udelay(1);
	}

	dd->f_set_armlaunch(dd, 0);

	/*
	 * length 0, no dwords actually sent
	 */
	writeq(0, piobuf);
	qib_flush_wc();

	/*
	 * This is only roughly accurate, since even with preempt we
	 * still take interrupts that could take a while.  Running for
	 * >= 5 msec seems to get us "close enough" to accurate values.
	 */
	msecs = jiffies_to_msecs(jiffies);
	for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
		qib_pio_copy(piobuf + 64, addr, cnt >> 2);
		emsecs = jiffies_to_msecs(jiffies) - msecs;
	}

	/* 1 GiB/sec, slightly over IB SDR line rate */
	if (lcnt < (emsecs * 1024U))
		qib_dev_err(dd,
			    "Performance problem: bandwidth to PIO buffers is "
			    "only %u MiB/sec\n",
			    lcnt / (u32) emsecs);

	preempt_enable();

	vfree(addr);

done:
	/* disarm piobuf, so it's available again */
	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbnum));
	qib_sendbuf_done(dd, pbnum);
	dd->f_set_armlaunch(dd, 1);
}

void qib_free_devdata(struct qib_devdata *dd)
{
	unsigned long flags;

	spin_lock_irqsave(&qib_devs_lock, flags);
	idr_remove(&qib_unit_table, dd->unit);
	list_del(&dd->list);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	ib_dealloc_device(&dd->verbs_dev.ibdev);
}

/*
 * Allocate our primary per-unit data structure.  Must be done via verbs
 * allocator, because the verbs cleanup process both does cleanup and
 * free of the data structure.
 * "extra" is for chip-specific data.
 *
 * Use the idr mechanism to get a unit number for this unit.
 */
struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
{
	unsigned long flags;
	struct qib_devdata *dd;
	int ret;

	if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
	if (!dd) {
		dd = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&qib_devs_lock, flags);
	ret = idr_get_new(&qib_unit_table, dd, &dd->unit);
	if (ret >= 0)
		list_add(&dd->list, &qib_dev_list);
	spin_unlock_irqrestore(&qib_devs_lock, flags);

	if (ret < 0) {
		qib_early_err(&pdev->dev,
			      "Could not allocate unit ID: error %d\n", -ret);
		ib_dealloc_device(&dd->verbs_dev.ibdev);
		dd = ERR_PTR(ret);
		goto bail;
	}

	if (!qib_cpulist_count) {
		u32 count = num_online_cpus();
		qib_cpulist = kzalloc(BITS_TO_LONGS(count) *
				      sizeof(long), GFP_KERNEL);
		if (qib_cpulist)
			qib_cpulist_count = count;
		else
			qib_early_err(&pdev->dev, "Could not alloc cpulist "
				      "info, cpu affinity might be wrong\n");
	}

bail:
	return dd;
}

/*
 * Called from freeze mode handlers, and from PCI error
 * reporting code.  Should be paranoid about state of
 * system and data structures.
 */
void qib_disable_after_error(struct qib_devdata *dd)
{
	if (dd->flags & QIB_INITTED) {
		u32 pidx;

		dd->flags &= ~QIB_INITTED;
		if (dd->pport)
			for (pidx = 0; pidx < dd->num_pports; ++pidx) {
				struct qib_pportdata *ppd;

				ppd = dd->pport + pidx;
				if (dd->flags & QIB_PRESENT) {
					qib_set_linkstate(ppd,
						QIB_IB_LINKDOWN_DISABLE);
					dd->f_setextled(ppd, 0);
				}
				*ppd->statusp &= ~QIB_STATUS_IB_READY;
			}
	}

	/*
	 * Mark as having had an error for driver, and also
	 * for /sys and status word mapped to user programs.
	 * This marks unit as not usable, until reset.
	 */
	if (dd->devstatusp)
		*dd->devstatusp |= QIB_STATUS_HWERROR;
}

static void __devexit qib_remove_one(struct pci_dev *);
static int __devinit qib_init_one(struct pci_dev *,
				  const struct pci_device_id *);

#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: "
#define PFX QIB_DRV_NAME ": "

static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = {
	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) },
	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) },
	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, qib_pci_tbl);

struct pci_driver qib_driver = {
	.name = QIB_DRV_NAME,
	.probe = qib_init_one,
	.remove = __devexit_p(qib_remove_one),
	.id_table = qib_pci_tbl,
	.err_handler = &qib_pci_err_handler,
};

/*
 * Do all the generic driver unit- and chip-independent memory
 * allocation and initialization.
 */
static int __init qlogic_ib_init(void)
{
	int ret;

	ret = qib_dev_init();
	if (ret)
		goto bail;

	qib_cq_wq = create_singlethread_workqueue("qib_cq");
	if (!qib_cq_wq) {
		ret = -ENOMEM;
		goto bail_dev;
	}

	/*
	 * These must be called before the driver is registered with
	 * the PCI subsystem.
	 */
	idr_init(&qib_unit_table);
	if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
		printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n");
		ret = -ENOMEM;
		goto bail_cq_wq;
	}

	ret = pci_register_driver(&qib_driver);
	if (ret < 0) {
		printk(KERN_ERR QIB_DRV_NAME
		       ": Unable to register driver: error %d\n", -ret);
		goto bail_unit;
	}

	/* not fatal if it doesn't work */
	if (qib_init_qibfs())
		printk(KERN_ERR QIB_DRV_NAME ": Unable to register qibfs\n");
	goto bail; /* all OK */

bail_unit:
	idr_destroy(&qib_unit_table);
bail_cq_wq:
	destroy_workqueue(qib_cq_wq);
bail_dev:
	qib_dev_cleanup();
bail:
	return ret;
}

module_init(qlogic_ib_init);

/*
 * Do the non-unit driver cleanup, memory free, etc. at unload.
 */
static void __exit qlogic_ib_cleanup(void)
{
	int ret;

	ret = qib_exit_qibfs();
	if (ret)
		printk(KERN_ERR QIB_DRV_NAME ": "
		       "Unable to cleanup counter filesystem: "
		       "error %d\n", -ret);

	pci_unregister_driver(&qib_driver);

	destroy_workqueue(qib_cq_wq);

	qib_cpulist_count = 0;
	kfree(qib_cpulist);

	idr_destroy(&qib_unit_table);
	qib_dev_cleanup();
}

module_exit(qlogic_ib_cleanup);

/* this can only be called after a successful initialization */
static void cleanup_device_data(struct qib_devdata *dd)
{
	int ctxt;
	int pidx;
	struct qib_ctxtdata **tmp;
	unsigned long flags;

	/* users can't do anything more with chip */
	for (pidx = 0; pidx < dd->num_pports; ++pidx)
		if (dd->pport[pidx].statusp)
			*dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;

	if (!qib_wc_pat)
		qib_disable_wc(dd);

	if (dd->pioavailregs_dma) {
		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
				  (void *) dd->pioavailregs_dma,
				  dd->pioavailregs_phys);
		dd->pioavailregs_dma = NULL;
	}

	if (dd->pageshadow) {
		struct page **tmpp = dd->pageshadow;
		dma_addr_t *tmpd = dd->physshadow;
		int i, cnt = 0;

		for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
			int ctxt_tidbase = ctxt * dd->rcvtidcnt;
			int maxtid = ctxt_tidbase + dd->rcvtidcnt;

			for (i = ctxt_tidbase; i < maxtid; i++) {
				if (!tmpp[i])
					continue;
				pci_unmap_page(dd->pcidev, tmpd[i],
					       PAGE_SIZE, PCI_DMA_FROMDEVICE);
				qib_release_user_pages(&tmpp[i], 1);
				tmpp[i] = NULL;
				cnt++;
			}
		}

		tmpp = dd->pageshadow;
		dd->pageshadow = NULL;
		vfree(tmpp);
	}

	/*
	 * Free any resources still in use (usually just kernel contexts)
	 * at unload; we iterate over ctxtcnt, because that's what we
	 * allocate.  We acquire the lock to be really paranoid that rcd
	 * isn't being accessed from some interrupt-related code (that
	 * should not happen, but best to be sure).
	 */
	spin_lock_irqsave(&dd->uctxt_lock, flags);
	tmp = dd->rcd;
	dd->rcd = NULL;
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
	for (ctxt = 0; tmp && ctxt < dd->ctxtcnt; ctxt++) {
		struct qib_ctxtdata *rcd = tmp[ctxt];

		tmp[ctxt] = NULL; /* debugging paranoia */
		qib_free_ctxtdata(dd, rcd);
	}
	kfree(tmp);
	kfree(dd->boardname);
}

/*
 * Clean up on unit shutdown, or error during unit load after
 * successful initialization.
 */
static void qib_postinit_cleanup(struct qib_devdata *dd)
{
	/*
	 * Clean up chip-specific stuff.
	 * We check for NULL here, because it's outside
	 * the kregbase check, and we need to call it
	 * after the free_irq.  Thus it's possible that
	 * the function pointers were never initialized.
	 */
	if (dd->f_cleanup)
		dd->f_cleanup(dd);

	qib_pcie_ddcleanup(dd);

	cleanup_device_data(dd);

	qib_free_devdata(dd);
}

static int __devinit qib_init_one(struct pci_dev *pdev,
				  const struct pci_device_id *ent)
{
	int ret, j, pidx, initfail;
	struct qib_devdata *dd = NULL;

	ret = qib_pcie_init(pdev, ent);
	if (ret)
		goto bail;

	/*
	 * Do device-specific initialization, function table setup, dd
	 * allocation, etc.
	 */
	switch (ent->device) {
	case PCI_DEVICE_ID_QLOGIC_IB_6120:
#ifdef CONFIG_PCI_MSI
		dd = qib_init_iba6120_funcs(pdev, ent);
#else
		qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot "
			      "work if CONFIG_PCI_MSI is not enabled\n",
			      ent->device);
		dd = ERR_PTR(-ENODEV);
#endif
		break;

	case PCI_DEVICE_ID_QLOGIC_IB_7220:
		dd = qib_init_iba7220_funcs(pdev, ent);
		break;

	case PCI_DEVICE_ID_QLOGIC_IB_7322:
		dd = qib_init_iba7322_funcs(pdev, ent);
		break;

	default:
		qib_early_err(&pdev->dev, "Failing on unknown QLogic "
			      "deviceid 0x%x\n", ent->device);
		ret = -ENODEV;
	}

	if (IS_ERR(dd))
		ret = PTR_ERR(dd);
	if (ret)
		goto bail; /* error already printed */

	/* do the generic initialization */
	initfail = qib_init(dd, 0);

	ret = qib_register_ib_device(dd);

	/*
	 * Now ready for use.  This should be cleared whenever we
	 * detect a reset, or initiate one.  If earlier failure,
	 * we still create devices, so diags, etc. can be used
	 * to determine cause of problem.
	 */
	if (!qib_mini_init && !initfail && !ret)
		dd->flags |= QIB_INITTED;

	j = qib_device_create(dd);
	if (j)
		qib_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
	j = qibfs_add(dd);
	if (j)
		qib_dev_err(dd, "Failed filesystem setup for counters: %d\n",
			    -j);

	if (qib_mini_init || initfail || ret) {
		qib_stop_timers(dd);
		flush_workqueue(ib_wq);
		for (pidx = 0; pidx < dd->num_pports; ++pidx)
			dd->f_quiet_serdes(dd->pport + pidx);
		if (qib_mini_init)
			goto bail;
		if (!j) {
			(void) qibfs_remove(dd);
			qib_device_remove(dd);
		}
		if (!ret)
			qib_unregister_ib_device(dd);
		qib_postinit_cleanup(dd);
		if (initfail)
			ret = initfail;
		goto bail;
	}

	if (!qib_wc_pat) {
		ret = qib_enable_wc(dd);
		if (ret) {
			qib_dev_err(dd, "Write combining not enabled "
				    "(err %d): performance may be poor\n",
				    -ret);
			ret = 0;
		}
	}

	qib_verify_pioperf(dd);
bail:
	return ret;
}

static void __devexit qib_remove_one(struct pci_dev *pdev)
{
	struct qib_devdata *dd = pci_get_drvdata(pdev);
	int ret;

	/* unregister from IB core */
	qib_unregister_ib_device(dd);

	/*
	 * Disable the IB link, disable interrupts on the device,
	 * clear dma engines, etc.
	 */
	if (!qib_mini_init)
		qib_shutdown_device(dd);

	qib_stop_timers(dd);

	/* wait until all of our (qsfp) queue_work() calls complete */
	flush_workqueue(ib_wq);

	ret = qibfs_remove(dd);
	if (ret)
		qib_dev_err(dd, "Failed counters filesystem cleanup: %d\n",
			    -ret);

	qib_device_remove(dd);

	qib_postinit_cleanup(dd);
}

/**
 * qib_create_rcvhdrq - create a receive header queue
 * @dd: the qlogic_ib device
 * @rcd: the context data
 *
 * This must be contiguous memory (from an i/o perspective), and must be
 * DMA'able (which means for some systems, it will go through an IOMMU,
 * or be forced into a low address range).
 */
int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
{
	unsigned amt;

	if (!rcd->rcvhdrq) {
		dma_addr_t phys_hdrqtail;
		gfp_t gfp_flags;

		amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
			    sizeof(u32), PAGE_SIZE);
		gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
			GFP_USER : GFP_KERNEL;
		rcd->rcvhdrq = dma_alloc_coherent(
			&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
			gfp_flags | __GFP_COMP);

		if (!rcd->rcvhdrq) {
			qib_dev_err(dd, "attempt to allocate %d bytes "
				    "for ctxt %u rcvhdrq failed\n",
				    amt, rcd->ctxt);
			goto bail;
		}

		if (rcd->ctxt >= dd->first_user_ctxt) {
			rcd->user_event_mask = vmalloc_user(PAGE_SIZE);
			if (!rcd->user_event_mask)
				goto bail_free_hdrq;
		}

		if (!(dd->flags & QIB_NODMA_RTAIL)) {
			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
				gfp_flags);
			if (!rcd->rcvhdrtail_kvaddr)
				goto bail_free;
			rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
		}

		rcd->rcvhdrq_size = amt;
	}

	/* clear for security and sanity on each use */
	memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
	if (rcd->rcvhdrtail_kvaddr)
		memset(rcd->rcvhdrtail_kvaddr, 0, PAGE_SIZE);
	return 0;

bail_free:
	qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u "
		    "rcvhdrqtailaddr failed\n", rcd->ctxt);
	vfree(rcd->user_event_mask);
	rcd->user_event_mask = NULL;
bail_free_hdrq:
	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
			  rcd->rcvhdrq_phys);
	rcd->rcvhdrq = NULL;
bail:
	return -ENOMEM;
}

/**
 * qib_setup_eagerbufs - allocate eager buffers, both kernel and user contexts.
 * @rcd: the context we are setting up.
 *
 * Allocate the eager TID buffers and program them into the chip.
 * They are no longer completely contiguous, we do multiple allocation
 * calls.  Otherwise we get the OOM code involved, by asking for too
 * much per call, with disastrous results on some kernels.
 */
int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
{
	struct qib_devdata *dd = rcd->dd;
	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	gfp_t gfp_flags;

	/*
	 * GFP_USER, but without GFP_FS, so buffer cache can be
	 * coalesced (we hope); otherwise, even at order 4,
	 * heavy filesystem activity makes these fail, and we can
	 * use compound pages.
	 */
	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

	egrcnt = rcd->rcvegrcnt;
	egroff = rcd->rcvegr_tid_base;
	egrsize = dd->rcvegrbufsize;

	chunk = rcd->rcvegrbuf_chunks;
	egrperchunk = rcd->rcvegrbufs_perchunk;
	size = rcd->rcvegrbuf_size;
	if (!rcd->rcvegrbuf) {
		rcd->rcvegrbuf =
			kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]),
				GFP_KERNEL);
		if (!rcd->rcvegrbuf)
			goto bail;
	}
	if (!rcd->rcvegrbuf_phys) {
		rcd->rcvegrbuf_phys =
			kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
				GFP_KERNEL);
		if (!rcd->rcvegrbuf_phys)
			goto bail_rcvegrbuf;
	}
	for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
		if (rcd->rcvegrbuf[e])
			continue;
		rcd->rcvegrbuf[e] =
			dma_alloc_coherent(&dd->pcidev->dev, size,
					   &rcd->rcvegrbuf_phys[e],
					   gfp_flags);
		if (!rcd->rcvegrbuf[e])
			goto bail_rcvegrbuf_phys;
	}

	rcd->rcvegr_phys = rcd->rcvegrbuf_phys[0];

	for (e = chunk = 0; chunk < rcd->rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = rcd->rcvegrbuf_phys[chunk];
		unsigned i;

		/* clear for security and sanity on each use */
		memset(rcd->rcvegrbuf[chunk], 0, size);

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->f_put_tid(dd, e + egroff +
				      (u64 __iomem *)
				      ((char __iomem *)
				       dd->kregbase +
				       dd->rcvegrbase),
				      RCVHQ_RCV_TYPE_EAGER, pa);
			pa += egrsize;
		}
		cond_resched(); /* don't hog the cpu */
	}

	return 0;

bail_rcvegrbuf_phys:
	for (e = 0; e < rcd->rcvegrbuf_chunks && rcd->rcvegrbuf[e]; e++)
		dma_free_coherent(&dd->pcidev->dev, size,
				  rcd->rcvegrbuf[e], rcd->rcvegrbuf_phys[e]);
	kfree(rcd->rcvegrbuf_phys);
	rcd->rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	kfree(rcd->rcvegrbuf);
	rcd->rcvegrbuf = NULL;
bail:
	return -ENOMEM;
}

/*
 * Note: Changes to this routine should be mirrored
 * for the diagnostics routine qib_remap_ioaddr32().
 * There is also related code for VL15 buffers in qib_init_7322_variables().
 * The teardown code that unmaps is in qib_pcie_ddcleanup()
 */
int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen)
{
	u64 __iomem *qib_kregbase = NULL;
	void __iomem *qib_piobase = NULL;
	u64 __iomem *qib_userbase = NULL;
	u64 qib_kreglen;
	u64 qib_pio2koffset = dd->piobufbase & 0xffffffff;
	u64 qib_pio4koffset = dd->piobufbase >> 32;
	u64 qib_pio2klen = dd->piobcnt2k * dd->palign;
	u64 qib_pio4klen = dd->piobcnt4k * dd->align4k;
	u64 qib_physaddr = dd->physaddr;
	u64 qib_piolen;
	u64 qib_userlen = 0;

	/*
	 * Free the old mapping because the kernel will try to reuse the
	 * old mapping and not create a new mapping with the
	 * write combining attribute.
	 */
	iounmap(dd->kregbase);
	dd->kregbase = NULL;

	/*
	 * Assumes chip address space looks like:
	 *	- kregs + sregs + cregs + uregs (in any order)
	 *	- piobufs (2K and 4K bufs in either order)
	 * or:
	 *	- kregs + sregs + cregs (in any order)
	 *	- piobufs (2K and 4K bufs in either order)
	 *	- uregs
	 */
	if (dd->piobcnt4k == 0) {
		qib_kreglen = qib_pio2koffset;
		qib_piolen = qib_pio2klen;
	} else if (qib_pio2koffset < qib_pio4koffset) {
		qib_kreglen = qib_pio2koffset;
		qib_piolen = qib_pio4koffset + qib_pio4klen - qib_kreglen;
	} else {
		qib_kreglen = qib_pio4koffset;
		qib_piolen = qib_pio2koffset + qib_pio2klen - qib_kreglen;
	}
	qib_piolen += vl15buflen;
	/* Map just the configured ports (not all hw ports) */
	if (dd->uregbase > qib_kreglen)
		qib_userlen = dd->ureg_align * dd->cfgctxts;

	/* Sanity checks passed, now create the new mappings */
	qib_kregbase = ioremap_nocache(qib_physaddr, qib_kreglen);
	if (!qib_kregbase)
		goto bail;

	qib_piobase = ioremap_wc(qib_physaddr + qib_kreglen, qib_piolen);
	if (!qib_piobase)
		goto bail_kregbase;

	if (qib_userlen) {
		qib_userbase = ioremap_nocache(qib_physaddr + dd->uregbase,
					       qib_userlen);
		if (!qib_userbase)
			goto bail_piobase;
	}

	dd->kregbase = qib_kregbase;
	dd->kregend = (u64 __iomem *)
		((char __iomem *) qib_kregbase + qib_kreglen);
	dd->piobase = qib_piobase;
	dd->pio2kbase = (void __iomem *)
		(((char __iomem *) dd->piobase) +
		 qib_pio2koffset - qib_kreglen);
	if (dd->piobcnt4k)
		dd->pio4kbase = (void __iomem *)
			(((char __iomem *) dd->piobase) +
			 qib_pio4koffset - qib_kreglen);
	if (qib_userlen)
		/* ureg will now be accessed relative to dd->userbase */
		dd->userbase = qib_userbase;
	return 0;

bail_piobase:
	iounmap(qib_piobase);
bail_kregbase:
	iounmap(qib_kregbase);
bail:
	return -ENOMEM;
}