ipath_intr.c revision b4d390d8d219452e5d4257c87134a6934d7fabeb
/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/delay.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"

/*
 * Clear (write) a pio buffer, to clear a parity error.  This routine
 * should only be called when in freeze mode, and the buffer should be
 * canceled afterwards.
 */
static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
{
	u32 __iomem *pbuf;
	u32 dwcnt; /* dword count to write */

	if (pnum < dd->ipath_piobcnt2k) {
		pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
			dd->ipath_palign);
		dwcnt = dd->ipath_piosize2k >> 2;
	} else {
		pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
			(pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
		dwcnt = dd->ipath_piosize4k >> 2;
	}

	dev_info(&dd->pcidev->dev,
		 "Rewrite PIO buffer %u, to recover from parity error\n",
		 pnum);
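
	/*
	 * The data written below is not meaningful in itself; per the
	 * comment above this function, the point is simply to rewrite
	 * every word of the buffer so its parity is regenerated.  The
	 * buffer is expected to be canceled afterwards, so the length
	 * word and the zero fill are never actually sent.
	 */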
	/* no flush required, since already in freeze */
	writel(dwcnt + 1, pbuf);
	while (--dwcnt)
		writel(0, pbuf++);
}

/*
 * Called when we might have an error that is specific to a particular
 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
 * If rewrite is true, and bits are set in the sendbufferror registers,
 * we'll write to the buffer, for error recovery on parity errors.
 */
void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
{
	u32 piobcnt;
	unsigned long sbuf[4];
	/*
	 * it's possible that sendbuffererror could have bits set; might
	 * have already done this as a result of hardware error handling
	 */
	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
	/* read these before writing errorclear */
	sbuf[0] = ipath_read_kreg64(
		dd, dd->ipath_kregs->kr_sendbuffererror);
	sbuf[1] = ipath_read_kreg64(
		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
	if (piobcnt > 128)
		sbuf[2] = ipath_read_kreg64(
			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
	if (piobcnt > 192)
		sbuf[3] = ipath_read_kreg64(
			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
	else
		sbuf[3] = 0;

	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
		int i;
		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
		    dd->ipath_lastcancel > jiffies) {
			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
					  "SendbufErrs %lx %lx", sbuf[0],
					  sbuf[1]);
			if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
				printk(" %lx %lx ", sbuf[2], sbuf[3]);
			printk("\n");
		}

		for (i = 0; i < piobcnt; i++)
			if (test_bit(i, sbuf)) {
				if (rewrite)
					ipath_clrpiobuf(dd, i);
				ipath_disarm_piobufs(dd, i, 1);
			}
		/* ignore armlaunch errs for a bit */
		dd->ipath_lastcancel = jiffies + 3;
	}
}


/* These are all rcv-related errors which we want to count for stats */
#define E_SUM_PKTERRS \
	(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
	 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
	 INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
	 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
	 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
	 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)

/* These are all send-related errors which we want to count for stats */
#define E_SUM_ERRS \
	(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
	 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
	 INFINIPATH_E_INVALIDADDR)

/*
 * This is similar to E_SUM_ERRS, but we can't ignore armlaunch, and we
 * don't ignore errors that are unrelated to freeze mode and cancelling
 * buffers.  Can't ignore armlaunch because we could get more while still
 * cleaning up, and we need to cancel those as they happen.
 */
#define E_SPKT_ERRS_IGNORE \
	(INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
	 INFINIPATH_E_SPKTLEN)

/*
 * These are errors that can occur when the link changes state while
 * a packet is being sent or received.  This doesn't cover things
 * like EBP or VCRC that can be the result of the sender having the
 * link change state, so that we receive a "known bad" packet.
 */
#define E_SUM_LINK_PKTERRS \
	(INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
	 INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
	 INFINIPATH_E_RUNEXPCHAR)

static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
	u64 ignore_this_time = 0;

	ipath_disarm_senderrbufs(dd, 0);
	if ((errs & E_SUM_LINK_PKTERRS) &&
	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		/*
		 * This can happen when SMA is trying to bring the link
		 * up, but the IB link changes state at the "wrong" time.
		 * The IB logic then complains that the packet isn't
		 * valid.  We don't want to confuse people, so we just
		 * don't print them, except at debug
		 */
		ipath_dbg("Ignoring packet errors %llx, because link not "
			  "ACTIVE\n", (unsigned long long) errs);
		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
	}

	return ignore_this_time;
}

/* generic hw error messages... */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
	{ \
		.mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a << \
			  INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ), \
		.msg = "TXE " #a " Memory Parity" \
	}
#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
	{ \
		.mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a << \
			  INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ), \
		.msg = "RXE " #a " Memory Parity" \
	}

static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
	INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
	INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),

	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),

	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
};

/**
 * ipath_format_hwmsg - format a single hwerror message
 * @msg: message buffer
 * @msgl: length of message buffer
 * @hwmsg: message to add to message buffer
 */
static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
{
	strlcat(msg, "[", msgl);
	strlcat(msg, hwmsg, msgl);
	strlcat(msg, "]", msgl);
}

/**
 * ipath_format_hwerrors - format hardware error messages for display
 * @hwerrs: hardware errors bit vector
 * @hwerrmsgs: hardware error descriptions
 * @nhwerrmsgs: number of hwerrmsgs
 * @msg: message buffer
 * @msgl: message buffer length
 */
void ipath_format_hwerrors(u64 hwerrs,
			   const struct ipath_hwerror_msgs *hwerrmsgs,
			   size_t nhwerrmsgs,
			   char *msg, size_t msgl)
{
	int i;
	const int glen =
		sizeof(ipath_generic_hwerror_msgs) /
		sizeof(ipath_generic_hwerror_msgs[0]);

	for (i = 0; i < glen; i++) {
		if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
			ipath_format_hwmsg(msg, msgl,
					   ipath_generic_hwerror_msgs[i].msg);
		}
	}

	for (i = 0; i < nhwerrmsgs; i++) {
		if (hwerrs & hwerrmsgs[i].mask) {
			ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
		}
	}
}

/* return the strings for the most common link states */
static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
{
	char *ret;
	u32 state;

	state = ipath_ib_state(dd, ibcs);
	if (state == dd->ib_init)
		ret = "Init";
	else if (state == dd->ib_arm)
		ret = "Arm";
	else if (state == dd->ib_active)
		ret = "Active";
	else
		ret = "Down";
	return ret;
}

void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
{
	struct ib_event event;

	event.device = &dd->verbs_dev->ibdev;
	event.element.port_num = 1;
	event.event = ev;
	ib_dispatch_event(&event);
}

static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
				     ipath_err_t errs)
{
	u32 ltstate, lstate, ibstate, lastlstate;
	u32 init = dd->ib_init;
	u32 arm = dd->ib_arm;
	u32 active = dd->ib_active;
	const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);

	lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
	ibstate = ipath_ib_state(dd, ibcs);
	/* linkstate at last interrupt */
	lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
	ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingstate */

	/*
	 * Since going into a recovery state causes the link state to go
	 * down and since recovery is transitory, it is better if we "miss"
	 * ever seeing the link training state go into recovery (i.e.,
	 * ignore this transition for link state special handling purposes)
	 * without even updating ipath_lastibcstat.
	 */
	if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
		goto done;

	/*
	 * If linkstate transitions into INIT from any of the various down
	 * states, or if it transitions from any of the up (INIT or better)
	 * states into any of the down states (except link recovery), then
	 * call the chip-specific code to take appropriate actions.
	 */
	if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
	    lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
		/* transitioned to UP */
		if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
			/* link came up, so we must no longer be disabled */
			dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
			ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
			goto skip_ibchange; /* chip-code handled */
		}
	} else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
		    (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
		   ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
		   ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
		int handled;

		handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
		dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
		if (handled) {
			ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
			goto skip_ibchange; /* chip-code handled */
		}
	}

	/*
	 * Significant enough to always print and get into logs, if it was
	 * unexpected.  If it was a requested state change, we'll have
	 * already cleared the flags, so we won't print this warning
	 */
	if ((ibstate != arm && ibstate != active) &&
	    (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
		dev_info(&dd->pcidev->dev, "Link state changed from %s "
			 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
			 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
	}

	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
		u32 lastlts;
		lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
		/*
		 * Ignore cycling back and forth from Polling.Active to
		 * Polling.Quiet while waiting for the other end of the link
		 * to come up, except to try and decide if we are connected
		 * to a live IB device or not.  We will cycle back and
		 * forth between them if no cable is plugged in, the other
		 * device is powered off or disabled, etc.
		 */
		if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
		    lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
			if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
			    (++dd->ipath_ibpollcnt == 40)) {
				dd->ipath_flags |= IPATH_NOCABLE;
				*dd->ipath_statusp |=
					IPATH_STATUS_IB_NOCABLE;
				ipath_cdbg(LINKVERB, "Set NOCABLE\n");
			}
			ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
				   ipath_ibcstatus_str[ltstate], ibstate);
			goto skip_ibchange;
		}
	}

	dd->ipath_ibpollcnt = 0; /* not poll*, now */
	ipath_stats.sps_iblink++;

	if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
		u64 linkrecov;
		linkrecov = ipath_snap_cntr(dd,
			dd->ipath_cregs->cr_iblinkerrrecovcnt);
		if (linkrecov != dd->ipath_lastlinkrecov) {
			ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
				  ibcs, ib_linkstate(dd, ibcs),
				  ipath_ibcstatus_str[ltstate],
				  linkrecov);
			/* and no more until active again */
			dd->ipath_lastlinkrecov = 0;
			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
			goto skip_ibchange;
		}
	}

	if (ibstate == init || ibstate == arm || ibstate == active) {
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
		if (ibstate == init || ibstate == arm) {
			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
			if (dd->ipath_flags & IPATH_LINKACTIVE)
				signal_ib_event(dd, IB_EVENT_PORT_ERR);
		}
		if (ibstate == arm) {
			dd->ipath_flags |= IPATH_LINKARMED;
			dd->ipath_flags &= ~(IPATH_LINKUNK |
				IPATH_LINKINIT | IPATH_LINKDOWN |
				IPATH_LINKACTIVE | IPATH_NOCABLE);
			ipath_hol_down(dd);
		} else if (ibstate == init) {
			/*
			 * set INIT and DOWN.  Down is checked by
			 * most of the other code, but INIT is
			 * useful to know in a few places.
			 */
			dd->ipath_flags |= IPATH_LINKINIT |
				IPATH_LINKDOWN;
			dd->ipath_flags &= ~(IPATH_LINKUNK |
				IPATH_LINKARMED | IPATH_LINKACTIVE |
				IPATH_NOCABLE);
			ipath_hol_down(dd);
		} else {  /* active */
			dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
				dd->ipath_cregs->cr_iblinkerrrecovcnt);
			*dd->ipath_statusp |=
				IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
			dd->ipath_flags |= IPATH_LINKACTIVE;
			dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				| IPATH_LINKDOWN | IPATH_LINKARMED |
				IPATH_NOCABLE);
			if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
				ipath_restart_sdma(dd);
			signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
			/* LED active not handled in chip _f_updown */
			dd->ipath_f_setextled(dd, lstate, ltstate);
			ipath_hol_up(dd);
		}

		/*
		 * print after we've already done the work, so as not to
		 * delay the state changes and notifications, for debugging
		 */
		if (lstate == lastlstate)
			ipath_cdbg(LINKVERB, "Unchanged from last: %s "
				   "(%x)\n", ib_linkstate(dd, ibcs), ibstate);
		else
			ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
				   dd->ipath_unit, ib_linkstate(dd, ibcs),
				   ipath_ibcstatus_str[ltstate], ibstate);
	} else { /* down */
		if (dd->ipath_flags & IPATH_LINKACTIVE)
			signal_ib_event(dd, IB_EVENT_PORT_ERR);
		dd->ipath_flags |= IPATH_LINKDOWN;
		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				     | IPATH_LINKACTIVE |
				     IPATH_LINKARMED);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
		dd->ipath_lli_counter = 0;

		if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
			ipath_cdbg(VERBOSE, "Unit %u link state down "
				   "(state 0x%x), from %s\n",
				   dd->ipath_unit, lstate,
				   ib_linkstate(dd, dd->ipath_lastibcstat));
		else
			ipath_cdbg(LINKVERB, "Unit %u link state changed "
				   "to %s (0x%x) from down (%x)\n",
				   dd->ipath_unit,
				   ipath_ibcstatus_str[ltstate],
				   ibstate, lastlstate);
	}

skip_ibchange:
	dd->ipath_lastibcstat = ibcs;
done:
	return;
}

static void handle_supp_msgs(struct ipath_devdata *dd,
			     unsigned supp_msgs, char *msg, u32 msgsz)
{
	/*
	 * Print the message unless it's ibc status change only, which
	 * happens so often we never want to count it.
	 */
	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
		int iserr;
		ipath_err_t mask;

		iserr = ipath_decode_err(dd, msg, msgsz,
					 dd->ipath_lasterror &
					 ~INFINIPATH_E_IBSTATUSCHANGED);

		mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
			INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;

		/* if we're in debug, then don't mask SDMADISABLED msgs */
		if (ipath_debug & __IPATH_DBG)
			mask &= ~INFINIPATH_E_SDMADISABLED;

		if (dd->ipath_lasterror & ~mask)
			ipath_dev_err(dd, "Suppressed %u messages for "
				      "fast-repeating errors (%s) (%llx)\n",
				      supp_msgs, msg,
				      (unsigned long long)
				      dd->ipath_lasterror);
		else {
			/*
			 * rcvegrfull and rcvhdrqfull are "normal", for some
			 * types of processes (mostly benchmarks) that send
			 * huge numbers of messages, while not processing
			 * them.  So only complain about these at debug
			 * level.
			 */
			if (iserr)
				ipath_dbg("Suppressed %u messages for %s\n",
					  supp_msgs, msg);
			else
				ipath_cdbg(ERRPKT,
					   "Suppressed %u messages for %s\n",
					   supp_msgs, msg);
		}
	}
}

static unsigned handle_frequent_errors(struct ipath_devdata *dd,
				       ipath_err_t errs, char *msg,
				       u32 msgsz, int *noprint)
{
	unsigned long nc;
	static unsigned long nextmsg_time;
	static unsigned nmsgs, supp_msgs;

	/*
	 * Throttle back "fast" messages to no more than 10 per 5 seconds.
	 * This isn't perfect, but it's a reasonable heuristic.  If we get
	 * more than 10, give a 6x longer delay.
	 */
	nc = jiffies;
	if (nmsgs > 10) {
		if (time_before(nc, nextmsg_time)) {
			*noprint = 1;
			if (!supp_msgs++)
				nextmsg_time = nc + HZ * 3;
		} else if (supp_msgs) {
			handle_supp_msgs(dd, supp_msgs, msg, msgsz);
			supp_msgs = 0;
			nmsgs = 0;
		}
	} else if (!nmsgs++ || time_after(nc, nextmsg_time))
		nextmsg_time = nc + HZ / 2;

	return supp_msgs;
}

static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
	unsigned long flags;
	int expected;

	if (ipath_debug & __IPATH_DBG) {
		char msg[128];
		ipath_decode_err(dd, msg, sizeof msg, errs &
			INFINIPATH_E_SDMAERRS);
		ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
	}
	if (ipath_debug & __IPATH_VERBDBG) {
		unsigned long tl, hd, status, lengen;
		tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
		hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
		status = ipath_read_kreg64(dd,
			dd->ipath_kregs->kr_senddmastatus);
		lengen = ipath_read_kreg64(dd,
			dd->ipath_kregs->kr_senddmalengen);
		ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
			   "lengen 0x%lx\n", tl, hd, status, lengen);
	}

	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
	__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
	expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
	if (!expected)
		ipath_cancel_sends(dd, 1);
}

static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
{
	unsigned long flags;
	int expected;

	if ((istat & INFINIPATH_I_SDMAINT) &&
	    !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
		ipath_sdma_intr(dd);

	if (istat & INFINIPATH_I_SDMADISABLED) {
		expected = test_bit(IPATH_SDMA_ABORTING,
			&dd->ipath_sdma_status);
		ipath_dbg("%s SDmaDisabled intr\n",
			  expected ? "expected" : "unexpected");
		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
		if (!expected)
			ipath_cancel_sends(dd, 1);
		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
	}
}
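
/*
 * Header-queue-full handling.  The return value is a small bitmask: bit 0
 * is set when the kernel port (port 0) has packets queued that the caller
 * should drain; user ports are only counted and woken here, not drained.
 */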
static int handle_hdrq_full(struct ipath_devdata *dd)
{
	int chkerrpkts = 0;
	u32 hd, tl;
	u32 i;

	ipath_stats.sps_hdrqfull++;
	for (i = 0; i < dd->ipath_cfgports; i++) {
		struct ipath_portdata *pd = dd->ipath_pd[i];

		if (i == 0) {
			/*
			 * For kernel receive queues, we just want to know
			 * if there are packets in the queue that we can
			 * process.
			 */
			if (pd->port_head != ipath_get_hdrqtail(pd))
				chkerrpkts |= 1 << i;
			continue;
		}

		/* Skip if user context is not open */
		if (!pd || !pd->port_cnt)
			continue;

		/* Don't report the same point multiple times. */
		if (dd->ipath_flags & IPATH_NODMA_RTAIL)
			tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
		else
			tl = ipath_get_rcvhdrtail(pd);
		if (tl == pd->port_lastrcvhdrqtail)
			continue;

		hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
		if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
			pd->port_lastrcvhdrqtail = tl;
			pd->port_hdrqfull++;
			/* flush hdrqfull so that poll() sees it */
			wmb();
			wake_up_interruptible(&pd->port_wait);
		}
	}

	return chkerrpkts;
}

static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
	char msg[128];
	u64 ignore_this_time = 0;
	u64 iserr = 0;
	int chkerrpkts = 0, noprint = 0;
	unsigned supp_msgs;
	int log_idx;

	/*
	 * don't report errors that are masked, either at init
	 * (not set in ipath_errormask), or temporarily (set in
	 * ipath_maskederrs)
	 */
	errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;

	supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
					   &noprint);

	/* do these first, they are most important */
	if (errs & INFINIPATH_E_HARDWARE) {
		/* reuse same msg buf */
		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
	} else {
		u64 mask;
		for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
			mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
			if (errs & mask)
				ipath_inc_eeprom_err(dd, log_idx, 1);
		}
	}

	if (errs & INFINIPATH_E_SDMAERRS)
		handle_sdma_errors(dd, errs);

	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
		ipath_dev_err(dd, "error interrupt with unknown errors "
			      "%llx set\n", (unsigned long long)
			      (errs & ~dd->ipath_e_bitsextant));

	if (errs & E_SUM_ERRS)
		ignore_this_time = handle_e_sum_errs(dd, errs);
	else if ((errs & E_SUM_LINK_PKTERRS) &&
		 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		/*
		 * This can happen when SMA is trying to bring the link
		 * up, but the IB link changes state at the "wrong" time.
		 * The IB logic then complains that the packet isn't
		 * valid.  We don't want to confuse people, so we just
		 * don't print them, except at debug
		 */
		ipath_dbg("Ignoring packet errors %llx, because link not "
			  "ACTIVE\n", (unsigned long long) errs);
		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
	}

	if (supp_msgs == 250000) {
		int s_iserr;
		/*
		 * It's not entirely reasonable assuming that the errors set
		 * in the last clear period are all responsible for the
		 * problem, but the alternative is to assume it's the only
		 * ones on this particular interrupt, which also isn't great
		 */
		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;

		dd->ipath_errormask &= ~dd->ipath_maskederrs;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
				 dd->ipath_errormask);
		s_iserr = ipath_decode_err(dd, msg, sizeof msg,
					   dd->ipath_maskederrs);

		if (dd->ipath_maskederrs &
		    ~(INFINIPATH_E_RRCVEGRFULL |
		      INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
			ipath_dev_err(dd, "Temporarily disabling "
				      "error(s) %llx reporting; too frequent (%s)\n",
				      (unsigned long long) dd->ipath_maskederrs,
				      msg);
		else {
			/*
			 * rcvegrfull and rcvhdrqfull are "normal",
			 * for some types of processes (mostly benchmarks)
			 * that send huge numbers of messages, while not
			 * processing them.  So only complain about
			 * these at debug level.
			 */
			if (s_iserr)
				ipath_dbg("Temporarily disabling reporting "
					  "too frequent queue full errors (%s)\n",
					  msg);
			else
				ipath_cdbg(ERRPKT,
					   "Temporarily disabling reporting too"
					   " frequent packet errors (%s)\n",
					   msg);
		}

		/*
		 * Re-enable the masked errors after around 3 minutes, in
		 * ipath_get_faststats().  If we have a series of fast
		 * repeating but different errors, the interval will keep
		 * stretching out, but that's OK, as that's pretty
		 * catastrophic.
		 */
		dd->ipath_unmasktime = jiffies + HZ * 180;
	}

	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
	if (ignore_this_time)
		errs &= ~ignore_this_time;
	if (errs & ~dd->ipath_lasterror) {
		errs &= ~dd->ipath_lasterror;
		/* never suppress duplicate hwerrors or ibstatuschange */
		dd->ipath_lasterror |= errs &
			~(INFINIPATH_E_HARDWARE |
			  INFINIPATH_E_IBSTATUSCHANGED);
	}

	if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
		dd->ipath_spectriggerhit++;
		ipath_dbg("%lu special trigger hits\n",
			  dd->ipath_spectriggerhit);
	}

	/* likely due to cancel; so suppress message unless verbose */
	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
	    dd->ipath_lastcancel > jiffies) {
		/* armlaunch takes precedence; it often causes both. */
		ipath_cdbg(VERBOSE,
			   "Suppressed %s error (%llx) after sendbuf cancel\n",
			   (errs & INFINIPATH_E_SPIOARMLAUNCH) ?
			   "armlaunch" : "sendpktlen", (unsigned long long)errs);
		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
	}

	if (!errs)
		return 0;

	if (!noprint) {
		ipath_err_t mask;
		/*
		 * The ones we mask off are handled specially below
		 * or above.  Also mask SDMADISABLED by default as it
		 * is too chatty.
		 */
		mask = INFINIPATH_E_IBSTATUSCHANGED |
			INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
			INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;

		/* if we're in debug, then don't mask SDMADISABLED msgs */
		if (ipath_debug & __IPATH_DBG)
			mask &= ~INFINIPATH_E_SDMADISABLED;

		ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
	} else
		/* so we don't need if (!noprint) at strlcat's below */
		*msg = 0;

	if (errs & E_SUM_PKTERRS) {
		ipath_stats.sps_pkterrs++;
		chkerrpkts = 1;
	}
	if (errs & E_SUM_ERRS)
		ipath_stats.sps_errs++;

	if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
		ipath_stats.sps_crcerrs++;
		chkerrpkts = 1;
	}
	iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);

	/*
	 * We don't want to print these two as they happen, or we can make
	 * the situation even worse, because it takes so long to print
	 * messages to serial consoles.  Kernel ports get printed from
	 * fast_stats, no more than every 5 seconds, user ports get printed
	 * on close
	 */
	if (errs & INFINIPATH_E_RRCVHDRFULL)
		chkerrpkts |= handle_hdrq_full(dd);
	if (errs & INFINIPATH_E_RRCVEGRFULL) {
		struct ipath_portdata *pd = dd->ipath_pd[0];

		/*
		 * since this is of less importance and not likely to
		 * happen without also getting hdrfull, only count
		 * occurrences; don't check each port (or even the kernel
		 * vs user)
		 */
		ipath_stats.sps_etidfull++;
		if (pd->port_head != ipath_get_hdrqtail(pd))
			chkerrpkts |= 1;
	}

	/*
	 * do this before IBSTATUSCHANGED, in case both bits set in a single
	 * interrupt; we want the STATUSCHANGE to "win", so we update our
	 * internal copy of the state machine correctly
	 */
	if (errs & INFINIPATH_E_RIBLOSTLINK) {
		/*
		 * force through block below
		 */
		errs |= INFINIPATH_E_IBSTATUSCHANGED;
		ipath_stats.sps_iblink++;
		dd->ipath_flags |= IPATH_LINKDOWN;
		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;

		ipath_dbg("Lost link, link now down (%s)\n",
			  ipath_ibcstatus_str[ipath_read_kreg64(dd,
			  dd->ipath_kregs->kr_ibcstatus) & 0xf]);
	}
	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
		handle_e_ibstatuschanged(dd, errs);

	if (errs & INFINIPATH_E_RESET) {
		if (!noprint)
			ipath_dev_err(dd, "Got reset, requires re-init "
				      "(unload and reload driver)\n");
		dd->ipath_flags &= ~IPATH_INITTED;	/* needs re-init */
		/* mark as having had error */
		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
	}

	if (!noprint && *msg) {
		if (iserr)
			ipath_dev_err(dd, "%s error\n", msg);
	}
	if (dd->ipath_state_wanted & dd->ipath_flags) {
		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
			   "waking\n", dd->ipath_state_wanted,
			   dd->ipath_flags);
		wake_up_interruptible(&ipath_state_wait);
	}

	return chkerrpkts;
}
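
/*
 * Note on the return value of handle_errors(): it is nonzero when the
 * kernel port's receive queue should be checked for packets; ipath_intr()
 * below ORs it into chk0rcv and then calls ipath_kreceive() on port 0.
 */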

/*
 * try to cleanup as much as possible for anything that might have gone
 * wrong while in freeze mode, such as pio buffers being written by user
 * processes (causing armlaunch), send errors due to going into freeze mode,
 * etc., and try to avoid causing extra interrupts while doing so.
 * Forcibly update the in-memory pioavail register copies after cleanup
 * because the chip won't do it for anything changing while in freeze mode
 * (we don't want to wait for the next pio buffer state change).
 * Make sure that we don't lose any important interrupts by using the chip
 * feature that says that writing 0 to a bit in *clear that is set in
 * *status will cause an interrupt to be generated again (if allowed by
 * the *mask value).
 */
void ipath_clear_freeze(struct ipath_devdata *dd)
{
	int i, im;
	u64 val;

	/* disable error interrupts, to avoid confusion */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);

	/* also disable interrupts; errormask is sometimes overwritten */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);

	/*
	 * clear all sends, because they may have been
	 * completed by usercode while in freeze mode, and
	 * therefore would not be sent, and eventually
	 * might cause the process to run out of bufs
	 */
	ipath_cancel_sends(dd, 1);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
			 dd->ipath_control);

	/*
	 * ensure pio avail updates continue (because the update
	 * won't have happened from cancel_sends because we were
	 * still in freeze)
	 */
	ipath_force_pio_avail_update(dd);

	/*
	 * We just enabled pioavailupdate, so dma copy is almost certainly
	 * not yet right, so read the registers directly.  Similar to init
	 */
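	/*
	 * Rebuild both the DMA copy and the allocation shadow from the
	 * chip registers.  Buffers not currently marked in
	 * ipath_pioavailkernel are made to appear busy in the shadow,
	 * presumably so the kernel buffer allocator leaves them alone;
	 * the index fixup (i ^ 1) works around the 6110 register-swap
	 * bug noted below.
	 */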
	for (i = 0; i < dd->ipath_pioavregs; i++) {
		/* deal with 6110 chip bug */
		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
			i ^ 1 : i;
		val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
		dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
		dd->ipath_pioavailshadow[i] = val |
			(~dd->ipath_pioavailkernel[i] <<
			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
	}

	/*
	 * force new interrupt if any hwerr, error or interrupt bits are
	 * still set, and clear "safe" send packet errors related to freeze
	 * and cancelling sends.  Re-enable error interrupts before possible
	 * force of re-interrupt on pending interrupts.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
		E_SPKT_ERRS_IGNORE);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
		dd->ipath_errormask);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
}


/* this is separate to allow for better optimization of ipath_intr() */

static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
{
	/*
	 * Unexpected interrupts sometimes happen during driver init and
	 * unload; we don't want to process any interrupts at that point
	 */

	/* this is just a bandaid, not a fix, if something goes badly
	 * wrong */
	if (++*unexpectp > 100) {
		if (++*unexpectp > 105) {
			/*
			 * ok, we must be taking somebody else's interrupts,
			 * due to a messed up mptable and/or PIRQ table, so
			 * unregister the interrupt.  We've seen this during
			 * linuxbios development work, and it may happen in
			 * the future again.
			 */
			if (dd->pcidev && dd->ipath_irq) {
				ipath_dev_err(dd, "Now %u unexpected "
					      "interrupts, unregistering "
					      "interrupt handler\n",
					      *unexpectp);
				ipath_dbg("free_irq of irq %d\n",
					  dd->ipath_irq);
				dd->ipath_f_free_irq(dd);
			}
		}
		if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
			ipath_dev_err(dd, "%u unexpected interrupts, "
				      "disabling interrupts completely\n",
				      *unexpectp);
			/*
			 * disable all interrupts, something is very wrong
			 */
			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
					 0ULL);
		}
	} else if (*unexpectp > 1)
		ipath_dbg("Interrupt when not ready, should not happen, "
			  "ignoring\n");
}

static noinline void ipath_bad_regread(struct ipath_devdata *dd)
{
	static int allbits;

	/* separate routine, for better optimization of ipath_intr() */

	/*
	 * We print the message and disable interrupts, in hope of
	 * having a better chance of debugging the problem.
	 */
	ipath_dev_err(dd,
		      "Read of interrupt status failed (all bits set)\n");
	if (allbits++) {
		/* disable all interrupts, something is very wrong */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
		if (allbits == 2) {
			ipath_dev_err(dd, "Still bad interrupt status, "
				      "unregistering interrupt\n");
			dd->ipath_f_free_irq(dd);
		} else if (allbits > 2) {
			if ((allbits % 10000) == 0)
				printk(".");
		} else
			ipath_dev_err(dd, "Disabling interrupts, "
				      "multiple errors\n");
	}
}

static void handle_layer_pioavail(struct ipath_devdata *dd)
{
	unsigned long flags;
	int ret;

	ret = ipath_ib_piobufavail(dd->verbs_dev);
	if (ret > 0)
		goto set;

	return;
set:
	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
	dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			 dd->ipath_sendctrl);
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}

/*
 * Handle receive interrupts for user ports; this means a user
 * process was waiting for a packet to arrive, and didn't want
 * to poll
 */
static void handle_urcv(struct ipath_devdata *dd, u64 istat)
{
	u64 portr;
	int i;
	int rcvdint = 0;

	/*
	 * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
	 * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
	 * would both like timely updates of the bits so that
	 * we don't pass them by unnecessarily.  the rmb()
	 * here ensures that we see them promptly -- the
	 * corresponding wmb()'s are in ipath_poll_urgent()
	 * and ipath_poll_next()...
	 */
	rmb();
	portr = ((istat >> dd->ipath_i_rcvavail_shift) &
		 dd->ipath_i_rcvavail_mask) |
		((istat >> dd->ipath_i_rcvurg_shift) &
		 dd->ipath_i_rcvurg_mask);
	for (i = 1; i < dd->ipath_cfgports; i++) {
		struct ipath_portdata *pd = dd->ipath_pd[i];

		if (portr & (1 << i) && pd && pd->port_cnt) {
			if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
					       &pd->port_flag)) {
				clear_bit(i + dd->ipath_r_intravail_shift,
					  &dd->ipath_rcvctrl);
				wake_up_interruptible(&pd->port_wait);
				rcvdint = 1;
			} else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
						      &pd->port_flag)) {
				pd->port_urgent++;
				wake_up_interruptible(&pd->port_wait);
			}
		}
	}
	if (rcvdint) {
		/* only want to take one interrupt, so turn off the rcv
		 * interrupt for all the ports that we set the rcv_waiting
		 * (but never for kernel port)
		 */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl);
	}
}
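
/*
 * Top-level interrupt handler.  Broadly: read and decode kr_intstatus,
 * handle error and GPIO interrupts, clear the interrupt bits, then drain
 * the kernel port receive queue, wake waiting user ports, service SDMA,
 * and finally handle PIO-buffer-available notifications.
 */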
irqreturn_t ipath_intr(int irq, void *data)
{
	struct ipath_devdata *dd = data;
	u64 istat, chk0rcv = 0;
	ipath_err_t estat = 0;
	irqreturn_t ret;
	static unsigned unexpected = 0;
	u64 kportrbits;

	ipath_stats.sps_ints++;

	if (dd->ipath_int_counter != (u32) -1)
		dd->ipath_int_counter++;

	if (!(dd->ipath_flags & IPATH_PRESENT)) {
		/*
		 * This return value is not great, but we do not want the
		 * interrupt core code to remove our interrupt handler
		 * because we don't appear to be handling an interrupt
		 * during a chip reset.
		 */
		return IRQ_HANDLED;
	}

	/*
	 * this needs to be flags&initted, not statusp, so we keep
	 * taking interrupts even after link goes down, etc.
	 * Also, we *must* clear the interrupt at some point, or we won't
	 * take it again, which can be real bad for errors, etc...
	 */

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		ipath_bad_intr(dd, &unexpected);
		ret = IRQ_NONE;
		goto bail;
	}

	istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);

	if (unlikely(!istat)) {
		ipath_stats.sps_nullintr++;
		ret = IRQ_NONE; /* not our interrupt, or already handled */
		goto bail;
	}
	if (unlikely(istat == -1)) {
		ipath_bad_regread(dd);
		/* don't know if it was our interrupt or not */
		ret = IRQ_NONE;
		goto bail;
	}

	if (unexpected)
		unexpected = 0;

	if (unlikely(istat & ~dd->ipath_i_bitsextant))
		ipath_dev_err(dd,
			      "interrupt with unknown interrupts %Lx set\n",
			      istat & ~dd->ipath_i_bitsextant);
	else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
		ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat);

	if (istat & INFINIPATH_I_ERROR) {
		ipath_stats.sps_errints++;
		estat = ipath_read_kreg64(dd,
					  dd->ipath_kregs->kr_errorstatus);
		if (!estat)
			dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
				 "but no error bits set!\n", istat);
		else if (estat == -1LL)
			/*
			 * should we try clearing all, or hope next read
			 * works?
			 */
			ipath_dev_err(dd, "Read of error status failed "
				      "(all bits set); ignoring\n");
		else
			chk0rcv |= handle_errors(dd, estat);
	}

	if (istat & INFINIPATH_I_GPIO) {
		/*
		 * GPIO interrupts fall in two broad classes:
		 * GPIO_2 indicates (on some HT4xx boards) that a packet
		 * has arrived for Port 0.  Checking for this
		 * is controlled by flag IPATH_GPIO_INTR.
		 * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
		 * errors that we need to count.  Checking for this
		 * is controlled by flag IPATH_GPIO_ERRINTRS.
		 */
		u32 gpiostatus;
		u32 to_clear = 0;

		gpiostatus = ipath_read_kreg32(
			dd, dd->ipath_kregs->kr_gpio_status);
		/* First the error-counter case. */
		if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
		    (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
			/* want to clear the bits we see asserted. */
			to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);

			/*
			 * Count appropriately, clear bits out of our copy,
			 * as they have been "handled".
			 */
			if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
				ipath_dbg("FlowCtl on UnsupVL\n");
				dd->ipath_rxfc_unsupvl_errs++;
			}
			if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
				ipath_dbg("Overrun Threshold exceeded\n");
				dd->ipath_overrun_thresh_errs++;
			}
			if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
				ipath_dbg("Local Link Integrity error\n");
				dd->ipath_lli_errs++;
			}
			gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
		}
		/* Now the Port0 Receive case */
		if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
		    (dd->ipath_flags & IPATH_GPIO_INTR)) {
			/*
			 * GPIO status bit 2 is set, and we expected it.
			 * Clear it and note it in chk0rcv.
			 * This probably only happens if a Port0 pkt
			 * arrives at _just_ the wrong time, and we
			 * handle that by setting chk0rcv;
			 */
			to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
			gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
			chk0rcv = 1;
		}
		if (gpiostatus) {
			/*
			 * Some unexpected bits remain.  If they could have
			 * caused the interrupt, complain and clear.
			 * To avoid repetition of this condition, also clear
			 * the mask.  It is almost certainly due to error.
			 */
			const u32 mask = (u32) dd->ipath_gpio_mask;

			if (mask & gpiostatus) {
				ipath_dbg("Unexpected GPIO IRQ bits %x\n",
					  gpiostatus & mask);
				to_clear |= (gpiostatus & mask);
				dd->ipath_gpio_mask &= ~(gpiostatus & mask);
				ipath_write_kreg(dd,
					dd->ipath_kregs->kr_gpio_mask,
					dd->ipath_gpio_mask);
			}
		}
		if (to_clear) {
			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
					 (u64) to_clear);
		}
	}

	/*
	 * Clear the interrupt bits we found set, unless they are receive
	 * related, in which case we already cleared them above, and don't
	 * want to clear them again, because we might lose an interrupt.
	 * Clear it early, so we know the chip will have seen this by
	 * the time we process the queue, and will re-interrupt if necessary.
	 * The processor itself won't take the interrupt again until we
	 * return.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);

	/*
	 * Handle kernel receive queues before checking for pio buffers
	 * available since receives can overflow; piobuf waiters can afford
	 * a few extra cycles, since they were waiting anyway, and users
	 * waiting for receive are at the bottom.
	 */
	kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
		(1ULL << dd->ipath_i_rcvurg_shift);
	if (chk0rcv || (istat & kportrbits)) {
		istat &= ~kportrbits;
		ipath_kreceive(dd->ipath_pd[0]);
	}

	if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
		     (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
		handle_urcv(dd, istat);

	if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
		handle_sdma_intr(dd, istat);

	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
		unsigned long flags;

		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
				 dd->ipath_sendctrl);
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);

		if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
			handle_layer_pioavail(dd);
		else
			ipath_dbg("unexpected BUFAVAIL intr\n");
	}

	ret = IRQ_HANDLED;

bail:
	return ret;
}