/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/sched.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"


/*
 * Called when we might have an error that is specific to a particular
 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
 */
void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
{
	u32 piobcnt;
	unsigned long sbuf[4];
	/*
	 * it's possible that sendbuffererror could have bits set; might
	 * have already done this as a result of hardware error handling
	 */
	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
	/* read these before writing errorclear */
	sbuf[0] = ipath_read_kreg64(
		dd, dd->ipath_kregs->kr_sendbuffererror);
	sbuf[1] = ipath_read_kreg64(
		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
	if (piobcnt > 128)
		sbuf[2] = ipath_read_kreg64(
			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
	if (piobcnt > 192)
		sbuf[3] = ipath_read_kreg64(
			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
	else
		sbuf[3] = 0;

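	/*
	 * sbuf[] now holds one bit per send buffer, in buffer-number
	 * order, so test_bit() below can scan it as a single bitmap
	 * covering all piobcnt buffers.
	 */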
	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
		int i;
		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
			dd->ipath_lastcancel > jiffies) {
			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
					  "SendbufErrs %lx %lx", sbuf[0],
					  sbuf[1]);
			if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
				printk(" %lx %lx ", sbuf[2], sbuf[3]);
			printk("\n");
		}

		for (i = 0; i < piobcnt; i++)
			if (test_bit(i, sbuf))
				ipath_disarm_piobufs(dd, i, 1);
		/* ignore armlaunch errs for a bit */
		dd->ipath_lastcancel = jiffies+3;
	}
}


/* These are all rcv-related errors which we want to count for stats */
#define E_SUM_PKTERRS \
	(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
	 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
	 INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
	 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
	 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
	 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)

/* These are all send-related errors which we want to count for stats */
#define E_SUM_ERRS \
	(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
	 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
	 INFINIPATH_E_INVALIDADDR)

/*
 * This is similar to E_SUM_ERRS, but lists the send errors that are
 * safe to ignore (clear) when cleaning up after a freeze or buffer
 * cancel.  Armlaunch is deliberately not included: more armlaunch
 * errors can occur while we are still cleaning up, and those need to
 * be cancelled as they happen.
 */
#define E_SPKT_ERRS_IGNORE \
	 (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
	 INFINIPATH_E_SPKTLEN)

/*
 * these are errors that can occur when the link changes state while
 * a packet is being sent or received.  This doesn't cover errors
 * like EBP or VCRC, which can result from the sender seeing the link
 * change state, so that we receive a "known bad" packet.
 */
#define E_SUM_LINK_PKTERRS \
	(INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
	 INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
	 INFINIPATH_E_RUNEXPCHAR)

static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
	u64 ignore_this_time = 0;

	ipath_disarm_senderrbufs(dd);
	if ((errs & E_SUM_LINK_PKTERRS) &&
	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		/*
		 * This can happen when SMA is trying to bring the link
		 * up, but the IB link changes state at the "wrong" time.
		 * The IB logic then complains that the packet isn't
		 * valid.  We don't want to confuse people, so we just
		 * don't print them, except at debug
		 */
		ipath_dbg("Ignoring packet errors %llx, because link not "
			  "ACTIVE\n", (unsigned long long) errs);
		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
	}

	return ignore_this_time;
}

/* generic hw error messages... */
#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
	{ \
		.mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
			  INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
		.msg = "TXE " #a " Memory Parity"	     \
	}
#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
	{ \
		.mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
			  INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
		.msg = "RXE " #a " Memory Parity"	     \
	}
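/*
 * For reference, INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF) expands to
 *
 *	{ .mask = INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF <<
 *		  INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT,
 *	  .msg = "TXE PIOBUF Memory Parity" }
 *
 * so each table entry below pairs a shifted hwerror bit with its message.
 */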

static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
	INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
	INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),

	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),

	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
};

/**
 * ipath_format_hwmsg - format a single hwerror message
 * @msg: message buffer
 * @msgl: length of message buffer
 * @hwmsg: message to add to message buffer
 */
static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
{
	strlcat(msg, "[", msgl);
	strlcat(msg, hwmsg, msgl);
	strlcat(msg, "]", msgl);
}

/**
 * ipath_format_hwerrors - format hardware error messages for display
 * @hwerrs: hardware errors bit vector
 * @hwerrmsgs: hardware error descriptions
 * @nhwerrmsgs: number of hwerrmsgs
 * @msg: message buffer
 * @msgl: message buffer length
 */
void ipath_format_hwerrors(u64 hwerrs,
			   const struct ipath_hwerror_msgs *hwerrmsgs,
			   size_t nhwerrmsgs,
			   char *msg, size_t msgl)
{
	int i;
	const int glen = ARRAY_SIZE(ipath_generic_hwerror_msgs);

	for (i = 0; i < glen; i++) {
		if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
			ipath_format_hwmsg(msg, msgl,
					   ipath_generic_hwerror_msgs[i].msg);
		}
	}

	for (i = 0; i < nhwerrmsgs; i++) {
		if (hwerrs & hwerrmsgs[i].mask) {
			ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
		}
	}
}
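
/*
 * Usage sketch (hypothetical caller, not part of this file): a
 * chip-specific hwerror handler might format and log errors roughly
 * like this, with "chip_hwerror_msgs" standing in for its own table:
 *
 *	char msg[512];
 *
 *	msg[0] = '\0';
 *	ipath_format_hwerrors(hwerrs, chip_hwerror_msgs,
 *			      ARRAY_SIZE(chip_hwerror_msgs),
 *			      msg, sizeof(msg));
 *	ipath_dev_err(dd, "Hardware error: %s\n", msg);
 *
 * The buffer must start out NUL-terminated, since the matching
 * messages are appended with strlcat().
 */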

/* return the strings for the most common link states */
static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
{
	char *ret;
	u32 state;

	state = ipath_ib_state(dd, ibcs);
	if (state == dd->ib_init)
		ret = "Init";
	else if (state == dd->ib_arm)
		ret = "Arm";
	else if (state == dd->ib_active)
		ret = "Active";
	else
		ret = "Down";
	return ret;
}

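/* Deliver a port event to the IB midlayer; these HCAs are single-port,
 * so the port number is always 1. */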
void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
{
	struct ib_event event;

	event.device = &dd->verbs_dev->ibdev;
	event.element.port_num = 1;
	event.event = ev;
	ib_dispatch_event(&event);
}

static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
				     ipath_err_t errs)
{
	u32 ltstate, lstate, ibstate, lastlstate;
	u32 init = dd->ib_init;
	u32 arm = dd->ib_arm;
	u32 active = dd->ib_active;
	const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);

	lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
	ibstate = ipath_ib_state(dd, ibcs);
	/* linkstate at last interrupt */
	lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
	ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktraining state */

	/*
	 * Since going into a recovery state causes the link state to go
	 * down and since recovery is transitory, it is better if we "miss"
	 * ever seeing the link training state go into recovery (i.e.,
	 * ignore this transition for link state special handling purposes)
	 * without even updating ipath_lastibcstat.
	 */
	if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
		goto done;

	/*
	 * if linkstate transitions into INIT from any of the various down
	 * states, or if it transitions from any of the up (INIT or better)
	 * states into any of the down states (except link recovery), then
	 * call the chip-specific code to take appropriate actions.
	 */
	if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
		lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
		/* transitioned to UP */
		if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
			/* link came up, so we must no longer be disabled */
			dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
			ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
			goto skip_ibchange; /* chip-code handled */
		}
	} else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
		(dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
		ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
		ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
		int handled;
		handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
		dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
		if (handled) {
			ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
			goto skip_ibchange; /* chip-code handled */
		}
	}

	/*
	 * Significant enough to always print and get into logs, if it was
	 * unexpected.  If it was a requested state change, we'll have
	 * already cleared the flags, so we won't print this warning
	 */
	if ((ibstate != arm && ibstate != active) &&
	    (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
		dev_info(&dd->pcidev->dev, "Link state changed from %s "
			 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
			 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
	}

	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
		u32 lastlts;
		lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
		/*
		 * Ignore cycling back and forth from Polling.Active to
		 * Polling.Quiet while waiting for the other end of the link
		 * to come up, except to try and decide if we are connected
		 * to a live IB device or not.  We will cycle back and
		 * forth between them if no cable is plugged in, the other
		 * device is powered off or disabled, etc.
		 */
		if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
		    lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
			if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
			     (++dd->ipath_ibpollcnt == 40)) {
				dd->ipath_flags |= IPATH_NOCABLE;
				*dd->ipath_statusp |=
					IPATH_STATUS_IB_NOCABLE;
				ipath_cdbg(LINKVERB, "Set NOCABLE\n");
			}
			ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
				ipath_ibcstatus_str[ltstate], ibstate);
			goto skip_ibchange;
		}
	}

	dd->ipath_ibpollcnt = 0; /* not poll*, now */
	ipath_stats.sps_iblink++;

	if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
		u64 linkrecov;
		linkrecov = ipath_snap_cntr(dd,
			dd->ipath_cregs->cr_iblinkerrrecovcnt);
		if (linkrecov != dd->ipath_lastlinkrecov) {
			ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
				(unsigned long long) ibcs,
				ib_linkstate(dd, ibcs),
				ipath_ibcstatus_str[ltstate],
				(unsigned long long) linkrecov);
			/* and no more until active again */
			dd->ipath_lastlinkrecov = 0;
			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
			goto skip_ibchange;
		}
	}

	if (ibstate == init || ibstate == arm || ibstate == active) {
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
		if (ibstate == init || ibstate == arm) {
			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
			if (dd->ipath_flags & IPATH_LINKACTIVE)
				signal_ib_event(dd, IB_EVENT_PORT_ERR);
		}
		if (ibstate == arm) {
			dd->ipath_flags |= IPATH_LINKARMED;
			dd->ipath_flags &= ~(IPATH_LINKUNK |
				IPATH_LINKINIT | IPATH_LINKDOWN |
				IPATH_LINKACTIVE | IPATH_NOCABLE);
			ipath_hol_down(dd);
		} else if (ibstate == init) {
			/*
			 * set INIT and DOWN.  Down is checked by
			 * most of the other code, but INIT is
			 * useful to know in a few places.
			 */
			dd->ipath_flags |= IPATH_LINKINIT |
				IPATH_LINKDOWN;
			dd->ipath_flags &= ~(IPATH_LINKUNK |
				IPATH_LINKARMED | IPATH_LINKACTIVE |
				IPATH_NOCABLE);
			ipath_hol_down(dd);
		} else {  /* active */
			dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
				dd->ipath_cregs->cr_iblinkerrrecovcnt);
			*dd->ipath_statusp |=
				IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
			dd->ipath_flags |= IPATH_LINKACTIVE;
			dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				| IPATH_LINKDOWN | IPATH_LINKARMED |
				IPATH_NOCABLE);
			if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
				ipath_restart_sdma(dd);
			signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
			/* LED active not handled in chip _f_updown */
			dd->ipath_f_setextled(dd, lstate, ltstate);
			ipath_hol_up(dd);
		}

		/*
		 * print after we've already done the work, so as not to
		 * delay the state changes and notifications, for debugging
		 */
		if (lstate == lastlstate)
			ipath_cdbg(LINKVERB, "Unchanged from last: %s "
				"(%x)\n", ib_linkstate(dd, ibcs), ibstate);
		else
			ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
				  dd->ipath_unit, ib_linkstate(dd, ibcs),
				  ipath_ibcstatus_str[ltstate], ibstate);
	} else { /* down */
		if (dd->ipath_flags & IPATH_LINKACTIVE)
			signal_ib_event(dd, IB_EVENT_PORT_ERR);
		dd->ipath_flags |= IPATH_LINKDOWN;
		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				     | IPATH_LINKACTIVE |
				     IPATH_LINKARMED);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
		dd->ipath_lli_counter = 0;

		if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
			ipath_cdbg(VERBOSE, "Unit %u link state down "
				   "(state 0x%x), from %s\n",
				   dd->ipath_unit, lstate,
				   ib_linkstate(dd, dd->ipath_lastibcstat));
		else
			ipath_cdbg(LINKVERB, "Unit %u link state changed "
				   "to %s (0x%x) from down (%x)\n",
				   dd->ipath_unit,
				   ipath_ibcstatus_str[ltstate],
				   ibstate, lastlstate);
	}

skip_ibchange:
	dd->ipath_lastibcstat = ibcs;
done:
	return;
}

static void handle_supp_msgs(struct ipath_devdata *dd,
			     unsigned supp_msgs, char *msg, u32 msgsz)
{
	/*
	 * Print the message unless it's ibc status change only, which
	 * happens so often we never want to count it.
	 */
	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
		int iserr;
		ipath_err_t mask;
		iserr = ipath_decode_err(dd, msg, msgsz,
					 dd->ipath_lasterror &
					 ~INFINIPATH_E_IBSTATUSCHANGED);

		mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
			INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;

		/* if we're in debug, then don't mask SDMADISABLED msgs */
		if (ipath_debug & __IPATH_DBG)
			mask &= ~INFINIPATH_E_SDMADISABLED;

		if (dd->ipath_lasterror & ~mask)
			ipath_dev_err(dd, "Suppressed %u messages for "
				      "fast-repeating errors (%s) (%llx)\n",
				      supp_msgs, msg,
				      (unsigned long long)
				      dd->ipath_lasterror);
		else {
			/*
			 * rcvegrfull and rcvhdrqfull are "normal", for some
			 * types of processes (mostly benchmarks) that send
			 * huge numbers of messages, while not processing
			 * them. So only complain about these at debug
			 * level.
			 */
			if (iserr)
				ipath_dbg("Suppressed %u messages for %s\n",
					  supp_msgs, msg);
			else
				ipath_cdbg(ERRPKT,
					"Suppressed %u messages for %s\n",
					  supp_msgs, msg);
		}
	}
}

static unsigned handle_frequent_errors(struct ipath_devdata *dd,
				       ipath_err_t errs, char *msg,
				       u32 msgsz, int *noprint)
{
	unsigned long nc;
	static unsigned long nextmsg_time;
	static unsigned nmsgs, supp_msgs;

	/*
	 * Throttle back "fast" messages to no more than 10 per 5 seconds.
	 * This isn't perfect, but it's a reasonable heuristic. If we get
	 * more than 10, give a 6x longer delay.
	 */
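	/*
	 * Worked example (assuming HZ=1000 for illustration): the first
	 * message sets nextmsg_time 500ms out; once more than 10
	 * messages have arrived and another lands inside that window,
	 * printing stops, and the first suppressed message pushes
	 * nextmsg_time 3s out (the 6x longer delay), so the suppressed
	 * total is reported at most every few seconds.
	 */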
	nc = jiffies;
	if (nmsgs > 10) {
		if (time_before(nc, nextmsg_time)) {
			*noprint = 1;
			if (!supp_msgs++)
				nextmsg_time = nc + HZ * 3;
		} else if (supp_msgs) {
			handle_supp_msgs(dd, supp_msgs, msg, msgsz);
			supp_msgs = 0;
			nmsgs = 0;
		}
	} else if (!nmsgs++ || time_after(nc, nextmsg_time))
		nextmsg_time = nc + HZ / 2;

	return supp_msgs;
}

static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
	unsigned long flags;
	int expected;

	if (ipath_debug & __IPATH_DBG) {
		char msg[128];
		ipath_decode_err(dd, msg, sizeof msg, errs &
			INFINIPATH_E_SDMAERRS);
		ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
	}
	if (ipath_debug & __IPATH_VERBDBG) {
		unsigned long tl, hd, status, lengen;
		tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
		hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
		status = ipath_read_kreg64(dd,
			dd->ipath_kregs->kr_senddmastatus);
		lengen = ipath_read_kreg64(dd,
			dd->ipath_kregs->kr_senddmalengen);
		ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
			"lengen 0x%lx\n", tl, hd, status, lengen);
	}

	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
	__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
	expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
	if (!expected)
		ipath_cancel_sends(dd, 1);
}

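/*
 * Dispatch send-DMA interrupt bits: run the normal SDMA interrupt
 * work for SDmaInt, and on SDmaDisabled mark the engine disabled,
 * cancel in-flight sends if the disable was not part of an expected
 * abort, and kick the abort tasklet to recover.
 */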
static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
{
	unsigned long flags;
	int expected;

	if ((istat & INFINIPATH_I_SDMAINT) &&
	    !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
		ipath_sdma_intr(dd);

	if (istat & INFINIPATH_I_SDMADISABLED) {
		expected = test_bit(IPATH_SDMA_ABORTING,
			&dd->ipath_sdma_status);
		ipath_dbg("%s SDmaDisabled intr\n",
			expected ? "expected" : "unexpected");
		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
		if (!expected)
			ipath_cancel_sends(dd, 1);
		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
	}
}

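/*
 * A receive header queue overflowed; bump the stats and, per port,
 * either note that the kernel queue has packets to drain or wake any
 * user process sleeping on the port.  Returns nonzero (bit 0) if the
 * kernel port has packets that need processing.
 */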
static int handle_hdrq_full(struct ipath_devdata *dd)
{
	int chkerrpkts = 0;
	u32 hd, tl;
	u32 i;

	ipath_stats.sps_hdrqfull++;
	for (i = 0; i < dd->ipath_cfgports; i++) {
		struct ipath_portdata *pd = dd->ipath_pd[i];

		if (i == 0) {
			/*
			 * For kernel receive queues, we just want to know
			 * if there are packets in the queue that we can
			 * process.
			 */
			if (pd->port_head != ipath_get_hdrqtail(pd))
				chkerrpkts |= 1 << i;
			continue;
		}

		/* Skip if user context is not open */
		if (!pd || !pd->port_cnt)
			continue;

		/* Don't report the same queue-full event multiple times. */
		if (dd->ipath_flags & IPATH_NODMA_RTAIL)
			tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
		else
			tl = ipath_get_rcvhdrtail(pd);
		if (tl == pd->port_lastrcvhdrqtail)
			continue;

		hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
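		/*
		 * The queue is full when advancing the tail index by
		 * one (wrapping from ipath_hdrqlast back to 0) would
		 * reach the head index.
		 */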
		if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
			pd->port_lastrcvhdrqtail = tl;
			pd->port_hdrqfull++;
			/* flush hdrqfull so that poll() sees it */
			wmb();
			wake_up_interruptible(&pd->port_wait);
		}
	}

	return chkerrpkts;
}

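/*
 * Decode and handle everything reported in ErrorStatus: hardware
 * errors first, then SDMA, send, and receive errors, with
 * rate-limiting and temporary masking of fast-repeating errors.
 * Returns nonzero if the kernel port's receive queue should be
 * checked for packets.
 */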
static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
	char msg[128];
	u64 ignore_this_time = 0;
	u64 iserr = 0;
	int chkerrpkts = 0, noprint = 0;
	unsigned supp_msgs;
	int log_idx;

	/*
	 * don't report errors that are masked, either at init
	 * (not set in ipath_errormask), or temporarily (set in
	 * ipath_maskederrs)
	 */
	errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;

	supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
		&noprint);

	/* do these first, they are most important */
	if (errs & INFINIPATH_E_HARDWARE) {
		/* reuse same msg buf */
		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
	} else {
		u64 mask;
		for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
			mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
			if (errs & mask)
				ipath_inc_eeprom_err(dd, log_idx, 1);
		}
	}

	if (errs & INFINIPATH_E_SDMAERRS)
		handle_sdma_errors(dd, errs);

	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
		ipath_dev_err(dd, "error interrupt with unknown errors "
			      "%llx set\n", (unsigned long long)
			      (errs & ~dd->ipath_e_bitsextant));

	if (errs & E_SUM_ERRS)
		ignore_this_time = handle_e_sum_errs(dd, errs);
	else if ((errs & E_SUM_LINK_PKTERRS) &&
	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		/*
		 * This can happen when SMA is trying to bring the link
		 * up, but the IB link changes state at the "wrong" time.
		 * The IB logic then complains that the packet isn't
		 * valid.  We don't want to confuse people, so we just
		 * don't print them, except at debug
		 */
		ipath_dbg("Ignoring packet errors %llx, because link not "
			  "ACTIVE\n", (unsigned long long) errs);
		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
	}

	if (supp_msgs == 250000) {
		int s_iserr;
		/*
		 * It's not entirely reasonable to assume that the errors
		 * set in the last clear period are all responsible for
		 * the problem, but the alternative is to assume it's the
		 * only ones on this particular interrupt, which also
		 * isn't great
		 */
		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;

		dd->ipath_errormask &= ~dd->ipath_maskederrs;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
				 dd->ipath_errormask);
		s_iserr = ipath_decode_err(dd, msg, sizeof msg,
					   dd->ipath_maskederrs);

		if (dd->ipath_maskederrs &
		    ~(INFINIPATH_E_RRCVEGRFULL |
		      INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
			ipath_dev_err(dd, "Temporarily disabling "
			    "error(s) %llx reporting; too frequent (%s)\n",
				(unsigned long long) dd->ipath_maskederrs,
				msg);
		else {
			/*
			 * rcvegrfull and rcvhdrqfull are "normal",
			 * for some types of processes (mostly benchmarks)
			 * that send huge numbers of messages, while not
			 * processing them.  So only complain about
			 * these at debug level.
			 */
			if (s_iserr)
				ipath_dbg("Temporarily disabling reporting "
				    "too frequent queue full errors (%s)\n",
				    msg);
			else
				ipath_cdbg(ERRPKT,
				    "Temporarily disabling reporting too"
				    " frequent packet errors (%s)\n",
				    msg);
		}

		/*
		 * Re-enable the masked errors after around 3 minutes, in
		 * ipath_get_faststats().  If we have a series of fast
		 * repeating but different errors, the interval will keep
		 * stretching out, but that's OK, since a situation like
		 * that is pretty catastrophic anyway.
		 */
		dd->ipath_unmasktime = jiffies + HZ * 180;
	}

	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
	if (ignore_this_time)
		errs &= ~ignore_this_time;
	if (errs & ~dd->ipath_lasterror) {
		errs &= ~dd->ipath_lasterror;
		/* never suppress duplicate hwerrors or ibstatuschange */
		dd->ipath_lasterror |= errs &
			~(INFINIPATH_E_HARDWARE |
			  INFINIPATH_E_IBSTATUSCHANGED);
	}

	if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
		dd->ipath_spectriggerhit++;
		ipath_dbg("%lu special trigger hits\n",
			dd->ipath_spectriggerhit);
	}

	/* likely due to cancel; so suppress message unless verbose */
	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
		dd->ipath_lastcancel > jiffies) {
		/* armlaunch takes precedence; it often causes both. */
		ipath_cdbg(VERBOSE,
			"Suppressed %s error (%llx) after sendbuf cancel\n",
			(errs & INFINIPATH_E_SPIOARMLAUNCH) ?
			"armlaunch" : "sendpktlen", (unsigned long long)errs);
		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
	}

	if (!errs)
		return 0;

	if (!noprint) {
		ipath_err_t mask;
		/*
		 * The ones we mask off are handled specially below
		 * or above.  Also mask SDMADISABLED by default as it
		 * is too chatty.
		 */
		mask = INFINIPATH_E_IBSTATUSCHANGED |
			INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
			INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;

		/* if we're in debug, then don't mask SDMADISABLED msgs */
		if (ipath_debug & __IPATH_DBG)
			mask &= ~INFINIPATH_E_SDMADISABLED;

		ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
	} else
		/* so we don't need if (!noprint) at strlcat's below */
		*msg = 0;

	if (errs & E_SUM_PKTERRS) {
		ipath_stats.sps_pkterrs++;
		chkerrpkts = 1;
	}
	if (errs & E_SUM_ERRS)
		ipath_stats.sps_errs++;

	if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
		ipath_stats.sps_crcerrs++;
		chkerrpkts = 1;
	}
	iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);

	/*
	 * We don't want to print these two as they happen, or we can make
	 * the situation even worse, because it takes so long to print
	 * messages to serial consoles.  Kernel ports get printed from
	 * fast_stats, no more than every 5 seconds, user ports get printed
	 * on close
	 */
	if (errs & INFINIPATH_E_RRCVHDRFULL)
		chkerrpkts |= handle_hdrq_full(dd);
	if (errs & INFINIPATH_E_RRCVEGRFULL) {
		struct ipath_portdata *pd = dd->ipath_pd[0];

		/*
		 * since this is of less importance and not likely to
		 * happen without also getting hdrfull, only count
		 * occurrences; don't check each port (or even the kernel
		 * vs user)
		 */
		ipath_stats.sps_etidfull++;
		if (pd->port_head != ipath_get_hdrqtail(pd))
			chkerrpkts |= 1;
	}

	/*
	 * do this before IBSTATUSCHANGED, in case both bits set in a single
	 * interrupt; we want the STATUSCHANGE to "win", so we do our
	 * internal copy of state machine correctly
	 */
	if (errs & INFINIPATH_E_RIBLOSTLINK) {
		/*
		 * force through block below
		 */
		errs |= INFINIPATH_E_IBSTATUSCHANGED;
		ipath_stats.sps_iblink++;
		dd->ipath_flags |= IPATH_LINKDOWN;
		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;

		ipath_dbg("Lost link, link now down (%s)\n",
			ipath_ibcstatus_str[ipath_read_kreg64(dd,
			dd->ipath_kregs->kr_ibcstatus) & 0xf]);
	}
	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
		handle_e_ibstatuschanged(dd, errs);

	if (errs & INFINIPATH_E_RESET) {
		if (!noprint)
			ipath_dev_err(dd, "Got reset, requires re-init "
				      "(unload and reload driver)\n");
		dd->ipath_flags &= ~IPATH_INITTED;	/* needs re-init */
		/* mark as having had error */
		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
	}

	if (!noprint && *msg) {
		if (iserr)
			ipath_dev_err(dd, "%s error\n", msg);
	}
	if (dd->ipath_state_wanted & dd->ipath_flags) {
		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
			   "waking\n", dd->ipath_state_wanted,
			   dd->ipath_flags);
		wake_up_interruptible(&ipath_state_wait);
	}

	return chkerrpkts;
}

/*
 * try to cleanup as much as possible for anything that might have gone
 * wrong while in freeze mode, such as pio buffers being written by user
 * processes (causing armlaunch), send errors due to going into freeze mode,
 * etc., and try to avoid causing extra interrupts while doing so.
 * Forcibly update the in-memory pioavail register copies after cleanup
 * because the chip won't do it while in freeze mode (the register values
 * themselves are kept correct).
 * Make sure that we don't lose any important interrupts by using the chip
 * feature that says that writing 0 to a bit in *clear that is set in
 * *status will cause an interrupt to be generated again (if allowed by
 * the *mask value).
 */
void ipath_clear_freeze(struct ipath_devdata *dd)
{
	/* disable error interrupts, to avoid confusion */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);

	/* also disable interrupts; errormask is sometimes overwritten */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);

	ipath_cancel_sends(dd, 1);

	/* clear the freeze, and be sure chip saw it */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
			 dd->ipath_control);
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);

	/* force in-memory update now we are out of freeze */
	ipath_force_pio_avail_update(dd);

	/*
	 * force new interrupt if any hwerr, error or interrupt bits are
	 * still set, and clear "safe" send packet errors related to freeze
	 * and cancelling sends.  Re-enable error interrupts before possible
	 * force of re-interrupt on pending interrupts.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
		E_SPKT_ERRS_IGNORE);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
		dd->ipath_errormask);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
}


/* this is separate to allow for better optimization of ipath_intr() */

static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
{
	/*
	 * These sometimes happen during driver init and unload; we
	 * don't want to process any interrupts at that point.
	 */

	/* this is just a bandaid, not a fix, if something goes badly
	 * wrong */
	if (++*unexpectp > 100) {
		if (++*unexpectp > 105) {
			/*
			 * ok, we must be taking somebody else's interrupts,
			 * due to a messed up mptable and/or PIRQ table, so
			 * unregister the interrupt.  We've seen this during
			 * linuxbios development work, and it may happen in
			 * the future again.
			 */
			if (dd->pcidev && dd->ipath_irq) {
				ipath_dev_err(dd, "Now %u unexpected "
					      "interrupts, unregistering "
					      "interrupt handler\n",
					      *unexpectp);
				ipath_dbg("free_irq of irq %d\n",
					  dd->ipath_irq);
				dd->ipath_f_free_irq(dd);
			}
		}
		if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
			ipath_dev_err(dd, "%u unexpected interrupts, "
				      "disabling interrupts completely\n",
				      *unexpectp);
			/*
			 * disable all interrupts, something is very wrong
			 */
			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
					 0ULL);
		}
	} else if (*unexpectp > 1)
		ipath_dbg("Interrupt when not ready, should not happen, "
			  "ignoring\n");
}

static noinline void ipath_bad_regread(struct ipath_devdata *dd)
{
	static int allbits;

	/* separate routine, for better optimization of ipath_intr() */

	/*
	 * We print the message and disable interrupts, in hope of
	 * having a better chance of debugging the problem.
	 */
	ipath_dev_err(dd,
		      "Read of interrupt status failed (all bits set)\n");
	if (allbits++) {
		/* disable all interrupts, something is very wrong */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
		if (allbits == 2) {
			ipath_dev_err(dd, "Still bad interrupt status, "
				      "unregistering interrupt\n");
			dd->ipath_f_free_irq(dd);
		} else if (allbits > 2) {
			if ((allbits % 10000) == 0)
				printk(".");
		} else
			ipath_dev_err(dd, "Disabling interrupts, "
				      "multiple errors\n");
	}
}

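/*
 * Tell the verbs layer that PIO buffers are available.  If it returns
 * a positive value (senders still waiting), re-arm the
 * buffer-available interrupt so we are called again as more buffers
 * free up.
 */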
static void handle_layer_pioavail(struct ipath_devdata *dd)
{
	unsigned long flags;
	int ret;

	ret = ipath_ib_piobufavail(dd->verbs_dev);
	if (ret <= 0)
		return;

	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
	dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			 dd->ipath_sendctrl);
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}

/*
 * Handle receive interrupts for user ports; this means a user
 * process was waiting for a packet to arrive, and didn't want
 * to poll
 */
static void handle_urcv(struct ipath_devdata *dd, u64 istat)
{
	u64 portr;
	int i;
	int rcvdint = 0;

	/*
	 * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
	 * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
	 * would both like timely updates of the bits so that
	 * we don't pass them by unnecessarily.  the rmb()
	 * here ensures that we see them promptly -- the
	 * corresponding wmb()'s are in ipath_poll_urgent()
	 * and ipath_poll_next()...
	 */
	rmb();
	portr = ((istat >> dd->ipath_i_rcvavail_shift) &
		 dd->ipath_i_rcvavail_mask) |
		((istat >> dd->ipath_i_rcvurg_shift) &
		 dd->ipath_i_rcvurg_mask);
	for (i = 1; i < dd->ipath_cfgports; i++) {
		struct ipath_portdata *pd = dd->ipath_pd[i];

		if (portr & (1 << i) && pd && pd->port_cnt) {
			if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
					       &pd->port_flag)) {
				clear_bit(i + dd->ipath_r_intravail_shift,
					  &dd->ipath_rcvctrl);
				wake_up_interruptible(&pd->port_wait);
				rcvdint = 1;
			} else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
						      &pd->port_flag)) {
				pd->port_urgent++;
				wake_up_interruptible(&pd->port_wait);
			}
		}
	}
	if (rcvdint) {
		/*
		 * only want to take one interrupt, so turn off the rcv
		 * interrupt for all the ports for which we set rcv_waiting
		 * (but never for the kernel port)
		 */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl);
	}
}

irqreturn_t ipath_intr(int irq, void *data)
{
	struct ipath_devdata *dd = data;
	u64 istat, chk0rcv = 0;
	ipath_err_t estat = 0;
	irqreturn_t ret;
	static unsigned unexpected = 0;
	u64 kportrbits;

	ipath_stats.sps_ints++;

	if (dd->ipath_int_counter != (u32) -1)
		dd->ipath_int_counter++;

	if (!(dd->ipath_flags & IPATH_PRESENT)) {
		/*
		 * This return value is not great, but we do not want the
		 * interrupt core code to remove our interrupt handler
		 * because we don't appear to be handling an interrupt
		 * during a chip reset.
		 */
		return IRQ_HANDLED;
	}

	/*
	 * this needs to be flags&initted, not statusp, so we keep
	 * taking interrupts even after link goes down, etc.
	 * Also, we *must* clear the interrupt at some point, or we won't
	 * take it again, which can be real bad for errors, etc...
	 */

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		ipath_bad_intr(dd, &unexpected);
		ret = IRQ_NONE;
		goto bail;
	}

	istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);

	if (unlikely(!istat)) {
		ipath_stats.sps_nullintr++;
		ret = IRQ_NONE; /* not our interrupt, or already handled */
		goto bail;
	}
	if (unlikely(istat == -1)) {
		ipath_bad_regread(dd);
		/* don't know if it was our interrupt or not */
		ret = IRQ_NONE;
		goto bail;
	}

	if (unexpected)
		unexpected = 0;

	if (unlikely(istat & ~dd->ipath_i_bitsextant))
		ipath_dev_err(dd,
			      "interrupt with unknown interrupts %Lx set\n",
			      (unsigned long long)
			      istat & ~dd->ipath_i_bitsextant);
	else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
		ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n",
			(unsigned long long) istat);

	if (istat & INFINIPATH_I_ERROR) {
		ipath_stats.sps_errints++;
		estat = ipath_read_kreg64(dd,
					  dd->ipath_kregs->kr_errorstatus);
		if (!estat)
			dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
				 "but no error bits set!\n",
				 (unsigned long long) istat);
		else if (estat == -1LL)
			/*
			 * should we try clearing all, or hope next read
			 * works?
			 */
			ipath_dev_err(dd, "Read of error status failed "
				      "(all bits set); ignoring\n");
		else
			chk0rcv |= handle_errors(dd, estat);
	}

	if (istat & INFINIPATH_I_GPIO) {
		/*
		 * GPIO interrupts fall in two broad classes:
		 * GPIO_2 indicates (on some HT4xx boards) that a packet
		 *        has arrived for Port 0. Checking for this
		 *        is controlled by flag IPATH_GPIO_INTR.
		 * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
		 *        errors that we need to count. Checking for this
		 *        is controlled by flag IPATH_GPIO_ERRINTRS.
		 */
		u32 gpiostatus;
		u32 to_clear = 0;

		gpiostatus = ipath_read_kreg32(
			dd, dd->ipath_kregs->kr_gpio_status);
		/* First the error-counter case. */
		if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
		    (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
			/* want to clear the bits we see asserted. */
			to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);

			/*
			 * Count appropriately, clear bits out of our copy,
			 * as they have been "handled".
			 */
			if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
				ipath_dbg("FlowCtl on UnsupVL\n");
				dd->ipath_rxfc_unsupvl_errs++;
			}
			if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
				ipath_dbg("Overrun Threshold exceeded\n");
				dd->ipath_overrun_thresh_errs++;
			}
			if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
				ipath_dbg("Local Link Integrity error\n");
				dd->ipath_lli_errs++;
			}
			gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
		}
		/* Now the Port0 Receive case */
		if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
		    (dd->ipath_flags & IPATH_GPIO_INTR)) {
			/*
			 * GPIO status bit 2 is set, and we expected it.
			 * Clear it and set chk0rcv so the Port0 receive
			 * queue is drained below.  This probably only
			 * happens if a Port0 pkt arrives at _just_ the
			 * wrong time.
			 */
			to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
			gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
			chk0rcv = 1;
		}
		if (gpiostatus) {
			/*
			 * Some unexpected bits remain. If they could have
			 * caused the interrupt, complain and clear.
			 * To avoid repetition of this condition, also clear
			 * the mask. It is almost certainly due to error.
			 */
			const u32 mask = (u32) dd->ipath_gpio_mask;

			if (mask & gpiostatus) {
				ipath_dbg("Unexpected GPIO IRQ bits %x\n",
				  gpiostatus & mask);
				to_clear |= (gpiostatus & mask);
				dd->ipath_gpio_mask &= ~(gpiostatus & mask);
				ipath_write_kreg(dd,
					dd->ipath_kregs->kr_gpio_mask,
					dd->ipath_gpio_mask);
			}
		}
		if (to_clear) {
			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
					(u64) to_clear);
		}
	}

	/*
	 * Clear the interrupt bits we found set, unless they are receive
	 * related, in which case we already cleared them above, and don't
	 * want to clear them again, because we might lose an interrupt.
	 * Clear it early, so we "know" the chip will have seen this by
	 * the time we process the queue, and will re-interrupt if necessary.
	 * The processor itself won't take the interrupt again until we
	 * return.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);

	/*
	 * Handle kernel receive queues before checking for pio buffers
	 * available since receives can overflow; piobuf waiters can afford
	 * a few extra cycles, since they were waiting anyway, and users
	 * waiting for receive are at the bottom.
	 */
	kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
		(1ULL << dd->ipath_i_rcvurg_shift);
	if (chk0rcv || (istat & kportrbits)) {
		istat &= ~kportrbits;
		ipath_kreceive(dd->ipath_pd[0]);
	}

	if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
		     (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
		handle_urcv(dd, istat);

	if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
		handle_sdma_intr(dd, istat);

	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
		unsigned long flags;

		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
				 dd->ipath_sendctrl);
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);

		/* always process; sdma verbs uses PIO for acks and VL15 */
		handle_layer_pioavail(dd);
	}

	ret = IRQ_HANDLED;

bail:
	return ret;
}