ipath_intr.c revision b4d390d8d219452e5d4257c87134a6934d7fabeb
1/*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses.  You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 *     Redistribution and use in source and binary forms, with or
12 *     without modification, are permitted provided that the following
13 *     conditions are met:
14 *
15 *      - Redistributions of source code must retain the above
16 *        copyright notice, this list of conditions and the following
17 *        disclaimer.
18 *
19 *      - Redistributions in binary form must reproduce the above
20 *        copyright notice, this list of conditions and the following
21 *        disclaimer in the documentation and/or other materials
22 *        provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/pci.h>
35#include <linux/delay.h>
36
37#include "ipath_kernel.h"
38#include "ipath_verbs.h"
39#include "ipath_common.h"
40
41/*
42 * clear (write) a pio buffer, to clear a parity error.   This routine
43 * should only be called when in freeze mode, and the buffer should be
44 * canceled afterwards.
45 */
46static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
47{
48	u32 __iomem *pbuf;
49	u32 dwcnt; /* dword count to write */
50	if (pnum < dd->ipath_piobcnt2k) {
51		pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
52			dd->ipath_palign);
53		dwcnt = dd->ipath_piosize2k >> 2;
54	}
55	else {
56		pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
57			(pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
58		dwcnt = dd->ipath_piosize4k >> 2;
59	}
60	dev_info(&dd->pcidev->dev,
61		"Rewrite PIO buffer %u, to recover from parity error\n",
62		pnum);
63
64	/* no flush required, since already in freeze */
65	writel(dwcnt + 1, pbuf);
66	while (--dwcnt)
67		writel(0, pbuf++);
68}
69
70/*
71 * Called when we might have an error that is specific to a particular
72 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
73 * If rewrite is true, and bits are set in the sendbufferror registers,
74 * we'll write to the buffer, for error recovery on parity errors.
75 */
76void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
77{
78	u32 piobcnt;
79	unsigned long sbuf[4];
80	/*
81	 * it's possible that sendbuffererror could have bits set; might
82	 * have already done this as a result of hardware error handling
83	 */
84	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
85	/* read these before writing errorclear */
86	sbuf[0] = ipath_read_kreg64(
87		dd, dd->ipath_kregs->kr_sendbuffererror);
88	sbuf[1] = ipath_read_kreg64(
89		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
90	if (piobcnt > 128)
91		sbuf[2] = ipath_read_kreg64(
92			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
93	if (piobcnt > 192)
94		sbuf[3] = ipath_read_kreg64(
95			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
96	else
97		sbuf[3] = 0;
98
99	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
100		int i;
101		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
102			dd->ipath_lastcancel > jiffies) {
103			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
104					  "SendbufErrs %lx %lx", sbuf[0],
105					  sbuf[1]);
106			if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
107				printk(" %lx %lx ", sbuf[2], sbuf[3]);
108			printk("\n");
109		}
110
111		for (i = 0; i < piobcnt; i++)
112			if (test_bit(i, sbuf)) {
113				if (rewrite)
114					ipath_clrpiobuf(dd, i);
115				ipath_disarm_piobufs(dd, i, 1);
116			}
117		/* ignore armlaunch errs for a bit */
118		dd->ipath_lastcancel = jiffies+3;
119	}
120}
121
122
/* Receive-related errors which we want to count for stats */
#define E_SUM_PKTERRS ( \
	INFINIPATH_E_RHDRLEN | \
	INFINIPATH_E_RBADTID | \
	INFINIPATH_E_RBADVERSION | \
	INFINIPATH_E_RHDR | \
	INFINIPATH_E_RLONGPKTLEN | \
	INFINIPATH_E_RSHORTPKTLEN | \
	INFINIPATH_E_RMAXPKTLEN | \
	INFINIPATH_E_RMINPKTLEN | \
	INFINIPATH_E_RFORMATERR | \
	INFINIPATH_E_RUNSUPVL | \
	INFINIPATH_E_RUNEXPCHAR | \
	INFINIPATH_E_REBP)

/* Send-related errors which we want to count for stats */
#define E_SUM_ERRS ( \
	INFINIPATH_E_SPIOARMLAUNCH | \
	INFINIPATH_E_SUNEXPERRPKTNUM | \
	INFINIPATH_E_SDROPPEDDATAPKT | \
	INFINIPATH_E_SDROPPEDSMPPKT | \
	INFINIPATH_E_SMAXPKTLEN | \
	INFINIPATH_E_SUNSUPVL | \
	INFINIPATH_E_SMINPKTLEN | \
	INFINIPATH_E_SPKTLEN | \
	INFINIPATH_E_INVALIDADDR)

/*
 * Send packet errors that may be ignored in connection with freeze and
 * cancelling buffers.  This is similar to E_SUM_ERRS, with two
 * differences: armlaunch is left out, because we could get more of them
 * while still cleaning up and need to cancel those as they happen, and
 * errors not related to freeze and cancelling buffers are left out so
 * that they are not ignored.
 */
#define E_SPKT_ERRS_IGNORE ( \
	INFINIPATH_E_SDROPPEDDATAPKT | \
	INFINIPATH_E_SDROPPEDSMPPKT | \
	INFINIPATH_E_SMAXPKTLEN | \
	INFINIPATH_E_SMINPKTLEN | \
	INFINIPATH_E_SPKTLEN)

/*
 * Errors that can occur when the link changes state while a packet is
 * being sent or received.  This doesn't cover things like EBP or VCRC
 * that can be the result of the sender having its link change state, so
 * that we receive a "known bad" packet.
 */
#define E_SUM_LINK_PKTERRS ( \
	INFINIPATH_E_SDROPPEDDATAPKT | \
	INFINIPATH_E_SDROPPEDSMPPKT | \
	INFINIPATH_E_SMINPKTLEN | \
	INFINIPATH_E_SPKTLEN | \
	INFINIPATH_E_RSHORTPKTLEN | \
	INFINIPATH_E_RMINPKTLEN | \
	INFINIPATH_E_RUNEXPCHAR)
162
163static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
164{
165	u64 ignore_this_time = 0;
166
167	ipath_disarm_senderrbufs(dd, 0);
168	if ((errs & E_SUM_LINK_PKTERRS) &&
169	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
170		/*
171		 * This can happen when SMA is trying to bring the link
172		 * up, but the IB link changes state at the "wrong" time.
173		 * The IB logic then complains that the packet isn't
174		 * valid.  We don't want to confuse people, so we just
175		 * don't print them, except at debug
176		 */
177		ipath_dbg("Ignoring packet errors %llx, because link not "
178			  "ACTIVE\n", (unsigned long long) errs);
179		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
180	}
181
182	return ignore_this_time;
183}
184
185/* generic hw error messages... */
186#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
187	{ \
188		.mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
189			  INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
190		.msg = "TXE " #a " Memory Parity"	     \
191	}
192#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
193	{ \
194		.mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
195			  INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
196		.msg = "RXE " #a " Memory Parity"	     \
197	}
198
199static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
200	INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
201	INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
202
203	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
204	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
205	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
206
207	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
208	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
209	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
210	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
211	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
212	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
213	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
214};
215
/**
 * ipath_format_hwmsg - format a single hwerror message
 * @msg: message buffer; the new text is appended to what it already holds
 * @msgl: length of message buffer
 * @hwmsg: message to add to message buffer
 *
 * Appends @hwmsg wrapped in square brackets, using strlcat() with @msgl
 * as the size for each piece.
 */
static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
{
	const char *pieces[] = { "[", hwmsg, "]" };
	size_t n;

	for (n = 0; n < sizeof(pieces) / sizeof(pieces[0]); n++)
		strlcat(msg, pieces[n], msgl);
}
228
229/**
230 * ipath_format_hwerrors - format hardware error messages for display
231 * @hwerrs hardware errors bit vector
232 * @hwerrmsgs hardware error descriptions
233 * @nhwerrmsgs number of hwerrmsgs
234 * @msg message buffer
235 * @msgl message buffer length
236 */
237void ipath_format_hwerrors(u64 hwerrs,
238			   const struct ipath_hwerror_msgs *hwerrmsgs,
239			   size_t nhwerrmsgs,
240			   char *msg, size_t msgl)
241{
242	int i;
243	const int glen =
244	    sizeof(ipath_generic_hwerror_msgs) /
245	    sizeof(ipath_generic_hwerror_msgs[0]);
246
247	for (i=0; i<glen; i++) {
248		if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
249			ipath_format_hwmsg(msg, msgl,
250					   ipath_generic_hwerror_msgs[i].msg);
251		}
252	}
253
254	for (i=0; i<nhwerrmsgs; i++) {
255		if (hwerrs & hwerrmsgs[i].mask) {
256			ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
257		}
258	}
259}
260
261/* return the strings for the most common link states */
262static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
263{
264	char *ret;
265	u32 state;
266
267	state = ipath_ib_state(dd, ibcs);
268	if (state == dd->ib_init)
269		ret = "Init";
270	else if (state == dd->ib_arm)
271		ret = "Arm";
272	else if (state == dd->ib_active)
273		ret = "Active";
274	else
275		ret = "Down";
276	return ret;
277}
278
279void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
280{
281	struct ib_event event;
282
283	event.device = &dd->verbs_dev->ibdev;
284	event.element.port_num = 1;
285	event.event = ev;
286	ib_dispatch_event(&event);
287}
288
/*
 * Handle an IB status change.
 *
 * Reads kr_ibcstatus and compares the decoded link state with the one
 * recorded in dd->ipath_lastibcstat at the previous interrupt.  Depending
 * on the transition it calls the chip-specific ipath_f_ib_updown() hook,
 * updates the IPATH_LINK* bits in dd->ipath_flags and the IB bits in
 * *dd->ipath_statusp, dispatches IB port events, and finally stores the
 * new status in dd->ipath_lastibcstat.  Link training states that belong
 * to link recovery are skipped without recording anything.
 *
 * @errs is not referenced in this function.
 */
289static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
290				     ipath_err_t errs)
291{
292	u32 ltstate, lstate, ibstate, lastlstate;
293	u32 init = dd->ib_init;
294	u32 arm = dd->ib_arm;
295	u32 active = dd->ib_active;
296	const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
297
298	lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
299	ibstate = ipath_ib_state(dd, ibcs);
300	/* linkstate at last interrupt */
301	lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
302	ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingstate */
303
304	/*
305	 * Since going into a recovery state causes the link state to go
306	 * down and since recovery is transitory, it is better if we "miss"
307	 * ever seeing the link training state go into recovery (i.e.,
308	 * ignore this transition for link state special handling purposes)
309	 * without even updating ipath_lastibcstat.
310	 */
311	if ((ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN) ||
312	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT) ||
313	    (ltstate == INFINIPATH_IBCS_LT_STATE_RECOVERIDLE))
314		goto done;
315
316	/*
317	 * if linkstate transitions into INIT from any of the various down
318	 * states, or if it transitions from any of the up (INIT or better)
319	 * states into any of the down states (except link recovery), then
320	 * call the chip-specific code to take appropriate actions.
321	 */
322	if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
323		lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
324		/* transitioned to UP */
325		if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
326			/* link came up, so we must no longer be disabled */
327			dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED;
328			ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
329			goto skip_ibchange; /* chip-code handled */
330		}
331	} else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
332		(dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
333		ltstate <= INFINIPATH_IBCS_LT_STATE_CFGWAITRMT &&
334		ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
335		int handled;
		/* transitioned to DOWN; the force-notify request is one-shot */
336		handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
337		dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
338		if (handled) {
339			ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
340			goto skip_ibchange; /* chip-code handled */
341		}
342	}
343
344	/*
345	 * Significant enough to always print and get into logs, if it was
346	 * unexpected.  If it was a requested state change, we'll have
347	 * already cleared the flags, so we won't print this warning
348	 */
349	if ((ibstate != arm && ibstate != active) &&
350	    (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
351		dev_info(&dd->pcidev->dev, "Link state changed from %s "
352			 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
353			 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
354	}
355
356	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
357	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
358		u32 lastlts;
359		lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
360		/*
361		 * Ignore cycling back and forth from Polling.Active to
362		 * Polling.Quiet while waiting for the other end of the link
363		 * to come up, except to try and decide if we are connected
364		 * to a live IB device or not.  We will cycle back and
365		 * forth between them if no cable is plugged in, the other
366		 * device is powered off or disabled, etc.
367		 */
368		if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
369		    lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
			/* the 40th consecutive poll cycle sets NOCABLE */
370			if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) &&
371			     (++dd->ipath_ibpollcnt == 40)) {
372				dd->ipath_flags |= IPATH_NOCABLE;
373				*dd->ipath_statusp |=
374					IPATH_STATUS_IB_NOCABLE;
375				ipath_cdbg(LINKVERB, "Set NOCABLE\n");
376			}
377			ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
378				ipath_ibcstatus_str[ltstate], ibstate);
379			goto skip_ibchange;
380		}
381	}
382
383	dd->ipath_ibpollcnt = 0; /* not poll*, now */
384	ipath_stats.sps_iblink++;
385
	/*
	 * If the link error recovery counter moved since the value
	 * snapshotted when the link last went active, take the link down.
	 */
386	if (ibstate != init && dd->ipath_lastlinkrecov && ipath_linkrecovery) {
387		u64 linkrecov;
388		linkrecov = ipath_snap_cntr(dd,
389			dd->ipath_cregs->cr_iblinkerrrecovcnt);
390		if (linkrecov != dd->ipath_lastlinkrecov) {
391			ipath_dbg("IB linkrecov up %Lx (%s %s) recov %Lu\n",
392				ibcs, ib_linkstate(dd, ibcs),
393				ipath_ibcstatus_str[ltstate],
394				linkrecov);
395			/* and no more until active again */
396			dd->ipath_lastlinkrecov = 0;
397			ipath_set_linkstate(dd, IPATH_IB_LINKDOWN);
398			goto skip_ibchange;
399		}
400	}
401
402	if (ibstate == init || ibstate == arm || ibstate == active) {
403		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
404		if (ibstate == init || ibstate == arm) {
405			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
			/* dropping out of ACTIVE is reported as a port error */
406			if (dd->ipath_flags & IPATH_LINKACTIVE)
407				signal_ib_event(dd, IB_EVENT_PORT_ERR);
408		}
409		if (ibstate == arm) {
410			dd->ipath_flags |= IPATH_LINKARMED;
411			dd->ipath_flags &= ~(IPATH_LINKUNK |
412				IPATH_LINKINIT | IPATH_LINKDOWN |
413				IPATH_LINKACTIVE | IPATH_NOCABLE);
414			ipath_hol_down(dd);
415		} else  if (ibstate == init) {
416			/*
417			 * set INIT and DOWN.  Down is checked by
418			 * most of the other code, but INIT is
419			 * useful to know in a few places.
420			 */
421			dd->ipath_flags |= IPATH_LINKINIT |
422				IPATH_LINKDOWN;
423			dd->ipath_flags &= ~(IPATH_LINKUNK |
424				IPATH_LINKARMED | IPATH_LINKACTIVE |
425				IPATH_NOCABLE);
426			ipath_hol_down(dd);
427		} else {  /* active */
			/* baseline for the link recovery check above */
428			dd->ipath_lastlinkrecov = ipath_snap_cntr(dd,
429				dd->ipath_cregs->cr_iblinkerrrecovcnt);
430			*dd->ipath_statusp |=
431				IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
432			dd->ipath_flags |= IPATH_LINKACTIVE;
433			dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
434				| IPATH_LINKDOWN | IPATH_LINKARMED |
435				IPATH_NOCABLE);
436			if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
437				ipath_restart_sdma(dd);
438			signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
439			/* LED active not handled in chip _f_updown */
440			dd->ipath_f_setextled(dd, lstate, ltstate);
441			ipath_hol_up(dd);
442		}
443
444		/*
445		 * print after we've already done the work, so as not to
446		 * delay the state changes and notifications, for debugging
447		 */
448		if (lstate == lastlstate)
449			ipath_cdbg(LINKVERB, "Unchanged from last: %s "
450				"(%x)\n", ib_linkstate(dd, ibcs), ibstate);
451		else
452			ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
453				  dd->ipath_unit, ib_linkstate(dd, ibcs),
454				  ipath_ibcstatus_str[ltstate],  ibstate);
455	} else { /* down */
456		if (dd->ipath_flags & IPATH_LINKACTIVE)
457			signal_ib_event(dd, IB_EVENT_PORT_ERR);
458		dd->ipath_flags |= IPATH_LINKDOWN;
459		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
460				     | IPATH_LINKACTIVE |
461				     IPATH_LINKARMED);
462		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
463		dd->ipath_lli_counter = 0;
464
465		if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
466			ipath_cdbg(VERBOSE, "Unit %u link state down "
467				   "(state 0x%x), from %s\n",
468				   dd->ipath_unit, lstate,
469				   ib_linkstate(dd, dd->ipath_lastibcstat));
470		else
471			ipath_cdbg(LINKVERB, "Unit %u link state changed "
472				   "to %s (0x%x) from down (%x)\n",
473				   dd->ipath_unit,
474				   ipath_ibcstatus_str[ltstate],
475				   ibstate, lastlstate);
476	}
477
	/* every path except link recovery records the status just read */
478skip_ibchange:
479	dd->ipath_lastibcstat = ibcs;
	/* link recovery states land here, leaving ipath_lastibcstat alone */
480done:
481	return;
482}
483
484static void handle_supp_msgs(struct ipath_devdata *dd,
485			     unsigned supp_msgs, char *msg, u32 msgsz)
486{
487	/*
488	 * Print the message unless it's ibc status change only, which
489	 * happens so often we never want to count it.
490	 */
491	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
492		int iserr;
493		ipath_err_t mask;
494		iserr = ipath_decode_err(dd, msg, msgsz,
495					 dd->ipath_lasterror &
496					 ~INFINIPATH_E_IBSTATUSCHANGED);
497
498		mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
499			INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
500
501		/* if we're in debug, then don't mask SDMADISABLED msgs */
502		if (ipath_debug & __IPATH_DBG)
503			mask &= ~INFINIPATH_E_SDMADISABLED;
504
505		if (dd->ipath_lasterror & ~mask)
506			ipath_dev_err(dd, "Suppressed %u messages for "
507				      "fast-repeating errors (%s) (%llx)\n",
508				      supp_msgs, msg,
509				      (unsigned long long)
510				      dd->ipath_lasterror);
511		else {
512			/*
513			 * rcvegrfull and rcvhdrqfull are "normal", for some
514			 * types of processes (mostly benchmarks) that send
515			 * huge numbers of messages, while not processing
516			 * them. So only complain about these at debug
517			 * level.
518			 */
519			if (iserr)
520				ipath_dbg("Suppressed %u messages for %s\n",
521					  supp_msgs, msg);
522			else
523				ipath_cdbg(ERRPKT,
524					"Suppressed %u messages for %s\n",
525					  supp_msgs, msg);
526		}
527	}
528}
529
530static unsigned handle_frequent_errors(struct ipath_devdata *dd,
531				       ipath_err_t errs, char *msg,
532				       u32 msgsz, int *noprint)
533{
534	unsigned long nc;
535	static unsigned long nextmsg_time;
536	static unsigned nmsgs, supp_msgs;
537
538	/*
539	 * Throttle back "fast" messages to no more than 10 per 5 seconds.
540	 * This isn't perfect, but it's a reasonable heuristic. If we get
541	 * more than 10, give a 6x longer delay.
542	 */
543	nc = jiffies;
544	if (nmsgs > 10) {
545		if (time_before(nc, nextmsg_time)) {
546			*noprint = 1;
547			if (!supp_msgs++)
548				nextmsg_time = nc + HZ * 3;
549		}
550		else if (supp_msgs) {
551			handle_supp_msgs(dd, supp_msgs, msg, msgsz);
552			supp_msgs = 0;
553			nmsgs = 0;
554		}
555	}
556	else if (!nmsgs++ || time_after(nc, nextmsg_time))
557		nextmsg_time = nc + HZ / 2;
558
559	return supp_msgs;
560}
561
562static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
563{
564	unsigned long flags;
565	int expected;
566
567	if (ipath_debug & __IPATH_DBG) {
568		char msg[128];
569		ipath_decode_err(dd, msg, sizeof msg, errs &
570			INFINIPATH_E_SDMAERRS);
571		ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
572	}
573	if (ipath_debug & __IPATH_VERBDBG) {
574		unsigned long tl, hd, status, lengen;
575		tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
576		hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
577		status = ipath_read_kreg64(dd
578			, dd->ipath_kregs->kr_senddmastatus);
579		lengen = ipath_read_kreg64(dd,
580			dd->ipath_kregs->kr_senddmalengen);
581		ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
582			"lengen 0x%lx\n", tl, hd, status, lengen);
583	}
584
585	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
586	__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
587	expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
588	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
589	if (!expected)
590		ipath_cancel_sends(dd, 1);
591}
592
593static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
594{
595	unsigned long flags;
596	int expected;
597
598	if ((istat & INFINIPATH_I_SDMAINT) &&
599	    !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
600		ipath_sdma_intr(dd);
601
602	if (istat & INFINIPATH_I_SDMADISABLED) {
603		expected = test_bit(IPATH_SDMA_ABORTING,
604			&dd->ipath_sdma_status);
605		ipath_dbg("%s SDmaDisabled intr\n",
606			expected ? "expected" : "unexpected");
607		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
608		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
609		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
610		if (!expected)
611			ipath_cancel_sends(dd, 1);
612		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
613			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
614	}
615}
616
617static int handle_hdrq_full(struct ipath_devdata *dd)
618{
619	int chkerrpkts = 0;
620	u32 hd, tl;
621	u32 i;
622
623	ipath_stats.sps_hdrqfull++;
624	for (i = 0; i < dd->ipath_cfgports; i++) {
625		struct ipath_portdata *pd = dd->ipath_pd[i];
626
627		if (i == 0) {
628			/*
629			 * For kernel receive queues, we just want to know
630			 * if there are packets in the queue that we can
631			 * process.
632			 */
633			if (pd->port_head != ipath_get_hdrqtail(pd))
634				chkerrpkts |= 1 << i;
635			continue;
636		}
637
638		/* Skip if user context is not open */
639		if (!pd || !pd->port_cnt)
640			continue;
641
642		/* Don't report the same point multiple times. */
643		if (dd->ipath_flags & IPATH_NODMA_RTAIL)
644			tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
645		else
646			tl = ipath_get_rcvhdrtail(pd);
647		if (tl == pd->port_lastrcvhdrqtail)
648			continue;
649
650		hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
651		if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
652			pd->port_lastrcvhdrqtail = tl;
653			pd->port_hdrqfull++;
654			/* flush hdrqfull so that poll() sees it */
655			wmb();
656			wake_up_interruptible(&pd->port_wait);
657		}
658	}
659
660	return chkerrpkts;
661}
662
663static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
664{
665	char msg[128];
666	u64 ignore_this_time = 0;
667	u64 iserr = 0;
668	int chkerrpkts = 0, noprint = 0;
669	unsigned supp_msgs;
670	int log_idx;
671
672	/*
673	 * don't report errors that are masked, either at init
674	 * (not set in ipath_errormask), or temporarily (set in
675	 * ipath_maskederrs)
676	 */
677	errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
678
679	supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
680		&noprint);
681
682	/* do these first, they are most important */
683	if (errs & INFINIPATH_E_HARDWARE) {
684		/* reuse same msg buf */
685		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
686	} else {
687		u64 mask;
688		for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
689			mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
690			if (errs & mask)
691				ipath_inc_eeprom_err(dd, log_idx, 1);
692		}
693	}
694
695	if (errs & INFINIPATH_E_SDMAERRS)
696		handle_sdma_errors(dd, errs);
697
698	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
699		ipath_dev_err(dd, "error interrupt with unknown errors "
700			      "%llx set\n", (unsigned long long)
701			      (errs & ~dd->ipath_e_bitsextant));
702
703	if (errs & E_SUM_ERRS)
704		ignore_this_time = handle_e_sum_errs(dd, errs);
705	else if ((errs & E_SUM_LINK_PKTERRS) &&
706	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
707		/*
708		 * This can happen when SMA is trying to bring the link
709		 * up, but the IB link changes state at the "wrong" time.
710		 * The IB logic then complains that the packet isn't
711		 * valid.  We don't want to confuse people, so we just
712		 * don't print them, except at debug
713		 */
714		ipath_dbg("Ignoring packet errors %llx, because link not "
715			  "ACTIVE\n", (unsigned long long) errs);
716		ignore_this_time = errs & E_SUM_LINK_PKTERRS;
717	}
718
719	if (supp_msgs == 250000) {
720		int s_iserr;
721		/*
722		 * It's not entirely reasonable assuming that the errors set
723		 * in the last clear period are all responsible for the
724		 * problem, but the alternative is to assume it's the only
725		 * ones on this particular interrupt, which also isn't great
726		 */
727		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
728
729		dd->ipath_errormask &= ~dd->ipath_maskederrs;
730		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
731				 dd->ipath_errormask);
732		s_iserr = ipath_decode_err(dd, msg, sizeof msg,
733					   dd->ipath_maskederrs);
734
735		if (dd->ipath_maskederrs &
736		    ~(INFINIPATH_E_RRCVEGRFULL |
737		      INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
738			ipath_dev_err(dd, "Temporarily disabling "
739			    "error(s) %llx reporting; too frequent (%s)\n",
740				(unsigned long long) dd->ipath_maskederrs,
741				msg);
742		else {
743			/*
744			 * rcvegrfull and rcvhdrqfull are "normal",
745			 * for some types of processes (mostly benchmarks)
746			 * that send huge numbers of messages, while not
747			 * processing them.  So only complain about
748			 * these at debug level.
749			 */
750			if (s_iserr)
751				ipath_dbg("Temporarily disabling reporting "
752				    "too frequent queue full errors (%s)\n",
753				    msg);
754			else
755				ipath_cdbg(ERRPKT,
756				    "Temporarily disabling reporting too"
757				    " frequent packet errors (%s)\n",
758				    msg);
759		}
760
761		/*
762		 * Re-enable the masked errors after around 3 minutes.  in
763		 * ipath_get_faststats().  If we have a series of fast
764		 * repeating but different errors, the interval will keep
765		 * stretching out, but that's OK, as that's pretty
766		 * catastrophic.
767		 */
768		dd->ipath_unmasktime = jiffies + HZ * 180;
769	}
770
771	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
772	if (ignore_this_time)
773		errs &= ~ignore_this_time;
774	if (errs & ~dd->ipath_lasterror) {
775		errs &= ~dd->ipath_lasterror;
776		/* never suppress duplicate hwerrors or ibstatuschange */
777		dd->ipath_lasterror |= errs &
778			~(INFINIPATH_E_HARDWARE |
779			  INFINIPATH_E_IBSTATUSCHANGED);
780	}
781
782	if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
783		dd->ipath_spectriggerhit++;
784		ipath_dbg("%lu special trigger hits\n",
785			dd->ipath_spectriggerhit);
786	}
787
788	/* likely due to cancel; so suppress message unless verbose */
789	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
790		dd->ipath_lastcancel > jiffies) {
791		/* armlaunch takes precedence; it often causes both. */
792		ipath_cdbg(VERBOSE,
793			"Suppressed %s error (%llx) after sendbuf cancel\n",
794			(errs &  INFINIPATH_E_SPIOARMLAUNCH) ?
795			"armlaunch" : "sendpktlen", (unsigned long long)errs);
796		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
797	}
798
799	if (!errs)
800		return 0;
801
802	if (!noprint) {
803		ipath_err_t mask;
804		/*
805		 * The ones we mask off are handled specially below
806		 * or above.  Also mask SDMADISABLED by default as it
807		 * is too chatty.
808		 */
809		mask = INFINIPATH_E_IBSTATUSCHANGED |
810			INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
811			INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
812
813		/* if we're in debug, then don't mask SDMADISABLED msgs */
814		if (ipath_debug & __IPATH_DBG)
815			mask &= ~INFINIPATH_E_SDMADISABLED;
816
817		ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
818	} else
819		/* so we don't need if (!noprint) at strlcat's below */
820		*msg = 0;
821
822	if (errs & E_SUM_PKTERRS) {
823		ipath_stats.sps_pkterrs++;
824		chkerrpkts = 1;
825	}
826	if (errs & E_SUM_ERRS)
827		ipath_stats.sps_errs++;
828
829	if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
830		ipath_stats.sps_crcerrs++;
831		chkerrpkts = 1;
832	}
833	iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
834
835
836	/*
837	 * We don't want to print these two as they happen, or we can make
838	 * the situation even worse, because it takes so long to print
839	 * messages to serial consoles.  Kernel ports get printed from
840	 * fast_stats, no more than every 5 seconds, user ports get printed
841	 * on close
842	 */
843	if (errs & INFINIPATH_E_RRCVHDRFULL)
844		chkerrpkts |= handle_hdrq_full(dd);
845	if (errs & INFINIPATH_E_RRCVEGRFULL) {
846		struct ipath_portdata *pd = dd->ipath_pd[0];
847
848		/*
849		 * since this is of less importance and not likely to
850		 * happen without also getting hdrfull, only count
851		 * occurrences; don't check each port (or even the kernel
852		 * vs user)
853		 */
854		ipath_stats.sps_etidfull++;
855		if (pd->port_head != ipath_get_hdrqtail(pd))
856			chkerrpkts |= 1;
857	}
858
859	/*
860	 * do this before IBSTATUSCHANGED, in case both bits set in a single
861	 * interrupt; we want the STATUSCHANGE to "win", so we do our
862	 * internal copy of state machine correctly
863	 */
864	if (errs & INFINIPATH_E_RIBLOSTLINK) {
865		/*
866		 * force through block below
867		 */
868		errs |= INFINIPATH_E_IBSTATUSCHANGED;
869		ipath_stats.sps_iblink++;
870		dd->ipath_flags |= IPATH_LINKDOWN;
871		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
872				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
873		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
874
875		ipath_dbg("Lost link, link now down (%s)\n",
876			ipath_ibcstatus_str[ipath_read_kreg64(dd,
877			dd->ipath_kregs->kr_ibcstatus) & 0xf]);
878	}
879	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
880		handle_e_ibstatuschanged(dd, errs);
881
882	if (errs & INFINIPATH_E_RESET) {
883		if (!noprint)
884			ipath_dev_err(dd, "Got reset, requires re-init "
885				      "(unload and reload driver)\n");
886		dd->ipath_flags &= ~IPATH_INITTED;	/* needs re-init */
887		/* mark as having had error */
888		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
889		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
890	}
891
892	if (!noprint && *msg) {
893		if (iserr)
894			ipath_dev_err(dd, "%s error\n", msg);
895	}
896	if (dd->ipath_state_wanted & dd->ipath_flags) {
897		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
898			   "waking\n", dd->ipath_state_wanted,
899			   dd->ipath_flags);
900		wake_up_interruptible(&ipath_state_wait);
901	}
902
903	return chkerrpkts;
904}
905
906/*
907 * try to cleanup as much as possible for anything that might have gone
908 * wrong while in freeze mode, such as pio buffers being written by user
909 * processes (causing armlaunch), send errors due to going into freeze mode,
910 * etc., and try to avoid causing extra interrupts while doing so.
911 * Forcibly update the in-memory pioavail register copies after cleanup
912 * because the chip won't do it for anything changing while in freeze mode
913 * (we don't want to wait for the next pio buffer state change).
914 * Make sure that we don't lose any important interrupts by using the chip
915 * feature that says that writing 0 to a bit in *clear that is set in
916 * *status will cause an interrupt to be generated again (if allowed by
917 * the *mask value).
918 */
919void ipath_clear_freeze(struct ipath_devdata *dd)
920{
921	int i, im;
922	u64 val;
923
924	/* disable error interrupts, to avoid confusion */
925	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
926
927	/* also disable interrupts; errormask is sometimes overwriten */
928	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
929
930	/*
931	 * clear all sends, because they have may been
932	 * completed by usercode while in freeze mode, and
933	 * therefore would not be sent, and eventually
934	 * might cause the process to run out of bufs
935	 */
936	ipath_cancel_sends(dd, 1);
937	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
938			 dd->ipath_control);
939
940	/*
941	 * ensure pio avail updates continue (because the update
942	 * won't have happened from cancel_sends because we were
943	 * still in freeze
944	 */
945	ipath_force_pio_avail_update(dd);
946
947	/*
948	 * We just enabled pioavailupdate, so dma copy is almost certainly
949	 * not yet right, so read the registers directly.  Similar to init
950	 */
951	for (i = 0; i < dd->ipath_pioavregs; i++) {
952		/* deal with 6110 chip bug */
953		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
954			i ^ 1 : i;
955		val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
956		dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
957		dd->ipath_pioavailshadow[i] = val |
958			(~dd->ipath_pioavailkernel[i] <<
959			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
960	}
961
962	/*
963	 * force new interrupt if any hwerr, error or interrupt bits are
964	 * still set, and clear "safe" send packet errors related to freeze
965	 * and cancelling sends.  Re-enable error interrupts before possible
966	 * force of re-interrupt on pending interrupts.
967	 */
968	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
969	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
970		E_SPKT_ERRS_IGNORE);
971	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
972		dd->ipath_errormask);
973	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
974	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
975}
976
977
978/* this is separate to allow for better optimization of ipath_intr() */
979
980static noinline void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
981{
982	/*
983	 * sometimes happen during driver init and unload, don't want
984	 * to process any interrupts at that point
985	 */
986
987	/* this is just a bandaid, not a fix, if something goes badly
988	 * wrong */
989	if (++*unexpectp > 100) {
990		if (++*unexpectp > 105) {
991			/*
992			 * ok, we must be taking somebody else's interrupts,
993			 * due to a messed up mptable and/or PIRQ table, so
994			 * unregister the interrupt.  We've seen this during
995			 * linuxbios development work, and it may happen in
996			 * the future again.
997			 */
998			if (dd->pcidev && dd->ipath_irq) {
999				ipath_dev_err(dd, "Now %u unexpected "
1000					      "interrupts, unregistering "
1001					      "interrupt handler\n",
1002					      *unexpectp);
1003				ipath_dbg("free_irq of irq %d\n",
1004					  dd->ipath_irq);
1005				dd->ipath_f_free_irq(dd);
1006			}
1007		}
1008		if (ipath_read_ireg(dd, dd->ipath_kregs->kr_intmask)) {
1009			ipath_dev_err(dd, "%u unexpected interrupts, "
1010				      "disabling interrupts completely\n",
1011				      *unexpectp);
1012			/*
1013			 * disable all interrupts, something is very wrong
1014			 */
1015			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
1016					 0ULL);
1017		}
1018	} else if (*unexpectp > 1)
1019		ipath_dbg("Interrupt when not ready, should not happen, "
1020			  "ignoring\n");
1021}
1022
1023static noinline void ipath_bad_regread(struct ipath_devdata *dd)
1024{
1025	static int allbits;
1026
1027	/* separate routine, for better optimization of ipath_intr() */
1028
1029	/*
1030	 * We print the message and disable interrupts, in hope of
1031	 * having a better chance of debugging the problem.
1032	 */
1033	ipath_dev_err(dd,
1034		      "Read of interrupt status failed (all bits set)\n");
1035	if (allbits++) {
1036		/* disable all interrupts, something is very wrong */
1037		ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
1038		if (allbits == 2) {
1039			ipath_dev_err(dd, "Still bad interrupt status, "
1040				      "unregistering interrupt\n");
1041			dd->ipath_f_free_irq(dd);
1042		} else if (allbits > 2) {
1043			if ((allbits % 10000) == 0)
1044				printk(".");
1045		} else
1046			ipath_dev_err(dd, "Disabling interrupts, "
1047				      "multiple errors\n");
1048	}
1049}
1050
1051static void handle_layer_pioavail(struct ipath_devdata *dd)
1052{
1053	unsigned long flags;
1054	int ret;
1055
1056	ret = ipath_ib_piobufavail(dd->verbs_dev);
1057	if (ret > 0)
1058		goto set;
1059
1060	return;
1061set:
1062	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1063	dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
1064	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1065			 dd->ipath_sendctrl);
1066	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1067	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1068}
1069
1070/*
1071 * Handle receive interrupts for user ports; this means a user
1072 * process was waiting for a packet to arrive, and didn't want
1073 * to poll
1074 */
1075static void handle_urcv(struct ipath_devdata *dd, u64 istat)
1076{
1077	u64 portr;
1078	int i;
1079	int rcvdint = 0;
1080
1081	/*
1082	 * test_and_clear_bit(IPATH_PORT_WAITING_RCV) and
1083	 * test_and_clear_bit(IPATH_PORT_WAITING_URG) below
1084	 * would both like timely updates of the bits so that
1085	 * we don't pass them by unnecessarily.  the rmb()
1086	 * here ensures that we see them promptly -- the
1087	 * corresponding wmb()'s are in ipath_poll_urgent()
1088	 * and ipath_poll_next()...
1089	 */
1090	rmb();
1091	portr = ((istat >> dd->ipath_i_rcvavail_shift) &
1092		 dd->ipath_i_rcvavail_mask) |
1093		((istat >> dd->ipath_i_rcvurg_shift) &
1094		 dd->ipath_i_rcvurg_mask);
1095	for (i = 1; i < dd->ipath_cfgports; i++) {
1096		struct ipath_portdata *pd = dd->ipath_pd[i];
1097
1098		if (portr & (1 << i) && pd && pd->port_cnt) {
1099			if (test_and_clear_bit(IPATH_PORT_WAITING_RCV,
1100					       &pd->port_flag)) {
1101				clear_bit(i + dd->ipath_r_intravail_shift,
1102					  &dd->ipath_rcvctrl);
1103				wake_up_interruptible(&pd->port_wait);
1104				rcvdint = 1;
1105			} else if (test_and_clear_bit(IPATH_PORT_WAITING_URG,
1106						      &pd->port_flag)) {
1107				pd->port_urgent++;
1108				wake_up_interruptible(&pd->port_wait);
1109			}
1110		}
1111	}
1112	if (rcvdint) {
1113		/* only want to take one interrupt, so turn off the rcv
1114		 * interrupt for all the ports that we set the rcv_waiting
1115		 * (but never for kernel port)
1116		 */
1117		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1118				 dd->ipath_rcvctrl);
1119	}
1120}
1121
/*
 * Top-level interrupt handler for an InfiniPath device.
 *
 * @irq: interrupt number (not used in the body)
 * @data: the struct ipath_devdata for the interrupting device
 *
 * Reads the interrupt status, then in order: handles error and GPIO
 * interrupts, clears the status bits that were seen, services the
 * port 0 receive queue, wakes user ports waiting for receive, handles
 * send DMA interrupts and finally PIO-buffer-available interrupts.
 *
 * Returns IRQ_HANDLED when the status was processed, or when the device
 * is not marked IPATH_PRESENT; returns IRQ_NONE when the device is not
 * IPATH_INITTED, or when the status reads as zero or as all ones.
 */
irqreturn_t ipath_intr(int irq, void *data)
{
	struct ipath_devdata *dd = data;
	u64 istat, chk0rcv = 0;
	ipath_err_t estat = 0;
	irqreturn_t ret;
	/* consecutive interrupts taken while not INITTED; shared by all calls */
	static unsigned unexpected = 0;
	u64 kportrbits;

	ipath_stats.sps_ints++;

	/* saturating counter: stops incrementing at (u32) -1 */
	if (dd->ipath_int_counter != (u32) -1)
		dd->ipath_int_counter++;

	if (!(dd->ipath_flags & IPATH_PRESENT)) {
		/*
		 * This return value is not great, but we do not want the
		 * interrupt core code to remove our interrupt handler
		 * because we don't appear to be handling an interrupt
		 * during a chip reset.
		 */
		return IRQ_HANDLED;
	}

	/*
	 * this needs to be flags&initted, not statusp, so we keep
	 * taking interrupts even after link goes down, etc.
	 * Also, we *must* clear the interrupt at some point, or we won't
	 * take it again, which can be real bad for errors, etc...
	 */

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		ipath_bad_intr(dd, &unexpected);
		ret = IRQ_NONE;
		goto bail;
	}

	istat = ipath_read_ireg(dd, dd->ipath_kregs->kr_intstatus);

	if (unlikely(!istat)) {
		ipath_stats.sps_nullintr++;
		ret = IRQ_NONE; /* not our interrupt, or already handled */
		goto bail;
	}
	if (unlikely(istat == -1)) {
		ipath_bad_regread(dd);
		/* don't know if it was our interrupt or not */
		ret = IRQ_NONE;
		goto bail;
	}

	/* a good status read ends any run of unexpected interrupts */
	if (unexpected)
		unexpected = 0;

	if (unlikely(istat & ~dd->ipath_i_bitsextant))
		ipath_dev_err(dd,
			      "interrupt with unknown interrupts %Lx set\n",
			      istat & ~dd->ipath_i_bitsextant);
	else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
		ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat);

	if (istat & INFINIPATH_I_ERROR) {
		ipath_stats.sps_errints++;
		estat = ipath_read_kreg64(dd,
					  dd->ipath_kregs->kr_errorstatus);
		if (!estat)
			dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
				 "but no error bits set!\n", istat);
		else if (estat == -1LL)
			/*
			 * should we try clearing all, or hope next read
			 * works?
			 */
			ipath_dev_err(dd, "Read of error status failed "
				      "(all bits set); ignoring\n");
		else
			/* non-zero result forces the port 0 receive check below */
			chk0rcv |= handle_errors(dd, estat);
	}

	if (istat & INFINIPATH_I_GPIO) {
		/*
		 * GPIO interrupts fall in two broad classes:
		 * GPIO_2 indicates (on some HT4xx boards) that a packet
		 *        has arrived for Port 0. Checking for this
		 *        is controlled by flag IPATH_GPIO_INTR.
		 * GPIO_3..5 on IBA6120 Rev2 and IBA6110 Rev4 chips indicate
		 *        errors that we need to count. Checking for this
		 *        is controlled by flag IPATH_GPIO_ERRINTRS.
		 */
		u32 gpiostatus;
		/* accumulates the GPIO status bits to write to kr_gpio_clear */
		u32 to_clear = 0;

		gpiostatus = ipath_read_kreg32(
			dd, dd->ipath_kregs->kr_gpio_status);
		/* First the error-counter case. */
		if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
		    (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
			/* want to clear the bits we see asserted. */
			to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);

			/*
			 * Count appropriately, clear bits out of our copy,
			 * as they have been "handled".
			 */
			if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
				ipath_dbg("FlowCtl on UnsupVL\n");
				dd->ipath_rxfc_unsupvl_errs++;
			}
			if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
				ipath_dbg("Overrun Threshold exceeded\n");
				dd->ipath_overrun_thresh_errs++;
			}
			if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
				ipath_dbg("Local Link Integrity error\n");
				dd->ipath_lli_errs++;
			}
			gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
		}
		/* Now the Port0 Receive case */
		if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
		    (dd->ipath_flags & IPATH_GPIO_INTR)) {
			/*
			 * GPIO status bit 2 is set, and we expected it.
			 * Clear it and record that in chk0rcv.
			 * This probably only happens if a Port0 pkt
			 * arrives at _just_ the wrong time, and we
			 * handle that by setting chk0rcv.
			 */
			to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
			gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
			chk0rcv = 1;
		}
		if (gpiostatus) {
			/*
			 * Some unexpected bits remain. If they could have
			 * caused the interrupt, complain and clear.
			 * To avoid repetition of this condition, also clear
			 * the mask. It is almost certainly due to error.
			 */
			const u32 mask = (u32) dd->ipath_gpio_mask;

			if (mask & gpiostatus) {
				ipath_dbg("Unexpected GPIO IRQ bits %x\n",
				  gpiostatus & mask);
				to_clear |= (gpiostatus & mask);
				dd->ipath_gpio_mask &= ~(gpiostatus & mask);
				ipath_write_kreg(dd,
					dd->ipath_kregs->kr_gpio_mask,
					dd->ipath_gpio_mask);
			}
		}
		if (to_clear) {
			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
					(u64) to_clear);
		}
	}

	/*
	 * Clear the interrupt bits we found set.
	 * NOTE(review): an earlier version of this comment said receive
	 * related bits are excluded because they were already cleared
	 * above, but the code writes every bit of istat to kr_intclear,
	 * receive bits included - confirm which is intended.
	 * Clear it early, so we "know" the chip will have seen this by
	 * the time we process the queue, and will re-interrupt if necessary.
	 * The processor itself won't take the interrupt again until we return.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);

	/*
	 * Handle kernel receive queues before checking for pio buffers
	 * available since receives can overflow; piobuf waiters can afford
	 * a few extra cycles, since they were waiting anyway, and user's
	 * waiting for receive are at the bottom.
	 */
	/* lowest bit of the rcvavail and rcvurg fields, i.e. port 0 */
	kportrbits = (1ULL << dd->ipath_i_rcvavail_shift) |
		(1ULL << dd->ipath_i_rcvurg_shift);
	if (chk0rcv || (istat & kportrbits)) {
		/* strip the port 0 bits so handle_urcv() is not called for them */
		istat &= ~kportrbits;
		ipath_kreceive(dd->ipath_pd[0]);
	}

	/* any remaining receive-available or receive-urgent bits */
	if (istat & ((dd->ipath_i_rcvavail_mask << dd->ipath_i_rcvavail_shift) |
		     (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
		handle_urcv(dd, istat);

	if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
		handle_sdma_intr(dd, istat);

	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
		unsigned long flags;

		/*
		 * Turn the buffer-available interrupt off in sendctrl;
		 * handle_layer_pioavail() sets it again if
		 * ipath_ib_piobufavail() returns a positive value.
		 */
		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
		dd->ipath_sendctrl &= ~INFINIPATH_S_PIOINTBUFAVAIL;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
				 dd->ipath_sendctrl);
		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);

		/* this interrupt is not expected when IPATH_HAS_SEND_DMA is set */
		if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
			handle_layer_pioavail(dd);
		else
			ipath_dbg("unexpected BUFAVAIL intr\n");
	}

	ret = IRQ_HANDLED;

bail:
	return ret;
}
1330