ipath_intr.c revision d562a5ae69bd5643d777788117d02acb22fab347
/*
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>

#include "ipath_kernel.h"
#include "ips_common.h"
#include "ipath_layer.h"

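/* receive-side per-packet errors, counted in ipath_stats.sps_pkterrs */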
#define E_SUM_PKTERRS \
	(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
	 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
	 INFINIPATH_E_RLONGPKTLEN | INFINIPATH_E_RSHORTPKTLEN | \
	 INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RMINPKTLEN | \
	 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
	 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)

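/* send-side errors, handled (and possibly ignored) by handle_e_sum_errs() */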
#define E_SUM_ERRS \
	(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
	 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SUNSUPVL | \
	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
	 INFINIPATH_E_INVALIDADDR)

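/*
 * Handle send-side errors summarized in E_SUM_ERRS: disarm any PIO
 * buffers the chip has flagged in sendbuffererror, and return a mask
 * of error bits that can be ignored this time because the link is not
 * yet active.
 */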
static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
{
	unsigned long sbuf[4];
	u64 ignore_this_time = 0;
	u32 piobcnt;

	/* it's possible that sendbuffererror could be valid */
	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
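	/*
	 * The sendbuffererror registers form a bitmap of PIO buffers
	 * with errors; each 64-bit register covers 64 buffers, so two
	 * registers suffice unless more than 128 buffers are configured.
	 */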
	/* read these before writing errorclear */
	sbuf[0] = ipath_read_kreg64(
		dd, dd->ipath_kregs->kr_sendbuffererror);
	sbuf[1] = ipath_read_kreg64(
		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
	if (piobcnt > 128) {
		sbuf[2] = ipath_read_kreg64(
			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
		sbuf[3] = ipath_read_kreg64(
			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
	}

	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
		int i;

		ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
		if ((ipath_debug & __IPATH_PKTDBG) && piobcnt > 128)
			printk("%lx %lx ", sbuf[2], sbuf[3]);
		for (i = 0; i < piobcnt; i++) {
			if (test_bit(i, sbuf)) {
				u32 __iomem *piobuf;
				if (i < dd->ipath_piobcnt2k)
					piobuf = (u32 __iomem *)
						(dd->ipath_pio2kbase +
						 i * dd->ipath_palign);
				else
					piobuf = (u32 __iomem *)
						(dd->ipath_pio4kbase +
						 (i - dd->ipath_piobcnt2k) *
						 dd->ipath_4kalign);

				ipath_cdbg(PKT,
					   "PIObuf[%u] @%p pbc is %x; ",
					   i, piobuf, readl(piobuf));

				ipath_disarm_piobufs(dd, i, 1);
			}
		}
		if (ipath_debug & __IPATH_PKTDBG)
			printk("\n");
	}
	if ((errs & (INFINIPATH_E_SDROPPEDDATAPKT |
		     INFINIPATH_E_SDROPPEDSMPPKT |
		     INFINIPATH_E_SMINPKTLEN)) &&
	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		/*
		 * This can happen when the SMA is trying to bring the link
		 * up, but the IB link changes state at the "wrong" time.
		 * The IB logic then complains that the packet isn't
		 * valid.  We don't want to confuse people, so we just
		 * don't print them, except at debug level.
		 */
		ipath_dbg("Ignoring pktsend errors %llx, because not "
			  "yet active\n", (unsigned long long) errs);
		ignore_this_time = INFINIPATH_E_SDROPPEDDATAPKT |
			INFINIPATH_E_SDROPPEDSMPPKT |
			INFINIPATH_E_SMINPKTLEN;
	}

	return ignore_this_time;
}

/* return the strings for the most common link states */
static char *ib_linkstate(u32 linkstate)
{
	char *ret;

	switch (linkstate) {
	case IPATH_IBSTATE_INIT:
		ret = "Init";
		break;
	case IPATH_IBSTATE_ARM:
		ret = "Arm";
		break;
	case IPATH_IBSTATE_ACTIVE:
		ret = "Active";
		break;
	default:
		ret = "Down";
	}

	return ret;
}

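/*
 * Handle an IB status change interrupt: read kr_ibcstatus, fold the
 * new link state into the IPATH_LINK* flags and *ipath_statusp, update
 * the external LEDs, and notify the layered driver when the link
 * becomes active.
 */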
static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
				     ipath_err_t errs, int noprint)
{
	u64 val;
	u32 ltstate, lstate;

	/*
	 * even if diags are enabled, we want to notice LINKINIT, etc.
	 * We just don't want to change the LED state, or
	 * dd->ipath_kregs->kr_ibcctrl
	 */
	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
	lstate = val & IPATH_IBSTATE_MASK;
	if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
	    lstate == IPATH_IBSTATE_ACTIVE) {
		/*
		 * only print at SMA if there is a change, debug if not
		 * (sometimes we want to know that, usually not).
		 */
		if (lstate == ((unsigned) dd->ipath_lastibcstat
			       & IPATH_IBSTATE_MASK))
			ipath_dbg("Status change intr but no change (%s)\n",
				  ib_linkstate(lstate));
		else
			ipath_cdbg(SMA, "Unit %u link state %s, last "
				   "was %s\n", dd->ipath_unit,
				   ib_linkstate(lstate),
				   ib_linkstate((unsigned)
						dd->ipath_lastibcstat
						& IPATH_IBSTATE_MASK));
	} else {
		lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
		if (lstate == IPATH_IBSTATE_INIT ||
		    lstate == IPATH_IBSTATE_ARM ||
		    lstate == IPATH_IBSTATE_ACTIVE)
			ipath_cdbg(SMA, "Unit %u link state down"
				   " (state 0x%x), from %s\n",
				   dd->ipath_unit,
				   (u32) val & IPATH_IBSTATE_MASK,
				   ib_linkstate(lstate));
		else
			ipath_cdbg(VERBOSE, "Unit %u link state changed "
				   "to 0x%x from down (%x)\n",
				   dd->ipath_unit, (u32) val, lstate);
	}
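
	/* extract the link training state and link state fields */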
	ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
		INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
	lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
		INFINIPATH_IBCS_LINKSTATE_MASK;

	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
		u32 last_ltstate;

		/*
		 * Ignore cycling back and forth from Polling.Active
		 * to Polling.Quiet while waiting for the other end of
		 * the link to come up. We will cycle back and forth
		 * between them if no cable is plugged in, or if
		 * the other device is powered off or disabled, etc.
		 */
		last_ltstate = (dd->ipath_lastibcstat >>
				INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)
			& INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
		if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE
		    || last_ltstate ==
		    INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
			if (dd->ipath_ibpollcnt > 40) {
				dd->ipath_flags |= IPATH_NOCABLE;
				*dd->ipath_statusp |=
					IPATH_STATUS_IB_NOCABLE;
			} else
				dd->ipath_ibpollcnt++;
			goto skip_ibchange;
		}
	}
	dd->ipath_ibpollcnt = 0; /* not Polling.Active or Polling.Quiet */
	ipath_stats.sps_iblink++;
	if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
		dd->ipath_flags |= IPATH_LINKDOWN;
		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				     | IPATH_LINKACTIVE |
				     IPATH_LINKARMED);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
		if (!noprint) {
			if (((dd->ipath_lastibcstat >>
			      INFINIPATH_IBCS_LINKSTATE_SHIFT) &
			     INFINIPATH_IBCS_LINKSTATE_MASK)
			    == INFINIPATH_IBCS_L_STATE_ACTIVE)
				/* if from up to down, be more vocal */
				ipath_cdbg(SMA,
					   "Unit %u link now down (%s)\n",
					   dd->ipath_unit,
					   ipath_ibcstatus_str[ltstate]);
			else
				ipath_cdbg(VERBOSE, "Unit %u link is "
					   "down (%s)\n", dd->ipath_unit,
					   ipath_ibcstatus_str[ltstate]);
		}

		dd->ipath_f_setextled(dd, lstate, ltstate);
	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) {
		dd->ipath_flags |= IPATH_LINKACTIVE;
		dd->ipath_flags &=
			~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN |
			  IPATH_LINKARMED | IPATH_NOCABLE);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
		*dd->ipath_statusp |=
			IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
		dd->ipath_f_setextled(dd, lstate, ltstate);

		__ipath_layer_intr(dd, IPATH_LAYER_INT_IF_UP);
	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
		/*
		 * set INIT and DOWN.  Down is checked by most of the other
		 * code, but INIT is useful to know in a few places.
		 */
		dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN;
		dd->ipath_flags &=
			~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED
			  | IPATH_NOCABLE);
		*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
					| IPATH_STATUS_IB_READY);
		dd->ipath_f_setextled(dd, lstate, ltstate);
	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
		dd->ipath_flags |= IPATH_LINKARMED;
		dd->ipath_flags &=
			~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
			  IPATH_LINKACTIVE | IPATH_NOCABLE);
		*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
					| IPATH_STATUS_IB_READY);
		dd->ipath_f_setextled(dd, lstate, ltstate);
	} else {
		if (!noprint)
			ipath_dbg("IBstatuschange unit %u: %s (%x)\n",
				  dd->ipath_unit,
				  ipath_ibcstatus_str[ltstate], ltstate);
	}
skip_ibchange:
	dd->ipath_lastibcstat = val;
}

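/*
 * Called from handle_frequent_errors() when a burst of suppressed
 * messages ends, to print a single summary line for the whole burst.
 */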
static void handle_supp_msgs(struct ipath_devdata *dd,
			     unsigned supp_msgs, char msg[512])
{
	/*
	 * Print the message unless it's ibc status change only, which
	 * happens so often we never want to count it.
	 */
	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
		/*
		 * msg is a pointer parameter despite the array notation,
		 * so sizeof would give the pointer size, not 512; pass
		 * the real buffer size.
		 */
		ipath_decode_err(msg, 512, dd->ipath_lasterror &
				 ~INFINIPATH_E_IBSTATUSCHANGED);
		if (dd->ipath_lasterror &
		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
			ipath_dev_err(dd, "Suppressed %u messages for "
				      "fast-repeating errors (%s) (%llx)\n",
				      supp_msgs, msg,
				      (unsigned long long)
				      dd->ipath_lasterror);
		else {
			/*
			 * rcvegrfull and rcvhdrqfull are "normal", for some
			 * types of processes (mostly benchmarks) that send
			 * huge numbers of messages, while not processing
			 * them. So only complain about these at debug
			 * level.
			 */
			ipath_dbg("Suppressed %u messages for %s\n",
				  supp_msgs, msg);
		}
	}
}

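/*
 * Rate-limit error reporting: sets *noprint while messages are being
 * suppressed, and returns how many have been suppressed in the
 * current burst.
 */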
static unsigned handle_frequent_errors(struct ipath_devdata *dd,
				       ipath_err_t errs, char msg[512],
				       int *noprint)
{
	unsigned long nc;
	static unsigned long nextmsg_time;
	static unsigned nmsgs, supp_msgs;

	/*
	 * Throttle back "fast" messages to no more than 10 per 5 seconds.
	 * This isn't perfect, but it's a reasonable heuristic. If we get
	 * more than 10, give a 6x longer delay.
	 */
	nc = jiffies;
	if (nmsgs > 10) {
		if (time_before(nc, nextmsg_time)) {
			*noprint = 1;
			if (!supp_msgs++)
				nextmsg_time = nc + HZ * 3;
		} else if (supp_msgs) {
			handle_supp_msgs(dd, supp_msgs, msg);
			supp_msgs = 0;
			nmsgs = 0;
		}
	} else if (!nmsgs++ || time_after(nc, nextmsg_time))
		nextmsg_time = nc + HZ / 2;

	return supp_msgs;
}

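/*
 * Main error-interrupt worker: decodes and counts whatever is set in
 * kr_errorstatus, masks errors that repeat too quickly, keeps the
 * link-state flags current, and drains the receive queue when error
 * packets may be waiting in it.
 */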
static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
{
	char msg[512];
	u64 ignore_this_time = 0;
	int i;
	int chkerrpkts = 0, noprint = 0;
	unsigned supp_msgs;

	supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);

	/*
	 * don't report errors that are masked (includes those always
	 * ignored)
	 */
	errs &= ~dd->ipath_maskederrs;

	/* do these first, they are most important */
	if (errs & INFINIPATH_E_HARDWARE) {
		/* reuse same msg buf */
		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
	}

	if (!noprint && (errs & ~infinipath_e_bitsextant))
		ipath_dev_err(dd, "error interrupt with unknown errors "
			      "%llx set\n", (unsigned long long)
			      (errs & ~infinipath_e_bitsextant));

	if (errs & E_SUM_ERRS)
		ignore_this_time = handle_e_sum_errs(dd, errs);

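	/*
	 * This threshold is large and somewhat arbitrary-looking; once
	 * this many messages have been suppressed, the errors are
	 * evidently repeating too fast to be useful, so mask them at
	 * the chip until ipath_get_faststats() re-enables them.
	 */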
	if (supp_msgs == 250000) {
		/*
		 * It's not entirely reasonable to assume that the errors
		 * set in the last clear period are all responsible for
		 * the problem, but the alternative is to assume it's the
		 * only ones on this particular interrupt, which also
		 * isn't great
		 */
		dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
				 ~dd->ipath_maskederrs);
		ipath_decode_err(msg, sizeof msg,
				 (dd->ipath_maskederrs & ~dd->
				  ipath_ignorederrs));

		if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL))
			ipath_dev_err(dd, "Disabling error(s) %llx because "
				      "occurring too frequently (%s)\n",
				      (unsigned long long)
				      (dd->ipath_maskederrs &
				       ~dd->ipath_ignorederrs), msg);
		else {
			/*
			 * rcvegrfull and rcvhdrqfull are "normal",
			 * for some types of processes (mostly benchmarks)
			 * that send huge numbers of messages, while not
			 * processing them.  So only complain about
			 * these at debug level.
			 */
			ipath_dbg("Disabling frequent queue full errors "
				  "(%s)\n", msg);
		}

		/*
		 * Re-enable the masked errors after around 3 minutes, in
		 * ipath_get_faststats().  If we have a series of fast
		 * repeating but different errors, the interval will keep
		 * stretching out, but that's OK, as that's pretty
		 * catastrophic anyway.
		 */
		dd->ipath_unmasktime = jiffies + HZ * 180;
	}

	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, errs);
	if (ignore_this_time)
		errs &= ~ignore_this_time;
	if (errs & ~dd->ipath_lasterror) {
		errs &= ~dd->ipath_lasterror;
		/* never suppress duplicate hwerrors or ibstatuschange */
		dd->ipath_lasterror |= errs &
			~(INFINIPATH_E_HARDWARE |
			  INFINIPATH_E_IBSTATUSCHANGED);
	}
	if (!errs)
		return;

	if (!noprint)
		/*
		 * the ones we mask off are handled specially below or above
		 */
		ipath_decode_err(msg, sizeof msg,
				 errs & ~(INFINIPATH_E_IBSTATUSCHANGED |
					  INFINIPATH_E_RRCVEGRFULL |
					  INFINIPATH_E_RRCVHDRFULL |
					  INFINIPATH_E_HARDWARE));
	else
		/* so we don't need if (!noprint) at strlcat's below */
		*msg = 0;

	if (errs & E_SUM_PKTERRS) {
		ipath_stats.sps_pkterrs++;
		chkerrpkts = 1;
	}
	if (errs & E_SUM_ERRS)
		ipath_stats.sps_errs++;

	if (errs & (INFINIPATH_E_RICRC | INFINIPATH_E_RVCRC)) {
		ipath_stats.sps_crcerrs++;
		chkerrpkts = 1;
	}

	/*
	 * We don't want to print these two as they happen, or we can make
	 * the situation even worse, because it takes so long to print
	 * messages to serial consoles.  Kernel ports get printed from
	 * fast_stats, no more than every 5 seconds, user ports get printed
	 * on close
	 */
	if (errs & INFINIPATH_E_RRCVHDRFULL) {
		int any;
		u32 hd, tl;
		ipath_stats.sps_hdrqfull++;
		for (any = i = 0; i < dd->ipath_cfgports; i++) {
			struct ipath_portdata *pd = dd->ipath_pd[i];
			if (i == 0) {
				hd = dd->ipath_port0head;
				tl = (u32) le64_to_cpu(
					*dd->ipath_hdrqtailptr);
			} else if (pd && pd->port_cnt &&
				   pd->port_rcvhdrtail_kvaddr) {
				/*
				 * don't report same point multiple times,
				 * except kernel
				 */
				tl = (u32) *pd->port_rcvhdrtail_kvaddr;
				if (tl == dd->ipath_lastrcvhdrqtails[i])
					continue;
				hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
						       i);
			} else
				continue;
			if (hd == (tl + 1) ||
			    (!hd && tl == dd->ipath_hdrqlast)) {
				dd->ipath_lastrcvhdrqtails[i] = tl;
				pd->port_hdrqfull++;
				if (i == 0)
					chkerrpkts = 1;
			}
		}
	}
	if (errs & INFINIPATH_E_RRCVEGRFULL) {
		/*
		 * since this is of less importance and not likely to
		 * happen without also getting hdrfull, only count
		 * occurrences; don't check each port (or even the kernel
		 * vs user)
		 */
		ipath_stats.sps_etidfull++;
		if (dd->ipath_port0head !=
		    (u32) le64_to_cpu(*dd->ipath_hdrqtailptr))
			chkerrpkts = 1;
	}

	/*
	 * do this before IBSTATUSCHANGED, in case both bits are set in a
	 * single interrupt; we want the STATUSCHANGE to "win", so our
	 * internal copy of the state machine stays correct
	 */
	if (errs & INFINIPATH_E_RIBLOSTLINK) {
		/*
		 * force through block below
		 */
		errs |= INFINIPATH_E_IBSTATUSCHANGED;
		ipath_stats.sps_iblink++;
		dd->ipath_flags |= IPATH_LINKDOWN;
		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
		if (!noprint) {
			u64 st = ipath_read_kreg64(
				dd, dd->ipath_kregs->kr_ibcstatus);

			ipath_dbg("Lost link, link now down (%s)\n",
				  ipath_ibcstatus_str[st & 0xf]);
		}
	}
	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
		handle_e_ibstatuschanged(dd, errs, noprint);

	if (errs & INFINIPATH_E_RESET) {
		if (!noprint)
			ipath_dev_err(dd, "Got reset, requires re-init "
				      "(unload and reload driver)\n");
		dd->ipath_flags &= ~IPATH_INITTED;	/* needs re-init */
		/* mark as having had an error */
		*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
		*dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
	}

	if (!noprint && *msg)
		ipath_dev_err(dd, "%s error\n", msg);
	if (dd->ipath_sma_state_wanted & dd->ipath_flags) {
		ipath_cdbg(VERBOSE, "sma wanted state %x, iflags now %x, "
			   "waking\n", dd->ipath_sma_state_wanted,
			   dd->ipath_flags);
		wake_up_interruptible(&ipath_sma_state_wait);
	}

	if (chkerrpkts)
		/* process possible error packets in hdrq */
		ipath_kreceive(dd);
}

/* this is separate to allow for better optimization of ipath_intr() */

static void ipath_bad_intr(struct ipath_devdata *dd, u32 *unexpectp)
{
	/*
	 * interrupts sometimes happen during driver init and unload;
	 * we don't want to process any at that point
	 */

	/* this is just a band-aid, not a fix, if something goes badly
	 * wrong */
	if (++*unexpectp > 100) {
		if (++*unexpectp > 105) {
			/*
			 * ok, we must be taking somebody else's interrupts,
			 * due to a messed up mptable and/or PIRQ table, so
			 * unregister the interrupt.  We've seen this during
			 * linuxbios development work, and it may happen in
			 * the future again.
			 */
			if (dd->pcidev && dd->pcidev->irq) {
				ipath_dev_err(dd, "Now %u unexpected "
					      "interrupts, unregistering "
					      "interrupt handler\n",
					      *unexpectp);
				ipath_dbg("free_irq of irq %x\n",
					  dd->pcidev->irq);
				free_irq(dd->pcidev->irq, dd);
			}
		}
		if (ipath_read_kreg32(dd, dd->ipath_kregs->kr_intmask)) {
			ipath_dev_err(dd, "%u unexpected interrupts, "
				      "disabling interrupts completely\n",
				      *unexpectp);
			/*
			 * disable all interrupts, something is very wrong
			 */
			ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
					 0ULL);
		}
	} else if (*unexpectp > 1)
		ipath_dbg("Interrupt when not ready, should not happen, "
			  "ignoring\n");
}

static void ipath_bad_regread(struct ipath_devdata *dd)
{
	static int allbits;

	/* separate routine, for better optimization of ipath_intr() */

	/*
	 * We print the message and disable interrupts, in hope of
	 * having a better chance of debugging the problem.
	 */
	ipath_dev_err(dd,
		      "Read of interrupt status failed (all bits set)\n");
	if (allbits++) {
		/* disable all interrupts, something is very wrong */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
		if (allbits == 2) {
			ipath_dev_err(dd, "Still bad interrupt status, "
				      "unregistering interrupt\n");
			free_irq(dd->pcidev->irq, dd);
		} else if (allbits > 2) {
			if ((allbits % 10000) == 0)
				printk(".");
		} else
			ipath_dev_err(dd, "Disabling interrupts, "
				      "multiple errors\n");
	}
}

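/*
 * Wake any user ports sleeping in wait_event for a PIO buffer, now
 * that buffers have become available.
 */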
static void handle_port_pioavail(struct ipath_devdata *dd)
{
	u32 i;
	/*
	 * start from port 1, since for now port 0 never uses
	 * wait_event for PIO
	 */
	for (i = 1; dd->ipath_portpiowait && i < dd->ipath_cfgports; i++) {
		struct ipath_portdata *pd = dd->ipath_pd[i];

		if (pd && pd->port_cnt &&
		    dd->ipath_portpiowait & (1U << i)) {
			clear_bit(i, &dd->ipath_portpiowait);
			if (test_bit(IPATH_PORT_WAITING_PIO,
				     &pd->port_flag)) {
				clear_bit(IPATH_PORT_WAITING_PIO,
					  &pd->port_flag);
				wake_up_interruptible(&pd->port_wait);
			}
		}
	}
}

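/*
 * Offer newly available PIO buffers to the layered network driver and
 * the verbs layer; if either returns nonzero (presumably still wanting
 * buffers), re-arm the PIO-buffer-available interrupt.
 */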
static void handle_layer_pioavail(struct ipath_devdata *dd)
{
	int ret;

	ret = __ipath_layer_intr(dd, IPATH_LAYER_INT_SEND_CONTINUE);
	if (ret > 0)
		goto set;

	ret = __ipath_verbs_piobufavail(dd);
	if (ret > 0)
		goto set;

	return;
set:
	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
			 dd->ipath_sendctrl);
}

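/*
 * Dispatch receive-available and receive-urgent interrupts: the kernel
 * port (0) is drained directly via ipath_kreceive(), while user ports
 * just get their waiters woken, with their rcv interrupt left disabled
 * until they re-enable it.
 */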
static void handle_rcv(struct ipath_devdata *dd, u32 istat)
{
	u64 portr;
	int i;
	int rcvdint = 0;

	portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
		 infinipath_i_rcvavail_mask)
		| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
		   infinipath_i_rcvurg_mask);
	for (i = 0; i < dd->ipath_cfgports; i++) {
		struct ipath_portdata *pd = dd->ipath_pd[i];
		if (portr & (1 << i) && pd &&
		    pd->port_cnt) {
			if (i == 0)
				ipath_kreceive(dd);
			else if (test_bit(IPATH_PORT_WAITING_RCV,
					  &pd->port_flag)) {
				int rcbit;
				clear_bit(IPATH_PORT_WAITING_RCV,
					  &pd->port_flag);
				rcbit = i + INFINIPATH_R_INTRAVAIL_SHIFT;
				/*
				 * clear_bit() takes a bit number, not a
				 * mask, so pass rcbit directly rather
				 * than 1UL << rcbit
				 */
				clear_bit(rcbit, &dd->ipath_rcvctrl);
				wake_up_interruptible(&pd->port_wait);
				rcvdint = 1;
			}
		}
	}
	if (rcvdint) {
		/* only want to take one interrupt, so turn off the rcv
		 * interrupt for all the ports that we did the wakeup on
		 * (but never for kernel port)
		 */
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl);
	}
}

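/*
 * The main interrupt handler; dispatches error, GPIO, PIO-available,
 * and receive interrupts to the helpers above.
 */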
irqreturn_t ipath_intr(int irq, void *data, struct pt_regs *regs)
{
	struct ipath_devdata *dd = data;
	u32 istat;
	ipath_err_t estat = 0;
	static unsigned unexpected = 0;
	irqreturn_t ret;

	if (!(dd->ipath_flags & IPATH_PRESENT)) {
		/* this is mostly so we don't try to touch the chip while
		 * it is being reset */
		/*
		 * This return value is perhaps odd, but we do not want the
		 * interrupt core code to remove our interrupt handler
		 * because we don't appear to be handling an interrupt
		 * during a chip reset.
		 */
		return IRQ_HANDLED;
	}

	istat = ipath_read_kreg32(dd, dd->ipath_kregs->kr_intstatus);
	if (unlikely(!istat)) {
		ipath_stats.sps_nullintr++;
		ret = IRQ_NONE; /* not our interrupt, or already handled */
		goto bail;
	}
	if (unlikely(istat == -1)) {
		ipath_bad_regread(dd);
		/* don't know if it was our interrupt or not */
		ret = IRQ_NONE;
		goto bail;
	}

	ipath_stats.sps_ints++;

	/*
	 * this needs to be flags&initted, not statusp, so we keep
	 * taking interrupts even after link goes down, etc.
	 * Also, we *must* clear the interrupt at some point, or we won't
	 * take it again, which can be really bad for errors, etc...
	 */

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		ipath_bad_intr(dd, &unexpected);
		ret = IRQ_NONE;
		goto bail;
	}
	if (unexpected)
		unexpected = 0;

	ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);

	if (istat & ~infinipath_i_bitsextant)
		ipath_dev_err(dd,
			      "interrupt with unknown interrupts %x set\n",
			      istat & (u32) ~infinipath_i_bitsextant);

	if (istat & INFINIPATH_I_ERROR) {
		ipath_stats.sps_errints++;
		estat = ipath_read_kreg64(dd,
					  dd->ipath_kregs->kr_errorstatus);
		if (!estat)
			dev_info(&dd->pcidev->dev, "error interrupt (%x), "
				 "but no error bits set!\n", istat);
		else if (estat == -1LL)
			/*
			 * should we try clearing all, or hope next read
			 * works?
			 */
			ipath_dev_err(dd, "Read of error status failed "
				      "(all bits set); ignoring\n");
		else
			handle_errors(dd, estat);
	}

	if (istat & INFINIPATH_I_GPIO) {
		if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
			u32 gpiostatus;
			gpiostatus = ipath_read_kreg32(
				dd, dd->ipath_kregs->kr_gpio_status);
			ipath_dbg("Unexpected GPIO interrupt bits %x\n",
				  gpiostatus);
			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
					 gpiostatus);
		} else {
			/* Clear GPIO status bit 2 */
			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
					 (u64) (1 << 2));

			/*
			 * Packets are available in the port 0 rcv queue.
			 * Eventually this needs to be generalized to check
			 * IPATH_GPIO_INTR, and the specific GPIO bit, if
			 * GPIO interrupts are used for anything else.
			 */
			ipath_kreceive(dd);
		}
	}

	/*
	 * clear the ones we will deal with on this round
	 * We clear it early, mostly for receive interrupts, so we
	 * know the chip will have seen this by the time we process
	 * the queue, and will re-interrupt if necessary.  The processor
	 * itself won't take the interrupt again until we return.
	 */
	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, istat);

	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
		clear_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
				 dd->ipath_sendctrl);

		if (dd->ipath_portpiowait)
			handle_port_pioavail(dd);

		handle_layer_pioavail(dd);
	}

	/*
	 * we check for both transition from empty to non-empty, and urgent
	 * packets (those with the interrupt bit set in the header)
	 */

	if (istat & ((infinipath_i_rcvavail_mask <<
		      INFINIPATH_I_RCVAVAIL_SHIFT)
		     | (infinipath_i_rcvurg_mask <<
			INFINIPATH_I_RCVURG_SHIFT)))
		handle_rcv(dd, istat);

	ret = IRQ_HANDLED;

bail:
	return ret;
}