1/*
2 * Copyright (c) 2010 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35/*
36 * This file contains support for diagnostic functions.  It is accessed by
37 * opening the qib_diag device, normally minor number 129.  Diagnostic use
38 * of the QLogic_IB chip may render the chip or board unusable until the
39 * driver is unloaded, or in some cases, until the system is rebooted.
40 *
41 * Accesses to the chip through this interface are not similar to going
42 * through the /sys/bus/pci resource mmap interface.
43 */
44
45#include <linux/io.h>
46#include <linux/pci.h>
47#include <linux/poll.h>
48#include <linux/vmalloc.h>
49#include <linux/export.h>
50#include <linux/fs.h>
51#include <linux/uaccess.h>
52
53#include "qib.h"
54#include "qib_common.h"
55
/*
 * Handshake state of a diag client.  Before general access is allowed a
 * client must read and then write 8 bytes at offset 0, which keeps a
 * stray cat or od on the device node from touching the chip.  The
 * values are ordered: the read and write paths compare against READY.
 */
enum diag_state {
	UNUSED = 0,	/* struct has been returned to the free pool */
	OPENED,		/* handed out by get_client(), no handshake yet */
	INIT,		/* a read has succeeded, handshake write pending */
	READY		/* handshake write done, all reads/writes allowed */
};
62
/*
 * State for an individual client. PID so children cannot abuse handshake.
 * A struct is either linked on one device's diag_client list or parked on
 * client_pool for reuse.
 */
static struct qib_diag_client {
	/* next entry on the device's client list, or in the free pool */
	struct qib_diag_client *next;
	/* device this client was opened on; NULL while pooled */
	struct qib_devdata *dd;
	/* pid of the opener; read/write from any other pid get -EPERM */
	pid_t pid;
	/* progress through the open/read/write handshake */
	enum diag_state state;
} *client_pool;	/* head of the list of recycled client structs */
70
71/*
72 * Get a client struct. Recycled if possible, else kmalloc.
73 * Must be called with qib_mutex held
74 */
75static struct qib_diag_client *get_client(struct qib_devdata *dd)
76{
77	struct qib_diag_client *dc;
78
79	dc = client_pool;
80	if (dc)
81		/* got from pool remove it and use */
82		client_pool = dc->next;
83	else
84		/* None in pool, alloc and init */
85		dc = kmalloc(sizeof *dc, GFP_KERNEL);
86
87	if (dc) {
88		dc->next = NULL;
89		dc->dd = dd;
90		dc->pid = current->pid;
91		dc->state = OPENED;
92	}
93	return dc;
94}
95
96/*
97 * Return to pool. Must be called with qib_mutex held
98 */
99static void return_client(struct qib_diag_client *dc)
100{
101	struct qib_devdata *dd = dc->dd;
102	struct qib_diag_client *tdc, *rdc;
103
104	rdc = NULL;
105	if (dc == dd->diag_client) {
106		dd->diag_client = dc->next;
107		rdc = dc;
108	} else {
109		tdc = dc->dd->diag_client;
110		while (tdc) {
111			if (dc == tdc->next) {
112				tdc->next = dc->next;
113				rdc = dc;
114				break;
115			}
116			tdc = tdc->next;
117		}
118	}
119	if (rdc) {
120		rdc->state = UNUSED;
121		rdc->dd = NULL;
122		rdc->pid = 0;
123		rdc->next = client_pool;
124		client_pool = rdc;
125	}
126}
127
/* Handlers for the per-unit diag device; defined further down in this file */
static int qib_diag_open(struct inode *in, struct file *fp);
static int qib_diag_release(struct inode *in, struct file *fp);
static ssize_t qib_diag_read(struct file *fp, char __user *data,
			     size_t count, loff_t *off);
static ssize_t qib_diag_write(struct file *fp, const char __user *data,
			      size_t count, loff_t *off);

/*
 * File operations passed to qib_cdev_init() by qib_diag_add() for each
 * unit's diag device.  Seeking uses default_llseek; the read and write
 * handlers use the file position as the chip offset.
 */
static const struct file_operations diag_file_ops = {
	.owner = THIS_MODULE,
	.write = qib_diag_write,
	.read = qib_diag_read,
	.open = qib_diag_open,
	.release = qib_diag_release,
	.llseek = default_llseek,
};
143
/*
 * Incremented by qib_diag_add() and decremented by qib_diag_remove();
 * the diagpkt device is created on the 0 -> 1 transition and cleaned
 * up when the count drops back to 0.
 */
static atomic_t diagpkt_count = ATOMIC_INIT(0);
/* cdev/device pair of the single diagpkt device shared by all units */
static struct cdev *diagpkt_cdev;
static struct device *diagpkt_device;

static ssize_t qib_diagpkt_write(struct file *fp, const char __user *data,
				 size_t count, loff_t *off);

/* The diagpkt device is write-only; the write handler ignores the offset */
static const struct file_operations diagpkt_file_ops = {
	.owner = THIS_MODULE,
	.write = qib_diagpkt_write,
	.llseek = noop_llseek,
};
156
157int qib_diag_add(struct qib_devdata *dd)
158{
159	char name[16];
160	int ret = 0;
161
162	if (atomic_inc_return(&diagpkt_count) == 1) {
163		ret = qib_cdev_init(QIB_DIAGPKT_MINOR, "ipath_diagpkt",
164				    &diagpkt_file_ops, &diagpkt_cdev,
165				    &diagpkt_device);
166		if (ret)
167			goto done;
168	}
169
170	snprintf(name, sizeof(name), "ipath_diag%d", dd->unit);
171	ret = qib_cdev_init(QIB_DIAG_MINOR_BASE + dd->unit, name,
172			    &diag_file_ops, &dd->diag_cdev,
173			    &dd->diag_device);
174done:
175	return ret;
176}
177
178static void qib_unregister_observers(struct qib_devdata *dd);
179
180void qib_diag_remove(struct qib_devdata *dd)
181{
182	struct qib_diag_client *dc;
183
184	if (atomic_dec_and_test(&diagpkt_count))
185		qib_cdev_cleanup(&diagpkt_cdev, &diagpkt_device);
186
187	qib_cdev_cleanup(&dd->diag_cdev, &dd->diag_device);
188
189	/*
190	 * Return all diag_clients of this device. There should be none,
191	 * as we are "guaranteed" that no clients are still open
192	 */
193	while (dd->diag_client)
194		return_client(dd->diag_client);
195
196	/* Now clean up all unused client structs */
197	while (client_pool) {
198		dc = client_pool;
199		client_pool = dc->next;
200		kfree(dc);
201	}
202	/* Clean up observer list */
203	qib_unregister_observers(dd);
204}
205
206/* qib_remap_ioaddr32 - remap an offset into chip address space to __iomem *
207 *
208 * @dd: the qlogic_ib device
209 * @offs: the offset in chip-space
210 * @cntp: Pointer to max (byte) count for transfer starting at offset
211 * This returns a u32 __iomem * so it can be used for both 64 and 32-bit
212 * mapping. It is needed because with the use of PAT for control of
213 * write-combining, the logically contiguous address-space of the chip
214 * may be split into virtually non-contiguous spaces, with different
215 * attributes, which are them mapped to contiguous physical space
216 * based from the first BAR.
217 *
218 * The code below makes the same assumptions as were made in
219 * init_chip_wc_pat() (qib_init.c), copied here:
220 * Assumes chip address space looks like:
221 *		- kregs + sregs + cregs + uregs (in any order)
222 *		- piobufs (2K and 4K bufs in either order)
223 *	or:
224 *		- kregs + sregs + cregs (in any order)
225 *		- piobufs (2K and 4K bufs in either order)
226 *		- uregs
227 *
228 * If cntp is non-NULL, returns how many bytes from offset can be accessed
229 * Returns 0 if the offset is not mapped.
230 */
231static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset,
232				       u32 *cntp)
233{
234	u32 kreglen;
235	u32 snd_bottom, snd_lim = 0;
236	u32 __iomem *krb32 = (u32 __iomem *)dd->kregbase;
237	u32 __iomem *map = NULL;
238	u32 cnt = 0;
239	u32 tot4k, offs4k;
240
241	/* First, simplest case, offset is within the first map. */
242	kreglen = (dd->kregend - dd->kregbase) * sizeof(u64);
243	if (offset < kreglen) {
244		map = krb32 + (offset / sizeof(u32));
245		cnt = kreglen - offset;
246		goto mapped;
247	}
248
249	/*
250	 * Next check for user regs, the next most common case,
251	 * and a cheap check because if they are not in the first map
252	 * they are last in chip.
253	 */
254	if (dd->userbase) {
255		/* If user regs mapped, they are after send, so set limit. */
256		u32 ulim = (dd->cfgctxts * dd->ureg_align) + dd->uregbase;
257		if (!dd->piovl15base)
258			snd_lim = dd->uregbase;
259		krb32 = (u32 __iomem *)dd->userbase;
260		if (offset >= dd->uregbase && offset < ulim) {
261			map = krb32 + (offset - dd->uregbase) / sizeof(u32);
262			cnt = ulim - offset;
263			goto mapped;
264		}
265	}
266
267	/*
268	 * Lastly, check for offset within Send Buffers.
269	 * This is gnarly because struct devdata is deliberately vague
270	 * about things like 7322 VL15 buffers, and we are not in
271	 * chip-specific code here, so should not make many assumptions.
272	 * The one we _do_ make is that the only chip that has more sndbufs
273	 * than we admit is the 7322, and it has userregs above that, so
274	 * we know the snd_lim.
275	 */
276	/* Assume 2K buffers are first. */
277	snd_bottom = dd->pio2k_bufbase;
278	if (snd_lim == 0) {
279		u32 tot2k = dd->piobcnt2k * ALIGN(dd->piosize2k, dd->palign);
280		snd_lim = snd_bottom + tot2k;
281	}
282	/* If 4k buffers exist, account for them by bumping
283	 * appropriate limit.
284	 */
285	tot4k = dd->piobcnt4k * dd->align4k;
286	offs4k = dd->piobufbase >> 32;
287	if (dd->piobcnt4k) {
288		if (snd_bottom > offs4k)
289			snd_bottom = offs4k;
290		else {
291			/* 4k above 2k. Bump snd_lim, if needed*/
292			if (!dd->userbase || dd->piovl15base)
293				snd_lim = offs4k + tot4k;
294		}
295	}
296	/*
297	 * Judgement call: can we ignore the space between SendBuffs and
298	 * UserRegs, where we would like to see vl15 buffs, but not more?
299	 */
300	if (offset >= snd_bottom && offset < snd_lim) {
301		offset -= snd_bottom;
302		map = (u32 __iomem *)dd->piobase + (offset / sizeof(u32));
303		cnt = snd_lim - offset;
304	}
305
306	if (!map && offs4k && dd->piovl15base) {
307		snd_lim = offs4k + tot4k + 2 * dd->align4k;
308		if (offset >= (offs4k + tot4k) && offset < snd_lim) {
309			map = (u32 __iomem *)dd->piovl15base +
310				((offset - (offs4k + tot4k)) / sizeof(u32));
311			cnt = snd_lim - offset;
312		}
313	}
314
315mapped:
316	if (cntp)
317		*cntp = cnt;
318	return map;
319}
320
321/*
322 * qib_read_umem64 - read a 64-bit quantity from the chip into user space
323 * @dd: the qlogic_ib device
324 * @uaddr: the location to store the data in user memory
325 * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
326 * @count: number of bytes to copy (multiple of 32 bits)
327 *
328 * This function also localizes all chip memory accesses.
329 * The copy should be written such that we read full cacheline packets
330 * from the chip.  This is usually used for a single qword
331 *
332 * NOTE:  This assumes the chip address is 64-bit aligned.
333 */
334static int qib_read_umem64(struct qib_devdata *dd, void __user *uaddr,
335			   u32 regoffs, size_t count)
336{
337	const u64 __iomem *reg_addr;
338	const u64 __iomem *reg_end;
339	u32 limit;
340	int ret;
341
342	reg_addr = (const u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit);
343	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
344		ret = -EINVAL;
345		goto bail;
346	}
347	if (count >= limit)
348		count = limit;
349	reg_end = reg_addr + (count / sizeof(u64));
350
351	/* not very efficient, but it works for now */
352	while (reg_addr < reg_end) {
353		u64 data = readq(reg_addr);
354
355		if (copy_to_user(uaddr, &data, sizeof(u64))) {
356			ret = -EFAULT;
357			goto bail;
358		}
359		reg_addr++;
360		uaddr += sizeof(u64);
361	}
362	ret = 0;
363bail:
364	return ret;
365}
366
367/*
368 * qib_write_umem64 - write a 64-bit quantity to the chip from user space
369 * @dd: the qlogic_ib device
370 * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
371 * @uaddr: the source of the data in user memory
372 * @count: the number of bytes to copy (multiple of 32 bits)
373 *
374 * This is usually used for a single qword
375 * NOTE:  This assumes the chip address is 64-bit aligned.
376 */
377
378static int qib_write_umem64(struct qib_devdata *dd, u32 regoffs,
379			    const void __user *uaddr, size_t count)
380{
381	u64 __iomem *reg_addr;
382	const u64 __iomem *reg_end;
383	u32 limit;
384	int ret;
385
386	reg_addr = (u64 __iomem *)qib_remap_ioaddr32(dd, regoffs, &limit);
387	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
388		ret = -EINVAL;
389		goto bail;
390	}
391	if (count >= limit)
392		count = limit;
393	reg_end = reg_addr + (count / sizeof(u64));
394
395	/* not very efficient, but it works for now */
396	while (reg_addr < reg_end) {
397		u64 data;
398		if (copy_from_user(&data, uaddr, sizeof(data))) {
399			ret = -EFAULT;
400			goto bail;
401		}
402		writeq(data, reg_addr);
403
404		reg_addr++;
405		uaddr += sizeof(u64);
406	}
407	ret = 0;
408bail:
409	return ret;
410}
411
412/*
413 * qib_read_umem32 - read a 32-bit quantity from the chip into user space
414 * @dd: the qlogic_ib device
415 * @uaddr: the location to store the data in user memory
416 * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
417 * @count: number of bytes to copy
418 *
419 * read 32 bit values, not 64 bit; for memories that only
420 * support 32 bit reads; usually a single dword.
421 */
422static int qib_read_umem32(struct qib_devdata *dd, void __user *uaddr,
423			   u32 regoffs, size_t count)
424{
425	const u32 __iomem *reg_addr;
426	const u32 __iomem *reg_end;
427	u32 limit;
428	int ret;
429
430	reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit);
431	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
432		ret = -EINVAL;
433		goto bail;
434	}
435	if (count >= limit)
436		count = limit;
437	reg_end = reg_addr + (count / sizeof(u32));
438
439	/* not very efficient, but it works for now */
440	while (reg_addr < reg_end) {
441		u32 data = readl(reg_addr);
442
443		if (copy_to_user(uaddr, &data, sizeof(data))) {
444			ret = -EFAULT;
445			goto bail;
446		}
447
448		reg_addr++;
449		uaddr += sizeof(u32);
450
451	}
452	ret = 0;
453bail:
454	return ret;
455}
456
457/*
458 * qib_write_umem32 - write a 32-bit quantity to the chip from user space
459 * @dd: the qlogic_ib device
460 * @regoffs: the offset from BAR0 (_NOT_ full pointer, anymore)
461 * @uaddr: the source of the data in user memory
462 * @count: number of bytes to copy
463 *
464 * write 32 bit values, not 64 bit; for memories that only
465 * support 32 bit write; usually a single dword.
466 */
467
468static int qib_write_umem32(struct qib_devdata *dd, u32 regoffs,
469			    const void __user *uaddr, size_t count)
470{
471	u32 __iomem *reg_addr;
472	const u32 __iomem *reg_end;
473	u32 limit;
474	int ret;
475
476	reg_addr = qib_remap_ioaddr32(dd, regoffs, &limit);
477	if (reg_addr == NULL || limit == 0 || !(dd->flags & QIB_PRESENT)) {
478		ret = -EINVAL;
479		goto bail;
480	}
481	if (count >= limit)
482		count = limit;
483	reg_end = reg_addr + (count / sizeof(u32));
484
485	while (reg_addr < reg_end) {
486		u32 data;
487
488		if (copy_from_user(&data, uaddr, sizeof(data))) {
489			ret = -EFAULT;
490			goto bail;
491		}
492		writel(data, reg_addr);
493
494		reg_addr++;
495		uaddr += sizeof(u32);
496	}
497	ret = 0;
498bail:
499	return ret;
500}
501
502static int qib_diag_open(struct inode *in, struct file *fp)
503{
504	int unit = iminor(in) - QIB_DIAG_MINOR_BASE;
505	struct qib_devdata *dd;
506	struct qib_diag_client *dc;
507	int ret;
508
509	mutex_lock(&qib_mutex);
510
511	dd = qib_lookup(unit);
512
513	if (dd == NULL || !(dd->flags & QIB_PRESENT) ||
514	    !dd->kregbase) {
515		ret = -ENODEV;
516		goto bail;
517	}
518
519	dc = get_client(dd);
520	if (!dc) {
521		ret = -ENOMEM;
522		goto bail;
523	}
524	dc->next = dd->diag_client;
525	dd->diag_client = dc;
526	fp->private_data = dc;
527	ret = 0;
528bail:
529	mutex_unlock(&qib_mutex);
530
531	return ret;
532}
533
534/**
535 * qib_diagpkt_write - write an IB packet
536 * @fp: the diag data device file pointer
537 * @data: qib_diag_pkt structure saying where to get the packet
538 * @count: size of data to write
539 * @off: unused by this code
540 */
541static ssize_t qib_diagpkt_write(struct file *fp,
542				 const char __user *data,
543				 size_t count, loff_t *off)
544{
545	u32 __iomem *piobuf;
546	u32 plen, clen, pbufn;
547	struct qib_diag_xpkt dp;
548	u32 *tmpbuf = NULL;
549	struct qib_devdata *dd;
550	struct qib_pportdata *ppd;
551	ssize_t ret = 0;
552
553	if (count != sizeof(dp)) {
554		ret = -EINVAL;
555		goto bail;
556	}
557	if (copy_from_user(&dp, data, sizeof(dp))) {
558		ret = -EFAULT;
559		goto bail;
560	}
561
562	dd = qib_lookup(dp.unit);
563	if (!dd || !(dd->flags & QIB_PRESENT) || !dd->kregbase) {
564		ret = -ENODEV;
565		goto bail;
566	}
567	if (!(dd->flags & QIB_INITTED)) {
568		/* no hardware, freeze, etc. */
569		ret = -ENODEV;
570		goto bail;
571	}
572
573	if (dp.version != _DIAG_XPKT_VERS) {
574		qib_dev_err(dd, "Invalid version %u for diagpkt_write\n",
575			    dp.version);
576		ret = -EINVAL;
577		goto bail;
578	}
579	/* send count must be an exact number of dwords */
580	if (dp.len & 3) {
581		ret = -EINVAL;
582		goto bail;
583	}
584	if (!dp.port || dp.port > dd->num_pports) {
585		ret = -EINVAL;
586		goto bail;
587	}
588	ppd = &dd->pport[dp.port - 1];
589
590	/* need total length before first word written */
591	/* +1 word is for the qword padding */
592	plen = sizeof(u32) + dp.len;
593	clen = dp.len >> 2;
594
595	if ((plen + 4) > ppd->ibmaxlen) {
596		ret = -EINVAL;
597		goto bail;      /* before writing pbc */
598	}
599	tmpbuf = vmalloc(plen);
600	if (!tmpbuf) {
601		qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, "
602			 "failing\n");
603		ret = -ENOMEM;
604		goto bail;
605	}
606
607	if (copy_from_user(tmpbuf,
608			   (const void __user *) (unsigned long) dp.data,
609			   dp.len)) {
610		ret = -EFAULT;
611		goto bail;
612	}
613
614	plen >>= 2;             /* in dwords */
615
616	if (dp.pbc_wd == 0)
617		dp.pbc_wd = plen;
618
619	piobuf = dd->f_getsendbuf(ppd, dp.pbc_wd, &pbufn);
620	if (!piobuf) {
621		ret = -EBUSY;
622		goto bail;
623	}
624	/* disarm it just to be extra sure */
625	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(pbufn));
626
627	/* disable header check on pbufn for this packet */
628	dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_DIS1, NULL);
629
630	writeq(dp.pbc_wd, piobuf);
631	/*
632	 * Copy all but the trigger word, then flush, so it's written
633	 * to chip before trigger word, then write trigger word, then
634	 * flush again, so packet is sent.
635	 */
636	if (dd->flags & QIB_PIO_FLUSH_WC) {
637		qib_flush_wc();
638		qib_pio_copy(piobuf + 2, tmpbuf, clen - 1);
639		qib_flush_wc();
640		__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
641	} else
642		qib_pio_copy(piobuf + 2, tmpbuf, clen);
643
644	if (dd->flags & QIB_USE_SPCL_TRIG) {
645		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
646
647		qib_flush_wc();
648		__raw_writel(0xaebecede, piobuf + spcl_off);
649	}
650
651	/*
652	 * Ensure buffer is written to the chip, then re-enable
653	 * header checks (if supported by chip).  The txchk
654	 * code will ensure seen by chip before returning.
655	 */
656	qib_flush_wc();
657	qib_sendbuf_done(dd, pbufn);
658	dd->f_txchk_change(dd, pbufn, 1, TXCHK_CHG_TYPE_ENAB1, NULL);
659
660	ret = sizeof(dp);
661
662bail:
663	vfree(tmpbuf);
664	return ret;
665}
666
667static int qib_diag_release(struct inode *in, struct file *fp)
668{
669	mutex_lock(&qib_mutex);
670	return_client(fp->private_data);
671	fp->private_data = NULL;
672	mutex_unlock(&qib_mutex);
673	return 0;
674}
675
/*
 * Chip-specific code calls to register its interest in
 * a specific range.  Each registration is kept as one element of a
 * singly linked list hanging off the device's diag_observer_list.
 */
struct diag_observer_list_elt {
	/* next element; new registrations are pushed at the head */
	struct diag_observer_list_elt *next;
	/* the registered observer; only the element is freed on removal */
	const struct diag_observer *op;
};
684
685int qib_register_observer(struct qib_devdata *dd,
686			  const struct diag_observer *op)
687{
688	struct diag_observer_list_elt *olp;
689	int ret = -EINVAL;
690
691	if (!dd || !op)
692		goto bail;
693	ret = -ENOMEM;
694	olp = vmalloc(sizeof *olp);
695	if (!olp) {
696		printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n");
697		goto bail;
698	}
699	if (olp) {
700		unsigned long flags;
701
702		spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
703		olp->op = op;
704		olp->next = dd->diag_observer_list;
705		dd->diag_observer_list = olp;
706		spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
707		ret = 0;
708	}
709bail:
710	return ret;
711}
712
713/* Remove all registered observers when device is closed */
714static void qib_unregister_observers(struct qib_devdata *dd)
715{
716	struct diag_observer_list_elt *olp;
717	unsigned long flags;
718
719	spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
720	olp = dd->diag_observer_list;
721	while (olp) {
722		/* Pop one observer, let go of lock */
723		dd->diag_observer_list = olp->next;
724		spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
725		vfree(olp);
726		/* try again. */
727		spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
728		olp = dd->diag_observer_list;
729	}
730	spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
731}
732
733/*
734 * Find the observer, if any, for the specified address. Initial implementation
735 * is simple stack of observers. This must be called with diag transaction
736 * lock held.
737 */
738static const struct diag_observer *diag_get_observer(struct qib_devdata *dd,
739						     u32 addr)
740{
741	struct diag_observer_list_elt *olp;
742	const struct diag_observer *op = NULL;
743
744	olp = dd->diag_observer_list;
745	while (olp) {
746		op = olp->op;
747		if (addr >= op->bottom && addr <= op->top)
748			break;
749		olp = olp->next;
750	}
751	if (!olp)
752		op = NULL;
753
754	return op;
755}
756
757static ssize_t qib_diag_read(struct file *fp, char __user *data,
758			     size_t count, loff_t *off)
759{
760	struct qib_diag_client *dc = fp->private_data;
761	struct qib_devdata *dd = dc->dd;
762	void __iomem *kreg_base;
763	ssize_t ret;
764
765	if (dc->pid != current->pid) {
766		ret = -EPERM;
767		goto bail;
768	}
769
770	kreg_base = dd->kregbase;
771
772	if (count == 0)
773		ret = 0;
774	else if ((count % 4) || (*off % 4))
775		/* address or length is not 32-bit aligned, hence invalid */
776		ret = -EINVAL;
777	else if (dc->state < READY && (*off || count != 8))
778		ret = -EINVAL;  /* prevent cat /dev/qib_diag* */
779	else {
780		unsigned long flags;
781		u64 data64 = 0;
782		int use_32;
783		const struct diag_observer *op;
784
785		use_32 = (count % 8) || (*off % 8);
786		ret = -1;
787		spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
788		/*
789		 * Check for observer on this address range.
790		 * we only support a single 32 or 64-bit read
791		 * via observer, currently.
792		 */
793		op = diag_get_observer(dd, *off);
794		if (op) {
795			u32 offset = *off;
796			ret = op->hook(dd, op, offset, &data64, 0, use_32);
797		}
798		/*
799		 * We need to release lock before any copy_to_user(),
800		 * whether implicit in qib_read_umem* or explicit below.
801		 */
802		spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
803		if (!op) {
804			if (use_32)
805				/*
806				 * Address or length is not 64-bit aligned;
807				 * do 32-bit rd
808				 */
809				ret = qib_read_umem32(dd, data, (u32) *off,
810						      count);
811			else
812				ret = qib_read_umem64(dd, data, (u32) *off,
813						      count);
814		} else if (ret == count) {
815			/* Below finishes case where observer existed */
816			ret = copy_to_user(data, &data64, use_32 ?
817					   sizeof(u32) : sizeof(u64));
818			if (ret)
819				ret = -EFAULT;
820		}
821	}
822
823	if (ret >= 0) {
824		*off += count;
825		ret = count;
826		if (dc->state == OPENED)
827			dc->state = INIT;
828	}
829bail:
830	return ret;
831}
832
833static ssize_t qib_diag_write(struct file *fp, const char __user *data,
834			      size_t count, loff_t *off)
835{
836	struct qib_diag_client *dc = fp->private_data;
837	struct qib_devdata *dd = dc->dd;
838	void __iomem *kreg_base;
839	ssize_t ret;
840
841	if (dc->pid != current->pid) {
842		ret = -EPERM;
843		goto bail;
844	}
845
846	kreg_base = dd->kregbase;
847
848	if (count == 0)
849		ret = 0;
850	else if ((count % 4) || (*off % 4))
851		/* address or length is not 32-bit aligned, hence invalid */
852		ret = -EINVAL;
853	else if (dc->state < READY &&
854		((*off || count != 8) || dc->state != INIT))
855		/* No writes except second-step of init seq */
856		ret = -EINVAL;  /* before any other write allowed */
857	else {
858		unsigned long flags;
859		const struct diag_observer *op = NULL;
860		int use_32 =  (count % 8) || (*off % 8);
861
862		/*
863		 * Check for observer on this address range.
864		 * We only support a single 32 or 64-bit write
865		 * via observer, currently. This helps, because
866		 * we would otherwise have to jump through hoops
867		 * to make "diag transaction" meaningful when we
868		 * cannot do a copy_from_user while holding the lock.
869		 */
870		if (count == 4 || count == 8) {
871			u64 data64;
872			u32 offset = *off;
873			ret = copy_from_user(&data64, data, count);
874			if (ret) {
875				ret = -EFAULT;
876				goto bail;
877			}
878			spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
879			op = diag_get_observer(dd, *off);
880			if (op)
881				ret = op->hook(dd, op, offset, &data64, ~0Ull,
882					       use_32);
883			spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags);
884		}
885
886		if (!op) {
887			if (use_32)
888				/*
889				 * Address or length is not 64-bit aligned;
890				 * do 32-bit write
891				 */
892				ret = qib_write_umem32(dd, (u32) *off, data,
893						       count);
894			else
895				ret = qib_write_umem64(dd, (u32) *off, data,
896						       count);
897		}
898	}
899
900	if (ret >= 0) {
901		*off += count;
902		ret = count;
903		if (dc->state == INIT)
904			dc->state = READY; /* all read/write OK now */
905	}
906bail:
907	return ret;
908}
909