/*
 * Copyright (c) 2010 Broadcom Corporation
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/pci.h>

#include <brcmu_utils.h>
#include <aiutils.h>
#include "types.h"
#include "dma.h"
#include "soc.h"

/*
 * dma register field offset calculation
 */
#define DMA64REGOFFS(field)		offsetof(struct dma64regs, field)
#define DMA64TXREGOFFS(di, field)	(di->d64txregbase + DMA64REGOFFS(field))
#define DMA64RXREGOFFS(di, field)	(di->d64rxregbase + DMA64REGOFFS(field))

/*
 * DMA hardware requires each descriptor ring to be 8kB aligned and to fit
 * within a single contiguous 8kB physical address range.
 */
#define D64RINGALIGN_BITS	13
#define	D64MAXRINGSZ		(1 << D64RINGALIGN_BITS)
#define	D64RINGALIGN		(1 << D64RINGALIGN_BITS)

#define	D64MAXDD	(D64MAXRINGSZ / sizeof(struct dma64desc))
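/*
 * Worked example: struct dma64desc is 4 x 4 bytes = 16 bytes, so an 8kB
 * ring holds at most D64MAXDD = 8192 / 16 = 512 descriptors.
 */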

/* transmit channel control */
#define	D64_XC_XE		0x00000001	/* transmit enable */
#define	D64_XC_SE		0x00000002	/* transmit suspend request */
#define	D64_XC_LE		0x00000004	/* loopback enable */
#define	D64_XC_FL		0x00000010	/* flush request */
#define	D64_XC_PD		0x00000800	/* parity check disable */
#define	D64_XC_AE		0x00030000	/* address extension bits */
#define	D64_XC_AE_SHIFT		16

/* transmit descriptor table pointer */
#define	D64_XP_LD_MASK		0x00000fff	/* last valid descriptor */

/* transmit channel status */
#define	D64_XS0_CD_MASK		0x00001fff	/* current descriptor pointer */
#define	D64_XS0_XS_MASK		0xf0000000	/* transmit state */
#define	D64_XS0_XS_SHIFT		28
#define	D64_XS0_XS_DISABLED	0x00000000	/* disabled */
#define	D64_XS0_XS_ACTIVE	0x10000000	/* active */
#define	D64_XS0_XS_IDLE		0x20000000	/* idle wait */
#define	D64_XS0_XS_STOPPED	0x30000000	/* stopped */
#define	D64_XS0_XS_SUSP		0x40000000	/* suspend pending */

#define	D64_XS1_AD_MASK		0x00001fff	/* active descriptor */
#define	D64_XS1_XE_MASK		0xf0000000	/* transmit errors */
#define	D64_XS1_XE_SHIFT		28
#define	D64_XS1_XE_NOERR	0x00000000	/* no error */
#define	D64_XS1_XE_DPE		0x10000000	/* descriptor protocol error */
#define	D64_XS1_XE_DFU		0x20000000	/* data fifo underrun */
#define	D64_XS1_XE_DTE		0x30000000	/* data transfer error */
#define	D64_XS1_XE_DESRE	0x40000000	/* descriptor read error */
#define	D64_XS1_XE_COREE	0x50000000	/* core error */

/* receive channel control */
/* receive enable */
#define	D64_RC_RE		0x00000001
/* receive frame offset */
#define	D64_RC_RO_MASK		0x000000fe
#define	D64_RC_RO_SHIFT		1
/* direct fifo receive (pio) mode */
#define	D64_RC_FM		0x00000100
/* separate rx header descriptor enable */
#define	D64_RC_SH		0x00000200
/* overflow continue */
#define	D64_RC_OC		0x00000400
/* parity check disable */
#define	D64_RC_PD		0x00000800
/* address extension bits */
#define	D64_RC_AE		0x00030000
#define	D64_RC_AE_SHIFT		16

/* flags for dma controller */
/* parity enable */
#define DMA_CTRL_PEN		(1 << 0)
/* rx overflow continue */
#define DMA_CTRL_ROC		(1 << 1)
/* allow rx scatter to multiple descriptors */
#define DMA_CTRL_RXMULTI	(1 << 2)
/* Unframed Rx/Tx data */
#define DMA_CTRL_UNFRAMED	(1 << 3)

/* receive descriptor table pointer */
#define	D64_RP_LD_MASK		0x00000fff	/* last valid descriptor */

/* receive channel status */
#define	D64_RS0_CD_MASK		0x00001fff	/* current descriptor pointer */
#define	D64_RS0_RS_MASK		0xf0000000	/* receive state */
#define	D64_RS0_RS_SHIFT		28
#define	D64_RS0_RS_DISABLED	0x00000000	/* disabled */
#define	D64_RS0_RS_ACTIVE	0x10000000	/* active */
#define	D64_RS0_RS_IDLE		0x20000000	/* idle wait */
#define	D64_RS0_RS_STOPPED	0x30000000	/* stopped */
#define	D64_RS0_RS_SUSP		0x40000000	/* suspend pending */

#define	D64_RS1_AD_MASK		0x0001ffff	/* active descriptor */
#define	D64_RS1_RE_MASK		0xf0000000	/* receive errors */
#define	D64_RS1_RE_SHIFT		28
#define	D64_RS1_RE_NOERR	0x00000000	/* no error */
#define	D64_RS1_RE_DPO		0x10000000	/* descriptor protocol error */
#define	D64_RS1_RE_DFU		0x20000000	/* data fifo overflow */
#define	D64_RS1_RE_DTE		0x30000000	/* data transfer error */
#define	D64_RS1_RE_DESRE	0x40000000	/* descriptor read error */
#define	D64_RS1_RE_COREE	0x50000000	/* core error */

/* fifoaddr */
#define	D64_FA_OFF_MASK		0xffff	/* offset */
#define	D64_FA_SEL_MASK		0xf0000	/* select */
#define	D64_FA_SEL_SHIFT	16
#define	D64_FA_SEL_XDD		0x00000	/* transmit dma data */
#define	D64_FA_SEL_XDP		0x10000	/* transmit dma pointers */
#define	D64_FA_SEL_RDD		0x40000	/* receive dma data */
#define	D64_FA_SEL_RDP		0x50000	/* receive dma pointers */
#define	D64_FA_SEL_XFD		0x80000	/* transmit fifo data */
#define	D64_FA_SEL_XFP		0x90000	/* transmit fifo pointers */
#define	D64_FA_SEL_RFD		0xc0000	/* receive fifo data */
#define	D64_FA_SEL_RFP		0xd0000	/* receive fifo pointers */
#define	D64_FA_SEL_RSD		0xe0000	/* receive frame status data */
#define	D64_FA_SEL_RSP		0xf0000	/* receive frame status pointers */

/* descriptor control flags 1 */
#define D64_CTRL_COREFLAGS	0x0ff00000	/* core specific flags */
#define	D64_CTRL1_EOT		((u32)1 << 28)	/* end of descriptor table */
#define	D64_CTRL1_IOC		((u32)1 << 29)	/* interrupt on completion */
#define	D64_CTRL1_EOF		((u32)1 << 30)	/* end of frame */
#define	D64_CTRL1_SOF		((u32)1 << 31)	/* start of frame */

/* descriptor control flags 2 */
/* buffer byte count. real data len must <= 16KB */
#define	D64_CTRL2_BC_MASK	0x00007fff
/* address extension bits */
#define	D64_CTRL2_AE		0x00030000
#define	D64_CTRL2_AE_SHIFT	16
/* parity bit */
#define D64_CTRL2_PARITY	0x00040000

/* control flags in the range [27:20] are core-specific and not defined here */
#define	D64_CTRL_CORE_MASK	0x0ff00000

#define D64_RX_FRM_STS_LEN	0x0000ffff	/* frame length mask */
#define D64_RX_FRM_STS_OVFL	0x00800000	/* RxOverFlow */
#define D64_RX_FRM_STS_DSCRCNT	0x0f000000	/* no. of descriptors used - 1 */
#define D64_RX_FRM_STS_DATATYPE	0xf0000000	/* core-dependent data type */

/*
 * Packet headroom necessary to accommodate the largest header
 * in the system (i.e. TXOFF). By doing this, we avoid the need to
 * allocate an extra buffer for the header when bridging to WL.
 * There is a compile-time check in wlc.c which ensures that this
 * value is at least as big as TXOFF. This value is used in
 * dma_rxfill().
 */

#define BCMEXTRAHDROOM 172

/* debug/trace */
#ifdef DEBUG
#define	DMA_ERROR(fmt, ...)					\
do {								\
	if (*di->msg_level & 1)					\
		pr_debug("%s: " fmt, __func__, ##__VA_ARGS__);	\
} while (0)
#define	DMA_TRACE(fmt, ...)					\
do {								\
	if (*di->msg_level & 2)					\
		pr_debug("%s: " fmt, __func__, ##__VA_ARGS__);	\
} while (0)
#else
#define	DMA_ERROR(fmt, ...)			\
	no_printk(fmt, ##__VA_ARGS__)
#define	DMA_TRACE(fmt, ...)			\
	no_printk(fmt, ##__VA_ARGS__)
#endif				/* DEBUG */

#define	DMA_NONE(fmt, ...)			\
	no_printk(fmt, ##__VA_ARGS__)

#define	MAXNAMEL	8	/* 8 char names */

/* macros to convert between byte offsets and indexes */
#define	B2I(bytes, type)	((bytes) / sizeof(type))
#define	I2B(index, type)	((index) * sizeof(type))
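/*
 * Example: the hardware reports the current descriptor as a byte offset from
 * the ring base; with 16-byte dma64desc entries, a status offset of 0x40
 * converts to ring index B2I(0x40, struct dma64desc) == 4, and posting
 * index 4 back to the chip uses I2B(4, struct dma64desc) == 0x40.
 */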

#define	PCI32ADDR_HIGH		0xc0000000	/* address[31:30] */
#define	PCI32ADDR_HIGH_SHIFT	30	/* address[31:30] */

#define	PCI64ADDR_HIGH		0x80000000	/* address[63] */
#define	PCI64ADDR_HIGH_SHIFT	31	/* address[63] */
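/*
 * Address extension example for 32-bit PCI: a buffer at physical address
 * 0xc0001000 has bits 31:30 set, so ae = (pa & PCI32ADDR_HIGH) >>
 * PCI32ADDR_HIGH_SHIFT == 3 is programmed into the AE field and the
 * descriptor/register only carries the remaining low address 0x00001000.
 */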

/*
 * DMA Descriptor
 * Descriptors are only read by the hardware, never written back.
 */
struct dma64desc {
	__le32 ctrl1;	/* misc control bits & bufcount */
	__le32 ctrl2;	/* buffer count and address extension */
	__le32 addrlow;	/* memory address of the data buffer, bits 31:0 */
	__le32 addrhigh; /* memory address of the data buffer, bits 63:32 */
};

/* dma engine software state */
struct dma_info {
	struct dma_pub dma; /* exported structure */
	uint *msg_level;	/* message level pointer */
	char name[MAXNAMEL];	/* callers name for diag msgs */

	struct bcma_device *core;
	struct device *dmadev;

	bool dma64;	/* this dma engine is operating in 64-bit mode */
	bool addrext;	/* this dma engine supports DmaExtendedAddrChanges */

	/* 64-bit dma tx engine registers */
	uint d64txregbase;
	/* 64-bit dma rx engine registers */
	uint d64rxregbase;
	/* pointer to dma64 tx descriptor ring */
	struct dma64desc *txd64;
	/* pointer to dma64 rx descriptor ring */
	struct dma64desc *rxd64;

	u16 dmadesc_align;	/* alignment requirement for dma descriptors */

	u16 ntxd;		/* # tx descriptors tunable */
	u16 txin;		/* index of next descriptor to reclaim */
	u16 txout;		/* index of next descriptor to post */
	/* pointer to parallel array of pointers to packets */
	struct sk_buff **txp;
	/* Aligned physical address of descriptor ring */
	dma_addr_t txdpa;
	/* Original physical address of descriptor ring */
	dma_addr_t txdpaorig;
	u16 txdalign;	/* #bytes added to alloc'd mem to align txd */
	u32 txdalloc;	/* #bytes allocated for the ring */
	u32 xmtptrbase;	/* When using unaligned descriptors, the ptr register
			 * is not just an index, it needs all 13 bits to be
			 * an offset from the addr register.
			 */

	u16 nrxd;	/* # rx descriptors tunable */
	u16 rxin;	/* index of next descriptor to reclaim */
	u16 rxout;	/* index of next descriptor to post */
	/* pointer to parallel array of pointers to packets */
	struct sk_buff **rxp;
	/* Aligned physical address of descriptor ring */
	dma_addr_t rxdpa;
	/* Original physical address of descriptor ring */
	dma_addr_t rxdpaorig;
	u16 rxdalign;	/* #bytes added to alloc'd mem to align rxd */
	u32 rxdalloc;	/* #bytes allocated for the ring */
	u32 rcvptrbase;	/* Base for ptr reg when using unaligned descriptors */

	/* tunables */
	unsigned int rxbufsize;	/* rx buffer size in bytes, not including
				 * the extra headroom
				 */
	uint rxextrahdrroom;	/* extra rx headroom, reserved to assist upper
				 * stack, e.g. some rx pkt buffers will be
				 * bridged to tx side without byte copying.
				 * The extra headroom needs to be large enough
				 * to fit txheader needs. Some dongle drivers
				 * may not need it.
				 */
	uint nrxpost;		/* # rx buffers to keep posted */
	unsigned int rxoffset;	/* rxcontrol offset */
	/* add to get dma address of descriptor ring, low 32 bits */
	uint ddoffsetlow;
	/*   high 32 bits */
	uint ddoffsethigh;
	/* add to get dma address of data buffer, low 32 bits */
	uint dataoffsetlow;
	/*   high 32 bits */
	uint dataoffsethigh;
	/* descriptor base need to be aligned or not */
	bool aligndesc_4k;
};

/*
 * default dma message level (if input msg_level
 * pointer is null in dma_attach())
 */
static uint dma_msg_level;

/* Check for odd number of 1's */
static u32 parity32(__le32 data)
{
	/* no swap needed for counting 1's */
	u32 par_data = *(u32 *)&data;

	par_data ^= par_data >> 16;
	par_data ^= par_data >> 8;
	par_data ^= par_data >> 4;
	par_data ^= par_data >> 2;
	par_data ^= par_data >> 1;

	return par_data & 1;
}

static bool dma64_dd_parity(struct dma64desc *dd)
{
	return parity32(dd->addrlow ^ dd->addrhigh ^ dd->ctrl1 ^ dd->ctrl2);
}
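/*
 * Example: a descriptor whose four words XOR to 0x00000003 has two 1-bits,
 * so parity32() returns 0 and no parity bit is needed; an XOR of 0x00000001
 * returns 1 and D64_CTRL2_PARITY is set in dma64_dd_upd() to make the
 * overall parity of the descriptor even.
 */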

/* descriptor bumping functions */

static uint xxd(uint x, uint n)
{
	return x & (n - 1); /* faster than %, but n must be power of 2 */
}

static uint txd(struct dma_info *di, uint x)
{
	return xxd(x, di->ntxd);
}

static uint rxd(struct dma_info *di, uint x)
{
	return xxd(x, di->nrxd);
}

static uint nexttxd(struct dma_info *di, uint i)
{
	return txd(di, i + 1);
}

static uint prevtxd(struct dma_info *di, uint i)
{
	return txd(di, i - 1);
}

static uint nextrxd(struct dma_info *di, uint i)
{
	return rxd(di, i + 1);
}

static uint ntxdactive(struct dma_info *di, uint h, uint t)
{
	return txd(di, t-h);
}

static uint nrxdactive(struct dma_info *di, uint h, uint t)
{
	return rxd(di, t-h);
}
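/*
 * The index helpers rely on ntxd/nrxd being powers of two so that the
 * masking in xxd() handles wrap-around. Example with ntxd == 64:
 * txin == 60 and txout == 2 gives ntxdactive() == (2 - 60) & 63 == 6
 * descriptors currently in flight.
 */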

static uint _dma_ctrlflags(struct dma_info *di, uint mask, uint flags)
{
	uint dmactrlflags;

	if (di == NULL) {
		DMA_ERROR("NULL dma handle\n");
		return 0;
	}

	dmactrlflags = di->dma.dmactrlflags;
	dmactrlflags &= ~mask;
	dmactrlflags |= flags;

	/* If trying to enable parity, check if parity is actually supported */
	if (dmactrlflags & DMA_CTRL_PEN) {
		u32 control;

		control = bcma_read32(di->core, DMA64TXREGOFFS(di, control));
		bcma_write32(di->core, DMA64TXREGOFFS(di, control),
		      control | D64_XC_PD);
		if (bcma_read32(di->core, DMA64TXREGOFFS(di, control)) &
		    D64_XC_PD)
			/* We *can* disable it so it is supported,
			 * restore control register
			 */
			bcma_write32(di->core, DMA64TXREGOFFS(di, control),
				     control);
		else
			/* Not supported, don't allow it to be enabled */
			dmactrlflags &= ~DMA_CTRL_PEN;
	}

	di->dma.dmactrlflags = dmactrlflags;

	return dmactrlflags;
}
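/*
 * Usage: _dma_ctrlflags(di, mask, flags) clears the bits in mask and then
 * sets the bits in flags, e.g. _dma_ctrlflags(di, DMA_CTRL_PEN, DMA_CTRL_PEN)
 * requests parity (kept only if the probe above shows the engine supports
 * it), while _dma_ctrlflags(di, DMA_CTRL_PEN, 0) clears it again.
 */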

static bool _dma64_addrext(struct dma_info *di, uint ctrl_offset)
{
	u32 w;
	bcma_set32(di->core, ctrl_offset, D64_XC_AE);
	w = bcma_read32(di->core, ctrl_offset);
	bcma_mask32(di->core, ctrl_offset, ~D64_XC_AE);
	return (w & D64_XC_AE) == D64_XC_AE;
}

/*
 * return true if this dma engine supports DmaExtendedAddrChanges,
 * otherwise false
 */
static bool _dma_isaddrext(struct dma_info *di)
{
	/* DMA64 supports full 32- or 64-bit operation. AE is always valid */

	/* not all tx or rx channels are available */
	if (di->d64txregbase != 0) {
		if (!_dma64_addrext(di, DMA64TXREGOFFS(di, control)))
			DMA_ERROR("%s: DMA64 tx doesn't have AE set\n",
				  di->name);
		return true;
	} else if (di->d64rxregbase != 0) {
		if (!_dma64_addrext(di, DMA64RXREGOFFS(di, control)))
			DMA_ERROR("%s: DMA64 rx doesn't have AE set\n",
				  di->name);
		return true;
	}

	return false;
}

static bool _dma_descriptor_align(struct dma_info *di)
{
	u32 addrl;

	/* Check to see if the descriptors need to be aligned on 4K/8K or not */
	if (di->d64txregbase != 0) {
		bcma_write32(di->core, DMA64TXREGOFFS(di, addrlow), 0xff0);
		addrl = bcma_read32(di->core, DMA64TXREGOFFS(di, addrlow));
		if (addrl != 0)
			return false;
	} else if (di->d64rxregbase != 0) {
		bcma_write32(di->core, DMA64RXREGOFFS(di, addrlow), 0xff0);
		addrl = bcma_read32(di->core, DMA64RXREGOFFS(di, addrlow));
		if (addrl != 0)
			return false;
	}
	return true;
}

/*
 * Descriptor table must start at the DMA hardware dictated alignment, so
 * allocated memory must be large enough to support this requirement.
 */
static void *dma_alloc_consistent(struct dma_info *di, uint size,
				  u16 align_bits, uint *alloced,
				  dma_addr_t *pap)
{
	if (align_bits) {
		u16 align = (1 << align_bits);
		if (!IS_ALIGNED(PAGE_SIZE, align))
			size += align;
		*alloced = size;
	}
	return dma_alloc_coherent(di->dmadev, size, pap, GFP_ATOMIC);
}

static
u8 dma_align_sizetobits(uint size)
{
	u8 bitpos = 0;
	while (size >>= 1)
		bitpos++;
	return bitpos;
}
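/*
 * dma_align_sizetobits() returns floor(log2(size)), e.g. 4096 -> 12 and
 * 8192 -> 13, which dma_ringalloc() uses below to retry an allocation
 * aligned to the ring size itself.
 */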

/* This function ensures that the DMA descriptor ring does not get allocated
 * across a page boundary. If the first allocation does cross a page boundary,
 * it is freed and the allocation is redone at a location aligned to the
 * descriptor ring size, which guarantees that the ring stays within a single
 * page.
 */
static void *dma_ringalloc(struct dma_info *di, u32 boundary, uint size,
			   u16 *alignbits, uint *alloced,
			   dma_addr_t *descpa)
{
	void *va;
	u32 desc_strtaddr;
	u32 alignbytes = 1 << *alignbits;

	va = dma_alloc_consistent(di, size, *alignbits, alloced, descpa);

	if (NULL == va)
		return NULL;

	desc_strtaddr = (u32) roundup((unsigned long)va, alignbytes);
	if (((desc_strtaddr + size - 1) & boundary) != (desc_strtaddr
							& boundary)) {
		*alignbits = dma_align_sizetobits(size);
		dma_free_coherent(di->dmadev, size, va, *descpa);
		va = dma_alloc_consistent(di, size, *alignbits,
			alloced, descpa);
	}
	return va;
}
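/*
 * Example: with boundary == D64RINGALIGN (0x2000), a 0x400-byte ring placed
 * at 0x1f00 would span 0x1f00..0x22ff and flip the boundary bit, so the
 * first allocation is freed and redone with ring-size alignment, keeping
 * the whole ring inside one aligned region.
 */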

static bool dma64_alloc(struct dma_info *di, uint direction)
{
	u16 size;
	uint ddlen;
	void *va;
	uint alloced = 0;
	u16 align;
	u16 align_bits;

	ddlen = sizeof(struct dma64desc);

	size = (direction == DMA_TX) ? (di->ntxd * ddlen) : (di->nrxd * ddlen);
	align_bits = di->dmadesc_align;
	align = (1 << align_bits);

	if (direction == DMA_TX) {
		va = dma_ringalloc(di, D64RINGALIGN, size, &align_bits,
			&alloced, &di->txdpaorig);
		if (va == NULL) {
			DMA_ERROR("%s: DMA_ALLOC_CONSISTENT(ntxd) failed\n",
				  di->name);
			return false;
		}
		align = (1 << align_bits);
		di->txd64 = (struct dma64desc *)
					roundup((unsigned long)va, align);
		di->txdalign = (uint) ((s8 *)di->txd64 - (s8 *) va);
		di->txdpa = di->txdpaorig + di->txdalign;
		di->txdalloc = alloced;
	} else {
		va = dma_ringalloc(di, D64RINGALIGN, size, &align_bits,
			&alloced, &di->rxdpaorig);
		if (va == NULL) {
			DMA_ERROR("%s: DMA_ALLOC_CONSISTENT(nrxd) failed\n",
				  di->name);
			return false;
		}
		align = (1 << align_bits);
		di->rxd64 = (struct dma64desc *)
					roundup((unsigned long)va, align);
		di->rxdalign = (uint) ((s8 *)di->rxd64 - (s8 *) va);
		di->rxdpa = di->rxdpaorig + di->rxdalign;
		di->rxdalloc = alloced;
	}

	return true;
}

static bool _dma_alloc(struct dma_info *di, uint direction)
{
	return dma64_alloc(di, direction);
}

struct dma_pub *dma_attach(char *name, struct si_pub *sih,
			   struct bcma_device *core,
			   uint txregbase, uint rxregbase, uint ntxd, uint nrxd,
			   uint rxbufsize, int rxextheadroom,
			   uint nrxpost, uint rxoffset, uint *msg_level)
{
	struct dma_info *di;
	u8 rev = core->id.rev;
	uint size;

	/* allocate private info structure */
	di = kzalloc(sizeof(struct dma_info), GFP_ATOMIC);
	if (di == NULL)
		return NULL;

	di->msg_level = msg_level ? msg_level : &dma_msg_level;


	di->dma64 =
		((bcma_aread32(core, BCMA_IOST) & SISF_DMA64) == SISF_DMA64);

	/* init dma reg info */
	di->core = core;
	di->d64txregbase = txregbase;
	di->d64rxregbase = rxregbase;

	/*
	 * Default flags (which can be changed by the driver calling
	 * dma_ctrlflags before enable): For backwards compatibility
	 * both Rx Overflow Continue and Parity are DISABLED.
	 */
	_dma_ctrlflags(di, DMA_CTRL_ROC | DMA_CTRL_PEN, 0);

	DMA_TRACE("%s: %s flags 0x%x ntxd %d nrxd %d "
		  "rxbufsize %d rxextheadroom %d nrxpost %d rxoffset %d "
		  "txregbase %u rxregbase %u\n", name, "DMA64",
		  di->dma.dmactrlflags, ntxd, nrxd, rxbufsize,
		  rxextheadroom, nrxpost, rxoffset, txregbase, rxregbase);

	/* make a private copy of our callers name */
	strncpy(di->name, name, MAXNAMEL);
	di->name[MAXNAMEL - 1] = '\0';

	di->dmadev = core->dma_dev;

	/* save tunables */
	di->ntxd = (u16) ntxd;
	di->nrxd = (u16) nrxd;

	/* the actual dma size doesn't include the extra headroom */
	di->rxextrahdrroom =
	    (rxextheadroom == -1) ? BCMEXTRAHDROOM : rxextheadroom;
	if (rxbufsize > BCMEXTRAHDROOM)
		di->rxbufsize = (u16) (rxbufsize - di->rxextrahdrroom);
	else
		di->rxbufsize = (u16) rxbufsize;

	di->nrxpost = (u16) nrxpost;
	di->rxoffset = (u8) rxoffset;

	/*
	 * figure out the DMA physical address offset for dd and data
	 *     PCI/PCIE: they map silicon backplane address to zero
	 *     based memory, need offset
	 *     Other bus: use zero; SI_BUS BIGENDIAN kludge: use sdram
	 *     swapped region for data buffer, not descriptor
	 */
	di->ddoffsetlow = 0;
	di->dataoffsetlow = 0;
	/* add offset for pcie with DMA64 bus */
	di->ddoffsetlow = 0;
	di->ddoffsethigh = SI_PCIE_DMA_H32;
	di->dataoffsetlow = di->ddoffsetlow;
	di->dataoffsethigh = di->ddoffsethigh;
	/* WAR64450 : DMACtl.Addr ext fields are not supported in SDIOD core. */
	if ((core->id.id == SDIOD_CORE_ID)
	    && ((rev > 0) && (rev <= 2)))
		di->addrext = false;
	else if ((core->id.id == I2S_CORE_ID) &&
		 ((rev == 0) || (rev == 1)))
		di->addrext = false;
	else
		di->addrext = _dma_isaddrext(di);

	/* does the descriptor need to be aligned and if yes, on 4K/8K or not */
	di->aligndesc_4k = _dma_descriptor_align(di);
	if (di->aligndesc_4k) {
		di->dmadesc_align = D64RINGALIGN_BITS;
		if ((ntxd < D64MAXDD / 2) && (nrxd < D64MAXDD / 2))
			/* for smaller dd table, HW relax alignment reqmnt */
			di->dmadesc_align = D64RINGALIGN_BITS - 1;
	} else {
		di->dmadesc_align = 4;	/* 16 byte alignment */
	}

	DMA_NONE("DMA descriptor align_needed %d, align %d\n",
		 di->aligndesc_4k, di->dmadesc_align);

	/* allocate tx packet pointer vector */
	if (ntxd) {
		size = ntxd * sizeof(void *);
		di->txp = kzalloc(size, GFP_ATOMIC);
		if (di->txp == NULL)
			goto fail;
	}

	/* allocate rx packet pointer vector */
	if (nrxd) {
		size = nrxd * sizeof(void *);
		di->rxp = kzalloc(size, GFP_ATOMIC);
		if (di->rxp == NULL)
			goto fail;
	}

	/*
	 * allocate transmit descriptor ring, only need ntxd descriptors
	 * but it must be aligned
	 */
	if (ntxd) {
		if (!_dma_alloc(di, DMA_TX))
			goto fail;
	}

	/*
	 * allocate receive descriptor ring, only need nrxd descriptors
	 * but it must be aligned
	 */
	if (nrxd) {
		if (!_dma_alloc(di, DMA_RX))
			goto fail;
	}

	if ((di->ddoffsetlow != 0) && !di->addrext) {
		if (di->txdpa > SI_PCI_DMA_SZ) {
			DMA_ERROR("%s: txdpa 0x%x: addrext not supported\n",
				  di->name, (u32)di->txdpa);
			goto fail;
		}
		if (di->rxdpa > SI_PCI_DMA_SZ) {
			DMA_ERROR("%s: rxdpa 0x%x: addrext not supported\n",
				  di->name, (u32)di->rxdpa);
			goto fail;
		}
	}

	DMA_TRACE("ddoffsetlow 0x%x ddoffsethigh 0x%x dataoffsetlow 0x%x dataoffsethigh 0x%x addrext %d\n",
		  di->ddoffsetlow, di->ddoffsethigh,
		  di->dataoffsetlow, di->dataoffsethigh,
		  di->addrext);

	return (struct dma_pub *) di;

 fail:
	dma_detach((struct dma_pub *)di);
	return NULL;
}

static inline void
dma64_dd_upd(struct dma_info *di, struct dma64desc *ddring,
	     dma_addr_t pa, uint outidx, u32 *flags, u32 bufcount)
{
	u32 ctrl2 = bufcount & D64_CTRL2_BC_MASK;

	/* PCI bus with big(>1G) physical address, use address extension */
	if ((di->dataoffsetlow == 0) || !(pa & PCI32ADDR_HIGH)) {
		ddring[outidx].addrlow = cpu_to_le32(pa + di->dataoffsetlow);
		ddring[outidx].addrhigh = cpu_to_le32(di->dataoffsethigh);
		ddring[outidx].ctrl1 = cpu_to_le32(*flags);
		ddring[outidx].ctrl2 = cpu_to_le32(ctrl2);
	} else {
		/* address extension for 32-bit PCI */
		u32 ae;

		ae = (pa & PCI32ADDR_HIGH) >> PCI32ADDR_HIGH_SHIFT;
		pa &= ~PCI32ADDR_HIGH;

		ctrl2 |= (ae << D64_CTRL2_AE_SHIFT) & D64_CTRL2_AE;
		ddring[outidx].addrlow = cpu_to_le32(pa + di->dataoffsetlow);
		ddring[outidx].addrhigh = cpu_to_le32(di->dataoffsethigh);
		ddring[outidx].ctrl1 = cpu_to_le32(*flags);
		ddring[outidx].ctrl2 = cpu_to_le32(ctrl2);
	}
	if (di->dma.dmactrlflags & DMA_CTRL_PEN) {
		if (dma64_dd_parity(&ddring[outidx]))
			ddring[outidx].ctrl2 =
			     cpu_to_le32(ctrl2 | D64_CTRL2_PARITY);
	}
}

/* !! may be called with core in reset */
void dma_detach(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;

	DMA_TRACE("%s:\n", di->name);

	/* free dma descriptor rings */
	if (di->txd64)
		dma_free_coherent(di->dmadev, di->txdalloc,
				  ((s8 *)di->txd64 - di->txdalign),
				  (di->txdpaorig));
	if (di->rxd64)
		dma_free_coherent(di->dmadev, di->rxdalloc,
				  ((s8 *)di->rxd64 - di->rxdalign),
				  (di->rxdpaorig));

	/* free packet pointer vectors */
	kfree(di->txp);
	kfree(di->rxp);

	/* free our private info structure */
	kfree(di);

}

/* initialize descriptor table base address */
static void
_dma_ddtable_init(struct dma_info *di, uint direction, dma_addr_t pa)
{
	if (!di->aligndesc_4k) {
		if (direction == DMA_TX)
			di->xmtptrbase = pa;
		else
			di->rcvptrbase = pa;
	}

	if ((di->ddoffsetlow == 0)
	    || !(pa & PCI32ADDR_HIGH)) {
		if (direction == DMA_TX) {
			bcma_write32(di->core, DMA64TXREGOFFS(di, addrlow),
				     pa + di->ddoffsetlow);
			bcma_write32(di->core, DMA64TXREGOFFS(di, addrhigh),
				     di->ddoffsethigh);
		} else {
			bcma_write32(di->core, DMA64RXREGOFFS(di, addrlow),
				     pa + di->ddoffsetlow);
			bcma_write32(di->core, DMA64RXREGOFFS(di, addrhigh),
				     di->ddoffsethigh);
		}
	} else {
		/* DMA64 32bits address extension */
		u32 ae;

		/* shift the high bit(s) from pa to ae */
		ae = (pa & PCI32ADDR_HIGH) >> PCI32ADDR_HIGH_SHIFT;
		pa &= ~PCI32ADDR_HIGH;

		if (direction == DMA_TX) {
			bcma_write32(di->core, DMA64TXREGOFFS(di, addrlow),
				     pa + di->ddoffsetlow);
			bcma_write32(di->core, DMA64TXREGOFFS(di, addrhigh),
				     di->ddoffsethigh);
			bcma_maskset32(di->core, DMA64TXREGOFFS(di, control),
				       D64_XC_AE, (ae << D64_XC_AE_SHIFT));
		} else {
			bcma_write32(di->core, DMA64RXREGOFFS(di, addrlow),
				     pa + di->ddoffsetlow);
			bcma_write32(di->core, DMA64RXREGOFFS(di, addrhigh),
				     di->ddoffsethigh);
			bcma_maskset32(di->core, DMA64RXREGOFFS(di, control),
				       D64_RC_AE, (ae << D64_RC_AE_SHIFT));
		}
	}
}

static void _dma_rxenable(struct dma_info *di)
{
	uint dmactrlflags = di->dma.dmactrlflags;
	u32 control;

	DMA_TRACE("%s:\n", di->name);

	control = D64_RC_RE | (bcma_read32(di->core,
					   DMA64RXREGOFFS(di, control)) &
			       D64_RC_AE);

	if ((dmactrlflags & DMA_CTRL_PEN) == 0)
		control |= D64_RC_PD;

	if (dmactrlflags & DMA_CTRL_ROC)
		control |= D64_RC_OC;

	bcma_write32(di->core, DMA64RXREGOFFS(di, control),
		((di->rxoffset << D64_RC_RO_SHIFT) | control));
}

void dma_rxinit(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;

	DMA_TRACE("%s:\n", di->name);

	if (di->nrxd == 0)
		return;

	di->rxin = di->rxout = 0;

	/* clear rx descriptor ring */
	memset(di->rxd64, '\0', di->nrxd * sizeof(struct dma64desc));

	/* A DMA engine without an alignment requirement requires the table to
	 * be initialized before enabling the engine
	 */
	if (!di->aligndesc_4k)
		_dma_ddtable_init(di, DMA_RX, di->rxdpa);

	_dma_rxenable(di);

	if (di->aligndesc_4k)
		_dma_ddtable_init(di, DMA_RX, di->rxdpa);
}

static struct sk_buff *dma64_getnextrxp(struct dma_info *di, bool forceall)
{
	uint i, curr;
	struct sk_buff *rxp;
	dma_addr_t pa;

	i = di->rxin;

	/* return if no packets posted */
	if (i == di->rxout)
		return NULL;

	curr =
	    B2I(((bcma_read32(di->core,
			      DMA64RXREGOFFS(di, status0)) & D64_RS0_CD_MASK) -
		 di->rcvptrbase) & D64_RS0_CD_MASK, struct dma64desc);

	/* ignore curr if forceall */
	if (!forceall && (i == curr))
		return NULL;

	/* get the packet pointer that corresponds to the rx descriptor */
	rxp = di->rxp[i];
	di->rxp[i] = NULL;

	pa = le32_to_cpu(di->rxd64[i].addrlow) - di->dataoffsetlow;

	/* clear this packet from the descriptor ring */
	dma_unmap_single(di->dmadev, pa, di->rxbufsize, DMA_FROM_DEVICE);

	di->rxd64[i].addrlow = cpu_to_le32(0xdeadbeef);
	di->rxd64[i].addrhigh = cpu_to_le32(0xdeadbeef);

	di->rxin = nextrxd(di, i);

	return rxp;
}

static struct sk_buff *_dma_getnextrxp(struct dma_info *di, bool forceall)
{
	if (di->nrxd == 0)
		return NULL;

	return dma64_getnextrxp(di, forceall);
}

/*
 * !! rx entry routine
 * Returns the number of packets in the next frame, or 0 if there are no more.
 *   If DMA_CTRL_RXMULTI is set, DMA scattering (multiple buffers) is
 *   supported with a packet chain; otherwise the frame is treated as a
 *   giant packet and is tossed.
 *   DMA scattering starts with a normal DMA header, followed by the first
 *   buffer of data. Once the maximum buffer size is reached, the data
 *   continues in the next DMA descriptor buffer WITHOUT a DMA header.
 */
int dma_rx(struct dma_pub *pub, struct sk_buff_head *skb_list)
{
	struct dma_info *di = (struct dma_info *)pub;
	struct sk_buff_head dma_frames;
	struct sk_buff *p, *next;
	uint len;
	uint pkt_len;
	int resid = 0;
	int pktcnt = 1;

	skb_queue_head_init(&dma_frames);
 next_frame:
	p = _dma_getnextrxp(di, false);
	if (p == NULL)
		return 0;

	len = le16_to_cpu(*(__le16 *) (p->data));
	DMA_TRACE("%s: dma_rx len %d\n", di->name, len);
	dma_spin_for_len(len, p);

	/* set actual length */
	pkt_len = min((di->rxoffset + len), di->rxbufsize);
	__skb_trim(p, pkt_len);
	skb_queue_tail(&dma_frames, p);
	resid = len - (di->rxbufsize - di->rxoffset);

	/* check for single or multi-buffer rx */
	if (resid > 0) {
		while ((resid > 0) && (p = _dma_getnextrxp(di, false))) {
			pkt_len = min_t(uint, resid, di->rxbufsize);
			__skb_trim(p, pkt_len);
			skb_queue_tail(&dma_frames, p);
			resid -= di->rxbufsize;
			pktcnt++;
		}

#ifdef DEBUG
		if (resid > 0) {
			uint cur;
			cur =
			    B2I(((bcma_read32(di->core,
					      DMA64RXREGOFFS(di, status0)) &
				  D64_RS0_CD_MASK) - di->rcvptrbase) &
				D64_RS0_CD_MASK, struct dma64desc);
			DMA_ERROR("rxin %d rxout %d, hw_curr %d\n",
				   di->rxin, di->rxout, cur);
		}
#endif				/* DEBUG */

		if ((di->dma.dmactrlflags & DMA_CTRL_RXMULTI) == 0) {
			DMA_ERROR("%s: bad frame length (%d)\n",
				  di->name, len);
			skb_queue_walk_safe(&dma_frames, p, next) {
				skb_unlink(p, &dma_frames);
				brcmu_pkt_buf_free_skb(p);
			}
			di->dma.rxgiants++;
			pktcnt = 1;
			goto next_frame;
		}
	}

	skb_queue_splice_tail(&dma_frames, skb_list);
	return pktcnt;
}
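/*
 * Receive length example: with rxbufsize == 2048 and rxoffset == 30, a
 * reported frame length of 2500 trims the first buffer to 2048 bytes and
 * leaves resid == 2500 - (2048 - 30) == 482 bytes, so one more descriptor
 * buffer is consumed and dma_rx() returns pktcnt == 2 (when DMA_CTRL_RXMULTI
 * is enabled; otherwise the chain is tossed as a giant frame).
 */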

static bool dma64_rxidle(struct dma_info *di)
{
	DMA_TRACE("%s:\n", di->name);

	if (di->nrxd == 0)
		return true;

	return ((bcma_read32(di->core,
			     DMA64RXREGOFFS(di, status0)) & D64_RS0_CD_MASK) ==
		(bcma_read32(di->core, DMA64RXREGOFFS(di, ptr)) &
		 D64_RS0_CD_MASK));
}

/*
 * post receive buffers
 *  Returns true if the refill failed completely and the ring is empty; this
 *  will stall the rx dma and the caller may want to call rxfill again asap.
 *  This is unlikely to happen on a memory-rich NIC, but often happens on
 *  memory-constrained dongles.
 */
bool dma_rxfill(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;
	struct sk_buff *p;
	u16 rxin, rxout;
	u32 flags = 0;
	uint n;
	uint i;
	dma_addr_t pa;
	uint extra_offset = 0;
	bool ring_empty;

	ring_empty = false;

	/*
	 * Determine how many receive buffers we're lacking
	 * from the full complement, allocate, initialize,
	 * and post them, then update the chip rx lastdscr.
	 */

	rxin = di->rxin;
	rxout = di->rxout;

	n = di->nrxpost - nrxdactive(di, rxin, rxout);

	DMA_TRACE("%s: post %d\n", di->name, n);

	if (di->rxbufsize > BCMEXTRAHDROOM)
		extra_offset = di->rxextrahdrroom;

	for (i = 0; i < n; i++) {
		/*
		 * the di->rxbufsize doesn't include the extra headroom,
		 * we need to add it to the size to be allocated
		 */
		p = brcmu_pkt_buf_get_skb(di->rxbufsize + extra_offset);

		if (p == NULL) {
			DMA_ERROR("%s: out of rxbufs\n", di->name);
			if (i == 0 && dma64_rxidle(di)) {
				DMA_ERROR("%s: ring is empty !\n", di->name);
				ring_empty = true;
			}
			di->dma.rxnobuf++;
			break;
		}
		/* reserve an extra headroom, if applicable */
		if (extra_offset)
			skb_pull(p, extra_offset);

		/* Do a cached write instead of uncached write since DMA_MAP
		 * will flush the cache.
		 */
		*(u32 *) (p->data) = 0;

		pa = dma_map_single(di->dmadev, p->data, di->rxbufsize,
				    DMA_FROM_DEVICE);

		/* save the free packet pointer */
		di->rxp[rxout] = p;

		/* reset flags for each descriptor */
		flags = 0;
		if (rxout == (di->nrxd - 1))
			flags = D64_CTRL1_EOT;

		dma64_dd_upd(di, di->rxd64, pa, rxout, &flags,
			     di->rxbufsize);
		rxout = nextrxd(di, rxout);
	}

	di->rxout = rxout;

	/* update the chip lastdscr pointer */
	bcma_write32(di->core, DMA64RXREGOFFS(di, ptr),
	      di->rcvptrbase + I2B(rxout, struct dma64desc));

	return ring_empty;
}

void dma_rxreclaim(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;
	struct sk_buff *p;

	DMA_TRACE("%s:\n", di->name);

	while ((p = _dma_getnextrxp(di, true)))
		brcmu_pkt_buf_free_skb(p);
}

void dma_counterreset(struct dma_pub *pub)
{
	/* reset all software counters */
	pub->rxgiants = 0;
	pub->rxnobuf = 0;
	pub->txnobuf = 0;
}

/* get the address of the var in order to change later */
unsigned long dma_getvar(struct dma_pub *pub, const char *name)
{
	struct dma_info *di = (struct dma_info *)pub;

	if (!strcmp(name, "&txavail"))
		return (unsigned long)&(di->dma.txavail);
	return 0;
}

/* 64-bit DMA functions */

void dma_txinit(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;
	u32 control = D64_XC_XE;

	DMA_TRACE("%s:\n", di->name);

	if (di->ntxd == 0)
		return;

	di->txin = di->txout = 0;
	di->dma.txavail = di->ntxd - 1;

	/* clear tx descriptor ring */
	memset(di->txd64, '\0', (di->ntxd * sizeof(struct dma64desc)));

	/* A DMA engine without an alignment requirement requires the table to
	 * be initialized before enabling the engine
	 */
	if (!di->aligndesc_4k)
		_dma_ddtable_init(di, DMA_TX, di->txdpa);

	if ((di->dma.dmactrlflags & DMA_CTRL_PEN) == 0)
		control |= D64_XC_PD;
	bcma_set32(di->core, DMA64TXREGOFFS(di, control), control);

	/* A DMA engine with an alignment requirement requires the table to be
	 * initialized before enabling the engine
	 */
	if (di->aligndesc_4k)
		_dma_ddtable_init(di, DMA_TX, di->txdpa);
}

void dma_txsuspend(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;

	DMA_TRACE("%s:\n", di->name);

	if (di->ntxd == 0)
		return;

	bcma_set32(di->core, DMA64TXREGOFFS(di, control), D64_XC_SE);
}

void dma_txresume(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;

	DMA_TRACE("%s:\n", di->name);

	if (di->ntxd == 0)
		return;

	bcma_mask32(di->core, DMA64TXREGOFFS(di, control), ~D64_XC_SE);
}

bool dma_txsuspended(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;

	return (di->ntxd == 0) ||
	       ((bcma_read32(di->core,
			     DMA64TXREGOFFS(di, control)) & D64_XC_SE) ==
		D64_XC_SE);
}

void dma_txreclaim(struct dma_pub *pub, enum txd_range range)
{
	struct dma_info *di = (struct dma_info *)pub;
	struct sk_buff *p;

	DMA_TRACE("%s: %s\n",
		  di->name,
		  range == DMA_RANGE_ALL ? "all" :
		  range == DMA_RANGE_TRANSMITTED ? "transmitted" :
		  "transferred");

	if (di->txin == di->txout)
		return;

	while ((p = dma_getnexttxp(pub, range))) {
		/* For unframed data, we don't have any packets to free */
		if (!(di->dma.dmactrlflags & DMA_CTRL_UNFRAMED))
			brcmu_pkt_buf_free_skb(p);
	}
}

bool dma_txreset(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;
	u32 status;

	if (di->ntxd == 0)
		return true;

	/* suspend tx DMA first */
	bcma_write32(di->core, DMA64TXREGOFFS(di, control), D64_XC_SE);
	SPINWAIT(((status =
		   (bcma_read32(di->core, DMA64TXREGOFFS(di, status0)) &
		    D64_XS0_XS_MASK)) != D64_XS0_XS_DISABLED) &&
		  (status != D64_XS0_XS_IDLE) && (status != D64_XS0_XS_STOPPED),
		 10000);

	bcma_write32(di->core, DMA64TXREGOFFS(di, control), 0);
	SPINWAIT(((status =
		   (bcma_read32(di->core, DMA64TXREGOFFS(di, status0)) &
		    D64_XS0_XS_MASK)) != D64_XS0_XS_DISABLED), 10000);

	/* wait for the last transaction to complete */
	udelay(300);

	return status == D64_XS0_XS_DISABLED;
}

bool dma_rxreset(struct dma_pub *pub)
{
	struct dma_info *di = (struct dma_info *)pub;
	u32 status;

	if (di->nrxd == 0)
		return true;

	bcma_write32(di->core, DMA64RXREGOFFS(di, control), 0);
	SPINWAIT(((status =
		   (bcma_read32(di->core, DMA64RXREGOFFS(di, status0)) &
		    D64_RS0_RS_MASK)) != D64_RS0_RS_DISABLED), 10000);

	return status == D64_RS0_RS_DISABLED;
}

/*
 * !! tx entry routine
 * WARNING: the caller must check the return value for errors.
 *   An error (a tossed frame) could be fatal and cause many subsequent
 *   hard-to-debug problems.
 */
int dma_txfast(struct dma_pub *pub, struct sk_buff *p, bool commit)
{
	struct dma_info *di = (struct dma_info *)pub;
	unsigned char *data;
	uint len;
	u16 txout;
	u32 flags = 0;
	dma_addr_t pa;

	DMA_TRACE("%s:\n", di->name);

	txout = di->txout;

	/*
	 * obtain and initialize transmit descriptor entry.
	 */
	data = p->data;
	len = p->len;

	/* no use to transmit a zero length packet */
	if (len == 0)
		return 0;

	/* return nonzero if out of tx descriptors */
	if (nexttxd(di, txout) == di->txin)
		goto outoftxd;

	/* get physical address of buffer start */
	pa = dma_map_single(di->dmadev, data, len, DMA_TO_DEVICE);

	/* With a DMA segment list, Descriptor table is filled
	 * using the segment list instead of looping over
	 * buffers in multi-chain DMA. Therefore, EOF for SGLIST
	 * is when end of segment list is reached.
	 */
	flags = D64_CTRL1_SOF | D64_CTRL1_IOC | D64_CTRL1_EOF;
	if (txout == (di->ntxd - 1))
		flags |= D64_CTRL1_EOT;

	dma64_dd_upd(di, di->txd64, pa, txout, &flags, len);

	txout = nexttxd(di, txout);

	/* save the packet */
	di->txp[prevtxd(di, txout)] = p;

	/* bump the tx descriptor index */
	di->txout = txout;

	/* kick the chip */
	if (commit)
		bcma_write32(di->core, DMA64TXREGOFFS(di, ptr),
		      di->xmtptrbase + I2B(txout, struct dma64desc));

	/* tx flow control */
	di->dma.txavail = di->ntxd - ntxdactive(di, di->txin, di->txout) - 1;

	return 0;

 outoftxd:
	DMA_ERROR("%s: out of txds !!!\n", di->name);
	brcmu_pkt_buf_free_skb(p);
	di->dma.txavail = 0;
	di->dma.txnobuf++;
	return -1;
}
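/*
 * Note on flow control: txavail is ntxd - ntxdactive() - 1; one descriptor
 * is always left unused so that a full ring (txout one behind txin) can be
 * distinguished from an empty one (txout == txin).
 */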

/*
 * Reclaim the next completed txd (txds if using chained buffers) in the range
 * specified and return the associated packet.
 * If range is DMA_RANGE_TRANSMITTED, reclaim descriptors that have been
 * transmitted as noted by the hardware "CurrDescr" pointer.
 * If range is DMA_RANGE_TRANSFERED, reclaim descriptors that have been
 * transferred by the DMA as noted by the hardware "ActiveDescr" pointer.
 * If range is DMA_RANGE_ALL, reclaim all txd(s) posted to the ring and
 * return the associated packets regardless of the value of the hardware
 * pointers.
 */
struct sk_buff *dma_getnexttxp(struct dma_pub *pub, enum txd_range range)
{
	struct dma_info *di = (struct dma_info *)pub;
	u16 start, end, i;
	u16 active_desc;
	struct sk_buff *txp;

	DMA_TRACE("%s: %s\n",
		  di->name,
		  range == DMA_RANGE_ALL ? "all" :
		  range == DMA_RANGE_TRANSMITTED ? "transmitted" :
		  "transferred");

	if (di->ntxd == 0)
		return NULL;

	txp = NULL;

	start = di->txin;
	if (range == DMA_RANGE_ALL)
		end = di->txout;
	else {
		end = (u16) (B2I(((bcma_read32(di->core,
					       DMA64TXREGOFFS(di, status0)) &
				   D64_XS0_CD_MASK) - di->xmtptrbase) &
				 D64_XS0_CD_MASK, struct dma64desc));

		if (range == DMA_RANGE_TRANSFERED) {
			active_desc =
				(u16)(bcma_read32(di->core,
						  DMA64TXREGOFFS(di, status1)) &
				      D64_XS1_AD_MASK);
			active_desc =
			    (active_desc - di->xmtptrbase) & D64_XS0_CD_MASK;
			active_desc = B2I(active_desc, struct dma64desc);
			if (end != active_desc)
				end = prevtxd(di, active_desc);
		}
	}

	if ((start == 0) && (end > di->txout))
		goto bogus;

	for (i = start; i != end && !txp; i = nexttxd(di, i)) {
		dma_addr_t pa;
		uint size;

		pa = le32_to_cpu(di->txd64[i].addrlow) - di->dataoffsetlow;

		size =
		    (le32_to_cpu(di->txd64[i].ctrl2) &
		     D64_CTRL2_BC_MASK);

		di->txd64[i].addrlow = cpu_to_le32(0xdeadbeef);
		di->txd64[i].addrhigh = cpu_to_le32(0xdeadbeef);

		txp = di->txp[i];
		di->txp[i] = NULL;

		dma_unmap_single(di->dmadev, pa, size, DMA_TO_DEVICE);
	}

	di->txin = i;

	/* tx flow control */
	di->dma.txavail = di->ntxd - ntxdactive(di, di->txin, di->txout) - 1;

	return txp;

 bogus:
	DMA_NONE("bogus curr: start %d end %d txout %d\n",
		 start, end, di->txout);
	return NULL;
}

/*
 * Mac80211 initiated actions sometimes require packets in the DMA queue to be
 * modified. The modified portion of the packet is not under control of the DMA
 * engine. This function calls a caller-supplied function for each packet in
 * the caller specified dma chain.
 */
void dma_walk_packets(struct dma_pub *dmah, void (*callback_fnc)
		      (void *pkt, void *arg_a), void *arg_a)
{
	struct dma_info *di = (struct dma_info *) dmah;
	uint i =   di->txin;
	uint end = di->txout;
	struct sk_buff *skb;
	struct ieee80211_tx_info *tx_info;

	while (i != end) {
		skb = (struct sk_buff *)di->txp[i];
		if (skb != NULL) {
			tx_info = (struct ieee80211_tx_info *)skb->cb;
			(callback_fnc)(tx_info, arg_a);
		}
		i = nexttxd(di, i);
	}
}