ce.c revision 98563d5aafa45498f8c2f6885b2bd03eab648f19
1/*
2 * Copyright (c) 2005-2011 Atheros Communications Inc.
3 * Copyright (c) 2011-2013 Qualcomm Atheros, Inc.
4 *
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include "hif.h"
19#include "pci.h"
20#include "ce.h"
21#include "debug.h"
22
23/*
24 * Support for Copy Engine hardware, which is mainly used for
25 * communication between Host and Target over a PCIe interconnect.
26 */
27
28/*
29 * A single CopyEngine (CE) comprises two "rings":
30 *   a source ring
31 *   a destination ring
32 *
33 * Each ring consists of a number of descriptors which specify
34 * an address, length, and meta-data.
35 *
36 * Typically, one side of the PCIe interconnect (Host or Target)
37 * controls one ring and the other side controls the other ring.
38 * The source side chooses when to initiate a transfer and it
39 * chooses what to send (buffer address, length). The destination
40 * side keeps a supply of "anonymous receive buffers" available and
41 * it handles incoming data as it arrives (when the destination
42 * receives an interrupt).
43 *
44 * The sender may send a simple buffer (address/length) or it may
45 * send a small list of buffers.  When a small list is sent, hardware
46 * "gathers" these and they end up in a single destination buffer
47 * with a single interrupt.
48 *
49 * There are several "contexts" managed by this layer -- more, it
50 * may seem -- than should be needed. These are provided mainly for
51 * maximum flexibility and especially to facilitate a simpler HIF
52 * implementation. There are per-CopyEngine recv, send, and watermark
53 * contexts. These are supplied by the caller when a recv, send,
54 * or watermark handler is established and they are echoed back to
55 * the caller when the respective callbacks are invoked. There is
56 * also a per-transfer context supplied by the caller when a buffer
57 * (or sendlist) is sent and when a buffer is enqueued for recv.
58 * These per-transfer contexts are echoed back to the caller when
59 * the buffer is sent/received.
60 */
61
62static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
63						       u32 ce_ctrl_addr,
64						       unsigned int n)
65{
66	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS, n);
67}
68
69static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
70						      u32 ce_ctrl_addr)
71{
72	return ath10k_pci_read32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS);
73}
74
75static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
76						      u32 ce_ctrl_addr,
77						      unsigned int n)
78{
79	ath10k_pci_write32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS, n);
80}
81
82static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
83						     u32 ce_ctrl_addr)
84{
85	return ath10k_pci_read32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS);
86}
87
88static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
89						    u32 ce_ctrl_addr)
90{
91	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_SRRI_ADDRESS);
92}
93
94static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
95						    u32 ce_ctrl_addr,
96						    unsigned int addr)
97{
98	ath10k_pci_write32(ar, ce_ctrl_addr + SR_BA_ADDRESS, addr);
99}
100
101static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
102					       u32 ce_ctrl_addr,
103					       unsigned int n)
104{
105	ath10k_pci_write32(ar, ce_ctrl_addr + SR_SIZE_ADDRESS, n);
106}
107
108static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
109					       u32 ce_ctrl_addr,
110					       unsigned int n)
111{
112	u32 ctrl1_addr = ath10k_pci_read32((ar),
113					   (ce_ctrl_addr) + CE_CTRL1_ADDRESS);
114
115	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
116			   (ctrl1_addr &  ~CE_CTRL1_DMAX_LENGTH_MASK) |
117			   CE_CTRL1_DMAX_LENGTH_SET(n));
118}
119
120static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
121						    u32 ce_ctrl_addr,
122						    unsigned int n)
123{
124	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);
125
126	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
127			   (ctrl1_addr & ~CE_CTRL1_SRC_RING_BYTE_SWAP_EN_MASK) |
128			   CE_CTRL1_SRC_RING_BYTE_SWAP_EN_SET(n));
129}
130
131static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
132						     u32 ce_ctrl_addr,
133						     unsigned int n)
134{
135	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);
136
137	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
138			   (ctrl1_addr & ~CE_CTRL1_DST_RING_BYTE_SWAP_EN_MASK) |
139			   CE_CTRL1_DST_RING_BYTE_SWAP_EN_SET(n));
140}
141
142static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
143						     u32 ce_ctrl_addr)
144{
145	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_DRRI_ADDRESS);
146}
147
148static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
149						     u32 ce_ctrl_addr,
150						     u32 addr)
151{
152	ath10k_pci_write32(ar, ce_ctrl_addr + DR_BA_ADDRESS, addr);
153}
154
155static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
156						u32 ce_ctrl_addr,
157						unsigned int n)
158{
159	ath10k_pci_write32(ar, ce_ctrl_addr + DR_SIZE_ADDRESS, n);
160}
161
162static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
163						   u32 ce_ctrl_addr,
164						   unsigned int n)
165{
166	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);
167
168	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
169			   (addr & ~SRC_WATERMARK_HIGH_MASK) |
170			   SRC_WATERMARK_HIGH_SET(n));
171}
172
173static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
174						  u32 ce_ctrl_addr,
175						  unsigned int n)
176{
177	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);
178
179	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
180			   (addr & ~SRC_WATERMARK_LOW_MASK) |
181			   SRC_WATERMARK_LOW_SET(n));
182}
183
184static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
185						    u32 ce_ctrl_addr,
186						    unsigned int n)
187{
188	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);
189
190	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
191			   (addr & ~DST_WATERMARK_HIGH_MASK) |
192			   DST_WATERMARK_HIGH_SET(n));
193}
194
195static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
196						   u32 ce_ctrl_addr,
197						   unsigned int n)
198{
199	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);
200
201	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
202			   (addr & ~DST_WATERMARK_LOW_MASK) |
203			   DST_WATERMARK_LOW_SET(n));
204}
205
206static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
207							u32 ce_ctrl_addr)
208{
209	u32 host_ie_addr = ath10k_pci_read32(ar,
210					     ce_ctrl_addr + HOST_IE_ADDRESS);
211
212	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
213			   host_ie_addr | HOST_IE_COPY_COMPLETE_MASK);
214}
215
216static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
217							u32 ce_ctrl_addr)
218{
219	u32 host_ie_addr = ath10k_pci_read32(ar,
220					     ce_ctrl_addr + HOST_IE_ADDRESS);
221
222	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
223			   host_ie_addr & ~HOST_IE_COPY_COMPLETE_MASK);
224}
225
226static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
227						    u32 ce_ctrl_addr)
228{
229	u32 host_ie_addr = ath10k_pci_read32(ar,
230					     ce_ctrl_addr + HOST_IE_ADDRESS);
231
232	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
233			   host_ie_addr & ~CE_WATERMARK_MASK);
234}
235
236static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
237					       u32 ce_ctrl_addr)
238{
239	u32 misc_ie_addr = ath10k_pci_read32(ar,
240					     ce_ctrl_addr + MISC_IE_ADDRESS);
241
242	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
243			   misc_ie_addr | CE_ERROR_MASK);
244}
245
246static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
247						u32 ce_ctrl_addr)
248{
249	u32 misc_ie_addr = ath10k_pci_read32(ar,
250					     ce_ctrl_addr + MISC_IE_ADDRESS);
251
252	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
253			   misc_ie_addr & ~CE_ERROR_MASK);
254}
255
256static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
257						     u32 ce_ctrl_addr,
258						     unsigned int mask)
259{
260	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IS_ADDRESS, mask);
261}
262
263
264/*
265 * Guts of ath10k_ce_send, used by both ath10k_ce_send and
266 * ath10k_ce_sendlist_send.
267 * The caller takes responsibility for any needed locking.
268 */
269static int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
270				 void *per_transfer_context,
271				 u32 buffer,
272				 unsigned int nbytes,
273				 unsigned int transfer_id,
274				 unsigned int flags)
275{
276	struct ath10k *ar = ce_state->ar;
277	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
278	struct ce_desc *desc, *sdesc;
279	unsigned int nentries_mask = src_ring->nentries_mask;
280	unsigned int sw_index = src_ring->sw_index;
281	unsigned int write_index = src_ring->write_index;
282	u32 ctrl_addr = ce_state->ctrl_addr;
283	u32 desc_flags = 0;
284	int ret = 0;
285
286	if (nbytes > ce_state->src_sz_max)
287		ath10k_warn("%s: send more we can (nbytes: %d, max: %d)\n",
288			    __func__, nbytes, ce_state->src_sz_max);
289
290	ret = ath10k_pci_wake(ar);
291	if (ret)
292		return ret;
293
294	if (unlikely(CE_RING_DELTA(nentries_mask,
295				   write_index, sw_index - 1) <= 0)) {
296		ret = -ENOSR;
297		goto exit;
298	}
299
300	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
301				   write_index);
302	sdesc = CE_SRC_RING_TO_DESC(src_ring->shadow_base, write_index);
303
304	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);
305
306	if (flags & CE_SEND_FLAG_GATHER)
307		desc_flags |= CE_DESC_FLAGS_GATHER;
308	if (flags & CE_SEND_FLAG_BYTE_SWAP)
309		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;
310
311	sdesc->addr   = __cpu_to_le32(buffer);
312	sdesc->nbytes = __cpu_to_le16(nbytes);
313	sdesc->flags  = __cpu_to_le16(desc_flags);
314
315	*desc = *sdesc;
316
317	src_ring->per_transfer_context[write_index] = per_transfer_context;
318
319	/* Update Source Ring Write Index */
320	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
321
322	/* WORKAROUND */
323	if (!(flags & CE_SEND_FLAG_GATHER))
324		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);
325
326	src_ring->write_index = write_index;
327exit:
328	ath10k_pci_sleep(ar);
329	return ret;
330}
331
332int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
333		   void *per_transfer_context,
334		   u32 buffer,
335		   unsigned int nbytes,
336		   unsigned int transfer_id,
337		   unsigned int flags)
338{
339	struct ath10k *ar = ce_state->ar;
340	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
341	int ret;
342
343	spin_lock_bh(&ar_pci->ce_lock);
344	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
345				    buffer, nbytes, transfer_id, flags);
346	spin_unlock_bh(&ar_pci->ce_lock);
347
348	return ret;
349}
350
351int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
352{
353	struct ath10k *ar = pipe->ar;
354	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
355	int delta;
356
357	spin_lock_bh(&ar_pci->ce_lock);
358	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
359			      pipe->src_ring->write_index,
360			      pipe->src_ring->sw_index - 1);
361	spin_unlock_bh(&ar_pci->ce_lock);
362
363	return delta;
364}
365
366int ath10k_ce_recv_buf_enqueue(struct ath10k_ce_pipe *ce_state,
367			       void *per_recv_context,
368			       u32 buffer)
369{
370	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
371	u32 ctrl_addr = ce_state->ctrl_addr;
372	struct ath10k *ar = ce_state->ar;
373	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
374	unsigned int nentries_mask = dest_ring->nentries_mask;
375	unsigned int write_index;
376	unsigned int sw_index;
377	int ret;
378
379	spin_lock_bh(&ar_pci->ce_lock);
380	write_index = dest_ring->write_index;
381	sw_index = dest_ring->sw_index;
382
383	ret = ath10k_pci_wake(ar);
384	if (ret)
385		goto out;
386
387	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) > 0) {
388		struct ce_desc *base = dest_ring->base_addr_owner_space;
389		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
390
391		/* Update destination descriptor */
392		desc->addr    = __cpu_to_le32(buffer);
393		desc->nbytes = 0;
394
395		dest_ring->per_transfer_context[write_index] =
396							per_recv_context;
397
398		/* Update Destination Ring Write Index */
399		write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
400		ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
401		dest_ring->write_index = write_index;
402		ret = 0;
403	} else {
404		ret = -EIO;
405	}
406	ath10k_pci_sleep(ar);
407
408out:
409	spin_unlock_bh(&ar_pci->ce_lock);
410
411	return ret;
412}
413
414/*
415 * Guts of ath10k_ce_completed_recv_next.
416 * The caller takes responsibility for any necessary locking.
417 */
418static int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
419						void **per_transfer_contextp,
420						u32 *bufferp,
421						unsigned int *nbytesp,
422						unsigned int *transfer_idp,
423						unsigned int *flagsp)
424{
425	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
426	unsigned int nentries_mask = dest_ring->nentries_mask;
427	unsigned int sw_index = dest_ring->sw_index;
428
429	struct ce_desc *base = dest_ring->base_addr_owner_space;
430	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
431	struct ce_desc sdesc;
432	u16 nbytes;
433
434	/* Copy in one go for performance reasons */
435	sdesc = *desc;
436
437	nbytes = __le16_to_cpu(sdesc.nbytes);
438	if (nbytes == 0) {
439		/*
440		 * This closes a relatively unusual race where the Host
441		 * sees the updated DRRI before the update to the
442		 * corresponding descriptor has completed. We treat this
443		 * as a descriptor that is not yet done.
444		 */
445		return -EIO;
446	}
447
448	desc->nbytes = 0;
449
450	/* Return data from completed destination descriptor */
451	*bufferp = __le32_to_cpu(sdesc.addr);
452	*nbytesp = nbytes;
453	*transfer_idp = MS(__le16_to_cpu(sdesc.flags), CE_DESC_FLAGS_META_DATA);
454
455	if (__le16_to_cpu(sdesc.flags) & CE_DESC_FLAGS_BYTE_SWAP)
456		*flagsp = CE_RECV_FLAG_SWAPPED;
457	else
458		*flagsp = 0;
459
460	if (per_transfer_contextp)
461		*per_transfer_contextp =
462			dest_ring->per_transfer_context[sw_index];
463
464	/* sanity */
465	dest_ring->per_transfer_context[sw_index] = NULL;
466
467	/* Update sw_index */
468	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
469	dest_ring->sw_index = sw_index;
470
471	return 0;
472}
473
474int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
475				  void **per_transfer_contextp,
476				  u32 *bufferp,
477				  unsigned int *nbytesp,
478				  unsigned int *transfer_idp,
479				  unsigned int *flagsp)
480{
481	struct ath10k *ar = ce_state->ar;
482	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
483	int ret;
484
485	spin_lock_bh(&ar_pci->ce_lock);
486	ret = ath10k_ce_completed_recv_next_nolock(ce_state,
487						   per_transfer_contextp,
488						   bufferp, nbytesp,
489						   transfer_idp, flagsp);
490	spin_unlock_bh(&ar_pci->ce_lock);
491
492	return ret;
493}
494
495int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
496			       void **per_transfer_contextp,
497			       u32 *bufferp)
498{
499	struct ath10k_ce_ring *dest_ring;
500	unsigned int nentries_mask;
501	unsigned int sw_index;
502	unsigned int write_index;
503	int ret;
504	struct ath10k *ar;
505	struct ath10k_pci *ar_pci;
506
507	dest_ring = ce_state->dest_ring;
508
509	if (!dest_ring)
510		return -EIO;
511
512	ar = ce_state->ar;
513	ar_pci = ath10k_pci_priv(ar);
514
515	spin_lock_bh(&ar_pci->ce_lock);
516
517	nentries_mask = dest_ring->nentries_mask;
518	sw_index = dest_ring->sw_index;
519	write_index = dest_ring->write_index;
520	if (write_index != sw_index) {
521		struct ce_desc *base = dest_ring->base_addr_owner_space;
522		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
523
524		/* Return data from completed destination descriptor */
525		*bufferp = __le32_to_cpu(desc->addr);
526
527		if (per_transfer_contextp)
528			*per_transfer_contextp =
529				dest_ring->per_transfer_context[sw_index];
530
531		/* sanity */
532		dest_ring->per_transfer_context[sw_index] = NULL;
533
534		/* Update sw_index */
535		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
536		dest_ring->sw_index = sw_index;
537		ret = 0;
538	} else {
539		ret = -EIO;
540	}
541
542	spin_unlock_bh(&ar_pci->ce_lock);
543
544	return ret;
545}
546
547/*
548 * Guts of ath10k_ce_completed_send_next.
549 * The caller takes responsibility for any necessary locking.
550 */
551static int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
552						void **per_transfer_contextp,
553						u32 *bufferp,
554						unsigned int *nbytesp,
555						unsigned int *transfer_idp)
556{
557	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
558	u32 ctrl_addr = ce_state->ctrl_addr;
559	struct ath10k *ar = ce_state->ar;
560	unsigned int nentries_mask = src_ring->nentries_mask;
561	unsigned int sw_index = src_ring->sw_index;
562	struct ce_desc *sdesc, *sbase;
563	unsigned int read_index;
564	int ret;
565
566	if (src_ring->hw_index == sw_index) {
567		/*
568		 * The SW completion index has caught up with the cached
569		 * version of the HW completion index.
570		 * Update the cached HW completion index to see whether
571		 * the SW has really caught up to the HW, or if the cached
572		 * value of the HW index has become stale.
573		 */
574
575		ret = ath10k_pci_wake(ar);
576		if (ret)
577			return ret;
578
579		src_ring->hw_index =
580			ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
581		src_ring->hw_index &= nentries_mask;
582
583		ath10k_pci_sleep(ar);
584	}
585
586	read_index = src_ring->hw_index;
587
588	if ((read_index == sw_index) || (read_index == 0xffffffff))
589		return -EIO;
590
591	sbase = src_ring->shadow_base;
592	sdesc = CE_SRC_RING_TO_DESC(sbase, sw_index);
593
594	/* Return data from completed source descriptor */
595	*bufferp = __le32_to_cpu(sdesc->addr);
596	*nbytesp = __le16_to_cpu(sdesc->nbytes);
597	*transfer_idp = MS(__le16_to_cpu(sdesc->flags),
598			   CE_DESC_FLAGS_META_DATA);
599
600	if (per_transfer_contextp)
601		*per_transfer_contextp =
602			src_ring->per_transfer_context[sw_index];
603
604	/* sanity */
605	src_ring->per_transfer_context[sw_index] = NULL;
606
607	/* Update sw_index */
608	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
609	src_ring->sw_index = sw_index;
610
611	return 0;
612}
613
614/* NB: Modeled after ath10k_ce_completed_send_next */
615int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
616			       void **per_transfer_contextp,
617			       u32 *bufferp,
618			       unsigned int *nbytesp,
619			       unsigned int *transfer_idp)
620{
621	struct ath10k_ce_ring *src_ring;
622	unsigned int nentries_mask;
623	unsigned int sw_index;
624	unsigned int write_index;
625	int ret;
626	struct ath10k *ar;
627	struct ath10k_pci *ar_pci;
628
629	src_ring = ce_state->src_ring;
630
631	if (!src_ring)
632		return -EIO;
633
634	ar = ce_state->ar;
635	ar_pci = ath10k_pci_priv(ar);
636
637	spin_lock_bh(&ar_pci->ce_lock);
638
639	nentries_mask = src_ring->nentries_mask;
640	sw_index = src_ring->sw_index;
641	write_index = src_ring->write_index;
642
643	if (write_index != sw_index) {
644		struct ce_desc *base = src_ring->base_addr_owner_space;
645		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);
646
647		/* Return data from completed source descriptor */
648		*bufferp = __le32_to_cpu(desc->addr);
649		*nbytesp = __le16_to_cpu(desc->nbytes);
650		*transfer_idp = MS(__le16_to_cpu(desc->flags),
651						CE_DESC_FLAGS_META_DATA);
652
653		if (per_transfer_contextp)
654			*per_transfer_contextp =
655				src_ring->per_transfer_context[sw_index];
656
657		/* sanity */
658		src_ring->per_transfer_context[sw_index] = NULL;
659
660		/* Update sw_index */
661		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
662		src_ring->sw_index = sw_index;
663		ret = 0;
664	} else {
665		ret = -EIO;
666	}
667
668	spin_unlock_bh(&ar_pci->ce_lock);
669
670	return ret;
671}
672
673int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
674				  void **per_transfer_contextp,
675				  u32 *bufferp,
676				  unsigned int *nbytesp,
677				  unsigned int *transfer_idp)
678{
679	struct ath10k *ar = ce_state->ar;
680	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
681	int ret;
682
683	spin_lock_bh(&ar_pci->ce_lock);
684	ret = ath10k_ce_completed_send_next_nolock(ce_state,
685						   per_transfer_contextp,
686						   bufferp, nbytesp,
687						   transfer_idp);
688	spin_unlock_bh(&ar_pci->ce_lock);
689
690	return ret;
691}
692
693/*
694 * Guts of interrupt handler for per-engine interrupts on a particular CE.
695 *
696 * Invokes registered callbacks for recv_complete,
697 * send_complete, and watermarks.
698 */
699void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
700{
701	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
702	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
703	u32 ctrl_addr = ce_state->ctrl_addr;
704	int ret;
705
706	ret = ath10k_pci_wake(ar);
707	if (ret)
708		return;
709
710	spin_lock_bh(&ar_pci->ce_lock);
711
712	/* Clear the copy-complete interrupts that will be handled here. */
713	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
714					  HOST_IS_COPY_COMPLETE_MASK);
715
716	spin_unlock_bh(&ar_pci->ce_lock);
717
718	if (ce_state->recv_cb)
719		ce_state->recv_cb(ce_state);
720
721	if (ce_state->send_cb)
722		ce_state->send_cb(ce_state);
723
724	spin_lock_bh(&ar_pci->ce_lock);
725
726	/*
727	 * Misc CE interrupts are not being handled, but still need
728	 * to be cleared.
729	 */
730	ath10k_ce_engine_int_status_clear(ar, ctrl_addr, CE_WATERMARK_MASK);
731
732	spin_unlock_bh(&ar_pci->ce_lock);
733	ath10k_pci_sleep(ar);
734}
735
736/*
737 * Handler for per-engine interrupts on ALL active CEs.
738 * This is used in cases where the system is sharing a
739 * single interrput for all CEs
740 */
741
742void ath10k_ce_per_engine_service_any(struct ath10k *ar)
743{
744	int ce_id, ret;
745	u32 intr_summary;
746
747	ret = ath10k_pci_wake(ar);
748	if (ret)
749		return;
750
751	intr_summary = CE_INTERRUPT_SUMMARY(ar);
752
753	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
754		if (intr_summary & (1 << ce_id))
755			intr_summary &= ~(1 << ce_id);
756		else
757			/* no intr pending on this CE */
758			continue;
759
760		ath10k_ce_per_engine_service(ar, ce_id);
761	}
762
763	ath10k_pci_sleep(ar);
764}
765
766/*
767 * Adjust interrupts for the copy complete handler.
768 * If it's needed for either send or recv, then unmask
769 * this interrupt; otherwise, mask it.
770 *
771 * Called with ce_lock held.
772 */
773static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state,
774						int disable_copy_compl_intr)
775{
776	u32 ctrl_addr = ce_state->ctrl_addr;
777	struct ath10k *ar = ce_state->ar;
778	int ret;
779
780	ret = ath10k_pci_wake(ar);
781	if (ret)
782		return;
783
784	if ((!disable_copy_compl_intr) &&
785	    (ce_state->send_cb || ce_state->recv_cb))
786		ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr);
787	else
788		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
789
790	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
791
792	ath10k_pci_sleep(ar);
793}
794
795int ath10k_ce_enable_err_irq(struct ath10k *ar)
796{
797	int i, ret;
798
799	ret = ath10k_pci_wake(ar);
800	if (ret)
801		return ret;
802
803	for (i = 0; i < CE_COUNT; i++)
804		ath10k_ce_error_intr_enable(ar, ath10k_ce_base_address(i));
805
806	ath10k_pci_sleep(ar);
807	return 0;
808}
809
810int ath10k_ce_disable_interrupts(struct ath10k *ar)
811{
812	int ce_id, ret;
813
814	ret = ath10k_pci_wake(ar);
815	if (ret)
816		return ret;
817
818	for (ce_id = 0; ce_id < CE_COUNT; ce_id++) {
819		u32 ctrl_addr = ath10k_ce_base_address(ce_id);
820
821		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
822		ath10k_ce_error_intr_disable(ar, ctrl_addr);
823		ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
824	}
825
826	ath10k_pci_sleep(ar);
827
828	return 0;
829}
830
831void ath10k_ce_send_cb_register(struct ath10k_ce_pipe *ce_state,
832				void (*send_cb)(struct ath10k_ce_pipe *),
833				int disable_interrupts)
834{
835	struct ath10k *ar = ce_state->ar;
836	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
837
838	spin_lock_bh(&ar_pci->ce_lock);
839	ce_state->send_cb = send_cb;
840	ath10k_ce_per_engine_handler_adjust(ce_state, disable_interrupts);
841	spin_unlock_bh(&ar_pci->ce_lock);
842}
843
844void ath10k_ce_recv_cb_register(struct ath10k_ce_pipe *ce_state,
845				void (*recv_cb)(struct ath10k_ce_pipe *))
846{
847	struct ath10k *ar = ce_state->ar;
848	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
849
850	spin_lock_bh(&ar_pci->ce_lock);
851	ce_state->recv_cb = recv_cb;
852	ath10k_ce_per_engine_handler_adjust(ce_state, 0);
853	spin_unlock_bh(&ar_pci->ce_lock);
854}
855
856static int ath10k_ce_init_src_ring(struct ath10k *ar,
857				   unsigned int ce_id,
858				   struct ath10k_ce_pipe *ce_state,
859				   const struct ce_attr *attr)
860{
861	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
862	struct ath10k_ce_ring *src_ring;
863	unsigned int nentries = attr->src_nentries;
864	unsigned int ce_nbytes;
865	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
866	dma_addr_t base_addr;
867	char *ptr;
868
869	nentries = roundup_pow_of_two(nentries);
870
871	if (ce_state->src_ring) {
872		WARN_ON(ce_state->src_ring->nentries != nentries);
873		return 0;
874	}
875
876	ce_nbytes = sizeof(struct ath10k_ce_ring) + (nentries * sizeof(void *));
877	ptr = kzalloc(ce_nbytes, GFP_KERNEL);
878	if (ptr == NULL)
879		return -ENOMEM;
880
881	ce_state->src_ring = (struct ath10k_ce_ring *)ptr;
882	src_ring = ce_state->src_ring;
883
884	ptr += sizeof(struct ath10k_ce_ring);
885	src_ring->nentries = nentries;
886	src_ring->nentries_mask = nentries - 1;
887
888	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
889	src_ring->sw_index &= src_ring->nentries_mask;
890	src_ring->hw_index = src_ring->sw_index;
891
892	src_ring->write_index =
893		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
894	src_ring->write_index &= src_ring->nentries_mask;
895
896	src_ring->per_transfer_context = (void **)ptr;
897
898	/*
899	 * Legacy platforms that do not support cache
900	 * coherent DMA are unsupported
901	 */
902	src_ring->base_addr_owner_space_unaligned =
903		pci_alloc_consistent(ar_pci->pdev,
904				     (nentries * sizeof(struct ce_desc) +
905				      CE_DESC_RING_ALIGN),
906				     &base_addr);
907	if (!src_ring->base_addr_owner_space_unaligned) {
908		kfree(ce_state->src_ring);
909		ce_state->src_ring = NULL;
910		return -ENOMEM;
911	}
912
913	src_ring->base_addr_ce_space_unaligned = base_addr;
914
915	src_ring->base_addr_owner_space = PTR_ALIGN(
916			src_ring->base_addr_owner_space_unaligned,
917			CE_DESC_RING_ALIGN);
918	src_ring->base_addr_ce_space = ALIGN(
919			src_ring->base_addr_ce_space_unaligned,
920			CE_DESC_RING_ALIGN);
921
922	/*
923	 * Also allocate a shadow src ring in regular
924	 * mem to use for faster access.
925	 */
926	src_ring->shadow_base_unaligned =
927		kmalloc((nentries * sizeof(struct ce_desc) +
928			 CE_DESC_RING_ALIGN), GFP_KERNEL);
929	if (!src_ring->shadow_base_unaligned) {
930		pci_free_consistent(ar_pci->pdev,
931				    (nentries * sizeof(struct ce_desc) +
932				     CE_DESC_RING_ALIGN),
933				    src_ring->base_addr_owner_space,
934				    src_ring->base_addr_ce_space);
935		kfree(ce_state->src_ring);
936		ce_state->src_ring = NULL;
937		return -ENOMEM;
938	}
939
940	src_ring->shadow_base = PTR_ALIGN(
941			src_ring->shadow_base_unaligned,
942			CE_DESC_RING_ALIGN);
943
944	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
945					 src_ring->base_addr_ce_space);
946	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
947	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
948	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
949	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
950	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
951
952	ath10k_dbg(ATH10K_DBG_BOOT,
953		   "boot ce src ring id %d entries %d base_addr %p\n",
954		   ce_id, nentries, src_ring->base_addr_owner_space);
955
956	return 0;
957}
958
959static int ath10k_ce_init_dest_ring(struct ath10k *ar,
960				    unsigned int ce_id,
961				    struct ath10k_ce_pipe *ce_state,
962				    const struct ce_attr *attr)
963{
964	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
965	struct ath10k_ce_ring *dest_ring;
966	unsigned int nentries = attr->dest_nentries;
967	unsigned int ce_nbytes;
968	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
969	dma_addr_t base_addr;
970	char *ptr;
971
972	nentries = roundup_pow_of_two(nentries);
973
974	if (ce_state->dest_ring) {
975		WARN_ON(ce_state->dest_ring->nentries != nentries);
976		return 0;
977	}
978
979	ce_nbytes = sizeof(struct ath10k_ce_ring) + (nentries * sizeof(void *));
980	ptr = kzalloc(ce_nbytes, GFP_KERNEL);
981	if (ptr == NULL)
982		return -ENOMEM;
983
984	ce_state->dest_ring = (struct ath10k_ce_ring *)ptr;
985	dest_ring = ce_state->dest_ring;
986
987	ptr += sizeof(struct ath10k_ce_ring);
988	dest_ring->nentries = nentries;
989	dest_ring->nentries_mask = nentries - 1;
990
991	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
992	dest_ring->sw_index &= dest_ring->nentries_mask;
993	dest_ring->write_index =
994		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
995	dest_ring->write_index &= dest_ring->nentries_mask;
996
997	dest_ring->per_transfer_context = (void **)ptr;
998
999	/*
1000	 * Legacy platforms that do not support cache
1001	 * coherent DMA are unsupported
1002	 */
1003	dest_ring->base_addr_owner_space_unaligned =
1004		pci_alloc_consistent(ar_pci->pdev,
1005				     (nentries * sizeof(struct ce_desc) +
1006				      CE_DESC_RING_ALIGN),
1007				     &base_addr);
1008	if (!dest_ring->base_addr_owner_space_unaligned) {
1009		kfree(ce_state->dest_ring);
1010		ce_state->dest_ring = NULL;
1011		return -ENOMEM;
1012	}
1013
1014	dest_ring->base_addr_ce_space_unaligned = base_addr;
1015
1016	/*
1017	 * Correctly initialize memory to 0 to prevent garbage
1018	 * data crashing system when download firmware
1019	 */
1020	memset(dest_ring->base_addr_owner_space_unaligned, 0,
1021	       nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN);
1022
1023	dest_ring->base_addr_owner_space = PTR_ALIGN(
1024			dest_ring->base_addr_owner_space_unaligned,
1025			CE_DESC_RING_ALIGN);
1026	dest_ring->base_addr_ce_space = ALIGN(
1027			dest_ring->base_addr_ce_space_unaligned,
1028			CE_DESC_RING_ALIGN);
1029
1030	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
1031					  dest_ring->base_addr_ce_space);
1032	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
1033	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
1034	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
1035	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
1036
1037	ath10k_dbg(ATH10K_DBG_BOOT,
1038		   "boot ce dest ring id %d entries %d base_addr %p\n",
1039		   ce_id, nentries, dest_ring->base_addr_owner_space);
1040
1041	return 0;
1042}
1043
1044static struct ath10k_ce_pipe *ath10k_ce_init_state(struct ath10k *ar,
1045					     unsigned int ce_id,
1046					     const struct ce_attr *attr)
1047{
1048	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
1049	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
1050	u32 ctrl_addr = ath10k_ce_base_address(ce_id);
1051
1052	spin_lock_bh(&ar_pci->ce_lock);
1053
1054	ce_state->ar = ar;
1055	ce_state->id = ce_id;
1056	ce_state->ctrl_addr = ctrl_addr;
1057	ce_state->attr_flags = attr->flags;
1058	ce_state->src_sz_max = attr->src_sz_max;
1059
1060	spin_unlock_bh(&ar_pci->ce_lock);
1061
1062	return ce_state;
1063}
1064
1065/*
1066 * Initialize a Copy Engine based on caller-supplied attributes.
1067 * This may be called once to initialize both source and destination
1068 * rings or it may be called twice for separate source and destination
1069 * initialization. It may be that only one side or the other is
1070 * initialized by software/firmware.
1071 */
1072struct ath10k_ce_pipe *ath10k_ce_init(struct ath10k *ar,
1073				unsigned int ce_id,
1074				const struct ce_attr *attr)
1075{
1076	struct ath10k_ce_pipe *ce_state;
1077	int ret;
1078
1079	/*
1080	 * Make sure there's enough CE ringbuffer entries for HTT TX to avoid
1081	 * additional TX locking checks.
1082	 *
1083	 * For the lack of a better place do the check here.
1084	 */
1085	BUILD_BUG_ON(TARGET_NUM_MSDU_DESC >
1086		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1087	BUILD_BUG_ON(TARGET_10X_NUM_MSDU_DESC >
1088		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
1089
1090	ret = ath10k_pci_wake(ar);
1091	if (ret)
1092		return NULL;
1093
1094	ce_state = ath10k_ce_init_state(ar, ce_id, attr);
1095	if (!ce_state) {
1096		ath10k_err("Failed to initialize CE state for ID: %d\n", ce_id);
1097		goto out;
1098	}
1099
1100	if (attr->src_nentries) {
1101		ret = ath10k_ce_init_src_ring(ar, ce_id, ce_state, attr);
1102		if (ret) {
1103			ath10k_err("Failed to initialize CE src ring for ID: %d (%d)\n",
1104				   ce_id, ret);
1105			ath10k_ce_deinit(ce_state);
1106			ce_state = NULL;
1107			goto out;
1108		}
1109	}
1110
1111	if (attr->dest_nentries) {
1112		ret = ath10k_ce_init_dest_ring(ar, ce_id, ce_state, attr);
1113		if (ret) {
1114			ath10k_err("Failed to initialize CE dest ring for ID: %d (%d)\n",
1115				   ce_id, ret);
1116			ath10k_ce_deinit(ce_state);
1117			ce_state = NULL;
1118			goto out;
1119		}
1120	}
1121
1122out:
1123	ath10k_pci_sleep(ar);
1124	return ce_state;
1125}
1126
1127void ath10k_ce_deinit(struct ath10k_ce_pipe *ce_state)
1128{
1129	struct ath10k *ar = ce_state->ar;
1130	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
1131
1132	if (ce_state->src_ring) {
1133		kfree(ce_state->src_ring->shadow_base_unaligned);
1134		pci_free_consistent(ar_pci->pdev,
1135				    (ce_state->src_ring->nentries *
1136				     sizeof(struct ce_desc) +
1137				     CE_DESC_RING_ALIGN),
1138				    ce_state->src_ring->base_addr_owner_space,
1139				    ce_state->src_ring->base_addr_ce_space);
1140		kfree(ce_state->src_ring);
1141	}
1142
1143	if (ce_state->dest_ring) {
1144		pci_free_consistent(ar_pci->pdev,
1145				    (ce_state->dest_ring->nentries *
1146				     sizeof(struct ce_desc) +
1147				     CE_DESC_RING_ALIGN),
1148				    ce_state->dest_ring->base_addr_owner_space,
1149				    ce_state->dest_ring->base_addr_ce_space);
1150		kfree(ce_state->dest_ring);
1151	}
1152
1153	ce_state->src_ring = NULL;
1154	ce_state->dest_ring = NULL;
1155}
1156