ce.c revision 728f95eef5238bffdb20e511f5cd553321d404c3
/*
 * Copyright (c) 2005-2011 Atheros Communications Inc.
 * Copyright (c) 2011-2013 Qualcomm Atheros, Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "hif.h"
#include "pci.h"
#include "ce.h"
#include "debug.h"

/*
 * Support for Copy Engine hardware, which is mainly used for
 * communication between Host and Target over a PCIe interconnect.
 */

/*
 * A single CopyEngine (CE) comprises two "rings":
 *   a source ring
 *   a destination ring
 *
 * Each ring consists of a number of descriptors which specify
 * an address, length, and meta-data.
 *
 * Typically, one side of the PCIe interconnect (Host or Target)
 * controls one ring and the other side controls the other ring.
 * The source side chooses when to initiate a transfer and it
 * chooses what to send (buffer address, length). The destination
 * side keeps a supply of "anonymous receive buffers" available and
 * it handles incoming data as it arrives (when the destination
 * receives an interrupt).
 *
 * The sender may send a simple buffer (address/length) or it may
 * send a small list of buffers.  When a small list is sent, hardware
 * "gathers" these and they end up in a single destination buffer
 * with a single interrupt.
 *
 * There are several "contexts" managed by this layer -- more, it
 * may seem, than should be needed. These are provided mainly for
 * maximum flexibility and especially to facilitate a simpler HIF
 * implementation. There are per-CopyEngine recv, send, and watermark
 * contexts. These are supplied by the caller when a recv, send,
 * or watermark handler is established and they are echoed back to
 * the caller when the respective callbacks are invoked. There is
 * also a per-transfer context supplied by the caller when a buffer
 * (or sendlist) is sent and when a buffer is enqueued for recv.
 * These per-transfer contexts are echoed back to the caller when
 * the buffer is sent/received.
 */

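/*
 * Illustrative usage sketch (not part of the driver): roughly how a
 * caller such as the HIF/PCI layer is expected to drive a pipe, using
 * only functions defined further down in this file. "rx_pipe",
 * "tx_pipe", "skb", "paddr" and "transfer_id" are hypothetical
 * placeholders.
 *
 *	ath10k_ce_rx_post_buf(rx_pipe, skb, paddr);
 *	ath10k_ce_send(tx_pipe, skb, paddr, skb->len, transfer_id, 0);
 *
 * Completions are then reaped from the registered send_cb/recv_cb
 * callbacks via ath10k_ce_completed_send_next() and
 * ath10k_ce_completed_recv_next(), with the per-transfer context
 * (here the skb) echoed back to the caller.
 */
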
static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
						       u32 ce_ctrl_addr,
						       unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS, n);
}

static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
						      u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS);
}

static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
						      u32 ce_ctrl_addr,
						      unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS, n);
}

static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
						     u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS);
}

static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
						    u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_SRRI_ADDRESS);
}

static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int addr)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_BA_ADDRESS, addr);
}

static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
					       u32 ce_ctrl_addr,
					       unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_SIZE_ADDRESS, n);
}

static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
					       u32 ce_ctrl_addr,
					       unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar,
					   ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_DMAX_LENGTH_MASK) |
			   CE_CTRL1_DMAX_LENGTH_SET(n));
}

static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_SRC_RING_BYTE_SWAP_EN_MASK) |
			   CE_CTRL1_SRC_RING_BYTE_SWAP_EN_SET(n));
}

static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_DST_RING_BYTE_SWAP_EN_MASK) |
			   CE_CTRL1_DST_RING_BYTE_SWAP_EN_SET(n));
}

static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
						     u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_DRRI_ADDRESS);
}

static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     u32 addr)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DR_BA_ADDRESS, addr);
}

static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
						u32 ce_ctrl_addr,
						unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DR_SIZE_ADDRESS, n);
}

static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
						   u32 ce_ctrl_addr,
						   unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
			   (addr & ~SRC_WATERMARK_HIGH_MASK) |
			   SRC_WATERMARK_HIGH_SET(n));
}

static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
						  u32 ce_ctrl_addr,
						  unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
			   (addr & ~SRC_WATERMARK_LOW_MASK) |
			   SRC_WATERMARK_LOW_SET(n));
}

static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
			   (addr & ~DST_WATERMARK_HIGH_MASK) |
			   DST_WATERMARK_HIGH_SET(n));
}

static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
						   u32 ce_ctrl_addr,
						   unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
			   (addr & ~DST_WATERMARK_LOW_MASK) |
			   DST_WATERMARK_LOW_SET(n));
}

static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar,
							u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr | HOST_IE_COPY_COMPLETE_MASK);
}

static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
							u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr & ~HOST_IE_COPY_COMPLETE_MASK);
}

static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
						    u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr & ~CE_WATERMARK_MASK);
}

static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
					       u32 ce_ctrl_addr)
{
	u32 misc_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + MISC_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
			   misc_ie_addr | CE_ERROR_MASK);
}

static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
						u32 ce_ctrl_addr)
{
	u32 misc_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + MISC_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
			   misc_ie_addr & ~CE_ERROR_MASK);
}

static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     unsigned int mask)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IS_ADDRESS, mask);
}

/*
 * Guts of ath10k_ce_send, used by both ath10k_ce_send and
 * ath10k_ce_sendlist_send.
 * The caller takes responsibility for any needed locking.
 */
int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
			  void *per_transfer_context,
			  u32 buffer,
			  unsigned int nbytes,
			  unsigned int transfer_id,
			  unsigned int flags)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	struct ce_desc *desc, *sdesc;
	unsigned int nentries_mask = src_ring->nentries_mask;
	unsigned int sw_index = src_ring->sw_index;
	unsigned int write_index = src_ring->write_index;
	u32 ctrl_addr = ce_state->ctrl_addr;
	u32 desc_flags = 0;
	int ret = 0;

	if (nbytes > ce_state->src_sz_max)
		ath10k_warn("%s: attempt to send more than we can (nbytes: %d, max: %d)\n",
			    __func__, nbytes, ce_state->src_sz_max);

	if (unlikely(CE_RING_DELTA(nentries_mask,
				   write_index, sw_index - 1) <= 0)) {
		ret = -ENOSR;
		goto exit;
	}

	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
				   write_index);
	sdesc = CE_SRC_RING_TO_DESC(src_ring->shadow_base, write_index);

	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);

	if (flags & CE_SEND_FLAG_GATHER)
		desc_flags |= CE_DESC_FLAGS_GATHER;
	if (flags & CE_SEND_FLAG_BYTE_SWAP)
		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;

	sdesc->addr   = __cpu_to_le32(buffer);
	sdesc->nbytes = __cpu_to_le16(nbytes);
	sdesc->flags  = __cpu_to_le16(desc_flags);

	*desc = *sdesc;

	src_ring->per_transfer_context[write_index] = per_transfer_context;

	/* Update Source Ring Write Index */
	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);

	/* WORKAROUND */
	if (!(flags & CE_SEND_FLAG_GATHER))
		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);

	src_ring->write_index = write_index;
exit:
	return ret;
}

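/*
 * Note on the ring-space check above: the CE_RING_DELTA() helper (see
 * ce.h) effectively computes (toidx - fromidx) masked to the ring size,
 * i.e. the number of slots that may still be written before the write
 * index catches up with sw_index - 1 (one slot is always left empty to
 * distinguish a full ring from an empty one).
 *
 * Worked example with hypothetical values: with 8 entries (mask 0x7),
 * sw_index = 2 and write_index = 6,
 *
 *	CE_RING_DELTA(0x7, 6, 2 - 1) = (1 - 6) & 0x7 = 3
 *
 * so slots 6, 7 and 0 are still free and the send is accepted; once the
 * delta reaches 0 the function returns -ENOSR instead.
 */
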
void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *src_ring = pipe->src_ring;
	u32 ctrl_addr = pipe->ctrl_addr;

	lockdep_assert_held(&ar_pci->ce_lock);

	/*
	 * This function must be called only if there is an incomplete
	 * scatter-gather transfer (before index register is updated)
	 * that needs to be cleaned up.
	 */
	if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index))
		return;

	if (WARN_ON_ONCE(src_ring->write_index ==
			 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr)))
		return;

	src_ring->write_index--;
	src_ring->write_index &= src_ring->nentries_mask;

	src_ring->per_transfer_context[src_ring->write_index] = NULL;
}

int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
		   void *per_transfer_context,
		   u32 buffer,
		   unsigned int nbytes,
		   unsigned int transfer_id,
		   unsigned int flags)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
				    buffer, nbytes, transfer_id, flags);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int delta;

	spin_lock_bh(&ar_pci->ce_lock);
	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
			      pipe->src_ring->write_index,
			      pipe->src_ring->sw_index - 1);
	spin_unlock_bh(&ar_pci->ce_lock);

	return delta;
}

int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int write_index = dest_ring->write_index;
	unsigned int sw_index = dest_ring->sw_index;

	lockdep_assert_held(&ar_pci->ce_lock);

	return CE_RING_DELTA(nentries_mask, write_index, sw_index - 1);
}

int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int write_index = dest_ring->write_index;
	unsigned int sw_index = dest_ring->sw_index;
	struct ce_desc *base = dest_ring->base_addr_owner_space;
	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
	u32 ctrl_addr = pipe->ctrl_addr;

	lockdep_assert_held(&ar_pci->ce_lock);

	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
		return -EIO;

	desc->addr = __cpu_to_le32(paddr);
	desc->nbytes = 0;

	dest_ring->per_transfer_context[write_index] = ctx;
	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
	dest_ring->write_index = write_index;

	return 0;
}

int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = __ath10k_ce_rx_post_buf(pipe, ctx, paddr);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

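/*
 * Illustrative sketch (not part of the driver): how a hypothetical
 * caller already holding ce_lock could keep the destination ring topped
 * up with anonymous receive buffers, using only the two __ helpers
 * above. "alloc_rx_buf", "buf" and "paddr" are assumed placeholders for
 * whatever the HIF layer uses to allocate and DMA-map receive buffers.
 *
 *	lockdep_assert_held(&ar_pci->ce_lock);
 *	while (__ath10k_ce_rx_num_free_bufs(pipe) > 0) {
 *		buf = alloc_rx_buf(&paddr);
 *		if (!buf || __ath10k_ce_rx_post_buf(pipe, buf, paddr))
 *			break;
 *	}
 */
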
/*
 * Guts of ath10k_ce_completed_recv_next.
 * The caller takes responsibility for any necessary locking.
 */
static int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
						void **per_transfer_contextp,
						u32 *bufferp,
						unsigned int *nbytesp,
						unsigned int *transfer_idp,
						unsigned int *flagsp)
{
	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int sw_index = dest_ring->sw_index;

	struct ce_desc *base = dest_ring->base_addr_owner_space;
	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
	struct ce_desc sdesc;
	u16 nbytes;

	/* Copy in one go for performance reasons */
	sdesc = *desc;

	nbytes = __le16_to_cpu(sdesc.nbytes);
	if (nbytes == 0) {
		/*
		 * This closes a relatively unusual race where the Host
		 * sees the updated DRRI before the update to the
		 * corresponding descriptor has completed. We treat this
		 * as a descriptor that is not yet done.
		 */
		return -EIO;
	}

	desc->nbytes = 0;

	/* Return data from completed destination descriptor */
	*bufferp = __le32_to_cpu(sdesc.addr);
	*nbytesp = nbytes;
	*transfer_idp = MS(__le16_to_cpu(sdesc.flags), CE_DESC_FLAGS_META_DATA);

	if (__le16_to_cpu(sdesc.flags) & CE_DESC_FLAGS_BYTE_SWAP)
		*flagsp = CE_RECV_FLAG_SWAPPED;
	else
		*flagsp = 0;

	if (per_transfer_contextp)
		*per_transfer_contextp =
			dest_ring->per_transfer_context[sw_index];

	/* sanity */
	dest_ring->per_transfer_context[sw_index] = NULL;

	/* Update sw_index */
	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
	dest_ring->sw_index = sw_index;

	return 0;
}

int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
				  void **per_transfer_contextp,
				  u32 *bufferp,
				  unsigned int *nbytesp,
				  unsigned int *transfer_idp,
				  unsigned int *flagsp)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_completed_recv_next_nolock(ce_state,
						   per_transfer_contextp,
						   bufferp, nbytesp,
						   transfer_idp, flagsp);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
			       void **per_transfer_contextp,
			       u32 *bufferp)
{
	struct ath10k_ce_ring *dest_ring;
	unsigned int nentries_mask;
	unsigned int sw_index;
	unsigned int write_index;
	int ret;
	struct ath10k *ar;
	struct ath10k_pci *ar_pci;

	dest_ring = ce_state->dest_ring;

	if (!dest_ring)
		return -EIO;

	ar = ce_state->ar;
	ar_pci = ath10k_pci_priv(ar);

	spin_lock_bh(&ar_pci->ce_lock);

	nentries_mask = dest_ring->nentries_mask;
	sw_index = dest_ring->sw_index;
	write_index = dest_ring->write_index;
	if (write_index != sw_index) {
		struct ce_desc *base = dest_ring->base_addr_owner_space;
		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);

		/* Return data from completed destination descriptor */
		*bufferp = __le32_to_cpu(desc->addr);

		if (per_transfer_contextp)
			*per_transfer_contextp =
				dest_ring->per_transfer_context[sw_index];

		/* sanity */
		dest_ring->per_transfer_context[sw_index] = NULL;

		/* Update sw_index */
		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
		dest_ring->sw_index = sw_index;
		ret = 0;
	} else {
		ret = -EIO;
	}

	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

/*
 * Guts of ath10k_ce_completed_send_next.
 * The caller takes responsibility for any necessary locking.
 */
static int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
						void **per_transfer_contextp,
						u32 *bufferp,
						unsigned int *nbytesp,
						unsigned int *transfer_idp)
{
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	u32 ctrl_addr = ce_state->ctrl_addr;
	struct ath10k *ar = ce_state->ar;
	unsigned int nentries_mask = src_ring->nentries_mask;
	unsigned int sw_index = src_ring->sw_index;
	struct ce_desc *sdesc, *sbase;
	unsigned int read_index;

	if (src_ring->hw_index == sw_index) {
		/*
		 * The SW completion index has caught up with the cached
		 * version of the HW completion index.
		 * Update the cached HW completion index to see whether
		 * the SW has really caught up to the HW, or if the cached
		 * value of the HW index has become stale.
		 */

		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
		if (read_index == 0xffffffff)
			return -ENODEV;

		read_index &= nentries_mask;
		src_ring->hw_index = read_index;
	}

	read_index = src_ring->hw_index;

	if (read_index == sw_index)
		return -EIO;

	sbase = src_ring->shadow_base;
	sdesc = CE_SRC_RING_TO_DESC(sbase, sw_index);

	/* Return data from completed source descriptor */
	*bufferp = __le32_to_cpu(sdesc->addr);
	*nbytesp = __le16_to_cpu(sdesc->nbytes);
	*transfer_idp = MS(__le16_to_cpu(sdesc->flags),
			   CE_DESC_FLAGS_META_DATA);

	if (per_transfer_contextp)
		*per_transfer_contextp =
			src_ring->per_transfer_context[sw_index];

	/* sanity */
	src_ring->per_transfer_context[sw_index] = NULL;

	/* Update sw_index */
	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
	src_ring->sw_index = sw_index;

	return 0;
}

/* NB: Modeled after ath10k_ce_completed_send_next */
int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
			       void **per_transfer_contextp,
			       u32 *bufferp,
			       unsigned int *nbytesp,
			       unsigned int *transfer_idp)
{
	struct ath10k_ce_ring *src_ring;
	unsigned int nentries_mask;
	unsigned int sw_index;
	unsigned int write_index;
	int ret;
	struct ath10k *ar;
	struct ath10k_pci *ar_pci;

	src_ring = ce_state->src_ring;

	if (!src_ring)
		return -EIO;

	ar = ce_state->ar;
	ar_pci = ath10k_pci_priv(ar);

	spin_lock_bh(&ar_pci->ce_lock);

	nentries_mask = src_ring->nentries_mask;
	sw_index = src_ring->sw_index;
	write_index = src_ring->write_index;

	if (write_index != sw_index) {
		struct ce_desc *base = src_ring->base_addr_owner_space;
		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);

		/* Return data from completed source descriptor */
		*bufferp = __le32_to_cpu(desc->addr);
		*nbytesp = __le16_to_cpu(desc->nbytes);
		*transfer_idp = MS(__le16_to_cpu(desc->flags),
				   CE_DESC_FLAGS_META_DATA);

		if (per_transfer_contextp)
			*per_transfer_contextp =
				src_ring->per_transfer_context[sw_index];

		/* sanity */
		src_ring->per_transfer_context[sw_index] = NULL;

		/* Update sw_index */
		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
		src_ring->sw_index = sw_index;
		ret = 0;
	} else {
		ret = -EIO;
	}

	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
				  void **per_transfer_contextp,
				  u32 *bufferp,
				  unsigned int *nbytesp,
				  unsigned int *transfer_idp)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_completed_send_next_nolock(ce_state,
						   per_transfer_contextp,
						   bufferp, nbytesp,
						   transfer_idp);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

/*
 * Guts of interrupt handler for per-engine interrupts on a particular CE.
 *
 * Invokes registered callbacks for recv_complete,
 * send_complete, and watermarks.
 */
void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	u32 ctrl_addr = ce_state->ctrl_addr;

	spin_lock_bh(&ar_pci->ce_lock);

	/* Clear the copy-complete interrupts that will be handled here. */
	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
					  HOST_IS_COPY_COMPLETE_MASK);

	spin_unlock_bh(&ar_pci->ce_lock);

	if (ce_state->recv_cb)
		ce_state->recv_cb(ce_state);

	if (ce_state->send_cb)
		ce_state->send_cb(ce_state);

	spin_lock_bh(&ar_pci->ce_lock);

	/*
	 * Misc CE interrupts are not being handled, but still need
	 * to be cleared.
	 */
	ath10k_ce_engine_int_status_clear(ar, ctrl_addr, CE_WATERMARK_MASK);

	spin_unlock_bh(&ar_pci->ce_lock);
}

/*
 * Handler for per-engine interrupts on ALL active CEs.
 * This is used in cases where the system is sharing a
 * single interrupt for all CEs.
 */
void ath10k_ce_per_engine_service_any(struct ath10k *ar)
{
	int ce_id;
	u32 intr_summary;

	intr_summary = CE_INTERRUPT_SUMMARY(ar);

	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
		if (intr_summary & (1 << ce_id))
			intr_summary &= ~(1 << ce_id);
		else
			/* no intr pending on this CE */
			continue;

		ath10k_ce_per_engine_service(ar, ce_id);
	}
}

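/*
 * Illustrative sketch (not part of the driver): a hypothetical shared
 * interrupt handler would simply dispatch to the routine above and let
 * the per-CE service function fan out to the registered callbacks:
 *
 *	static irqreturn_t example_ce_isr(int irq, void *arg)
 *	{
 *		struct ath10k *ar = arg;
 *
 *		ath10k_ce_per_engine_service_any(ar);
 *		return IRQ_HANDLED;
 *	}
 *
 * The actual handler lives in the PCI layer and also has to deal with
 * non-CE interrupt sources.
 */
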
/*
 * Adjust interrupts for the copy complete handler.
 * If it's needed for either send or recv, then unmask
 * this interrupt; otherwise, mask it.
 *
 * Called with ce_lock held.
 */
static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state)
{
	u32 ctrl_addr = ce_state->ctrl_addr;
	struct ath10k *ar = ce_state->ar;
	bool disable_copy_compl_intr = ce_state->attr_flags & CE_ATTR_DIS_INTR;

	if ((!disable_copy_compl_intr) &&
	    (ce_state->send_cb || ce_state->recv_cb))
		ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr);
	else
		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);

	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
}

int ath10k_ce_disable_interrupts(struct ath10k *ar)
{
	int ce_id;

	for (ce_id = 0; ce_id < CE_COUNT; ce_id++) {
		u32 ctrl_addr = ath10k_ce_base_address(ce_id);

		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
		ath10k_ce_error_intr_disable(ar, ctrl_addr);
		ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
	}

	return 0;
}

void ath10k_ce_enable_interrupts(struct ath10k *ar)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ce_id;

	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
		ath10k_ce_per_engine_handler_adjust(&ar_pci->ce_states[ce_id]);
}

static int ath10k_ce_init_src_ring(struct ath10k *ar,
				   unsigned int ce_id,
				   const struct ce_attr *attr)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	u32 nentries, ctrl_addr = ath10k_ce_base_address(ce_id);

	nentries = roundup_pow_of_two(attr->src_nentries);

	memset(src_ring->per_transfer_context, 0,
	       nentries * sizeof(*src_ring->per_transfer_context));

	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
	src_ring->sw_index &= src_ring->nentries_mask;
	src_ring->hw_index = src_ring->sw_index;

	src_ring->write_index =
		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
	src_ring->write_index &= src_ring->nentries_mask;

	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
					 src_ring->base_addr_ce_space);
	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);

	ath10k_dbg(ATH10K_DBG_BOOT,
		   "boot init ce src ring id %d entries %d base_addr %p\n",
		   ce_id, nentries, src_ring->base_addr_owner_space);

	return 0;
}

static int ath10k_ce_init_dest_ring(struct ath10k *ar,
				    unsigned int ce_id,
				    const struct ce_attr *attr)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
	u32 nentries, ctrl_addr = ath10k_ce_base_address(ce_id);

	nentries = roundup_pow_of_two(attr->dest_nentries);

	memset(dest_ring->per_transfer_context, 0,
	       nentries * sizeof(*dest_ring->per_transfer_context));

	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
	dest_ring->sw_index &= dest_ring->nentries_mask;
	dest_ring->write_index =
		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
	dest_ring->write_index &= dest_ring->nentries_mask;

	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
					  dest_ring->base_addr_ce_space);
	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);

	ath10k_dbg(ATH10K_DBG_BOOT,
		   "boot ce dest ring id %d entries %d base_addr %p\n",
		   ce_id, nentries, dest_ring->base_addr_owner_space);

	return 0;
}

static struct ath10k_ce_ring *
ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id,
			 const struct ce_attr *attr)
{
	struct ath10k_ce_ring *src_ring;
	u32 nentries = attr->src_nentries;
	dma_addr_t base_addr;

	nentries = roundup_pow_of_two(nentries);

	src_ring = kzalloc(sizeof(*src_ring) +
			   (nentries *
			    sizeof(*src_ring->per_transfer_context)),
			   GFP_KERNEL);
	if (src_ring == NULL)
		return ERR_PTR(-ENOMEM);

	src_ring->nentries = nentries;
	src_ring->nentries_mask = nentries - 1;

	/*
	 * Legacy platforms that do not support cache
	 * coherent DMA are unsupported
	 */
	src_ring->base_addr_owner_space_unaligned =
		dma_alloc_coherent(ar->dev,
				   (nentries * sizeof(struct ce_desc) +
				    CE_DESC_RING_ALIGN),
				   &base_addr, GFP_KERNEL);
	if (!src_ring->base_addr_owner_space_unaligned) {
		kfree(src_ring);
		return ERR_PTR(-ENOMEM);
	}

	src_ring->base_addr_ce_space_unaligned = base_addr;

	src_ring->base_addr_owner_space = PTR_ALIGN(
			src_ring->base_addr_owner_space_unaligned,
			CE_DESC_RING_ALIGN);
	src_ring->base_addr_ce_space = ALIGN(
			src_ring->base_addr_ce_space_unaligned,
			CE_DESC_RING_ALIGN);

	/*
	 * Also allocate a shadow src ring in regular
	 * mem to use for faster access.
	 */
	src_ring->shadow_base_unaligned =
		kmalloc((nentries * sizeof(struct ce_desc) +
			 CE_DESC_RING_ALIGN), GFP_KERNEL);
	if (!src_ring->shadow_base_unaligned) {
		dma_free_coherent(ar->dev,
				  (nentries * sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  src_ring->base_addr_owner_space,
				  src_ring->base_addr_ce_space);
		kfree(src_ring);
		return ERR_PTR(-ENOMEM);
	}

	src_ring->shadow_base = PTR_ALIGN(
			src_ring->shadow_base_unaligned,
			CE_DESC_RING_ALIGN);

	return src_ring;
}

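/*
 * Note on the alignment above: the descriptor ring is over-allocated by
 * CE_DESC_RING_ALIGN bytes so that both the CPU virtual address and the
 * DMA (CE space) address can be rounded up to the boundary the hardware
 * expects. As a hypothetical example, if CE_DESC_RING_ALIGN were 8 and
 * dma_alloc_coherent() returned a bus address of 0x1004, ALIGN() would
 * yield 0x1008, and PTR_ALIGN() applies the same rounding to the
 * matching virtual address, so both views still refer to the same
 * descriptor slot 0.
 */
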
static struct ath10k_ce_ring *
ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id,
			  const struct ce_attr *attr)
{
	struct ath10k_ce_ring *dest_ring;
	u32 nentries;
	dma_addr_t base_addr;

	nentries = roundup_pow_of_two(attr->dest_nentries);

	dest_ring = kzalloc(sizeof(*dest_ring) +
			    (nentries *
			     sizeof(*dest_ring->per_transfer_context)),
			    GFP_KERNEL);
	if (dest_ring == NULL)
		return ERR_PTR(-ENOMEM);

	dest_ring->nentries = nentries;
	dest_ring->nentries_mask = nentries - 1;

	/*
	 * Legacy platforms that do not support cache
	 * coherent DMA are unsupported
	 */
	dest_ring->base_addr_owner_space_unaligned =
		dma_alloc_coherent(ar->dev,
				   (nentries * sizeof(struct ce_desc) +
				    CE_DESC_RING_ALIGN),
				   &base_addr, GFP_KERNEL);
	if (!dest_ring->base_addr_owner_space_unaligned) {
		kfree(dest_ring);
		return ERR_PTR(-ENOMEM);
	}

	dest_ring->base_addr_ce_space_unaligned = base_addr;

	/*
	 * Initialize the memory to 0 to prevent garbage data from
	 * crashing the system during firmware download.
	 */
	memset(dest_ring->base_addr_owner_space_unaligned, 0,
	       nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN);

	dest_ring->base_addr_owner_space = PTR_ALIGN(
			dest_ring->base_addr_owner_space_unaligned,
			CE_DESC_RING_ALIGN);
	dest_ring->base_addr_ce_space = ALIGN(
			dest_ring->base_addr_ce_space_unaligned,
			CE_DESC_RING_ALIGN);

	return dest_ring;
}

/*
 * Initialize a Copy Engine based on caller-supplied attributes.
 * This may be called once to initialize both source and destination
 * rings or it may be called twice for separate source and destination
 * initialization. It may be that only one side or the other is
 * initialized by software/firmware.
 */
int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
			const struct ce_attr *attr,
			void (*send_cb)(struct ath10k_ce_pipe *),
			void (*recv_cb)(struct ath10k_ce_pipe *))
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	int ret;

	/*
	 * Make sure there's enough CE ringbuffer entries for HTT TX to avoid
	 * additional TX locking checks.
	 *
	 * For the lack of a better place do the check here.
	 */
	BUILD_BUG_ON(2*TARGET_NUM_MSDU_DESC >
		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
	BUILD_BUG_ON(2*TARGET_10X_NUM_MSDU_DESC >
		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));

	spin_lock_bh(&ar_pci->ce_lock);
	ce_state->ar = ar;
	ce_state->id = ce_id;
	ce_state->ctrl_addr = ath10k_ce_base_address(ce_id);
	ce_state->attr_flags = attr->flags;
	ce_state->src_sz_max = attr->src_sz_max;
	if (attr->src_nentries)
		ce_state->send_cb = send_cb;
	if (attr->dest_nentries)
		ce_state->recv_cb = recv_cb;
	spin_unlock_bh(&ar_pci->ce_lock);

	if (attr->src_nentries) {
		ret = ath10k_ce_init_src_ring(ar, ce_id, attr);
		if (ret) {
			ath10k_err("Failed to initialize CE src ring for ID: %d (%d)\n",
				   ce_id, ret);
			return ret;
		}
	}

	if (attr->dest_nentries) {
		ret = ath10k_ce_init_dest_ring(ar, ce_id, attr);
		if (ret) {
			ath10k_err("Failed to initialize CE dest ring for ID: %d (%d)\n",
				   ce_id, ret);
			return ret;
		}
	}

	return 0;
}

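/*
 * Illustrative sketch (not part of the driver): the expected lifecycle
 * of a pipe as driven by the PCI/HIF layer, using the functions defined
 * in this file. "ce_attrs", "send_done" and "recv_done" are assumed
 * placeholders for the per-pipe attributes and callbacks.
 *
 *	ret = ath10k_ce_alloc_pipe(ar, ce_id, &ce_attrs[ce_id]);
 *	...
 *	ret = ath10k_ce_init_pipe(ar, ce_id, &ce_attrs[ce_id],
 *				  send_done, recv_done);
 *	...
 *	ath10k_ce_deinit_pipe(ar, ce_id);
 *	ath10k_ce_free_pipe(ar, ce_id);
 *
 * Allocation is done once (it owns the DMA memory), while init/deinit
 * can be repeated across device resets without reallocating the rings.
 */
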
static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id)
{
	u32 ctrl_addr = ath10k_ce_base_address(ce_id);

	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0);
}

static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id)
{
	u32 ctrl_addr = ath10k_ce_base_address(ce_id);

	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0);
}

void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
{
	ath10k_ce_deinit_src_ring(ar, ce_id);
	ath10k_ce_deinit_dest_ring(ar, ce_id);
}

int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
			 const struct ce_attr *attr)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	int ret;

	if (attr->src_nentries) {
		ce_state->src_ring = ath10k_ce_alloc_src_ring(ar, ce_id, attr);
		if (IS_ERR(ce_state->src_ring)) {
			ret = PTR_ERR(ce_state->src_ring);
			ath10k_err("failed to allocate copy engine source ring %d: %d\n",
				   ce_id, ret);
			ce_state->src_ring = NULL;
			return ret;
		}
	}

	if (attr->dest_nentries) {
		ce_state->dest_ring = ath10k_ce_alloc_dest_ring(ar, ce_id,
								attr);
		if (IS_ERR(ce_state->dest_ring)) {
			ret = PTR_ERR(ce_state->dest_ring);
			ath10k_err("failed to allocate copy engine destination ring %d: %d\n",
				   ce_id, ret);
			ce_state->dest_ring = NULL;
			return ret;
		}
	}

	return 0;
}

void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];

	if (ce_state->src_ring) {
		kfree(ce_state->src_ring->shadow_base_unaligned);
		dma_free_coherent(ar->dev,
				  (ce_state->src_ring->nentries *
				   sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  ce_state->src_ring->base_addr_owner_space,
				  ce_state->src_ring->base_addr_ce_space);
		kfree(ce_state->src_ring);
	}

	if (ce_state->dest_ring) {
		dma_free_coherent(ar->dev,
				  (ce_state->dest_ring->nentries *
				   sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  ce_state->dest_ring->base_addr_owner_space,
				  ce_state->dest_ring->base_addr_ce_space);
		kfree(ce_state->dest_ring);
	}

	ce_state->src_ring = NULL;
	ce_state->dest_ring = NULL;
}
1164