/*
 * Copyright (c) 2005-2011 Atheros Communications Inc.
 * Copyright (c) 2011-2013 Qualcomm Atheros, Inc.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "hif.h"
#include "pci.h"
#include "ce.h"
#include "debug.h"

/*
 * Support for Copy Engine hardware, which is mainly used for
 * communication between Host and Target over a PCIe interconnect.
 */

/*
 * A single CopyEngine (CE) comprises two "rings":
 *   a source ring
 *   a destination ring
 *
 * Each ring consists of a number of descriptors which specify
 * an address, length, and meta-data.
 *
 * Typically, one side of the PCIe interconnect (Host or Target)
 * controls one ring and the other side controls the other ring.
 * The source side chooses when to initiate a transfer and it
 * chooses what to send (buffer address, length). The destination
 * side keeps a supply of "anonymous receive buffers" available and
 * it handles incoming data as it arrives (when the destination
 * receives an interrupt).
 *
 * The sender may send a simple buffer (address/length) or it may
 * send a small list of buffers.  When a small list is sent, hardware
 * "gathers" these and they end up in a single destination buffer
 * with a single interrupt.
 *
 * There are several "contexts" managed by this layer -- more, it
 * may seem, than should be needed. These are provided mainly for
 * maximum flexibility and especially to facilitate a simpler HIF
 * implementation. There are per-CopyEngine recv, send, and watermark
 * contexts. These are supplied by the caller when a recv, send,
 * or watermark handler is established and they are echoed back to
 * the caller when the respective callbacks are invoked. There is
 * also a per-transfer context supplied by the caller when a buffer
 * (or sendlist) is sent and when a buffer is enqueued for recv.
 * These per-transfer contexts are echoed back to the caller when
 * the buffer is sent/received.
 */
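
/*
 * Illustrative sketch (not part of the driver): how a hypothetical HIF-level
 * caller might drive a pipe with the primitives below. The "ce_pipe", "skb",
 * "paddr" and "transfer_id" names are assumptions for the example only.
 *
 *   To transmit, pass a per-transfer context (here an skb) that is echoed
 *   back later by ath10k_ce_completed_send_next():
 *
 *	ret = ath10k_ce_send(ce_pipe, skb, paddr, skb->len, transfer_id, 0);
 *
 *   To receive, keep the destination ring stocked with anonymous buffers;
 *   completed buffers are reaped with ath10k_ce_completed_recv_next():
 *
 *	ret = ath10k_ce_rx_post_buf(ce_pipe, skb, paddr);
 */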

static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar,
						       u32 ce_ctrl_addr,
						       unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS, n);
}

static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar,
						      u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS);
}

static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar,
						      u32 ce_ctrl_addr,
						      unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS, n);
}

static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar,
						     u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS);
}

static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar,
						    u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_SRRI_ADDRESS);
}

static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int addr)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_BA_ADDRESS, addr);
}

static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar,
					       u32 ce_ctrl_addr,
					       unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + SR_SIZE_ADDRESS, n);
}

static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar,
					       u32 ce_ctrl_addr,
					       unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar,
					   ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_DMAX_LENGTH_MASK) |
			   CE_CTRL1_DMAX_LENGTH_SET(n));
}

static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_SRC_RING_BYTE_SWAP_EN_MASK) |
			   CE_CTRL1_SRC_RING_BYTE_SWAP_EN_SET(n));
}

static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     unsigned int n)
{
	u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS,
			   (ctrl1_addr & ~CE_CTRL1_DST_RING_BYTE_SWAP_EN_MASK) |
			   CE_CTRL1_DST_RING_BYTE_SWAP_EN_SET(n));
}

static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar,
						     u32 ce_ctrl_addr)
{
	return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_DRRI_ADDRESS);
}

static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     u32 addr)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DR_BA_ADDRESS, addr);
}

static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar,
						u32 ce_ctrl_addr,
						unsigned int n)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + DR_SIZE_ADDRESS, n);
}

static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar,
						   u32 ce_ctrl_addr,
						   unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
			   (addr & ~SRC_WATERMARK_HIGH_MASK) |
			   SRC_WATERMARK_HIGH_SET(n));
}

static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar,
						  u32 ce_ctrl_addr,
						  unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS,
			   (addr & ~SRC_WATERMARK_LOW_MASK) |
			   SRC_WATERMARK_LOW_SET(n));
}

static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar,
						    u32 ce_ctrl_addr,
						    unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
			   (addr & ~DST_WATERMARK_HIGH_MASK) |
			   DST_WATERMARK_HIGH_SET(n));
}

static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar,
						   u32 ce_ctrl_addr,
						   unsigned int n)
{
	u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS,
			   (addr & ~DST_WATERMARK_LOW_MASK) |
			   DST_WATERMARK_LOW_SET(n));
}

static inline void ath10k_ce_copy_complete_intr_enable(struct ath10k *ar,
							u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr | HOST_IE_COPY_COMPLETE_MASK);
}

static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar,
							u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr & ~HOST_IE_COPY_COMPLETE_MASK);
}

static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar,
						    u32 ce_ctrl_addr)
{
	u32 host_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + HOST_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS,
			   host_ie_addr & ~CE_WATERMARK_MASK);
}

static inline void ath10k_ce_error_intr_enable(struct ath10k *ar,
					       u32 ce_ctrl_addr)
{
	u32 misc_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + MISC_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
			   misc_ie_addr | CE_ERROR_MASK);
}

static inline void ath10k_ce_error_intr_disable(struct ath10k *ar,
						u32 ce_ctrl_addr)
{
	u32 misc_ie_addr = ath10k_pci_read32(ar,
					     ce_ctrl_addr + MISC_IE_ADDRESS);

	ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS,
			   misc_ie_addr & ~CE_ERROR_MASK);
}

static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar,
						     u32 ce_ctrl_addr,
						     unsigned int mask)
{
	ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IS_ADDRESS, mask);
}

/*
 * Guts of ath10k_ce_send, used by both ath10k_ce_send and
 * ath10k_ce_sendlist_send.
 * The caller takes responsibility for any needed locking.
 */
int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state,
			  void *per_transfer_context,
			  u32 buffer,
			  unsigned int nbytes,
			  unsigned int transfer_id,
			  unsigned int flags)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	struct ce_desc *desc, *sdesc;
	unsigned int nentries_mask = src_ring->nentries_mask;
	unsigned int sw_index = src_ring->sw_index;
	unsigned int write_index = src_ring->write_index;
	u32 ctrl_addr = ce_state->ctrl_addr;
	u32 desc_flags = 0;
	int ret = 0;

	if (nbytes > ce_state->src_sz_max)
		ath10k_warn(ar, "%s: sending more than allowed (nbytes: %d, max: %d)\n",
			    __func__, nbytes, ce_state->src_sz_max);

	if (unlikely(CE_RING_DELTA(nentries_mask,
				   write_index, sw_index - 1) <= 0)) {
		ret = -ENOSR;
		goto exit;
	}

	desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space,
				   write_index);
	sdesc = CE_SRC_RING_TO_DESC(src_ring->shadow_base, write_index);

	desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA);

	if (flags & CE_SEND_FLAG_GATHER)
		desc_flags |= CE_DESC_FLAGS_GATHER;
	if (flags & CE_SEND_FLAG_BYTE_SWAP)
		desc_flags |= CE_DESC_FLAGS_BYTE_SWAP;

	sdesc->addr   = __cpu_to_le32(buffer);
	sdesc->nbytes = __cpu_to_le16(nbytes);
	sdesc->flags  = __cpu_to_le16(desc_flags);

	*desc = *sdesc;

	src_ring->per_transfer_context[write_index] = per_transfer_context;

	/* Update Source Ring Write Index */
	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);

	/* WORKAROUND */
	if (!(flags & CE_SEND_FLAG_GATHER))
		ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index);

	src_ring->write_index = write_index;
exit:
	return ret;
}

void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *src_ring = pipe->src_ring;
	u32 ctrl_addr = pipe->ctrl_addr;

	lockdep_assert_held(&ar_pci->ce_lock);

	/*
	 * This function must be called only if there is an incomplete
	 * scatter-gather transfer (before the index register is updated)
	 * that needs to be cleaned up.
	 */
	if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index))
		return;

	if (WARN_ON_ONCE(src_ring->write_index ==
			 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr)))
		return;

	src_ring->write_index--;
	src_ring->write_index &= src_ring->nentries_mask;

	src_ring->per_transfer_context[src_ring->write_index] = NULL;
}
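
/*
 * Illustrative sketch (an assumption about typical use, not driver code):
 * a caller building a multi-buffer "gather" transfer holds ce_lock, queues
 * all but the last fragment with CE_SEND_FLAG_GATHER, and unwinds already
 * queued descriptors (one __ath10k_ce_send_revert() call per entry) if a
 * later enqueue fails. The "frag0"/"frag1", "paddr0"/"paddr1" and "id"
 * names are hypothetical:
 *
 *	spin_lock_bh(&ar_pci->ce_lock);
 *
 *	ret = ath10k_ce_send_nolock(pipe, frag0, paddr0, len0, id,
 *				    CE_SEND_FLAG_GATHER);
 *	if (ret == 0) {
 *		ret = ath10k_ce_send_nolock(pipe, frag1, paddr1, len1, id, 0);
 *		if (ret)
 *			__ath10k_ce_send_revert(pipe);
 *	}
 *
 *	spin_unlock_bh(&ar_pci->ce_lock);
 */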

int ath10k_ce_send(struct ath10k_ce_pipe *ce_state,
		   void *per_transfer_context,
		   u32 buffer,
		   unsigned int nbytes,
		   unsigned int transfer_id,
		   unsigned int flags)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_send_nolock(ce_state, per_transfer_context,
				    buffer, nbytes, transfer_id, flags);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int delta;

	spin_lock_bh(&ar_pci->ce_lock);
	delta = CE_RING_DELTA(pipe->src_ring->nentries_mask,
			      pipe->src_ring->write_index,
			      pipe->src_ring->sw_index - 1);
	spin_unlock_bh(&ar_pci->ce_lock);

	return delta;
}

int __ath10k_ce_rx_num_free_bufs(struct ath10k_ce_pipe *pipe)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int write_index = dest_ring->write_index;
	unsigned int sw_index = dest_ring->sw_index;

	lockdep_assert_held(&ar_pci->ce_lock);

	return CE_RING_DELTA(nentries_mask, write_index, sw_index - 1);
}

int __ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_ring *dest_ring = pipe->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int write_index = dest_ring->write_index;
	unsigned int sw_index = dest_ring->sw_index;
	struct ce_desc *base = dest_ring->base_addr_owner_space;
	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index);
	u32 ctrl_addr = pipe->ctrl_addr;

	lockdep_assert_held(&ar_pci->ce_lock);

	if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0)
		return -EIO;

	desc->addr = __cpu_to_le32(paddr);
	desc->nbytes = 0;

	dest_ring->per_transfer_context[write_index] = ctx;
	write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
	dest_ring->write_index = write_index;

	return 0;
}

int ath10k_ce_rx_post_buf(struct ath10k_ce_pipe *pipe, void *ctx, u32 paddr)
{
	struct ath10k *ar = pipe->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = __ath10k_ce_rx_post_buf(pipe, ctx, paddr);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}
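
/*
 * Illustrative sketch (assumed caller behaviour, not driver code): an rx
 * replenish routine typically checks the free space and posts one buffer
 * per free slot while holding ce_lock. "alloc_rx_skb" and "map_rx_skb"
 * are hypothetical helpers standing in for skb allocation and DMA mapping:
 *
 *	spin_lock_bh(&ar_pci->ce_lock);
 *	num = __ath10k_ce_rx_num_free_bufs(pipe);
 *	while (num--) {
 *		skb = alloc_rx_skb();
 *		paddr = map_rx_skb(skb);
 *		if (__ath10k_ce_rx_post_buf(pipe, skb, paddr))
 *			break;
 *	}
 *	spin_unlock_bh(&ar_pci->ce_lock);
 */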

/*
 * Guts of ath10k_ce_completed_recv_next.
 * The caller takes responsibility for any necessary locking.
 */
static int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state,
						void **per_transfer_contextp,
						u32 *bufferp,
						unsigned int *nbytesp,
						unsigned int *transfer_idp,
						unsigned int *flagsp)
{
	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
	unsigned int nentries_mask = dest_ring->nentries_mask;
	unsigned int sw_index = dest_ring->sw_index;

	struct ce_desc *base = dest_ring->base_addr_owner_space;
	struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);
	struct ce_desc sdesc;
	u16 nbytes;

	/* Copy in one go for performance reasons */
	sdesc = *desc;

	nbytes = __le16_to_cpu(sdesc.nbytes);
	if (nbytes == 0) {
		/*
		 * This closes a relatively unusual race where the Host
		 * sees the updated DRRI before the update to the
		 * corresponding descriptor has completed. We treat this
		 * as a descriptor that is not yet done.
		 */
		return -EIO;
	}

	desc->nbytes = 0;

	/* Return data from completed destination descriptor */
	*bufferp = __le32_to_cpu(sdesc.addr);
	*nbytesp = nbytes;
	*transfer_idp = MS(__le16_to_cpu(sdesc.flags), CE_DESC_FLAGS_META_DATA);

	if (__le16_to_cpu(sdesc.flags) & CE_DESC_FLAGS_BYTE_SWAP)
		*flagsp = CE_RECV_FLAG_SWAPPED;
	else
		*flagsp = 0;

	if (per_transfer_contextp)
		*per_transfer_contextp =
			dest_ring->per_transfer_context[sw_index];

	/* sanity */
	dest_ring->per_transfer_context[sw_index] = NULL;

	/* Update sw_index */
	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
	dest_ring->sw_index = sw_index;

	return 0;
}

int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state,
				  void **per_transfer_contextp,
				  u32 *bufferp,
				  unsigned int *nbytesp,
				  unsigned int *transfer_idp,
				  unsigned int *flagsp)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_completed_recv_next_nolock(ce_state,
						   per_transfer_contextp,
						   bufferp, nbytesp,
						   transfer_idp, flagsp);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}
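
/*
 * Illustrative sketch (assumed usage, not driver code): a recv completion
 * callback registered via ath10k_ce_init_pipe() would typically drain all
 * completed buffers in a loop; "handle_rx_skb" is a hypothetical helper:
 *
 *	while (ath10k_ce_completed_recv_next(ce_state, (void **)&skb, &paddr,
 *					     &nbytes, &id, &flags) == 0)
 *		handle_rx_skb(skb, nbytes, flags);
 */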

int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state,
			       void **per_transfer_contextp,
			       u32 *bufferp)
{
	struct ath10k_ce_ring *dest_ring;
	unsigned int nentries_mask;
	unsigned int sw_index;
	unsigned int write_index;
	int ret;
	struct ath10k *ar;
	struct ath10k_pci *ar_pci;

	dest_ring = ce_state->dest_ring;

	if (!dest_ring)
		return -EIO;

	ar = ce_state->ar;
	ar_pci = ath10k_pci_priv(ar);

	spin_lock_bh(&ar_pci->ce_lock);

	nentries_mask = dest_ring->nentries_mask;
	sw_index = dest_ring->sw_index;
	write_index = dest_ring->write_index;
	if (write_index != sw_index) {
		struct ce_desc *base = dest_ring->base_addr_owner_space;
		struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index);

		/* Return data from completed destination descriptor */
		*bufferp = __le32_to_cpu(desc->addr);

		if (per_transfer_contextp)
			*per_transfer_contextp =
				dest_ring->per_transfer_context[sw_index];

		/* sanity */
		dest_ring->per_transfer_context[sw_index] = NULL;

		/* Update sw_index */
		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
		dest_ring->sw_index = sw_index;
		ret = 0;
	} else {
		ret = -EIO;
	}

	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

/*
 * Guts of ath10k_ce_completed_send_next.
 * The caller takes responsibility for any necessary locking.
 */
static int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state,
						void **per_transfer_contextp,
						u32 *bufferp,
						unsigned int *nbytesp,
						unsigned int *transfer_idp)
{
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	u32 ctrl_addr = ce_state->ctrl_addr;
	struct ath10k *ar = ce_state->ar;
	unsigned int nentries_mask = src_ring->nentries_mask;
	unsigned int sw_index = src_ring->sw_index;
	struct ce_desc *sdesc, *sbase;
	unsigned int read_index;

	if (src_ring->hw_index == sw_index) {
		/*
		 * The SW completion index has caught up with the cached
		 * version of the HW completion index.
		 * Update the cached HW completion index to see whether
		 * the SW has really caught up to the HW, or if the cached
		 * value of the HW index has become stale.
		 */

		read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
		if (read_index == 0xffffffff)
			return -ENODEV;

		read_index &= nentries_mask;
		src_ring->hw_index = read_index;
	}

	read_index = src_ring->hw_index;

	if (read_index == sw_index)
		return -EIO;

	sbase = src_ring->shadow_base;
	sdesc = CE_SRC_RING_TO_DESC(sbase, sw_index);

	/* Return data from completed source descriptor */
	*bufferp = __le32_to_cpu(sdesc->addr);
	*nbytesp = __le16_to_cpu(sdesc->nbytes);
	*transfer_idp = MS(__le16_to_cpu(sdesc->flags),
			   CE_DESC_FLAGS_META_DATA);

	if (per_transfer_contextp)
		*per_transfer_contextp =
			src_ring->per_transfer_context[sw_index];

	/* sanity */
	src_ring->per_transfer_context[sw_index] = NULL;

	/* Update sw_index */
	sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
	src_ring->sw_index = sw_index;

	return 0;
}

/* NB: Modeled after ath10k_ce_completed_send_next */
int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state,
			       void **per_transfer_contextp,
			       u32 *bufferp,
			       unsigned int *nbytesp,
			       unsigned int *transfer_idp)
{
	struct ath10k_ce_ring *src_ring;
	unsigned int nentries_mask;
	unsigned int sw_index;
	unsigned int write_index;
	int ret;
	struct ath10k *ar;
	struct ath10k_pci *ar_pci;

	src_ring = ce_state->src_ring;

	if (!src_ring)
		return -EIO;

	ar = ce_state->ar;
	ar_pci = ath10k_pci_priv(ar);

	spin_lock_bh(&ar_pci->ce_lock);

	nentries_mask = src_ring->nentries_mask;
	sw_index = src_ring->sw_index;
	write_index = src_ring->write_index;

	if (write_index != sw_index) {
		struct ce_desc *base = src_ring->base_addr_owner_space;
		struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index);

		/* Return data from completed source descriptor */
		*bufferp = __le32_to_cpu(desc->addr);
		*nbytesp = __le16_to_cpu(desc->nbytes);
		*transfer_idp = MS(__le16_to_cpu(desc->flags),
				   CE_DESC_FLAGS_META_DATA);

		if (per_transfer_contextp)
			*per_transfer_contextp =
				src_ring->per_transfer_context[sw_index];

		/* sanity */
		src_ring->per_transfer_context[sw_index] = NULL;

		/* Update sw_index */
		sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
		src_ring->sw_index = sw_index;
		ret = 0;
	} else {
		ret = -EIO;
	}

	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}

int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state,
				  void **per_transfer_contextp,
				  u32 *bufferp,
				  unsigned int *nbytesp,
				  unsigned int *transfer_idp)
{
	struct ath10k *ar = ce_state->ar;
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ret;

	spin_lock_bh(&ar_pci->ce_lock);
	ret = ath10k_ce_completed_send_next_nolock(ce_state,
						   per_transfer_contextp,
						   bufferp, nbytesp,
						   transfer_idp);
	spin_unlock_bh(&ar_pci->ce_lock);

	return ret;
}
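
/*
 * Illustrative sketch (assumed usage, not driver code): a send completion
 * callback registered via ath10k_ce_init_pipe() typically reaps finished
 * transfers in a loop and releases the echoed per-transfer contexts;
 * "free_tx_skb" is a hypothetical helper:
 *
 *	while (ath10k_ce_completed_send_next(ce_state, (void **)&skb, &paddr,
 *					     &nbytes, &id) == 0)
 *		free_tx_skb(skb);
 */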

/*
 * Guts of interrupt handler for per-engine interrupts on a particular CE.
 *
 * Invokes registered callbacks for recv_complete,
 * send_complete, and watermarks.
 */
void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	u32 ctrl_addr = ce_state->ctrl_addr;

	spin_lock_bh(&ar_pci->ce_lock);

	/* Clear the copy-complete interrupts that will be handled here. */
	ath10k_ce_engine_int_status_clear(ar, ctrl_addr,
					  HOST_IS_COPY_COMPLETE_MASK);

	spin_unlock_bh(&ar_pci->ce_lock);

	if (ce_state->recv_cb)
		ce_state->recv_cb(ce_state);

	if (ce_state->send_cb)
		ce_state->send_cb(ce_state);

	spin_lock_bh(&ar_pci->ce_lock);

	/*
	 * Misc CE interrupts are not being handled, but still need
	 * to be cleared.
	 */
	ath10k_ce_engine_int_status_clear(ar, ctrl_addr, CE_WATERMARK_MASK);

	spin_unlock_bh(&ar_pci->ce_lock);
}

/*
 * Handler for per-engine interrupts on ALL active CEs.
 * This is used in cases where the system is sharing a
 * single interrupt for all CEs.
 */

void ath10k_ce_per_engine_service_any(struct ath10k *ar)
{
	int ce_id;
	u32 intr_summary;

	intr_summary = CE_INTERRUPT_SUMMARY(ar);

	for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) {
		if (intr_summary & (1 << ce_id))
			intr_summary &= ~(1 << ce_id);
		else
			/* no intr pending on this CE */
			continue;

		ath10k_ce_per_engine_service(ar, ce_id);
	}
}
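
/*
 * Illustrative sketch (an assumption, not driver code): with a single shared
 * legacy or MSI interrupt, the PCI interrupt bottom half would simply let
 * this routine walk the interrupt summary register; the tasklet below is a
 * hypothetical example of such a caller:
 *
 *	static void ath10k_example_tasklet(unsigned long data)
 *	{
 *		struct ath10k *ar = (struct ath10k *)data;
 *
 *		ath10k_ce_per_engine_service_any(ar);
 *	}
 */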

/*
 * Adjust interrupts for the copy complete handler.
 * If it's needed for either send or recv, then unmask
 * this interrupt; otherwise, mask it.
 *
 * Called with ce_lock held.
 */
static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state)
{
	u32 ctrl_addr = ce_state->ctrl_addr;
	struct ath10k *ar = ce_state->ar;
	bool disable_copy_compl_intr = ce_state->attr_flags & CE_ATTR_DIS_INTR;

	if (!disable_copy_compl_intr &&
	    (ce_state->send_cb || ce_state->recv_cb))
		ath10k_ce_copy_complete_intr_enable(ar, ctrl_addr);
	else
		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);

	ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
}

int ath10k_ce_disable_interrupts(struct ath10k *ar)
{
	int ce_id;

	for (ce_id = 0; ce_id < CE_COUNT; ce_id++) {
		u32 ctrl_addr = ath10k_ce_base_address(ce_id);

		ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr);
		ath10k_ce_error_intr_disable(ar, ctrl_addr);
		ath10k_ce_watermark_intr_disable(ar, ctrl_addr);
	}

	return 0;
}

void ath10k_ce_enable_interrupts(struct ath10k *ar)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	int ce_id;

	for (ce_id = 0; ce_id < CE_COUNT; ce_id++)
		ath10k_ce_per_engine_handler_adjust(&ar_pci->ce_states[ce_id]);
}
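
/*
 * Illustrative sketch (assumed bring-up/teardown ordering, not driver code):
 * the HIF layer would typically enable CE interrupts once all pipes have
 * been initialized and disable them again before tearing the pipes down:
 *
 *	ath10k_ce_enable_interrupts(ar);
 *	...
 *	ath10k_ce_disable_interrupts(ar);
 */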

static int ath10k_ce_init_src_ring(struct ath10k *ar,
				   unsigned int ce_id,
				   const struct ce_attr *attr)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	struct ath10k_ce_ring *src_ring = ce_state->src_ring;
	u32 nentries, ctrl_addr = ath10k_ce_base_address(ce_id);

	nentries = roundup_pow_of_two(attr->src_nentries);

	memset(src_ring->per_transfer_context, 0,
	       nentries * sizeof(*src_ring->per_transfer_context));

	src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr);
	src_ring->sw_index &= src_ring->nentries_mask;
	src_ring->hw_index = src_ring->sw_index;

	src_ring->write_index =
		ath10k_ce_src_ring_write_index_get(ar, ctrl_addr);
	src_ring->write_index &= src_ring->nentries_mask;

	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr,
					 src_ring->base_addr_ce_space);
	ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries);
	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max);
	ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);

	ath10k_dbg(ar, ATH10K_DBG_BOOT,
		   "boot init ce src ring id %d entries %d base_addr %p\n",
		   ce_id, nentries, src_ring->base_addr_owner_space);

	return 0;
}

static int ath10k_ce_init_dest_ring(struct ath10k *ar,
				    unsigned int ce_id,
				    const struct ce_attr *attr)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	struct ath10k_ce_ring *dest_ring = ce_state->dest_ring;
	u32 nentries, ctrl_addr = ath10k_ce_base_address(ce_id);

	nentries = roundup_pow_of_two(attr->dest_nentries);

	memset(dest_ring->per_transfer_context, 0,
	       nentries * sizeof(*dest_ring->per_transfer_context));

	dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr);
	dest_ring->sw_index &= dest_ring->nentries_mask;
	dest_ring->write_index =
		ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
	dest_ring->write_index &= dest_ring->nentries_mask;

	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr,
					  dest_ring->base_addr_ce_space);
	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries);
	ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);

	ath10k_dbg(ar, ATH10K_DBG_BOOT,
		   "boot ce dest ring id %d entries %d base_addr %p\n",
		   ce_id, nentries, dest_ring->base_addr_owner_space);

	return 0;
}

static struct ath10k_ce_ring *
ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id,
			 const struct ce_attr *attr)
{
	struct ath10k_ce_ring *src_ring;
	u32 nentries = attr->src_nentries;
	dma_addr_t base_addr;

	nentries = roundup_pow_of_two(nentries);

	src_ring = kzalloc(sizeof(*src_ring) +
			   (nentries *
			    sizeof(*src_ring->per_transfer_context)),
			   GFP_KERNEL);
	if (src_ring == NULL)
		return ERR_PTR(-ENOMEM);

	src_ring->nentries = nentries;
	src_ring->nentries_mask = nentries - 1;

	/*
	 * Legacy platforms that do not support cache
	 * coherent DMA are unsupported
	 */
	src_ring->base_addr_owner_space_unaligned =
		dma_alloc_coherent(ar->dev,
				   (nentries * sizeof(struct ce_desc) +
				    CE_DESC_RING_ALIGN),
				   &base_addr, GFP_KERNEL);
	if (!src_ring->base_addr_owner_space_unaligned) {
		kfree(src_ring);
		return ERR_PTR(-ENOMEM);
	}

	src_ring->base_addr_ce_space_unaligned = base_addr;

	src_ring->base_addr_owner_space = PTR_ALIGN(
			src_ring->base_addr_owner_space_unaligned,
			CE_DESC_RING_ALIGN);
	src_ring->base_addr_ce_space = ALIGN(
			src_ring->base_addr_ce_space_unaligned,
			CE_DESC_RING_ALIGN);

	/*
	 * Also allocate a shadow src ring in regular
	 * mem to use for faster access.
	 */
	src_ring->shadow_base_unaligned =
		kmalloc((nentries * sizeof(struct ce_desc) +
			 CE_DESC_RING_ALIGN), GFP_KERNEL);
	if (!src_ring->shadow_base_unaligned) {
		dma_free_coherent(ar->dev,
				  (nentries * sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  src_ring->base_addr_owner_space,
				  src_ring->base_addr_ce_space);
		kfree(src_ring);
		return ERR_PTR(-ENOMEM);
	}

	src_ring->shadow_base = PTR_ALIGN(
			src_ring->shadow_base_unaligned,
			CE_DESC_RING_ALIGN);

	return src_ring;
}

static struct ath10k_ce_ring *
ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id,
			  const struct ce_attr *attr)
{
	struct ath10k_ce_ring *dest_ring;
	u32 nentries;
	dma_addr_t base_addr;

	nentries = roundup_pow_of_two(attr->dest_nentries);

	dest_ring = kzalloc(sizeof(*dest_ring) +
			    (nentries *
			     sizeof(*dest_ring->per_transfer_context)),
			    GFP_KERNEL);
	if (dest_ring == NULL)
		return ERR_PTR(-ENOMEM);

	dest_ring->nentries = nentries;
	dest_ring->nentries_mask = nentries - 1;

	/*
	 * Legacy platforms that do not support cache
	 * coherent DMA are unsupported
	 */
	dest_ring->base_addr_owner_space_unaligned =
		dma_alloc_coherent(ar->dev,
				   (nentries * sizeof(struct ce_desc) +
				    CE_DESC_RING_ALIGN),
				   &base_addr, GFP_KERNEL);
	if (!dest_ring->base_addr_owner_space_unaligned) {
		kfree(dest_ring);
		return ERR_PTR(-ENOMEM);
	}

	dest_ring->base_addr_ce_space_unaligned = base_addr;

	/*
	 * Zero the descriptor memory so that stale garbage data
	 * cannot crash the system during firmware download.
	 */
	memset(dest_ring->base_addr_owner_space_unaligned, 0,
	       nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN);

	dest_ring->base_addr_owner_space = PTR_ALIGN(
			dest_ring->base_addr_owner_space_unaligned,
			CE_DESC_RING_ALIGN);
	dest_ring->base_addr_ce_space = ALIGN(
			dest_ring->base_addr_ce_space_unaligned,
			CE_DESC_RING_ALIGN);

	return dest_ring;
}

/*
 * Initialize a Copy Engine based on caller-supplied attributes.
 * This may be called once to initialize both source and destination
 * rings or it may be called twice for separate source and destination
 * initialization. It may be that only one side or the other is
 * initialized by software/firmware.
 */
int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id,
			const struct ce_attr *attr,
			void (*send_cb)(struct ath10k_ce_pipe *),
			void (*recv_cb)(struct ath10k_ce_pipe *))
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	int ret;

	/*
	 * Make sure there are enough CE ring buffer entries for HTT TX to
	 * avoid additional TX locking checks.
	 *
	 * For lack of a better place, do the check here.
	 */
	BUILD_BUG_ON(2*TARGET_NUM_MSDU_DESC >
		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));
	BUILD_BUG_ON(2*TARGET_10X_NUM_MSDU_DESC >
		     (CE_HTT_H2T_MSG_SRC_NENTRIES - 1));

	spin_lock_bh(&ar_pci->ce_lock);
	ce_state->ar = ar;
	ce_state->id = ce_id;
	ce_state->ctrl_addr = ath10k_ce_base_address(ce_id);
	ce_state->attr_flags = attr->flags;
	ce_state->src_sz_max = attr->src_sz_max;
	if (attr->src_nentries)
		ce_state->send_cb = send_cb;
	if (attr->dest_nentries)
		ce_state->recv_cb = recv_cb;
	spin_unlock_bh(&ar_pci->ce_lock);

	if (attr->src_nentries) {
		ret = ath10k_ce_init_src_ring(ar, ce_id, attr);
		if (ret) {
			ath10k_err(ar, "Failed to initialize CE src ring for ID: %d (%d)\n",
				   ce_id, ret);
			return ret;
		}
	}

	if (attr->dest_nentries) {
		ret = ath10k_ce_init_dest_ring(ar, ce_id, attr);
		if (ret) {
			ath10k_err(ar, "Failed to initialize CE dest ring for ID: %d (%d)\n",
				   ce_id, ret);
			return ret;
		}
	}

	return 0;
}
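
/*
 * Illustrative sketch (assumed caller sequence, not driver code): a pipe is
 * first allocated, then (re)initialized each time the hardware is brought
 * up, and torn down in the reverse order; "send_done" and "recv_done" are
 * hypothetical callbacks:
 *
 *	ret = ath10k_ce_alloc_pipe(ar, ce_id, attr);
 *	ret = ath10k_ce_init_pipe(ar, ce_id, attr, send_done, recv_done);
 *	...
 *	ath10k_ce_deinit_pipe(ar, ce_id);
 *	ath10k_ce_free_pipe(ar, ce_id);
 */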

static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id)
{
	u32 ctrl_addr = ath10k_ce_base_address(ce_id);

	ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0);
	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0);
}

static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id)
{
	u32 ctrl_addr = ath10k_ce_base_address(ce_id);

	ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0);
	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0);
}

void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id)
{
	ath10k_ce_deinit_src_ring(ar, ce_id);
	ath10k_ce_deinit_dest_ring(ar, ce_id);
}

int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id,
			 const struct ce_attr *attr)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];
	int ret;

	if (attr->src_nentries) {
		ce_state->src_ring = ath10k_ce_alloc_src_ring(ar, ce_id, attr);
		if (IS_ERR(ce_state->src_ring)) {
			ret = PTR_ERR(ce_state->src_ring);
			ath10k_err(ar, "failed to allocate copy engine source ring %d: %d\n",
				   ce_id, ret);
			ce_state->src_ring = NULL;
			return ret;
		}
	}

	if (attr->dest_nentries) {
		ce_state->dest_ring = ath10k_ce_alloc_dest_ring(ar, ce_id,
								attr);
		if (IS_ERR(ce_state->dest_ring)) {
			ret = PTR_ERR(ce_state->dest_ring);
			ath10k_err(ar, "failed to allocate copy engine destination ring %d: %d\n",
				   ce_id, ret);
			ce_state->dest_ring = NULL;
			return ret;
		}
	}

	return 0;
}

void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id)
{
	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
	struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id];

	if (ce_state->src_ring) {
		kfree(ce_state->src_ring->shadow_base_unaligned);
		dma_free_coherent(ar->dev,
				  (ce_state->src_ring->nentries *
				   sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  ce_state->src_ring->base_addr_owner_space,
				  ce_state->src_ring->base_addr_ce_space);
		kfree(ce_state->src_ring);
	}

	if (ce_state->dest_ring) {
		dma_free_coherent(ar->dev,
				  (ce_state->dest_ring->nentries *
				   sizeof(struct ce_desc) +
				   CE_DESC_RING_ALIGN),
				  ce_state->dest_ring->base_addr_owner_space,
				  ce_state->dest_ring->base_addr_ce_space);
		kfree(ce_state->dest_ring);
	}

	ce_state->src_ring = NULL;
	ce_state->dest_ring = NULL;
}