/***************************************************************************
 Copyright (c) 2009-2013 The Linux Foundation. All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
     * Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
     * Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.
     * Neither the name of The Linux Foundation nor the names of its contributors may
       be used to endorse or promote products derived from this software
       without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
  ***************************************************************************/
271d0268c6b855531eedd297f1cb7e4ac5817c9103Brent DeGraaf
/* Assumes NEON instructions and a cache line size of 64 bytes. */
291d0268c6b855531eedd297f1cb7e4ac5817c9103Brent DeGraaf
/*
 * Preload tuning knobs.  Block counts below are in units of PLDSIZE bytes.
 *
 *   PLDOFFS    how many PLDSIZE-byte blocks ahead of the source pointer the
 *              copy loops preload (used as PLDOFFS*PLDSIZE byte offsets).
 *   PLDTHRESH  copies of at most this many blocks skip preloading entirely.
 *   BBTHRESH   copies of at most this many blocks (4096 bytes) use the
 *              fixed-distance preload setup instead of the computed one.
 *   PLDSIZE    bytes covered by one preload step; matches the 64-byte
 *              block size moved per loop iteration.
 */
#define PLDOFFS	(10)
#define PLDTHRESH (PLDOFFS)
#define BBTHRESH (4096/64)
#define PLDSIZE (64)

/* The loops preload at least one block ahead; zero or negative is invalid. */
#if (PLDOFFS < 1)
#error Routine does not support offsets less than 1
#endif

/*
 * The preloading paths subtract PLDOFFS from the block count, so they must
 * only be entered when more than PLDOFFS blocks are available.
 */
#if (PLDTHRESH < PLDOFFS)
#error PLD threshold must be greater than or equal to the PLD offset
#endif

	.text
	.fpu    neon
451d0268c6b855531eedd297f1cb7e4ac5817c9103Brent DeGraaf
/*
 * .L_memcpy_base: NEON forward-copy body.
 *
 * Register use, as read by the code below:
 *   r0   destination pointer, post-incremented as data is stored
 *   r1   source pointer, post-incremented as data is loaded
 *   r2   byte count
 *
 * Exit is through .L_neon_exit, which pops two words into r0 and pc.  The
 * code that reaches this label must therefore already have pushed two
 * words (presumably the original destination and the return address; the
 * pushing code is not in this file, confirm against the includer).
 *
 * Scratch: r2, r3, r12, lr, q0-q3, q8-q11 and the flags.  r9 and r10 are
 * saved and restored around the preloading loops.  lr is overwritten, which
 * is safe only because the return goes through the popped pc.
 *
 * The count is handled as an unsigned quantity (logical shifts and bit
 * masks), so the size dispatch uses unsigned branch conditions.  A signed
 * test would send a count with bit 31 set to the "fewer than 4 bytes" tail.
 */
.L_memcpy_base:
	/* Dispatch on size: <4, <16, <32, <64, otherwise 64-byte block loops. */
	cmp	r2, #4
	blo	.L_neon_lt4
	cmp	r2, #16
	blo	.L_neon_lt16
	cmp	r2, #32
	blo	.L_neon_16		/* N is set here (r2 - 32 < 0); .L_neon_16 tests it */
	cmp	r2, #64
	blo	.L_neon_copy_32_a

	/* r12 = number of whole 64-byte blocks (top 6 bits are zero). */
	mov	r12, r2, lsr #6
	cmp	r12, #PLDTHRESH
	ble	.L_neon_copy_64_loop_nopld	/* too short to be worth preloading */

	/* r9 and r10 are used by every preloading path below. */
	push	{r9, r10}
	.cfi_adjust_cfa_offset 8
	.cfi_rel_offset r9, 0
	.cfi_rel_offset r10, 4

	cmp	r12, #BBTHRESH
	ble	.L_neon_prime_pump	/* up to 4096 bytes: fixed preload distance */

	/*
	 * Compute the preload distance in bytes into lr:
	 *   lr = (((r0 + 0x400) - (r1 + PLDOFFS*PLDSIZE)) & 0x7FF)
	 *        + PLDOFFS*PLDSIZE
	 * The lsl/lsr pair keeps only the low 11 bits of the difference.
	 * NOTE(review): this looks like it staggers the preload address
	 * against the destination address modulo 2 KB; the hardware reason is
	 * not stated in this file.
	 */
	add	lr, r0, #0x400
	add	r9, r1, #(PLDOFFS*PLDSIZE)
	sub	lr, lr, r9
	lsl	lr, lr, #21
	lsr	lr, lr, #21
	add	lr, lr, #(PLDOFFS*PLDSIZE)
	cmp	r12, lr, lsr #6
	ble	.L_neon_prime_pump	/* copy is not longer than the distance */

	/*
	 * r9 = (lr / 64) - PLDOFFS = blocks between the PLDOFFS preload point
	 * and the computed one.  The gt condition always holds here because
	 * the ble above was not taken.
	 */
	itt	gt
	movgt	r9, #(PLDOFFS)
	rsbsgt	r9, r9, lr, lsr #6
	ble	.L_neon_prime_pump	/* nothing to gain over the fixed distance */

	/* r10 = preload pointer: source + distance, rounded down to 64 bytes. */
	add	r10, r1, lr
	bic	r10, #0x3F

	/*
	 * The last lr/64 blocks are left for the no-preload loop, so only
	 * r12 - lr/64 blocks are copied with preloading.
	 */
	sub	r12, r12, lr, lsr #6

	/*
	 * Split those blocks: r9 iterations of the double-preload loop first,
	 * then r12 iterations of a single-preload loop.  If fewer than r9
	 * blocks are available, all of them go to the first loop.
	 */
	cmp	r9, r12
	itee	le
	suble	r12, r12, r9
	movgt	r9, r12
	movgt	r12, #0

	pld	[r1, #((PLDOFFS-1)*PLDSIZE)]
.L_neon_copy_64_loop_outer_doublepld:
	/* Preload near (pld at the PLDOFFS distance) and far (load via r10). */
	pld	[r1, #((PLDOFFS)*PLDSIZE)]
	vld1.32	{q0, q1}, [r1]!
	vld1.32	{q2, q3}, [r1]!
	ldr	r3, [r10]		/* value is discarded; the load only touches the line */
	subs	r9, r9, #1		/* flags stay live until the bne below */
	vst1.32	{q0, q1}, [r0]!
	vst1.32	{q2, q3}, [r0]!
	add	r10, #64
	bne	.L_neon_copy_64_loop_outer_doublepld
	cmp	r12, #0
	beq	.L_neon_pop_before_nopld

	/* 512 KB or more still to copy with preload: use the pld-based loop. */
	cmp	r12, #(512*1024/64)
	blt	.L_neon_copy_64_loop_outer

.L_neon_copy_64_loop_ddr:
	/* Same block copy, but the far preload is a pld hint, not a load. */
	vld1.32	{q0, q1}, [r1]!
	vld1.32	{q2, q3}, [r1]!
	pld	[r10]
	subs	r12, r12, #1
	vst1.32	{q0, q1}, [r0]!
	vst1.32	{q2, q3}, [r0]!
	add	r10, #64
	bne	.L_neon_copy_64_loop_ddr
	b	.L_neon_pop_before_nopld

.L_neon_prime_pump:
	/*
	 * Fixed-distance setup: preload PLDOFFS blocks ahead and leave the
	 * final PLDOFFS blocks (lr bytes) for the no-preload loop.  The caller
	 * of this path guarantees r12 > PLDOFFS via the PLDTHRESH test.
	 */
	mov	lr, #(PLDOFFS*PLDSIZE)
	add	r10, r1, #(PLDOFFS*PLDSIZE)
	bic	r10, #0x3F
	sub	r12, r12, #PLDOFFS
	ldr	r3, [r10, #(-1*PLDSIZE)]	/* touch the line just before r10 */

.L_neon_copy_64_loop_outer:
	/* Copy one 64-byte block per iteration, touching the line at r10. */
	vld1.32	{q0, q1}, [r1]!
	vld1.32	{q2, q3}, [r1]!
	ldr	r3, [r10]		/* value is discarded; the load only touches the line */
	subs	r12, r12, #1
	vst1.32	{q0, q1}, [r0]!
	vst1.32	{q2, q3}, [r0]!
	add	r10, #64
	bne	.L_neon_copy_64_loop_outer

.L_neon_pop_before_nopld:
	/* r12 = blocks held back for the no-preload loop (lr bytes / 64). */
	mov	r12, lr, lsr #6
	pop	{r9, r10}
	.cfi_adjust_cfa_offset -8
	.cfi_restore r9
	.cfi_restore r10

.L_neon_copy_64_loop_nopld:
	/* Plain 64-byte block copy, r12 iterations (r12 >= 1 on entry). */
	vld1.32	{q8, q9}, [r1]!
	vld1.32	{q10, q11}, [r1]!
	subs	r12, r12, #1
	vst1.32	{q8, q9}, [r0]!
	vst1.32	{q10, q11}, [r0]!
	bne	.L_neon_copy_64_loop_nopld
	ands	r2, r2, #0x3f		/* r2 = bytes left after the whole blocks */
	beq	.L_neon_exit

.L_neon_copy_32_a:
	/* lsl #27 puts bit 5 of r2 in C (32-byte chunk) and bit 4 in N (16). */
	movs	r3, r2, lsl #27
	bcc	.L_neon_16
	vld1.32	{q0,q1}, [r1]!
	vst1.32	{q0,q1}, [r0]!

.L_neon_16:
	/*
	 * N set means a 16-byte chunk is due.  N comes either from the movs
	 * above (bit 4 of r2) or from the entry compare against 32, where r2
	 * is in 16..31 and the subtraction result is negative.
	 */
	bpl	.L_neon_lt16
	vld1.32	{q8}, [r1]!
	vst1.32	{q8}, [r0]!
	ands	r2, r2, #0x0f
	beq	.L_neon_exit

.L_neon_lt16:
	/* lsl #29 puts bit 3 of r2 in C (8-byte chunk) and bit 2 in N (4). */
	movs	r3, r2, lsl #29
	bcc	1f
	vld1.8	{d0}, [r1]!
	vst1.8	{d0}, [r0]!
1:
	/*
	 * Test N alone: a shifted movs does not write V, so a signed
	 * condition here would depend on whatever V was left behind earlier.
	 */
	bpl	.L_neon_lt4
	/* Four bytes, one lane each, so no alignment is required. */
	vld4.8	{d0[0], d1[0], d2[0], d3[0]}, [r1]!
	vst4.8	{d0[0], d1[0], d2[0], d3[0]}, [r0]!

.L_neon_lt4:
	/* lsl #31 puts bit 1 of r2 in C (halfword) and bit 0 in N (byte). */
	movs	r2, r2, lsl #31
	itt	cs
	ldrhcs	r3, [r1], #2
	strhcs	r3, [r0], #2
	itt	mi
	ldrbmi	r3, [r1]
	strbmi	r3, [r0]

.L_neon_exit:
	/* Restore the two words pushed before entry: r0 and the return pc. */
	pop	{r0, pc}
189