@ R4R8First_v7.s revision b676a05348e4c516fa8b57e33b10548e6142c3f8
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		R4R8First_v7.s
@
@	Content:	Radix8First and Radix4First functions, ARMv7 assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

@-----------------------------------------------------------------------
@ Radix8First
@
@ Processes 32-bit integer data in place, one group of 16 words
@ (64 bytes) per loop iteration.
@
@ In:    r0 = pointer to the data
@        r1 = number of 16-word groups; 0 returns without touching memory
@ Out:   data at r0 overwritten group by group; r0 is advanced past the
@        last processed group
@ Uses:  r3, d0-d19, q15, condition flags
@ Saves: r4-r11 and lr (integer prologue kept unchanged, although only
@        r0, r1 and r3 are written), and d8-d15
@
@ d8-d15 are callee-saved under the ARM procedure call standard and are
@ overwritten by this routine, so they are pushed on entry and restored
@ on exit.
@
@ NOTE(review): presumably called from C as
@ void Radix8First(int *buf, int num) -- confirm against the caller.
@-----------------------------------------------------------------------
	.section .text
	.global	Radix8First

Radix8First:
	stmdb     		sp!, {r4 - r11, lr}
	VPUSH			{d8 - d15}			@ preserve callee-saved NEON registers clobbered below

	ldr       		r3, SQRT1_2
	cmp       		r1, #0

	VDUP.I32  		Q15, r3				@ every lane of Q15 = SQRT1_2 (flags from cmp are untouched)
	beq       		Radix8First_END		@ nothing to do for a zero count

Radix8First_LOOP:
	VLD1.I32			{d0, d1, d2, d3},	[r0]!	@ buf[0..7],  r0 += 32
	VLD1.I32			{d8, d9, d10, d11},	[r0]!	@ buf[8..15], r0 += 32

	VADD.S32			d4, d0, d1		@ d4 = { buf[0] + buf[2], buf[1] + buf[3] }
	VSUB.S32			d5, d0, d1		@ d5 = { buf[0] - buf[2], buf[1] - buf[3] }
	VSUB.S32			d7, d2, d3		@ d7 = { buf[4] - buf[6], buf[5] - buf[7] }
	VADD.S32			d6, d2, d3		@ d6 = { buf[4] + buf[6], buf[5] + buf[7] }
	VREV64.I32			d7, d7			@ swap the two lanes of d7

	VADD.S32			Q0, Q2, Q3		@ d0 = d4 + d6, d1 = d5 + d7
	VSUB.S32			Q1, Q2, Q3		@ d2 = d4 - d6, d3 = d5 - d7

	VREV64.I32			d3, d3

	VADD.S32			d4, d8, d9		@ d4 = { buf[ 8] + buf[10], buf[ 9] + buf[11] }
	VSUB.S32			d7, d10, d11	@ d7 = { buf[12] - buf[14], buf[13] - buf[15] }
	VADD.S32			d6, d10, d11	@ d6 = { buf[12] + buf[14], buf[13] + buf[15] }
	VREV64.I32			d7, d7			@ swap the two lanes of d7
	VSUB.S32			d5, d8, d9		@ d5 = { buf[ 8] - buf[10], buf[ 9] - buf[11] }

	VTRN.32				d1, d3

	VADD.S32			Q4, Q2, Q3		@ d8  = d4 + d6, d9  = d5 + d7
	VSUB.S32			Q5, Q2, Q3		@ d10 = d4 - d6, d11 = d5 - d7

	VREV64.I32			d3, d3

	VSHR.S32			d8, d8, #1		@ arithmetic >> 1 of the second-half sums
	VSHR.S32			Q0, Q0, #1		@ arithmetic >> 1 of d0, d1
	VREV64.I32			d10, d10
	VTRN.32				d11, d9
	VSHR.S32			Q1, Q1, #1		@ arithmetic >> 1 of d2, d3
	VSHR.S32			d10, d10, #1
	VREV64.I32			d9, d9

	sub       			r0, r0, #0x40	@ rewind r0 by the 64 bytes consumed by the two loads

	VADD.S32			d12, d0, d8
	VSUB.S32			d16, d0, d8
	VADD.S32			d14, d2, d10
	VSUB.S32			d18, d2, d10

	VSUB.S32			d4, d11, d9
	VADD.S32			d5, d11, d9

	VREV64.I32			d18, d18

	VQDMULH.S32			Q3, Q2, Q15		@ saturating doubling multiply, high half: (2 * lane * SQRT1_2) >> 32
	VTRN.32				d14, d18
	VTRN.32				d6, d7
	VREV64.I32			d18, d18

	VSUB.S32			d15, d3, d6
	VREV64.I32			d7, d7
	VADD.S32			d19, d3, d6
	VADD.S32			d13, d1, d7
	VSUB.S32			d17, d1, d7

	VREV64.I32			d17, d17
	VTRN.32				d13, d17
	VREV64.I32			d17, d17

	subs       			r1, r1, #1		@ one group done; the NEON stores below leave the flags alone

	VST1.I32			{d12, d13, d14, d15}, [r0]!	@ write buf[0..7],  r0 += 32
	VST1.I32			{d16, d17, d18, d19}, [r0]!	@ write buf[8..15], r0 += 32
	bne       			Radix8First_LOOP

Radix8First_END:
	VPOP			{d8 - d15}			@ restore callee-saved NEON registers
	ldmia     sp!, {r4 - r11, pc}

	.p2align	2						@ keep the PC-relative literal word-aligned
SQRT1_2:
	.word      0x2d413ccd				@ ~= 0.70710678 * 2^30

	@ENDP  @ |Radix8First|

@-----------------------------------------------------------------------
@ Radix4First
@
@ Processes 32-bit integer data in place, one group of 8 words
@ (32 bytes) per loop iteration. Per group, with the loaded values
@ named buf[0..7], the stored results are:
@
@   buf[0] = (buf[0] + buf[2]) + (buf[4] + buf[6])
@   buf[1] = (buf[1] + buf[3]) + (buf[5] + buf[7])
@   buf[2] = (buf[0] - buf[2]) + (buf[5] - buf[7])
@   buf[3] = (buf[1] - buf[3]) - (buf[4] - buf[6])
@   buf[4] = (buf[0] + buf[2]) - (buf[4] + buf[6])
@   buf[5] = (buf[1] + buf[3]) - (buf[5] + buf[7])
@   buf[6] = (buf[0] - buf[2]) - (buf[5] - buf[7])
@   buf[7] = (buf[1] - buf[3]) + (buf[4] - buf[6])
@
@ In:    r0 = pointer to the data
@        r1 = number of 8-word groups; 0 returns without touching memory
@ Out:   data at r0 overwritten group by group; r0 is advanced past the
@        last processed group
@ Uses:  d0-d7, d16-d19, condition flags
@ Saves: r4-r11 and lr (integer prologue kept unchanged, although only
@        r0 and r1 are written)
@
@ The temporaries live in Q8/Q9 (d16-d19), which are caller-saved under
@ the ARM procedure call standard, so no callee-saved NEON register
@ (d8-d15) is modified and none needs to be spilled.
@
@ NOTE(review): presumably called from C as
@ void Radix4First(int *buf, int num) -- confirm against the caller.
@-----------------------------------------------------------------------
	.section .text
	.global	Radix4First

Radix4First:
	stmdb     	sp!, {r4 - r11, lr}

	cmp       	r1, #0
	beq       	Radix4First_END			@ nothing to do for a zero count

Radix4First_LOOP:
	VLD1.I32			{d0, d1, d2, d3}, [r0]	@ buf[0..7]; no writeback, the store below advances r0

	VADD.S32			d4, d0, d1			@ d4 = { buf[0] + buf[2], buf[1] + buf[3] }
	VSUB.S32			d5, d0, d1			@ d5 = { buf[0] - buf[2], buf[1] - buf[3] }
	VSUB.S32			d7, d2, d3			@ d7 = { buf[4] - buf[6], buf[5] - buf[7] }
	VADD.S32			d6, d2, d3			@ d6 = { buf[4] + buf[6], buf[5] + buf[7] }

	VREV64.I32		d7, d7					@ d7 = { buf[5] - buf[7], buf[4] - buf[6] }

	VADD.S32			Q8, Q2, Q3			@ d16 = d4 + d6, d17 = d5 + d7
	VSUB.S32			Q9, Q2, Q3			@ d18 = d4 - d6, d19 = d5 - d7

	VREV64.I32		d19, d19				@ REV / TRN / REV exchanges lane 1 of d17 with lane 1 of d19
	VTRN.32				d17, d19
	subs       		r1, r1, #1				@ one group done; the NEON ops below leave the flags alone
	VREV64.I32		d19, d19
	VST1.I32			{d16, d17, d18, d19}, [r0]!	@ write buf[0..7], r0 += 32

	bne       		Radix4First_LOOP

Radix4First_END:
	ldmia    		sp!, {r4 - r11, pc}

	@ENDP  @ |Radix4First|
	.end