@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
16e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		Radix4FFT_v7.s
@
@	Content:	Radix4FFT armv7 assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
23e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
@-----------------------------------------------------------------------
@ Radix4FFT  (ARMv7 + NEON, GNU as syntax, ARM procedure call standard)
@
@ Runs successive in-place 4-point butterfly passes over a buffer of
@ interleaved 32-bit word pairs (the pairs are labelled re/im below),
@ processing four pairs per NEON iteration.
@
@ Presumed C prototype (TODO confirm against the caller's declaration):
@     void Radix4FFT(int *buf, int num, int bgn, int *twidTab);
@
@ In:
@   r0 = data pointer; read and written in place, r0 itself is not changed
@   r1 = count; divided by 4 (asr #2) on entry and after every outer pass,
@        the routine returns once it reaches 0 (so r1 < 4 is a no-op)
@   r2 = pairs per group for the first pass; multiplied by 4 after every
@        outer pass.  The inner loop consumes 4 per iteration and leaves
@        only when its counter is exactly 0, so r2 is expected to be a
@        multiple of 4 (r2 == 0 skips the inner loop).
@   r3 = twiddle table pointer; each inner iteration reads 3 x 8 words,
@        and the pointer is advanced by 3*step bytes after every pass
@ Out:   none (buffer updated in place)
@ Clobbers: r1, r2, r3, r12, q0-q3, q8-q15, flags
@ Saved/restored: r4-r11, lr, d8-d15 (q4-q7)   -- 100 bytes of stack
@
@ Working registers:
@   r4  = inner loop counter          r5  = step in bytes (r2 * 8)
@   r6  = running twiddle pointer     r7  = middle loop counter
@   r8  = running data pointer        r12 = 3 * step in bytes
@-----------------------------------------------------------------------
	.section .text
	.global	Radix4FFT
	.type	Radix4FFT, %function		@ mark the symbol as a function for the linker
	.fnstart

Radix4FFT:
	stmdb		sp!, {r4 - r11, lr}
	.save		{r4 - r11, lr}
	fstmfdd		sp!, {d8 - d15}			@ pre-UAL spelling of vpush; q4-q7 are used below
	.vsave		{d8 - d15}

	mov		r1, r1, asr #2			@ r1 = r1 / 4
	cmp		r1, #0
	beq		Radix4FFT_END			@ nothing to do

@ ---- outer pass: repeated while r1 != 0 -------------------------------
Radix4FFT_LOOP1:
	mov		r5, r2, lsl #1
	mov		r8, r0				@ restart at the beginning of the buffer
	mov		r7, r1				@ middle loop runs r1 times
	mov		r5, r5, lsl #2			@ r5 = step in bytes = r2 * 8
	@ r1 is non-zero on every path that reaches this label (checked on
	@ entry and by the bne at the bottom), so the skip below is purely
	@ defensive and is not expected to be taken.
	cmp		r1, #0
	rsbeq		r12, r5, r5, lsl #2
	beq		Radix4FFT_LOOP1_END

	rsb		r12, r5, r5, lsl #2		@ r12 = 4*r5 - r5 = 3 * step

@ ---- middle loop: one group of four blocks spaced `step` apart --------
Radix4FFT_LOOP2:
	mov		r6, r3				@ rewind twiddle pointer for this group
	mov		r4, r2				@ inner counter = pairs per group
	cmp		r2, #0
	beq		Radix4FFT_LOOP2_END

@ ---- inner loop: four pairs from each of the four blocks --------------
@ VLD2.I32 with four D registers reads eight words and de-interleaves
@ them: even-indexed words go to the first Q register, odd-indexed words
@ to the second.  Blocks are called a, b, c, d at r8 + 0/1/2/3 * step.
Radix4FFT_LOOP3:
	VLD2.I32	{D0, D1, D2, D3}, [r8]		@ Q0 = a.re, Q1 = a.im
	VLD2.I32	{D28, D29, D30, D31}, [r6]!	@ Q14 = cosx, Q15 = sinx (first twiddle set)

	add		r8, r8, r5			@ xptr += step  -> block b
	VLD2.I32	{D4, D5, D6,D7}, [r8]		@ Q2 = b.re (t0), Q3 = b.im (t1)

	@ VQDMULH.S32: saturating doubling multiply, keeps the high 32 bits.
	VQDMULH.S32	Q10, Q2, Q14			@ cosx * t0
	VQDMULH.S32	Q11, Q3, Q15			@ sinx * t1
	VQDMULH.S32	Q12, Q3, Q14			@ cosx * t1
	VQDMULH.S32	Q13, Q2, Q15			@ sinx * t0

	VADD.S32	Q2, Q10, Q11			@ r2 = cosx*t0 + sinx*t1
	VSUB.S32	Q3, Q12, Q13			@ r3 = cosx*t1 - sinx*t0

	add		r8, r8, r5			@ xptr += step  -> block c
	VSHR.S32	Q10, Q0, #2			@ t0 = a.re >> 2
	VSHR.S32	Q11, Q1, #2			@ t1 = a.im >> 2

	VSUB.S32	Q0,	Q10, Q2			@ r0 = t0 - r2
	VSUB.S32	Q1,	Q11, Q3			@ r1 = t1 - r3
	VADD.S32	Q2, Q10, Q2			@ r2 = t0 + r2
	VADD.S32	Q3, Q11, Q3			@ r3 = t1 + r3

	VLD2.I32	{D8, D9, D10, D11}, [r8]	@ Q4 = c.re, Q5 = c.im
	VLD2.I32	{D28, D29, D30, D31}, [r6]!	@ second twiddle set
	add		r8, r8, r5			@ xptr += step  -> block d

	VQDMULH.S32	Q10, Q4, Q14			@ cosx * c.re
	VQDMULH.S32	Q11, Q5, Q15			@ sinx * c.im
	VQDMULH.S32	Q12, Q5, Q14			@ cosx * c.im
	VQDMULH.S32	Q13, Q4, Q15			@ sinx * c.re

	VADD.S32	Q8, Q10, Q11			@ t0 = cosx*c.re + sinx*c.im
	VSUB.S32	Q9, Q12, Q13			@ t1 = cosx*c.im - sinx*c.re

	VLD2.I32	{D12, D13, D14, D15}, [r8]	@ Q6 = d.re, Q7 = d.im
	VLD2.I32	{D28, D29, D30, D31}, [r6]!	@ third twiddle set

	VQDMULH.S32	Q10, Q6, Q14			@ cosx * d.re
	VQDMULH.S32	Q11, Q7, Q15			@ sinx * d.im
	VQDMULH.S32	Q12, Q7, Q14			@ cosx * d.im
	VQDMULH.S32	Q13, Q6, Q15			@ sinx * d.re

	VADD.S32	Q6, Q10, Q11			@ r6 = cosx*d.re + sinx*d.im
	VSUB.S32	Q7, Q12, Q13			@ r7 = cosx*d.im - sinx*d.re

	VADD.S32	Q4, Q8, Q6			@ r4 = t0 + r6
	VSUB.S32	Q5, Q7, Q9			@ r5 = r7 - t1
	VSUB.S32	Q6, Q8, Q6			@ r6 = t0 - r6
	VADD.S32	Q7, Q7, Q9			@ r7 = r7 + t1

	@ Results are written back from block d down to block a; VST2
	@ re-interleaves the two Q registers into word pairs.
	VADD.S32	Q8, Q0, Q5			@ d.re = r0 + r5
	VADD.S32	Q9, Q1, Q6			@ d.im = r1 + r6
	VST2.I32	{D16, D17, D18, D19}, [r8]

	VSUB.S32	Q10, Q2, Q4			@ c.re = r2 - r4
	sub		r8, r8, r5			@ xptr -= step  -> block c
	VSUB.S32	Q11, Q3, Q7			@ c.im = r3 - r7
	VST2.I32	{D20, D21, D22, D23}, [r8]

	VSUB.S32	Q8, Q0, Q5			@ b.re = r0 - r5
	sub		r8, r8, r5			@ xptr -= step  -> block b
	VSUB.S32	Q9, Q1, Q6			@ b.im = r1 - r6
	VST2.I32	{D16, D17, D18, D19}, [r8]

	VADD.S32	Q10, Q2, Q4			@ a.re = r2 + r4
	sub		r8, r8, r5			@ xptr -= step  -> block a
	VADD.S32	Q11, Q3, Q7			@ a.im = r3 + r7
	VST2.I32	{D20, D21, D22, D23}, [r8]!	@ writeback: r8 += 32 bytes (next four pairs)

	subs		r4, r4, #4			@ four pairs done
	bne		Radix4FFT_LOOP3

Radix4FFT_LOOP2_END:
	add		r8, r8, r12			@ skip blocks b, c, d: xptr += 3 * step
	sub		r7, r7, #1
	cmp		r7, #0
	bhi		Radix4FFT_LOOP2			@ more groups left in this pass

Radix4FFT_LOOP1_END:
	add		r3, r12, r3			@ advance twiddle table by 3 * step bytes
	mov		r2, r2, lsl #2			@ four times as many pairs per group next pass
	movs		r1, r1, asr #2			@ a quarter as many groups next pass
	bne		Radix4FFT_LOOP1

Radix4FFT_END:
	fldmfdd		sp!, {d8 - d15}			@ pre-UAL spelling of vpop
	ldmia		sp!, {r4 - r11, pc}		@ restore and return

	@ENDP  @ |Radix4FFT|
	.fnend
	.size	Radix4FFT, . - Radix4FFT
149