@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		Radix4FFT_v5.s
@
@	Content:	Radix4FFT in ARMv5 assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
23e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	.section .text
24e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	.global	Radix4FFT
25e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
26e2e838afcf03e603a41a0455846eaf9614537c16Mans RullgardRadix4FFT:
27e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	stmdb     sp!, {r4 - r11, lr}
28b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	sub       sp, sp, #32
29e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
30e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	mov			r1, r1, asr #2
31b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	cmp     r1, #0
32b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	beq     Radix4FFT_END
33b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
34b676a05348e4c516fa8b57e33b10548e6142c3f8Mans RullgardRadix4FFT_LOOP1:
35b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	mov     r14, r0          							@ xptr = buf@
36e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	mov		r10, r1 												@ i = num@
37e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	mov     r9, r2, lsl #3  							@ step = 2*bgn@
38b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	cmp     r10, #0
39b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	str		r0, [sp]
40b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	str		r1, [sp, #4]
41e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	str		r2, [sp, #8]
42b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	str		r3, [sp, #12]
43b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	beq     Radix4FFT_LOOP1_END
44b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
45b676a05348e4c516fa8b57e33b10548e6142c3f8Mans RullgardRadix4FFT_LOOP2:
46e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	mov     r12, r3				        				@ csptr = twidTab@
47e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	mov		r11, r2												@ j = bgn
48b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	cmp     r11, #0
49e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	str		r10, [sp, #16]
50b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	beq     Radix4FFT_LOOP2_END
51b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
52b676a05348e4c516fa8b57e33b10548e6142c3f8Mans RullgardRadix4FFT_LOOP3:
53b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	str			r11, [sp, #20]
54b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
55e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldrd		r0, [r14, #0]									@ r0 = xptr[0]@ r1 = xptr[1]@
56e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r14, r14, r9 	 								@ xptr += step@
57b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
58b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	ldrd		r10,	[r14, #0]  					 			@ r2 = xptr[0]@ r3 = xptr[1]@
59e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r8, [r12], #4									@ cosxsinx = csptr[0]@
60b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
61e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwt	r4, r10, r8										@ L_mpy_wx(cosx, t0)
62e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwt	r3, r11, r8										@ L_mpy_wx(cosx, t1)
63b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
64e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smlawb	r2, r11, r8, r4								@ r2 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)@
65e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwb	r5, r10, r8										@ L_mpy_wx(sinx, t0)
66b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
67e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	mov			r10, r0, asr #2								@ t0 = r0 >> 2@
68e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	mov			r11, r1, asr #2								@	t1 = r1 >> 2@
69b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
70e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r3, r3, r5										@ r3 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)@
71e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add     r14, r14, r9 	 								@ xptr += step@
72b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
73e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r0, r10, r2										@ r0 = t0 - r2@
74e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r1, r11, r3									  @ r1 = t1 - r3@
75b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
76e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r2, r10, r2										@ r2 = t0 + r2@
77e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r3, r11, r3										@ r3 = t1 + r3@
78b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
79e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	str			r2, [sp, #24]
80e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	str			r3, [sp, #28]
81b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
82e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldrd		r10, [r14, #0]								@ r4 = xptr[0]@ r5 = xptr[1]@
83e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r8, [r12], #4									@ cosxsinx = csptr[1]@
84b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
85e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwt	r6, r10, r8										@ L_mpy_wx(cosx, t0)
86e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwt	r5, r11, r8										@ L_mpy_wx(cosx, t1)
87b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
88e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smlawb	r4, r11, r8, r6								@ r4 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)@
89e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwb	r7, r10, r8										@ L_mpy_wx(sinx, t0)
90b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
91e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r14, r14, r9									@ xptr += step@
92e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r5, r5, r7										@ r5 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)@
93b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
94e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldrd		r10, [r14]										@ r6 = xptr[0]@ r7 = xptr[1]@
95e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r8, [r12], #4									@ cosxsinx = csptr[1]@
96b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
97e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwt	r2, r10, r8										@ L_mpy_wx(cosx, t0)
98e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwt	r7, r11, r8										@ L_mpy_wx(cosx, t1)
99b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
100e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smlawb	r6, r11, r8, r2								@ r4 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)@
101e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	smulwb	r3, r10, r8										@ L_mpy_wx(sinx, t0)
102b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
103e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	mov			r10, r4												@ t0 = r4@
104b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	mov			r11, r5												@ t1 = r5@
105b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
106e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r7, r7, r3										@ r5 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)@
107e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard
108b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
109b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	add			r4,  r10, r6									@	r4 = t0 + r6@
110e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r5, r7, r11										@ r5 = r7 - t1@
111b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
112e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r6, r10, r6										@ r6 = t0 - r6@
113e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r7, r7, r11										@ r7 = r7 + t1@
114b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
115e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r2, [sp, #24]
116e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r3, [sp, #28]
117b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
118e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r10, r0, r5										@ xptr[0] = r0 + r5@
119e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r11, r1, r6										@ xptr[0] = r1 + r6
120b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
121b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	strd		r10, [r14]
122e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r14, r14, r9									@ xptr -= step@
123b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
124e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r10, r2, r4										@	xptr[0] = r2 - r4@
125e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r11, r3, r7										@ xptr[1] = r3 - r7@
126b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
127b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	strd		r10, [r14]
128e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r14, r14, r9									@ xptr -= step@
129b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
130e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r10, r0, r5										@ xptr[0] = r0 - r5@
131e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r11, r1, r6										@ xptr[0] = r1 - r6
132b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
133b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	strd		r10, [r14]
134e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r14, r14, r9									@ xptr -= step@
135b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
136e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r10, r2, r4										@	xptr[0] = r2 - r4@
137e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r11, r3, r7										@ xptr[1] = r3 - r7@
138b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
139b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	strd		r10, [r14]
140e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add			r14, r14, #8									@ xptr += 2@
141b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
142e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r11, [sp, #20]
143e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	subs		r11, r11, #1
144b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	bne			Radix4FFT_LOOP3
145b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
146b676a05348e4c516fa8b57e33b10548e6142c3f8Mans RullgardRadix4FFT_LOOP2_END:
147e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r10, [sp, #16]
148e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r3, [sp, #12]
149e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr			r2, [sp, #8]
150b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	rsb			r8, r9, r9, lsl #2
151e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	sub			r10, r10, #1
152b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	add			r14, r14, r8
153b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	cmp			r10, #0
154b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	bhi     Radix4FFT_LOOP2
155b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
156b676a05348e4c516fa8b57e33b10548e6142c3f8Mans RullgardRadix4FFT_LOOP1_END:
157b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	ldr     r0, [sp]
158e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldr		r1, [sp, #4]
159e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	add     r3, r3, r8, asr #1
160b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	mov     r2, r2, lsl #2
161b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	movs    r1, r1, asr #2
162b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	bne     Radix4FFT_LOOP1
163b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
164b676a05348e4c516fa8b57e33b10548e6142c3f8Mans RullgardRadix4FFT_END:
165b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard	add     sp, sp, #32
166e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	ldmia   sp!, {r4 - r11, pc}
167b676a05348e4c516fa8b57e33b10548e6142c3f8Mans Rullgard
168e2e838afcf03e603a41a0455846eaf9614537c16Mans Rullgard	@ENDP  @ |Radix4FFT|
169891abc0ee089f2ba5b92dcc014e5efc2ef07f01eMartin Storsjo	.end
170