117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@/*
217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** Copyright 2003-2010, VisualOn, Inc.
317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ **
417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** Licensed under the Apache License, Version 2.0 (the "License");
517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** you may not use this file except in compliance with the License.
617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** You may obtain a copy of the License at
717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ **
817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ **     http://www.apache.org/licenses/LICENSE-2.0
917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ **
1017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** Unless required by applicable law or agreed to in writing, software
1117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** distributed under the License is distributed on an "AS IS" BASIS,
1217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** See the License for the specific language governing permissions and
1417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ ** limitations under the License.
1517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@ */
1617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
1717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@	File:		Radix4FFT_v5.s
1917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@
2017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@	Content:	Radix4FFT ARMv5 assembly implementation
2117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@
2217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	.section .text
2417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	.global	Radix4FFT
2517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
@-----------------------------------------------------------------------
@ Radix4FFT
@
@ Runs a triple-nested butterfly loop, in place, over the data at buf.
@
@ Register arguments on entry, as used by the code below:
@   r0 = buf      base of the data (copied to xptr at the start of each pass)
@   r1 = num      shifted right by 2 on entry and again after every pass;
@                 the outer loop ends when it reaches 0
@   r2 = bgn      inner-loop count; multiplied by 4 after every pass
@   r3 = twidTab  table of 32-bit words read through csptr; advanced by
@                 (3 * step) / 2 bytes after every pass
@
@ Data layout as accessed here:
@   - each data element is a pair of 32-bit words (xptr[0], xptr[1]) moved
@     with ldrd/strd;
@   - each table word is consumed as two signed 16-bit halves: the top half
@     is the "cosx" operand (smulwt) and the bottom half is the "sinx"
@     operand (smulwb / smlawb).
@   "L_mpy_wx(c, t)" in the comments denotes the result of those 32x16
@   multiplies (smulwt/smulwb); the name itself is not defined in this file.
@
@ Scratch frame (32 bytes at sp after the prologue):
@   [sp, #0]   buf      (r0)
@   [sp, #4]   num      (r1) for the current pass
@   [sp, #8]   bgn      (r2) for the current pass
@   [sp, #12]  twidTab  (r3) for the current pass
@   [sp, #16]  i        (Radix4FFT_LOOP2 counter)
@   [sp, #20]  j        (Radix4FFT_LOOP3 counter)
@   [sp, #24]  r2 spill inside Radix4FFT_LOOP3
@   [sp, #28]  r3 spill inside Radix4FFT_LOOP3
@
@ r4-r11 and lr are saved on entry and restored on exit; r0-r3, r12 and
@ the flags are modified. The routine makes no calls.
@
@ NOTE(review): ldrd/strd on xptr presumably need buf to be 8-byte aligned
@ on the target core - confirm against the caller.
@ NOTE(review): every original line of this listing starts with a run of
@ digits/hex characters and a name before the assembly text; that prefix is
@ not assembler syntax - confirm it is stripped before assembling.
@-----------------------------------------------------------------------
2617299ab50ceb70d904e610e3b2d7fb2361a11e03James DongRadix4FFT:
2717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	stmdb     sp!, {r4 - r11, lr}		@ save r4-r11 and the return address
2817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub       sp, sp, #32				@ reserve the 32-byte scratch frame
2917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
3017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov			r1, r1, asr #2				@ num >>= 2
3117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	cmp     r1, #0
3217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	beq     Radix4FFT_END				@ no pass to run when num is 0
3317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
@ ---- outer loop: one pass per iteration, until num >> 2 reaches 0 ----
3417299ab50ceb70d904e610e3b2d7fb2361a11e03James DongRadix4FFT_LOOP1:
3517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov     r14, r0          							@ xptr (r14) = buf
3617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov		r10, r1 												@ i (r10) = num
3717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov     r9, r2, lsl #3  							@ step (r9) = bgn << 3: 2*bgn 32-bit words, expressed in bytes
3817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	cmp     r10, #0								@ flags are consumed by the beq below; the str instructions leave them intact
3917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	str		r0, [sp]							@ spill buf
4017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	str		r1, [sp, #4]						@ spill num
4117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	str		r2, [sp, #8]						@ spill bgn
4217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	str		r3, [sp, #12]						@ spill twidTab
4317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	beq     Radix4FFT_LOOP1_END				@ skip the pass when i == 0
4417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
@ ---- middle loop: i iterations, each restarting csptr at twidTab ----
4517299ab50ceb70d904e610e3b2d7fb2361a11e03James DongRadix4FFT_LOOP2:
4617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov     r12, r3				        				@ csptr (r12) = twidTab
4717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov		r11, r2												@ j (r11) = bgn
4817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	cmp     r11, #0								@ flags are consumed by the beq below
4917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	str		r10, [sp, #16]						@ spill i: r10 is reused as t0 inside LOOP3
5017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	beq     Radix4FFT_LOOP2_END				@ skip the inner loop when j == 0
5117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
@ ---- inner loop: one butterfly over four elements spaced step apart ----
5217299ab50ceb70d904e610e3b2d7fb2361a11e03James DongRadix4FFT_LOOP3:
5317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	str			r11, [sp, #20]					@ spill j: r11 is reused as t1 below
5417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
5517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldrd		r0, [r14, #0]									@ r0 = xptr[0], r1 = xptr[1]  (first element)
5617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r14, r14, r9 	 								@ xptr += step
5717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
5817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldrd		r10,	[r14, #0]  					 			@ t0 (r10) = xptr[0], t1 (r11) = xptr[1]  (second element)
5917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r8, [r12], #4									@ cosxsinx (r8) = *csptr, then csptr advances one word (first table word)
6017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
6117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwt	r4, r10, r8										@ r4 = L_mpy_wx(cosx, t0)  (top half of r8)
6217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwt	r3, r11, r8										@ r3 = L_mpy_wx(cosx, t1)
6317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
6417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smlawb	r2, r11, r8, r4								@ r2 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)  (bottom half of r8)
6517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwb	r5, r10, r8										@ r5 = L_mpy_wx(sinx, t0)
6617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
6717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov			r10, r0, asr #2								@ t0 = r0 >> 2
6817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov			r11, r1, asr #2								@ t1 = r1 >> 2
6917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
7017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r3, r3, r5										@ r3 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)
7117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add     r14, r14, r9 	 								@ xptr += step
7217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
7317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r0, r10, r2										@ r0 = t0 - r2
7417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r1, r11, r3									  @ r1 = t1 - r3
7517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
7617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r2, r10, r2										@ r2 = t0 + r2
7717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r3, r11, r3										@ r3 = t1 + r3
7817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
7917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	str			r2, [sp, #24]					@ spill r2: it is used as scratch by the third multiply group
8017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	str			r3, [sp, #28]					@ spill r3: it is used as scratch by the third multiply group
8117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
8217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldrd		r10, [r14, #0]								@ t0 (r10) = xptr[0], t1 (r11) = xptr[1]  (third element)
8317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r8, [r12], #4									@ cosxsinx = *csptr, then csptr advances one word (second table word)
8417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
8517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwt	r6, r10, r8										@ r6 = L_mpy_wx(cosx, t0)
8617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwt	r5, r11, r8										@ r5 = L_mpy_wx(cosx, t1)
8717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
8817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smlawb	r4, r11, r8, r6								@ r4 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)
8917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwb	r7, r10, r8										@ r7 = L_mpy_wx(sinx, t0)
9017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
9117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r14, r14, r9									@ xptr += step
9217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r5, r5, r7										@ r5 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)
9317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
9417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldrd		r10, [r14]										@ t0 (r10) = xptr[0], t1 (r11) = xptr[1]  (fourth element)
9517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r8, [r12], #4									@ cosxsinx = *csptr, then csptr advances one word (third table word)
9617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
9717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwt	r2, r10, r8										@ r2 (scratch) = L_mpy_wx(cosx, t0)
9817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwt	r7, r11, r8										@ r7 = L_mpy_wx(cosx, t1)
9917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
10017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smlawb	r6, r11, r8, r2								@ r6 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)
10117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	smulwb	r3, r10, r8										@ r3 (scratch) = L_mpy_wx(sinx, t0)
10217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
10317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov			r10, r4												@ t0 = r4
10417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov			r11, r5												@ t1 = r5
10517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
10617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r7, r7, r3										@ r7 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)
10717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
10817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
10917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r4,  r10, r6									@ r4 = t0 + r6
11017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r5, r7, r11										@ r5 = r7 - t1
11117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
11217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r6, r10, r6										@ r6 = t0 - r6
11317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r7, r7, r11										@ r7 = r7 + t1
11417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
11517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r2, [sp, #24]					@ reload the r2 spilled above
11617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r3, [sp, #28]					@ reload the r3 spilled above
11717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
11817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r10, r0, r5										@ r10 = r0 + r5  (becomes xptr[0])
11917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r11, r1, r6										@ r11 = r1 + r6  (becomes xptr[1])
12017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
12117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	strd		r10, [r14]						@ store the pair to the fourth element
12217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r14, r14, r9									@ xptr -= step
12317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
12417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r10, r2, r4										@ r10 = r2 - r4  (becomes xptr[0])
12517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r11, r3, r7										@ r11 = r3 - r7  (becomes xptr[1])
12617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
12717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	strd		r10, [r14]						@ store the pair to the third element
12817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r14, r14, r9									@ xptr -= step
12917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
13017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r10, r0, r5										@ r10 = r0 - r5  (becomes xptr[0])
13117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r11, r1, r6										@ r11 = r1 - r6  (becomes xptr[1])
13217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
13317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	strd		r10, [r14]						@ store the pair to the second element
13417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r14, r14, r9									@ xptr -= step
13517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
13617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r10, r2, r4										@ r10 = r2 + r4  (becomes xptr[0])
13717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r11, r3, r7										@ r11 = r3 + r7  (becomes xptr[1])
13817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
13917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	strd		r10, [r14]						@ store the pair to the first element
14017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r14, r14, #8									@ xptr += 2 words (8 bytes): next element pair
14117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
14217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r11, [sp, #20]					@ reload j
14317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	subs		r11, r11, #1					@ j--, setting flags for the bne
14417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	bne			Radix4FFT_LOOP3					@ repeat while j != 0 (j is re-spilled at the loop head)
14517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
14617299ab50ceb70d904e610e3b2d7fb2361a11e03James DongRadix4FFT_LOOP2_END:
14717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r10, [sp, #16]					@ reload i
14817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r3, [sp, #12]					@ reload twidTab
14917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr			r2, [sp, #8]					@ reload bgn
15017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	rsb			r8, r9, r9, lsl #2				@ r8 = (step << 2) - step = 3 * step
15117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	sub			r10, r10, #1					@ i--
15217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add			r14, r14, r8					@ xptr += 3 * step
15317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	cmp			r10, #0
15417299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	bhi     Radix4FFT_LOOP2					@ repeat while i > 0 (unsigned compare against 0)
15517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
15617299ab50ceb70d904e610e3b2d7fb2361a11e03James DongRadix4FFT_LOOP1_END:
15717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr     r0, [sp]						@ reload buf
15817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldr		r1, [sp, #4]						@ reload num
15917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add     r3, r3, r8, asr #1				@ twidTab += r8 >> 1 bytes (r8 = 3 * step, set at LOOP2_END)
16017299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	mov     r2, r2, lsl #2					@ bgn <<= 2
16117299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	movs    r1, r1, asr #2					@ num >>= 2, setting flags for the bne
16217299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	bne     Radix4FFT_LOOP1					@ run another pass while num != 0
16317299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
16417299ab50ceb70d904e610e3b2d7fb2361a11e03James DongRadix4FFT_END:
16517299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	add     sp, sp, #32						@ release the scratch frame
16617299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	ldmia   sp!, {r4 - r11, pc}				@ restore r4-r11 and return by loading pc with the saved lr
16717299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong
16817299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	@ENDP  @ |Radix4FFT|
16917299ab50ceb70d904e610e3b2d7fb2361a11e03James Dong	.end