@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@       File:           Radix4FFT_v5.s
@
@       Content:        Radix4FFT armv5 assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

@-------------------------------------------------------------------------------
@ Radix4FFT
@
@ Syntax : GNU as, ARM state.  Uses ldrd/strd and smulw<y>/smlaw<y>.
@
@ In     : r0 = buf      pointer to the data, processed in place as pairs of
@                        32-bit words
@          r1 = num      size argument; the routine works on num >> 2
@          r2 = bgn      butterflies per group in the first stage
@          r3 = twidTab  pointer to packed twiddle words, three consumed per
@                        butterfly
@          (argument names are taken from the original inline comments;
@           presumably the C prototype is
@           void Radix4FFT(int *buf, int num, int bgn, int *twidTab) --
@           TODO confirm against the caller)
@ Out    : none in registers; buf is overwritten.
@ Saves  : r4-r11 and lr are pushed on entry and restored on exit.
@ Clobb  : r0-r3, r12, flags.
@
@ Stack frame (32 bytes below the saved registers):
@          [sp, #0]   buf
@          [sp, #4]   num for the current stage (already shifted)
@          [sp, #8]   bgn for the current stage
@          [sp, #12]  twidTab for the current stage
@          [sp, #16]  i, groups left in the current stage
@          [sp, #20]  j, butterflies left in the current group
@          [sp, #24]  spill slot for r2 inside the butterfly
@          [sp, #28]  spill slot for r3 inside the butterfly
@
@ Twiddle arithmetic: for a packed word w,
@          smulwt d, a, w     ->  d = (a * top    halfword of w) >> 16
@          smulwb d, a, w     ->  d = (a * bottom halfword of w) >> 16
@          smlawb d, a, w, c  ->  d = ((a * bottom halfword of w) >> 16) + c
@ The original comments call the top halfword cosx and the bottom one sinx.
@
@ NOTE(review): ldrd/strd are applied to xptr, so buf may need to be 8-byte
@ aligned on cores that require doubleword alignment -- confirm for the target.
@ NOTE(review): the symbol has no .type directive; confirm this is acceptable
@ if the routine is ever called from Thumb code.
@-------------------------------------------------------------------------------

        .section .text
        .global Radix4FFT

Radix4FFT:
        stmdb   sp!, {r4 - r11, lr}
        sub     sp, sp, #32                     @ local frame, see layout above

        mov     r1, r1, asr #2                  @ num >>= 2
        cmp     r1, #0
        beq     Radix4FFT_END                   @ nothing to do

@ ---- one pass per stage ------------------------------------------------------
Radix4FFT_LOOP1:
        mov     r14, r0                         @ xptr = buf
        mov     r10, r1                         @ i = num
        mov     r9, r2, lsl #3                  @ step = 8*bgn bytes (2*bgn words)
        cmp     r10, #0
        str     r0, [sp]                        @ save buf
        str     r1, [sp, #4]                    @ save num
        str     r2, [sp, #8]                    @ save bgn
        str     r3, [sp, #12]                   @ save twidTab
        beq     Radix4FFT_LOOP1_END             @ guard only: r1 is non-zero on
                                                @ every path that reaches LOOP1

@ ---- one pass per group ------------------------------------------------------
Radix4FFT_LOOP2:
        mov     r12, r3                         @ csptr = twidTab
        mov     r11, r2                         @ j = bgn
        cmp     r11, #0
        str     r10, [sp, #16]                  @ save i
        beq     Radix4FFT_LOOP2_END             @ bgn == 0: empty group

@ ---- one radix-4 butterfly ---------------------------------------------------
@ Works on the four pairs at xptr, xptr+step, xptr+2*step and xptr+3*step.
Radix4FFT_LOOP3:
        str     r11, [sp, #20]                  @ save j, r11 is reused below

        ldrd    r0, [r14, #0]                   @ r0 = xptr[0], r1 = xptr[1]
        add     r14, r14, r9                    @ xptr += step

        ldrd    r10, [r14, #0]                  @ r10 = xptr[0], r11 = xptr[1]
        ldr     r8, [r12], #4                   @ cosxsinx = *csptr++ (1st twiddle)

        smulwt  r4, r10, r8                     @ r4 = L_mpy_wx(cosx, r10)
        smulwt  r3, r11, r8                     @ r3 = L_mpy_wx(cosx, r11)

        smlawb  r2, r11, r8, r4                 @ r2 = L_mpy_wx(cosx, r10) + L_mpy_wx(sinx, r11)
        smulwb  r5, r10, r8                     @ r5 = L_mpy_wx(sinx, r10)

        mov     r10, r0, asr #2                 @ t0 = r0 >> 2
        mov     r11, r1, asr #2                 @ t1 = r1 >> 2

        sub     r3, r3, r5                      @ r3 = L_mpy_wx(cosx, r11) - L_mpy_wx(sinx, r10)
        add     r14, r14, r9                    @ xptr += step

        sub     r0, r10, r2                     @ r0 = t0 - r2
        sub     r1, r11, r3                     @ r1 = t1 - r3

        add     r2, r10, r2                     @ r2 = t0 + r2
        add     r3, r11, r3                     @ r3 = t1 + r3

        str     r2, [sp, #24]                   @ spill r2/r3: both are used as
        str     r3, [sp, #28]                   @ scratch by the 3rd rotation

        ldrd    r10, [r14, #0]                  @ r10 = xptr[0], r11 = xptr[1]
        ldr     r8, [r12], #4                   @ cosxsinx = *csptr++ (2nd twiddle)

        smulwt  r6, r10, r8                     @ r6 = L_mpy_wx(cosx, r10)
        smulwt  r5, r11, r8                     @ r5 = L_mpy_wx(cosx, r11)

        smlawb  r4, r11, r8, r6                 @ r4 = L_mpy_wx(cosx, r10) + L_mpy_wx(sinx, r11)
        smulwb  r7, r10, r8                     @ r7 = L_mpy_wx(sinx, r10)

        add     r14, r14, r9                    @ xptr += step
        sub     r5, r5, r7                      @ r5 = L_mpy_wx(cosx, r11) - L_mpy_wx(sinx, r10)

        ldrd    r10, [r14]                      @ r10 = xptr[0], r11 = xptr[1]
        ldr     r8, [r12], #4                   @ cosxsinx = *csptr++ (3rd twiddle)

        smulwt  r2, r10, r8                     @ r2 = L_mpy_wx(cosx, r10)
        smulwt  r7, r11, r8                     @ r7 = L_mpy_wx(cosx, r11)

        smlawb  r6, r11, r8, r2                 @ r6 = L_mpy_wx(cosx, r10) + L_mpy_wx(sinx, r11)
        smulwb  r3, r10, r8                     @ r3 = L_mpy_wx(sinx, r10)

        mov     r10, r4                         @ t0 = r4
        mov     r11, r5                         @ t1 = r5

        sub     r7, r7, r3                      @ r7 = L_mpy_wx(cosx, r11) - L_mpy_wx(sinx, r10)


        add     r4, r10, r6                     @ r4 = t0 + r6
        sub     r5, r7, r11                     @ r5 = r7 - t1

        sub     r6, r10, r6                     @ r6 = t0 - r6
        add     r7, r7, r11                     @ r7 = r7 + t1

        ldr     r2, [sp, #24]                   @ reload the spilled sums
        ldr     r3, [sp, #28]

        add     r10, r0, r5                     @ xptr[0] = r0 + r5
        add     r11, r1, r6                     @ xptr[1] = r1 + r6

        strd    r10, [r14]                      @ store at xptr + 3*step
        sub     r14, r14, r9                    @ xptr -= step

        sub     r10, r2, r4                     @ xptr[0] = r2 - r4
        sub     r11, r3, r7                     @ xptr[1] = r3 - r7

        strd    r10, [r14]                      @ store at xptr + 2*step
        sub     r14, r14, r9                    @ xptr -= step

        sub     r10, r0, r5                     @ xptr[0] = r0 - r5
        sub     r11, r1, r6                     @ xptr[1] = r1 - r6

        strd    r10, [r14]                      @ store at xptr + step
        sub     r14, r14, r9                    @ xptr -= step

        add     r10, r2, r4                     @ xptr[0] = r2 + r4
        add     r11, r3, r7                     @ xptr[1] = r3 + r7

        strd    r10, [r14]                      @ store at xptr
        add     r14, r14, #8                    @ xptr += 2 words (next pair)

        ldr     r11, [sp, #20]                  @ reload j
        subs    r11, r11, #1                    @ --j
        bne     Radix4FFT_LOOP3

Radix4FFT_LOOP2_END:
        ldr     r10, [sp, #16]                  @ reload i
        ldr     r3, [sp, #12]                   @ reload twidTab
        ldr     r2, [sp, #8]                    @ reload bgn
        rsb     r8, r9, r9, lsl #2              @ r8 = 4*step - step = 3*step
        sub     r10, r10, #1                    @ --i
        add     r14, r14, r8                    @ xptr += 3*step: skip the three
                                                @ blocks handled via +step offsets
        cmp     r10, #0
        bhi     Radix4FFT_LOOP2                 @ loop while i != 0 (unsigned)

Radix4FFT_LOOP1_END:
        ldr     r0, [sp]                        @ reload buf
        ldr     r1, [sp, #4]                    @ reload num
        add     r3, r3, r8, asr #1              @ twidTab += 3*step/2 bytes, i.e.
                                                @ 3*bgn words (r8 is 3*step here)
        mov     r2, r2, lsl #2                  @ bgn <<= 2
        movs    r1, r1, asr #2                  @ num >>= 2, sets Z when done
        bne     Radix4FFT_LOOP1

Radix4FFT_END:
        add     sp, sp, #32                     @ drop the local frame
        ldmia   sp!, {r4 - r11, pc}             @ restore and return

        @ENDP  @ |Radix4FFT|
        .end