@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		Radix4FFT_v7.s
@
@	Content:	Radix4FFT, ARMv7 (NEON) assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

@ Radix4FFT -- in-place radix-4 butterfly passes over interleaved 32-bit pairs.
@
@ Register contract, as established by the code below:
@   r0  base address of the data buffer (copied to r8, the working "xptr")
@   r1  count; shifted right arithmetically by 2 on entry and again after every
@       outer pass.  The routine returns as soon as the shifted value is zero.
@       A negative value never shifts down to zero, so it is expected to be
@       non-negative.
@   r2  number of pairs per quarter; multiplied by 4 after every outer pass.
@       LOOP3 counts it down by 4 and leaves only on exactly zero, so it must
@       be a multiple of 4 (zero skips LOOP3 altogether).
@   r3  base address of the coefficient table ("csptr"); advanced by
@       3 * step bytes after every outer pass.
@
@ Derived values:
@   r5  = r2 * 8  byte distance ("step") between the four inputs of a butterfly
@   r12 = r5 * 3
@   r7  = LOOP2 counter (initialised from r1)
@   r4  = LOOP3 counter (initialised from r2)
@   r6  = running coefficient pointer (re-initialised from r3 in each LOOP2 pass)
@
@ Each LOOP3 iteration handles four butterflies at once.  VLD2/VST2 split the
@ interleaved pairs so that one Q register holds the four xptr[0] values and
@ the next holds the four xptr[1] values.
@
@ NOTE(review): presumably called from C as
@   void Radix4FFT(int *buf, int num, int bgn, int *twidTab)
@ with Q31 real/imaginary samples and cos/sin twiddles -- confirm against the
@ caller; nothing in this file states the prototype or the fixed-point format.
@
@ r4-r11 and d8-d15 are saved on entry and restored on exit.

	.section .text
	.global	Radix4FFT
	.fnstart

Radix4FFT:
	stmdb		sp!, {r4 - r11, lr}
	.save		{r4 - r11, lr}
	fstmfdd		sp!, {d8 - d15}
	.vsave		{d8 - d15}

	mov		r1, r1, asr #2			@ count >>= 2
	cmp		r1, #0
	beq		Radix4FFT_END			@ nothing to do

Radix4FFT_LOOP1:
	mov		r5, r2, lsl #1
	mov		r8, r0				@ xptr = start of buffer
	mov		r7, r1				@ LOOP2 counter
	mov		r5, r5, lsl #2			@ step = r2 * 8 bytes
	@ NOTE(review): r1 is non-zero on every path that reaches LOOP1 (the beq
	@ above and the movs/bne at the bottom), so the two conditional
	@ instructions below look unreachable.  They are kept unchanged.
	cmp		r1, #0
	rsbeq		r12, r5, r5, lsl #2
	beq		Radix4FFT_LOOP1_END

	rsb		r12, r5, r5, lsl #2		@ r12 = 4 * step - step = 3 * step

Radix4FFT_LOOP2:
	mov		r6, r3				@ rewind coefficient pointer
	mov		r4, r2				@ LOOP3 counter
	cmp		r2, #0
	beq		Radix4FFT_LOOP2_END

Radix4FFT_LOOP3:
	@ ---- input 0 ----------------------------------------------------------
	VLD2.I32	{D0, D1, D2, D3}, [r8]		@ Q0 = r0 = xptr[0], Q1 = r1 = xptr[1]
	VLD2.I32	{D28, D29, D30, D31}, [r6]!	@ Q14 = cosx = csptr[0], Q15 = sinx = csptr[1]

	@ ---- input 1: multiply by (cosx, sinx) --------------------------------
	add		r8, r8, r5			@ xptr += step
	VLD2.I32	{D4, D5, D6,D7}, [r8]		@ Q2 = r2 = xptr[0], Q3 = r3 = xptr[1]

	VQDMULH.S32	Q10, Q2, Q14			@ MULHIGH(cosx, t0)
	VQDMULH.S32	Q11, Q3, Q15			@ MULHIGH(sinx, t1)
	VQDMULH.S32	Q12, Q3, Q14			@ MULHIGH(cosx, t1)
	VQDMULH.S32	Q13, Q2, Q15			@ MULHIGH(sinx, t0)

	VADD.S32	Q2, Q10, Q11			@ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
	VSUB.S32	Q3, Q12, Q13			@ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)

	add		r8, r8, r5			@ xptr += step
	VSHR.S32	Q10, Q0, #2			@ t0 = r0 >> 2
	VSHR.S32	Q11, Q1, #2			@ t1 = r1 >> 2

	VSUB.S32	Q0, Q10, Q2			@ r0 = t0 - r2
	VSUB.S32	Q1, Q11, Q3			@ r1 = t1 - r3
	VADD.S32	Q2, Q10, Q2			@ r2 = t0 + r2
	VADD.S32	Q3, Q11, Q3			@ r3 = t1 + r3

	@ ---- input 2: multiply by the next (cosx, sinx) -----------------------
	VLD2.I32	{D8, D9, D10, D11}, [r8]
	VLD2.I32	{D28, D29, D30, D31}, [r6]!
	add		r8, r8, r5			@ xptr += step

	VQDMULH.S32	Q10, Q4, Q14			@ MULHIGH(cosx, t0)
	VQDMULH.S32	Q11, Q5, Q15			@ MULHIGH(sinx, t1)
	VQDMULH.S32	Q12, Q5, Q14			@ MULHIGH(cosx, t1)
	VQDMULH.S32	Q13, Q4, Q15			@ MULHIGH(sinx, t0)

	VADD.S32	Q8, Q10, Q11			@ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
	VSUB.S32	Q9, Q12, Q13			@ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)

	@ ---- input 3: multiply by the next (cosx, sinx) -----------------------
	@ r8 is deliberately left pointing at input 3: the first store below
	@ writes to this position.
	VLD2.I32	{D12, D13, D14, D15}, [r8]
	VLD2.I32	{D28, D29, D30, D31}, [r6]!

	VQDMULH.S32	Q10, Q6, Q14			@ MULHIGH(cosx, t0)
	VQDMULH.S32	Q11, Q7, Q15			@ MULHIGH(sinx, t1)
	VQDMULH.S32	Q12, Q7, Q14			@ MULHIGH(cosx, t1)
	VQDMULH.S32	Q13, Q6, Q15			@ MULHIGH(sinx, t0)

	VADD.S32	Q6, Q10, Q11			@ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
	VSUB.S32	Q7, Q12, Q13			@ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)

	VADD.S32	Q4, Q8, Q6			@ r4 = t0 + r6
	VSUB.S32	Q5, Q7, Q9			@ r5 = r7 - t1
	VSUB.S32	Q6, Q8, Q6			@ r6 = t0 - r6
	VADD.S32	Q7, Q7, Q9			@ r7 = r7 + t1

	@ ---- write the four outputs back, walking xptr down from slot 3 to 0 --
	VADD.S32	Q8, Q0, Q5			@ xptr[0] = r0 + r5
	VADD.S32	Q9, Q1, Q6			@ xptr[1] = r1 + r6
	VST2.I32	{D16, D17, D18, D19}, [r8]

	VSUB.S32	Q10, Q2, Q4			@ xptr[0] = r2 - r4
	sub		r8, r8, r5			@ xptr -= step
	VSUB.S32	Q11, Q3, Q7			@ xptr[1] = r3 - r7
	VST2.I32	{D20, D21, D22, D23}, [r8]

	VSUB.S32	Q8, Q0, Q5			@ xptr[0] = r0 - r5
	sub		r8, r8, r5			@ xptr -= step
	VSUB.S32	Q9, Q1, Q6			@ xptr[1] = r1 - r6
	VST2.I32	{D16, D17, D18, D19}, [r8]

	VADD.S32	Q10, Q2, Q4			@ xptr[0] = r2 + r4
	sub		r8, r8, r5			@ xptr -= step
	VADD.S32	Q11, Q3, Q7			@ xptr[1] = r3 + r7
	VST2.I32	{D20, D21, D22, D23}, [r8]!	@ writeback: xptr moves on to the next 4 pairs

	subs		r4, r4, #4			@ four butterflies done
	bne		Radix4FFT_LOOP3

Radix4FFT_LOOP2_END:
	add		r8, r8, r12			@ xptr += 3 * step (skip the other three quarters)
	sub		r7, r7, #1
	cmp		r7, #0
	bhi		Radix4FFT_LOOP2

Radix4FFT_LOOP1_END:
	add		r3, r12, r3			@ coefficient table base += 3 * step
	mov		r2, r2, lsl #2			@ r2 *= 4
	movs		r1, r1, asr #2			@ r1 >>= 2; another pass while non-zero
	bne		Radix4FFT_LOOP1

Radix4FFT_END:
	fldmfdd		sp!, {d8 - d15}
	ldmia		sp!, {r4 - r11, pc}

	@ENDP						@ |Radix4FFT|
	.fnend