@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		R4R8First_v7.s
@
@	Content:	Radix8First and Radix4First function armv7 assemble
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	.section .text
	.global	Radix8First

@------------------------------------------------------------------------------
@ void Radix8First(Word32 *buf, Word32 num)
@
@ First radix-8 pass of the FFT: processes `num` groups of 8 complex values
@ (16 words) in-place at buf. Each group is loaded, butterflied in NEON
@ registers, and stored back over the same 0x40 bytes before r0 advances.
@
@ ABI:    AAPCS
@ In:     r0 = buf  (interleaved re/im Word32 pairs, 16 words per group)
@         r1 = num  (group count; 0 is handled and does nothing)
@ Clobb:  r3, q0-q9, q15, flags
@ NOTE:   d8-d15 are callee-saved under AAPCS; this routine uses d8-d15
@         (q4-q7) as scratch, so they are now preserved with vpush/vpop.
@------------------------------------------------------------------------------
Radix8First:
	stmdb     sp!, {r4 - r11, lr}
	VPUSH     {d8 - d15}                @ AAPCS: d8-d15 must survive the call

	ldr       r3, SQRT1_2               @ r3 = round(sqrt(1/2) * 2^31)
	cmp       r1, #0                    @ num == 0 -> nothing to do

	VDUP.I32  Q15, r3                   @ q15 = {SQRT1_2 x4}, twiddle for VQDMULH
	beq       Radix8First_END

Radix8First_LOOP:
	VLD1.I32  {d0, d1, d2, d3}, [r0]!   @ buf[0..7]
	VLD1.I32  {d8, d9, d10, d11}, [r0]! @ buf[8..15]

	VADD.S32  d4, d0, d1                @ r0 = buf[0] + buf[2]@ i0 = buf[1] + buf[3]@
	VSUB.S32  d5, d0, d1                @ r1 = buf[0] - buf[2]@ i1 = buf[1] - buf[3]@
	VSUB.S32  d7, d2, d3                @ r2 = buf[4] - buf[6]@ i2 = buf[5] - buf[7]@
	VADD.S32  d6, d2, d3                @ r3 = buf[4] + buf[6]@ i3 = buf[5] + buf[7]@
	VREV64.I32 d7, d7                   @ swap re/im so cross terms line up

	VADD.S32  Q0, Q2, Q3                @ r4 = (r0 + r2)@ i4 = (i0 + i2)@ i6 = (i1 + r3)@ r7 = (r1 + i3)
	VSUB.S32  Q1, Q2, Q3                @ r5 = (r0 - r2)@ i5 = (i0 - i2)@ r6 = (r1 - i3)@ i7 = (i1 - r3)@

	VREV64.I32 d3, d3

	VADD.S32  d4, d8, d9                @ r0 = buf[ 8] + buf[10]@ i0 = buf[ 9] + buf[11]@
	VSUB.S32  d7, d10, d11              @ r1 = buf[12] - buf[14]@ i1 = buf[13] - buf[15]@
	VADD.S32  d6, d10, d11              @ r2 = buf[12] + buf[14]@ i2 = buf[13] + buf[15]@
	VREV64.I32 d7, d7
	VSUB.S32  d5, d8, d9                @ r3 = buf[ 8] - buf[10]@ i3 = buf[ 9] - buf[11]@

	VTRN.32   d1, d3

	VADD.S32  Q4, Q2, Q3                @ t0 = (r0 + r2) >> 1@ t1 = (i0 + i2) >> 1@ i0 = i1 + r3@ r2 = r1 + i3@
	VSUB.S32  Q5, Q2, Q3                @ t2 = (r0 - r2) >> 1@ t3 = (i0 - i2) >> 1@ r0 = r1 - i3@ i2 = i1 - r3@

	VREV64.I32 d3, d3

	@ halve the accumulated sums to keep the fixed-point headroom
	VSHR.S32  d8, d8, #1
	VSHR.S32  Q0, Q0, #1
	VREV64.I32 d10, d10
	VTRN.32   d11, d9
	VSHR.S32  Q1, Q1, #1
	VSHR.S32  d10, d10, #1
	VREV64.I32 d9, d9

	sub       r0, r0, #0x40             @ rewind to start of this 16-word group

	VADD.S32  d12, d0, d8
	VSUB.S32  d16, d0, d8
	VADD.S32  d14, d2, d10
	VSUB.S32  d18, d2, d10

	VSUB.S32  d4, d11, d9
	VADD.S32  d5, d11, d9

	VREV64.I32 d18, d18

	VQDMULH.S32 Q3, Q2, Q15             @ scale diagonal terms by sqrt(1/2) (Q31 doubling mul-high)
	VTRN.32   d14, d18
	VTRN.32   d6, d7
	VREV64.I32 d18, d18

	VSUB.S32  d15, d3, d6
	VREV64.I32 d7, d7
	VADD.S32  d19, d3, d6
	VADD.S32  d13, d1, d7
	VSUB.S32  d17, d1, d7

	VREV64.I32 d17, d17
	VTRN.32   d13, d17
	VREV64.I32 d17, d17

	subs      r1, r1, #1                @ one group done

	VST1.I32  {d12, d13, d14, d15}, [r0]!
	VST1.I32  {d16, d17, d18, d19}, [r0]!
	bne       Radix8First_LOOP

Radix8First_END:
	VPOP      {d8 - d15}                @ restore callee-saved NEON regs
	ldmia     sp!, {r4 - r11, pc}
SQRT1_2:
	.word     0x2d413ccd                @ round(sqrt(1/2) * 2^31), Q31

	@ENDP  @ |Radix8First|

	.section .text
	.global	Radix4First

@------------------------------------------------------------------------------
@ void Radix4First(Word32 *buf, Word32 num)
@
@ First radix-4 pass of the FFT: processes `num` groups of 4 complex values
@ (8 words) in-place at buf; r0 advances by 0x20 bytes per iteration via the
@ post-incrementing store.
@
@ ABI:    AAPCS
@ In:     r0 = buf  (interleaved re/im Word32 pairs, 8 words per group)
@         r1 = num  (group count; 0 is handled and does nothing)
@ Clobb:  q0-q5, flags
@ NOTE:   d8-d11 (q4/q5) are callee-saved under AAPCS and are now preserved.
@------------------------------------------------------------------------------
Radix4First:
	stmdb     sp!, {r4 - r11, lr}
	VPUSH     {d8 - d11}                @ AAPCS: q4/q5 must survive the call

	cmp       r1, #0                    @ num == 0 -> nothing to do
	beq       Radix4First_END

Radix4First_LOOP:
	VLD1.I32  {d0, d1, d2, d3}, [r0]    @ buf[0..7]

	@ (comments below corrected: add/sub annotations were swapped)
	VADD.S32  d4, d0, d1                @ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@
	VSUB.S32  d5, d0, d1                @ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
	VSUB.S32  d7, d2, d3                @ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
	VADD.S32  d6, d2, d3                @ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@

	VREV64.I32 d7, d7                   @ swap re/im of the difference pair

	VADD.S32  Q4, Q2, Q3
	VSUB.S32  Q5, Q2, Q3

	VREV64.I32 d11, d11
	VTRN.32   d9, d11
	subs      r1, r1, #1                @ one group done
	VREV64.I32 d11, d11
	VST1.I32  {d8, d9, d10, d11}, [r0]! @ write back, advance to next group

	bne       Radix4First_LOOP

Radix4First_END:
	VPOP      {d8 - d11}                @ restore callee-saved NEON regs
	ldmia     sp!, {r4 - r11, pc}

	@ENDP  @ |Radix4First|
	.end