@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@------------------------------------------------------------------------------
@ ixheaacd_esbr_cos_sin_mod_loop1
@
@ Syntax : GNU as, ARM (A32) + NEON, ELF object format ('@' starts a comment).
@ ABI    : register usage is consistent with AAPCS (args in r0-r3; r4-r12, lr
@          and d8-d11 are saved on entry and restored on exit).
@
@ In     : r0 = base of 32-bit input words, read front-to-back; every read is
@               paired with a second read BLOCK_OFFSET bytes further on
@          r1 = element count; the loop runs (r1 >> 2) times, and always at
@               least once because the counter is tested after the body
@          r2 = table of 32-bit coefficient pairs, 8 bytes consumed per step
@               (presumably cos/sin twiddles, judging by the function name -
@               confirm against the caller)
@          r3 = base of 32-bit output words, written front-to-back; every
@               write is paired with a second write BLOCK_OFFSET bytes on
@ Out    : nothing is returned explicitly; results are stored through r3.
@ Clobb  : r0, r2, r3, q0-q3 (d0-d7), flags.
@
@ Internal register roles:
@          r4 = input cursor walking back-to-front, starts at r0 + 8*r1 - 4
@          r5 = output cursor walking back-to-front, starts at r3 + 8*r1 - 8
@          r6 = remaining loop iterations
@          r7 = scratch address
@
@ One loop iteration performs four steps (forward, backward, forward,
@ backward). Each step reads one coefficient pair (c0, c1), the inputs
@ a0 = [r0], a1 = [r0 + BLOCK_OFFSET], b0 = [r4], b1 = [r4 + BLOCK_OFFSET],
@ forms 64-bit products, combines them with a wrapping add and a saturating
@ subtract, and stores the upper 32 bits of each result.
@------------------------------------------------------------------------------

        .equ    BLOCK_OFFSET, 256       @ byte distance between the paired accesses

@ Load one coefficient pair and four input words, then form all products.
@   q2 = (c0*a0, c1*a1)    q3 = (c0*b0, c1*b1)
@   q4 = (c1*a0, c0*a1)    q5 = (c1*b0, c0*b1)
@ Advances r2 by 8 and r0 by 4, moves r4 back by 4, clobbers r7.
        .macro  CSM_LOAD_MUL
        VLD1.32     {d0}, [r2]!                     @ d0 = (c0, c1)
        VREV64.32   d1, d0                          @ d1 = (c1, c0)
        VLD1.32     {d2[0]}, [r0]!                  @ a0, r0 now one word ahead
        ADD         r7, r0, #(BLOCK_OFFSET - 4)     @ -4 undoes the post-increment
        VLD1.32     {d2[1]}, [r7]                   @ a1
        VLD1.32     {d3[0]}, [r4]                   @ b0
        ADD         r7, r4, #BLOCK_OFFSET
        VLD1.32     {d3[1]}, [r7]                   @ b1
        SUB         r4, r4, #4

        VMULL.S32   q2, d0, d2
        VMULL.S32   q3, d0, d3
        VMULL.S32   q4, d1, d2
        VMULL.S32   q5, d1, d3
        .endm

@ Keep the upper 32 bits of the 64-bit sums (q0) and differences (q1).
@ After this d0 = sums, d2 = differences, d3 = copy of the sums so that the
@ lane-1 store can use the consecutive pair {d2, d3}.
        .macro  CSM_NARROW
        VSHRN.I64   d0, q0, #32
        VSHRN.I64   d2, q1, #32
        VMOV.32     d3, d0
        .endm

@ Forward step: results go to the front cursor r3, which advances by 8.
        .macro  CSM_STEP_FWD
        CSM_LOAD_MUL
        VADD.I64    q0, q4, q3                      @ (c1*a0 + c0*b0, c0*a1 + c1*b1), wrapping
        VQSUB.S64   q1, q5, q2                      @ (c1*b0 - c0*a0, c0*b1 - c1*a1), saturating
        CSM_NARROW
        VST2.32     {d0[0], d2[0]}, [r3]!           @ lane 0: sum, diff
        ADD         r7, r3, #(BLOCK_OFFSET - 8)     @ -8 undoes the post-increment
        VST2.32     {d2[1], d3[1]}, [r7]            @ lane 1: diff, sum
        .endm

@ Backward step: results go to the back cursor r5, which retreats by 8.
        .macro  CSM_STEP_BWD
        CSM_LOAD_MUL
        VADD.I64    q0, q5, q2                      @ (c1*b0 + c0*a0, c0*b1 + c1*a1), wrapping
        VQSUB.S64   q1, q4, q3                      @ (c1*a0 - c0*b0, c0*a1 - c1*b1), saturating
        CSM_NARROW
        VST2.32     {d0[0], d2[0]}, [r5]            @ lane 0: sum, diff
        ADD         r7, r5, #BLOCK_OFFSET
        VST2.32     {d2[1], d3[1]}, [r7]            @ lane 1: diff, sum
        SUB         r5, r5, #8
        .endm

.text
.p2align 2

        .global ixheaacd_esbr_cos_sin_mod_loop1
        .type   ixheaacd_esbr_cos_sin_mod_loop1, %function
ixheaacd_esbr_cos_sin_mod_loop1:

        STMFD       sp!, {r4-r12, r14}
        VPUSH       {d8-d11}                        @ q4/q5 are used as product registers

        @ Set up the two back-to-front cursors and the iteration count.
        ADD         r4, r0, r1, lsl #3
        SUB         r4, r4, #4                      @ r4 = r0 + 8*r1 - 4 (last input word)
        ADD         r5, r3, r1, lsl #3
        SUB         r5, r5, #8                      @ r5 = r3 + 8*r1 - 8 (last output pair)
        MOV         r6, r1, ASR #2                  @ four steps per iteration

.Lcos_sin_mod_loop1:
        CSM_STEP_FWD
        CSM_STEP_BWD
        CSM_STEP_FWD
        CSM_STEP_BWD

        SUBS        r6, r6, #1
        BGT         .Lcos_sin_mod_loop1

        VPOP        {d8-d11}
        LDMFD       sp!, {r4-r12, r15}              @ restore and return (saved lr -> pc)

        .size   ixheaacd_esbr_cos_sin_mod_loop1, . - ixheaacd_esbr_cos_sin_mod_loop1