@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ Contains the function WebRtcIsacfix_CalculateResidualEnergyNeon() of the
@ iSAC codec, optimized for the ARM Neon platform. Reference code in
@ lpc_masking_model.c.

.arch armv7-a
.fpu neon
.global WebRtcIsacfix_CalculateResidualEnergyNeon
.align 2

@ int32_t WebRtcIsacfix_CalculateResidualEnergyNeon(int lpc_order,
@                                                   int32_t q_val_corr,
@                                                   int q_val_polynomial,
@                                                   int16_t* a_polynomial,
@                                                   int32_t* corr_coeffs,
@                                                   int* q_val_residual_energy);
@
@ Arguments on entry:
@   r0         lpc_order
@   r1         q_val_corr
@   r2         q_val_polynomial
@   r3         a_polynomial
@   [sp, #0]   corr_coeffs             (read at [r13, #48] after the prologue)
@   [sp, #4]   q_val_residual_energy   (read at [r13, #52] after the prologue)
@
@ Result:
@   r0                        residual_energy (upper 32 bits of the normalized
@                             64-bit sum)
@   *q_val_residual_energy    q_val_corr - 32 + 2 * q_val_polynomial
@                             + both normalization shifts + shift_internal
@
@ Local stack frame (16 bytes below the saved r4-r11):
@   [r13, #4]    &a_polynomial[lpc_order]
@   [r13, #8]    q_val_corr
@   [r13, #12]   q_val_polynomial
@
@ Register usage inside the loops:
@   r6   outer loop counter i            r1   inner loop counter j
@   r7   &a_polynomial[i]                r12  walks a_polynomial[j]
@   r9   &a_polynomial[lpc_order - i]    r4   walks a_polynomial[j - i]
@   r10  lpc_order - 1 - i               r11  walks corr_coeffs
@   q10  the constant 1 (byte 0 of d20)  q11  shift_internal
@   q13  sum64                           q15  sum64_tmp
@   d25  corr_coeffs[i] in both 32-bit lanes

WebRtcIsacfix_CalculateResidualEnergyNeon:
.fnstart
.save {r4-r11}
  push {r4-r11}

  @ Describe the 16-byte local area to the unwinder as well; .save alone
  @ only covers the pushed registers.
.pad #16
  sub r13, r13, #16
  str r1, [r13, #8]          @ Spill q_val_corr.
  str r2, [r13, #12]         @ Spill q_val_polynomial.

  mov r4, #1
  vmov.s64 q11, #0           @ Initialize shift_internal.
  vmov.s64 q13, #0           @ Initialize sum64.
  vmov.s64 q10, #0
  vmov.u8 d20[0], r4         @ Set q10 to 1.

  cmp r0, #0
  blt POST_LOOP_I            @ Negative lpc_order: skip all accumulation.

  add r9, r3, r0, asl #1     @ &a_polynomial[lpc_order]
  mov r6, #0                 @ Loop counter i.
  ldr r11, [r13, #48]        @ corr_coeffs (5th argument, from caller's stack).
  sub r10, r0, #1
  mov r7, r3                 @ &a_polynomial[0]
  str r9, [r13, #4]          @ Keep &a_polynomial[lpc_order] for the tail step.

LOOP_I:
  ldr r2, [r11], #4          @ corr_coeffs[i]
  vmov.s64 q15, #0           @ Initialize the sum64_tmp.
  vdup.s32 d25, r2

  cmp r0, r6                 @ Compare lpc_order to i.
  movle r2, r6               @ No inner iterations: final j is i.
  ble POST_LOOP_J

  mov r1, r6                 @ j = i;
  mov r12, r7                @ &a_polynomial[i]
  mov r4, r3                 @ &a_polynomial[j - i]

  @ Inner loop: each pass consumes two consecutive int16 coefficients from
  @ each pointer (one 32-bit load apiece) and yields two 64-bit products.
LOOP_J:
  ldr r8, [r12], #4          @ a_polynomial[j], a_polynomial[j + 1]
  ldr r5, [r4], #4           @ a_polynomial[j - i], a_polynomial[j - i + 1]
  vmov.u32 d0[0], r8
  vmov.u32 d1[0], r5
  vmull.s16 q0, d0, d1       @ 16x16 -> 32-bit products.
  vmull.s32 q0, d0, d25      @ Times corr_coeffs[i] -> 64-bit products.
  cmp r6, #0                 @ i == 0?
  vshl.s64 q0, q11           @ Apply shift_internal.
  beq SUM1
  vshl.s64 q0, #1            @ Terms with i != 0 are doubled.

SUM1:
  @ NOTE(review): vqadd reports saturation through FPSCR.QC, while bvc tests
  @ APSR.V, which at this point still comes from the preceding integer cmp.
  @ The rescale path below therefore looks unreachable. Confirm against the
  @ ARM ARM and the C reference before relying on it. The same pattern
  @ repeats after every vqadd in this function.
  vqadd.s64 q14, q0, q15     @ Sum and test overflow.
  add r1, r1, #2             @ j += 2
  bvc MOV1                   @ Skip the shift if there's no overflow.
  vshr.s64 q0, #1
  vshr.s64 q15, #1
  vadd.s64 q14, q0, q15
  vsub.s64 q11, q10          @ shift_internal--

MOV1:
  cmp r0, r1                 @ Compare lpc_order to j.
  vmov.s64 q15, q14          @ Update sum64_tmp.
  bgt LOOP_J

  @ Recompute the value j had when the loop exited:
  @ ((lpc_order - 1 - i) & ~1) + i + 2.
  bic r1, r10, #1
  add r2, r6, #2
  add r2, r1, r2

POST_LOOP_J:
  vqadd.s64 q0, q13, q15     @ Sum and test overflow.
  bvc MOV2                   @ Skip the shift if there's no overflow.
  vshr.s64 q13, #1
  vshr.s64 q15, #1
  vadd.s64 q0, q13, q15
  vsub.s64 q11, q10          @ shift_internal--

MOV2:
  vmov.s64 q13, q0           @ update sum64.
  cmp r2, r0                 @ Did the pairwise loop stop exactly at lpc_order?
  bne CHECK_LOOP_CONDITION

  @ Last sample in the inner loop.
  ldr r4, [r13, #4]          @ &a_polynomial[lpc_order]
  ldrsh r8, [r4]
  ldrsh r12, [r9]            @ a_polynomial[lpc_order - i]
  mul r8, r8, r12
  vmov.s32 d0[0], r8
  vmull.s32 q0, d0, d25
  cmp r6, #0                 @ i == 0?
  vshl.s64 q0, q11           @ Apply shift_internal.
  beq SUM2
  vshl.s64 q0, #1            @ Terms with i != 0 are doubled.

SUM2:
  vqadd.s64 d1, d0, d26      @ Sum and test overflow.
  bvc MOV3                   @ Skip the shift if there's no overflow.
  vshr.s64 q13, #1
  vshr.s64 d0, #1
  vadd.s64 d1, d0, d26
  vsub.s64 q11, q10          @ shift_internal--

MOV3:
  vmov.s64 d26, d1           @ update sum64.

CHECK_LOOP_CONDITION:
  add r6, r6, #1             @ i++
  sub r9, r9, #2             @ &a_polynomial[lpc_order - i]
  cmp r0, r6                 @ Compare i to lpc_order.
  sub r10, r10, #1           @ lpc_order - 1 - i
  add r7, r7, #2             @ &a_polynomial[i]
  bge LOOP_I

POST_LOOP_I:
  @ Fold the two 64-bit lanes of sum64 into a single value in d0.
  mov r3, #0
  vqadd.s64 d0, d26, d27     @ Sum and test overflow.
  bvc GET_SHIFT_NORM         @ Skip the shift if there's no overflow.
  vshr.s64 q13, #1
  vadd.s64 d0, d26, d27
  vsub.s64 q11, q10          @ shift_internal--

  @ Normalize in two passes; each pass shifts left by the number of
  @ redundant sign bits found in the upper 32-bit word.
GET_SHIFT_NORM:
  vcls.s32 d1, d0            @ Count leading extra sign bits.
  vmov.32 r2, d1[1]          @ Store # of sign bits of only the 32 MSBs.
  vmovl.s32 q1, d1
  vshl.s64 d0, d3            @ d3 contains # of sign bits of the 32 MSBs.

  vcls.s32 d1, d0            @ Count again the leading extra sign bits.
  vmov.s32 r1, d1[1]         @ Store # of sign bits of only the 32 MSBs.
  vmovl.s32 q1, d1
  vshl.s64 d0, d3            @ d3 contains # of sign bits of the 32 MSBs.

  vmov.s32 r0, d0[1]         @ residual_energy
  vmov.s32 r3, d22[0]        @ shift_internal

  @ Calculate the value for q_val_residual_energy.
  ldr r4, [r13, #8]          @ q_val_corr
  ldr r5, [r13, #12]         @ q_val_polynomial
  sub r12, r4, #32
  add r12, r12, r5, asl #1
  add r1, r12, r1            @ add 1st part of shift_internal.
  add r12, r1, r2            @ add 2nd part of shift_internal.
  ldr r2, [r13, #52]         @ q_val_residual_energy (6th argument).
  add r3, r12, r3            @ value for q_val_residual_energy.
  str r3, [r2, #0]

  add r13, r13, #16
  pop {r4-r11}
  bx r14

.fnend