1a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@
2a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@
4a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ Use of this source code is governed by a BSD-style license
5a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ that can be found in the LICENSE file in the root of the source
6a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ tree. An additional intellectual property rights grant can be found
7a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ in the file PATENTS.  All contributing project authors may
8a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ be found in the AUTHORS file in the root of the source tree.
9a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@
10a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
11a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ Contains the function WebRtcIsacfix_CalculateResidualEnergyNeon() of the
12a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ iSAC codec, optimized for the ARM NEON platform. The reference C code is in
13a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ lpc_masking_model.c.
14a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Assemble for the ARMv7-A architecture with NEON instructions enabled.
15a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin.arch armv7-a
16a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin.fpu neon
@ Export the entry point so that other object files can reference it.
17a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin.global WebRtcIsacfix_CalculateResidualEnergyNeon
@ GNU as for ARM reads the .align operand as a power of two, i.e. a 4-byte
@ boundary here.
18a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin.align  2
19a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
20a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@ int32_t WebRtcIsacfix_CalculateResidualEnergyNeon(int lpc_order,
21a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@                                                   int32_t q_val_corr,
22a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@                                                   int q_val_polynomial,
23a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@                                                   int16_t* a_polynomial,
24a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@                                                   int32_t* corr_coeffs,
25a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin@                                                   int* q_val_residual_energy);
26a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Accumulates a_polynomial[j] * a_polynomial[j - i] * corr_coeffs[i] over
@ 0 <= i <= j <= lpc_order in 64-bit NEON lanes, then normalizes the sum to
@ 32 bits.
@
@ Register use on entry, as named by the prototype comment and the comments
@ below: r0 = lpc_order, r1 = q_val_corr, r2 = q_val_polynomial,
@ r3 = a_polynomial. corr_coeffs and q_val_residual_energy are loaded from
@ [sp, #48] and [sp, #52] once the frame has been set up.
@ The normalized upper 32 bits of the sum are returned in r0, and the
@ accompanying Q-value is stored through q_val_residual_energy.
@
@ NEON registers: q11 = shift_internal, q13 = sum64 (two 64-bit partial sums
@ in d26/d27), q15 = sum64_tmp for the current i, d25 = corr_coeffs[i] in both
@ 32-bit lanes, q10 = the decrement applied to q11.
27a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinWebRtcIsacfix_CalculateResidualEnergyNeon:
28a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin.fnstart
29a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin.save {r4-r11}
30a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  push {r4-r11}
31a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Reserve 16 bytes of locals below the 32 bytes of saved registers:
@   [sp, #4]  = &a_polynomial[lpc_order] (stored just before LOOP_I)
@   [sp, #8]  = q_val_corr
@   [sp, #12] = q_val_polynomial
32a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  sub r13, r13, #16
33a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  str r1, [r13, #8]
34a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  str r2, [r13, #12]
35a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Only byte 0 of d20 is written after q10 has been zeroed, so d20 = 1 and
@ d21 = 0: every "vsub.s64 q11, q10" below decrements d22 and leaves d23
@ unchanged.
36a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  mov r4, #1
37a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s64 q11, #0            @ Initialize shift_internal.
38a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s64 q13, #0            @ Initialize sum64.
39a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s64 q10, #0
40a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.u8 d20[0], r4          @ Set q10 to 1.
41a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ A negative lpc_order skips both loops.
42a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  cmp r0, #0
43a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  blt POST_LOOP_I
44a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Loop state: r11 walks corr_coeffs (loaded from the stack), r10 starts at
@ lpc_order - 1, r9 at &a_polynomial[lpc_order] and r7 at &a_polynomial[0].
@ r9, r10 and r7 are stepped once per outer iteration in CHECK_LOOP_CONDITION.
45a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r9, r3, r0, asl #1      @ &a_polynomial[lpc_order]
46a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  mov r6, #0                  @ Loop counter i.
47a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldr r11, [r13, #48]
48a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  sub r10, r0, #1
49a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  mov r7, r3                  @ &a_polynomial[0]
50a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  str r9, [r13, #4]
51a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Outer loop over i = 0 .. lpc_order inclusive (see the bge at the bottom).
52a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinLOOP_I:
53a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldr r2, [r11], #4            @ corr_coeffs[i]
54a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s64 q15, #0            @ Initialize the sum64_tmp.
55a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vdup.s32 d25, r2
56a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ When lpc_order <= i the paired inner loop is skipped; r2 is set to i because
@ the code under MOV2 compares r2 with lpc_order to decide whether the single
@ remaining sample must be processed.
57a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  cmp r0, r6                  @ Compare lpc_order to i.
58a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  movle r2, r6
59a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ble POST_LOOP_J
60a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
61a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  mov r1, r6                  @ j = i;
62a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  mov r12, r7                  @ &a_polynomial[i]
63a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  mov r4, r3                  @ &a_polynomial[j - i]
64a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Inner loop: each pass loads one 32-bit word (two int16_t coefficients) from
@ each pointer, so j advances by 2 per pass.
@   vmull.s16: 32-bit products a_polynomial[j] * a_polynomial[j - i]; the two
@              products of the freshly loaded halfwords end up in d0.
@   vmull.s32: those two products times corr_coeffs[i], as two 64-bit lanes.
@ The result is shifted by q11 (shift_internal; it only ever counts down from
@ zero, and a register VSHL treats a negative count as a right shift) and is
@ doubled when i != 0. The cmp sets the flags for the beq; the NEON shift in
@ between does not touch them.
65a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinLOOP_J:
66a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldr r8, [r12], #4
67a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldr r5, [r4], #4
68a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.u32 d0[0], r8
69a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.u32 d1[0], r5
70a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmull.s16 q0, d0, d1
71a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmull.s32 q0, d0, d25
72a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  cmp r6, #0                  @ i == 0?
73a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshl.s64 q0, q11
74a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  beq SUM1
75a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshl.s64 q0, #1
76a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ q14 = q0 + sum64_tmp with saturation, and j += 2. If the bvc falls through,
@ both operands are halved, re-added without saturation, and shift_internal
@ is decremented.
@ NOTE(review): bvc tests the core V flag, and the last flag-setting
@ instruction before it is "cmp r6, #0" above. Advanced SIMD saturation is
@ normally reported through FPSCR.QC rather than the APSR flags -- confirm
@ whether this rescaling path can ever be reached.
77a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinSUM1:
78a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vqadd.s64 q14, q0, q15      @ Sum and test overflow.
79a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r1, r1, #2
80a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bvc MOV1                    @ Skip the shift if there's no overflow.
81a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshr.s64 q0, #1
82a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshr.s64 q15, #1
83a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vadd.s64 q14, q0, q15
84a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vsub.s64 q11, q10
85a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Commit the new sum64_tmp and repeat while lpc_order > j.
86a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinMOV1:
87a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  cmp r0, r1                  @ Compare lpc_order to j.
88a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s64 q15, q14
89a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bgt LOOP_J
90a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ r10 holds lpc_order - 1 - i here, so
@ r2 = i + 2 + ((lpc_order - 1 - i) & ~1), the index just past the pairs that
@ LOOP_J consumed. It equals lpc_order exactly when lpc_order - i is even.
91a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bic r1, r10, #1
92a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r2, r6, #2
93a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r2, r1, r2
94a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Fold sum64_tmp into sum64 (both 64-bit lanes), with the same rescaling
@ fall-through as SUM1.
@ NOTE(review): the flags tested by this bvc come from "cmp r0, r1" under MOV1
@ or from "cmp r0, r6" in LOOP_I, not from the vqadd -- same concern as SUM1.
95a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinPOST_LOOP_J:
96a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vqadd.s64 q0, q13, q15      @ Sum and test overflow.
97a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bvc MOV2                    @ Skip the shift if there's no overflow.
98a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshr.s64 q13, #1
99a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshr.s64 q15, #1
100a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vadd.s64 q0, q13, q15
101a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vsub.s64 q11, q10
102a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ r2 == lpc_order means the element j = lpc_order has not been covered by the
@ paired loop and is handled on its own below; otherwise go to the next i.
103a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinMOV2:
104a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s64 q13, q0            @ update sum64.
105a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  cmp r2, r0
106a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bne CHECK_LOOP_CONDITION
107a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ r4 = &a_polynomial[lpc_order] (reloaded from the frame) and
@ r9 = &a_polynomial[lpc_order - i] (r9 is moved back 2 bytes per i).
@ Their 32-bit product times corr_coeffs[i] is formed in d0, then shifted and
@ doubled exactly as in LOOP_J.
108a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  @ Last sample in the inner loop.
109a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldr r4, [r13, #4]
110a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldrsh r8, [r4]
111a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldrsh r12, [r9]
112a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  mul r8, r8, r12
113a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s32 d0[0], r8
114a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmull.s32 q0, d0, d25
115a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  cmp r6, #0                  @ i == 0?
116a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshl.s64 q0, q11
117a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  beq SUM2
118a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshl.s64 q0, #1
119a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Add the single-sample term to the low lane of sum64 (d26). The fall-through
@ path halves all of q13 (both lanes) and d0 before re-adding.
@ NOTE(review): this bvc sees the flags of "cmp r6, #0" above -- same concern
@ as SUM1.
120a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinSUM2:
121a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vqadd.s64 d1, d0, d26       @ Sum and test overflow.
122a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bvc MOV3                    @ Skip the shift if there's no overflow.
123a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshr.s64 q13, #1
124a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshr.s64 d0, #1
125a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vadd.s64 d1, d0, d26
126a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vsub.s64 q11, q10
127a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
128a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinMOV3:
129a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s64 d26, d1            @ update sum64.
130a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Advance to the next i: i += 1, r9 back by one int16_t, r10 -= 1, r7 forward
@ by one int16_t. Loop again while lpc_order >= i.
131a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinCHECK_LOOP_CONDITION:
132a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r6, r6, #1
133a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  sub r9, r9, #2
134a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  cmp r0, r6                  @ Compare i to lpc_order.
135a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  sub r10, r10, #1
136a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r7, r7, #2
137a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bge LOOP_I
138a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Combine the two 64-bit partial sums of sum64 into d0. On the fall-through
@ path both halves are halved first and shift_internal is decremented.
@ The value written to r3 here is replaced by the vmov from d22[0] at the end
@ of GET_SHIFT_NORM before r3 is read.
@ NOTE(review): this bvc sees the flags of "cmp r0, r6" above or of
@ "cmp r0, #0" at entry -- same concern as SUM1.
139a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinPOST_LOOP_I:
140a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  mov r3, #0
141a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vqadd.s64 d0, d26, d27      @ Sum and test overflow.
142a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bvc GET_SHIFT_NORM          @ Skip the shift if there's no overflow.
143a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshr.s64 q13, #1
144a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vadd.s64 d0, d26, d27
145a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vsub.s64 q11, q10
146a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Normalize d0 in two passes. Each pass counts the redundant sign bits of the
@ upper 32-bit word (lane 1 of the vcls result), widens the counts so that d3
@ holds that count as a 64-bit value, and shifts d0 left by it. The counts are
@ kept in r2 (first pass) and r1 (second pass).
147a6451827d543eb00824bc95097e47d0aac51ae93Alexander GutkinGET_SHIFT_NORM:
148a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vcls.s32 d1, d0             @ Count leading extra sign bits.
149a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.32 r2, d1[1]           @ Store # of sign bits of only the 32 MSBs.
150a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmovl.s32 q1, d1
151a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshl.s64 d0, d3             @ d3 contains # of sign bits of the 32 MSBs.
152a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
153a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vcls.s32 d1, d0             @ Count again the leading extra sign bits.
154a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s32 r1, d1[1]          @ Store # of sign bits of only the 32 MSBs.
155a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmovl.s32 q1, d1
156a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vshl.s64 d0, d3             @ d3 contains # of sign bits of the 32 MSBs.
157a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ r0 = upper 32 bits of the normalized sum (the return value);
@ r3 = low 32 bits of d22, the lane of shift_internal that gets decremented.
158a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s32 r0, d0[1]          @ residual_energy
159a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  vmov.s32 r3, d22[0]         @ shift_internal
160a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ q_val_residual_energy = q_val_corr - 32 + 2 * q_val_polynomial
@                         + r1 + r2 + shift_internal
@ Here r1 and r2 are the two normalization shift counts taken in
@ GET_SHIFT_NORM (second and first pass respectively); shift_internal itself
@ is the r3 term. The result is stored through the pointer at [sp, #52].
161a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  @ Calculate the value for q_val_residual_energy.
162a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldr r4, [r13, #8]            @ q_val_corr
163a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldr r5, [r13, #12]           @ q_val_polynomial
164a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  sub r12, r4, #32
165a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r12, r12, r5, asl #1
166a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r1, r12, r1              @ add 1st part of shift_internal.
167a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r12, r1, r2              @ add 2nd part of shift_internal.
168a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  ldr r2, [r13, #52]
169a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r3, r12, r3              @ value for q_val_residual_energy.
170a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  str r3, [r2, #0]
171a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
@ Release the locals, restore r4-r11 and return to the caller.
172a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  add r13, r13, #16
173a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  pop {r4-r11}
174a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin  bx  r14
175a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
176a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin.fnend
177a6451827d543eb00824bc95097e47d0aac51ae93Alexander Gutkin
178