@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ Contains the core loop routine for the pitch filter function in iSAC,
@ optimized for ARMv7 platforms.
@
@ Output is bit-exact with the reference C code in pitch_filter.c.

@ Expected to provide PITCH_BUFFSIZE, which is used as an immediate below.
#include "settings.h"

@ The DSP instructions used by the routine (smuad, smlad, ssat) need ARMv6
@ or later.
.arch armv6
.align  2                     @ 2^2 = 4-byte alignment for the entry point.
.global WebRtcIsacfix_PitchFilterCore


@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
@                                    WebRtc_Word16 gain,
@                                    int index,
@                                    WebRtc_Word16 sign,
@                                    WebRtc_Word16* inputState,
@                                    WebRtc_Word16* outputBuf2,
@                                    const WebRtc_Word16* coefficient,
@                                    WebRtc_Word16* inputBuf,
@                                    WebRtc_Word16* outputBuf,
@                                    int* index2);
@
@ For each of the loopNumber samples starting at position *index2:
@   1. Take the 9-tap dot product of ubufQQpos2[] (a window into outputBuf2
@      at element offset PITCH_BUFFSIZE - index - 2) with coefficient[],
@      then round and saturate it to 16 bits.
@   2. Scale that value by gain, push it into the 5-entry inputState[]
@      delay line and take the dot product of the delay line with
@      kDampFilter[], again rounded and saturated to 16 bits.
@   3. outputBuf[i] = sat16(inputBuf[i] - sign * filtered)
@      outputBuf2[i + PITCH_BUFFSIZE] = sat16(outputBuf[i] + inputBuf[i])
@ *index2 is advanced by loopNumber. Nothing is read or written when
@ loopNumber <= 0.
@
@ In:    r0 = loopNumber, r1 = gain, r2 = index, r3 = sign.
@        inputState, outputBuf2, coefficient, inputBuf, outputBuf and index2
@        are read from the stack, in that order.
@ Out:   no return value; results are written through the pointers.
@ Saves: r4-r11 are pushed and restored. r0-r3 and r12 are overwritten.
@ Stack: 32 bytes of saved registers plus 8 bytes of locals, so the first
@        stack argument is found at [sp, #40] inside the routine.
@ Note:  some word loads and stores below use addresses that are only
@        halfword aligned (for example inputState + 2), so unaligned word
@        access must be available.

WebRtcIsacfix_PitchFilterCore:
.fnstart
  @ The loop below is bottom-tested and *index2 is updated before it, so
  @ return before touching anything when no iteration is requested.
  cmp r0, #0
  bxle lr

  push {r4-r11}
  sub sp, #8                  @ Locals; only [sp, #4] (sign) is used.

  str r3, [sp, #4]            @ sign (r3 is reused as a pointer below)
  ldr r3, [sp, #44]           @ outputBuf2
  ldr r6, [sp, #60]           @ index2
  ldr r7, [r6]                @ *index2
  ldr r8, [sp, #52]           @ inputBuf
  ldr r12, [sp, #56]          @ outputBuf

  add r4, r7, r0
  str r4, [r6]                @ Store return value to index2.

  mov r10, r7, asl #1         @ Byte offset of 16-bit element *index2.
  add r12, r10                @ &outputBuf[*index2]
  add r8, r10                 @ &inputBuf[*index2]

  add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
  add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
  sub r4, r2                  @ r2: index
  sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2
  add r3, r4, lsl #1          @ &ubufQQpos2[*index2]
  ldr r9, [sp, #48]           @ coefficient

.Lpitch_filter_loop:
@ Usage of registers in the loop:
@  r0: loop counter
@  r1: gain
@  r2: tmpW32
@  r3: &ubufQQpos2[]
@  r6: &outputBuf2[]
@  r8: &inputBuf[]
@  r9: &coefficient[]
@  r12: &outputBuf[]
@  r4, r5, r7, r10, r11: scratch

  @ Filter to get fractional pitch.
  @ The pitch filter loop here is unrolled with 9 multiplications: four
  @ dual 16x16 multiply-accumulates plus one single 16x16 for the last tap.
  pld [r3]
  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1]
  ldr r4, [r9], #4            @ coefficient[0, 1]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 2, *index2 + 3]
  ldr r5, [r9], #4            @ coefficient[2, 3]
  smuad r2, r10, r4           @ tmpW32 = taps 0 and 1
  smlad r2, r11, r5, r2       @ tmpW32 += taps 2 and 3

  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 4, *index2 + 5]
  ldr r4, [r9], #4            @ coefficient[4, 5]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 6, *index2 + 7]
  ldr r5, [r9], #4            @ coefficient[6, 7]
  smlad r2, r10, r4, r2       @ tmpW32 += taps 4 and 5
  ldrh r10, [r3], #-14        @ Tap 8; r3 ends one sample past its start.
  ldrh  r4, [r9], #-16        @ coefficient[8]; r9 back to &coefficient[0].
  smlad r2, r11, r5, r2       @ tmpW32 += taps 6 and 7
  smlabb r2, r10, r4, r2      @ tmpW32 += tap 8 (bottom halfwords only)

  @ Saturate to avoid overflow in tmpW16.
  @ Halving first keeps the rounding add from overflowing 32 bits:
  @ tmpW16 = sat16(((tmpW32 >> 1) + 0x1000) >> 13)
  asr r2, #1
  add r4, r2, #0x1000
  ssat r7, #16, r4, asr #13

  @ Shift low pass filter state, and execute the low pass filter.
  @ The memmove() and the low pass filter loop are unrolled and mixed.
  smulbb r5, r1, r7           @ gain * tmpW16
  add r7, r5, #0x800          @ Round ...
  asr r7, #12                 @ Get the value for inputState[0].
  ldr r11, [sp, #40]          @ inputState
  pld [r11]
  adr r10, kDampFilter
  ldrsh r4, [r10], #2         @ kDampFilter[0]
  mul r2, r7, r4              @ tmpW32 = new inputState[0] * kDampFilter[0]
  ldr r4, [r11]               @ inputState[0, 1], before shift.
  strh r7, [r11]              @ inputState[0], after shift.
  ldr r5, [r11, #4]           @ inputState[2, 3], before shift.
  ldr r7, [r10], #4           @ kDampFilter[1, 2]
  ldr r10, [r10]              @ kDampFilter[3, 4]
  str r4, [r11, #2]           @ inputState[1, 2], after shift.
  str r5, [r11, #6]           @ inputState[3, 4], after shift.
  smlad r2, r4, r7, r2        @ tmpW32 += inputState[1, 2] * kDampFilter[1, 2]
  smlad r2, r5, r10, r2       @ tmpW32 += inputState[3, 4] * kDampFilter[3, 4]

  @ Saturate to avoid overflow.
  @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
  @ to avoid overflow in the next saturation step.
  asr r2, #1
  add r10, r2, #0x2000
  ssat r10, #16, r10, asr #14

  @ Subtract from input and update buffer.
  ldr r11, [sp, #4]           @ sign
  ldrsh r7, [r8], #2          @ inputBuf[*index2]; advance to next sample.
  smulbb r5, r11, r10         @ sign * filtered sample
  subs r0, #1                 @ Sets the flags tested by bgt below; nothing
                              @ in between updates N, Z, C or V.
  sub r4, r7, r5              @ inputBuf[*index2] - sign * filtered sample
  ssat r2, #16, r4
  strh  r2, [r12], #2         @ outputBuf[*index2]

  add r2, r7                  @ outputBuf[*index2] + inputBuf[*index2]
  ssat r2, #16, r2
  strh  r2, [r6], #2          @ outputBuf2[*index2 + PITCH_BUFFSIZE]
  bgt .Lpitch_filter_loop

  add sp, #8
  pop {r4-r11}
  bx  lr
.fnend

@ Five 16-bit taps of the low pass filter that WebRtcIsacfix_PitchFilterCore
@ applies to inputState[]. The routine takes the table address with adr,
@ which is PC-relative, so the table has to stay close to the code.
.align  2
kDampFilter:
  .short  -2294, 8192, 20972, 8192, -2294
148