@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ Contains the core loop routine for the pitch filter function in iSAC,
@ optimized for ARMv7 platforms.
@
@ Output is bit-exact with the reference C code in pitch_filter.c.

@ asm_defines.h presumably supplies the GLOBAL_FUNCTION / DEFINE_FUNCTION
@ macros used in this file -- TODO confirm against the header.
#include "webrtc/system_wrappers/interface/asm_defines.h"
@ settings.h presumably supplies PITCH_BUFFSIZE -- TODO confirm.
#include "settings.h"

GLOBAL_FUNCTION WebRtcIsacfix_PitchFilterCore
@ On ARM targets the operand of .align is a power of two: 4-byte alignment.
.align  2

@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
@                                    int16_t gain,
@                                    int index,
@                                    int16_t sign,
@                                    int16_t* inputState,
@                                    int16_t* outputBuf2,
@                                    const int16_t* coefficient,
@                                    int16_t* inputBuf,
@                                    int16_t* outputBuf,
@                                    int* index2) {
@
@ Arguments as read by the code below:
@   r0 = loopNumber, r1 = gain, r2 = index, r3 = sign
@   On the stack (offsets are relative to sp after the 40-byte prologue):
@     [sp, #40] inputState    [sp, #44] outputBuf2   [sp, #48] coefficient
@     [sp, #52] inputBuf      [sp, #56] outputBuf    [sp, #60] index2
@
@ For each of loopNumber samples, with i starting at the incoming *index2
@ and ubufQQpos2 = &outputBuf2[PITCH_BUFFSIZE - index - 2]:
@   1. tmp = sum over j = 0..8 of ubufQQpos2[i + j] * coefficient[j]
@   2. tmpW16 = sat16(((tmp >> 1) + 0x1000) >> 13)
@   3. inputState[1..4] = inputState[0..3];
@      inputState[0] = (gain * tmpW16 + 0x800) >> 12
@   4. tmp = sum over j = 0..4 of inputState[j] * kDampFilter[j]
@      tmpW16 = sat16(((tmp >> 1) + 0x2000) >> 14)
@   5. outputBuf[i] = sat16(inputBuf[i] - sign * tmpW16)
@      outputBuf2[i + PITCH_BUFFSIZE] = sat16(outputBuf[i] + inputBuf[i])
@
@ *index2 is advanced by loopNumber before the loop starts.
@ Nothing is read or written when loopNumber <= 0.
@
@ The int16_t arrays and kDampFilter are accessed with word loads/stores at
@ halfword offsets, so the target must permit unaligned word accesses.
DEFINE_FUNCTION WebRtcIsacfix_PitchFilterCore
  @ The loop below is bottom-tested, so without this check a loopNumber of
  @ zero (or less) would still process one sample.
  cmp r0, #0
  ble LOOP_END

  push {r4-r11}               @ 32 bytes of callee-saved registers.
  sub sp, #8                  @ Local frame; only [sp, #4] (sign) is used.

  str r3, [sp, #4]            @ sign (r3 is reused as a pointer below).
  ldr r3, [sp, #44]           @ outputBuf2
  ldr r6, [sp, #60]           @ index2
  ldr r7, [r6]                @ *index2
  ldr r8, [sp, #52]           @ inputBuf
  ldr r12, [sp, #56]          @ outputBuf

  add r4, r7, r0
  str r4, [r6]                @ Store return value to index2.

  mov r10, r7, asl #1         @ Byte offset of element *index2.
  add r12, r10                @ &outputBuf[*index2]
  add r8, r10                 @ &inputBuf[*index2]

  add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
  add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
  sub r4, r2                  @ r2: index
  sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2
  add r3, r4, lsl #1          @ &ubufQQpos2[*index2]
  ldr r9, [sp, #48]           @ coefficient

LOOP:
@ Usage of registers in the loop:
@  r0: loop counter
@  r1: gain
@  r2: tmpW32
@  r3: &ubufQQpos2[]
@  r6: &outputBuf2[]
@  r8: &inputBuf[]
@  r9: &coefficient[]
@  r12: &outputBuf[]
@  r4, r5, r7, r10, r11: scratch

  @ Filter to get fractional pitch.
  @ The pitch filter loop here is unrolled with 9 multiplications.
  pld [r3]
  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1]
  ldr r4, [r9], #4            @ coefficient[0, 1]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 2, *index2 + 3]
  ldr r5, [r9], #4            @ coefficient[2, 3]
  smuad r2, r10, r4           @ tmpW32 = products 0 and 1.
  smlad r2, r11, r5, r2       @ tmpW32 += products 2 and 3.

  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 4, *index2 + 5]
  ldr r4, [r9], #4            @ coefficient[4, 5]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 6, *index2 + 7]
  ldr r5, [r9], #4            @ coefficient[6, 7]
  smlad r2, r10, r4, r2       @ tmpW32 += products 4 and 5.
  ldrh r10, [r3], #-14        @ Tap 8; net +2, so r3 is at the next sample.
  ldrh  r4, [r9], #-16        @ coefficient[8]; r9 back to &coefficient[0].
  smlad r2, r11, r5, r2       @ tmpW32 += products 6 and 7.
  smlabb r2, r10, r4, r2      @ tmpW32 += product 8 (bottom halfwords).

  @ Saturate to avoid overflow in tmpW16.
  @ Halving first keeps the rounding add below from overflowing 32 bits.
  asr r2, #1
  add r4, r2, #0x1000         @ Rounding constant for the >> 13 below.
  ssat r7, #16, r4, asr #13

  @ Shift low pass filter state, and execute the low pass filter.
  @ The memmove() and the low pass filter loop are unrolled and mixed.
  smulbb r5, r1, r7           @ gain * tmpW16
  add r7, r5, #0x800          @ Rounding constant for the >> 12 below.
  asr r7, #12                 @ Get the value for inputState[0].
  ldr r11, [sp, #40]          @ inputState
  pld [r11]
  adr r10, kDampFilter
  ldrsh r4, [r10], #2         @ kDampFilter[0]
  mul r2, r7, r4              @ tmpW32 = new inputState[0] * kDampFilter[0]
  ldr r4, [r11]               @ inputState[0, 1], before shift.
  strh r7, [r11]              @ inputState[0], after shift.
  ldr r5, [r11, #4]           @ inputState[2, 3], before shift.
  ldr r7, [r10], #4           @ kDampFilter[1, 2]
  ldr r10, [r10]              @ kDampFilter[3, 4]
  str r4, [r11, #2]           @ inputState[1, 2], after shift.
  str r5, [r11, #6]           @ inputState[3, 4], after shift.
  smlad r2, r4, r7, r2        @ += shifted inputState[1, 2] * kDampFilter[1, 2]
  smlad r2, r5, r10, r2       @ += shifted inputState[3, 4] * kDampFilter[3, 4]

  @ Saturate to avoid overflow.
  @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
  @ to avoid overflow in the next saturation step.
  asr r2, #1
  add r10, r2, #0x2000        @ Rounding constant for the >> 14 below.
  ssat r10, #16, r10, asr #14

  @ Subtract from input and update buffer.
  ldr r11, [sp, #4]           @ sign
  ldrsh r4, [r8]              @ inputBuf[*index2], consumed by the subtract.
  ldrsh r7, [r8], #2          @ inputBuf[*index2], kept for the add below.
  smulbb r5, r11, r10         @ sign * tmpW16
  subs r0, #1                 @ Sets the flags tested by bgt at the bottom.
  sub r4, r5
  ssat r2, #16, r4
  strh  r2, [r12], #2         @ outputBuf[*index2]

  add r2, r7                  @ outputBuf[*index2] + inputBuf[*index2]
  ssat r2, #16, r2
  strh  r2, [r6], #2          @ outputBuf2[*index2 + PITCH_BUFFSIZE]
  bgt LOOP

  add sp, #8
  pop {r4-r11}
LOOP_END:
  bx  lr

@ Five 16-bit taps of the low pass (damping) filter applied to inputState[]
@ in WebRtcIsacfix_PitchFilterCore. The taps are symmetric and sum to 32768.
@ The table is addressed with `adr`, so it has to stay within PC-relative
@ reach of that function.
.align  2
kDampFilter:
  .short  -2294, 8192, 20972, 8192, -2294
