1@
2@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3@
4@ Use of this source code is governed by a BSD-style license
5@ that can be found in the LICENSE file in the root of the source
6@ tree. An additional intellectual property rights grant can be found
7@ in the file PATENTS.  All contributing project authors may
8@ be found in the AUTHORS file in the root of the source tree.
9@
10
@ Contains the core loop routine for the pitch filter function in iSAC,
@ optimized for ARM platforms that implement the ARMv6 instruction set
@ (ARMv6 and later, including ARMv7).
@
@ Output is bit-exact with the reference C code in pitch_filter.c.
15
16#include "settings.h"
17
.arch armv6
.align  2
.global WebRtcIsacfix_PitchFilterCore

@ -----------------------------------------------------------------------------
@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
@                                    WebRtc_Word16 gain,
@                                    int index,
@                                    WebRtc_Word16 sign,
@                                    WebRtc_Word16* inputState,
@                                    WebRtc_Word16* outputBuf2,
@                                    const WebRtc_Word16* coefficient,
@                                    WebRtc_Word16* inputBuf,
@                                    WebRtc_Word16* outputBuf,
@                                    int* index2);
@
@ On entry:
@   r0 = loopNumber   r1 = gain   r2 = index   r3 = sign
@   [sp, #0]  = inputState     [sp, #4]  = outputBuf2   [sp, #8]  = coefficient
@   [sp, #12] = inputBuf       [sp, #16] = outputBuf    [sp, #20] = index2
@ After the prologue (32 bytes of saved registers + 8 bytes of frame) the same
@ stack arguments are found at [sp, #40] .. [sp, #60].
@
@ For each of the loopNumber iterations (i = *index2 on entry, then i + 1, ...):
@   1. tmp = sum over k = 0..8 of ubufQQpos2[i + k] * coefficient[k], where
@      ubufQQpos2 = &outputBuf2[PITCH_BUFFSIZE - index - 2].
@   2. tmp is rounded, shifted right by 14 in total and saturated to 16 bits.
@   3. inputState[1..4] = inputState[0..3];
@      inputState[0] = (gain * tmp + 0x800) >> 12.
@   4. The five inputState values are filtered with kDampFilter, and the sum
@      is rounded, shifted right by 15 in total and saturated to 16 bits.
@   5. outputBuf[i] = sat16(inputBuf[i] - sign * filtered);
@      outputBuf2[i + PITCH_BUFFSIZE] = sat16(inputBuf[i] + outputBuf[i]).
@ *index2 is advanced by loopNumber (written once, before the loop).
@
@ If loopNumber <= 0 the function returns immediately without touching any
@ buffer or *index2.
@
@ Preserves r4-r11 (saved and restored here); uses r0-r3 and r12 as scratch.
@ Makes no calls.
@
@ NOTE: the code issues 32-bit loads/stores at addresses that are only
@ halfword aligned (e.g. [inputState + 2], kDampFilter + 2), so it relies on
@ the platform supporting unaligned word accesses.
@ -----------------------------------------------------------------------------

WebRtcIsacfix_PitchFilterCore:
.fnstart
  @ The loop below is bottom-tested, so without this guard a non-positive
  @ loopNumber would still execute one full iteration.
  cmp r0, #0
  ble .LPitchFilterCoreReturn

  push {r4-r11}
  sub sp, #8                  @ Frame: [sp, #4] = sign. [sp] is unused; the
                              @ frame is kept at 8 bytes so the stack
                              @ argument offsets below stay as they are.

  str r3, [sp, #4]            @ sign
  ldr r3, [sp, #44]           @ outputBuf2
  ldr r6, [sp, #60]           @ index2
  ldr r7, [r6]                @ *index2
  ldr r8, [sp, #52]           @ inputBuf
  ldr r12, [sp, #56]          @ outputBuf

  add r4, r7, r0
  str r4, [r6]                @ *index2 += loopNumber (final value).

  mov r10, r7, asl #1         @ Byte offset of element *index2.
  add r12, r10                @ &outputBuf[*index2]
  add r8, r10                 @ &inputBuf[*index2]

  add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
  add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
  sub r4, r2                  @ r2: index
  sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2
  add r3, r4, lsl #1          @ &ubufQQpos2[*index2]
  ldr r9, [sp, #48]           @ coefficient

.LPitchFilterCoreLoop:
@ Usage of registers in the loop:
@  r0: loop counter (counts down to 0)
@  r1: gain
@  r2: tmpW32
@  r3: &ubufQQpos2[]
@  r6: &outputBuf2[]
@  r8: &inputBuf[]
@  r9: &coefficient[]
@  r12: &outputBuf[]
@  r4, r5, r7, r10, r11: scratch

  @ Filter to get fractional pitch.
  @ The pitch filter loop here is unrolled with 9 multiplications:
  @ four dual 16x16 multiply-accumulates plus one single multiply.
  pld [r3]
  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1]
  ldr r4, [r9], #4            @ coefficient[0, 1]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 2, *index2 + 3]
  ldr r5, [r9], #4            @ coefficient[2, 3]
  smuad r2, r10, r4
  smlad r2, r11, r5, r2

  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 4, *index2 + 5]
  ldr r4, [r9], #4            @ coefficient[4, 5]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 6, *index2 + 7]
  ldr r5, [r9], #4            @ coefficient[6, 7]
  smlad r2, r10, r4, r2
  ldrh r10, [r3], #-14        @ ubufQQpos2[*index2 + 8]; 16 - 14 leaves r3
                              @ one sample ahead, ready for the next pass.
  ldrh r4, [r9], #-16         @ coefficient[8]; r9 back to &coefficient[0].
  smlad r2, r11, r5, r2
  smlabb r2, r10, r4, r2      @ Only the bottom halfwords are multiplied, so
                              @ the zero-extension done by ldrh is harmless.

  @ Round and saturate to avoid overflow in tmpW16.
  @ Halving first keeps the rounding addition from overflowing; the total
  @ right shift is 1 + 13 = 14.
  asr r2, #1
  add r4, r2, #0x1000
  ssat r7, #16, r4, asr #13

  @ Shift low pass filter state, and execute the low pass filter.
  @ The memmove() and the low pass filter loop are unrolled and mixed.
  smulbb r5, r1, r7           @ gain * tmpW16
  add r7, r5, #0x800
  asr r7, #12                 @ Get the value for inputState[0].
  ldr r11, [sp, #40]          @ inputState
  pld [r11]
  adr r10, kDampFilter
  ldrsh r4, [r10], #2         @ kDampFilter[0]
  mul r2, r7, r4              @ Uses the full 32-bit r7, whereas strh below
                              @ stores only its low halfword; the two agree
                              @ as long as the value fits in 16 bits.
  ldr r4, [r11]               @ inputState[0, 1], before shift.
  strh r7, [r11]              @ inputState[0], after shift.
  ldr r5, [r11, #4]           @ inputState[2, 3], before shift.
  ldr r7, [r10], #4           @ kDampFilter[1, 2]
  ldr r10, [r10]              @ kDampFilter[3, 4]
  str r4, [r11, #2]           @ inputState[1, 2], after shift.
  str r5, [r11, #6]           @ inputState[3, 4], after shift.
  smlad r2, r4, r7, r2
  smlad r2, r5, r10, r2

  @ Saturate to avoid overflow.
  @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
  @ to avoid overflow in the next saturation step.
  asr r2, #1
  add r10, r2, #0x2000
  ssat r10, #16, r10, asr #14

  @ Subtract from input and update buffer.
  ldr r11, [sp, #4]           @ sign
  ldrsh r7, [r8], #2          @ inputBuf[*index2]
  smulbb r5, r11, r10         @ sign * filtered sample
  subs r0, #1                 @ Sets the flags tested by bgt below; nothing
                              @ in between modifies N, Z, C or V.
  sub r4, r7, r5
  ssat r2, #16, r4
  strh r2, [r12], #2          @ outputBuf[*index2]

  add r2, r7                  @ inputBuf[*index2] + outputBuf[*index2]
  ssat r2, #16, r2
  strh r2, [r6], #2           @ outputBuf2[*index2 + PITCH_BUFFSIZE]
  bgt .LPitchFilterCoreLoop

  add sp, #8
  pop {r4-r11}
.LPitchFilterCoreReturn:
  bx  lr
.fnend
144
@ Coefficients of the 5-tap low pass filter that is applied to inputState[]
@ inside the loop of WebRtcIsacfix_PitchFilterCore. The table is symmetric and
@ its taps sum to 32768. It is placed right after the function because the
@ code addresses it PC-relative with adr.
.p2align 2                    @ 4-byte boundary
kDampFilter:
  .hword  -2294               @ kDampFilter[0]
  .hword  8192                @ kDampFilter[1]
  .hword  20972               @ kDampFilter[2]
  .hword  8192                @ kDampFilter[3]
  .hword  -2294               @ kDampFilter[4]
148