@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ Contains the core loop routine for the pitch filter function in iSAC,
@ optimized for ARMv7 platforms.
@ NOTE(review): the .arch directive below selects armv6 while this header
@ says ARMv7; confirm which baseline is intended.
@
@ Output is bit-exact with the reference C code in pitch_filter.c.

#include "settings.h"

.arch armv6
.align 2
.global WebRtcIsacfix_PitchFilterCore


@ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
@                                    WebRtc_Word16 gain,
@                                    int index,
@                                    WebRtc_Word16 sign,
@                                    WebRtc_Word16* inputState,
@                                    WebRtc_Word16* outputBuf2,
@                                    const WebRtc_Word16* coefficient,
@                                    WebRtc_Word16* inputBuf,
@                                    WebRtc_Word16* outputBuf,
@                                    int* index2) {
@
@ Argument locations as used by this routine:
@   r0: loopNumber     r1: gain     r2: index     r3: sign
@   The remaining six arguments are read from the stack. After the
@   prologue (8 pushed registers = 32 bytes, plus an 8 byte local area)
@   they are found at:
@     [sp, 40] inputState     [sp, 44] outputBuf2   [sp, 48] coefficient
@     [sp, 52] inputBuf       [sp, 56] outputBuf    [sp, 60] index2
@ Local area:
@     [sp, 0] loopNumber      [sp, 4] sign
@
@ Side effects visible in this file: *index2 is advanced by loopNumber,
@ and inputState, outputBuf and outputBuf2 are written once per iteration.
@ Nothing is returned in r0.

WebRtcIsacfix_PitchFilterCore:
.fnstart
  push {r4-r11}
  sub sp, #8

  @ NOTE(review): the copy of loopNumber at [sp] is never read back in
  @ this routine; r0 itself serves as the loop counter.
  str r0, [sp]                @ loopNumber
  str r3, [sp, #4]            @ sign (r3 is reused as a pointer below)
  ldr r3, [sp, #44]           @ outputBuf2
  ldr r6, [sp, #60]           @ index2
  ldr r7, [r6]                @ *index2
  ldr r8, [sp, #52]           @ inputBuf
  ldr r12, [sp, #56]          @ outputBuf

  @ *index2 is updated up front; the loop works from the old value in r7.
  add r4, r7, r0
  str r4, [r6]                @ Store return value to index2.

  mov r10, r7, asl #1         @ Byte offset of element *index2 (16-bit data).
  add r12, r10                @ &outputBuf[*index2]
  add r8, r10                 @ &inputBuf[*index2]

  @ PITCH_BUFFSIZE is not defined in this file; presumably it comes from
  @ settings.h.
  add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
  add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
  sub r4, r2                  @ r2: index
  sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2
  add r3, r4, lsl #1          @ &ubufQQpos2[*index2]
  ldr r9, [sp, #48]           @ coefficient

LOOP:
@ Usage of registers in the loop:
@  r0: loop counter
@  r1: gain
@  r2: tmpW32
@  r3: &ubufQQpos2[]
@  r6: &outputBuf2[]
@  r8: &inputBuf[]
@  r9: &coefficient[]
@  r12: &outputBuf[]
@  r4, r5, r7, r10, r11: scratch
@
@ The loop is bottom-tested (subs ... bgt), so the body runs at least once.
@ NOTE(review): this assumes loopNumber >= 1; confirm against the callers.

  @ Filter to get fractional pitch.
  @ The pitch filter loop here is unrolled with 9 multiplications:
  @ four dual 16x16 multiply-accumulates plus one single 16x16 at the end.
  pld [r3]
  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1]
  ldr r4, [r9], #4            @ coefficient[0, 1]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 2, *index2 + 3]
  ldr r5, [r9], #4            @ coefficient[2, 3]
  smuad r2, r10, r4           @ tmpW32 = sum of products 0 and 1.
  smlad r2, r11, r5, r2       @ tmpW32 += products 2 and 3.

  ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 4, *index2 + 5]
  ldr r4, [r9], #4            @ coefficient[4, 5]
  ldr r11, [r3], #4           @ ubufQQpos2[*index2 + 6, *index2 + 7]
  ldr r5, [r9], #4            @ coefficient[6, 7]
  smlad r2, r10, r4, r2       @ tmpW32 += products 4 and 5.
  @ r3 was advanced by 16 bytes above; stepping back 14 leaves a net +2,
  @ i.e. it points at the ubufQQpos2 sample for the next iteration.
  ldrh r10, [r3], #-14        @ ubufQQpos2[*index2 + 8]
  @ r9 was advanced by 16 bytes above; stepping back 16 restores it.
  ldrh r4, [r9], #-16         @ coefficient[8]; r9 back to &coefficient[0].
  smlad r2, r11, r5, r2       @ tmpW32 += products 6 and 7.
  @ smlabb multiplies the signed bottom halfwords only, so the
  @ zero-extending ldrh loads above are sufficient.
  smlabb r2, r10, r4, r2      @ tmpW32 += product 8.

  @ Saturate to avoid overflow in tmpW16.
  @ Sequence: halve, add the rounding constant 0x1000, then shift right
  @ by 13 and saturate to a signed 16-bit value.
  asr r2, #1
  add r4, r2, #0x1000
  ssat r7, #16, r4, asr #13   @ r7 = tmpW16

  @ Shift low pass filter state, and execute the low pass filter.
  @ The memmove() and the low pass filter loop are unrolled and mixed.
  smulbb r5, r1, r7           @ gain * tmpW16
  add r7, r5, #0x800          @ Round before the shift by 12.
  asr r7, #12                 @ Get the value for inputState[0].
  ldr r11, [sp, #40]          @ inputState
  pld [r11]
  adr r10, kDampFilter
  ldrsh r4, [r10], #2         @ kDampFilter[0]
  mul r2, r7, r4              @ tmpW32 = new inputState[0] * kDampFilter[0]
  @ The old state is read as two words and written back one halfword
  @ further along, which performs the shift of elements 0..3 to 1..4.
  @ NOTE(review): the word accesses at kDampFilter + 2, kDampFilter + 6,
  @ inputState + 2 and inputState + 6 are at halfword offsets; this
  @ presumably relies on unaligned word access being permitted on the
  @ target - confirm.
  ldr r4, [r11]               @ inputState[0, 1], before shift.
  strh r7, [r11]              @ inputState[0], after shift.
  ldr r5, [r11, #4]           @ inputState[2, 3], before shift.
  ldr r7, [r10], #4           @ kDampFilter[1, 2]
  ldr r10, [r10]              @ kDampFilter[3, 4]
  str r4, [r11, #2]           @ inputState[1, 2], after shift.
  str r5, [r11, #6]           @ inputState[3, 4], after shift.
  smlad r2, r4, r7, r2        @ tmpW32 += taps 1 and 2.
  smlad r2, r5, r10, r2       @ tmpW32 += taps 3 and 4.

  @ Saturate to avoid overflow.
  @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
  @ to avoid overflow in the next saturation step.
  asr r2, #1
  add r10, r2, #0x2000        @ Rounding constant for the shift by 14.
  ssat r10, #16, r10, asr #14 @ r10 = tmpW16

  @ Subtract from input and update buffer.
  @ inputBuf[*index2] is loaded twice: r4 is consumed by the subtraction,
  @ r7 keeps the unmodified sample for the outputBuf2 sum below. Only the
  @ second load post-increments r8.
  ldr r11, [sp, #4]           @ sign
  ldrsh r4, [r8]
  ldrsh r7, [r8], #2          @ inputBuf[*index2]
  smulbb r5, r11, r10         @ sign * tmpW16
  @ The flags set here are consumed by the bgt at the bottom; none of the
  @ instructions in between update N, Z, C or V.
  subs r0, #1
  sub r4, r5                  @ inputBuf[*index2] - sign * tmpW16
  ssat r2, #16, r4
  strh r2, [r12], #2          @ outputBuf[*index2]

  add r2, r7                  @ outputBuf[*index2] + inputBuf[*index2]
  ssat r2, #16, r2
  strh r2, [r6], #2           @ outputBuf2[*index2 + PITCH_BUFFSIZE]
  bgt LOOP

  add sp, #8
  pop {r4-r11}
  bx  lr
.fnend

@ Five-tap low pass (damping) filter coefficients, read by the loop above
@ through a PC-relative adr.
.align 2
kDampFilter:
  .short  -2294, 8192, 20972, 8192, -2294