1/*
2 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h"
12
// MIPS DSP ASE (inline assembly) version of the pitch filter inner loop.
//
// Processes |loopNumber| samples, starting at sample offset |*index2|. For
// each sample the loop:
//   1. Forms a nine-tap dot product between nine consecutive int16_t values
//      of |outputBuf2| (starting at PITCH_BUFFSIZE - (index + 2) + offset)
//      and the nine values in |coefficient|, adds 8192, shifts right by 14
//      and saturates the result to 16 bits.
//   2. Multiplies that value by |gain|, shifts right by 12 with rounding,
//      moves inputState[0..3] to inputState[1..4] and stores the low 16 bits
//      of the new value in inputState[0].
//   3. Accumulates the packed damping taps (kDampF0..kDampF2) against the
//      state values, adds 16384, shifts right by 15, saturates to 16 bits and
//      multiplies by |sign|.
//   4. Stores the 16-bit-saturated value (input sample - step 3 result) in
//      |outputBuf|, and the 16-bit-saturated value (that stored value + input
//      sample) in |outputBuf2| at offset + PITCH_BUFFSIZE.
//
// Parameters:
//   loopNumber  - number of loop iterations (samples) to run.
//   gain        - 16-bit gain applied to the fractional pitch filter output.
//   index       - selects where in |outputBuf2| the fractional pitch filter
//                 reads from (see out2_pos2 below).
//   sign        - 16-bit factor multiplied into the low-pass filter output.
//   inputState  - low-pass filter state; elements [0..4] are written on every
//                 iteration.
//   outputBuf2  - read by the fractional pitch filter and written at
//                 offset + PITCH_BUFFSIZE.
//   coefficient - nine int16_t filter coefficients (read only).
//   inputBuf    - input samples, read at the current offset.
//   outputBuf   - output samples, written at the current offset.
//   index2      - in/out sample offset; incremented by |loopNumber| on return.
//
// NOTE(review): the lwl +3 / lwr +0 (and swl / swr) pairings used below look
// like the little-endian unaligned access sequences -- confirm this file is
// only built for little-endian MIPS targets with the DSP ASE enabled.
13void WebRtcIsacfix_PitchFilterCore(int loopNumber,
14                                   int16_t gain,
15                                   int index,
16                                   int16_t sign,
17                                   int16_t* inputState,
18                                   int16_t* outputBuf2,
19                                   const int16_t* coefficient,
20                                   int16_t* inputBuf,
21                                   int16_t* outputBuf,
22                                   int* index2) {
  // Sample offset at which this call starts; *index2 itself is only updated
  // once, after the loop.
23  int ind2t = *index2;
24  int i = 0;
  // First of the nine outputBuf2 samples read by the fractional pitch filter.
  // The pointer is advanced by one sample per iteration inside the asm.
25  int16_t* out2_pos2 = &outputBuf2[PITCH_BUFFSIZE - (index + 2)] + ind2t;
  // w1..w5 are scratch registers for the loop asm; gain32/sign32 receive the
  // sign-extended |gain| and |sign|.
26  int32_t w1, w2, w3, w4, w5, gain32, sign32;
  // coef1..coef4 each hold two packed coefficients and coef5 holds the ninth
  // one. All five are written by the first asm block; only coef5 carries an
  // initializer in this declaration.
27  int32_t coef1, coef2, coef3, coef4, coef5 = 0;
28  // Damping filter taps, packed two int16_t values per int32_t so that one
  // dpa.w.ph consumes a pair. The 16-bit halves are 0xF70A (-2294),
  // 0x2000 (8192) and 0x51EC (20972); the upper half of kDampF0 is zero.
  // NOTE(review): 0xF70A2000 is larger than INT32_MAX, so that initializer
  // relies on the conversion to int32_t preserving the bit pattern.
29  int32_t kDampF0 = 0x0000F70A;
30  int32_t kDampF1 = 0x51EC2000;
31  int32_t kDampF2 = 0xF70A2000;
  // Running read/write positions; each is advanced by one sample (2 bytes)
  // per iteration inside the asm.
32  int16_t* input1 = inputBuf + ind2t;
33  int16_t* output1 = outputBuf + ind2t;
34  int16_t* output2 = outputBuf2 + ind2t + PITCH_BUFFSIZE;
35
36  // Load coefficients outside the loop and sign-extend gain and sign.
  // coefficient[0..7] are fetched as four words with lwl/lwr pairs (which do
  // not require word alignment), coefficient[8] is fetched zero-extended with
  // lhu, and seh sign-extends the 16-bit gain and sign into full registers.
37  __asm __volatile (
38    ".set     push                                        \n\t"
39    ".set     noreorder                                   \n\t"
40    "lwl      %[coef1],       3(%[coefficient])           \n\t"
41    "lwl      %[coef2],       7(%[coefficient])           \n\t"
42    "lwl      %[coef3],       11(%[coefficient])          \n\t"
43    "lwl      %[coef4],       15(%[coefficient])          \n\t"
44    "lwr      %[coef1],       0(%[coefficient])           \n\t"
45    "lwr      %[coef2],       4(%[coefficient])           \n\t"
46    "lwr      %[coef3],       8(%[coefficient])           \n\t"
47    "lwr      %[coef4],       12(%[coefficient])          \n\t"
48    "lhu      %[coef5],       16(%[coefficient])          \n\t"
49    "seh      %[gain32],      %[gain]                     \n\t"
50    "seh      %[sign32],      %[sign]                     \n\t"
51    ".set     pop                                         \n\t"
52    : [coef1] "=&r" (coef1), [coef2] "=&r" (coef2), [coef3] "=&r" (coef3),
53      [coef4] "=&r" (coef4), [coef5] "=&r" (coef5), [gain32] "=&r" (gain32),
54      [sign32] "=&r" (sign32)
55    : [coefficient] "r" (coefficient), [gain] "r" (gain),
56      [sign] "r" (sign)
57    : "memory"
58  );
59
  // One iteration per sample. The instructions of the individual steps are
  // interleaved, so the step comments inside the template mark where a step
  // starts rather than a strict boundary.
60  for (i = 0; i < loopNumber; i++) {
61    __asm __volatile (
62      ".set       push                                            \n\t"
63      ".set       noreorder                                       \n\t"
64      // Filter to get fractional pitch: preload $ac0 with the rounding
      // constant 8192 (hi = 0, lo = 8192), load nine samples from out2_pos2
      // (four unaligned words plus one zero-extended halfword) and
      // accumulate their products with the nine coefficients.
65      "li         %[w1],          8192                            \n\t"
66      "mtlo       %[w1]                                           \n\t"
67      "mthi       $0                                              \n\t"
68      "lwl        %[w1],          3(%[out2_pos2])                 \n\t"
69      "lwl        %[w2],          7(%[out2_pos2])                 \n\t"
70      "lwl        %[w3],          11(%[out2_pos2])                \n\t"
71      "lwl        %[w4],          15(%[out2_pos2])                \n\t"
72      "lwr        %[w1],          0(%[out2_pos2])                 \n\t"
73      "lwr        %[w2],          4(%[out2_pos2])                 \n\t"
74      "lwr        %[w3],          8(%[out2_pos2])                 \n\t"
75      "lwr        %[w4],          12(%[out2_pos2])                \n\t"
76      "lhu        %[w5],          16(%[out2_pos2])                \n\t"
77      "dpa.w.ph   $ac0,           %[w1],              %[coef1]    \n\t"
78      "dpa.w.ph   $ac0,           %[w2],              %[coef2]    \n\t"
79      "dpa.w.ph   $ac0,           %[w3],              %[coef3]    \n\t"
80      "dpa.w.ph   $ac0,           %[w4],              %[coef4]    \n\t"
81      "dpa.w.ph   $ac0,           %[w5],              %[coef5]    \n\t"
      // Advance the filter read position by one sample, clear the high half
      // of $ac1 and start loading the four state values inputState[0..3]
      // into w2/w3 (completed by the lwr instructions below).
82      "addiu      %[out2_pos2],   %[out2_pos2],       2           \n\t"
83      "mthi       $0,             $ac1                            \n\t"
84      "lwl        %[w2],          3(%[inputState])                \n\t"
85      "lwl        %[w3],          7(%[inputState])                \n\t"
86      // Fractional pitch shift & saturation: w1 = saturate16($ac0 >> 14).
      // The low half of $ac1 is preloaded with the rounding constant 16384.
87      "extr_s.h   %[w1],          $ac0,               14          \n\t"
88      "li         %[w4],          16384                           \n\t"
89      "lwr        %[w2],          0(%[inputState])                \n\t"
90      "lwr        %[w3],          4(%[inputState])                \n\t"
91      "mtlo       %[w4],          $ac1                            \n\t"
92      // Shift low pass filter state: the words read from byte offsets 0..7
      // are written back at byte offsets 2..9, i.e. inputState[0..3] move to
      // inputState[1..4]. The gain multiply is issued in between.
93      "swl        %[w2],          5(%[inputState])                \n\t"
94      "swl        %[w3],          9(%[inputState])                \n\t"
95      "mul        %[w1],          %[gain32],          %[w1]       \n\t"
96      "swr        %[w2],          2(%[inputState])                \n\t"
97      "swr        %[w3],          6(%[inputState])                \n\t"
98      // Low pass filter accumulation: the four previous state values
      // (still held in w2/w3) are multiplied by the taps in kDampF1/kDampF2.
      // The gain product is shifted right by 12 with rounding, stored as the
      // new inputState[0] and accumulated with the tap in kDampF0. The input
      // sample is loaded into w4 and the input pointer advanced meanwhile.
99      "dpa.w.ph   $ac1,           %[kDampF1],         %[w2]       \n\t"
100      "dpa.w.ph   $ac1,           %[kDampF2],         %[w3]       \n\t"
101      "lh         %[w4],          0(%[input1])                    \n\t"
102      "addiu      %[input1],      %[input1],          2           \n\t"
103      "shra_r.w   %[w1],          %[w1],              12          \n\t"
104      "sh         %[w1],          0(%[inputState])                \n\t"
105      "dpa.w.ph   $ac1,           %[kDampF0],         %[w1]       \n\t"
106      // Low pass filter shift & saturation: w2 = saturate16($ac1 >> 15),
      // then scaled by the sign factor.
107      "extr_s.h   %[w2],          $ac1,               15          \n\t"
108      "mul        %[w2],          %[w2],              %[sign32]   \n\t"
109      // Buffer update. Each shll_s.w (saturating shift left by 16) followed
      // by sra 16 clamps the value to the int16_t range before it is stored.
      // First store: input sample - filtered value, to output1.
      // Second store: that stored value + input sample, to output2.
110      "subu       %[w2],          %[w4],              %[w2]       \n\t"
111      "shll_s.w   %[w2],          %[w2],              16          \n\t"
112      "sra        %[w2],          %[w2],              16          \n\t"
113      "sh         %[w2],          0(%[output1])                   \n\t"
114      "addu       %[w2],          %[w2],              %[w4]       \n\t"
115      "shll_s.w   %[w2],          %[w2],              16          \n\t"
116      "addiu      %[output1],     %[output1],         2           \n\t"
117      "sra        %[w2],          %[w2],              16          \n\t"
118      "sh         %[w2],          0(%[output2])                   \n\t"
119      "addiu      %[output2],     %[output2],         2           \n\t"
120      ".set       pop                                             \n\t"
      // Outputs: scratch registers are early-clobber; the four running
      // pointers are read-write because the template advances them.
121      : [w1] "=&r" (w1), [w2] "=&r" (w2), [w3] "=&r" (w3), [w4] "=&r" (w4),
122        [w5] "=&r" (w5), [input1] "+r" (input1), [out2_pos2] "+r" (out2_pos2),
123        [output1] "+r" (output1), [output2] "+r" (output2)
      // NOTE(review): %[coefficient] is listed as an input but is not
      // referenced anywhere in this template.
124      : [coefficient] "r" (coefficient), [inputState] "r" (inputState),
125        [gain32] "r" (gain32), [sign32] "r" (sign32), [kDampF0] "r" (kDampF0),
126        [kDampF1] "r" (kDampF1), [kDampF2] "r" (kDampF2),
127        [coef1] "r" (coef1), [coef2] "r" (coef2), [coef3] "r" (coef3),
128        [coef4] "r" (coef4), [coef5] "r" (coef5)
      // The template writes both accumulators used above ($ac0 and $ac1) and
      // stores through inputState/output1/output2, hence the register and
      // "memory" clobbers.
129      : "hi", "lo", "$ac1hi", "$ac1lo", "memory"
130    );
131  }
  // Publish the new sample offset to the caller.
132  (*index2) += loopNumber;
133}
134