1/*
2 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11/*
12 * The core AEC algorithm, which is presented with time-aligned signals.
13 */
14
15#include "webrtc/modules/audio_processing/aec/aec_core.h"
16
17#include <math.h>
18
19#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
20#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
21#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
22
23extern const float WebRtcAec_weightCurve[65];
24extern const float WebRtcAec_overDriveCurve[65];
25
26void WebRtcAec_ComfortNoise_mips(AecCore* aec,
27                                 float efw[2][PART_LEN1],
28                                 float comfortNoiseHband[2][PART_LEN1],
29                                 const float* noisePow,
30                                 const float* lambda) {
31  int i, num;
32  float rand[PART_LEN];
33  float noise, noiseAvg, tmp, tmpAvg;
34  int16_t randW16[PART_LEN];
35  complex_t u[PART_LEN1];
36
37  const float pi2 = 6.28318530717959f;
38  const float pi2t = pi2 / 32768;
39
40  // Generate a uniform random array on [0 1]
41  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
42
43  int16_t* randWptr = randW16;
44  float randTemp, randTemp2, randTemp3, randTemp4;
45  int32_t tmp1s, tmp2s, tmp3s, tmp4s;
46
47  for (i = 0; i < PART_LEN; i+=4) {
48    __asm __volatile (
49      ".set     push                                           \n\t"
50      ".set     noreorder                                      \n\t"
51      "lh       %[tmp1s],       0(%[randWptr])                 \n\t"
52      "lh       %[tmp2s],       2(%[randWptr])                 \n\t"
53      "lh       %[tmp3s],       4(%[randWptr])                 \n\t"
54      "lh       %[tmp4s],       6(%[randWptr])                 \n\t"
55      "mtc1     %[tmp1s],       %[randTemp]                    \n\t"
56      "mtc1     %[tmp2s],       %[randTemp2]                   \n\t"
57      "mtc1     %[tmp3s],       %[randTemp3]                   \n\t"
58      "mtc1     %[tmp4s],       %[randTemp4]                   \n\t"
59      "cvt.s.w  %[randTemp],    %[randTemp]                    \n\t"
60      "cvt.s.w  %[randTemp2],   %[randTemp2]                   \n\t"
61      "cvt.s.w  %[randTemp3],   %[randTemp3]                   \n\t"
62      "cvt.s.w  %[randTemp4],   %[randTemp4]                   \n\t"
63      "addiu    %[randWptr],    %[randWptr],      8            \n\t"
64      "mul.s    %[randTemp],    %[randTemp],      %[pi2t]      \n\t"
65      "mul.s    %[randTemp2],   %[randTemp2],     %[pi2t]      \n\t"
66      "mul.s    %[randTemp3],   %[randTemp3],     %[pi2t]      \n\t"
67      "mul.s    %[randTemp4],   %[randTemp4],     %[pi2t]      \n\t"
68      ".set     pop                                            \n\t"
69      : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
70        [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
71        [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
72        [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
73        [tmp4s] "=&r" (tmp4s)
74      : [pi2t] "f" (pi2t)
75      : "memory"
76    );
77
78    u[i+1][0] = cosf(randTemp);
79    u[i+1][1] = sinf(randTemp);
80    u[i+2][0] = cosf(randTemp2);
81    u[i+2][1] = sinf(randTemp2);
82    u[i+3][0] = cosf(randTemp3);
83    u[i+3][1] = sinf(randTemp3);
84    u[i+4][0] = cosf(randTemp4);
85    u[i+4][1] = sinf(randTemp4);
86  }
87
88  // Reject LF noise
89  float* u_ptr = &u[1][0];
90  float noise2, noise3, noise4;
91  float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
92
93  u[0][0] = 0;
94  u[0][1] = 0;
95  for (i = 1; i < PART_LEN1; i+=4) {
96    __asm __volatile (
97      ".set     push                                            \n\t"
98      ".set     noreorder                                       \n\t"
99      "lwc1     %[noise],       4(%[noisePow])                  \n\t"
100      "lwc1     %[noise2],      8(%[noisePow])                  \n\t"
101      "lwc1     %[noise3],      12(%[noisePow])                 \n\t"
102      "lwc1     %[noise4],      16(%[noisePow])                 \n\t"
103      "sqrt.s   %[noise],       %[noise]                        \n\t"
104      "sqrt.s   %[noise2],      %[noise2]                       \n\t"
105      "sqrt.s   %[noise3],      %[noise3]                       \n\t"
106      "sqrt.s   %[noise4],      %[noise4]                       \n\t"
107      "lwc1     %[tmp1f],       0(%[u_ptr])                     \n\t"
108      "lwc1     %[tmp2f],       4(%[u_ptr])                     \n\t"
109      "lwc1     %[tmp3f],       8(%[u_ptr])                     \n\t"
110      "lwc1     %[tmp4f],       12(%[u_ptr])                    \n\t"
111      "lwc1     %[tmp5f],       16(%[u_ptr])                    \n\t"
112      "lwc1     %[tmp6f],       20(%[u_ptr])                    \n\t"
113      "lwc1     %[tmp7f],       24(%[u_ptr])                    \n\t"
114      "lwc1     %[tmp8f],       28(%[u_ptr])                    \n\t"
115      "addiu    %[noisePow],    %[noisePow],      16            \n\t"
116      "mul.s    %[tmp1f],       %[tmp1f],         %[noise]      \n\t"
117      "mul.s    %[tmp2f],       %[tmp2f],         %[noise]      \n\t"
118      "mul.s    %[tmp3f],       %[tmp3f],         %[noise2]     \n\t"
119      "mul.s    %[tmp4f],       %[tmp4f],         %[noise2]     \n\t"
120      "mul.s    %[tmp5f],       %[tmp5f],         %[noise3]     \n\t"
121      "mul.s    %[tmp6f],       %[tmp6f],         %[noise3]     \n\t"
122      "swc1     %[tmp1f],       0(%[u_ptr])                     \n\t"
123      "swc1     %[tmp3f],       8(%[u_ptr])                     \n\t"
124      "mul.s    %[tmp8f],       %[tmp8f],         %[noise4]     \n\t"
125      "mul.s    %[tmp7f],       %[tmp7f],         %[noise4]     \n\t"
126      "neg.s    %[tmp2f]                                        \n\t"
127      "neg.s    %[tmp4f]                                        \n\t"
128      "neg.s    %[tmp6f]                                        \n\t"
129      "neg.s    %[tmp8f]                                        \n\t"
130      "swc1     %[tmp5f],       16(%[u_ptr])                    \n\t"
131      "swc1     %[tmp7f],       24(%[u_ptr])                    \n\t"
132      "swc1     %[tmp2f],       4(%[u_ptr])                     \n\t"
133      "swc1     %[tmp4f],       12(%[u_ptr])                    \n\t"
134      "swc1     %[tmp6f],       20(%[u_ptr])                    \n\t"
135      "swc1     %[tmp8f],       28(%[u_ptr])                    \n\t"
136      "addiu    %[u_ptr],       %[u_ptr],         32            \n\t"
137      ".set     pop                                             \n\t"
138      : [u_ptr] "+r" (u_ptr),  [noisePow] "+r" (noisePow),
139        [noise] "=&f" (noise), [noise2] "=&f" (noise2),
140        [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
141        [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
142        [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
143        [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
144        [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
145      :
146      : "memory"
147    );
148  }
149  u[PART_LEN][1] = 0;
150  noisePow -= PART_LEN;
151
152  u_ptr = &u[0][0];
153  float* u_ptr_end = &u[PART_LEN][0];
154  float* efw_ptr_0 = &efw[0][0];
155  float* efw_ptr_1 = &efw[1][0];
156  float tmp9f, tmp10f;
157  const float tmp1c = 1.0;
158
159  __asm __volatile (
160    ".set     push                                                        \n\t"
161    ".set     noreorder                                                   \n\t"
162   "1:                                                                    \n\t"
163    "lwc1     %[tmp1f],       0(%[lambda])                                \n\t"
164    "lwc1     %[tmp6f],       4(%[lambda])                                \n\t"
165    "addiu    %[lambda],      %[lambda],        8                         \n\t"
166    "c.lt.s   %[tmp1f],       %[tmp1c]                                    \n\t"
167    "bc1f     4f                                                          \n\t"
168    " nop                                                                 \n\t"
169    "c.lt.s   %[tmp6f],       %[tmp1c]                                    \n\t"
170    "bc1f     3f                                                          \n\t"
171    " nop                                                                 \n\t"
172   "2:                                                                    \n\t"
173    "mul.s    %[tmp1f],       %[tmp1f],         %[tmp1f]                  \n\t"
174    "mul.s    %[tmp6f],       %[tmp6f],         %[tmp6f]                  \n\t"
175    "sub.s    %[tmp1f],       %[tmp1c],         %[tmp1f]                  \n\t"
176    "sub.s    %[tmp6f],       %[tmp1c],         %[tmp6f]                  \n\t"
177    "sqrt.s   %[tmp1f],       %[tmp1f]                                    \n\t"
178    "sqrt.s   %[tmp6f],       %[tmp6f]                                    \n\t"
179    "lwc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
180    "lwc1     %[tmp3f],       0(%[u_ptr])                                 \n\t"
181    "lwc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
182    "lwc1     %[tmp8f],       8(%[u_ptr])                                 \n\t"
183    "lwc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
184    "lwc1     %[tmp5f],       4(%[u_ptr])                                 \n\t"
185    "lwc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
186    "lwc1     %[tmp10f],      12(%[u_ptr])                                \n\t"
187#if !defined(MIPS32_R2_LE)
188    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp3f]                  \n\t"
189    "add.s    %[tmp2f],       %[tmp2f],         %[tmp3f]                  \n\t"
190    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp5f]                  \n\t"
191    "add.s    %[tmp4f],       %[tmp4f],         %[tmp3f]                  \n\t"
192    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp8f]                  \n\t"
193    "add.s    %[tmp7f],       %[tmp7f],         %[tmp3f]                  \n\t"
194    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp10f]                 \n\t"
195    "add.s    %[tmp9f],       %[tmp9f],         %[tmp3f]                  \n\t"
196#else // #if !defined(MIPS32_R2_LE)
197    "madd.s   %[tmp2f],       %[tmp2f],         %[tmp1f],     %[tmp3f]    \n\t"
198    "madd.s   %[tmp4f],       %[tmp4f],         %[tmp1f],     %[tmp5f]    \n\t"
199    "madd.s   %[tmp7f],       %[tmp7f],         %[tmp6f],     %[tmp8f]    \n\t"
200    "madd.s   %[tmp9f],       %[tmp9f],         %[tmp6f],     %[tmp10f]   \n\t"
201#endif // #if !defined(MIPS32_R2_LE)
202    "swc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
203    "swc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
204    "swc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
205    "b        5f                                                          \n\t"
206    " swc1    %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
207   "3:                                                                    \n\t"
208    "mul.s    %[tmp1f],       %[tmp1f],         %[tmp1f]                  \n\t"
209    "sub.s    %[tmp1f],       %[tmp1c],         %[tmp1f]                  \n\t"
210    "sqrt.s   %[tmp1f],       %[tmp1f]                                    \n\t"
211    "lwc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
212    "lwc1     %[tmp3f],       0(%[u_ptr])                                 \n\t"
213    "lwc1     %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
214    "lwc1     %[tmp5f],       4(%[u_ptr])                                 \n\t"
215#if !defined(MIPS32_R2_LE)
216    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp3f]                  \n\t"
217    "add.s    %[tmp2f],       %[tmp2f],         %[tmp3f]                  \n\t"
218    "mul.s    %[tmp3f],       %[tmp1f],         %[tmp5f]                  \n\t"
219    "add.s    %[tmp4f],       %[tmp4f],         %[tmp3f]                  \n\t"
220#else // #if !defined(MIPS32_R2_LE)
221    "madd.s   %[tmp2f],       %[tmp2f],         %[tmp1f],     %[tmp3f]    \n\t"
222    "madd.s   %[tmp4f],       %[tmp4f],         %[tmp1f],     %[tmp5f]    \n\t"
223#endif // #if !defined(MIPS32_R2_LE)
224    "swc1     %[tmp2f],       0(%[efw_ptr_0])                             \n\t"
225    "b        5f                                                          \n\t"
226    " swc1    %[tmp4f],       0(%[efw_ptr_1])                             \n\t"
227   "4:                                                                    \n\t"
228    "c.lt.s   %[tmp6f],       %[tmp1c]                                    \n\t"
229    "bc1f     5f                                                          \n\t"
230    " nop                                                                 \n\t"
231    "mul.s    %[tmp6f],       %[tmp6f],         %[tmp6f]                  \n\t"
232    "sub.s    %[tmp6f],       %[tmp1c],         %[tmp6f]                  \n\t"
233    "sqrt.s   %[tmp6f],       %[tmp6f]                                    \n\t"
234    "lwc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
235    "lwc1     %[tmp8f],       8(%[u_ptr])                                 \n\t"
236    "lwc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
237    "lwc1     %[tmp10f],      12(%[u_ptr])                                \n\t"
238#if !defined(MIPS32_R2_LE)
239    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp8f]                  \n\t"
240    "add.s    %[tmp7f],       %[tmp7f],         %[tmp3f]                  \n\t"
241    "mul.s    %[tmp3f],       %[tmp6f],         %[tmp10f]                 \n\t"
242    "add.s    %[tmp9f],       %[tmp9f],         %[tmp3f]                  \n\t"
243#else // #if !defined(MIPS32_R2_LE)
244    "madd.s   %[tmp7f],       %[tmp7f],         %[tmp6f],     %[tmp8f]    \n\t"
245    "madd.s   %[tmp9f],       %[tmp9f],         %[tmp6f],     %[tmp10f]   \n\t"
246#endif // #if !defined(MIPS32_R2_LE)
247    "swc1     %[tmp7f],       4(%[efw_ptr_0])                             \n\t"
248    "swc1     %[tmp9f],       4(%[efw_ptr_1])                             \n\t"
249   "5:                                                                    \n\t"
250    "addiu    %[u_ptr],       %[u_ptr],         16                        \n\t"
251    "addiu    %[efw_ptr_0],   %[efw_ptr_0],     8                         \n\t"
252    "bne      %[u_ptr],       %[u_ptr_end],     1b                        \n\t"
253    " addiu   %[efw_ptr_1],   %[efw_ptr_1],     8                         \n\t"
254    ".set     pop                                                         \n\t"
255    : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
256      [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
257      [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
258      [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
259      [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
260      [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
261    : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
262    : "memory"
263  );
264
265  lambda -= PART_LEN;
266  tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
267  //tmp = 1 - lambda[i];
268  efw[0][PART_LEN] += tmp * u[PART_LEN][0];
269  efw[1][PART_LEN] += tmp * u[PART_LEN][1];
270
271  // For H band comfort noise
272  // TODO: don't compute noise and "tmp" twice. Use the previous results.
273  noiseAvg = 0.0;
274  tmpAvg = 0.0;
275  num = 0;
276  if (aec->num_bands > 1) {
277    for (i = 0; i < PART_LEN; i++) {
278      rand[i] = ((float)randW16[i]) / 32768;
279    }
280
281    // average noise scale
282    // average over second half of freq spectrum (i.e., 4->8khz)
283    // TODO: we shouldn't need num. We know how many elements we're summing.
284    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
285      num++;
286      noiseAvg += sqrtf(noisePow[i]);
287    }
288    noiseAvg /= (float)num;
289
290    // average nlp scale
291    // average over second half of freq spectrum (i.e., 4->8khz)
292    // TODO: we shouldn't need num. We know how many elements we're summing.
293    num = 0;
294    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
295      num++;
296      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
297    }
298    tmpAvg /= (float)num;
299
300    // Use average noise for H band
301    // TODO: we should probably have a new random vector here.
302    // Reject LF noise
303    u[0][0] = 0;
304    u[0][1] = 0;
305    for (i = 1; i < PART_LEN1; i++) {
306      tmp = pi2 * rand[i - 1];
307
308      // Use average noise for H band
309      u[i][0] = noiseAvg * (float)cos(tmp);
310      u[i][1] = -noiseAvg * (float)sin(tmp);
311    }
312    u[PART_LEN][1] = 0;
313
314    for (i = 0; i < PART_LEN1; i++) {
315      // Use average NLP weight for H band
316      comfortNoiseHband[0][i] = tmpAvg * u[i][0];
317      comfortNoiseHband[1][i] = tmpAvg * u[i][1];
318    }
319  } else {
320    memset(comfortNoiseHband, 0,
321           2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0]));
322  }
323}
324
325void WebRtcAec_FilterFar_mips(
326    int num_partitions,
327    int x_fft_buf_block_pos,
328    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
329    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
330    float y_fft[2][PART_LEN1]) {
331  int i;
332  for (i = 0; i < num_partitions; i++) {
333    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
334    int pos = i * PART_LEN1;
335    // Check for wrap
336    if (i + x_fft_buf_block_pos >=  num_partitions) {
337      xPos -=  num_partitions * (PART_LEN1);
338    }
339    float* yf0 = y_fft[0];
340    float* yf1 = y_fft[1];
341    float* aRe = x_fft_buf[0] + xPos;
342    float* aIm = x_fft_buf[1] + xPos;
343    float* bRe = h_fft_buf[0] + pos;
344    float* bIm = h_fft_buf[1] + pos;
345    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
346    int len = PART_LEN1 >> 1;
347
348    __asm __volatile (
349      ".set       push                                                \n\t"
350      ".set       noreorder                                           \n\t"
351     "1:                                                              \n\t"
352      "lwc1       %[f0],      0(%[aRe])                               \n\t"
353      "lwc1       %[f1],      0(%[bRe])                               \n\t"
354      "lwc1       %[f2],      0(%[bIm])                               \n\t"
355      "lwc1       %[f3],      0(%[aIm])                               \n\t"
356      "lwc1       %[f4],      4(%[aRe])                               \n\t"
357      "lwc1       %[f5],      4(%[bRe])                               \n\t"
358      "lwc1       %[f6],      4(%[bIm])                               \n\t"
359      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
360      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
361      "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
362      "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
363      "lwc1       %[f7],      4(%[aIm])                               \n\t"
364#if !defined(MIPS32_R2_LE)
365      "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
366      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
367      "mul.s      %[f11],     %[f6],          %[f7]                   \n\t"
368      "addiu      %[aRe],     %[aRe],         8                       \n\t"
369      "addiu      %[aIm],     %[aIm],         8                       \n\t"
370      "addiu      %[len],     %[len],         -1                      \n\t"
371      "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
372      "mul.s      %[f12],     %[f7],          %[f5]                   \n\t"
373      "lwc1       %[f2],      0(%[yf0])                               \n\t"
374      "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
375      "lwc1       %[f3],      0(%[yf1])                               \n\t"
376      "sub.s      %[f9],      %[f9],          %[f11]                  \n\t"
377      "lwc1       %[f6],      4(%[yf0])                               \n\t"
378      "add.s      %[f4],      %[f4],          %[f12]                  \n\t"
379#else // #if !defined(MIPS32_R2_LE)
380      "addiu      %[aRe],     %[aRe],         8                       \n\t"
381      "addiu      %[aIm],     %[aIm],         8                       \n\t"
382      "addiu      %[len],     %[len],         -1                      \n\t"
383      "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
384      "lwc1       %[f2],      0(%[yf0])                               \n\t"
385      "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
386      "lwc1       %[f3],      0(%[yf1])                               \n\t"
387      "nmsub.s    %[f9],      %[f9],          %[f6],      %[f7]       \n\t"
388      "lwc1       %[f6],      4(%[yf0])                               \n\t"
389      "madd.s     %[f4],      %[f4],          %[f7],      %[f5]       \n\t"
390#endif // #if !defined(MIPS32_R2_LE)
391      "lwc1       %[f5],      4(%[yf1])                               \n\t"
392      "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
393      "addiu      %[bRe],     %[bRe],         8                       \n\t"
394      "addiu      %[bIm],     %[bIm],         8                       \n\t"
395      "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
396      "add.s      %[f6],      %[f6],          %[f9]                   \n\t"
397      "add.s      %[f5],      %[f5],          %[f4]                   \n\t"
398      "swc1       %[f2],      0(%[yf0])                               \n\t"
399      "swc1       %[f3],      0(%[yf1])                               \n\t"
400      "swc1       %[f6],      4(%[yf0])                               \n\t"
401      "swc1       %[f5],      4(%[yf1])                               \n\t"
402      "addiu      %[yf0],     %[yf0],         8                       \n\t"
403      "bgtz       %[len],     1b                                      \n\t"
404      " addiu     %[yf1],     %[yf1],         8                       \n\t"
405      "lwc1       %[f0],      0(%[aRe])                               \n\t"
406      "lwc1       %[f1],      0(%[bRe])                               \n\t"
407      "lwc1       %[f2],      0(%[bIm])                               \n\t"
408      "lwc1       %[f3],      0(%[aIm])                               \n\t"
409      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
410      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
411#if !defined(MIPS32_R2_LE)
412      "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
413      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
414      "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
415      "lwc1       %[f2],      0(%[yf0])                               \n\t"
416      "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
417      "lwc1       %[f3],      0(%[yf1])                               \n\t"
418#else // #if !defined(MIPS32_R2_LE)
419      "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
420      "lwc1       %[f2],      0(%[yf0])                               \n\t"
421      "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
422      "lwc1       %[f3],      0(%[yf1])                               \n\t"
423#endif // #if !defined(MIPS32_R2_LE)
424      "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
425      "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
426      "swc1       %[f2],      0(%[yf0])                               \n\t"
427      "swc1       %[f3],      0(%[yf1])                               \n\t"
428      ".set       pop                                                 \n\t"
429      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
430        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
431        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
432        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
433        [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
434        [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
435        [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
436      :
437      : "memory"
438    );
439  }
440}
441
442void WebRtcAec_FilterAdaptation_mips(
443    int num_partitions,
444    int x_fft_buf_block_pos,
445    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
446    float e_fft[2][PART_LEN1],
447    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
448  float fft[PART_LEN2];
449  int i;
450  for (i = 0; i < num_partitions; i++) {
451    int xPos = (i + x_fft_buf_block_pos)*(PART_LEN1);
452    int pos;
453    // Check for wrap
454    if (i + x_fft_buf_block_pos >= num_partitions) {
455      xPos -= num_partitions * PART_LEN1;
456    }
457
458    pos = i * PART_LEN1;
459    float* aRe = x_fft_buf[0] + xPos;
460    float* aIm = x_fft_buf[1] + xPos;
461    float* bRe = e_fft[0];
462    float* bIm = e_fft[1];
463    float* fft_tmp;
464
465    float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
466    int len = PART_LEN >> 1;
467
468    __asm __volatile (
469      ".set       push                                                \n\t"
470      ".set       noreorder                                           \n\t"
471      "addiu      %[fft_tmp], %[fft],         0                       \n\t"
472     "1:                                                              \n\t"
473      "lwc1       %[f0],      0(%[aRe])                               \n\t"
474      "lwc1       %[f1],      0(%[bRe])                               \n\t"
475      "lwc1       %[f2],      0(%[bIm])                               \n\t"
476      "lwc1       %[f4],      4(%[aRe])                               \n\t"
477      "lwc1       %[f5],      4(%[bRe])                               \n\t"
478      "lwc1       %[f6],      4(%[bIm])                               \n\t"
479      "addiu      %[aRe],     %[aRe],         8                       \n\t"
480      "addiu      %[bRe],     %[bRe],         8                       \n\t"
481      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
482      "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
483      "lwc1       %[f3],      0(%[aIm])                               \n\t"
484      "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
485      "lwc1       %[f7],      4(%[aIm])                               \n\t"
486      "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
487#if !defined(MIPS32_R2_LE)
488      "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
489      "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
490      "mul.s      %[f11],     %[f7],          %[f6]                   \n\t"
491      "mul.s      %[f5],      %[f7],          %[f5]                   \n\t"
492      "addiu      %[aIm],     %[aIm],         8                       \n\t"
493      "addiu      %[bIm],     %[bIm],         8                       \n\t"
494      "addiu      %[len],     %[len],         -1                      \n\t"
495      "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
496      "sub.s      %[f1],      %[f0],          %[f1]                   \n\t"
497      "add.s      %[f9],      %[f9],          %[f11]                  \n\t"
498      "sub.s      %[f5],      %[f4],          %[f5]                   \n\t"
499#else // #if !defined(MIPS32_R2_LE)
500      "addiu      %[aIm],     %[aIm],         8                       \n\t"
501      "addiu      %[bIm],     %[bIm],         8                       \n\t"
502      "addiu      %[len],     %[len],         -1                      \n\t"
503      "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
504      "nmsub.s    %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
505      "madd.s     %[f9],      %[f9],          %[f7],      %[f6]       \n\t"
506      "nmsub.s    %[f5],      %[f4],          %[f7],      %[f5]       \n\t"
507#endif // #if !defined(MIPS32_R2_LE)
508      "swc1       %[f8],      0(%[fft_tmp])                           \n\t"
509      "swc1       %[f1],      4(%[fft_tmp])                           \n\t"
510      "swc1       %[f9],      8(%[fft_tmp])                           \n\t"
511      "swc1       %[f5],      12(%[fft_tmp])                          \n\t"
512      "bgtz       %[len],     1b                                      \n\t"
513      " addiu     %[fft_tmp], %[fft_tmp],     16                      \n\t"
514      "lwc1       %[f0],      0(%[aRe])                               \n\t"
515      "lwc1       %[f1],      0(%[bRe])                               \n\t"
516      "lwc1       %[f2],      0(%[bIm])                               \n\t"
517      "lwc1       %[f3],      0(%[aIm])                               \n\t"
518      "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
519#if !defined(MIPS32_R2_LE)
520      "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
521      "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
522#else // #if !defined(MIPS32_R2_LE)
523      "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
524#endif // #if !defined(MIPS32_R2_LE)
525      "swc1       %[f8],      4(%[fft])                               \n\t"
526      ".set       pop                                                 \n\t"
527      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
528        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
529        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
530        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
531        [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
532        [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
533        [len] "+r" (len)
534      : [fft] "r" (fft)
535      : "memory"
536    );
537
538    aec_rdft_inverse_128(fft);
539    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
540
541    // fft scaling
542    {
543      float scale = 2.0f / PART_LEN2;
544      __asm __volatile (
545        ".set     push                                    \n\t"
546        ".set     noreorder                               \n\t"
547        "addiu    %[fft_tmp], %[fft],        0            \n\t"
548        "addiu    %[len],     $zero,         8            \n\t"
549       "1:                                                \n\t"
550        "addiu    %[len],     %[len],        -1           \n\t"
551        "lwc1     %[f0],      0(%[fft_tmp])               \n\t"
552        "lwc1     %[f1],      4(%[fft_tmp])               \n\t"
553        "lwc1     %[f2],      8(%[fft_tmp])               \n\t"
554        "lwc1     %[f3],      12(%[fft_tmp])              \n\t"
555        "mul.s    %[f0],      %[f0],         %[scale]     \n\t"
556        "mul.s    %[f1],      %[f1],         %[scale]     \n\t"
557        "mul.s    %[f2],      %[f2],         %[scale]     \n\t"
558        "mul.s    %[f3],      %[f3],         %[scale]     \n\t"
559        "lwc1     %[f4],      16(%[fft_tmp])              \n\t"
560        "lwc1     %[f5],      20(%[fft_tmp])              \n\t"
561        "lwc1     %[f6],      24(%[fft_tmp])              \n\t"
562        "lwc1     %[f7],      28(%[fft_tmp])              \n\t"
563        "mul.s    %[f4],      %[f4],         %[scale]     \n\t"
564        "mul.s    %[f5],      %[f5],         %[scale]     \n\t"
565        "mul.s    %[f6],      %[f6],         %[scale]     \n\t"
566        "mul.s    %[f7],      %[f7],         %[scale]     \n\t"
567        "swc1     %[f0],      0(%[fft_tmp])               \n\t"
568        "swc1     %[f1],      4(%[fft_tmp])               \n\t"
569        "swc1     %[f2],      8(%[fft_tmp])               \n\t"
570        "swc1     %[f3],      12(%[fft_tmp])              \n\t"
571        "swc1     %[f4],      16(%[fft_tmp])              \n\t"
572        "swc1     %[f5],      20(%[fft_tmp])              \n\t"
573        "swc1     %[f6],      24(%[fft_tmp])              \n\t"
574        "swc1     %[f7],      28(%[fft_tmp])              \n\t"
575        "bgtz     %[len],     1b                          \n\t"
576        " addiu   %[fft_tmp], %[fft_tmp],    32           \n\t"
577        ".set     pop                                     \n\t"
578        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
579          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
580          [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
581          [fft_tmp] "=&r" (fft_tmp)
582        : [scale] "f" (scale), [fft] "r" (fft)
583        : "memory"
584      );
585    }
586    aec_rdft_forward_128(fft);
587    aRe = h_fft_buf[0] + pos;
588    aIm = h_fft_buf[1] + pos;
589    __asm __volatile (
590      ".set     push                                    \n\t"
591      ".set     noreorder                               \n\t"
592      "addiu    %[fft_tmp], %[fft],        0            \n\t"
593      "addiu    %[len],     $zero,         31           \n\t"
594      "lwc1     %[f0],      0(%[aRe])                   \n\t"
595      "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
596      "lwc1     %[f2],      256(%[aRe])                 \n\t"
597      "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
598      "lwc1     %[f4],      4(%[aRe])                   \n\t"
599      "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
600      "lwc1     %[f6],      4(%[aIm])                   \n\t"
601      "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
602      "add.s    %[f0],      %[f0],         %[f1]        \n\t"
603      "add.s    %[f2],      %[f2],         %[f3]        \n\t"
604      "add.s    %[f4],      %[f4],         %[f5]        \n\t"
605      "add.s    %[f6],      %[f6],         %[f7]        \n\t"
606      "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
607      "swc1     %[f0],      0(%[aRe])                   \n\t"
608      "swc1     %[f2],      256(%[aRe])                 \n\t"
609      "swc1     %[f4],      4(%[aRe])                   \n\t"
610      "addiu    %[aRe],     %[aRe],        8            \n\t"
611      "swc1     %[f6],      4(%[aIm])                   \n\t"
612      "addiu    %[aIm],     %[aIm],        8            \n\t"
613     "1:                                                \n\t"
614      "lwc1     %[f0],      0(%[aRe])                   \n\t"
615      "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
616      "lwc1     %[f2],      0(%[aIm])                   \n\t"
617      "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
618      "lwc1     %[f4],      4(%[aRe])                   \n\t"
619      "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
620      "lwc1     %[f6],      4(%[aIm])                   \n\t"
621      "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
622      "add.s    %[f0],      %[f0],         %[f1]        \n\t"
623      "add.s    %[f2],      %[f2],         %[f3]        \n\t"
624      "add.s    %[f4],      %[f4],         %[f5]        \n\t"
625      "add.s    %[f6],      %[f6],         %[f7]        \n\t"
626      "addiu    %[len],     %[len],        -1           \n\t"
627      "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
628      "swc1     %[f0],      0(%[aRe])                   \n\t"
629      "swc1     %[f2],      0(%[aIm])                   \n\t"
630      "swc1     %[f4],      4(%[aRe])                   \n\t"
631      "addiu    %[aRe],     %[aRe],        8            \n\t"
632      "swc1     %[f6],      4(%[aIm])                   \n\t"
633      "bgtz     %[len],     1b                          \n\t"
634      " addiu   %[aIm],     %[aIm],        8            \n\t"
635      ".set     pop                                     \n\t"
636      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
637        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
638        [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
639        [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
640      : [fft] "r" (fft)
641      : "memory"
642    );
643  }
644}
645
646void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
647                                         float hNl[PART_LEN1],
648                                         const float hNlFb,
649                                         float efw[2][PART_LEN1]) {
650  int i;
651  const float one = 1.0;
652  float* p_hNl;
653  float* p_efw0;
654  float* p_efw1;
655  float* p_WebRtcAec_wC;
656  float temp1, temp2, temp3, temp4;
657
658  p_hNl = &hNl[0];
659  p_efw0 = &efw[0][0];
660  p_efw1 = &efw[1][0];
661  p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
662
663  for (i = 0; i < PART_LEN1; i++) {
664    // Weight subbands
665    __asm __volatile (
666      ".set      push                                              \n\t"
667      ".set      noreorder                                         \n\t"
668      "lwc1      %[temp1],    0(%[p_hNl])                          \n\t"
669      "lwc1      %[temp2],    0(%[p_wC])                           \n\t"
670      "c.lt.s    %[hNlFb],    %[temp1]                             \n\t"
671      "bc1f      1f                                                \n\t"
672      " mul.s    %[temp3],    %[temp2],     %[hNlFb]               \n\t"
673      "sub.s     %[temp4],    %[one],       %[temp2]               \n\t"
674#if !defined(MIPS32_R2_LE)
675      "mul.s     %[temp1],    %[temp1],     %[temp4]               \n\t"
676      "add.s     %[temp1],    %[temp3],     %[temp1]               \n\t"
677#else // #if !defined(MIPS32_R2_LE)
678      "madd.s    %[temp1],    %[temp3],     %[temp1],   %[temp4]   \n\t"
679#endif // #if !defined(MIPS32_R2_LE)
680      "swc1      %[temp1],    0(%[p_hNl])                          \n\t"
681     "1:                                                           \n\t"
682      "addiu     %[p_wC],     %[p_wC],      4                      \n\t"
683      ".set      pop                                               \n\t"
684      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
685        [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
686      : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
687      : "memory"
688    );
689
690    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
691
692    __asm __volatile (
693      "lwc1      %[temp1],    0(%[p_hNl])              \n\t"
694      "lwc1      %[temp3],    0(%[p_efw1])             \n\t"
695      "lwc1      %[temp2],    0(%[p_efw0])             \n\t"
696      "addiu     %[p_hNl],    %[p_hNl],     4          \n\t"
697      "mul.s     %[temp3],    %[temp3],     %[temp1]   \n\t"
698      "mul.s     %[temp2],    %[temp2],     %[temp1]   \n\t"
699      "addiu     %[p_efw0],   %[p_efw0],    4          \n\t"
700      "addiu     %[p_efw1],   %[p_efw1],    4          \n\t"
701      "neg.s     %[temp4],    %[temp3]                 \n\t"
702      "swc1      %[temp2],    -4(%[p_efw0])            \n\t"
703      "swc1      %[temp4],    -4(%[p_efw1])            \n\t"
704      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
705        [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
706        [p_hNl] "+r" (p_hNl)
707      :
708      : "memory"
709    );
710  }
711}
712
713void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled,
714                                     float normal_mu,
715                                     float normal_error_threshold,
716                                     float x_pow[PART_LEN1],
717                                     float ef[2][PART_LEN1]) {
718  const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
719  const float error_threshold = extended_filter_enabled
720                                    ? kExtendedErrorThreshold
721                                    : normal_error_threshold;
722  int len = (PART_LEN1);
723  float* ef0 = ef[0];
724  float* ef1 = ef[1];
725  float fac1 = 1e-10f;
726  float err_th2 = error_threshold * error_threshold;
727  float f0, f1, f2;
728#if !defined(MIPS32_R2_LE)
729  float f3;
730#endif
731
732  __asm __volatile (
733    ".set       push                                   \n\t"
734    ".set       noreorder                              \n\t"
735   "1:                                                 \n\t"
736    "lwc1       %[f0],     0(%[x_pow])                 \n\t"
737    "lwc1       %[f1],     0(%[ef0])                   \n\t"
738    "lwc1       %[f2],     0(%[ef1])                   \n\t"
739    "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
740    "div.s      %[f1],     %[f1],       %[f0]          \n\t"
741    "div.s      %[f2],     %[f2],       %[f0]          \n\t"
742    "mul.s      %[f0],     %[f1],       %[f1]          \n\t"
743#if defined(MIPS32_R2_LE)
744    "madd.s     %[f0],     %[f0],       %[f2],   %[f2] \n\t"
745#else
746    "mul.s      %[f3],     %[f2],       %[f2]          \n\t"
747    "add.s      %[f0],     %[f0],       %[f3]          \n\t"
748#endif
749    "c.le.s     %[f0],     %[err_th2]                  \n\t"
750    "nop                                               \n\t"
751    "bc1t       2f                                     \n\t"
752    " nop                                              \n\t"
753    "sqrt.s     %[f0],     %[f0]                       \n\t"
754    "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
755    "div.s      %[f0],     %[err_th],   %[f0]          \n\t"
756    "mul.s      %[f1],     %[f1],       %[f0]          \n\t"
757    "mul.s      %[f2],     %[f2],       %[f0]          \n\t"
758   "2:                                                 \n\t"
759    "mul.s      %[f1],     %[f1],       %[mu]          \n\t"
760    "mul.s      %[f2],     %[f2],       %[mu]          \n\t"
761    "swc1       %[f1],     0(%[ef0])                   \n\t"
762    "swc1       %[f2],     0(%[ef1])                   \n\t"
763    "addiu      %[len],    %[len],      -1             \n\t"
764    "addiu      %[x_pow],  %[x_pow],    4              \n\t"
765    "addiu      %[ef0],    %[ef0],      4              \n\t"
766    "bgtz       %[len],    1b                          \n\t"
767    " addiu     %[ef1],    %[ef1],      4              \n\t"
768    ".set       pop                                    \n\t"
769    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
770#if !defined(MIPS32_R2_LE)
771      [f3] "=&f" (f3),
772#endif
773      [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
774      [len] "+r" (len)
775    : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
776      [err_th] "f" (error_threshold)
777    : "memory"
778  );
779}
780
781void WebRtcAec_InitAec_mips(void) {
782  WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
783  WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
784  WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
785  WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
786  WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
787}
788