aecm_core_c.c revision 750423c7227be04acf65472af0ee57ce47376fcb
1/*
2 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
12
13#include <assert.h>
14#include <stddef.h>
15#include <stdlib.h>
16
17#include "webrtc/common_audio/signal_processing/include/real_fft.h"
18#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
19#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
20#include "webrtc/modules/audio_processing/utility/ring_buffer.h"
21#include "webrtc/system_wrappers/interface/compile_assert_c.h"
22#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
23#include "webrtc/typedefs.h"
24
25// Square root of Hanning window in Q14.
26#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
27// Table is defined in an ARM assembly file.
28extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
29#else
30static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
31  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
32  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
33  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
34  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
35  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
36  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
37  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
38  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
39};
40#endif
41
42#ifdef AECM_WITH_ABS_APPROX
43//Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
44static const uint16_t kAlpha1 = 32584;
45//Q15 beta = 0.12967166976970   const Factor for magnitude approximation
46static const uint16_t kBeta1 = 4249;
47//Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
48static const uint16_t kAlpha2 = 30879;
49//Q15 beta = 0.33787806009150   const Factor for magnitude approximation
50static const uint16_t kBeta2 = 11072;
51//Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
52static const uint16_t kAlpha3 = 26951;
53//Q15 beta = 0.57762063060713   const Factor for magnitude approximation
54static const uint16_t kBeta3 = 18927;
55#endif
56
57static const int16_t kNoiseEstQDomain = 15;
58static const int16_t kNoiseEstIncCount = 5;
59
60static void ComfortNoise(AecmCore_t* aecm,
61                         const uint16_t* dfa,
62                         complex16_t* out,
63                         const int16_t* lambda);
64
65static void WindowAndFFT(AecmCore_t* aecm,
66                          int16_t* fft,
67                          const int16_t* time_signal,
68                          complex16_t* freq_signal,
69                          int time_signal_scaling) {
70  int i = 0;
71
72  // FFT of signal
73  for (i = 0; i < PART_LEN; i++) {
74    // Window time domain signal and insert into real part of
75    // transformation array |fft|
76    fft[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
77        (time_signal[i] << time_signal_scaling),
78        WebRtcAecm_kSqrtHanning[i],
79        14);
80    fft[PART_LEN + i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
81        (time_signal[i + PART_LEN] << time_signal_scaling),
82        WebRtcAecm_kSqrtHanning[PART_LEN - i],
83        14);
84  }
85
86  // Do forward FFT, then take only the first PART_LEN complex samples,
87  // and change signs of the imaginary parts.
88  WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
89  for (i = 0; i < PART_LEN; i++) {
90    freq_signal[i].imag = -freq_signal[i].imag;
91  }
92}
93
94static void InverseFFTAndWindow(AecmCore_t* aecm,
95                                int16_t* fft,
96                                complex16_t* efw,
97                                int16_t* output,
98                                const int16_t* nearendClean)
99{
100  int i, j, outCFFT;
101  int32_t tmp32no1;
102  // Reuse |efw| for the inverse FFT output after transferring
103  // the contents to |fft|.
104  int16_t* ifft_out = (int16_t*)efw;
105
106  // Synthesis
107  for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
108    fft[j] = efw[i].real;
109    fft[j + 1] = -efw[i].imag;
110  }
111  fft[0] = efw[0].real;
112  fft[1] = -efw[0].imag;
113
114  fft[PART_LEN2] = efw[PART_LEN].real;
115  fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
116
117  // Inverse FFT. Keep outCFFT to scale the samples in the next block.
118  outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
119  for (i = 0; i < PART_LEN; i++) {
120    ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
121                    ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
122    tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
123                                     outCFFT - aecm->dfaCleanQDomain);
124    output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
125                                        tmp32no1 + aecm->outBuf[i],
126                                        WEBRTC_SPL_WORD16_MIN);
127
128    tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(ifft_out[PART_LEN + i],
129                                         WebRtcAecm_kSqrtHanning[PART_LEN - i],
130                                         14);
131    tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
132                                    outCFFT - aecm->dfaCleanQDomain);
133    aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
134                                                tmp32no1,
135                                                WEBRTC_SPL_WORD16_MIN);
136  }
137
138  // Copy the current block to the old position
139  // (aecm->outBuf is shifted elsewhere)
140  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
141  memcpy(aecm->dBufNoisy,
142         aecm->dBufNoisy + PART_LEN,
143         sizeof(int16_t) * PART_LEN);
144  if (nearendClean != NULL)
145  {
146    memcpy(aecm->dBufClean,
147           aecm->dBufClean + PART_LEN,
148           sizeof(int16_t) * PART_LEN);
149  }
150}
151
152// Transforms a time domain signal into the frequency domain, outputting the
153// complex valued signal, absolute value and sum of absolute values.
154//
155// time_signal          [in]    Pointer to time domain signal
156// freq_signal_real     [out]   Pointer to real part of frequency domain array
157// freq_signal_imag     [out]   Pointer to imaginary part of frequency domain
158//                              array
159// freq_signal_abs      [out]   Pointer to absolute value of frequency domain
160//                              array
161// freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
162//                              the frequency domain array
163// return value                 The Q-domain of current frequency values
164//
165static int TimeToFrequencyDomain(AecmCore_t* aecm,
166                                 const int16_t* time_signal,
167                                 complex16_t* freq_signal,
168                                 uint16_t* freq_signal_abs,
169                                 uint32_t* freq_signal_sum_abs)
170{
171  int i = 0;
172  int time_signal_scaling = 0;
173
174  int32_t tmp32no1 = 0;
175  int32_t tmp32no2 = 0;
176
177  // In fft_buf, +16 for 32-byte alignment.
178  int16_t fft_buf[PART_LEN4 + 16];
179  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
180
181  int16_t tmp16no1;
182#ifndef WEBRTC_ARCH_ARM_V7
183  int16_t tmp16no2;
184#endif
185#ifdef AECM_WITH_ABS_APPROX
186  int16_t max_value = 0;
187  int16_t min_value = 0;
188  uint16_t alpha = 0;
189  uint16_t beta = 0;
190#endif
191
192#ifdef AECM_DYNAMIC_Q
193  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
194  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
195#endif
196
197  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
198
199  // Extract imaginary and real part, calculate the magnitude for
200  // all frequency bins
201  freq_signal[0].imag = 0;
202  freq_signal[PART_LEN].imag = 0;
203  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
204  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
205                                freq_signal[PART_LEN].real);
206  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
207                           (uint32_t)(freq_signal_abs[PART_LEN]);
208
209  for (i = 1; i < PART_LEN; i++)
210  {
211    if (freq_signal[i].real == 0)
212    {
213      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
214    }
215    else if (freq_signal[i].imag == 0)
216    {
217      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
218    }
219    else
220    {
221      // Approximation for magnitude of complex fft output
222      // magn = sqrt(real^2 + imag^2)
223      // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
224      //
225      // The parameters alpha and beta are stored in Q15
226
227#ifdef AECM_WITH_ABS_APPROX
228      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
229      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
230
231      if(tmp16no1 > tmp16no2)
232      {
233        max_value = tmp16no1;
234        min_value = tmp16no2;
235      } else
236      {
237        max_value = tmp16no2;
238        min_value = tmp16no1;
239      }
240
241      // Magnitude in Q(-6)
242      if ((max_value >> 2) > min_value)
243      {
244        alpha = kAlpha1;
245        beta = kBeta1;
246      } else if ((max_value >> 1) > min_value)
247      {
248        alpha = kAlpha2;
249        beta = kBeta2;
250      } else
251      {
252        alpha = kAlpha3;
253        beta = kBeta3;
254      }
255      tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(max_value, alpha, 15);
256      tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(min_value, beta, 15);
257      freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
258#else
259#ifdef WEBRTC_ARCH_ARM_V7
260      __asm __volatile(
261        "smulbb %[tmp32no1], %[real], %[real]\n\t"
262        "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
263        :[tmp32no1]"+&r"(tmp32no1),
264         [tmp32no2]"=r"(tmp32no2)
265        :[real]"r"(freq_signal[i].real),
266         [imag]"r"(freq_signal[i].imag)
267      );
268#else
269      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
270      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
271      tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
272      tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
273      tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
274#endif // WEBRTC_ARCH_ARM_V7
275      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
276
277      freq_signal_abs[i] = (uint16_t)tmp32no1;
278#endif // AECM_WITH_ABS_APPROX
279    }
280    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
281  }
282
283  return time_signal_scaling;
284}
285
286int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
287                            const int16_t * farend,
288                            const int16_t * nearendNoisy,
289                            const int16_t * nearendClean,
290                            int16_t * output)
291{
292  int i;
293
294  uint32_t xfaSum;
295  uint32_t dfaNoisySum;
296  uint32_t dfaCleanSum;
297  uint32_t echoEst32Gained;
298  uint32_t tmpU32;
299
300  int32_t tmp32no1;
301
302  uint16_t xfa[PART_LEN1];
303  uint16_t dfaNoisy[PART_LEN1];
304  uint16_t dfaClean[PART_LEN1];
305  uint16_t* ptrDfaClean = dfaClean;
306  const uint16_t* far_spectrum_ptr = NULL;
307
308  // 32 byte aligned buffers (with +8 or +16).
309  // TODO (kma): define fft with complex16_t.
310  int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
311  int32_t echoEst32_buf[PART_LEN1 + 8];
312  int32_t dfw_buf[PART_LEN2 + 8];
313  int32_t efw_buf[PART_LEN2 + 8];
314
315  int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
316  int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
317  complex16_t* dfw = (complex16_t*) (((uintptr_t) dfw_buf + 31) & ~ 31);
318  complex16_t* efw = (complex16_t*) (((uintptr_t) efw_buf + 31) & ~ 31);
319
320  int16_t hnl[PART_LEN1];
321  int16_t numPosCoef = 0;
322  int16_t nlpGain = ONE_Q14;
323  int delay;
324  int16_t tmp16no1;
325  int16_t tmp16no2;
326  int16_t mu;
327  int16_t supGain;
328  int16_t zeros32, zeros16;
329  int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
330  int far_q;
331  int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
332
333  const int kMinPrefBand = 4;
334  const int kMaxPrefBand = 24;
335  int32_t avgHnl32 = 0;
336
337  // Determine startup state. There are three states:
338  // (0) the first CONV_LEN blocks
339  // (1) another CONV_LEN blocks
340  // (2) the rest
341
342  if (aecm->startupState < 2)
343  {
344    aecm->startupState = (aecm->totCount >= CONV_LEN) +
345                         (aecm->totCount >= CONV_LEN2);
346  }
347  // END: Determine startup state
348
349  // Buffer near and far end signals
350  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
351  memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
352  if (nearendClean != NULL)
353  {
354    memcpy(aecm->dBufClean + PART_LEN,
355           nearendClean,
356           sizeof(int16_t) * PART_LEN);
357  }
358
359  // Transform far end signal from time domain to frequency domain.
360  far_q = TimeToFrequencyDomain(aecm,
361                                aecm->xBuf,
362                                dfw,
363                                xfa,
364                                &xfaSum);
365
366  // Transform noisy near end signal from time domain to frequency domain.
367  zerosDBufNoisy = TimeToFrequencyDomain(aecm,
368                                         aecm->dBufNoisy,
369                                         dfw,
370                                         dfaNoisy,
371                                         &dfaNoisySum);
372  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
373  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
374
375
376  if (nearendClean == NULL)
377  {
378    ptrDfaClean = dfaNoisy;
379    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
380    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
381    dfaCleanSum = dfaNoisySum;
382  } else
383  {
384    // Transform clean near end signal from time domain to frequency domain.
385    zerosDBufClean = TimeToFrequencyDomain(aecm,
386                                           aecm->dBufClean,
387                                           dfw,
388                                           dfaClean,
389                                           &dfaCleanSum);
390    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
391    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
392  }
393
394  // Get the delay
395  // Save far-end history and estimate delay
396  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
397  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
398                               xfa,
399                               PART_LEN1,
400                               far_q) == -1) {
401    return -1;
402  }
403  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
404                                          dfaNoisy,
405                                          PART_LEN1,
406                                          zerosDBufNoisy);
407  if (delay == -1)
408  {
409    return -1;
410  }
411  else if (delay == -2)
412  {
413    // If the delay is unknown, we assume zero.
414    // NOTE: this will have to be adjusted if we ever add lookahead.
415    delay = 0;
416  }
417
418  if (aecm->fixedDelay >= 0)
419  {
420    // Use fixed delay
421    delay = aecm->fixedDelay;
422  }
423
424  // Get aligned far end spectrum
425  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
426  zerosXBuf = (int16_t) far_q;
427  if (far_spectrum_ptr == NULL)
428  {
429    return -1;
430  }
431
432  // Calculate log(energy) and update energy threshold levels
433  WebRtcAecm_CalcEnergies(aecm,
434                          far_spectrum_ptr,
435                          zerosXBuf,
436                          dfaNoisySum,
437                          echoEst32);
438
439  // Calculate stepsize
440  mu = WebRtcAecm_CalcStepSize(aecm);
441
442  // Update counters
443  aecm->totCount++;
444
445  // This is the channel estimation algorithm.
446  // It is base on NLMS but has a variable step length,
447  // which was calculated above.
448  WebRtcAecm_UpdateChannel(aecm,
449                           far_spectrum_ptr,
450                           zerosXBuf,
451                           dfaNoisy,
452                           mu,
453                           echoEst32);
454  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
455
456
457  // Calculate Wiener filter hnl[]
458  for (i = 0; i < PART_LEN1; i++)
459  {
460    // Far end signal through channel estimate in Q8
461    // How much can we shift right to preserve resolution
462    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
463    aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
464
465    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
466    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
467    if (zeros32 + zeros16 > 16)
468    {
469      // Multiplication is safe
470      // Result in
471      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
472      //   aecm->xfaQDomainBuf[diff])
473      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
474                                              (uint16_t)supGain);
475      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
476      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
477    } else
478    {
479      tmp16no1 = 17 - zeros32 - zeros16;
480      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
481                       RESOLUTION_SUPGAIN;
482      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
483      if (zeros32 > tmp16no1)
484      {
485        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
486                                                (uint16_t)WEBRTC_SPL_RSHIFT_W16(
487                                                  supGain,
488                                                  tmp16no1)
489                                                );
490      } else
491      {
492        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
493        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)WEBRTC_SPL_RSHIFT_W32(
494                                                  aecm->echoFilt[i],
495                                                  tmp16no1),
496                                                (uint16_t)supGain);
497      }
498    }
499
500    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
501    assert(zeros16 >= 0);  // |zeros16| is a norm, hence non-negative.
502    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
503    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
504      tmp16no1 = aecm->nearFilt[i] << zeros16;
505      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
506      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
507    } else {
508      tmp16no1 = dfa_clean_q_domain_diff < 0
509          ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
510          : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
511      qDomainDiff = 0;
512      tmp16no2 = ptrDfaClean[i];
513    }
514    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
515    tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4);
516    tmp16no2 += tmp16no1;
517    zeros16 = WebRtcSpl_NormW16(tmp16no2);
518    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
519      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
520    } else {
521      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
522                                          : tmp16no2 >> qDomainDiff;
523    }
524
525    // Wiener filter coefficients, resulting hnl in Q14
526    if (echoEst32Gained == 0)
527    {
528      hnl[i] = ONE_Q14;
529    } else if (aecm->nearFilt[i] == 0)
530    {
531      hnl[i] = 0;
532    } else
533    {
534      // Multiply the suppression gain
535      // Rounding
536      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
537      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
538                                   (uint16_t)aecm->nearFilt[i]);
539
540      // Current resolution is
541      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
542      // Make sure we are in Q14
543      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
544      if (tmp32no1 > ONE_Q14)
545      {
546        hnl[i] = 0;
547      } else if (tmp32no1 < 0)
548      {
549        hnl[i] = ONE_Q14;
550      } else
551      {
552        // 1-echoEst/dfa
553        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
554        if (hnl[i] < 0)
555        {
556          hnl[i] = 0;
557        }
558      }
559    }
560    if (hnl[i])
561    {
562      numPosCoef++;
563    }
564  }
565  // Only in wideband. Prevent the gain in upper band from being larger than
566  // in lower band.
567  if (aecm->mult == 2)
568  {
569    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
570    //               speech distortion in double-talk.
571    for (i = 0; i < PART_LEN1; i++)
572    {
573      hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], hnl[i], 14);
574    }
575
576    for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
577    {
578      avgHnl32 += (int32_t)hnl[i];
579    }
580    assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
581    avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
582
583    for (i = kMaxPrefBand; i < PART_LEN1; i++)
584    {
585      if (hnl[i] > (int16_t)avgHnl32)
586      {
587        hnl[i] = (int16_t)avgHnl32;
588      }
589    }
590  }
591
592  // Calculate NLP gain, result is in Q14
593  if (aecm->nlpFlag)
594  {
595    for (i = 0; i < PART_LEN1; i++)
596    {
597      // Truncate values close to zero and one.
598      if (hnl[i] > NLP_COMP_HIGH)
599      {
600        hnl[i] = ONE_Q14;
601      } else if (hnl[i] < NLP_COMP_LOW)
602      {
603        hnl[i] = 0;
604      }
605
606      // Remove outliers
607      if (numPosCoef < 3)
608      {
609        nlpGain = 0;
610      } else
611      {
612        nlpGain = ONE_Q14;
613      }
614
615      // NLP
616      if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
617      {
618        hnl[i] = ONE_Q14;
619      } else
620      {
621        hnl[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14);
622      }
623
624      // multiply with Wiener coefficients
625      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
626                                                                   hnl[i], 14));
627      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
628                                                                   hnl[i], 14));
629    }
630  }
631  else
632  {
633    // multiply with Wiener coefficients
634    for (i = 0; i < PART_LEN1; i++)
635    {
636      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
637                                                                   hnl[i], 14));
638      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
639                                                                   hnl[i], 14));
640    }
641  }
642
643  if (aecm->cngMode == AecmTrue)
644  {
645    ComfortNoise(aecm, ptrDfaClean, efw, hnl);
646  }
647
648  InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
649
650  return 0;
651}
652
653
654static void ComfortNoise(AecmCore_t* aecm,
655                         const uint16_t* dfa,
656                         complex16_t* out,
657                         const int16_t* lambda)
658{
659  int16_t i;
660  int16_t tmp16;
661  int32_t tmp32;
662
663  int16_t randW16[PART_LEN];
664  int16_t uReal[PART_LEN1];
665  int16_t uImag[PART_LEN1];
666  int32_t outLShift32;
667  int16_t noiseRShift16[PART_LEN1];
668
669  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
670  int16_t minTrackShift;
671
672  assert(shiftFromNearToNoise >= 0);
673  assert(shiftFromNearToNoise < 16);
674
675  if (aecm->noiseEstCtr < 100)
676  {
677    // Track the minimum more quickly initially.
678    aecm->noiseEstCtr++;
679    minTrackShift = 6;
680  } else
681  {
682    minTrackShift = 9;
683  }
684
685  // Estimate noise power.
686  for (i = 0; i < PART_LEN1; i++)
687  {
688    // Shift to the noise domain.
689    tmp32 = (int32_t)dfa[i];
690    outLShift32 = tmp32 << shiftFromNearToNoise;
691
692    if (outLShift32 < aecm->noiseEst[i])
693    {
694      // Reset "too low" counter
695      aecm->noiseEstTooLowCtr[i] = 0;
696      // Track the minimum.
697      if (aecm->noiseEst[i] < (1 << minTrackShift))
698      {
699        // For small values, decrease noiseEst[i] every
700        // |kNoiseEstIncCount| block. The regular approach below can not
701        // go further down due to truncation.
702        aecm->noiseEstTooHighCtr[i]++;
703        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
704        {
705          aecm->noiseEst[i]--;
706          aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
707        }
708      }
709      else
710      {
711        aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)
712                              >> minTrackShift);
713      }
714    } else
715    {
716      // Reset "too high" counter
717      aecm->noiseEstTooHighCtr[i] = 0;
718      // Ramp slowly upwards until we hit the minimum again.
719      if ((aecm->noiseEst[i] >> 19) > 0)
720      {
721        // Avoid overflow.
722        // Multiplication with 2049 will cause wrap around. Scale
723        // down first and then multiply
724        aecm->noiseEst[i] >>= 11;
725        aecm->noiseEst[i] *= 2049;
726      }
727      else if ((aecm->noiseEst[i] >> 11) > 0)
728      {
729        // Large enough for relative increase
730        aecm->noiseEst[i] *= 2049;
731        aecm->noiseEst[i] >>= 11;
732      }
733      else
734      {
735        // Make incremental increases based on size every
736        // |kNoiseEstIncCount| block
737        aecm->noiseEstTooLowCtr[i]++;
738        if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
739        {
740          aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
741          aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
742        }
743      }
744    }
745  }
746
747  for (i = 0; i < PART_LEN1; i++)
748  {
749    tmp32 = WEBRTC_SPL_RSHIFT_W32(aecm->noiseEst[i], shiftFromNearToNoise);
750    if (tmp32 > 32767)
751    {
752      tmp32 = 32767;
753      aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
754    }
755    noiseRShift16[i] = (int16_t)tmp32;
756
757    tmp16 = ONE_Q14 - lambda[i];
758    noiseRShift16[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16,
759                                                          noiseRShift16[i],
760                                                          14);
761  }
762
763  // Generate a uniform random array on [0 2^15-1].
764  WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
765
766  // Generate noise according to estimated energy.
767  uReal[0] = 0; // Reject LF noise.
768  uImag[0] = 0;
769  for (i = 1; i < PART_LEN1; i++)
770  {
771    // Get a random index for the cos and sin tables over [0 359].
772    tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15);
773
774    // Tables are in Q13.
775    uReal[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i],
776                                                  WebRtcAecm_kCosTable[tmp16],
777                                                  13);
778    uImag[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i],
779                                                  WebRtcAecm_kSinTable[tmp16],
780                                                  13);
781  }
782  uImag[PART_LEN] = 0;
783
784  for (i = 0; i < PART_LEN1; i++)
785  {
786    out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);
787    out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
788  }
789}
790
791