1/*
2 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
12
13#include <assert.h>
14#include <stddef.h>
15#include <stdlib.h>
16
17#include "webrtc/common_audio/ring_buffer.h"
18#include "webrtc/common_audio/signal_processing/include/real_fft.h"
19#include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h"
20#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
21#include "webrtc/system_wrappers/include/compile_assert_c.h"
22#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
23#include "webrtc/typedefs.h"
24
25// Square root of Hanning window in Q14.
26#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
27// Table is defined in an ARM assembly file.
28extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
29#else
30static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
31  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
32  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
33  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
34  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
35  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
36  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
37  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
38  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
39};
40#endif
41
42#ifdef AECM_WITH_ABS_APPROX
43//Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
44static const uint16_t kAlpha1 = 32584;
45//Q15 beta = 0.12967166976970   const Factor for magnitude approximation
46static const uint16_t kBeta1 = 4249;
47//Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
48static const uint16_t kAlpha2 = 30879;
49//Q15 beta = 0.33787806009150   const Factor for magnitude approximation
50static const uint16_t kBeta2 = 11072;
51//Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
52static const uint16_t kAlpha3 = 26951;
53//Q15 beta = 0.57762063060713   const Factor for magnitude approximation
54static const uint16_t kBeta3 = 18927;
55#endif
56
57static const int16_t kNoiseEstQDomain = 15;
58static const int16_t kNoiseEstIncCount = 5;
59
60static void ComfortNoise(AecmCore* aecm,
61                         const uint16_t* dfa,
62                         ComplexInt16* out,
63                         const int16_t* lambda);
64
65static void WindowAndFFT(AecmCore* aecm,
66                         int16_t* fft,
67                         const int16_t* time_signal,
68                         ComplexInt16* freq_signal,
69                         int time_signal_scaling) {
70  int i = 0;
71
72  // FFT of signal
73  for (i = 0; i < PART_LEN; i++) {
74    // Window time domain signal and insert into real part of
75    // transformation array |fft|
76    int16_t scaled_time_signal = time_signal[i] << time_signal_scaling;
77    fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14);
78    scaled_time_signal = time_signal[i + PART_LEN] << time_signal_scaling;
79    fft[PART_LEN + i] = (int16_t)((
80        scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14);
81  }
82
83  // Do forward FFT, then take only the first PART_LEN complex samples,
84  // and change signs of the imaginary parts.
85  WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
86  for (i = 0; i < PART_LEN; i++) {
87    freq_signal[i].imag = -freq_signal[i].imag;
88  }
89}
90
91static void InverseFFTAndWindow(AecmCore* aecm,
92                                int16_t* fft,
93                                ComplexInt16* efw,
94                                int16_t* output,
95                                const int16_t* nearendClean) {
96  int i, j, outCFFT;
97  int32_t tmp32no1;
98  // Reuse |efw| for the inverse FFT output after transferring
99  // the contents to |fft|.
100  int16_t* ifft_out = (int16_t*)efw;
101
102  // Synthesis
103  for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
104    fft[j] = efw[i].real;
105    fft[j + 1] = -efw[i].imag;
106  }
107  fft[0] = efw[0].real;
108  fft[1] = -efw[0].imag;
109
110  fft[PART_LEN2] = efw[PART_LEN].real;
111  fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
112
113  // Inverse FFT. Keep outCFFT to scale the samples in the next block.
114  outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
115  for (i = 0; i < PART_LEN; i++) {
116    ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
117                    ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
118    tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
119                                     outCFFT - aecm->dfaCleanQDomain);
120    output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
121                                        tmp32no1 + aecm->outBuf[i],
122                                        WEBRTC_SPL_WORD16_MIN);
123
124    tmp32no1 = (ifft_out[PART_LEN + i] *
125        WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14;
126    tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
127                                    outCFFT - aecm->dfaCleanQDomain);
128    aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
129                                                tmp32no1,
130                                                WEBRTC_SPL_WORD16_MIN);
131  }
132
133  // Copy the current block to the old position
134  // (aecm->outBuf is shifted elsewhere)
135  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
136  memcpy(aecm->dBufNoisy,
137         aecm->dBufNoisy + PART_LEN,
138         sizeof(int16_t) * PART_LEN);
139  if (nearendClean != NULL)
140  {
141    memcpy(aecm->dBufClean,
142           aecm->dBufClean + PART_LEN,
143           sizeof(int16_t) * PART_LEN);
144  }
145}
146
147// Transforms a time domain signal into the frequency domain, outputting the
148// complex valued signal, absolute value and sum of absolute values.
149//
150// time_signal          [in]    Pointer to time domain signal
151// freq_signal_real     [out]   Pointer to real part of frequency domain array
152// freq_signal_imag     [out]   Pointer to imaginary part of frequency domain
153//                              array
154// freq_signal_abs      [out]   Pointer to absolute value of frequency domain
155//                              array
156// freq_signal_sum_abs  [out]   Pointer to the sum of all absolute values in
157//                              the frequency domain array
158// return value                 The Q-domain of current frequency values
159//
160static int TimeToFrequencyDomain(AecmCore* aecm,
161                                 const int16_t* time_signal,
162                                 ComplexInt16* freq_signal,
163                                 uint16_t* freq_signal_abs,
164                                 uint32_t* freq_signal_sum_abs) {
165  int i = 0;
166  int time_signal_scaling = 0;
167
168  int32_t tmp32no1 = 0;
169  int32_t tmp32no2 = 0;
170
171  // In fft_buf, +16 for 32-byte alignment.
172  int16_t fft_buf[PART_LEN4 + 16];
173  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
174
175  int16_t tmp16no1;
176#ifndef WEBRTC_ARCH_ARM_V7
177  int16_t tmp16no2;
178#endif
179#ifdef AECM_WITH_ABS_APPROX
180  int16_t max_value = 0;
181  int16_t min_value = 0;
182  uint16_t alpha = 0;
183  uint16_t beta = 0;
184#endif
185
186#ifdef AECM_DYNAMIC_Q
187  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
188  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
189#endif
190
191  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
192
193  // Extract imaginary and real part, calculate the magnitude for
194  // all frequency bins
195  freq_signal[0].imag = 0;
196  freq_signal[PART_LEN].imag = 0;
197  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
198  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
199                                freq_signal[PART_LEN].real);
200  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
201                           (uint32_t)(freq_signal_abs[PART_LEN]);
202
203  for (i = 1; i < PART_LEN; i++)
204  {
205    if (freq_signal[i].real == 0)
206    {
207      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
208    }
209    else if (freq_signal[i].imag == 0)
210    {
211      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
212    }
213    else
214    {
215      // Approximation for magnitude of complex fft output
216      // magn = sqrt(real^2 + imag^2)
217      // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
218      //
219      // The parameters alpha and beta are stored in Q15
220
221#ifdef AECM_WITH_ABS_APPROX
222      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
223      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
224
225      if(tmp16no1 > tmp16no2)
226      {
227        max_value = tmp16no1;
228        min_value = tmp16no2;
229      } else
230      {
231        max_value = tmp16no2;
232        min_value = tmp16no1;
233      }
234
235      // Magnitude in Q(-6)
236      if ((max_value >> 2) > min_value)
237      {
238        alpha = kAlpha1;
239        beta = kBeta1;
240      } else if ((max_value >> 1) > min_value)
241      {
242        alpha = kAlpha2;
243        beta = kBeta2;
244      } else
245      {
246        alpha = kAlpha3;
247        beta = kBeta3;
248      }
249      tmp16no1 = (int16_t)((max_value * alpha) >> 15);
250      tmp16no2 = (int16_t)((min_value * beta) >> 15);
251      freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
252#else
253#ifdef WEBRTC_ARCH_ARM_V7
254      __asm __volatile(
255        "smulbb %[tmp32no1], %[real], %[real]\n\t"
256        "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
257        :[tmp32no1]"+&r"(tmp32no1),
258         [tmp32no2]"=r"(tmp32no2)
259        :[real]"r"(freq_signal[i].real),
260         [imag]"r"(freq_signal[i].imag)
261      );
262#else
263      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
264      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
265      tmp32no1 = tmp16no1 * tmp16no1;
266      tmp32no2 = tmp16no2 * tmp16no2;
267      tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
268#endif // WEBRTC_ARCH_ARM_V7
269      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
270
271      freq_signal_abs[i] = (uint16_t)tmp32no1;
272#endif // AECM_WITH_ABS_APPROX
273    }
274    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
275  }
276
277  return time_signal_scaling;
278}
279
280int WebRtcAecm_ProcessBlock(AecmCore* aecm,
281                            const int16_t* farend,
282                            const int16_t* nearendNoisy,
283                            const int16_t* nearendClean,
284                            int16_t* output) {
285  int i;
286
287  uint32_t xfaSum;
288  uint32_t dfaNoisySum;
289  uint32_t dfaCleanSum;
290  uint32_t echoEst32Gained;
291  uint32_t tmpU32;
292
293  int32_t tmp32no1;
294
295  uint16_t xfa[PART_LEN1];
296  uint16_t dfaNoisy[PART_LEN1];
297  uint16_t dfaClean[PART_LEN1];
298  uint16_t* ptrDfaClean = dfaClean;
299  const uint16_t* far_spectrum_ptr = NULL;
300
301  // 32 byte aligned buffers (with +8 or +16).
302  // TODO(kma): define fft with ComplexInt16.
303  int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
304  int32_t echoEst32_buf[PART_LEN1 + 8];
305  int32_t dfw_buf[PART_LEN2 + 8];
306  int32_t efw_buf[PART_LEN2 + 8];
307
308  int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
309  int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
310  ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31);
311  ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31);
312
313  int16_t hnl[PART_LEN1];
314  int16_t numPosCoef = 0;
315  int16_t nlpGain = ONE_Q14;
316  int delay;
317  int16_t tmp16no1;
318  int16_t tmp16no2;
319  int16_t mu;
320  int16_t supGain;
321  int16_t zeros32, zeros16;
322  int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
323  int far_q;
324  int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
325
326  const int kMinPrefBand = 4;
327  const int kMaxPrefBand = 24;
328  int32_t avgHnl32 = 0;
329
330  // Determine startup state. There are three states:
331  // (0) the first CONV_LEN blocks
332  // (1) another CONV_LEN blocks
333  // (2) the rest
334
335  if (aecm->startupState < 2)
336  {
337    aecm->startupState = (aecm->totCount >= CONV_LEN) +
338                         (aecm->totCount >= CONV_LEN2);
339  }
340  // END: Determine startup state
341
342  // Buffer near and far end signals
343  memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
344  memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
345  if (nearendClean != NULL)
346  {
347    memcpy(aecm->dBufClean + PART_LEN,
348           nearendClean,
349           sizeof(int16_t) * PART_LEN);
350  }
351
352  // Transform far end signal from time domain to frequency domain.
353  far_q = TimeToFrequencyDomain(aecm,
354                                aecm->xBuf,
355                                dfw,
356                                xfa,
357                                &xfaSum);
358
359  // Transform noisy near end signal from time domain to frequency domain.
360  zerosDBufNoisy = TimeToFrequencyDomain(aecm,
361                                         aecm->dBufNoisy,
362                                         dfw,
363                                         dfaNoisy,
364                                         &dfaNoisySum);
365  aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
366  aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
367
368
369  if (nearendClean == NULL)
370  {
371    ptrDfaClean = dfaNoisy;
372    aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
373    aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
374    dfaCleanSum = dfaNoisySum;
375  } else
376  {
377    // Transform clean near end signal from time domain to frequency domain.
378    zerosDBufClean = TimeToFrequencyDomain(aecm,
379                                           aecm->dBufClean,
380                                           dfw,
381                                           dfaClean,
382                                           &dfaCleanSum);
383    aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
384    aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
385  }
386
387  // Get the delay
388  // Save far-end history and estimate delay
389  WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
390  if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
391                               xfa,
392                               PART_LEN1,
393                               far_q) == -1) {
394    return -1;
395  }
396  delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
397                                          dfaNoisy,
398                                          PART_LEN1,
399                                          zerosDBufNoisy);
400  if (delay == -1)
401  {
402    return -1;
403  }
404  else if (delay == -2)
405  {
406    // If the delay is unknown, we assume zero.
407    // NOTE: this will have to be adjusted if we ever add lookahead.
408    delay = 0;
409  }
410
411  if (aecm->fixedDelay >= 0)
412  {
413    // Use fixed delay
414    delay = aecm->fixedDelay;
415  }
416
417  // Get aligned far end spectrum
418  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
419  zerosXBuf = (int16_t) far_q;
420  if (far_spectrum_ptr == NULL)
421  {
422    return -1;
423  }
424
425  // Calculate log(energy) and update energy threshold levels
426  WebRtcAecm_CalcEnergies(aecm,
427                          far_spectrum_ptr,
428                          zerosXBuf,
429                          dfaNoisySum,
430                          echoEst32);
431
432  // Calculate stepsize
433  mu = WebRtcAecm_CalcStepSize(aecm);
434
435  // Update counters
436  aecm->totCount++;
437
438  // This is the channel estimation algorithm.
439  // It is base on NLMS but has a variable step length,
440  // which was calculated above.
441  WebRtcAecm_UpdateChannel(aecm,
442                           far_spectrum_ptr,
443                           zerosXBuf,
444                           dfaNoisy,
445                           mu,
446                           echoEst32);
447  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
448
449
450  // Calculate Wiener filter hnl[]
451  for (i = 0; i < PART_LEN1; i++)
452  {
453    // Far end signal through channel estimate in Q8
454    // How much can we shift right to preserve resolution
455    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
456    aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
457
458    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
459    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
460    if (zeros32 + zeros16 > 16)
461    {
462      // Multiplication is safe
463      // Result in
464      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
465      //   aecm->xfaQDomainBuf[diff])
466      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
467                                              (uint16_t)supGain);
468      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
469      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
470    } else
471    {
472      tmp16no1 = 17 - zeros32 - zeros16;
473      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
474                       RESOLUTION_SUPGAIN;
475      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
476      if (zeros32 > tmp16no1)
477      {
478        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
479                                                supGain >> tmp16no1);
480      } else
481      {
482        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
483        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
484      }
485    }
486
487    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
488    assert(zeros16 >= 0);  // |zeros16| is a norm, hence non-negative.
489    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
490    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
491      tmp16no1 = aecm->nearFilt[i] << zeros16;
492      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
493      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
494    } else {
495      tmp16no1 = dfa_clean_q_domain_diff < 0
496          ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
497          : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
498      qDomainDiff = 0;
499      tmp16no2 = ptrDfaClean[i];
500    }
501    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
502    tmp16no2 = (int16_t)(tmp32no1 >> 4);
503    tmp16no2 += tmp16no1;
504    zeros16 = WebRtcSpl_NormW16(tmp16no2);
505    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
506      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
507    } else {
508      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
509                                          : tmp16no2 >> qDomainDiff;
510    }
511
512    // Wiener filter coefficients, resulting hnl in Q14
513    if (echoEst32Gained == 0)
514    {
515      hnl[i] = ONE_Q14;
516    } else if (aecm->nearFilt[i] == 0)
517    {
518      hnl[i] = 0;
519    } else
520    {
521      // Multiply the suppression gain
522      // Rounding
523      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
524      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
525                                   (uint16_t)aecm->nearFilt[i]);
526
527      // Current resolution is
528      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
529      // Make sure we are in Q14
530      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
531      if (tmp32no1 > ONE_Q14)
532      {
533        hnl[i] = 0;
534      } else if (tmp32no1 < 0)
535      {
536        hnl[i] = ONE_Q14;
537      } else
538      {
539        // 1-echoEst/dfa
540        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
541        if (hnl[i] < 0)
542        {
543          hnl[i] = 0;
544        }
545      }
546    }
547    if (hnl[i])
548    {
549      numPosCoef++;
550    }
551  }
552  // Only in wideband. Prevent the gain in upper band from being larger than
553  // in lower band.
554  if (aecm->mult == 2)
555  {
556    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
557    //               speech distortion in double-talk.
558    for (i = 0; i < PART_LEN1; i++)
559    {
560      hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14);
561    }
562
563    for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
564    {
565      avgHnl32 += (int32_t)hnl[i];
566    }
567    assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
568    avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
569
570    for (i = kMaxPrefBand; i < PART_LEN1; i++)
571    {
572      if (hnl[i] > (int16_t)avgHnl32)
573      {
574        hnl[i] = (int16_t)avgHnl32;
575      }
576    }
577  }
578
579  // Calculate NLP gain, result is in Q14
580  if (aecm->nlpFlag)
581  {
582    for (i = 0; i < PART_LEN1; i++)
583    {
584      // Truncate values close to zero and one.
585      if (hnl[i] > NLP_COMP_HIGH)
586      {
587        hnl[i] = ONE_Q14;
588      } else if (hnl[i] < NLP_COMP_LOW)
589      {
590        hnl[i] = 0;
591      }
592
593      // Remove outliers
594      if (numPosCoef < 3)
595      {
596        nlpGain = 0;
597      } else
598      {
599        nlpGain = ONE_Q14;
600      }
601
602      // NLP
603      if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
604      {
605        hnl[i] = ONE_Q14;
606      } else
607      {
608        hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14);
609      }
610
611      // multiply with Wiener coefficients
612      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
613                                                                   hnl[i], 14));
614      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
615                                                                   hnl[i], 14));
616    }
617  }
618  else
619  {
620    // multiply with Wiener coefficients
621    for (i = 0; i < PART_LEN1; i++)
622    {
623      efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
624                                                                   hnl[i], 14));
625      efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
626                                                                   hnl[i], 14));
627    }
628  }
629
630  if (aecm->cngMode == AecmTrue)
631  {
632    ComfortNoise(aecm, ptrDfaClean, efw, hnl);
633  }
634
635  InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
636
637  return 0;
638}
639
640static void ComfortNoise(AecmCore* aecm,
641                         const uint16_t* dfa,
642                         ComplexInt16* out,
643                         const int16_t* lambda) {
644  int16_t i;
645  int16_t tmp16;
646  int32_t tmp32;
647
648  int16_t randW16[PART_LEN];
649  int16_t uReal[PART_LEN1];
650  int16_t uImag[PART_LEN1];
651  int32_t outLShift32;
652  int16_t noiseRShift16[PART_LEN1];
653
654  int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
655  int16_t minTrackShift;
656
657  assert(shiftFromNearToNoise >= 0);
658  assert(shiftFromNearToNoise < 16);
659
660  if (aecm->noiseEstCtr < 100)
661  {
662    // Track the minimum more quickly initially.
663    aecm->noiseEstCtr++;
664    minTrackShift = 6;
665  } else
666  {
667    minTrackShift = 9;
668  }
669
670  // Estimate noise power.
671  for (i = 0; i < PART_LEN1; i++)
672  {
673    // Shift to the noise domain.
674    tmp32 = (int32_t)dfa[i];
675    outLShift32 = tmp32 << shiftFromNearToNoise;
676
677    if (outLShift32 < aecm->noiseEst[i])
678    {
679      // Reset "too low" counter
680      aecm->noiseEstTooLowCtr[i] = 0;
681      // Track the minimum.
682      if (aecm->noiseEst[i] < (1 << minTrackShift))
683      {
684        // For small values, decrease noiseEst[i] every
685        // |kNoiseEstIncCount| block. The regular approach below can not
686        // go further down due to truncation.
687        aecm->noiseEstTooHighCtr[i]++;
688        if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
689        {
690          aecm->noiseEst[i]--;
691          aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
692        }
693      }
694      else
695      {
696        aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)
697                              >> minTrackShift);
698      }
699    } else
700    {
701      // Reset "too high" counter
702      aecm->noiseEstTooHighCtr[i] = 0;
703      // Ramp slowly upwards until we hit the minimum again.
704      if ((aecm->noiseEst[i] >> 19) > 0)
705      {
706        // Avoid overflow.
707        // Multiplication with 2049 will cause wrap around. Scale
708        // down first and then multiply
709        aecm->noiseEst[i] >>= 11;
710        aecm->noiseEst[i] *= 2049;
711      }
712      else if ((aecm->noiseEst[i] >> 11) > 0)
713      {
714        // Large enough for relative increase
715        aecm->noiseEst[i] *= 2049;
716        aecm->noiseEst[i] >>= 11;
717      }
718      else
719      {
720        // Make incremental increases based on size every
721        // |kNoiseEstIncCount| block
722        aecm->noiseEstTooLowCtr[i]++;
723        if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
724        {
725          aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
726          aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
727        }
728      }
729    }
730  }
731
732  for (i = 0; i < PART_LEN1; i++)
733  {
734    tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise;
735    if (tmp32 > 32767)
736    {
737      tmp32 = 32767;
738      aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
739    }
740    noiseRShift16[i] = (int16_t)tmp32;
741
742    tmp16 = ONE_Q14 - lambda[i];
743    noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14);
744  }
745
746  // Generate a uniform random array on [0 2^15-1].
747  WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
748
749  // Generate noise according to estimated energy.
750  uReal[0] = 0; // Reject LF noise.
751  uImag[0] = 0;
752  for (i = 1; i < PART_LEN1; i++)
753  {
754    // Get a random index for the cos and sin tables over [0 359].
755    tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
756
757    // Tables are in Q13.
758    uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >>
759        13);
760    uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >>
761        13);
762  }
763  uImag[PART_LEN] = 0;
764
765  for (i = 0; i < PART_LEN1; i++)
766  {
767    out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);
768    out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
769  }
770}
771
772