1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11/*
12 * The core AEC algorithm, which is presented with time-aligned signals.
13 */
14
15#include "webrtc/modules/audio_processing/aec/aec_core.h"
16
17#ifdef WEBRTC_AEC_DEBUG_DUMP
18#include <stdio.h>
19#endif
20
21#include <assert.h>
22#include <math.h>
23#include <stddef.h>  // size_t
24#include <stdlib.h>
25#include <string.h>
26
27#include "webrtc/common_audio/ring_buffer.h"
28#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
29#include "webrtc/modules/audio_processing/aec/aec_common.h"
30#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
31#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
32#include "webrtc/modules/audio_processing/logging/aec_logging.h"
33#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
34#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
35#include "webrtc/typedefs.h"
36
37
38// Buffer size (samples)
39static const size_t kBufSizePartitions = 250;  // 1 second of audio in 16 kHz.
40
41// Metrics
42static const int subCountLen = 4;
43static const int countLen = 50;
44static const int kDelayMetricsAggregationWindow = 1250;  // 5 seconds at 16 kHz.
45
46// Quantities to control H band scaling for SWB input
47static const float cnScaleHband =
48    (float)0.4;  // scale for comfort noise in H band
49// Initial bin for averaging nlp gain in low band
50static const int freqAvgIc = PART_LEN / 2;
51
52// Matlab code to produce table:
53// win = sqrt(hanning(63)); win = [0 ; win(1:32)];
54// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
55ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = {
56    0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
57    0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
58    0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
59    0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
60    0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
61    0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
62    0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
63    0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
64    0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
65    0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
66    0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
67    0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
68    0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
69    0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
70    0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
71    0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
72    1.00000000000000f};
73
74// Matlab code to produce table:
75// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
76// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
77ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65] = {
78    0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f,
79    0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f,
80    0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
81    0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f,
82    0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f,
83    0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
84    0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f,
85    0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f,
86    0.4000f};
87
88// Matlab code to produce table:
89// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
90// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
91ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = {
92    1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f,
93    1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f,
94    1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
95    1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f,
96    1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f,
97    1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
98    1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f,
99    1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f,
100    2.0000f};
101
102// Delay Agnostic AEC parameters, still under development and may change.
103static const float kDelayQualityThresholdMax = 0.07f;
104static const float kDelayQualityThresholdMin = 0.01f;
105static const int kInitialShiftOffset = 5;
106#if !defined(WEBRTC_ANDROID)
107static const int kDelayCorrectionStart = 1500;  // 10 ms chunks
108#endif
109
110// Target suppression levels for nlp modes.
111// log{0.001, 0.00001, 0.00000001}
112static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f};
113
114// Two sets of parameters, one for the extended filter mode.
115static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f};
116static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f};
117const float WebRtcAec_kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f},
118                                                              {0.92f, 0.08f}};
119const float WebRtcAec_kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f},
120                                                            {0.93f, 0.07f}};
121
122// Number of partitions forming the NLP's "preferred" bands.
123enum {
124  kPrefBandSize = 24
125};
126
127#ifdef WEBRTC_AEC_DEBUG_DUMP
128extern int webrtc_aec_instance_count;
129#endif
130
131WebRtcAecFilterFar WebRtcAec_FilterFar;
132WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
133WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
134WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
135WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
136WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
137WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
138WebRtcAecPartitionDelay WebRtcAec_PartitionDelay;
139WebRtcAecWindowData WebRtcAec_WindowData;
140
141__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
142  return aRe * bRe - aIm * bIm;
143}
144
145__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
146  return aRe * bIm + aIm * bRe;
147}
148
149static int CmpFloat(const void* a, const void* b) {
150  const float* da = (const float*)a;
151  const float* db = (const float*)b;
152
153  return (*da > *db) - (*da < *db);
154}
155
156static void FilterFar(
157    int num_partitions,
158    int x_fft_buf_block_pos,
159    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
160    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
161    float y_fft[2][PART_LEN1]) {
162  int i;
163  for (i = 0; i < num_partitions; i++) {
164    int j;
165    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
166    int pos = i * PART_LEN1;
167    // Check for wrap
168    if (i + x_fft_buf_block_pos >= num_partitions) {
169      xPos -= num_partitions * (PART_LEN1);
170    }
171
172    for (j = 0; j < PART_LEN1; j++) {
173      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
174                           x_fft_buf[1][xPos + j],
175                           h_fft_buf[0][pos + j],
176                           h_fft_buf[1][pos + j]);
177      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
178                           x_fft_buf[1][xPos + j],
179                           h_fft_buf[0][pos + j],
180                           h_fft_buf[1][pos + j]);
181    }
182  }
183}
184
185static void ScaleErrorSignal(int extended_filter_enabled,
186                             float normal_mu,
187                             float normal_error_threshold,
188                             float x_pow[PART_LEN1],
189                             float ef[2][PART_LEN1]) {
190  const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
191  const float error_threshold = extended_filter_enabled
192                                    ? kExtendedErrorThreshold
193                                    : normal_error_threshold;
194  int i;
195  float abs_ef;
196  for (i = 0; i < (PART_LEN1); i++) {
197    ef[0][i] /= (x_pow[i] + 1e-10f);
198    ef[1][i] /= (x_pow[i] + 1e-10f);
199    abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
200
201    if (abs_ef > error_threshold) {
202      abs_ef = error_threshold / (abs_ef + 1e-10f);
203      ef[0][i] *= abs_ef;
204      ef[1][i] *= abs_ef;
205    }
206
207    // Stepsize factor
208    ef[0][i] *= mu;
209    ef[1][i] *= mu;
210  }
211}
212
213
214static void FilterAdaptation(
215    int num_partitions,
216    int x_fft_buf_block_pos,
217    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
218    float e_fft[2][PART_LEN1],
219    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
220  int i, j;
221  float fft[PART_LEN2];
222  for (i = 0; i < num_partitions; i++) {
223    int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
224    int pos;
225    // Check for wrap
226    if (i + x_fft_buf_block_pos >= num_partitions) {
227      xPos -= num_partitions * PART_LEN1;
228    }
229
230    pos = i * PART_LEN1;
231
232    for (j = 0; j < PART_LEN; j++) {
233
234      fft[2 * j] = MulRe(x_fft_buf[0][xPos + j],
235                         -x_fft_buf[1][xPos + j],
236                         e_fft[0][j],
237                         e_fft[1][j]);
238      fft[2 * j + 1] = MulIm(x_fft_buf[0][xPos + j],
239                             -x_fft_buf[1][xPos + j],
240                             e_fft[0][j],
241                             e_fft[1][j]);
242    }
243    fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
244                   -x_fft_buf[1][xPos + PART_LEN],
245                   e_fft[0][PART_LEN],
246                   e_fft[1][PART_LEN]);
247
248    aec_rdft_inverse_128(fft);
249    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
250
251    // fft scaling
252    {
253      float scale = 2.0f / PART_LEN2;
254      for (j = 0; j < PART_LEN; j++) {
255        fft[j] *= scale;
256      }
257    }
258    aec_rdft_forward_128(fft);
259
260    h_fft_buf[0][pos] += fft[0];
261    h_fft_buf[0][pos + PART_LEN] += fft[1];
262
263    for (j = 1; j < PART_LEN; j++) {
264      h_fft_buf[0][pos + j] += fft[2 * j];
265      h_fft_buf[1][pos + j] += fft[2 * j + 1];
266    }
267  }
268}
269
270static void OverdriveAndSuppress(AecCore* aec,
271                                 float hNl[PART_LEN1],
272                                 const float hNlFb,
273                                 float efw[2][PART_LEN1]) {
274  int i;
275  for (i = 0; i < PART_LEN1; i++) {
276    // Weight subbands
277    if (hNl[i] > hNlFb) {
278      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
279               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
280    }
281    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
282
283    // Suppress error signal
284    efw[0][i] *= hNl[i];
285    efw[1][i] *= hNl[i];
286
287    // Ooura fft returns incorrect sign on imaginary component. It matters here
288    // because we are making an additive change with comfort noise.
289    efw[1][i] *= -1;
290  }
291}
292
293static int PartitionDelay(const AecCore* aec) {
294  // Measures the energy in each filter partition and returns the partition with
295  // highest energy.
296  // TODO(bjornv): Spread computational cost by computing one partition per
297  // block?
298  float wfEnMax = 0;
299  int i;
300  int delay = 0;
301
302  for (i = 0; i < aec->num_partitions; i++) {
303    int j;
304    int pos = i * PART_LEN1;
305    float wfEn = 0;
306    for (j = 0; j < PART_LEN1; j++) {
307      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
308          aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
309    }
310
311    if (wfEn > wfEnMax) {
312      wfEnMax = wfEn;
313      delay = i;
314    }
315  }
316  return delay;
317}
318
319// Threshold to protect against the ill-effects of a zero far-end.
320const float WebRtcAec_kMinFarendPSD = 15;
321
322// Updates the following smoothed  Power Spectral Densities (PSD):
323//  - sd  : near-end
324//  - se  : residual echo
325//  - sx  : far-end
326//  - sde : cross-PSD of near-end and residual echo
327//  - sxd : cross-PSD of near-end and far-end
328//
329// In addition to updating the PSDs, also the filter diverge state is
330// determined.
331static void SmoothedPSD(AecCore* aec,
332                        float efw[2][PART_LEN1],
333                        float dfw[2][PART_LEN1],
334                        float xfw[2][PART_LEN1],
335                        int* extreme_filter_divergence) {
336  // Power estimate smoothing coefficients.
337  const float* ptrGCoh = aec->extended_filter_enabled
338      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
339      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
340  int i;
341  float sdSum = 0, seSum = 0;
342
343  for (i = 0; i < PART_LEN1; i++) {
344    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
345                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
346    aec->se[i] = ptrGCoh[0] * aec->se[i] +
347                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
348    // We threshold here to protect against the ill-effects of a zero farend.
349    // The threshold is not arbitrarily chosen, but balances protection and
350    // adverse interaction with the algorithm's tuning.
351    // TODO(bjornv): investigate further why this is so sensitive.
352    aec->sx[i] =
353        ptrGCoh[0] * aec->sx[i] +
354        ptrGCoh[1] * WEBRTC_SPL_MAX(
355            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
356            WebRtcAec_kMinFarendPSD);
357
358    aec->sde[i][0] =
359        ptrGCoh[0] * aec->sde[i][0] +
360        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
361    aec->sde[i][1] =
362        ptrGCoh[0] * aec->sde[i][1] +
363        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
364
365    aec->sxd[i][0] =
366        ptrGCoh[0] * aec->sxd[i][0] +
367        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
368    aec->sxd[i][1] =
369        ptrGCoh[0] * aec->sxd[i][1] +
370        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
371
372    sdSum += aec->sd[i];
373    seSum += aec->se[i];
374  }
375
376  // Divergent filter safeguard update.
377  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
378
379  // Signal extreme filter divergence if the error is significantly larger
380  // than the nearend (13 dB).
381  *extreme_filter_divergence = (seSum > (19.95f * sdSum));
382}
383
384// Window time domain data to be used by the fft.
385__inline static void WindowData(float* x_windowed, const float* x) {
386  int i;
387  for (i = 0; i < PART_LEN; i++) {
388    x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i];
389    x_windowed[PART_LEN + i] =
390        x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
391  }
392}
393
394// Puts fft output data into a complex valued array.
395__inline static void StoreAsComplex(const float* data,
396                                    float data_complex[2][PART_LEN1]) {
397  int i;
398  data_complex[0][0] = data[0];
399  data_complex[1][0] = 0;
400  for (i = 1; i < PART_LEN; i++) {
401    data_complex[0][i] = data[2 * i];
402    data_complex[1][i] = data[2 * i + 1];
403  }
404  data_complex[0][PART_LEN] = data[1];
405  data_complex[1][PART_LEN] = 0;
406}
407
408static void SubbandCoherence(AecCore* aec,
409                             float efw[2][PART_LEN1],
410                             float dfw[2][PART_LEN1],
411                             float xfw[2][PART_LEN1],
412                             float* fft,
413                             float* cohde,
414                             float* cohxd,
415                             int* extreme_filter_divergence) {
416  int i;
417
418  SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
419
420  // Subband coherence
421  for (i = 0; i < PART_LEN1; i++) {
422    cohde[i] =
423        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
424        (aec->sd[i] * aec->se[i] + 1e-10f);
425    cohxd[i] =
426        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
427        (aec->sx[i] * aec->sd[i] + 1e-10f);
428  }
429}
430
431static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
432  int i;
433
434  *nlpGainHband = (float)0.0;
435  for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
436    *nlpGainHband += lambda[i];
437  }
438  *nlpGainHband /= (float)(PART_LEN1 - 1 - freqAvgIc);
439}
440
441static void ComfortNoise(AecCore* aec,
442                         float efw[2][PART_LEN1],
443                         float comfortNoiseHband[2][PART_LEN1],
444                         const float* noisePow,
445                         const float* lambda) {
446  int i, num;
447  float rand[PART_LEN];
448  float noise, noiseAvg, tmp, tmpAvg;
449  int16_t randW16[PART_LEN];
450  float u[2][PART_LEN1];
451
452  const float pi2 = 6.28318530717959f;
453
454  // Generate a uniform random array on [0 1]
455  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
456  for (i = 0; i < PART_LEN; i++) {
457    rand[i] = ((float)randW16[i]) / 32768;
458  }
459
460  // Reject LF noise
461  u[0][0] = 0;
462  u[1][0] = 0;
463  for (i = 1; i < PART_LEN1; i++) {
464    tmp = pi2 * rand[i - 1];
465
466    noise = sqrtf(noisePow[i]);
467    u[0][i] = noise * cosf(tmp);
468    u[1][i] = -noise * sinf(tmp);
469  }
470  u[1][PART_LEN] = 0;
471
472  for (i = 0; i < PART_LEN1; i++) {
473    // This is the proper weighting to match the background noise power
474    tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
475    // tmp = 1 - lambda[i];
476    efw[0][i] += tmp * u[0][i];
477    efw[1][i] += tmp * u[1][i];
478  }
479
480  // For H band comfort noise
481  // TODO: don't compute noise and "tmp" twice. Use the previous results.
482  noiseAvg = 0.0;
483  tmpAvg = 0.0;
484  num = 0;
485  if (aec->num_bands > 1) {
486
487    // average noise scale
488    // average over second half of freq spectrum (i.e., 4->8khz)
489    // TODO: we shouldn't need num. We know how many elements we're summing.
490    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
491      num++;
492      noiseAvg += sqrtf(noisePow[i]);
493    }
494    noiseAvg /= (float)num;
495
496    // average nlp scale
497    // average over second half of freq spectrum (i.e., 4->8khz)
498    // TODO: we shouldn't need num. We know how many elements we're summing.
499    num = 0;
500    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
501      num++;
502      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
503    }
504    tmpAvg /= (float)num;
505
506    // Use average noise for H band
507    // TODO: we should probably have a new random vector here.
508    // Reject LF noise
509    u[0][0] = 0;
510    u[1][0] = 0;
511    for (i = 1; i < PART_LEN1; i++) {
512      tmp = pi2 * rand[i - 1];
513
514      // Use average noise for H band
515      u[0][i] = noiseAvg * (float)cos(tmp);
516      u[1][i] = -noiseAvg * (float)sin(tmp);
517    }
518    u[1][PART_LEN] = 0;
519
520    for (i = 0; i < PART_LEN1; i++) {
521      // Use average NLP weight for H band
522      comfortNoiseHband[0][i] = tmpAvg * u[0][i];
523      comfortNoiseHband[1][i] = tmpAvg * u[1][i];
524    }
525  } else {
526    memset(comfortNoiseHband, 0,
527           2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0]));
528  }
529}
530
531static void InitLevel(PowerLevel* level) {
532  const float kBigFloat = 1E17f;
533
534  level->averagelevel = 0;
535  level->framelevel = 0;
536  level->minlevel = kBigFloat;
537  level->frsum = 0;
538  level->sfrsum = 0;
539  level->frcounter = 0;
540  level->sfrcounter = 0;
541}
542
543static void InitStats(Stats* stats) {
544  stats->instant = kOffsetLevel;
545  stats->average = kOffsetLevel;
546  stats->max = kOffsetLevel;
547  stats->min = kOffsetLevel * (-1);
548  stats->sum = 0;
549  stats->hisum = 0;
550  stats->himean = kOffsetLevel;
551  stats->counter = 0;
552  stats->hicounter = 0;
553}
554
555static void InitMetrics(AecCore* self) {
556  self->stateCounter = 0;
557  InitLevel(&self->farlevel);
558  InitLevel(&self->nearlevel);
559  InitLevel(&self->linoutlevel);
560  InitLevel(&self->nlpoutlevel);
561
562  InitStats(&self->erl);
563  InitStats(&self->erle);
564  InitStats(&self->aNlp);
565  InitStats(&self->rerl);
566}
567
568static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) {
569  // Do the energy calculation in the frequency domain. The FFT is performed on
570  // a segment of PART_LEN2 samples due to overlap, but we only want the energy
571  // of half that data (the last PART_LEN samples). Parseval's relation states
572  // that the energy is preserved according to
573  //
574  // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
575  //                           = ENERGY,
576  //
577  // where N = PART_LEN2. Since we are only interested in calculating the energy
578  // for the last PART_LEN samples we approximate by calculating ENERGY and
579  // divide by 2,
580  //
581  // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
582  //
583  // Since we deal with real valued time domain signals we only store frequency
584  // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
585  // need to add the contribution from the missing part in
586  // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
587  // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
588  // is the values in the for loop below, but multiplication by 2 and division
589  // by 2 cancel.
590
591  // TODO(bjornv): Investigate reusing energy calculations performed at other
592  // places in the code.
593  int k = 1;
594  // Imaginary parts are zero at end points and left out of the calculation.
595  float energy = (in[0][0] * in[0][0]) / 2;
596  energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
597
598  for (k = 1; k < PART_LEN; k++) {
599    energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
600  }
601  energy /= PART_LEN2;
602
603  level->sfrsum += energy;
604  level->sfrcounter++;
605
606  if (level->sfrcounter > subCountLen) {
607    level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
608    level->sfrsum = 0;
609    level->sfrcounter = 0;
610    if (level->framelevel > 0) {
611      if (level->framelevel < level->minlevel) {
612        level->minlevel = level->framelevel;  // New minimum.
613      } else {
614        level->minlevel *= (1 + 0.001f);  // Small increase.
615      }
616    }
617    level->frcounter++;
618    level->frsum += level->framelevel;
619    if (level->frcounter > countLen) {
620      level->averagelevel = level->frsum / countLen;
621      level->frsum = 0;
622      level->frcounter = 0;
623    }
624  }
625}
626
627static void UpdateMetrics(AecCore* aec) {
628  float dtmp, dtmp2;
629
630  const float actThresholdNoisy = 8.0f;
631  const float actThresholdClean = 40.0f;
632  const float safety = 0.99995f;
633  const float noisyPower = 300000.0f;
634
635  float actThreshold;
636  float echo, suppressedEcho;
637
638  if (aec->echoState) {  // Check if echo is likely present
639    aec->stateCounter++;
640  }
641
642  if (aec->farlevel.frcounter == 0) {
643
644    if (aec->farlevel.minlevel < noisyPower) {
645      actThreshold = actThresholdClean;
646    } else {
647      actThreshold = actThresholdNoisy;
648    }
649
650    if ((aec->stateCounter > (0.5f * countLen * subCountLen)) &&
651        (aec->farlevel.sfrcounter == 0)
652
653        // Estimate in active far-end segments only
654        &&
655        (aec->farlevel.averagelevel >
656         (actThreshold * aec->farlevel.minlevel))) {
657
658      // Subtract noise power
659      echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
660
661      // ERL
662      dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
663                                   aec->nearlevel.averagelevel +
664                               1e-10f);
665      dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
666
667      aec->erl.instant = dtmp;
668      if (dtmp > aec->erl.max) {
669        aec->erl.max = dtmp;
670      }
671
672      if (dtmp < aec->erl.min) {
673        aec->erl.min = dtmp;
674      }
675
676      aec->erl.counter++;
677      aec->erl.sum += dtmp;
678      aec->erl.average = aec->erl.sum / aec->erl.counter;
679
680      // Upper mean
681      if (dtmp > aec->erl.average) {
682        aec->erl.hicounter++;
683        aec->erl.hisum += dtmp;
684        aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
685      }
686
687      // A_NLP
688      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
689                                   (2 * aec->linoutlevel.averagelevel) +
690                               1e-10f);
691
692      // subtract noise power
693      suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
694                            safety * aec->linoutlevel.minlevel);
695
696      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
697
698      aec->aNlp.instant = dtmp2;
699      if (dtmp > aec->aNlp.max) {
700        aec->aNlp.max = dtmp;
701      }
702
703      if (dtmp < aec->aNlp.min) {
704        aec->aNlp.min = dtmp;
705      }
706
707      aec->aNlp.counter++;
708      aec->aNlp.sum += dtmp;
709      aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;
710
711      // Upper mean
712      if (dtmp > aec->aNlp.average) {
713        aec->aNlp.hicounter++;
714        aec->aNlp.hisum += dtmp;
715        aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
716      }
717
718      // ERLE
719
720      // subtract noise power
721      suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
722                            safety * aec->nlpoutlevel.minlevel);
723
724      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
725                                   (2 * aec->nlpoutlevel.averagelevel) +
726                               1e-10f);
727      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
728
729      dtmp = dtmp2;
730      aec->erle.instant = dtmp;
731      if (dtmp > aec->erle.max) {
732        aec->erle.max = dtmp;
733      }
734
735      if (dtmp < aec->erle.min) {
736        aec->erle.min = dtmp;
737      }
738
739      aec->erle.counter++;
740      aec->erle.sum += dtmp;
741      aec->erle.average = aec->erle.sum / aec->erle.counter;
742
743      // Upper mean
744      if (dtmp > aec->erle.average) {
745        aec->erle.hicounter++;
746        aec->erle.hisum += dtmp;
747        aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
748      }
749    }
750
751    aec->stateCounter = 0;
752  }
753}
754
755static void UpdateDelayMetrics(AecCore* self) {
756  int i = 0;
757  int delay_values = 0;
758  int median = 0;
759  int lookahead = WebRtc_lookahead(self->delay_estimator);
760  const int kMsPerBlock = PART_LEN / (self->mult * 8);
761  int64_t l1_norm = 0;
762
763  if (self->num_delay_values == 0) {
764    // We have no new delay value data. Even though -1 is a valid |median| in
765    // the sense that we allow negative values, it will practically never be
766    // used since multiples of |kMsPerBlock| will always be returned.
767    // We therefore use -1 to indicate in the logs that the delay estimator was
768    // not able to estimate the delay.
769    self->delay_median = -1;
770    self->delay_std = -1;
771    self->fraction_poor_delays = -1;
772    return;
773  }
774
775  // Start value for median count down.
776  delay_values = self->num_delay_values >> 1;
777  // Get median of delay values since last update.
778  for (i = 0; i < kHistorySizeBlocks; i++) {
779    delay_values -= self->delay_histogram[i];
780    if (delay_values < 0) {
781      median = i;
782      break;
783    }
784  }
785  // Account for lookahead.
786  self->delay_median = (median - lookahead) * kMsPerBlock;
787
788  // Calculate the L1 norm, with median value as central moment.
789  for (i = 0; i < kHistorySizeBlocks; i++) {
790    l1_norm += abs(i - median) * self->delay_histogram[i];
791  }
792  self->delay_std = (int)((l1_norm + self->num_delay_values / 2) /
793      self->num_delay_values) * kMsPerBlock;
794
795  // Determine fraction of delays that are out of bounds, that is, either
796  // negative (anti-causal system) or larger than the AEC filter length.
797  {
798    int num_delays_out_of_bounds = self->num_delay_values;
799    const int histogram_length = sizeof(self->delay_histogram) /
800      sizeof(self->delay_histogram[0]);
801    for (i = lookahead; i < lookahead + self->num_partitions; ++i) {
802      if (i < histogram_length)
803        num_delays_out_of_bounds -= self->delay_histogram[i];
804    }
805    self->fraction_poor_delays = (float)num_delays_out_of_bounds /
806        self->num_delay_values;
807  }
808
809  // Reset histogram.
810  memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
811  self->num_delay_values = 0;
812
813  return;
814}
815
816static void ScaledInverseFft(float freq_data[2][PART_LEN1],
817                             float time_data[PART_LEN2],
818                             float scale,
819                             int conjugate) {
820  int i;
821  const float normalization = scale / ((float)PART_LEN2);
822  const float sign = (conjugate ? -1 : 1);
823  time_data[0] = freq_data[0][0] * normalization;
824  time_data[1] = freq_data[0][PART_LEN] * normalization;
825  for (i = 1; i < PART_LEN; i++) {
826    time_data[2 * i] = freq_data[0][i] * normalization;
827    time_data[2 * i + 1] = sign * freq_data[1][i] * normalization;
828  }
829  aec_rdft_inverse_128(time_data);
830}
831
832
833static void Fft(float time_data[PART_LEN2],
834                float freq_data[2][PART_LEN1]) {
835  int i;
836  aec_rdft_forward_128(time_data);
837
838  // Reorder fft output data.
839  freq_data[1][0] = 0;
840  freq_data[1][PART_LEN] = 0;
841  freq_data[0][0] = time_data[0];
842  freq_data[0][PART_LEN] = time_data[1];
843  for (i = 1; i < PART_LEN; i++) {
844    freq_data[0][i] = time_data[2 * i];
845    freq_data[1][i] = time_data[2 * i + 1];
846  }
847}
848
849
850static int SignalBasedDelayCorrection(AecCore* self) {
851  int delay_correction = 0;
852  int last_delay = -2;
853  assert(self != NULL);
854#if !defined(WEBRTC_ANDROID)
855  // On desktops, turn on correction after |kDelayCorrectionStart| frames.  This
856  // is to let the delay estimation get a chance to converge.  Also, if the
857  // playout audio volume is low (or even muted) the delay estimation can return
858  // a very large delay, which will break the AEC if it is applied.
859  if (self->frame_count < kDelayCorrectionStart) {
860    return 0;
861  }
862#endif
863
864  // 1. Check for non-negative delay estimate.  Note that the estimates we get
865  //    from the delay estimation are not compensated for lookahead.  Hence, a
866  //    negative |last_delay| is an invalid one.
867  // 2. Verify that there is a delay change.  In addition, only allow a change
868  //    if the delay is outside a certain region taking the AEC filter length
869  //    into account.
870  // TODO(bjornv): Investigate if we can remove the non-zero delay change check.
871  // 3. Only allow delay correction if the delay estimation quality exceeds
872  //    |delay_quality_threshold|.
873  // 4. Finally, verify that the proposed |delay_correction| is feasible by
874  //    comparing with the size of the far-end buffer.
875  last_delay = WebRtc_last_delay(self->delay_estimator);
876  if ((last_delay >= 0) &&
877      (last_delay != self->previous_delay) &&
878      (WebRtc_last_delay_quality(self->delay_estimator) >
879           self->delay_quality_threshold)) {
880    int delay = last_delay - WebRtc_lookahead(self->delay_estimator);
881    // Allow for a slack in the actual delay, defined by a |lower_bound| and an
882    // |upper_bound|.  The adaptive echo cancellation filter is currently
883    // |num_partitions| (of 64 samples) long.  If the delay estimate is negative
884    // or at least 3/4 of the filter length we open up for correction.
885    const int lower_bound = 0;
886    const int upper_bound = self->num_partitions * 3 / 4;
887    const int do_correction = delay <= lower_bound || delay > upper_bound;
888    if (do_correction == 1) {
889      int available_read = (int)WebRtc_available_read(self->far_time_buf);
890      // With |shift_offset| we gradually rely on the delay estimates.  For
891      // positive delays we reduce the correction by |shift_offset| to lower the
892      // risk of pushing the AEC into a non causal state.  For negative delays
893      // we rely on the values up to a rounding error, hence compensate by 1
894      // element to make sure to push the delay into the causal region.
895      delay_correction = -delay;
896      delay_correction += delay > self->shift_offset ? self->shift_offset : 1;
897      self->shift_offset--;
898      self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset);
899      if (delay_correction > available_read - self->mult - 1) {
900        // There is not enough data in the buffer to perform this shift.  Hence,
901        // we do not rely on the delay estimate and do nothing.
902        delay_correction = 0;
903      } else {
904        self->previous_delay = last_delay;
905        ++self->delay_correction_count;
906      }
907    }
908  }
909  // Update the |delay_quality_threshold| once we have our first delay
910  // correction.
911  if (self->delay_correction_count > 0) {
912    float delay_quality = WebRtc_last_delay_quality(self->delay_estimator);
913    delay_quality = (delay_quality > kDelayQualityThresholdMax ?
914        kDelayQualityThresholdMax : delay_quality);
915    self->delay_quality_threshold =
916        (delay_quality > self->delay_quality_threshold ? delay_quality :
917            self->delay_quality_threshold);
918  }
919  return delay_correction;
920}
921
922static void EchoSubtraction(
923    AecCore* aec,
924    int num_partitions,
925    int x_fft_buf_block_pos,
926    int metrics_mode,
927    int extended_filter_enabled,
928    float normal_mu,
929    float normal_error_threshold,
930    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
931    float* const y,
932    float x_pow[PART_LEN1],
933    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
934    PowerLevel* linout_level,
935    float echo_subtractor_output[PART_LEN]) {
936  float s_fft[2][PART_LEN1];
937  float e_extended[PART_LEN2];
938  float s_extended[PART_LEN2];
939  float *s;
940  float e[PART_LEN];
941  float e_fft[2][PART_LEN1];
942  int i;
943  memset(s_fft, 0, sizeof(s_fft));
944
945  // Conditionally reset the echo subtraction filter if the filter has diverged
946  // significantly.
947  if (!aec->extended_filter_enabled &&
948      aec->extreme_filter_divergence) {
949    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
950    aec->extreme_filter_divergence = 0;
951  }
952
953  // Produce echo estimate s_fft.
954  WebRtcAec_FilterFar(num_partitions,
955                      x_fft_buf_block_pos,
956                      x_fft_buf,
957                      h_fft_buf,
958                      s_fft);
959
960  // Compute the time-domain echo estimate s.
961  ScaledInverseFft(s_fft, s_extended, 2.0f, 0);
962  s = &s_extended[PART_LEN];
963
964  // Compute the time-domain echo prediction error.
965  for (i = 0; i < PART_LEN; ++i) {
966    e[i] = y[i] - s[i];
967  }
968
969  // Compute the frequency domain echo prediction error.
970  memset(e_extended, 0, sizeof(float) * PART_LEN);
971  memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN);
972  Fft(e_extended, e_fft);
973
974  RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
975                          &e_fft[0][0],
976                          sizeof(e_fft[0][0]) * PART_LEN1 * 2);
977
978  if (metrics_mode == 1) {
979    // Note that the first PART_LEN samples in fft (before transformation) are
980    // zero. Hence, the scaling by two in UpdateLevel() should not be
981    // performed. That scaling is taken care of in UpdateMetrics() instead.
982    UpdateLevel(linout_level, e_fft);
983  }
984
985  // Scale error signal inversely with far power.
986  WebRtcAec_ScaleErrorSignal(extended_filter_enabled,
987                             normal_mu,
988                             normal_error_threshold,
989                             x_pow,
990                             e_fft);
991  WebRtcAec_FilterAdaptation(num_partitions,
992                             x_fft_buf_block_pos,
993                             x_fft_buf,
994                             e_fft,
995                             h_fft_buf);
996  memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN);
997}
998
999
1000static void EchoSuppression(AecCore* aec,
1001                            float farend[PART_LEN2],
1002                            float* echo_subtractor_output,
1003                            float* output,
1004                            float* const* outputH) {
1005  float efw[2][PART_LEN1];
1006  float xfw[2][PART_LEN1];
1007  float dfw[2][PART_LEN1];
1008  float comfortNoiseHband[2][PART_LEN1];
1009  float fft[PART_LEN2];
1010  float nlpGainHband;
1011  int i;
1012  size_t j;
1013
1014  // Coherence and non-linear filter
1015  float cohde[PART_LEN1], cohxd[PART_LEN1];
1016  float hNlDeAvg, hNlXdAvg;
1017  float hNl[PART_LEN1];
1018  float hNlPref[kPrefBandSize];
1019  float hNlFb = 0, hNlFbLow = 0;
1020  const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
1021  const int prefBandSize = kPrefBandSize / aec->mult;
1022  const int minPrefBand = 4 / aec->mult;
1023  // Power estimate smoothing coefficients.
1024  const float* min_overdrive = aec->extended_filter_enabled
1025                                   ? kExtendedMinOverDrive
1026                                   : kNormalMinOverDrive;
1027
1028  // Filter energy
1029  const int delayEstInterval = 10 * aec->mult;
1030
1031  float* xfw_ptr = NULL;
1032
1033  // Update eBuf with echo subtractor output.
1034  memcpy(aec->eBuf + PART_LEN,
1035         echo_subtractor_output,
1036         sizeof(float) * PART_LEN);
1037
1038  // Analysis filter banks for the echo suppressor.
1039  // Windowed near-end ffts.
1040  WindowData(fft, aec->dBuf);
1041  aec_rdft_forward_128(fft);
1042  StoreAsComplex(fft, dfw);
1043
1044  // Windowed echo suppressor output ffts.
1045  WindowData(fft, aec->eBuf);
1046  aec_rdft_forward_128(fft);
1047  StoreAsComplex(fft, efw);
1048
1049  // NLP
1050
1051  // Convert far-end partition to the frequency domain with windowing.
1052  WindowData(fft, farend);
1053  Fft(fft, xfw);
1054  xfw_ptr = &xfw[0][0];
1055
1056  // Buffer far.
1057  memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
1058
1059  aec->delayEstCtr++;
1060  if (aec->delayEstCtr == delayEstInterval) {
1061    aec->delayEstCtr = 0;
1062    aec->delayIdx = WebRtcAec_PartitionDelay(aec);
1063  }
1064
1065  // Use delayed far.
1066  memcpy(xfw,
1067         aec->xfwBuf + aec->delayIdx * PART_LEN1,
1068         sizeof(xfw[0][0]) * 2 * PART_LEN1);
1069
1070  WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd,
1071                             &aec->extreme_filter_divergence);
1072
1073  // Select the microphone signal as output if the filter is deemed to have
1074  // diverged.
1075  if (aec->divergeState) {
1076    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
1077  }
1078
1079  hNlXdAvg = 0;
1080  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
1081    hNlXdAvg += cohxd[i];
1082  }
1083  hNlXdAvg /= prefBandSize;
1084  hNlXdAvg = 1 - hNlXdAvg;
1085
1086  hNlDeAvg = 0;
1087  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
1088    hNlDeAvg += cohde[i];
1089  }
1090  hNlDeAvg /= prefBandSize;
1091
1092  if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
1093    aec->hNlXdAvgMin = hNlXdAvg;
1094  }
1095
1096  if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
1097    aec->stNearState = 1;
1098  } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
1099    aec->stNearState = 0;
1100  }
1101
1102  if (aec->hNlXdAvgMin == 1) {
1103    aec->echoState = 0;
1104    aec->overDrive = min_overdrive[aec->nlp_mode];
1105
1106    if (aec->stNearState == 1) {
1107      memcpy(hNl, cohde, sizeof(hNl));
1108      hNlFb = hNlDeAvg;
1109      hNlFbLow = hNlDeAvg;
1110    } else {
1111      for (i = 0; i < PART_LEN1; i++) {
1112        hNl[i] = 1 - cohxd[i];
1113      }
1114      hNlFb = hNlXdAvg;
1115      hNlFbLow = hNlXdAvg;
1116    }
1117  } else {
1118
1119    if (aec->stNearState == 1) {
1120      aec->echoState = 0;
1121      memcpy(hNl, cohde, sizeof(hNl));
1122      hNlFb = hNlDeAvg;
1123      hNlFbLow = hNlDeAvg;
1124    } else {
1125      aec->echoState = 1;
1126      for (i = 0; i < PART_LEN1; i++) {
1127        hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
1128      }
1129
1130      // Select an order statistic from the preferred bands.
1131      // TODO: Using quicksort now, but a selection algorithm may be preferred.
1132      memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
1133      qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
1134      hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
1135      hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
1136    }
1137  }
1138
1139  // Track the local filter minimum to determine suppression overdrive.
1140  if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
1141    aec->hNlFbLocalMin = hNlFbLow;
1142    aec->hNlFbMin = hNlFbLow;
1143    aec->hNlNewMin = 1;
1144    aec->hNlMinCtr = 0;
1145  }
1146  aec->hNlFbLocalMin =
1147      WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
1148  aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);
1149
1150  if (aec->hNlNewMin == 1) {
1151    aec->hNlMinCtr++;
1152  }
1153  if (aec->hNlMinCtr == 2) {
1154    aec->hNlNewMin = 0;
1155    aec->hNlMinCtr = 0;
1156    aec->overDrive =
1157        WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] /
1158                           ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f),
1159                       min_overdrive[aec->nlp_mode]);
1160  }
1161
1162  // Smooth the overdrive.
1163  if (aec->overDrive < aec->overDriveSm) {
1164    aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
1165  } else {
1166    aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
1167  }
1168
1169  WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);
1170
1171  // Add comfort noise.
1172  WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
1173
1174  // TODO(bjornv): Investigate how to take the windowing below into account if
1175  // needed.
1176  if (aec->metricsMode == 1) {
1177    // Note that we have a scaling by two in the time domain |eBuf|.
1178    // In addition the time domain signal is windowed before transformation,
1179    // losing half the energy on the average. We take care of the first
1180    // scaling only in UpdateMetrics().
1181    UpdateLevel(&aec->nlpoutlevel, efw);
1182  }
1183
1184  // Inverse error fft.
1185  ScaledInverseFft(efw, fft, 2.0f, 1);
1186
1187  // Overlap and add to obtain output.
1188  for (i = 0; i < PART_LEN; i++) {
1189    output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] +
1190                 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]);
1191
1192    // Saturate output to keep it in the allowed range.
1193    output[i] = WEBRTC_SPL_SAT(
1194        WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN);
1195  }
1196  memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0]));
1197
1198  // For H band
1199  if (aec->num_bands > 1) {
1200    // H band gain
1201    // average nlp over low band: average over second half of freq spectrum
1202    // (4->8khz)
1203    GetHighbandGain(hNl, &nlpGainHband);
1204
1205    // Inverse comfort_noise
1206    ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0);
1207
1208    // compute gain factor
1209    for (j = 0; j < aec->num_bands - 1; ++j) {
1210      for (i = 0; i < PART_LEN; i++) {
1211        outputH[j][i] = aec->dBufH[j][i] * nlpGainHband;
1212      }
1213    }
1214
1215    // Add some comfort noise where Hband is attenuated.
1216    for (i = 0; i < PART_LEN; i++) {
1217      outputH[0][i] += cnScaleHband * fft[i];
1218    }
1219
1220    // Saturate output to keep it in the allowed range.
1221    for (j = 0; j < aec->num_bands - 1; ++j) {
1222      for (i = 0; i < PART_LEN; i++) {
1223        outputH[j][i] = WEBRTC_SPL_SAT(
1224            WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN);
1225      }
1226    }
1227
1228  }
1229
1230  // Copy the current block to the old position.
1231  memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
1232  memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
1233
1234  // Copy the current block to the old position for H band
1235  for (j = 0; j < aec->num_bands - 1; ++j) {
1236    memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN);
1237  }
1238
1239  memmove(aec->xfwBuf + PART_LEN1,
1240          aec->xfwBuf,
1241          sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1);
1242}
1243
1244static void ProcessBlock(AecCore* aec) {
1245  size_t i;
1246
1247  float fft[PART_LEN2];
1248  float xf[2][PART_LEN1];
1249  float df[2][PART_LEN1];
1250  float far_spectrum = 0.0f;
1251  float near_spectrum = 0.0f;
1252  float abs_far_spectrum[PART_LEN1];
1253  float abs_near_spectrum[PART_LEN1];
1254
1255  const float gPow[2] = {0.9f, 0.1f};
1256
1257  // Noise estimate constants.
1258  const int noiseInitBlocks = 500 * aec->mult;
1259  const float step = 0.1f;
1260  const float ramp = 1.0002f;
1261  const float gInitNoise[2] = {0.999f, 0.001f};
1262
1263  float nearend[PART_LEN];
1264  float* nearend_ptr = NULL;
1265  float farend[PART_LEN2];
1266  float* farend_ptr = NULL;
1267  float echo_subtractor_output[PART_LEN];
1268  float output[PART_LEN];
1269  float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
1270  float* outputH_ptr[NUM_HIGH_BANDS_MAX];
1271  float* xf_ptr = NULL;
1272
1273  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
1274    outputH_ptr[i] = outputH[i];
1275  }
1276
1277  // Concatenate old and new nearend blocks.
1278  for (i = 0; i < aec->num_bands - 1; ++i) {
1279    WebRtc_ReadBuffer(aec->nearFrBufH[i],
1280                      (void**)&nearend_ptr,
1281                      nearend,
1282                      PART_LEN);
1283    memcpy(aec->dBufH[i] + PART_LEN, nearend_ptr, sizeof(nearend));
1284  }
1285  WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
1286  memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
1287
1288  // We should always have at least one element stored in |far_buf|.
1289  assert(WebRtc_available_read(aec->far_time_buf) > 0);
1290  WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
1291
1292#ifdef WEBRTC_AEC_DEBUG_DUMP
1293  {
1294    // TODO(minyue): |farend_ptr| starts from buffered samples. This will be
1295    // modified when |aec->far_time_buf| is revised.
1296    RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, &farend_ptr[PART_LEN], PART_LEN);
1297
1298    RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN);
1299  }
1300#endif
1301
1302  // Convert far-end signal to the frequency domain.
1303  memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2);
1304  Fft(fft, xf);
1305  xf_ptr = &xf[0][0];
1306
1307  // Near fft
1308  memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
1309  Fft(fft, df);
1310
1311  // Power smoothing
1312  for (i = 0; i < PART_LEN1; i++) {
1313    far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
1314                   (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
1315    aec->xPow[i] =
1316        gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;
1317    // Calculate absolute spectra
1318    abs_far_spectrum[i] = sqrtf(far_spectrum);
1319
1320    near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
1321    aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
1322    // Calculate absolute spectra
1323    abs_near_spectrum[i] = sqrtf(near_spectrum);
1324  }
1325
1326  // Estimate noise power. Wait until dPow is more stable.
1327  if (aec->noiseEstCtr > 50) {
1328    for (i = 0; i < PART_LEN1; i++) {
1329      if (aec->dPow[i] < aec->dMinPow[i]) {
1330        aec->dMinPow[i] =
1331            (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp;
1332      } else {
1333        aec->dMinPow[i] *= ramp;
1334      }
1335    }
1336  }
1337
1338  // Smooth increasing noise power from zero at the start,
1339  // to avoid a sudden burst of comfort noise.
1340  if (aec->noiseEstCtr < noiseInitBlocks) {
1341    aec->noiseEstCtr++;
1342    for (i = 0; i < PART_LEN1; i++) {
1343      if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
1344        aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
1345                              gInitNoise[1] * aec->dMinPow[i];
1346      } else {
1347        aec->dInitMinPow[i] = aec->dMinPow[i];
1348      }
1349    }
1350    aec->noisePow = aec->dInitMinPow;
1351  } else {
1352    aec->noisePow = aec->dMinPow;
1353  }
1354
1355  // Block wise delay estimation used for logging
1356  if (aec->delay_logging_enabled) {
1357    if (WebRtc_AddFarSpectrumFloat(
1358            aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) {
1359      int delay_estimate = WebRtc_DelayEstimatorProcessFloat(
1360          aec->delay_estimator, abs_near_spectrum, PART_LEN1);
1361      if (delay_estimate >= 0) {
1362        // Update delay estimate buffer.
1363        aec->delay_histogram[delay_estimate]++;
1364        aec->num_delay_values++;
1365      }
1366      if (aec->delay_metrics_delivered == 1 &&
1367          aec->num_delay_values >= kDelayMetricsAggregationWindow) {
1368        UpdateDelayMetrics(aec);
1369      }
1370    }
1371  }
1372
1373  // Update the xfBuf block position.
1374  aec->xfBufBlockPos--;
1375  if (aec->xfBufBlockPos == -1) {
1376    aec->xfBufBlockPos = aec->num_partitions - 1;
1377  }
1378
1379  // Buffer xf
1380  memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1,
1381         xf_ptr,
1382         sizeof(float) * PART_LEN1);
1383  memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1,
1384         &xf_ptr[PART_LEN1],
1385         sizeof(float) * PART_LEN1);
1386
1387  // Perform echo subtraction.
1388  EchoSubtraction(aec,
1389                  aec->num_partitions,
1390                  aec->xfBufBlockPos,
1391                  aec->metricsMode,
1392                  aec->extended_filter_enabled,
1393                  aec->normal_mu,
1394                  aec->normal_error_threshold,
1395                  aec->xfBuf,
1396                  nearend_ptr,
1397                  aec->xPow,
1398                  aec->wfBuf,
1399                  &aec->linoutlevel,
1400                  echo_subtractor_output);
1401
1402  RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN);
1403
1404  // Perform echo suppression.
1405  EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr);
1406
1407  if (aec->metricsMode == 1) {
1408    // Update power levels and echo metrics
1409    UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr);
1410    UpdateLevel(&aec->nearlevel, df);
1411    UpdateMetrics(aec);
1412  }
1413
1414  // Store the output block.
1415  WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
1416  // For high bands
1417  for (i = 0; i < aec->num_bands - 1; ++i) {
1418    WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);
1419  }
1420
1421  RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN);
1422}
1423
1424AecCore* WebRtcAec_CreateAec() {
1425  int i;
1426  AecCore* aec = malloc(sizeof(AecCore));
1427  if (!aec) {
1428    return NULL;
1429  }
1430
1431  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
1432  if (!aec->nearFrBuf) {
1433    WebRtcAec_FreeAec(aec);
1434    return NULL;
1435  }
1436
1437  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
1438  if (!aec->outFrBuf) {
1439    WebRtcAec_FreeAec(aec);
1440    return NULL;
1441  }
1442
1443  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
1444    aec->nearFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
1445                                             sizeof(float));
1446    if (!aec->nearFrBufH[i]) {
1447      WebRtcAec_FreeAec(aec);
1448      return NULL;
1449    }
1450    aec->outFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN,
1451                                            sizeof(float));
1452    if (!aec->outFrBufH[i]) {
1453      WebRtcAec_FreeAec(aec);
1454      return NULL;
1455    }
1456  }
1457
1458  // Create far-end buffers.
1459  // For bit exactness with legacy code, each element in |far_time_buf| is
1460  // supposed to contain |PART_LEN2| samples with an overlap of |PART_LEN|
1461  // samples from the last frame.
1462  // TODO(minyue): reduce |far_time_buf| to non-overlapped |PART_LEN| samples.
1463  aec->far_time_buf =
1464      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN2);
1465  if (!aec->far_time_buf) {
1466    WebRtcAec_FreeAec(aec);
1467    return NULL;
1468  }
1469
1470#ifdef WEBRTC_AEC_DEBUG_DUMP
1471  aec->instance_index = webrtc_aec_instance_count;
1472
1473  aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL;
1474  aec->debug_dump_count = 0;
1475#endif
1476  aec->delay_estimator_farend =
1477      WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks);
1478  if (aec->delay_estimator_farend == NULL) {
1479    WebRtcAec_FreeAec(aec);
1480    return NULL;
1481  }
1482  // We create the delay_estimator with the same amount of maximum lookahead as
1483  // the delay history size (kHistorySizeBlocks) for symmetry reasons.
1484  aec->delay_estimator = WebRtc_CreateDelayEstimator(
1485      aec->delay_estimator_farend, kHistorySizeBlocks);
1486  if (aec->delay_estimator == NULL) {
1487    WebRtcAec_FreeAec(aec);
1488    return NULL;
1489  }
1490#ifdef WEBRTC_ANDROID
1491  aec->delay_agnostic_enabled = 1;  // DA-AEC enabled by default.
1492  // DA-AEC assumes the system is causal from the beginning and will self adjust
1493  // the lookahead when shifting is required.
1494  WebRtc_set_lookahead(aec->delay_estimator, 0);
1495#else
1496  aec->delay_agnostic_enabled = 0;
1497  WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks);
1498#endif
1499  aec->extended_filter_enabled = 0;
1500
1501  // Assembly optimization
1502  WebRtcAec_FilterFar = FilterFar;
1503  WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
1504  WebRtcAec_FilterAdaptation = FilterAdaptation;
1505  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
1506  WebRtcAec_ComfortNoise = ComfortNoise;
1507  WebRtcAec_SubbandCoherence = SubbandCoherence;
1508  WebRtcAec_StoreAsComplex = StoreAsComplex;
1509  WebRtcAec_PartitionDelay = PartitionDelay;
1510  WebRtcAec_WindowData = WindowData;
1511
1512
1513#if defined(WEBRTC_ARCH_X86_FAMILY)
1514  if (WebRtc_GetCPUInfo(kSSE2)) {
1515    WebRtcAec_InitAec_SSE2();
1516  }
1517#endif
1518
1519#if defined(MIPS_FPU_LE)
1520  WebRtcAec_InitAec_mips();
1521#endif
1522
1523#if defined(WEBRTC_HAS_NEON)
1524  WebRtcAec_InitAec_neon();
1525#elif defined(WEBRTC_DETECT_NEON)
1526  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
1527    WebRtcAec_InitAec_neon();
1528  }
1529#endif
1530
1531  aec_rdft_init();
1532
1533  return aec;
1534}
1535
1536void WebRtcAec_FreeAec(AecCore* aec) {
1537  int i;
1538  if (aec == NULL) {
1539    return;
1540  }
1541
1542  WebRtc_FreeBuffer(aec->nearFrBuf);
1543  WebRtc_FreeBuffer(aec->outFrBuf);
1544
1545  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
1546    WebRtc_FreeBuffer(aec->nearFrBufH[i]);
1547    WebRtc_FreeBuffer(aec->outFrBufH[i]);
1548  }
1549
1550  WebRtc_FreeBuffer(aec->far_time_buf);
1551
1552  RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile);
1553  RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile);
1554  RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile);
1555  RTC_AEC_DEBUG_WAV_CLOSE(aec->outLinearFile);
1556  RTC_AEC_DEBUG_RAW_CLOSE(aec->e_fft_file);
1557
1558  WebRtc_FreeDelayEstimator(aec->delay_estimator);
1559  WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend);
1560
1561  free(aec);
1562}
1563
1564int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
1565  int i;
1566
1567  aec->sampFreq = sampFreq;
1568
1569  if (sampFreq == 8000) {
1570    aec->normal_mu = 0.6f;
1571    aec->normal_error_threshold = 2e-6f;
1572    aec->num_bands = 1;
1573  } else {
1574    aec->normal_mu = 0.5f;
1575    aec->normal_error_threshold = 1.5e-6f;
1576    aec->num_bands = (size_t)(sampFreq / 16000);
1577  }
1578
1579  WebRtc_InitBuffer(aec->nearFrBuf);
1580  WebRtc_InitBuffer(aec->outFrBuf);
1581  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
1582    WebRtc_InitBuffer(aec->nearFrBufH[i]);
1583    WebRtc_InitBuffer(aec->outFrBufH[i]);
1584  }
1585
1586  // Initialize far-end buffers.
1587  WebRtc_InitBuffer(aec->far_time_buf);
1588
1589#ifdef WEBRTC_AEC_DEBUG_DUMP
1590  {
1591    int process_rate = sampFreq > 16000 ? 16000 : sampFreq;
1592    RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index,
1593                             aec->debug_dump_count, process_rate,
1594                             &aec->farFile );
1595    RTC_AEC_DEBUG_WAV_REOPEN("aec_near", aec->instance_index,
1596                             aec->debug_dump_count, process_rate,
1597                             &aec->nearFile);
1598    RTC_AEC_DEBUG_WAV_REOPEN("aec_out", aec->instance_index,
1599                             aec->debug_dump_count, process_rate,
1600                             &aec->outFile );
1601    RTC_AEC_DEBUG_WAV_REOPEN("aec_out_linear", aec->instance_index,
1602                             aec->debug_dump_count, process_rate,
1603                             &aec->outLinearFile);
1604  }
1605
1606  RTC_AEC_DEBUG_RAW_OPEN("aec_e_fft",
1607                         aec->debug_dump_count,
1608                         &aec->e_fft_file);
1609
1610  ++aec->debug_dump_count;
1611#endif
1612  aec->system_delay = 0;
1613
1614  if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) {
1615    return -1;
1616  }
1617  if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
1618    return -1;
1619  }
1620  aec->delay_logging_enabled = 0;
1621  aec->delay_metrics_delivered = 0;
1622  memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
1623  aec->num_delay_values = 0;
1624  aec->delay_median = -1;
1625  aec->delay_std = -1;
1626  aec->fraction_poor_delays = -1.0f;
1627
1628  aec->signal_delay_correction = 0;
1629  aec->previous_delay = -2;  // (-2): Uninitialized.
1630  aec->delay_correction_count = 0;
1631  aec->shift_offset = kInitialShiftOffset;
1632  aec->delay_quality_threshold = kDelayQualityThresholdMin;
1633
1634  aec->num_partitions = kNormalNumPartitions;
1635
1636  // Update the delay estimator with filter length.  We use half the
1637  // |num_partitions| to take the echo path into account.  In practice we say
1638  // that the echo has a duration of maximum half |num_partitions|, which is not
1639  // true, but serves as a crude measure.
1640  WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2);
1641  // TODO(bjornv): I currently hard coded the enable.  Once we've established
1642  // that AECM has no performance regression, robust_validation will be enabled
1643  // all the time and the APIs to turn it on/off will be removed.  Hence, remove
1644  // this line then.
1645  WebRtc_enable_robust_validation(aec->delay_estimator, 1);
1646  aec->frame_count = 0;
1647
1648  // Default target suppression mode.
1649  aec->nlp_mode = 1;
1650
1651  // Sampling frequency multiplier w.r.t. 8 kHz.
1652  // In case of multiple bands we process the lower band in 16 kHz, hence the
1653  // multiplier is always 2.
1654  if (aec->num_bands > 1) {
1655    aec->mult = 2;
1656  } else {
1657    aec->mult = (short)aec->sampFreq / 8000;
1658  }
1659
1660  aec->farBufWritePos = 0;
1661  aec->farBufReadPos = 0;
1662
1663  aec->inSamples = 0;
1664  aec->outSamples = 0;
1665  aec->knownDelay = 0;
1666
1667  // Initialize buffers
1668  memset(aec->dBuf, 0, sizeof(aec->dBuf));
1669  memset(aec->eBuf, 0, sizeof(aec->eBuf));
1670  // For H bands
1671  for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
1672    memset(aec->dBufH[i], 0, sizeof(aec->dBufH[i]));
1673  }
1674
1675  memset(aec->xPow, 0, sizeof(aec->xPow));
1676  memset(aec->dPow, 0, sizeof(aec->dPow));
1677  memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
1678  aec->noisePow = aec->dInitMinPow;
1679  aec->noiseEstCtr = 0;
1680
1681  // Initial comfort noise power
1682  for (i = 0; i < PART_LEN1; i++) {
1683    aec->dMinPow[i] = 1.0e6f;
1684  }
1685
1686  // Holds the last block written to
1687  aec->xfBufBlockPos = 0;
1688  // TODO: Investigate need for these initializations. Deleting them doesn't
1689  //       change the output at all and yields 0.4% overall speedup.
1690  memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
1691  memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
1692  memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
1693  memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
1694  memset(
1695      aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
1696  memset(aec->se, 0, sizeof(float) * PART_LEN1);
1697
1698  // To prevent numerical instability in the first block.
1699  for (i = 0; i < PART_LEN1; i++) {
1700    aec->sd[i] = 1;
1701  }
1702  for (i = 0; i < PART_LEN1; i++) {
1703    aec->sx[i] = 1;
1704  }
1705
1706  memset(aec->hNs, 0, sizeof(aec->hNs));
1707  memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
1708
1709  aec->hNlFbMin = 1;
1710  aec->hNlFbLocalMin = 1;
1711  aec->hNlXdAvgMin = 1;
1712  aec->hNlNewMin = 0;
1713  aec->hNlMinCtr = 0;
1714  aec->overDrive = 2;
1715  aec->overDriveSm = 2;
1716  aec->delayIdx = 0;
1717  aec->stNearState = 0;
1718  aec->echoState = 0;
1719  aec->divergeState = 0;
1720
1721  aec->seed = 777;
1722  aec->delayEstCtr = 0;
1723
1724  aec->extreme_filter_divergence = 0;
1725
1726  // Metrics disabled by default
1727  aec->metricsMode = 0;
1728  InitMetrics(aec);
1729
1730  return 0;
1731}
1732
1733
1734// For bit exactness with a legacy code, |farend| is supposed to contain
1735// |PART_LEN2| samples with an overlap of |PART_LEN| samples from the last
1736// frame.
1737// TODO(minyue): reduce |farend| to non-overlapped |PART_LEN| samples.
1738void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
1739  // Check if the buffer is full, and in that case flush the oldest data.
1740  if (WebRtc_available_write(aec->far_time_buf) < 1) {
1741    WebRtcAec_MoveFarReadPtr(aec, 1);
1742  }
1743
1744  WebRtc_WriteBuffer(aec->far_time_buf, farend, 1);
1745}
1746
1747int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
1748  int elements_moved = WebRtc_MoveReadPtr(aec->far_time_buf, elements);
1749  aec->system_delay -= elements_moved * PART_LEN;
1750  return elements_moved;
1751}
1752
1753void WebRtcAec_ProcessFrames(AecCore* aec,
1754                             const float* const* nearend,
1755                             size_t num_bands,
1756                             size_t num_samples,
1757                             int knownDelay,
1758                             float* const* out) {
1759  size_t i, j;
1760  int out_elements = 0;
1761
1762  aec->frame_count++;
1763  // For each frame the process is as follows:
1764  // 1) If the system_delay indicates on being too small for processing a
1765  //    frame we stuff the buffer with enough data for 10 ms.
1766  // 2 a) Adjust the buffer to the system delay, by moving the read pointer.
1767  //   b) Apply signal based delay correction, if we have detected poor AEC
1768  //    performance.
1769  // 3) TODO(bjornv): Investigate if we need to add this:
1770  //    If we can't move read pointer due to buffer size limitations we
1771  //    flush/stuff the buffer.
1772  // 4) Process as many partitions as possible.
1773  // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
1774  //    samples. Even though we will have data left to process (we work with
1775  //    partitions) we consider updating a whole frame, since that's the
1776  //    amount of data we input and output in audio_processing.
1777  // 6) Update the outputs.
1778
1779  // The AEC has two different delay estimation algorithms built in.  The
1780  // first relies on delay input values from the user and the amount of
1781  // shifted buffer elements is controlled by |knownDelay|.  This delay will
1782  // give a guess on how much we need to shift far-end buffers to align with
1783  // the near-end signal.  The other delay estimation algorithm uses the
1784  // far- and near-end signals to find the offset between them.  This one
1785  // (called "signal delay") is then used to fine tune the alignment, or
1786  // simply compensate for errors in the system based one.
1787  // Note that the two algorithms operate independently.  Currently, we only
1788  // allow one algorithm to be turned on.
1789
1790  assert(aec->num_bands == num_bands);
1791
1792  for (j = 0; j < num_samples; j+= FRAME_LEN) {
1793    // TODO(bjornv): Change the near-end buffer handling to be the same as for
1794    // far-end, that is, with a near_pre_buf.
1795    // Buffer the near-end frame.
1796    WebRtc_WriteBuffer(aec->nearFrBuf, &nearend[0][j], FRAME_LEN);
1797    // For H band
1798    for (i = 1; i < num_bands; ++i) {
1799      WebRtc_WriteBuffer(aec->nearFrBufH[i - 1], &nearend[i][j], FRAME_LEN);
1800    }
1801
1802    // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
1803    // have enough far-end data for that by stuffing the buffer if the
1804    // |system_delay| indicates others.
1805    if (aec->system_delay < FRAME_LEN) {
1806      // We don't have enough data so we rewind 10 ms.
1807      WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
1808    }
1809
1810    if (!aec->delay_agnostic_enabled) {
1811      // 2 a) Compensate for a possible change in the system delay.
1812
1813      // TODO(bjornv): Investigate how we should round the delay difference;
1814      // right now we know that incoming |knownDelay| is underestimated when
1815      // it's less than |aec->knownDelay|. We therefore, round (-32) in that
1816      // direction. In the other direction, we don't have this situation, but
1817      // might flush one partition too little. This can cause non-causality,
1818      // which should be investigated. Maybe, allow for a non-symmetric
1819      // rounding, like -16.
1820      int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
1821      int moved_elements =
1822          WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
1823      aec->knownDelay -= moved_elements * PART_LEN;
1824    } else {
1825      // 2 b) Apply signal based delay correction.
1826      int move_elements = SignalBasedDelayCorrection(aec);
1827      int moved_elements =
1828          WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
1829      int far_near_buffer_diff = WebRtc_available_read(aec->far_time_buf) -
1830          WebRtc_available_read(aec->nearFrBuf) / PART_LEN;
1831      WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
1832      WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
1833                                           moved_elements);
1834      aec->signal_delay_correction += moved_elements;
1835      // If we rely on reported system delay values only, a buffer underrun here
1836      // can never occur since we've taken care of that in 1) above.  Here, we
1837      // apply signal based delay correction and can therefore end up with
1838      // buffer underruns since the delay estimation can be wrong.  We therefore
1839      // stuff the buffer with enough elements if needed.
1840      if (far_near_buffer_diff < 0) {
1841        WebRtcAec_MoveFarReadPtr(aec, far_near_buffer_diff);
1842      }
1843    }
1844
1845    // 4) Process as many blocks as possible.
1846    while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
1847      ProcessBlock(aec);
1848    }
1849
1850    // 5) Update system delay with respect to the entire frame.
1851    aec->system_delay -= FRAME_LEN;
1852
1853    // 6) Update output frame.
1854    // Stuff the out buffer if we have less than a frame to output.
1855    // This should only happen for the first frame.
1856    out_elements = (int)WebRtc_available_read(aec->outFrBuf);
1857    if (out_elements < FRAME_LEN) {
1858      WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN);
1859      for (i = 0; i < num_bands - 1; ++i) {
1860        WebRtc_MoveReadPtr(aec->outFrBufH[i], out_elements - FRAME_LEN);
1861      }
1862    }
1863    // Obtain an output frame.
1864    WebRtc_ReadBuffer(aec->outFrBuf, NULL, &out[0][j], FRAME_LEN);
1865    // For H bands.
1866    for (i = 1; i < num_bands; ++i) {
1867      WebRtc_ReadBuffer(aec->outFrBufH[i - 1], NULL, &out[i][j], FRAME_LEN);
1868    }
1869  }
1870}
1871
1872int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
1873                                  float* fraction_poor_delays) {
1874  assert(self != NULL);
1875  assert(median != NULL);
1876  assert(std != NULL);
1877
1878  if (self->delay_logging_enabled == 0) {
1879    // Logging disabled.
1880    return -1;
1881  }
1882
1883  if (self->delay_metrics_delivered == 0) {
1884    UpdateDelayMetrics(self);
1885    self->delay_metrics_delivered = 1;
1886  }
1887  *median = self->delay_median;
1888  *std = self->delay_std;
1889  *fraction_poor_delays = self->fraction_poor_delays;
1890
1891  return 0;
1892}
1893
1894int WebRtcAec_echo_state(AecCore* self) { return self->echoState; }
1895
1896void WebRtcAec_GetEchoStats(AecCore* self,
1897                            Stats* erl,
1898                            Stats* erle,
1899                            Stats* a_nlp) {
1900  assert(erl != NULL);
1901  assert(erle != NULL);
1902  assert(a_nlp != NULL);
1903  *erl = self->erl;
1904  *erle = self->erle;
1905  *a_nlp = self->aNlp;
1906}
1907
1908void WebRtcAec_SetConfigCore(AecCore* self,
1909                             int nlp_mode,
1910                             int metrics_mode,
1911                             int delay_logging) {
1912  assert(nlp_mode >= 0 && nlp_mode < 3);
1913  self->nlp_mode = nlp_mode;
1914  self->metricsMode = metrics_mode;
1915  if (self->metricsMode) {
1916    InitMetrics(self);
1917  }
1918  // Turn on delay logging if it is either set explicitly or if delay agnostic
1919  // AEC is enabled (which requires delay estimates).
1920  self->delay_logging_enabled = delay_logging || self->delay_agnostic_enabled;
1921  if (self->delay_logging_enabled) {
1922    memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
1923  }
1924}
1925
1926void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable) {
1927  self->delay_agnostic_enabled = enable;
1928}
1929
1930int WebRtcAec_delay_agnostic_enabled(AecCore* self) {
1931  return self->delay_agnostic_enabled;
1932}
1933
1934void WebRtcAec_enable_extended_filter(AecCore* self, int enable) {
1935  self->extended_filter_enabled = enable;
1936  self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions;
1937  // Update the delay estimator with filter length.  See InitAEC() for details.
1938  WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2);
1939}
1940
1941int WebRtcAec_extended_filter_enabled(AecCore* self) {
1942  return self->extended_filter_enabled;
1943}
1944
1945int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }
1946
1947void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
1948  assert(delay >= 0);
1949  self->system_delay = delay;
1950}
1951