aec_core.c revision 5fc829200c451ac1033523a7ceaf2d9c5289c51d
1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11/*
12 * The core AEC algorithm, which is presented with time-aligned signals.
13 */
14
15#include "webrtc/modules/audio_processing/aec/aec_core.h"
16
17#include <assert.h>
18#include <math.h>
19#include <stddef.h>  // size_t
20#include <stdlib.h>
21#include <string.h>
22
23#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
24#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
25#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
26#include "webrtc/modules/audio_processing/utility/ring_buffer.h"
27#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
28#include "webrtc/typedefs.h"
29
30// Buffer size (samples)
31static const size_t kBufSizePartitions = 250;  // 1 second of audio in 16 kHz.
32
33// Noise suppression
34static const int converged = 250;
35
36// Metrics
37static const int subCountLen = 4;
38static const int countLen = 50;
39
40// Quantities to control H band scaling for SWB input
41static const int flagHbandCn = 1; // flag for adding comfort noise in H band
42static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band
43// Initial bin for averaging nlp gain in low band
44static const int freqAvgIc = PART_LEN / 2;
45
46// Matlab code to produce table:
47// win = sqrt(hanning(63)); win = [0 ; win(1:32)];
48// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
49static const float sqrtHanning[65] = {
50    0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
51    0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
52    0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
53    0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
54    0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
55    0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
56    0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
57    0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
58    0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
59    0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
60    0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
61    0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
62    0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
63    0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
64    0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
65    0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
66    0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
67    0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
68    0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
69    0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
70    0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
71    0.99969881869620f, 1.00000000000000f
72};
73
74// Matlab code to produce table:
75// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
76// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
77const float WebRtcAec_weightCurve[65] = {
78    0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
79    0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
80    0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f,
81    0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
82    0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f,
83    0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f,
84    0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f,
85    0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
86    0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f,
87    0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f,
88    0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
89};
90
91// Matlab code to produce table:
92// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
93// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
94const float WebRtcAec_overDriveCurve[65] = {
95    1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
96    1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
97    1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f,
98    1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
99    1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f,
100    1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f,
101    1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f,
102    1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
103    1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f,
104    1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f,
105    1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f
106};
107
108// Target suppression levels for nlp modes.
109// log{0.001, 0.00001, 0.00000001}
110static const float kTargetSupp[3] = { -6.9f, -11.5f, -18.4f };
111static const float kMinOverDrive[3] = { 1.0f, 2.0f, 5.0f };
112
113#ifdef WEBRTC_AEC_DEBUG_DUMP
114extern int webrtc_aec_instance_count;
115#endif
116
117// "Private" function prototypes.
118static void ProcessBlock(aec_t* aec);
119
120static void NonLinearProcessing(aec_t *aec, short *output, short *outputH);
121
122static void GetHighbandGain(const float *lambda, float *nlpGainHband);
123
124// Comfort_noise also computes noise for H band returned in comfortNoiseHband
125static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
126                                  complex_t *comfortNoiseHband,
127                                  const float *noisePow, const float *lambda);
128
129static void WebRtcAec_InitLevel(power_level_t *level);
130static void InitStats(Stats* stats);
131static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]);
132static void UpdateMetrics(aec_t *aec);
133// Convert from time domain to frequency domain. Note that |time_data| are
134// overwritten.
135static void TimeToFrequency(float time_data[PART_LEN2],
136                            float freq_data[2][PART_LEN1],
137                            int window);
138
139__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
140{
141    return aRe * bRe - aIm * bIm;
142}
143
144__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
145{
146    return aRe * bIm + aIm * bRe;
147}
148
149static int CmpFloat(const void *a, const void *b)
150{
151    const float *da = (const float *)a;
152    const float *db = (const float *)b;
153
154    return (*da > *db) - (*da < *db);
155}
156
157int WebRtcAec_CreateAec(aec_t **aecInst)
158{
159    aec_t *aec = malloc(sizeof(aec_t));
160    *aecInst = aec;
161    if (aec == NULL) {
162        return -1;
163    }
164
165    if (WebRtc_CreateBuffer(&aec->nearFrBuf,
166                            FRAME_LEN + PART_LEN,
167                            sizeof(int16_t)) == -1) {
168        WebRtcAec_FreeAec(aec);
169        aec = NULL;
170        return -1;
171    }
172
173    if (WebRtc_CreateBuffer(&aec->outFrBuf,
174                            FRAME_LEN + PART_LEN,
175                            sizeof(int16_t)) == -1) {
176        WebRtcAec_FreeAec(aec);
177        aec = NULL;
178        return -1;
179    }
180
181    if (WebRtc_CreateBuffer(&aec->nearFrBufH,
182                            FRAME_LEN + PART_LEN,
183                            sizeof(int16_t)) == -1) {
184        WebRtcAec_FreeAec(aec);
185        aec = NULL;
186        return -1;
187    }
188
189    if (WebRtc_CreateBuffer(&aec->outFrBufH,
190                            FRAME_LEN + PART_LEN,
191                            sizeof(int16_t)) == -1) {
192        WebRtcAec_FreeAec(aec);
193        aec = NULL;
194        return -1;
195    }
196
197    // Create far-end buffers.
198    if (WebRtc_CreateBuffer(&aec->far_buf, kBufSizePartitions,
199                            sizeof(float) * 2 * PART_LEN1) == -1) {
200        WebRtcAec_FreeAec(aec);
201        aec = NULL;
202        return -1;
203    }
204    if (WebRtc_CreateBuffer(&aec->far_buf_windowed, kBufSizePartitions,
205                            sizeof(float) * 2 * PART_LEN1) == -1) {
206        WebRtcAec_FreeAec(aec);
207        aec = NULL;
208        return -1;
209    }
210#ifdef WEBRTC_AEC_DEBUG_DUMP
211    if (WebRtc_CreateBuffer(&aec->far_time_buf, kBufSizePartitions,
212                            sizeof(int16_t) * PART_LEN) == -1) {
213        WebRtcAec_FreeAec(aec);
214        aec = NULL;
215        return -1;
216    }
217    {
218        char filename[64];
219        sprintf(filename, "aec_far%d.pcm", webrtc_aec_instance_count);
220        aec->farFile = fopen(filename, "wb");
221        sprintf(filename, "aec_near%d.pcm", webrtc_aec_instance_count);
222        aec->nearFile = fopen(filename, "wb");
223        sprintf(filename, "aec_out%d.pcm", webrtc_aec_instance_count);
224        aec->outFile = fopen(filename, "wb");
225        sprintf(filename, "aec_out_linear%d.pcm", webrtc_aec_instance_count);
226        aec->outLinearFile = fopen(filename, "wb");
227    }
228#endif
229    aec->delay_estimator_farend =
230        WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks);
231    if (aec->delay_estimator_farend == NULL) {
232      WebRtcAec_FreeAec(aec);
233      aec = NULL;
234      return -1;
235    }
236    aec->delay_estimator =
237        WebRtc_CreateDelayEstimator(aec->delay_estimator_farend,
238                                    kLookaheadBlocks);
239    if (aec->delay_estimator == NULL) {
240      WebRtcAec_FreeAec(aec);
241      aec = NULL;
242      return -1;
243    }
244
245    return 0;
246}
247
248int WebRtcAec_FreeAec(aec_t *aec)
249{
250    if (aec == NULL) {
251        return -1;
252    }
253
254    WebRtc_FreeBuffer(aec->nearFrBuf);
255    WebRtc_FreeBuffer(aec->outFrBuf);
256
257    WebRtc_FreeBuffer(aec->nearFrBufH);
258    WebRtc_FreeBuffer(aec->outFrBufH);
259
260    WebRtc_FreeBuffer(aec->far_buf);
261    WebRtc_FreeBuffer(aec->far_buf_windowed);
262#ifdef WEBRTC_AEC_DEBUG_DUMP
263    WebRtc_FreeBuffer(aec->far_time_buf);
264    fclose(aec->farFile);
265    fclose(aec->nearFile);
266    fclose(aec->outFile);
267    fclose(aec->outLinearFile);
268#endif
269    WebRtc_FreeDelayEstimator(aec->delay_estimator);
270    WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend);
271
272    free(aec);
273    return 0;
274}
275
276static void FilterFar(aec_t *aec, float yf[2][PART_LEN1])
277{
278  int i;
279  for (i = 0; i < NR_PART; i++) {
280    int j;
281    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
282    int pos = i * PART_LEN1;
283    // Check for wrap
284    if (i + aec->xfBufBlockPos >= NR_PART) {
285      xPos -= NR_PART*(PART_LEN1);
286    }
287
288    for (j = 0; j < PART_LEN1; j++) {
289      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
290                        aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
291      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
292                        aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
293    }
294  }
295}
296
297static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
298{
299  int i;
300  float absEf;
301  for (i = 0; i < (PART_LEN1); i++) {
302    ef[0][i] /= (aec->xPow[i] + 1e-10f);
303    ef[1][i] /= (aec->xPow[i] + 1e-10f);
304    absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
305
306    if (absEf > aec->errThresh) {
307      absEf = aec->errThresh / (absEf + 1e-10f);
308      ef[0][i] *= absEf;
309      ef[1][i] *= absEf;
310    }
311
312    // Stepsize factor
313    ef[0][i] *= aec->mu;
314    ef[1][i] *= aec->mu;
315  }
316}
317
318// Time-unconstrined filter adaptation.
319// TODO(andrew): consider for a low-complexity mode.
320//static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
321//                                          float ef[2][PART_LEN1]) {
322//  int i, j;
323//  for (i = 0; i < NR_PART; i++) {
324//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
325//    int pos;
326//    // Check for wrap
327//    if (i + aec->xfBufBlockPos >= NR_PART) {
328//      xPos -= NR_PART * PART_LEN1;
329//    }
330//
331//    pos = i * PART_LEN1;
332//
333//    for (j = 0; j < PART_LEN1; j++) {
334//      aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
335//                                      -aec->xfBuf[xPos + j][1],
336//                                      ef[j][0], ef[j][1]);
337//      aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
338//                                      -aec->xfBuf[xPos + j][1],
339//                                      ef[j][0], ef[j][1]);
340//    }
341//  }
342//}
343
344static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
345  int i, j;
346  for (i = 0; i < NR_PART; i++) {
347    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
348    int pos;
349    // Check for wrap
350    if (i + aec->xfBufBlockPos >= NR_PART) {
351      xPos -= NR_PART * PART_LEN1;
352    }
353
354    pos = i * PART_LEN1;
355
356    for (j = 0; j < PART_LEN; j++) {
357
358      fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
359                         -aec->xfBuf[1][xPos + j],
360                         ef[0][j], ef[1][j]);
361      fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
362                             -aec->xfBuf[1][xPos + j],
363                             ef[0][j], ef[1][j]);
364    }
365    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
366                   -aec->xfBuf[1][xPos + PART_LEN],
367                   ef[0][PART_LEN], ef[1][PART_LEN]);
368
369    aec_rdft_inverse_128(fft);
370    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
371
372    // fft scaling
373    {
374      float scale = 2.0f / PART_LEN2;
375      for (j = 0; j < PART_LEN; j++) {
376        fft[j] *= scale;
377      }
378    }
379    aec_rdft_forward_128(fft);
380
381    aec->wfBuf[0][pos] += fft[0];
382    aec->wfBuf[0][pos + PART_LEN] += fft[1];
383
384    for (j = 1; j < PART_LEN; j++) {
385      aec->wfBuf[0][pos + j] += fft[2 * j];
386      aec->wfBuf[1][pos + j] += fft[2 * j + 1];
387    }
388  }
389}
390
391static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1],
392                                 const float hNlFb,
393                                 float efw[2][PART_LEN1]) {
394  int i;
395  for (i = 0; i < PART_LEN1; i++) {
396    // Weight subbands
397    if (hNl[i] > hNlFb) {
398      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
399          (1 - WebRtcAec_weightCurve[i]) * hNl[i];
400    }
401    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
402
403    // Suppress error signal
404    efw[0][i] *= hNl[i];
405    efw[1][i] *= hNl[i];
406
407    // Ooura fft returns incorrect sign on imaginary component. It matters here
408    // because we are making an additive change with comfort noise.
409    efw[1][i] *= -1;
410  }
411}
412
413WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
414WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
415WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
416WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
417
418int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
419{
420    int i;
421
422    aec->sampFreq = sampFreq;
423
424    if (sampFreq == 8000) {
425        aec->mu = 0.6f;
426        aec->errThresh = 2e-6f;
427    }
428    else {
429        aec->mu = 0.5f;
430        aec->errThresh = 1.5e-6f;
431    }
432
433    if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
434        return -1;
435    }
436
437    if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
438        return -1;
439    }
440
441    if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
442        return -1;
443    }
444
445    if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
446        return -1;
447    }
448
449    // Initialize far-end buffers.
450    if (WebRtc_InitBuffer(aec->far_buf) == -1) {
451        return -1;
452    }
453    if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
454        return -1;
455    }
456#ifdef WEBRTC_AEC_DEBUG_DUMP
457    if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
458        return -1;
459    }
460#endif
461    aec->system_delay = 0;
462
463    if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) {
464      return -1;
465    }
466    if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
467      return -1;
468    }
469    aec->delay_logging_enabled = 0;
470    memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
471
472    // Default target suppression mode.
473    aec->nlp_mode = 1;
474
475    // Sampling frequency multiplier
476    // SWB is processed as 160 frame size
477    if (aec->sampFreq == 32000) {
478      aec->mult = (short)aec->sampFreq / 16000;
479    }
480    else {
481        aec->mult = (short)aec->sampFreq / 8000;
482    }
483
484    aec->farBufWritePos = 0;
485    aec->farBufReadPos = 0;
486
487    aec->inSamples = 0;
488    aec->outSamples = 0;
489    aec->knownDelay = 0;
490
491    // Initialize buffers
492    memset(aec->dBuf, 0, sizeof(aec->dBuf));
493    memset(aec->eBuf, 0, sizeof(aec->eBuf));
494    // For H band
495    memset(aec->dBufH, 0, sizeof(aec->dBufH));
496
497    memset(aec->xPow, 0, sizeof(aec->xPow));
498    memset(aec->dPow, 0, sizeof(aec->dPow));
499    memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
500    aec->noisePow = aec->dInitMinPow;
501    aec->noiseEstCtr = 0;
502
503    // Initial comfort noise power
504    for (i = 0; i < PART_LEN1; i++) {
505        aec->dMinPow[i] = 1.0e6f;
506    }
507
508    // Holds the last block written to
509    aec->xfBufBlockPos = 0;
510    // TODO: Investigate need for these initializations. Deleting them doesn't
511    //       change the output at all and yields 0.4% overall speedup.
512    memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
513    memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
514    memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
515    memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
516    memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
517    memset(aec->se, 0, sizeof(float) * PART_LEN1);
518
519    // To prevent numerical instability in the first block.
520    for (i = 0; i < PART_LEN1; i++) {
521        aec->sd[i] = 1;
522    }
523    for (i = 0; i < PART_LEN1; i++) {
524        aec->sx[i] = 1;
525    }
526
527    memset(aec->hNs, 0, sizeof(aec->hNs));
528    memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
529
530    aec->hNlFbMin = 1;
531    aec->hNlFbLocalMin = 1;
532    aec->hNlXdAvgMin = 1;
533    aec->hNlNewMin = 0;
534    aec->hNlMinCtr = 0;
535    aec->overDrive = 2;
536    aec->overDriveSm = 2;
537    aec->delayIdx = 0;
538    aec->stNearState = 0;
539    aec->echoState = 0;
540    aec->divergeState = 0;
541
542    aec->seed = 777;
543    aec->delayEstCtr = 0;
544
545    // Metrics disabled by default
546    aec->metricsMode = 0;
547    WebRtcAec_InitMetrics(aec);
548
549    // Assembly optimization
550    WebRtcAec_FilterFar = FilterFar;
551    WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
552    WebRtcAec_FilterAdaptation = FilterAdaptation;
553    WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
554
555#if defined(WEBRTC_ARCH_X86_FAMILY)
556    if (WebRtc_GetCPUInfo(kSSE2)) {
557      WebRtcAec_InitAec_SSE2();
558    }
559#endif
560
561    aec_rdft_init();
562
563    return 0;
564}
565
566void WebRtcAec_InitMetrics(aec_t *aec)
567{
568    aec->stateCounter = 0;
569    WebRtcAec_InitLevel(&aec->farlevel);
570    WebRtcAec_InitLevel(&aec->nearlevel);
571    WebRtcAec_InitLevel(&aec->linoutlevel);
572    WebRtcAec_InitLevel(&aec->nlpoutlevel);
573
574    InitStats(&aec->erl);
575    InitStats(&aec->erle);
576    InitStats(&aec->aNlp);
577    InitStats(&aec->rerl);
578}
579
580void WebRtcAec_BufferFarendPartition(aec_t *aec, const float* farend) {
581  float fft[PART_LEN2];
582  float xf[2][PART_LEN1];
583
584  // Check if the buffer is full, and in that case flush the oldest data.
585  if (WebRtc_available_write(aec->far_buf) < 1) {
586    WebRtcAec_MoveFarReadPtr(aec, 1);
587  }
588  // Convert far-end partition to the frequency domain without windowing.
589  memcpy(fft, farend, sizeof(float) * PART_LEN2);
590  TimeToFrequency(fft, xf, 0);
591  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
592
593  // Convert far-end partition to the frequency domain with windowing.
594  memcpy(fft, farend, sizeof(float) * PART_LEN2);
595  TimeToFrequency(fft, xf, 1);
596  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
597}
598
599int WebRtcAec_MoveFarReadPtr(aec_t *aec, int elements) {
600  int elements_moved = WebRtc_MoveReadPtr(aec->far_buf_windowed, elements);
601  WebRtc_MoveReadPtr(aec->far_buf, elements);
602#ifdef WEBRTC_AEC_DEBUG_DUMP
603  WebRtc_MoveReadPtr(aec->far_time_buf, elements);
604#endif
605  aec->system_delay -= elements_moved * PART_LEN;
606  return elements_moved;
607}
608
609void WebRtcAec_ProcessFrame(aec_t *aec,
610                            const short *nearend,
611                            const short *nearendH,
612                            int knownDelay)
613{
614    // For each frame the process is as follows:
615    // 1) If the system_delay indicates on being too small for processing a
616    //    frame we stuff the buffer with enough data for 10 ms.
617    // 2) Adjust the buffer to the system delay, by moving the read pointer.
618    // 3) If we can't move read pointer due to buffer size limitations we
619    //    flush/stuff the buffer.
620    // 4) Process as many partitions as possible.
621    // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
622    //    samples. Even though we will have data left to process (we work with
623    //    partitions) we consider updating a whole frame, since that's the
624    //    amount of data we input and output in audio_processing.
625
626    // TODO(bjornv): Investigate how we should round the delay difference; right
627    // now we know that incoming |knownDelay| is underestimated when it's less
628    // than |aec->knownDelay|. We therefore, round (-32) in that direction. In
629    // the other direction, we don't have this situation, but might flush one
630    // partition too little. This can cause non-causality, which should be
631    // investigated. Maybe, allow for a non-symmetric rounding, like -16.
632    int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
633    int moved_elements = 0;
634
635    // TODO(bjornv): Change the near-end buffer handling to be the same as for
636    // far-end, that is, with a near_pre_buf.
637    // Buffer the near-end frame.
638    WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
639    // For H band
640    if (aec->sampFreq == 32000) {
641        WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
642    }
643
644    // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
645    // have enough far-end data for that by stuffing the buffer if the
646    // |system_delay| indicates others.
647    if (aec->system_delay < FRAME_LEN) {
648      // We don't have enough data so we rewind 10 ms.
649      WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
650    }
651
652    // 2) Compensate for a possible change in the system delay.
653    WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
654    moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
655    aec->knownDelay -= moved_elements * PART_LEN;
656#ifdef WEBRTC_AEC_DEBUG_DUMP
657    WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
658#endif
659
660    // 4) Process as many blocks as possible.
661    while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
662        ProcessBlock(aec);
663    }
664
665    // 5) Update system delay with respect to the entire frame.
666    aec->system_delay -= FRAME_LEN;
667}
668
669int WebRtcAec_GetDelayMetricsCore(aec_t* self, int* median, int* std) {
670  int i = 0;
671  int delay_values = 0;
672  int num_delay_values = 0;
673  int my_median = 0;
674  const int kMsPerBlock = PART_LEN / (self->mult * 8);
675  float l1_norm = 0;
676
677  assert(self != NULL);
678  assert(median != NULL);
679  assert(std != NULL);
680
681  if (self->delay_logging_enabled == 0) {
682    // Logging disabled.
683    return -1;
684  }
685
686  // Get number of delay values since last update.
687  for (i = 0; i < kHistorySizeBlocks; i++) {
688    num_delay_values += self->delay_histogram[i];
689  }
690  if (num_delay_values == 0) {
691    // We have no new delay value data. Even though -1 is a valid estimate, it
692    // will practically never be used since multiples of |kMsPerBlock| will
693    // always be returned.
694    *median = -1;
695    *std = -1;
696    return 0;
697  }
698
699  delay_values = num_delay_values >> 1;  // Start value for median count down.
700  // Get median of delay values since last update.
701  for (i = 0; i < kHistorySizeBlocks; i++) {
702    delay_values -= self->delay_histogram[i];
703    if (delay_values < 0) {
704      my_median = i;
705      break;
706    }
707  }
708  // Account for lookahead.
709  *median = (my_median - kLookaheadBlocks) * kMsPerBlock;
710
711  // Calculate the L1 norm, with median value as central moment.
712  for (i = 0; i < kHistorySizeBlocks; i++) {
713    l1_norm += (float) (fabs(i - my_median) * self->delay_histogram[i]);
714  }
715  *std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock;
716
717  // Reset histogram.
718  memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
719
720  return 0;
721}
722
723int WebRtcAec_echo_state(aec_t* self) {
724  assert(self != NULL);
725  return self->echoState;
726}
727
728void WebRtcAec_GetEchoStats(aec_t* self, Stats* erl, Stats* erle,
729                            Stats* a_nlp) {
730  assert(self != NULL);
731  assert(erl != NULL);
732  assert(erle != NULL);
733  assert(a_nlp != NULL);
734  *erl = self->erl;
735  *erle = self->erle;
736  *a_nlp = self->aNlp;
737}
738
739static void ProcessBlock(aec_t* aec) {
740    int i;
741    float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
742    float scale;
743
744    float fft[PART_LEN2];
745    float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
746    float df[2][PART_LEN1];
747    float far_spectrum = 0.0f;
748    float near_spectrum = 0.0f;
749    float abs_far_spectrum[PART_LEN1];
750    float abs_near_spectrum[PART_LEN1];
751
752    const float gPow[2] = {0.9f, 0.1f};
753
754    // Noise estimate constants.
755    const int noiseInitBlocks = 500 * aec->mult;
756    const float step = 0.1f;
757    const float ramp = 1.0002f;
758    const float gInitNoise[2] = {0.999f, 0.001f};
759
760    int16_t nearend[PART_LEN];
761    int16_t* nearend_ptr = NULL;
762    int16_t output[PART_LEN];
763    int16_t outputH[PART_LEN];
764
765    float* xf_ptr = NULL;
766
767    memset(dH, 0, sizeof(dH));
768    if (aec->sampFreq == 32000) {
769      // Get the upper band first so we can reuse |nearend|.
770      WebRtc_ReadBuffer(aec->nearFrBufH,
771                        (void**) &nearend_ptr,
772                        nearend,
773                        PART_LEN);
774      for (i = 0; i < PART_LEN; i++) {
775          dH[i] = (float) (nearend_ptr[i]);
776      }
777      memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
778    }
779    WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN);
780
781    // ---------- Ooura fft ----------
782    // Concatenate old and new nearend blocks.
783    for (i = 0; i < PART_LEN; i++) {
784        d[i] = (float) (nearend_ptr[i]);
785    }
786    memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);
787
788#ifdef WEBRTC_AEC_DEBUG_DUMP
789    {
790        int16_t farend[PART_LEN];
791        int16_t* farend_ptr = NULL;
792        WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1);
793        (void)fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
794        (void)fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
795    }
796#endif
797
798    // We should always have at least one element stored in |far_buf|.
799    assert(WebRtc_available_read(aec->far_buf) > 0);
800    WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1);
801
802    // Near fft
803    memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
804    TimeToFrequency(fft, df, 0);
805
806    // Power smoothing
807    for (i = 0; i < PART_LEN1; i++) {
808      far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
809          (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
810      aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
811      // Calculate absolute spectra
812      abs_far_spectrum[i] = sqrtf(far_spectrum);
813
814      near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
815      aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
816      // Calculate absolute spectra
817      abs_near_spectrum[i] = sqrtf(near_spectrum);
818    }
819
820    // Estimate noise power. Wait until dPow is more stable.
821    if (aec->noiseEstCtr > 50) {
822        for (i = 0; i < PART_LEN1; i++) {
823            if (aec->dPow[i] < aec->dMinPow[i]) {
824                aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] -
825                    aec->dPow[i])) * ramp;
826            }
827            else {
828                aec->dMinPow[i] *= ramp;
829            }
830        }
831    }
832
833    // Smooth increasing noise power from zero at the start,
834    // to avoid a sudden burst of comfort noise.
835    if (aec->noiseEstCtr < noiseInitBlocks) {
836        aec->noiseEstCtr++;
837        for (i = 0; i < PART_LEN1; i++) {
838            if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
839                aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
840                    gInitNoise[1] * aec->dMinPow[i];
841            }
842            else {
843                aec->dInitMinPow[i] = aec->dMinPow[i];
844            }
845        }
846        aec->noisePow = aec->dInitMinPow;
847    }
848    else {
849        aec->noisePow = aec->dMinPow;
850    }
851
852    // Block wise delay estimation used for logging
853    if (aec->delay_logging_enabled) {
854      int delay_estimate = 0;
855      if (WebRtc_AddFarSpectrumFloat(aec->delay_estimator_farend,
856                                     abs_far_spectrum, PART_LEN1) == 0) {
857        delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
858                                                           abs_near_spectrum,
859                                                           PART_LEN1);
860        if (delay_estimate >= 0) {
861          // Update delay estimate buffer.
862          aec->delay_histogram[delay_estimate]++;
863        }
864      }
865    }
866
867    // Update the xfBuf block position.
868    aec->xfBufBlockPos--;
869    if (aec->xfBufBlockPos == -1) {
870        aec->xfBufBlockPos = NR_PART - 1;
871    }
872
873    // Buffer xf
874    memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr,
875           sizeof(float) * PART_LEN1);
876    memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1],
877           sizeof(float) * PART_LEN1);
878
879    memset(yf, 0, sizeof(yf));
880
881    // Filter far
882    WebRtcAec_FilterFar(aec, yf);
883
884    // Inverse fft to obtain echo estimate and error.
885    fft[0] = yf[0][0];
886    fft[1] = yf[0][PART_LEN];
887    for (i = 1; i < PART_LEN; i++) {
888        fft[2 * i] = yf[0][i];
889        fft[2 * i + 1] = yf[1][i];
890    }
891    aec_rdft_inverse_128(fft);
892
893    scale = 2.0f / PART_LEN2;
894    for (i = 0; i < PART_LEN; i++) {
895        y[i] = fft[PART_LEN + i] * scale; // fft scaling
896    }
897
898    for (i = 0; i < PART_LEN; i++) {
899        e[i] = d[i] - y[i];
900    }
901
902    // Error fft
903    memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
904    memset(fft, 0, sizeof(float) * PART_LEN);
905    memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
906    // TODO(bjornv): Change to use TimeToFrequency().
907    aec_rdft_forward_128(fft);
908
909    ef[1][0] = 0;
910    ef[1][PART_LEN] = 0;
911    ef[0][0] = fft[0];
912    ef[0][PART_LEN] = fft[1];
913    for (i = 1; i < PART_LEN; i++) {
914        ef[0][i] = fft[2 * i];
915        ef[1][i] = fft[2 * i + 1];
916    }
917
918    if (aec->metricsMode == 1) {
919      // Note that the first PART_LEN samples in fft (before transformation) are
920      // zero. Hence, the scaling by two in UpdateLevel() should not be
921      // performed. That scaling is taken care of in UpdateMetrics() instead.
922      UpdateLevel(&aec->linoutlevel, ef);
923    }
924
925    // Scale error signal inversely with far power.
926    WebRtcAec_ScaleErrorSignal(aec, ef);
927    WebRtcAec_FilterAdaptation(aec, fft, ef);
928    NonLinearProcessing(aec, output, outputH);
929
930    if (aec->metricsMode == 1) {
931        // Update power levels and echo metrics
932        UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr);
933        UpdateLevel(&aec->nearlevel, df);
934        UpdateMetrics(aec);
935    }
936
937    // Store the output block.
938    WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
939    // For H band
940    if (aec->sampFreq == 32000) {
941        WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
942    }
943
944#ifdef WEBRTC_AEC_DEBUG_DUMP
945    {
946        int16_t eInt16[PART_LEN];
947        for (i = 0; i < PART_LEN; i++) {
948            eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i],
949                WEBRTC_SPL_WORD16_MIN);
950        }
951
952        (void)fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
953        (void)fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
954    }
955#endif
956}
957
958static void NonLinearProcessing(aec_t *aec, short *output, short *outputH)
959{
960    float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
961    complex_t comfortNoiseHband[PART_LEN1];
962    float fft[PART_LEN2];
963    float scale, dtmp;
964    float nlpGainHband;
965    int i, j, pos;
966
967    // Coherence and non-linear filter
968    float cohde[PART_LEN1], cohxd[PART_LEN1];
969    float hNlDeAvg, hNlXdAvg;
970    float hNl[PART_LEN1];
971    float hNlPref[PREF_BAND_SIZE];
972    float hNlFb = 0, hNlFbLow = 0;
973    const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
974    const int prefBandSize = PREF_BAND_SIZE / aec->mult;
975    const int minPrefBand = 4 / aec->mult;
976
977    // Near and error power sums
978    float sdSum = 0, seSum = 0;
979
980    // Power estimate smoothing coefficients
981    const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}};
982    const float *ptrGCoh = gCoh[aec->mult - 1];
983
984    // Filter energy
985    float wfEnMax = 0, wfEn = 0;
986    const int delayEstInterval = 10 * aec->mult;
987
988    float* xfw_ptr = NULL;
989
990    aec->delayEstCtr++;
991    if (aec->delayEstCtr == delayEstInterval) {
992        aec->delayEstCtr = 0;
993    }
994
995    // initialize comfort noise for H band
996    memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
997    nlpGainHband = (float)0.0;
998    dtmp = (float)0.0;
999
1000    // Measure energy in each filter partition to determine delay.
1001    // TODO: Spread by computing one partition per block?
1002    if (aec->delayEstCtr == 0) {
1003        wfEnMax = 0;
1004        aec->delayIdx = 0;
1005        for (i = 0; i < NR_PART; i++) {
1006            pos = i * PART_LEN1;
1007            wfEn = 0;
1008            for (j = 0; j < PART_LEN1; j++) {
1009                wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
1010                    aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
1011            }
1012
1013            if (wfEn > wfEnMax) {
1014                wfEnMax = wfEn;
1015                aec->delayIdx = i;
1016            }
1017        }
1018    }
1019
1020    // We should always have at least one element stored in |far_buf|.
1021    assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
1022    // NLP
1023    WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1);
1024
1025    // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
1026    // |xfwBuf|.
1027    // Buffer far.
1028    memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
1029
1030    // Use delayed far.
1031    memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));
1032
1033    // Windowed near fft
1034    for (i = 0; i < PART_LEN; i++) {
1035        fft[i] = aec->dBuf[i] * sqrtHanning[i];
1036        fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
1037    }
1038    aec_rdft_forward_128(fft);
1039
1040    dfw[1][0] = 0;
1041    dfw[1][PART_LEN] = 0;
1042    dfw[0][0] = fft[0];
1043    dfw[0][PART_LEN] = fft[1];
1044    for (i = 1; i < PART_LEN; i++) {
1045        dfw[0][i] = fft[2 * i];
1046        dfw[1][i] = fft[2 * i + 1];
1047    }
1048
1049    // Windowed error fft
1050    for (i = 0; i < PART_LEN; i++) {
1051        fft[i] = aec->eBuf[i] * sqrtHanning[i];
1052        fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
1053    }
1054    aec_rdft_forward_128(fft);
1055    efw[1][0] = 0;
1056    efw[1][PART_LEN] = 0;
1057    efw[0][0] = fft[0];
1058    efw[0][PART_LEN] = fft[1];
1059    for (i = 1; i < PART_LEN; i++) {
1060        efw[0][i] = fft[2 * i];
1061        efw[1][i] = fft[2 * i + 1];
1062    }
1063
1064    // Smoothed PSD
1065    for (i = 0; i < PART_LEN1; i++) {
1066        aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] *
1067            (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
1068        aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] *
1069            (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
1070        // We threshold here to protect against the ill-effects of a zero farend.
1071        // The threshold is not arbitrarily chosen, but balances protection and
1072        // adverse interaction with the algorithm's tuning.
1073        // TODO: investigate further why this is so sensitive.
1074        aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] *
1075            WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);
1076
1077        aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] *
1078            (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
1079        aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] *
1080            (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
1081
1082        aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] *
1083            (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
1084        aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] *
1085            (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
1086
1087        sdSum += aec->sd[i];
1088        seSum += aec->se[i];
1089    }
1090
1091    // Divergent filter safeguard.
1092    if (aec->divergeState == 0) {
1093        if (seSum > sdSum) {
1094            aec->divergeState = 1;
1095        }
1096    }
1097    else {
1098        if (seSum * 1.05f < sdSum) {
1099            aec->divergeState = 0;
1100        }
1101    }
1102
1103    if (aec->divergeState == 1) {
1104        memcpy(efw, dfw, sizeof(efw));
1105    }
1106
1107    // Reset if error is significantly larger than nearend (13 dB).
1108    if (seSum > (19.95f * sdSum)) {
1109        memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
1110    }
1111
1112    // Subband coherence
1113    for (i = 0; i < PART_LEN1; i++) {
1114        cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
1115            (aec->sd[i] * aec->se[i] + 1e-10f);
1116        cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
1117            (aec->sx[i] * aec->sd[i] + 1e-10f);
1118    }
1119
1120    hNlXdAvg = 0;
1121    for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
1122        hNlXdAvg += cohxd[i];
1123    }
1124    hNlXdAvg /= prefBandSize;
1125    hNlXdAvg = 1 - hNlXdAvg;
1126
1127    hNlDeAvg = 0;
1128    for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
1129        hNlDeAvg += cohde[i];
1130    }
1131    hNlDeAvg /= prefBandSize;
1132
1133    if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
1134        aec->hNlXdAvgMin = hNlXdAvg;
1135    }
1136
1137    if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
1138        aec->stNearState = 1;
1139    }
1140    else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
1141        aec->stNearState = 0;
1142    }
1143
1144    if (aec->hNlXdAvgMin == 1) {
1145        aec->echoState = 0;
1146        aec->overDrive = kMinOverDrive[aec->nlp_mode];
1147
1148        if (aec->stNearState == 1) {
1149            memcpy(hNl, cohde, sizeof(hNl));
1150            hNlFb = hNlDeAvg;
1151            hNlFbLow = hNlDeAvg;
1152        }
1153        else {
1154            for (i = 0; i < PART_LEN1; i++) {
1155                hNl[i] = 1 - cohxd[i];
1156            }
1157            hNlFb = hNlXdAvg;
1158            hNlFbLow = hNlXdAvg;
1159        }
1160    }
1161    else {
1162
1163        if (aec->stNearState == 1) {
1164            aec->echoState = 0;
1165            memcpy(hNl, cohde, sizeof(hNl));
1166            hNlFb = hNlDeAvg;
1167            hNlFbLow = hNlDeAvg;
1168        }
1169        else {
1170            aec->echoState = 1;
1171            for (i = 0; i < PART_LEN1; i++) {
1172                hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
1173            }
1174
1175            // Select an order statistic from the preferred bands.
1176            // TODO: Using quicksort now, but a selection algorithm may be preferred.
1177            memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
1178            qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
1179            hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
1180            hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
1181        }
1182    }
1183
1184    // Track the local filter minimum to determine suppression overdrive.
1185    if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
1186        aec->hNlFbLocalMin = hNlFbLow;
1187        aec->hNlFbMin = hNlFbLow;
1188        aec->hNlNewMin = 1;
1189        aec->hNlMinCtr = 0;
1190    }
1191    aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
1192    aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);
1193
1194    if (aec->hNlNewMin == 1) {
1195        aec->hNlMinCtr++;
1196    }
1197    if (aec->hNlMinCtr == 2) {
1198        aec->hNlNewMin = 0;
1199        aec->hNlMinCtr = 0;
1200        aec->overDrive = WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] /
1201            ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f),
1202            kMinOverDrive[aec->nlp_mode]);
1203    }
1204
1205    // Smooth the overdrive.
1206    if (aec->overDrive < aec->overDriveSm) {
1207      aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
1208    }
1209    else {
1210      aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
1211    }
1212
1213    WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);
1214
1215    // Add comfort noise.
1216    ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
1217
1218    // TODO(bjornv): Investigate how to take the windowing below into account if
1219    // needed.
1220    if (aec->metricsMode == 1) {
1221      // Note that we have a scaling by two in the time domain |eBuf|.
1222      // In addition the time domain signal is windowed before transformation,
1223      // losing half the energy on the average. We take care of the first
1224      // scaling only in UpdateMetrics().
1225      UpdateLevel(&aec->nlpoutlevel, efw);
1226    }
1227    // Inverse error fft.
1228    fft[0] = efw[0][0];
1229    fft[1] = efw[0][PART_LEN];
1230    for (i = 1; i < PART_LEN; i++) {
1231        fft[2*i] = efw[0][i];
1232        // Sign change required by Ooura fft.
1233        fft[2*i + 1] = -efw[1][i];
1234    }
1235    aec_rdft_inverse_128(fft);
1236
1237    // Overlap and add to obtain output.
1238    scale = 2.0f / PART_LEN2;
1239    for (i = 0; i < PART_LEN; i++) {
1240        fft[i] *= scale; // fft scaling
1241        fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i];
1242
1243        // Saturation protection
1244        output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i],
1245            WEBRTC_SPL_WORD16_MIN);
1246
1247        fft[PART_LEN + i] *= scale; // fft scaling
1248        aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
1249    }
1250
1251    // For H band
1252    if (aec->sampFreq == 32000) {
1253
1254        // H band gain
1255        // average nlp over low band: average over second half of freq spectrum
1256        // (4->8khz)
1257        GetHighbandGain(hNl, &nlpGainHband);
1258
1259        // Inverse comfort_noise
1260        if (flagHbandCn == 1) {
1261            fft[0] = comfortNoiseHband[0][0];
1262            fft[1] = comfortNoiseHband[PART_LEN][0];
1263            for (i = 1; i < PART_LEN; i++) {
1264                fft[2*i] = comfortNoiseHband[i][0];
1265                fft[2*i + 1] = comfortNoiseHband[i][1];
1266            }
1267            aec_rdft_inverse_128(fft);
1268            scale = 2.0f / PART_LEN2;
1269        }
1270
1271        // compute gain factor
1272        for (i = 0; i < PART_LEN; i++) {
1273            dtmp = (float)aec->dBufH[i];
1274            dtmp = (float)dtmp * nlpGainHband; // for variable gain
1275
1276            // add some comfort noise where Hband is attenuated
1277            if (flagHbandCn == 1) {
1278                fft[i] *= scale; // fft scaling
1279                dtmp += cnScaleHband * fft[i];
1280            }
1281
1282            // Saturation protection
1283            outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp,
1284                WEBRTC_SPL_WORD16_MIN);
1285         }
1286    }
1287
1288    // Copy the current block to the old position.
1289    memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
1290    memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
1291
1292    // Copy the current block to the old position for H band
1293    if (aec->sampFreq == 32000) {
1294        memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN);
1295    }
1296
1297    memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) -
1298        sizeof(complex_t) * PART_LEN1);
1299}
1300
1301static void GetHighbandGain(const float *lambda, float *nlpGainHband)
1302{
1303    int i;
1304
1305    nlpGainHband[0] = (float)0.0;
1306    for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
1307        nlpGainHband[0] += lambda[i];
1308    }
1309    nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
1310}
1311
1312static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
1313    complex_t *comfortNoiseHband, const float *noisePow, const float *lambda)
1314{
1315    int i, num;
1316    float rand[PART_LEN];
1317    float noise, noiseAvg, tmp, tmpAvg;
1318    WebRtc_Word16 randW16[PART_LEN];
1319    complex_t u[PART_LEN1];
1320
1321    const float pi2 = 6.28318530717959f;
1322
1323    // Generate a uniform random array on [0 1]
1324    WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
1325    for (i = 0; i < PART_LEN; i++) {
1326        rand[i] = ((float)randW16[i]) / 32768;
1327    }
1328
1329    // Reject LF noise
1330    u[0][0] = 0;
1331    u[0][1] = 0;
1332    for (i = 1; i < PART_LEN1; i++) {
1333        tmp = pi2 * rand[i - 1];
1334
1335        noise = sqrtf(noisePow[i]);
1336        u[i][0] = noise * cosf(tmp);
1337        u[i][1] = -noise * sinf(tmp);
1338    }
1339    u[PART_LEN][1] = 0;
1340
1341    for (i = 0; i < PART_LEN1; i++) {
1342        // This is the proper weighting to match the background noise power
1343        tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
1344        //tmp = 1 - lambda[i];
1345        efw[0][i] += tmp * u[i][0];
1346        efw[1][i] += tmp * u[i][1];
1347    }
1348
1349    // For H band comfort noise
1350    // TODO: don't compute noise and "tmp" twice. Use the previous results.
1351    noiseAvg = 0.0;
1352    tmpAvg = 0.0;
1353    num = 0;
1354    if (aec->sampFreq == 32000 && flagHbandCn == 1) {
1355
1356        // average noise scale
1357        // average over second half of freq spectrum (i.e., 4->8khz)
1358        // TODO: we shouldn't need num. We know how many elements we're summing.
1359        for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
1360            num++;
1361            noiseAvg += sqrtf(noisePow[i]);
1362        }
1363        noiseAvg /= (float)num;
1364
1365        // average nlp scale
1366        // average over second half of freq spectrum (i.e., 4->8khz)
1367        // TODO: we shouldn't need num. We know how many elements we're summing.
1368        num = 0;
1369        for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
1370            num++;
1371            tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
1372        }
1373        tmpAvg /= (float)num;
1374
1375        // Use average noise for H band
1376        // TODO: we should probably have a new random vector here.
1377        // Reject LF noise
1378        u[0][0] = 0;
1379        u[0][1] = 0;
1380        for (i = 1; i < PART_LEN1; i++) {
1381            tmp = pi2 * rand[i - 1];
1382
1383            // Use average noise for H band
1384            u[i][0] = noiseAvg * (float)cos(tmp);
1385            u[i][1] = -noiseAvg * (float)sin(tmp);
1386        }
1387        u[PART_LEN][1] = 0;
1388
1389        for (i = 0; i < PART_LEN1; i++) {
1390            // Use average NLP weight for H band
1391            comfortNoiseHband[i][0] = tmpAvg * u[i][0];
1392            comfortNoiseHband[i][1] = tmpAvg * u[i][1];
1393        }
1394    }
1395}
1396
1397static void WebRtcAec_InitLevel(power_level_t *level)
1398{
1399    const float bigFloat = 1E17f;
1400
1401    level->averagelevel = 0;
1402    level->framelevel = 0;
1403    level->minlevel = bigFloat;
1404    level->frsum = 0;
1405    level->sfrsum = 0;
1406    level->frcounter = 0;
1407    level->sfrcounter = 0;
1408}
1409
1410static void InitStats(Stats* stats) {
1411  stats->instant = offsetLevel;
1412  stats->average = offsetLevel;
1413  stats->max = offsetLevel;
1414  stats->min = offsetLevel * (-1);
1415  stats->sum = 0;
1416  stats->hisum = 0;
1417  stats->himean = offsetLevel;
1418  stats->counter = 0;
1419  stats->hicounter = 0;
1420}
1421
1422static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]) {
1423  // Do the energy calculation in the frequency domain. The FFT is performed on
1424  // a segment of PART_LEN2 samples due to overlap, but we only want the energy
1425  // of half that data (the last PART_LEN samples). Parseval's relation states
1426  // that the energy is preserved according to
1427  //
1428  // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
1429  //                           = ENERGY,
1430  //
1431  // where N = PART_LEN2. Since we are only interested in calculating the energy
1432  // for the last PART_LEN samples we approximate by calculating ENERGY and
1433  // divide by 2,
1434  //
1435  // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
1436  //
1437  // Since we deal with real valued time domain signals we only store frequency
1438  // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
1439  // need to add the contribution from the missing part in
1440  // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
1441  // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
1442  // is the values in the for loop below, but multiplication by 2 and division
1443  // by 2 cancel.
1444
1445  // TODO(bjornv): Investigate reusing energy calculations performed at other
1446  // places in the code.
1447  int k = 1;
1448  // Imaginary parts are zero at end points and left out of the calculation.
1449  float energy = (in[0][0] * in[0][0]) / 2;
1450  energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
1451
1452  for (k = 1; k < PART_LEN; k++) {
1453    energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
1454  }
1455  energy /= PART_LEN2;
1456
1457  level->sfrsum += energy;
1458  level->sfrcounter++;
1459
1460  if (level->sfrcounter > subCountLen) {
1461    level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
1462    level->sfrsum = 0;
1463    level->sfrcounter = 0;
1464    if (level->framelevel > 0) {
1465      if (level->framelevel < level->minlevel) {
1466        level->minlevel = level->framelevel;  // New minimum.
1467      } else {
1468        level->minlevel *= (1 + 0.001f);  // Small increase.
1469      }
1470    }
1471    level->frcounter++;
1472    level->frsum += level->framelevel;
1473    if (level->frcounter > countLen) {
1474      level->averagelevel = level->frsum / countLen;
1475      level->frsum = 0;
1476      level->frcounter = 0;
1477    }
1478  }
1479}
1480
1481static void UpdateMetrics(aec_t *aec)
1482{
1483    float dtmp, dtmp2;
1484
1485    const float actThresholdNoisy = 8.0f;
1486    const float actThresholdClean = 40.0f;
1487    const float safety = 0.99995f;
1488    const float noisyPower = 300000.0f;
1489
1490    float actThreshold;
1491    float echo, suppressedEcho;
1492
1493    if (aec->echoState) {   // Check if echo is likely present
1494        aec->stateCounter++;
1495    }
1496
1497    if (aec->farlevel.frcounter == 0) {
1498
1499        if (aec->farlevel.minlevel < noisyPower) {
1500            actThreshold = actThresholdClean;
1501        }
1502        else {
1503            actThreshold = actThresholdNoisy;
1504        }
1505
1506        if ((aec->stateCounter > (0.5f * countLen * subCountLen))
1507            && (aec->farlevel.sfrcounter == 0)
1508
1509            // Estimate in active far-end segments only
1510            && (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel))
1511            ) {
1512
1513            // Subtract noise power
1514            echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
1515
1516            // ERL
1517            dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
1518                aec->nearlevel.averagelevel + 1e-10f);
1519            dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
1520
1521            aec->erl.instant = dtmp;
1522            if (dtmp > aec->erl.max) {
1523                aec->erl.max = dtmp;
1524            }
1525
1526            if (dtmp < aec->erl.min) {
1527                aec->erl.min = dtmp;
1528            }
1529
1530            aec->erl.counter++;
1531            aec->erl.sum += dtmp;
1532            aec->erl.average = aec->erl.sum / aec->erl.counter;
1533
1534            // Upper mean
1535            if (dtmp > aec->erl.average) {
1536                aec->erl.hicounter++;
1537                aec->erl.hisum += dtmp;
1538                aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
1539            }
1540
1541            // A_NLP
1542            dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
1543                (2 * aec->linoutlevel.averagelevel) + 1e-10f);
1544
1545            // subtract noise power
1546            suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
1547                safety * aec->linoutlevel.minlevel);
1548
1549            dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
1550
1551            aec->aNlp.instant = dtmp2;
1552            if (dtmp > aec->aNlp.max) {
1553                aec->aNlp.max = dtmp;
1554            }
1555
1556            if (dtmp < aec->aNlp.min) {
1557                aec->aNlp.min = dtmp;
1558            }
1559
1560            aec->aNlp.counter++;
1561            aec->aNlp.sum += dtmp;
1562            aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;
1563
1564            // Upper mean
1565            if (dtmp > aec->aNlp.average) {
1566                aec->aNlp.hicounter++;
1567                aec->aNlp.hisum += dtmp;
1568                aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
1569            }
1570
1571            // ERLE
1572
1573            // subtract noise power
1574            suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
1575                safety * aec->nlpoutlevel.minlevel);
1576
1577            dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
1578                (2 * aec->nlpoutlevel.averagelevel) + 1e-10f);
1579            dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
1580
1581            dtmp = dtmp2;
1582            aec->erle.instant = dtmp;
1583            if (dtmp > aec->erle.max) {
1584                aec->erle.max = dtmp;
1585            }
1586
1587            if (dtmp < aec->erle.min) {
1588                aec->erle.min = dtmp;
1589            }
1590
1591            aec->erle.counter++;
1592            aec->erle.sum += dtmp;
1593            aec->erle.average = aec->erle.sum / aec->erle.counter;
1594
1595            // Upper mean
1596            if (dtmp > aec->erle.average) {
1597                aec->erle.hicounter++;
1598                aec->erle.hisum += dtmp;
1599                aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
1600            }
1601        }
1602
1603        aec->stateCounter = 0;
1604    }
1605}
1606
1607static void TimeToFrequency(float time_data[PART_LEN2],
1608                            float freq_data[2][PART_LEN1],
1609                            int window) {
1610  int i = 0;
1611
1612  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
1613  if (window) {
1614    for (i = 0; i < PART_LEN; i++) {
1615      time_data[i] *= sqrtHanning[i];
1616      time_data[PART_LEN + i] *= sqrtHanning[PART_LEN - i];
1617    }
1618  }
1619
1620  aec_rdft_forward_128(time_data);
1621  // Reorder.
1622  freq_data[1][0] = 0;
1623  freq_data[1][PART_LEN] = 0;
1624  freq_data[0][0] = time_data[0];
1625  freq_data[0][PART_LEN] = time_data[1];
1626  for (i = 1; i < PART_LEN; i++) {
1627    freq_data[0][i] = time_data[2 * i];
1628    freq_data[1][i] = time_data[2 * i + 1];
1629  }
1630}
1631