1/*
2 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11/*
12 * The core AEC algorithm, which is presented with time-aligned signals.
13 */
14
15#include "aec_core.h"
16
17#include <assert.h>
18#include <math.h>
19#include <stddef.h>  // size_t
20#include <stdlib.h>
21#include <string.h>
22
23#include "aec_rdft.h"
24#include "delay_estimator_wrapper.h"
25#include "ring_buffer.h"
26#include "system_wrappers/interface/cpu_features_wrapper.h"
27#include "typedefs.h"
28
29// Buffer size (samples)
30static const size_t kBufSizePartitions = 250;  // 1 second of audio in 16 kHz.
31
32// Noise suppression
33static const int converged = 250;
34
35// Metrics
36static const int subCountLen = 4;
37static const int countLen = 50;
38
39// Quantities to control H band scaling for SWB input
40static const int flagHbandCn = 1; // flag for adding comfort noise in H band
41static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band
42// Initial bin for averaging nlp gain in low band
43static const int freqAvgIc = PART_LEN / 2;
44
45// Matlab code to produce table:
46// win = sqrt(hanning(63)); win = [0 ; win(1:32)];
47// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
48static const float sqrtHanning[65] = {
49    0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
50    0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
51    0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
52    0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
53    0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
54    0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
55    0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
56    0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
57    0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
58    0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
59    0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
60    0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
61    0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
62    0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
63    0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
64    0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
65    0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
66    0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
67    0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
68    0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
69    0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
70    0.99969881869620f, 1.00000000000000f
71};
72
73// Matlab code to produce table:
74// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
75// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
76const float WebRtcAec_weightCurve[65] = {
77    0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
78    0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
79    0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f,
80    0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
81    0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f,
82    0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f,
83    0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f,
84    0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
85    0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f,
86    0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f,
87    0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
88};
89
90// Matlab code to produce table:
91// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
92// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
93const float WebRtcAec_overDriveCurve[65] = {
94    1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
95    1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
96    1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f,
97    1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
98    1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f,
99    1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f,
100    1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f,
101    1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
102    1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f,
103    1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f,
104    1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f
105};
106
107// "Private" function prototypes.
108static void ProcessBlock(aec_t* aec);
109
110static void NonLinearProcessing(aec_t *aec, short *output, short *outputH);
111
112static void GetHighbandGain(const float *lambda, float *nlpGainHband);
113
114// Comfort_noise also computes noise for H band returned in comfortNoiseHband
115static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
116                                  complex_t *comfortNoiseHband,
117                                  const float *noisePow, const float *lambda);
118
119static void WebRtcAec_InitLevel(power_level_t *level);
120static void WebRtcAec_InitStats(stats_t *stats);
121static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]);
122static void UpdateMetrics(aec_t *aec);
123// Convert from time domain to frequency domain. Note that |time_data| are
124// overwritten.
125static void TimeToFrequency(float time_data[PART_LEN2],
126                            float freq_data[2][PART_LEN1],
127                            int window);
128
129__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
130{
131    return aRe * bRe - aIm * bIm;
132}
133
134__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
135{
136    return aRe * bIm + aIm * bRe;
137}
138
139static int CmpFloat(const void *a, const void *b)
140{
141    const float *da = (const float *)a;
142    const float *db = (const float *)b;
143
144    return (*da > *db) - (*da < *db);
145}
146
147int WebRtcAec_CreateAec(aec_t **aecInst)
148{
149    aec_t *aec = malloc(sizeof(aec_t));
150    *aecInst = aec;
151    if (aec == NULL) {
152        return -1;
153    }
154
155    if (WebRtc_CreateBuffer(&aec->nearFrBuf,
156                            FRAME_LEN + PART_LEN,
157                            sizeof(int16_t)) == -1) {
158        WebRtcAec_FreeAec(aec);
159        aec = NULL;
160        return -1;
161    }
162
163    if (WebRtc_CreateBuffer(&aec->outFrBuf,
164                            FRAME_LEN + PART_LEN,
165                            sizeof(int16_t)) == -1) {
166        WebRtcAec_FreeAec(aec);
167        aec = NULL;
168        return -1;
169    }
170
171    if (WebRtc_CreateBuffer(&aec->nearFrBufH,
172                            FRAME_LEN + PART_LEN,
173                            sizeof(int16_t)) == -1) {
174        WebRtcAec_FreeAec(aec);
175        aec = NULL;
176        return -1;
177    }
178
179    if (WebRtc_CreateBuffer(&aec->outFrBufH,
180                            FRAME_LEN + PART_LEN,
181                            sizeof(int16_t)) == -1) {
182        WebRtcAec_FreeAec(aec);
183        aec = NULL;
184        return -1;
185    }
186
187    // Create far-end buffers.
188    if (WebRtc_CreateBuffer(&aec->far_buf, kBufSizePartitions,
189                            sizeof(float) * 2 * PART_LEN1) == -1) {
190        WebRtcAec_FreeAec(aec);
191        aec = NULL;
192        return -1;
193    }
194    if (WebRtc_CreateBuffer(&aec->far_buf_windowed, kBufSizePartitions,
195                            sizeof(float) * 2 * PART_LEN1) == -1) {
196        WebRtcAec_FreeAec(aec);
197        aec = NULL;
198        return -1;
199    }
200#ifdef WEBRTC_AEC_DEBUG_DUMP
201    if (WebRtc_CreateBuffer(&aec->far_time_buf, kBufSizePartitions,
202                            sizeof(int16_t) * PART_LEN) == -1) {
203        WebRtcAec_FreeAec(aec);
204        aec = NULL;
205        return -1;
206    }
207#endif
208    if (WebRtc_CreateDelayEstimator(&aec->delay_estimator,
209                                    PART_LEN1,
210                                    kMaxDelayBlocks,
211                                    kLookaheadBlocks) == -1) {
212      WebRtcAec_FreeAec(aec);
213      aec = NULL;
214      return -1;
215    }
216
217    return 0;
218}
219
220int WebRtcAec_FreeAec(aec_t *aec)
221{
222    if (aec == NULL) {
223        return -1;
224    }
225
226    WebRtc_FreeBuffer(aec->nearFrBuf);
227    WebRtc_FreeBuffer(aec->outFrBuf);
228
229    WebRtc_FreeBuffer(aec->nearFrBufH);
230    WebRtc_FreeBuffer(aec->outFrBufH);
231
232    WebRtc_FreeBuffer(aec->far_buf);
233    WebRtc_FreeBuffer(aec->far_buf_windowed);
234#ifdef WEBRTC_AEC_DEBUG_DUMP
235    WebRtc_FreeBuffer(aec->far_time_buf);
236#endif
237    WebRtc_FreeDelayEstimator(aec->delay_estimator);
238
239    free(aec);
240    return 0;
241}
242
243static void FilterFar(aec_t *aec, float yf[2][PART_LEN1])
244{
245  int i;
246  for (i = 0; i < NR_PART; i++) {
247    int j;
248    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
249    int pos = i * PART_LEN1;
250    // Check for wrap
251    if (i + aec->xfBufBlockPos >= NR_PART) {
252      xPos -= NR_PART*(PART_LEN1);
253    }
254
255    for (j = 0; j < PART_LEN1; j++) {
256      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
257                        aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
258      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
259                        aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
260    }
261  }
262}
263
264static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
265{
266  int i;
267  float absEf;
268  for (i = 0; i < (PART_LEN1); i++) {
269    ef[0][i] /= (aec->xPow[i] + 1e-10f);
270    ef[1][i] /= (aec->xPow[i] + 1e-10f);
271    absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
272
273    if (absEf > aec->errThresh) {
274      absEf = aec->errThresh / (absEf + 1e-10f);
275      ef[0][i] *= absEf;
276      ef[1][i] *= absEf;
277    }
278
279    // Stepsize factor
280    ef[0][i] *= aec->mu;
281    ef[1][i] *= aec->mu;
282  }
283}
284
285// Time-unconstrined filter adaptation.
286// TODO(andrew): consider for a low-complexity mode.
287//static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
288//                                          float ef[2][PART_LEN1]) {
289//  int i, j;
290//  for (i = 0; i < NR_PART; i++) {
291//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
292//    int pos;
293//    // Check for wrap
294//    if (i + aec->xfBufBlockPos >= NR_PART) {
295//      xPos -= NR_PART * PART_LEN1;
296//    }
297//
298//    pos = i * PART_LEN1;
299//
300//    for (j = 0; j < PART_LEN1; j++) {
301//      aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
302//                                      -aec->xfBuf[xPos + j][1],
303//                                      ef[j][0], ef[j][1]);
304//      aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
305//                                      -aec->xfBuf[xPos + j][1],
306//                                      ef[j][0], ef[j][1]);
307//    }
308//  }
309//}
310
311static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
312  int i, j;
313  for (i = 0; i < NR_PART; i++) {
314    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
315    int pos;
316    // Check for wrap
317    if (i + aec->xfBufBlockPos >= NR_PART) {
318      xPos -= NR_PART * PART_LEN1;
319    }
320
321    pos = i * PART_LEN1;
322
323    for (j = 0; j < PART_LEN; j++) {
324
325      fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
326                         -aec->xfBuf[1][xPos + j],
327                         ef[0][j], ef[1][j]);
328      fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
329                             -aec->xfBuf[1][xPos + j],
330                             ef[0][j], ef[1][j]);
331    }
332    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
333                   -aec->xfBuf[1][xPos + PART_LEN],
334                   ef[0][PART_LEN], ef[1][PART_LEN]);
335
336    aec_rdft_inverse_128(fft);
337    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
338
339    // fft scaling
340    {
341      float scale = 2.0f / PART_LEN2;
342      for (j = 0; j < PART_LEN; j++) {
343        fft[j] *= scale;
344      }
345    }
346    aec_rdft_forward_128(fft);
347
348    aec->wfBuf[0][pos] += fft[0];
349    aec->wfBuf[0][pos + PART_LEN] += fft[1];
350
351    for (j = 1; j < PART_LEN; j++) {
352      aec->wfBuf[0][pos + j] += fft[2 * j];
353      aec->wfBuf[1][pos + j] += fft[2 * j + 1];
354    }
355  }
356}
357
358static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1],
359                                 const float hNlFb,
360                                 float efw[2][PART_LEN1]) {
361  int i;
362  for (i = 0; i < PART_LEN1; i++) {
363    // Weight subbands
364    if (hNl[i] > hNlFb) {
365      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
366          (1 - WebRtcAec_weightCurve[i]) * hNl[i];
367    }
368    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
369
370    // Suppress error signal
371    efw[0][i] *= hNl[i];
372    efw[1][i] *= hNl[i];
373
374    // Ooura fft returns incorrect sign on imaginary component. It matters here
375    // because we are making an additive change with comfort noise.
376    efw[1][i] *= -1;
377  }
378}
379
380WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
381WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
382WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
383WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
384
385int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
386{
387    int i;
388
389    aec->sampFreq = sampFreq;
390
391    if (sampFreq == 8000) {
392        aec->mu = 0.6f;
393        aec->errThresh = 2e-6f;
394    }
395    else {
396        aec->mu = 0.5f;
397        aec->errThresh = 1.5e-6f;
398    }
399
400    if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
401        return -1;
402    }
403
404    if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
405        return -1;
406    }
407
408    if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
409        return -1;
410    }
411
412    if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
413        return -1;
414    }
415
416    // Initialize far-end buffers.
417    if (WebRtc_InitBuffer(aec->far_buf) == -1) {
418        return -1;
419    }
420    if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
421        return -1;
422    }
423#ifdef WEBRTC_AEC_DEBUG_DUMP
424    if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
425        return -1;
426    }
427#endif
428    aec->system_delay = 0;
429
430    if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
431      return -1;
432    }
433    aec->delay_logging_enabled = 0;
434    memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
435
436    // Default target suppression level
437    aec->targetSupp = -11.5;
438    aec->minOverDrive = 2.0;
439
440    // Sampling frequency multiplier
441    // SWB is processed as 160 frame size
442    if (aec->sampFreq == 32000) {
443      aec->mult = (short)aec->sampFreq / 16000;
444    }
445    else {
446        aec->mult = (short)aec->sampFreq / 8000;
447    }
448
449    aec->farBufWritePos = 0;
450    aec->farBufReadPos = 0;
451
452    aec->inSamples = 0;
453    aec->outSamples = 0;
454    aec->knownDelay = 0;
455
456    // Initialize buffers
457    memset(aec->dBuf, 0, sizeof(aec->dBuf));
458    memset(aec->eBuf, 0, sizeof(aec->eBuf));
459    // For H band
460    memset(aec->dBufH, 0, sizeof(aec->dBufH));
461
462    memset(aec->xPow, 0, sizeof(aec->xPow));
463    memset(aec->dPow, 0, sizeof(aec->dPow));
464    memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
465    aec->noisePow = aec->dInitMinPow;
466    aec->noiseEstCtr = 0;
467
468    // Initial comfort noise power
469    for (i = 0; i < PART_LEN1; i++) {
470        aec->dMinPow[i] = 1.0e6f;
471    }
472
473    // Holds the last block written to
474    aec->xfBufBlockPos = 0;
475    // TODO: Investigate need for these initializations. Deleting them doesn't
476    //       change the output at all and yields 0.4% overall speedup.
477    memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
478    memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
479    memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
480    memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
481    memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
482    memset(aec->se, 0, sizeof(float) * PART_LEN1);
483
484    // To prevent numerical instability in the first block.
485    for (i = 0; i < PART_LEN1; i++) {
486        aec->sd[i] = 1;
487    }
488    for (i = 0; i < PART_LEN1; i++) {
489        aec->sx[i] = 1;
490    }
491
492    memset(aec->hNs, 0, sizeof(aec->hNs));
493    memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
494
495    aec->hNlFbMin = 1;
496    aec->hNlFbLocalMin = 1;
497    aec->hNlXdAvgMin = 1;
498    aec->hNlNewMin = 0;
499    aec->hNlMinCtr = 0;
500    aec->overDrive = 2;
501    aec->overDriveSm = 2;
502    aec->delayIdx = 0;
503    aec->stNearState = 0;
504    aec->echoState = 0;
505    aec->divergeState = 0;
506
507    aec->seed = 777;
508    aec->delayEstCtr = 0;
509
510    // Metrics disabled by default
511    aec->metricsMode = 0;
512    WebRtcAec_InitMetrics(aec);
513
514    // Assembly optimization
515    WebRtcAec_FilterFar = FilterFar;
516    WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
517    WebRtcAec_FilterAdaptation = FilterAdaptation;
518    WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
519    if (WebRtc_GetCPUInfo(kSSE2)) {
520#if defined(WEBRTC_USE_SSE2)
521      WebRtcAec_InitAec_SSE2();
522#endif
523    }
524    aec_rdft_init();
525
526    return 0;
527}
528
529void WebRtcAec_InitMetrics(aec_t *aec)
530{
531    aec->stateCounter = 0;
532    WebRtcAec_InitLevel(&aec->farlevel);
533    WebRtcAec_InitLevel(&aec->nearlevel);
534    WebRtcAec_InitLevel(&aec->linoutlevel);
535    WebRtcAec_InitLevel(&aec->nlpoutlevel);
536
537    WebRtcAec_InitStats(&aec->erl);
538    WebRtcAec_InitStats(&aec->erle);
539    WebRtcAec_InitStats(&aec->aNlp);
540    WebRtcAec_InitStats(&aec->rerl);
541}
542
543
544void WebRtcAec_BufferFarendPartition(aec_t *aec, const float* farend) {
545  float fft[PART_LEN2];
546  float xf[2][PART_LEN1];
547
548  // Check if the buffer is full, and in that case flush the oldest data.
549  if (WebRtc_available_write(aec->far_buf) < 1) {
550    WebRtc_MoveReadPtr(aec->far_buf, 1);
551    WebRtc_MoveReadPtr(aec->far_buf_windowed, 1);
552    aec->system_delay -= PART_LEN;
553#ifdef WEBRTC_AEC_DEBUG_DUMP
554    WebRtc_MoveReadPtr(aec->far_time_buf, 1);
555#endif
556  }
557  // Convert far-end partition to the frequency domain without windowing.
558  memcpy(fft, farend, sizeof(float) * PART_LEN2);
559  TimeToFrequency(fft, xf, 0);
560  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
561
562  // Convert far-end partition to the frequency domain with windowing.
563  memcpy(fft, farend, sizeof(float) * PART_LEN2);
564  TimeToFrequency(fft, xf, 1);
565  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
566}
567
568void WebRtcAec_ProcessFrame(aec_t *aec,
569                            const short *nearend,
570                            const short *nearendH,
571                            int knownDelay)
572{
573    // For each frame the process is as follows:
574    // 1) If the system_delay indicates on being too small for processing a
575    //    frame we stuff the buffer with enough data for 10 ms.
576    // 2) Adjust the buffer to the system delay, by moving the read pointer.
577    // 3) If we can't move read pointer due to buffer size limitations we
578    //    flush/stuff the buffer.
579    // 4) Process as many partitions as possible.
580    // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
581    //    samples. Even though we will have data left to process (we work with
582    //    partitions) we consider updating a whole frame, since that's the
583    //    amount of data we input and output in audio_processing.
584
585    // TODO(bjornv): Investigate how we should round the delay difference; right
586    // now we know that incoming |knownDelay| is underestimated when it's less
587    // than |aec->knownDelay|. We therefore, round (-32) in that direction. In
588    // the other direction, we don't have this situation, but might flush one
589    // partition too little. This can cause non-causality, which should be
590    // investigated. Maybe, allow for a non-symmetric rounding, like -16.
591    int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
592    int moved_elements = 0;
593
594    // TODO(bjornv): Change the near-end buffer handling to be the same as for
595    // far-end, that is, with a near_pre_buf.
596    // Buffer the near-end frame.
597    WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
598    // For H band
599    if (aec->sampFreq == 32000) {
600        WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
601    }
602
603    // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
604    // have enough far-end data for that by stuffing the buffer if the
605    // |system_delay| indicates others.
606    if (aec->system_delay < FRAME_LEN) {
607      // We don't have enough data so we rewind 10 ms.
608      WebRtc_MoveReadPtr(aec->far_buf_windowed, -(aec->mult + 1));
609      aec->system_delay -= WebRtc_MoveReadPtr(aec->far_buf, -(aec->mult + 1)) *
610          PART_LEN;
611#ifdef WEBRTC_AEC_DEBUG_DUMP
612      WebRtc_MoveReadPtr(aec->far_time_buf, -(aec->mult + 1));
613#endif
614    }
615
616    // 2) Compensate for a possible change in the system delay.
617
618    WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
619    moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
620    aec->knownDelay -= moved_elements * PART_LEN;
621#ifdef WEBRTC_AEC_DEBUG_DUMP
622    WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
623#endif
624
625    // 4) Process as many blocks as possible.
626    while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
627        ProcessBlock(aec);
628    }
629
630    // 5) Update system delay with respect to the entire frame.
631    aec->system_delay -= FRAME_LEN;
632}
633
634static void ProcessBlock(aec_t* aec) {
635    int i;
636    float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
637    float scale;
638
639    float fft[PART_LEN2];
640    float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
641    float df[2][PART_LEN1];
642    float far_spectrum = 0.0f;
643    float near_spectrum = 0.0f;
644    float abs_far_spectrum[PART_LEN1];
645    float abs_near_spectrum[PART_LEN1];
646
647    const float gPow[2] = {0.9f, 0.1f};
648
649    // Noise estimate constants.
650    const int noiseInitBlocks = 500 * aec->mult;
651    const float step = 0.1f;
652    const float ramp = 1.0002f;
653    const float gInitNoise[2] = {0.999f, 0.001f};
654
655    int16_t nearend[PART_LEN];
656    int16_t* nearend_ptr = NULL;
657    int16_t output[PART_LEN];
658    int16_t outputH[PART_LEN];
659
660    float* xf_ptr = NULL;
661
662    memset(dH, 0, sizeof(dH));
663    if (aec->sampFreq == 32000) {
664      // Get the upper band first so we can reuse |nearend|.
665      WebRtc_ReadBuffer(aec->nearFrBufH,
666                        (void**) &nearend_ptr,
667                        nearend,
668                        PART_LEN);
669      for (i = 0; i < PART_LEN; i++) {
670          dH[i] = (float) (nearend_ptr[i]);
671      }
672      memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
673    }
674    WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN);
675
676    // ---------- Ooura fft ----------
677    // Concatenate old and new nearend blocks.
678    for (i = 0; i < PART_LEN; i++) {
679        d[i] = (float) (nearend_ptr[i]);
680    }
681    memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);
682
683#ifdef WEBRTC_AEC_DEBUG_DUMP
684    {
685        int16_t farend[PART_LEN];
686        int16_t* farend_ptr = NULL;
687        WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1);
688        fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
689        fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
690    }
691#endif
692
693    // We should always have at least one element stored in |far_buf|.
694    assert(WebRtc_available_read(aec->far_buf) > 0);
695    WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1);
696
697    // Near fft
698    memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
699    TimeToFrequency(fft, df, 0);
700
701    // Power smoothing
702    for (i = 0; i < PART_LEN1; i++) {
703      far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
704          (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
705      aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
706      // Calculate absolute spectra
707      abs_far_spectrum[i] = sqrtf(far_spectrum);
708
709      near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
710      aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
711      // Calculate absolute spectra
712      abs_near_spectrum[i] = sqrtf(near_spectrum);
713    }
714
715    // Estimate noise power. Wait until dPow is more stable.
716    if (aec->noiseEstCtr > 50) {
717        for (i = 0; i < PART_LEN1; i++) {
718            if (aec->dPow[i] < aec->dMinPow[i]) {
719                aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] -
720                    aec->dPow[i])) * ramp;
721            }
722            else {
723                aec->dMinPow[i] *= ramp;
724            }
725        }
726    }
727
728    // Smooth increasing noise power from zero at the start,
729    // to avoid a sudden burst of comfort noise.
730    if (aec->noiseEstCtr < noiseInitBlocks) {
731        aec->noiseEstCtr++;
732        for (i = 0; i < PART_LEN1; i++) {
733            if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
734                aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
735                    gInitNoise[1] * aec->dMinPow[i];
736            }
737            else {
738                aec->dInitMinPow[i] = aec->dMinPow[i];
739            }
740        }
741        aec->noisePow = aec->dInitMinPow;
742    }
743    else {
744        aec->noisePow = aec->dMinPow;
745    }
746
747    // Block wise delay estimation used for logging
748    if (aec->delay_logging_enabled) {
749      int delay_estimate = 0;
750      // Estimate the delay
751      delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
752                                                         abs_far_spectrum,
753                                                         abs_near_spectrum,
754                                                         PART_LEN1);
755      if (delay_estimate >= 0) {
756        // Update delay estimate buffer.
757        aec->delay_histogram[delay_estimate]++;
758      }
759    }
760
761    // Update the xfBuf block position.
762    aec->xfBufBlockPos--;
763    if (aec->xfBufBlockPos == -1) {
764        aec->xfBufBlockPos = NR_PART - 1;
765    }
766
767    // Buffer xf
768    memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr,
769           sizeof(float) * PART_LEN1);
770    memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1],
771           sizeof(float) * PART_LEN1);
772
773    memset(yf[0], 0, sizeof(float) * (PART_LEN1 * 2));
774
775    // Filter far
776    WebRtcAec_FilterFar(aec, yf);
777
778    // Inverse fft to obtain echo estimate and error.
779    fft[0] = yf[0][0];
780    fft[1] = yf[0][PART_LEN];
781    for (i = 1; i < PART_LEN; i++) {
782        fft[2 * i] = yf[0][i];
783        fft[2 * i + 1] = yf[1][i];
784    }
785    aec_rdft_inverse_128(fft);
786
787    scale = 2.0f / PART_LEN2;
788    for (i = 0; i < PART_LEN; i++) {
789        y[i] = fft[PART_LEN + i] * scale; // fft scaling
790    }
791
792    for (i = 0; i < PART_LEN; i++) {
793        e[i] = d[i] - y[i];
794    }
795
796    // Error fft
797    memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
798    memset(fft, 0, sizeof(float) * PART_LEN);
799    memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
800    // TODO(bjornv): Change to use TimeToFrequency().
801    aec_rdft_forward_128(fft);
802
803    ef[1][0] = 0;
804    ef[1][PART_LEN] = 0;
805    ef[0][0] = fft[0];
806    ef[0][PART_LEN] = fft[1];
807    for (i = 1; i < PART_LEN; i++) {
808        ef[0][i] = fft[2 * i];
809        ef[1][i] = fft[2 * i + 1];
810    }
811
812    if (aec->metricsMode == 1) {
813      // Note that the first PART_LEN samples in fft (before transformation) are
814      // zero. Hence, the scaling by two in UpdateLevel() should not be
815      // performed. That scaling is taken care of in UpdateMetrics() instead.
816      UpdateLevel(&aec->linoutlevel, ef);
817    }
818
819    // Scale error signal inversely with far power.
820    WebRtcAec_ScaleErrorSignal(aec, ef);
821    WebRtcAec_FilterAdaptation(aec, fft, ef);
822    NonLinearProcessing(aec, output, outputH);
823
824    if (aec->metricsMode == 1) {
825        // Update power levels and echo metrics
826        UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr);
827        UpdateLevel(&aec->nearlevel, df);
828        UpdateMetrics(aec);
829    }
830
831    // Store the output block.
832    WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
833    // For H band
834    if (aec->sampFreq == 32000) {
835        WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
836    }
837
838#ifdef WEBRTC_AEC_DEBUG_DUMP
839    {
840        int16_t eInt16[PART_LEN];
841        for (i = 0; i < PART_LEN; i++) {
842            eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i],
843                WEBRTC_SPL_WORD16_MIN);
844        }
845
846        fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
847        fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
848    }
849#endif
850}
851
852static void NonLinearProcessing(aec_t *aec, short *output, short *outputH)
853{
854    float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
855    complex_t comfortNoiseHband[PART_LEN1];
856    float fft[PART_LEN2];
857    float scale, dtmp;
858    float nlpGainHband;
859    int i, j, pos;
860
861    // Coherence and non-linear filter
862    float cohde[PART_LEN1], cohxd[PART_LEN1];
863    float hNlDeAvg, hNlXdAvg;
864    float hNl[PART_LEN1];
865    float hNlPref[PREF_BAND_SIZE];
866    float hNlFb = 0, hNlFbLow = 0;
867    const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
868    const int prefBandSize = PREF_BAND_SIZE / aec->mult;
869    const int minPrefBand = 4 / aec->mult;
870
871    // Near and error power sums
872    float sdSum = 0, seSum = 0;
873
874    // Power estimate smoothing coefficients
875    const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}};
876    const float *ptrGCoh = gCoh[aec->mult - 1];
877
878    // Filter energy
879    float wfEnMax = 0, wfEn = 0;
880    const int delayEstInterval = 10 * aec->mult;
881
882    float* xfw_ptr = NULL;
883
884    aec->delayEstCtr++;
885    if (aec->delayEstCtr == delayEstInterval) {
886        aec->delayEstCtr = 0;
887    }
888
889    // initialize comfort noise for H band
890    memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
891    nlpGainHband = (float)0.0;
892    dtmp = (float)0.0;
893
894    // Measure energy in each filter partition to determine delay.
895    // TODO: Spread by computing one partition per block?
896    if (aec->delayEstCtr == 0) {
897        wfEnMax = 0;
898        aec->delayIdx = 0;
899        for (i = 0; i < NR_PART; i++) {
900            pos = i * PART_LEN1;
901            wfEn = 0;
902            for (j = 0; j < PART_LEN1; j++) {
903                wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
904                    aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
905            }
906
907            if (wfEn > wfEnMax) {
908                wfEnMax = wfEn;
909                aec->delayIdx = i;
910            }
911        }
912    }
913
914    // We should always have at least one element stored in |far_buf|.
915    assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
916    // NLP
917    WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1);
918
919    // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
920    // |xfwBuf|.
921    // Buffer far.
922    memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
923
924    // Use delayed far.
925    memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));
926
927    // Windowed near fft
928    for (i = 0; i < PART_LEN; i++) {
929        fft[i] = aec->dBuf[i] * sqrtHanning[i];
930        fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
931    }
932    aec_rdft_forward_128(fft);
933
934    dfw[1][0] = 0;
935    dfw[1][PART_LEN] = 0;
936    dfw[0][0] = fft[0];
937    dfw[0][PART_LEN] = fft[1];
938    for (i = 1; i < PART_LEN; i++) {
939        dfw[0][i] = fft[2 * i];
940        dfw[1][i] = fft[2 * i + 1];
941    }
942
943    // Windowed error fft
944    for (i = 0; i < PART_LEN; i++) {
945        fft[i] = aec->eBuf[i] * sqrtHanning[i];
946        fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
947    }
948    aec_rdft_forward_128(fft);
949    efw[1][0] = 0;
950    efw[1][PART_LEN] = 0;
951    efw[0][0] = fft[0];
952    efw[0][PART_LEN] = fft[1];
953    for (i = 1; i < PART_LEN; i++) {
954        efw[0][i] = fft[2 * i];
955        efw[1][i] = fft[2 * i + 1];
956    }
957
958    // Smoothed PSD
959    for (i = 0; i < PART_LEN1; i++) {
960        aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] *
961            (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
962        aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] *
963            (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
964        // We threshold here to protect against the ill-effects of a zero farend.
965        // The threshold is not arbitrarily chosen, but balances protection and
966        // adverse interaction with the algorithm's tuning.
967        // TODO: investigate further why this is so sensitive.
968        aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] *
969            WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);
970
971        aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] *
972            (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
973        aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] *
974            (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
975
976        aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] *
977            (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
978        aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] *
979            (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
980
981        sdSum += aec->sd[i];
982        seSum += aec->se[i];
983    }
984
985    // Divergent filter safeguard.
986    if (aec->divergeState == 0) {
987        if (seSum > sdSum) {
988            aec->divergeState = 1;
989        }
990    }
991    else {
992        if (seSum * 1.05f < sdSum) {
993            aec->divergeState = 0;
994        }
995    }
996
997    if (aec->divergeState == 1) {
998        memcpy(efw, dfw, sizeof(efw));
999    }
1000
1001    // Reset if error is significantly larger than nearend (13 dB).
1002    if (seSum > (19.95f * sdSum)) {
1003        memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
1004    }
1005
1006    // Subband coherence
1007    for (i = 0; i < PART_LEN1; i++) {
1008        cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
1009            (aec->sd[i] * aec->se[i] + 1e-10f);
1010        cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
1011            (aec->sx[i] * aec->sd[i] + 1e-10f);
1012    }
1013
1014    hNlXdAvg = 0;
1015    for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
1016        hNlXdAvg += cohxd[i];
1017    }
1018    hNlXdAvg /= prefBandSize;
1019    hNlXdAvg = 1 - hNlXdAvg;
1020
1021    hNlDeAvg = 0;
1022    for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
1023        hNlDeAvg += cohde[i];
1024    }
1025    hNlDeAvg /= prefBandSize;
1026
1027    if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
1028        aec->hNlXdAvgMin = hNlXdAvg;
1029    }
1030
1031    if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
1032        aec->stNearState = 1;
1033    }
1034    else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
1035        aec->stNearState = 0;
1036    }
1037
1038    if (aec->hNlXdAvgMin == 1) {
1039        aec->echoState = 0;
1040        aec->overDrive = aec->minOverDrive;
1041
1042        if (aec->stNearState == 1) {
1043            memcpy(hNl, cohde, sizeof(hNl));
1044            hNlFb = hNlDeAvg;
1045            hNlFbLow = hNlDeAvg;
1046        }
1047        else {
1048            for (i = 0; i < PART_LEN1; i++) {
1049                hNl[i] = 1 - cohxd[i];
1050            }
1051            hNlFb = hNlXdAvg;
1052            hNlFbLow = hNlXdAvg;
1053        }
1054    }
1055    else {
1056
1057        if (aec->stNearState == 1) {
1058            aec->echoState = 0;
1059            memcpy(hNl, cohde, sizeof(hNl));
1060            hNlFb = hNlDeAvg;
1061            hNlFbLow = hNlDeAvg;
1062        }
1063        else {
1064            aec->echoState = 1;
1065            for (i = 0; i < PART_LEN1; i++) {
1066                hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
1067            }
1068
1069            // Select an order statistic from the preferred bands.
1070            // TODO: Using quicksort now, but a selection algorithm may be preferred.
1071            memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
1072            qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
1073            hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
1074            hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
1075        }
1076    }
1077
1078    // Track the local filter minimum to determine suppression overdrive.
1079    if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
1080        aec->hNlFbLocalMin = hNlFbLow;
1081        aec->hNlFbMin = hNlFbLow;
1082        aec->hNlNewMin = 1;
1083        aec->hNlMinCtr = 0;
1084    }
1085    aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
1086    aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);
1087
1088    if (aec->hNlNewMin == 1) {
1089        aec->hNlMinCtr++;
1090    }
1091    if (aec->hNlMinCtr == 2) {
1092        aec->hNlNewMin = 0;
1093        aec->hNlMinCtr = 0;
1094        aec->overDrive = WEBRTC_SPL_MAX(aec->targetSupp /
1095            ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), aec->minOverDrive);
1096    }
1097
1098    // Smooth the overdrive.
1099    if (aec->overDrive < aec->overDriveSm) {
1100      aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
1101    }
1102    else {
1103      aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
1104    }
1105
1106    WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);
1107
1108    // Add comfort noise.
1109    ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
1110
1111    // TODO(bjornv): Investigate how to take the windowing below into account if
1112    // needed.
1113    if (aec->metricsMode == 1) {
1114      // Note that we have a scaling by two in the time domain |eBuf|.
1115      // In addition the time domain signal is windowed before transformation,
1116      // losing half the energy on the average. We take care of the first
1117      // scaling only in UpdateMetrics().
1118      UpdateLevel(&aec->nlpoutlevel, efw);
1119    }
1120    // Inverse error fft.
1121    fft[0] = efw[0][0];
1122    fft[1] = efw[0][PART_LEN];
1123    for (i = 1; i < PART_LEN; i++) {
1124        fft[2*i] = efw[0][i];
1125        // Sign change required by Ooura fft.
1126        fft[2*i + 1] = -efw[1][i];
1127    }
1128    aec_rdft_inverse_128(fft);
1129
1130    // Overlap and add to obtain output.
1131    scale = 2.0f / PART_LEN2;
1132    for (i = 0; i < PART_LEN; i++) {
1133        fft[i] *= scale; // fft scaling
1134        fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i];
1135
1136        // Saturation protection
1137        output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i],
1138            WEBRTC_SPL_WORD16_MIN);
1139
1140        fft[PART_LEN + i] *= scale; // fft scaling
1141        aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
1142    }
1143
1144    // For H band
1145    if (aec->sampFreq == 32000) {
1146
1147        // H band gain
1148        // average nlp over low band: average over second half of freq spectrum
1149        // (4->8khz)
1150        GetHighbandGain(hNl, &nlpGainHband);
1151
1152        // Inverse comfort_noise
1153        if (flagHbandCn == 1) {
1154            fft[0] = comfortNoiseHband[0][0];
1155            fft[1] = comfortNoiseHband[PART_LEN][0];
1156            for (i = 1; i < PART_LEN; i++) {
1157                fft[2*i] = comfortNoiseHband[i][0];
1158                fft[2*i + 1] = comfortNoiseHband[i][1];
1159            }
1160            aec_rdft_inverse_128(fft);
1161            scale = 2.0f / PART_LEN2;
1162        }
1163
1164        // compute gain factor
1165        for (i = 0; i < PART_LEN; i++) {
1166            dtmp = (float)aec->dBufH[i];
1167            dtmp = (float)dtmp * nlpGainHband; // for variable gain
1168
1169            // add some comfort noise where Hband is attenuated
1170            if (flagHbandCn == 1) {
1171                fft[i] *= scale; // fft scaling
1172                dtmp += cnScaleHband * fft[i];
1173            }
1174
1175            // Saturation protection
1176            outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp,
1177                WEBRTC_SPL_WORD16_MIN);
1178         }
1179    }
1180
1181    // Copy the current block to the old position.
1182    memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
1183    memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
1184
1185    // Copy the current block to the old position for H band
1186    if (aec->sampFreq == 32000) {
1187        memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN);
1188    }
1189
1190    memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) -
1191        sizeof(complex_t) * PART_LEN1);
1192}
1193
1194static void GetHighbandGain(const float *lambda, float *nlpGainHband)
1195{
1196    int i;
1197
1198    nlpGainHband[0] = (float)0.0;
1199    for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
1200        nlpGainHband[0] += lambda[i];
1201    }
1202    nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
1203}
1204
1205static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
1206    complex_t *comfortNoiseHband, const float *noisePow, const float *lambda)
1207{
1208    int i, num;
1209    float rand[PART_LEN];
1210    float noise, noiseAvg, tmp, tmpAvg;
1211    WebRtc_Word16 randW16[PART_LEN];
1212    complex_t u[PART_LEN1];
1213
1214    const float pi2 = 6.28318530717959f;
1215
1216    // Generate a uniform random array on [0 1]
1217    WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
1218    for (i = 0; i < PART_LEN; i++) {
1219        rand[i] = ((float)randW16[i]) / 32768;
1220    }
1221
1222    // Reject LF noise
1223    u[0][0] = 0;
1224    u[0][1] = 0;
1225    for (i = 1; i < PART_LEN1; i++) {
1226        tmp = pi2 * rand[i - 1];
1227
1228        noise = sqrtf(noisePow[i]);
1229        u[i][0] = noise * (float)cos(tmp);
1230        u[i][1] = -noise * (float)sin(tmp);
1231    }
1232    u[PART_LEN][1] = 0;
1233
1234    for (i = 0; i < PART_LEN1; i++) {
1235        // This is the proper weighting to match the background noise power
1236        tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
1237        //tmp = 1 - lambda[i];
1238        efw[0][i] += tmp * u[i][0];
1239        efw[1][i] += tmp * u[i][1];
1240    }
1241
1242    // For H band comfort noise
1243    // TODO: don't compute noise and "tmp" twice. Use the previous results.
1244    noiseAvg = 0.0;
1245    tmpAvg = 0.0;
1246    num = 0;
1247    if (aec->sampFreq == 32000 && flagHbandCn == 1) {
1248
1249        // average noise scale
1250        // average over second half of freq spectrum (i.e., 4->8khz)
1251        // TODO: we shouldn't need num. We know how many elements we're summing.
1252        for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
1253            num++;
1254            noiseAvg += sqrtf(noisePow[i]);
1255        }
1256        noiseAvg /= (float)num;
1257
1258        // average nlp scale
1259        // average over second half of freq spectrum (i.e., 4->8khz)
1260        // TODO: we shouldn't need num. We know how many elements we're summing.
1261        num = 0;
1262        for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
1263            num++;
1264            tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
1265        }
1266        tmpAvg /= (float)num;
1267
1268        // Use average noise for H band
1269        // TODO: we should probably have a new random vector here.
1270        // Reject LF noise
1271        u[0][0] = 0;
1272        u[0][1] = 0;
1273        for (i = 1; i < PART_LEN1; i++) {
1274            tmp = pi2 * rand[i - 1];
1275
1276            // Use average noise for H band
1277            u[i][0] = noiseAvg * (float)cos(tmp);
1278            u[i][1] = -noiseAvg * (float)sin(tmp);
1279        }
1280        u[PART_LEN][1] = 0;
1281
1282        for (i = 0; i < PART_LEN1; i++) {
1283            // Use average NLP weight for H band
1284            comfortNoiseHband[i][0] = tmpAvg * u[i][0];
1285            comfortNoiseHband[i][1] = tmpAvg * u[i][1];
1286        }
1287    }
1288}
1289
1290static void WebRtcAec_InitLevel(power_level_t *level)
1291{
1292    const float bigFloat = 1E17f;
1293
1294    level->averagelevel = 0;
1295    level->framelevel = 0;
1296    level->minlevel = bigFloat;
1297    level->frsum = 0;
1298    level->sfrsum = 0;
1299    level->frcounter = 0;
1300    level->sfrcounter = 0;
1301}
1302
1303static void WebRtcAec_InitStats(stats_t *stats)
1304{
1305    stats->instant = offsetLevel;
1306    stats->average = offsetLevel;
1307    stats->max = offsetLevel;
1308    stats->min = offsetLevel * (-1);
1309    stats->sum = 0;
1310    stats->hisum = 0;
1311    stats->himean = offsetLevel;
1312    stats->counter = 0;
1313    stats->hicounter = 0;
1314}
1315
1316static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]) {
1317  // Do the energy calculation in the frequency domain. The FFT is performed on
1318  // a segment of PART_LEN2 samples due to overlap, but we only want the energy
1319  // of half that data (the last PART_LEN samples). Parseval's relation states
1320  // that the energy is preserved according to
1321  //
1322  // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
1323  //                           = ENERGY,
1324  //
1325  // where N = PART_LEN2. Since we are only interested in calculating the energy
1326  // for the last PART_LEN samples we approximate by calculating ENERGY and
1327  // divide by 2,
1328  //
1329  // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
1330  //
1331  // Since we deal with real valued time domain signals we only store frequency
1332  // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
1333  // need to add the contribution from the missing part in
1334  // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
1335  // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
1336  // is the values in the for loop below, but multiplication by 2 and division
1337  // by 2 cancel.
1338
1339  // TODO(bjornv): Investigate reusing energy calculations performed at other
1340  // places in the code.
1341  int k = 1;
1342  // Imaginary parts are zero at end points and left out of the calculation.
1343  float energy = (in[0][0] * in[0][0]) / 2;
1344  energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
1345
1346  for (k = 1; k < PART_LEN; k++) {
1347    energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
1348  }
1349  energy /= PART_LEN2;
1350
1351  level->sfrsum += energy;
1352  level->sfrcounter++;
1353
1354  if (level->sfrcounter > subCountLen) {
1355    level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
1356    level->sfrsum = 0;
1357    level->sfrcounter = 0;
1358    if (level->framelevel > 0) {
1359      if (level->framelevel < level->minlevel) {
1360        level->minlevel = level->framelevel;  // New minimum.
1361      } else {
1362        level->minlevel *= (1 + 0.001f);  // Small increase.
1363      }
1364    }
1365    level->frcounter++;
1366    level->frsum += level->framelevel;
1367    if (level->frcounter > countLen) {
1368      level->averagelevel = level->frsum / countLen;
1369      level->frsum = 0;
1370      level->frcounter = 0;
1371    }
1372  }
1373}
1374
1375static void UpdateMetrics(aec_t *aec)
1376{
1377    float dtmp, dtmp2;
1378
1379    const float actThresholdNoisy = 8.0f;
1380    const float actThresholdClean = 40.0f;
1381    const float safety = 0.99995f;
1382    const float noisyPower = 300000.0f;
1383
1384    float actThreshold;
1385    float echo, suppressedEcho;
1386
1387    if (aec->echoState) {   // Check if echo is likely present
1388        aec->stateCounter++;
1389    }
1390
1391    if (aec->farlevel.frcounter == 0) {
1392
1393        if (aec->farlevel.minlevel < noisyPower) {
1394            actThreshold = actThresholdClean;
1395        }
1396        else {
1397            actThreshold = actThresholdNoisy;
1398        }
1399
1400        if ((aec->stateCounter > (0.5f * countLen * subCountLen))
1401            && (aec->farlevel.sfrcounter == 0)
1402
1403            // Estimate in active far-end segments only
1404            && (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel))
1405            ) {
1406
1407            // Subtract noise power
1408            echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
1409
1410            // ERL
1411            dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
1412                aec->nearlevel.averagelevel + 1e-10f);
1413            dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
1414
1415            aec->erl.instant = dtmp;
1416            if (dtmp > aec->erl.max) {
1417                aec->erl.max = dtmp;
1418            }
1419
1420            if (dtmp < aec->erl.min) {
1421                aec->erl.min = dtmp;
1422            }
1423
1424            aec->erl.counter++;
1425            aec->erl.sum += dtmp;
1426            aec->erl.average = aec->erl.sum / aec->erl.counter;
1427
1428            // Upper mean
1429            if (dtmp > aec->erl.average) {
1430                aec->erl.hicounter++;
1431                aec->erl.hisum += dtmp;
1432                aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
1433            }
1434
1435            // A_NLP
1436            dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
1437                (2 * aec->linoutlevel.averagelevel) + 1e-10f);
1438
1439            // subtract noise power
1440            suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
1441                safety * aec->linoutlevel.minlevel);
1442
1443            dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
1444
1445            aec->aNlp.instant = dtmp2;
1446            if (dtmp > aec->aNlp.max) {
1447                aec->aNlp.max = dtmp;
1448            }
1449
1450            if (dtmp < aec->aNlp.min) {
1451                aec->aNlp.min = dtmp;
1452            }
1453
1454            aec->aNlp.counter++;
1455            aec->aNlp.sum += dtmp;
1456            aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;
1457
1458            // Upper mean
1459            if (dtmp > aec->aNlp.average) {
1460                aec->aNlp.hicounter++;
1461                aec->aNlp.hisum += dtmp;
1462                aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
1463            }
1464
1465            // ERLE
1466
1467            // subtract noise power
1468            suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
1469                safety * aec->nlpoutlevel.minlevel);
1470
1471            dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
1472                (2 * aec->nlpoutlevel.averagelevel) + 1e-10f);
1473            dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
1474
1475            dtmp = dtmp2;
1476            aec->erle.instant = dtmp;
1477            if (dtmp > aec->erle.max) {
1478                aec->erle.max = dtmp;
1479            }
1480
1481            if (dtmp < aec->erle.min) {
1482                aec->erle.min = dtmp;
1483            }
1484
1485            aec->erle.counter++;
1486            aec->erle.sum += dtmp;
1487            aec->erle.average = aec->erle.sum / aec->erle.counter;
1488
1489            // Upper mean
1490            if (dtmp > aec->erle.average) {
1491                aec->erle.hicounter++;
1492                aec->erle.hisum += dtmp;
1493                aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
1494            }
1495        }
1496
1497        aec->stateCounter = 0;
1498    }
1499}
1500
1501static void TimeToFrequency(float time_data[PART_LEN2],
1502                            float freq_data[2][PART_LEN1],
1503                            int window) {
1504  int i = 0;
1505
1506  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
1507  if (window) {
1508    for (i = 0; i < PART_LEN; i++) {
1509      time_data[i] *= sqrtHanning[i];
1510      time_data[PART_LEN + i] *= sqrtHanning[PART_LEN - i];
1511    }
1512  }
1513
1514  aec_rdft_forward_128(time_data);
1515  // Reorder.
1516  freq_data[1][0] = 0;
1517  freq_data[1][PART_LEN] = 0;
1518  freq_data[0][0] = time_data[0];
1519  freq_data[0][PART_LEN] = time_data[1];
1520  for (i = 1; i < PART_LEN; i++) {
1521    freq_data[0][i] = time_data[2 * i];
1522    freq_data[1][i] = time_data[2 * i + 1];
1523  }
1524}
1525