1/*
2 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <stdlib.h>
12
13#include "aecm_core.h"
14#include "ring_buffer.h"
15#include "echo_control_mobile.h"
16#include "typedefs.h"
17
18// TODO(bjornv): Will be removed in final version.
19//#include <stdio.h>
20
21#ifdef ARM_WINM_LOG
22#include <stdio.h>
23#include <windows.h>
24#endif
25
26// BANDLAST - BANDFIRST must be < 32
27#define BANDFIRST                   12   // Only bit BANDFIRST through bit BANDLAST are processed
28#define BANDLAST                    43
29
30#ifdef ARM_WINM
31#define WebRtcSpl_AddSatW32(a,b)  _AddSatInt(a,b)
32#define WebRtcSpl_SubSatW32(a,b)  _SubSatInt(a,b)
33#endif
34// 16 instructions on most risc machines for 32-bit bitcount !
35
36#ifdef AEC_DEBUG
37FILE *dfile;
38FILE *testfile;
39#endif
40
41#ifdef AECM_SHORT
42
43// Square root of Hanning window in Q14
44static const WebRtc_Word16 kSqrtHanning[] =
45{
46    0, 804, 1606, 2404, 3196, 3981, 4756, 5520,
47    6270, 7005, 7723, 8423, 9102, 9760, 10394, 11003,
48    11585, 12140, 12665, 13160, 13623, 14053, 14449, 14811,
49    15137, 15426, 15679, 15893, 16069, 16207, 16305, 16364,
50    16384
51};
52
53#else
54
55// Square root of Hanning window in Q14
56static const WebRtc_Word16 kSqrtHanning[] = {0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
57        3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019, 8364,
58        8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, 11795, 12068, 12335,
59        12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189, 14384, 14571, 14749, 14918,
60        15079, 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, 16111, 16179, 16237,
61        16286, 16325, 16354, 16373, 16384};
62
63#endif
64
65//Q15 alpha = 0.99439986968132  const Factor for magnitude approximation
66static const WebRtc_UWord16 kAlpha1 = 32584;
67//Q15 beta = 0.12967166976970   const Factor for magnitude approximation
68static const WebRtc_UWord16 kBeta1 = 4249;
69//Q15 alpha = 0.94234827210087  const Factor for magnitude approximation
70static const WebRtc_UWord16 kAlpha2 = 30879;
71//Q15 beta = 0.33787806009150   const Factor for magnitude approximation
72static const WebRtc_UWord16 kBeta2 = 11072;
73//Q15 alpha = 0.82247698684306  const Factor for magnitude approximation
74static const WebRtc_UWord16 kAlpha3 = 26951;
75//Q15 beta = 0.57762063060713   const Factor for magnitude approximation
76static const WebRtc_UWord16 kBeta3 = 18927;
77
78// Initialization table for echo channel in 8 kHz
79static const WebRtc_Word16 kChannelStored8kHz[PART_LEN1] = {
80    2040,   1815,   1590,   1498,   1405,   1395,   1385,   1418,
81    1451,   1506,   1562,   1644,   1726,   1804,   1882,   1918,
82    1953,   1982,   2010,   2025,   2040,   2034,   2027,   2021,
83    2014,   1997,   1980,   1925,   1869,   1800,   1732,   1683,
84    1635,   1604,   1572,   1545,   1517,   1481,   1444,   1405,
85    1367,   1331,   1294,   1270,   1245,   1239,   1233,   1247,
86    1260,   1282,   1303,   1338,   1373,   1407,   1441,   1470,
87    1499,   1524,   1549,   1565,   1582,   1601,   1621,   1649,
88    1676
89};
90
91// Initialization table for echo channel in 16 kHz
92static const WebRtc_Word16 kChannelStored16kHz[PART_LEN1] = {
93    2040,   1590,   1405,   1385,   1451,   1562,   1726,   1882,
94    1953,   2010,   2040,   2027,   2014,   1980,   1869,   1732,
95    1635,   1572,   1517,   1444,   1367,   1294,   1245,   1233,
96    1260,   1303,   1373,   1441,   1499,   1549,   1582,   1621,
97    1676,   1741,   1802,   1861,   1921,   1983,   2040,   2102,
98    2170,   2265,   2375,   2515,   2651,   2781,   2922,   3075,
99    3253,   3471,   3738,   3976,   4151,   4258,   4308,   4288,
100    4270,   4253,   4237,   4179,   4086,   3947,   3757,   3484,
101    3153
102};
103
104#ifdef ARM_WINM_LOG
105HANDLE logFile = NULL;
106#endif
107
108static void WebRtcAecm_ComfortNoise(AecmCore_t* const aecm, const WebRtc_UWord16 * const dfa,
109                                    WebRtc_Word16 * const outReal,
110                                    WebRtc_Word16 * const outImag,
111                                    const WebRtc_Word16 * const lambda);
112
113static __inline WebRtc_UWord32 WebRtcAecm_SetBit(WebRtc_UWord32 in, WebRtc_Word32 pos)
114{
115    WebRtc_UWord32 mask, out;
116
117    mask = WEBRTC_SPL_SHIFT_W32(1, pos);
118    out = (in | mask);
119
120    return out;
121}
122
123// WebRtcAecm_Hisser(...)
124//
125// This function compares the binary vector specvec with all rows of the binary matrix specmat
126// and counts per row the number of times they have the same value.
127// Input:
128//       - specvec   : binary "vector"  that is stored in a long
129//       - specmat   : binary "matrix"  that is stored as a vector of long
130// Output:
131//       - bcount    : "Vector" stored as a long, containing for each row the number of times
132//                      the matrix row and the input vector have the same value
133//
134//
135void WebRtcAecm_Hisser(const WebRtc_UWord32 specvec, const WebRtc_UWord32 * const specmat,
136                       WebRtc_UWord32 * const bcount)
137{
138    int n;
139    WebRtc_UWord32 a, b;
140    register WebRtc_UWord32 tmp;
141
142    a = specvec;
143    // compare binary vector specvec with all rows of the binary matrix specmat
144    for (n = 0; n < MAX_DELAY; n++)
145    {
146        b = specmat[n];
147        a = (specvec ^ b);
148        // Returns bit counts in tmp
149        tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111);
150        tmp = ((tmp + (tmp >> 3)) & 030707070707);
151        tmp = (tmp + (tmp >> 6));
152        tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077;
153
154        bcount[n] = tmp;
155    }
156}
157
158// WebRtcAecm_BSpectrum(...)
159//
160// Computes the binary spectrum by comparing the input spectrum with a threshold spectrum.
161//
162// Input:
163//       - spectrum  : Spectrum of which the binary spectrum should be calculated.
164//       - thresvec  : Threshold spectrum with which the input spectrum is compared.
165// Return:
166//       - out       : Binary spectrum
167//
168WebRtc_UWord32 WebRtcAecm_BSpectrum(const WebRtc_UWord16 * const spectrum,
169                                    const WebRtc_UWord16 * const thresvec)
170{
171    int k;
172    WebRtc_UWord32 out;
173
174    out = 0;
175    for (k = BANDFIRST; k <= BANDLAST; k++)
176    {
177        if (spectrum[k] > thresvec[k])
178        {
179            out = WebRtcAecm_SetBit(out, k - BANDFIRST);
180        }
181    }
182
183    return out;
184}
185
186//   WebRtcAecm_MedianEstimator(...)
187//
188//   Calculates the median recursively.
189//
190//   Input:
191//           - newVal            :   new additional value
192//           - medianVec         :   vector with current medians
193//           - factor            :   factor for smoothing
194//
195//   Output:
196//           - medianVec         :   vector with updated median
197//
198int WebRtcAecm_MedianEstimator(const WebRtc_UWord16 newVal, WebRtc_UWord16 * const medianVec,
199                               const int factor)
200{
201    WebRtc_Word32 median;
202    WebRtc_Word32 diff;
203
204    median = (WebRtc_Word32)medianVec[0];
205
206    //median = median + ((newVal-median)>>factor);
207    diff = (WebRtc_Word32)newVal - median;
208    diff = WEBRTC_SPL_SHIFT_W32(diff, -factor);
209    median = median + diff;
210
211    medianVec[0] = (WebRtc_UWord16)median;
212
213    return 0;
214}
215
216int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
217{
218    AecmCore_t *aecm = malloc(sizeof(AecmCore_t));
219    *aecmInst = aecm;
220    if (aecm == NULL)
221    {
222        return -1;
223    }
224
225    if (WebRtcApm_CreateBuffer(&aecm->farFrameBuf, FRAME_LEN + PART_LEN) == -1)
226    {
227        WebRtcAecm_FreeCore(aecm);
228        aecm = NULL;
229        return -1;
230    }
231
232    if (WebRtcApm_CreateBuffer(&aecm->nearNoisyFrameBuf, FRAME_LEN + PART_LEN) == -1)
233    {
234        WebRtcAecm_FreeCore(aecm);
235        aecm = NULL;
236        return -1;
237    }
238
239    if (WebRtcApm_CreateBuffer(&aecm->nearCleanFrameBuf, FRAME_LEN + PART_LEN) == -1)
240    {
241        WebRtcAecm_FreeCore(aecm);
242        aecm = NULL;
243        return -1;
244    }
245
246    if (WebRtcApm_CreateBuffer(&aecm->outFrameBuf, FRAME_LEN + PART_LEN) == -1)
247    {
248        WebRtcAecm_FreeCore(aecm);
249        aecm = NULL;
250        return -1;
251    }
252
253    return 0;
254}
255
256// WebRtcAecm_InitCore(...)
257//
258// This function initializes the AECM instant created with WebRtcAecm_CreateCore(...)
259// Input:
260//      - aecm            : Pointer to the Echo Suppression instance
261//      - samplingFreq   : Sampling Frequency
262//
263// Output:
264//      - aecm            : Initialized instance
265//
266// Return value         :  0 - Ok
267//                        -1 - Error
268//
269int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq)
270{
271    int retVal = 0;
272    WebRtc_Word16 i;
273    WebRtc_Word16 tmp16;
274
275    if (samplingFreq != 8000 && samplingFreq != 16000)
276    {
277        samplingFreq = 8000;
278        retVal = -1;
279    }
280    // sanity check of sampling frequency
281    aecm->mult = (WebRtc_Word16)samplingFreq / 8000;
282
283    aecm->farBufWritePos = 0;
284    aecm->farBufReadPos = 0;
285    aecm->knownDelay = 0;
286    aecm->lastKnownDelay = 0;
287
288    WebRtcApm_InitBuffer(aecm->farFrameBuf);
289    WebRtcApm_InitBuffer(aecm->nearNoisyFrameBuf);
290    WebRtcApm_InitBuffer(aecm->nearCleanFrameBuf);
291    WebRtcApm_InitBuffer(aecm->outFrameBuf);
292
293    memset(aecm->xBuf, 0, sizeof(aecm->xBuf));
294    memset(aecm->dBufClean, 0, sizeof(aecm->dBufClean));
295    memset(aecm->dBufNoisy, 0, sizeof(aecm->dBufNoisy));
296    memset(aecm->outBuf, 0, sizeof(WebRtc_Word16) * PART_LEN);
297
298    aecm->seed = 666;
299    aecm->totCount = 0;
300
301    memset(aecm->xfaHistory, 0, sizeof(WebRtc_UWord16) * (PART_LEN1) * MAX_DELAY);
302
303    aecm->delHistoryPos = MAX_DELAY;
304
305    memset(aecm->medianYlogspec, 0, sizeof(WebRtc_UWord16) * PART_LEN1);
306    memset(aecm->medianXlogspec, 0, sizeof(WebRtc_UWord16) * PART_LEN1);
307    memset(aecm->medianBCount, 0, sizeof(WebRtc_UWord16) * MAX_DELAY);
308    memset(aecm->bxHistory, 0, sizeof(aecm->bxHistory));
309
310    // Initialize to reasonable values
311    aecm->currentDelay = 8;
312    aecm->previousDelay = 8;
313    aecm->delayAdjust = 0;
314
315    aecm->nlpFlag = 1;
316    aecm->fixedDelay = -1;
317
318    memset(aecm->xfaQDomainBuf, 0, sizeof(WebRtc_Word16) * MAX_DELAY);
319    aecm->dfaCleanQDomain = 0;
320    aecm->dfaCleanQDomainOld = 0;
321    aecm->dfaNoisyQDomain = 0;
322    aecm->dfaNoisyQDomainOld = 0;
323
324    memset(aecm->nearLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN);
325    memset(aecm->farLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN);
326    memset(aecm->echoAdaptLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN);
327    memset(aecm->echoStoredLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN);
328
329    // Initialize the echo channels with a stored shape.
330    if (samplingFreq == 8000)
331    {
332        memcpy(aecm->channelAdapt16, kChannelStored8kHz, sizeof(WebRtc_Word16) * PART_LEN1);
333    }
334    else
335    {
336        memcpy(aecm->channelAdapt16, kChannelStored16kHz, sizeof(WebRtc_Word16) * PART_LEN1);
337    }
338    memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(WebRtc_Word16) * PART_LEN1);
339    for (i = 0; i < PART_LEN1; i++)
340    {
341        aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
342            (WebRtc_Word32)(aecm->channelAdapt16[i]), 16);
343    }
344
345    memset(aecm->echoFilt, 0, sizeof(WebRtc_Word32) * PART_LEN1);
346    memset(aecm->nearFilt, 0, sizeof(WebRtc_Word16) * PART_LEN1);
347    aecm->noiseEstCtr = 0;
348
349    aecm->cngMode = AecmTrue;
350
351    // Increase the noise Q domain with increasing frequency, to correspond to the
352    // expected energy levels.
353    // Also shape the initial noise level with this consideration.
354#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
355    for (i = 0; i < PART_LEN1; i++)
356    {
357        if (i < PART_LEN1 >> 2)
358        {
359            aecm->noiseEstQDomain[i] = 10;
360            tmp16 = PART_LEN1 - i;
361            aecm->noiseEst[i] = (tmp16 * tmp16) << 4;
362        } else if (i < PART_LEN1 >> 1)
363        {
364            aecm->noiseEstQDomain[i] = 11;
365            tmp16 = PART_LEN1 - i;
366            aecm->noiseEst[i] = ((tmp16 * tmp16) << 4) << 1;
367        } else
368        {
369            aecm->noiseEstQDomain[i] = 12;
370            aecm->noiseEst[i] = aecm->noiseEst[(PART_LEN1 >> 1) - 1] << 1;
371        }
372    }
373#else
374    for (i = 0; i < PART_LEN1 >> 2; i++)
375    {
376        aecm->noiseEstQDomain[i] = 10;
377        tmp16 = PART_LEN1 - i;
378        aecm->noiseEst[i] = (tmp16 * tmp16) << 4;
379    }
380    for (; i < PART_LEN1 >> 1; i++)
381    {
382        aecm->noiseEstQDomain[i] = 11;
383        tmp16 = PART_LEN1 - i;
384        aecm->noiseEst[i] = ((tmp16 * tmp16) << 4) << 1;
385    }
386    for (; i < PART_LEN1; i++)
387    {
388        aecm->noiseEstQDomain[i] = 12;
389        aecm->noiseEst[i] = aecm->noiseEst[(PART_LEN1 >> 1) - 1] << 1;
390    }
391#endif
392
393    aecm->mseAdaptOld = 1000;
394    aecm->mseStoredOld = 1000;
395    aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX;
396
397    aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX;
398    aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN;
399    aecm->farEnergyMaxMin = 0;
400    aecm->farEnergyVAD = FAR_ENERGY_MIN; // This prevents false speech detection at the
401                                         // beginning.
402    aecm->farEnergyMSE = 0;
403    aecm->currentVADValue = 0;
404    aecm->vadUpdateCount = 0;
405    aecm->firstVAD = 1;
406
407    aecm->delayCount = 0;
408    aecm->newDelayCorrData = 0;
409    aecm->lastDelayUpdateCount = 0;
410    memset(aecm->delayCorrelation, 0, sizeof(WebRtc_Word16) * ((CORR_MAX << 1) + 1));
411
412    aecm->startupState = 0;
413    aecm->mseChannelCount = 0;
414    aecm->supGain = SUPGAIN_DEFAULT;
415    aecm->supGainOld = SUPGAIN_DEFAULT;
416    aecm->delayOffsetFlag = 0;
417
418    memset(aecm->delayHistogram, 0, sizeof(aecm->delayHistogram));
419    aecm->delayVadCount = 0;
420    aecm->maxDelayHistIdx = 0;
421    aecm->lastMinPos = 0;
422
423    aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A;
424    aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D;
425    aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B;
426    aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D;
427
428    return 0;
429}
430
431int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag, int delayOffsetFlag)
432{
433    aecm->nlpFlag = nlpFlag;
434    aecm->fixedDelay = delay;
435    aecm->delayOffsetFlag = delayOffsetFlag;
436
437    return 0;
438}
439
440// WebRtcAecm_GetNewDelPos(...)
441//
442// Moves the pointer to the next entry. Returns to zero if max position reached.
443//
444// Input:
445//       - aecm     : Pointer to the AECM instance
446// Return:
447//       - pos      : New position in the history.
448//
449//
450WebRtc_Word16 WebRtcAecm_GetNewDelPos(AecmCore_t * const aecm)
451{
452    WebRtc_Word16 pos;
453
454    pos = aecm->delHistoryPos;
455    pos++;
456    if (pos >= MAX_DELAY)
457    {
458        pos = 0;
459    }
460    aecm->delHistoryPos = pos;
461
462    return pos;
463}
464
465// WebRtcAecm_EstimateDelay(...)
466//
467// Estimate the delay of the echo signal.
468//
469// Inputs:
470//      - aecm          : Pointer to the AECM instance
471//      - farSpec       : Delayed farend magnitude spectrum
472//      - nearSpec      : Nearend magnitude spectrum
473//      - stages        : Q-domain of xxFIX and yyFIX (without dynamic Q-domain)
474//      - xfaQ          : normalization factor, i.e., Q-domain before FFT
475// Return:
476//      - delay         : Estimated delay
477//
478WebRtc_Word16 WebRtcAecm_EstimateDelay(AecmCore_t * const aecm,
479                                       const WebRtc_UWord16 * const farSpec,
480                                       const WebRtc_UWord16 * const nearSpec,
481                                       const WebRtc_Word16 xfaQ)
482{
483    WebRtc_UWord32 bxspectrum, byspectrum;
484    WebRtc_UWord32 bcount[MAX_DELAY];
485
486    int i, res;
487
488    WebRtc_UWord16 xmean[PART_LEN1], ymean[PART_LEN1];
489    WebRtc_UWord16 dtmp1;
490    WebRtc_Word16 fcount[MAX_DELAY];
491
492    //WebRtc_Word16 res;
493    WebRtc_Word16 histpos;
494    WebRtc_Word16 maxHistLvl;
495    WebRtc_UWord16 *state;
496    WebRtc_Word16 minpos = -1;
497
498    enum
499    {
500        kVadCountThreshold = 25
501    };
502    enum
503    {
504        kMaxHistogram = 600
505    };
506
507    histpos = WebRtcAecm_GetNewDelPos(aecm);
508
509    for (i = 0; i < PART_LEN1; i++)
510    {
511        aecm->xfaHistory[i][histpos] = farSpec[i];
512
513        state = &(aecm->medianXlogspec[i]);
514        res = WebRtcAecm_MedianEstimator(farSpec[i], state, 6);
515
516        state = &(aecm->medianYlogspec[i]);
517        res = WebRtcAecm_MedianEstimator(nearSpec[i], state, 6);
518
519        //  Mean:
520        //  FLOAT:
521        //  ymean = dtmp2/MAX_DELAY
522        //
523        //  FIX:
524        //  input: dtmp2FIX in Q0
525        //  output: ymeanFIX in Q8
526        //  20 = 1/MAX_DELAY in Q13 = 1/MAX_DELAY * 2^13
527        xmean[i] = (aecm->medianXlogspec[i]);
528        ymean[i] = (aecm->medianYlogspec[i]);
529
530    }
531    // Update Q-domain buffer
532    aecm->xfaQDomainBuf[histpos] = xfaQ;
533
534    // Get binary spectra
535    //  FLOAT:
536    //  bxspectrum = bspectrum(xlogspec, xmean);
537    //
538    //  FIX:
539    //  input:  xlogspecFIX,ylogspecFIX in Q8
540    //          xmeanFIX, ymeanFIX in Q8
541    //  output: unsigned long bxspectrum, byspectrum in Q0
542    bxspectrum = WebRtcAecm_BSpectrum(farSpec, xmean);
543    byspectrum = WebRtcAecm_BSpectrum(nearSpec, ymean);
544
545    // Shift binary spectrum history
546    memmove(&(aecm->bxHistory[1]), &(aecm->bxHistory[0]),
547            (MAX_DELAY - 1) * sizeof(WebRtc_UWord32));
548
549    aecm->bxHistory[0] = bxspectrum;
550
551    // Compare with delayed spectra
552    WebRtcAecm_Hisser(byspectrum, aecm->bxHistory, bcount);
553
554    for (i = 0; i < MAX_DELAY; i++)
555    {
556        // Update sum
557        // bcount is constrained to [0, 32], meaning we can smooth with a factor up to 2^11.
558        dtmp1 = (WebRtc_UWord16)bcount[i];
559        dtmp1 = WEBRTC_SPL_LSHIFT_W16(dtmp1, 9);
560        state = &(aecm->medianBCount[i]);
561        res = WebRtcAecm_MedianEstimator(dtmp1, state, 9);
562        fcount[i] = (aecm->medianBCount[i]);
563    }
564
565    // Find minimum
566    minpos = WebRtcSpl_MinIndexW16(fcount, MAX_DELAY);
567
568    // If the farend has been active sufficiently long, begin accumulating a histogram
569    // of the minimum positions. Search for the maximum bin to determine the delay.
570    if (aecm->currentVADValue == 1)
571    {
572        if (aecm->delayVadCount >= kVadCountThreshold)
573        {
574            // Increment the histogram at the current minimum position.
575            if (aecm->delayHistogram[minpos] < kMaxHistogram)
576            {
577                aecm->delayHistogram[minpos] += 3;
578            }
579
580#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
581            // Decrement the entire histogram.
582            for (i = 0; i < MAX_DELAY; i++)
583            {
584                if (aecm->delayHistogram[i] > 0)
585                {
586                    aecm->delayHistogram[i]--;
587                }
588            }
589
590            // Select the histogram index corresponding to the maximum bin as the delay.
591            maxHistLvl = 0;
592            aecm->maxDelayHistIdx = 0;
593            for (i = 0; i < MAX_DELAY; i++)
594            {
595                if (aecm->delayHistogram[i] > maxHistLvl)
596                {
597                    maxHistLvl = aecm->delayHistogram[i];
598                    aecm->maxDelayHistIdx = i;
599                }
600            }
601#else
602            maxHistLvl = 0;
603            aecm->maxDelayHistIdx = 0;
604
605            for (i = 0; i < MAX_DELAY; i++)
606            {
607                WebRtc_Word16 tempVar = aecm->delayHistogram[i];
608
609                // Decrement the entire histogram.
610                if (tempVar > 0)
611                {
612                    tempVar--;
613                    aecm->delayHistogram[i] = tempVar;
614
615                    // Select the histogram index corresponding to the maximum bin as the delay.
616                    if (tempVar > maxHistLvl)
617                    {
618                        maxHistLvl = tempVar;
619                        aecm->maxDelayHistIdx = i;
620                    }
621                }
622            }
623#endif
624        } else
625        {
626            aecm->delayVadCount++;
627        }
628    } else
629    {
630        aecm->delayVadCount = 0;
631    }
632
633    return aecm->maxDelayHistIdx;
634}
635
636int WebRtcAecm_FreeCore(AecmCore_t *aecm)
637{
638    if (aecm == NULL)
639    {
640        return -1;
641    }
642
643    WebRtcApm_FreeBuffer(aecm->farFrameBuf);
644    WebRtcApm_FreeBuffer(aecm->nearNoisyFrameBuf);
645    WebRtcApm_FreeBuffer(aecm->nearCleanFrameBuf);
646    WebRtcApm_FreeBuffer(aecm->outFrameBuf);
647
648    free(aecm);
649
650    return 0;
651}
652
653void WebRtcAecm_ProcessFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend,
654                             const WebRtc_Word16 * const nearendNoisy,
655                             const WebRtc_Word16 * const nearendClean,
656                             WebRtc_Word16 * const out)
657{
658    WebRtc_Word16 farBlock[PART_LEN];
659    WebRtc_Word16 nearNoisyBlock[PART_LEN];
660    WebRtc_Word16 nearCleanBlock[PART_LEN];
661    WebRtc_Word16 outBlock[PART_LEN];
662    WebRtc_Word16 farFrame[FRAME_LEN];
663    int size = 0;
664
665    // Buffer the current frame.
666    // Fetch an older one corresponding to the delay.
667    WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN);
668    WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay);
669
670    // Buffer the synchronized far and near frames,
671    // to pass the smaller blocks individually.
672    WebRtcApm_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN);
673    WebRtcApm_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN);
674    if (nearendClean != NULL)
675    {
676        WebRtcApm_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN);
677    }
678
679    // Process as many blocks as possible.
680    while (WebRtcApm_get_buffer_size(aecm->farFrameBuf) >= PART_LEN)
681    {
682        WebRtcApm_ReadBuffer(aecm->farFrameBuf, farBlock, PART_LEN);
683        WebRtcApm_ReadBuffer(aecm->nearNoisyFrameBuf, nearNoisyBlock, PART_LEN);
684        if (nearendClean != NULL)
685        {
686            WebRtcApm_ReadBuffer(aecm->nearCleanFrameBuf, nearCleanBlock, PART_LEN);
687            WebRtcAecm_ProcessBlock(aecm, farBlock, nearNoisyBlock, nearCleanBlock, outBlock);
688        } else
689        {
690            WebRtcAecm_ProcessBlock(aecm, farBlock, nearNoisyBlock, NULL, outBlock);
691        }
692
693        WebRtcApm_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN);
694    }
695
696    // Stuff the out buffer if we have less than a frame to output.
697    // This should only happen for the first frame.
698    size = WebRtcApm_get_buffer_size(aecm->outFrameBuf);
699    if (size < FRAME_LEN)
700    {
701        WebRtcApm_StuffBuffer(aecm->outFrameBuf, FRAME_LEN - size);
702    }
703
704    // Obtain an output frame.
705    WebRtcApm_ReadBuffer(aecm->outFrameBuf, out, FRAME_LEN);
706}
707
708// WebRtcAecm_AsymFilt(...)
709//
710// Performs asymmetric filtering.
711//
712// Inputs:
713//      - filtOld       : Previous filtered value.
714//      - inVal         : New input value.
715//      - stepSizePos   : Step size when we have a positive contribution.
716//      - stepSizeNeg   : Step size when we have a negative contribution.
717//
718// Output:
719//
720// Return: - Filtered value.
721//
722WebRtc_Word16 WebRtcAecm_AsymFilt(const WebRtc_Word16 filtOld, const WebRtc_Word16 inVal,
723                                  const WebRtc_Word16 stepSizePos,
724                                  const WebRtc_Word16 stepSizeNeg)
725{
726    WebRtc_Word16 retVal;
727
728    if ((filtOld == WEBRTC_SPL_WORD16_MAX) | (filtOld == WEBRTC_SPL_WORD16_MIN))
729    {
730        return inVal;
731    }
732    retVal = filtOld;
733    if (filtOld > inVal)
734    {
735        retVal -= WEBRTC_SPL_RSHIFT_W16(filtOld - inVal, stepSizeNeg);
736    } else
737    {
738        retVal += WEBRTC_SPL_RSHIFT_W16(inVal - filtOld, stepSizePos);
739    }
740
741    return retVal;
742}
743
744// WebRtcAecm_CalcEnergies(...)
745//
746// This function calculates the log of energies for nearend, farend and estimated
// echoes. There is also an update of energy decision levels, i.e. internal VAD.
748//
749//
750// @param  aecm         [i/o]   Handle of the AECM instance.
751// @param  delayDiff    [in]    Delay position in farend buffer.
752// @param  nearEner     [in]    Near end energy for current block (Q[aecm->dfaQDomain]).
753// @param  echoEst      [i/o]   Estimated echo
754//                              (Q[aecm->xfaQDomain[delayDiff]+RESOLUTION_CHANNEL16]).
755//
756void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayDiff,
757                             const WebRtc_UWord32 nearEner, WebRtc_Word32 * const echoEst)
758{
759    // Local variables
760    WebRtc_UWord32 tmpAdapt, tmpStored, tmpFar;
761
762    int i;
763
764    WebRtc_Word16 zeros, frac;
765    WebRtc_Word16 tmp16;
766    WebRtc_Word16 increase_max_shifts = 4;
767    WebRtc_Word16 decrease_max_shifts = 11;
768    WebRtc_Word16 increase_min_shifts = 11;
769    WebRtc_Word16 decrease_min_shifts = 3;
770
771    // Get log of near end energy and store in buffer
772
773    // Shift buffer
774    memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy,
775            sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1));
776
777    // Logarithm of integrated magnitude spectrum (nearEner)
778    if (nearEner)
779    {
780        zeros = WebRtcSpl_NormU32(nearEner);
781        frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(
782                              (WEBRTC_SPL_LSHIFT_U32(nearEner, zeros) & 0x7FFFFFFF),
783                              23);
784        // log2 in Q8
785        aecm->nearLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac;
786        aecm->nearLogEnergy[0] -= WEBRTC_SPL_LSHIFT_W16(aecm->dfaNoisyQDomain, 8);
787    } else
788    {
789        aecm->nearLogEnergy[0] = 0;
790    }
791    aecm->nearLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7);
792    // END: Get log of near end energy
793
794    // Get energy for the delayed far end signal and estimated
795    // echo using both stored and adapted channels.
796    tmpAdapt = 0;
797    tmpStored = 0;
798    tmpFar = 0;
799
800    for (i = 0; i < PART_LEN1; i++)
801    {
802        // Get estimated echo energies for adaptive channel and stored channel
803        echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
804                aecm->xfaHistory[i][delayDiff]);
805        tmpFar += (WebRtc_UWord32)(aecm->xfaHistory[i][delayDiff]);
806        tmpAdapt += WEBRTC_SPL_UMUL_16_16(aecm->channelAdapt16[i],
807                aecm->xfaHistory[i][delayDiff]);
808        tmpStored += (WebRtc_UWord32)echoEst[i];
809    }
810    // Shift buffers
811    memmove(aecm->farLogEnergy + 1, aecm->farLogEnergy,
812            sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1));
813    memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy,
814            sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1));
815    memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy,
816            sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1));
817
818    // Logarithm of delayed far end energy
819    if (tmpFar)
820    {
821        zeros = WebRtcSpl_NormU32(tmpFar);
822        frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpFar, zeros)
823                        & 0x7FFFFFFF), 23);
824        // log2 in Q8
825        aecm->farLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac;
826        aecm->farLogEnergy[0] -= WEBRTC_SPL_LSHIFT_W16(aecm->xfaQDomainBuf[delayDiff], 8);
827    } else
828    {
829        aecm->farLogEnergy[0] = 0;
830    }
831    aecm->farLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7);
832
833    // Logarithm of estimated echo energy through adapted channel
834    if (tmpAdapt)
835    {
836        zeros = WebRtcSpl_NormU32(tmpAdapt);
837        frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpAdapt, zeros)
838                        & 0x7FFFFFFF), 23);
839        //log2 in Q8
840        aecm->echoAdaptLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac;
841        aecm->echoAdaptLogEnergy[0]
842                -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + aecm->xfaQDomainBuf[delayDiff], 8);
843    } else
844    {
845        aecm->echoAdaptLogEnergy[0] = 0;
846    }
847    aecm->echoAdaptLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7);
848
849    // Logarithm of estimated echo energy through stored channel
850    if (tmpStored)
851    {
852        zeros = WebRtcSpl_NormU32(tmpStored);
853        frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpStored, zeros)
854                        & 0x7FFFFFFF), 23);
855        //log2 in Q8
856        aecm->echoStoredLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac;
857        aecm->echoStoredLogEnergy[0]
858                -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + aecm->xfaQDomainBuf[delayDiff], 8);
859    } else
860    {
861        aecm->echoStoredLogEnergy[0] = 0;
862    }
863    aecm->echoStoredLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7);
864
865    // Update farend energy levels (min, max, vad, mse)
866    if (aecm->farLogEnergy[0] > FAR_ENERGY_MIN)
867    {
868        if (aecm->startupState == 0)
869        {
870            increase_max_shifts = 2;
871            decrease_min_shifts = 2;
872            increase_min_shifts = 8;
873        }
874
875        aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy[0],
876                                                 increase_min_shifts, decrease_min_shifts);
877        aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy[0],
878                                                 increase_max_shifts, decrease_max_shifts);
879        aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin);
880
881        // Dynamic VAD region size
882        tmp16 = 2560 - aecm->farEnergyMin;
883        if (tmp16 > 0)
884        {
885            tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, FAR_ENERGY_VAD_REGION, 9);
886        } else
887        {
888            tmp16 = 0;
889        }
890        tmp16 += FAR_ENERGY_VAD_REGION;
891
892        if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024))
893        {
894            // In startup phase or VAD update halted
895            aecm->farEnergyVAD = aecm->farEnergyMin + tmp16;
896        } else
897        {
898            if (aecm->farEnergyVAD > aecm->farLogEnergy[0])
899            {
900                aecm->farEnergyVAD += WEBRTC_SPL_RSHIFT_W16(aecm->farLogEnergy[0] + tmp16
901                        - aecm->farEnergyVAD, 6);
902                aecm->vadUpdateCount = 0;
903            } else
904            {
905                aecm->vadUpdateCount++;
906            }
907        }
908        // Put MSE threshold higher than VAD
909        aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8);
910    }
911
912    // Update VAD variables
913    if (aecm->farLogEnergy[0] > aecm->farEnergyVAD)
914    {
915        if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF))
916        {
917            // We are in startup or have significant dynamics in input speech level
918            aecm->currentVADValue = 1;
919        }
920    } else
921    {
922        aecm->currentVADValue = 0;
923    }
924    if ((aecm->currentVADValue) && (aecm->firstVAD))
925    {
926        aecm->firstVAD = 0;
927        if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0])
928        {
929            // The estimated echo has higher energy than the near end signal. This means that
930            // the initialization was too aggressive. Scale down by a factor 8
931            for (i = 0; i < PART_LEN1; i++)
932            {
933                aecm->channelAdapt16[i] >>= 3;
934            }
935            // Compensate the adapted echo energy level accordingly.
936            aecm->echoAdaptLogEnergy[0] -= (3 << 8);
937            aecm->firstVAD = 1;
938        }
939    }
940    // END: Energies of delayed far, echo estimates
941    // TODO(bjornv): Will be removed in final version.
942#ifdef VAD_DATA
943    fwrite(&(aecm->currentVADValue), sizeof(WebRtc_Word16), 1, aecm->vad_file);
944    fwrite(&(aecm->currentDelay), sizeof(WebRtc_Word16), 1, aecm->delay_file);
945    fwrite(&(aecm->farLogEnergy[0]), sizeof(WebRtc_Word16), 1, aecm->far_cur_file);
946    fwrite(&(aecm->farEnergyMin), sizeof(WebRtc_Word16), 1, aecm->far_min_file);
947    fwrite(&(aecm->farEnergyMax), sizeof(WebRtc_Word16), 1, aecm->far_max_file);
948    fwrite(&(aecm->farEnergyVAD), sizeof(WebRtc_Word16), 1, aecm->far_vad_file);
949#endif
950}
951
952// WebRtcAecm_CalcStepSize(...)
953//
954// This function calculates the step size used in channel estimation
955//
956//
957// @param  aecm  [in]    Handle of the AECM instance.
958// @param  mu   [out]   (Return value) Stepsize in log2(), i.e. number of shifts.
959//
960//
961WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm)
962{
963
964    WebRtc_Word32 tmp32;
965    WebRtc_Word16 tmp16;
966    WebRtc_Word16 mu;
967
968    // Here we calculate the step size mu used in the
969    // following NLMS based Channel estimation algorithm
970    mu = MU_MAX;
971    if (!aecm->currentVADValue)
972    {
973        // Far end energy level too low, no channel update
974        mu = 0;
975    } else if (aecm->startupState > 0)
976    {
977        if (aecm->farEnergyMin >= aecm->farEnergyMax)
978        {
979            mu = MU_MIN;
980        } else
981        {
982            tmp16 = (aecm->farLogEnergy[0] - aecm->farEnergyMin);
983            tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, MU_DIFF);
984            tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin);
985            mu = MU_MIN - 1 - (WebRtc_Word16)(tmp32);
986            // The -1 is an alternative to rounding. This way we get a larger
987            // stepsize, so we in some sense compensate for truncation in NLMS
988        }
989        if (mu < MU_MAX)
990        {
991            mu = MU_MAX; // Equivalent with maximum step size of 2^-MU_MAX
992        }
993    }
994    // END: Update step size
995
996    return mu;
997}
998
999// WebRtcAecm_UpdateChannel(...)
1000//
1001// This function performs channel estimation. NLMS and decision on channel storage.
1002//
1003//
1004// @param  aecm         [i/o]   Handle of the AECM instance.
1005// @param  dfa          [in]    Absolute value of the nearend signal (Q[aecm->dfaQDomain])
1006// @param  delayDiff    [in]    Delay position in farend buffer.
1007// @param  mu           [in]    NLMS step size.
1008// @param  echoEst      [i/o]   Estimated echo
1009//                              (Q[aecm->xfaQDomain[delayDiff]+RESOLUTION_CHANNEL16]).
1010//
1011void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * const dfa,
1012                              const WebRtc_Word16 delayDiff, const WebRtc_Word16 mu,
1013                              WebRtc_Word32 * const echoEst)
1014{
1015
1016    WebRtc_UWord32 tmpU32no1, tmpU32no2;
1017    WebRtc_Word32 tmp32no1, tmp32no2;
1018    WebRtc_Word32 mseStored;
1019    WebRtc_Word32 mseAdapt;
1020
1021    int i;
1022
1023    WebRtc_Word16 zerosFar, zerosNum, zerosCh, zerosDfa;
1024    WebRtc_Word16 shiftChFar, shiftNum, shift2ResChan;
1025    WebRtc_Word16 tmp16no1;
1026    WebRtc_Word16 xfaQ, dfaQ;
1027
1028    // This is the channel estimation algorithm. It is base on NLMS but has a variable step
1029    // length, which was calculated above.
1030    if (mu)
1031    {
1032        for (i = 0; i < PART_LEN1; i++)
1033        {
1034            // Determine norm of channel and farend to make sure we don't get overflow in
1035            // multiplication
1036            zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]);
1037            zerosFar = WebRtcSpl_NormU32((WebRtc_UWord32)aecm->xfaHistory[i][delayDiff]);
1038            if (zerosCh + zerosFar > 31)
1039            {
1040                // Multiplication is safe
1041                tmpU32no1 = WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i],
1042                        aecm->xfaHistory[i][delayDiff]);
1043                shiftChFar = 0;
1044            } else
1045            {
1046                // We need to shift down before multiplication
1047                shiftChFar = 32 - zerosCh - zerosFar;
1048                tmpU32no1
1049                        = WEBRTC_SPL_UMUL_32_16(WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i],
1050                                        shiftChFar),
1051                                aecm->xfaHistory[i][delayDiff]);
1052            }
1053            // Determine Q-domain of numerator
1054            zerosNum = WebRtcSpl_NormU32(tmpU32no1);
1055            if (dfa[i])
1056            {
1057                zerosDfa = WebRtcSpl_NormU32((WebRtc_UWord32)dfa[i]);
1058            } else
1059            {
1060                zerosDfa = 32;
1061            }
1062            tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain - RESOLUTION_CHANNEL32
1063                    - aecm->xfaQDomainBuf[delayDiff] + shiftChFar;
1064            if (zerosNum > tmp16no1 + 1)
1065            {
1066                xfaQ = tmp16no1;
1067                dfaQ = zerosDfa - 2;
1068            } else
1069            {
1070                xfaQ = zerosNum - 2;
1071                dfaQ = RESOLUTION_CHANNEL32 + aecm->xfaQDomainBuf[delayDiff]
1072                        - aecm->dfaNoisyQDomain - shiftChFar + xfaQ;
1073            }
1074            // Add in the same Q-domain
1075            tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ);
1076            tmpU32no2 = WEBRTC_SPL_SHIFT_W32((WebRtc_UWord32)dfa[i], dfaQ);
1077            tmp32no1 = (WebRtc_Word32)tmpU32no2 - (WebRtc_Word32)tmpU32no1;
1078            zerosNum = WebRtcSpl_NormW32(tmp32no1);
1079            if ((tmp32no1) && (aecm->xfaHistory[i][delayDiff] > (CHANNEL_VAD
1080                    << aecm->xfaQDomainBuf[delayDiff])))
1081            {
1082                //
1083                // Update is needed
1084                //
1085                // This is what we would like to compute
1086                //
1087                // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * aecm->xfaHistory[i][delayDiff])
1088                // tmp32norm = (i + 1)
1089                // aecm->channelAdapt[i] += (2^mu) * tmp32no1
1090                //                        / (tmp32norm * aecm->xfaHistory[i][delayDiff])
1091                //
1092
1093                // Make sure we don't get overflow in multiplication.
1094                if (zerosNum + zerosFar > 31)
1095                {
1096                    if (tmp32no1 > 0)
1097                    {
1098                        tmp32no2 = (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(tmp32no1,
1099                                aecm->xfaHistory[i][delayDiff]);
1100                    } else
1101                    {
1102                        tmp32no2 = -(WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(-tmp32no1,
1103                                aecm->xfaHistory[i][delayDiff]);
1104                    }
1105                    shiftNum = 0;
1106                } else
1107                {
1108                    shiftNum = 32 - (zerosNum + zerosFar);
1109                    if (tmp32no1 > 0)
1110                    {
1111                        tmp32no2 = (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(
1112                                WEBRTC_SPL_RSHIFT_W32(tmp32no1, shiftNum),
1113                                aecm->xfaHistory[i][delayDiff]);
1114                    } else
1115                    {
1116                        tmp32no2 = -(WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(
1117                                WEBRTC_SPL_RSHIFT_W32(-tmp32no1, shiftNum),
1118                                aecm->xfaHistory[i][delayDiff]);
1119                    }
1120                }
1121                // Normalize with respect to frequency bin
1122                tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1);
1123                // Make sure we are in the right Q-domain
1124                shift2ResChan = shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1);
1125                if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan)
1126                {
1127                    tmp32no2 = WEBRTC_SPL_WORD32_MAX;
1128                } else
1129                {
1130                    tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan);
1131                }
1132                aecm->channelAdapt32[i] = WEBRTC_SPL_ADD_SAT_W32(aecm->channelAdapt32[i],
1133                        tmp32no2);
1134                if (aecm->channelAdapt32[i] < 0)
1135                {
1136                    // We can never have negative channel gain
1137                    aecm->channelAdapt32[i] = 0;
1138                }
1139                aecm->channelAdapt16[i]
1140                        = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], 16);
1141            }
1142        }
1143    }
1144    // END: Adaptive channel update
1145
1146    // Determine if we should store or restore the channel
1147    if ((aecm->startupState == 0) & (aecm->currentVADValue))
1148    {
1149        // During startup we store the channel every block.
1150        memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(WebRtc_Word16) * PART_LEN1);
1151        // TODO(bjornv): Will be removed in final version.
1152#ifdef STORE_CHANNEL_DATA
1153        fwrite(aecm->channelStored, sizeof(WebRtc_Word16), PART_LEN1, aecm->channel_file_init);
1154#endif
1155        // Recalculate echo estimate
1156#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
1157        for (i = 0; i < PART_LEN1; i++)
1158        {
1159            echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
1160                    aecm->xfaHistory[i][delayDiff]);
1161        }
1162#else
1163        for (i = 0; i < PART_LEN; ) //assume PART_LEN is 4's multiples
1164
1165        {
1166            echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
1167                    aecm->xfaHistory[i][delayDiff]);
1168            i++;
1169            echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
1170                    aecm->xfaHistory[i][delayDiff]);
1171            i++;
1172            echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
1173                    aecm->xfaHistory[i][delayDiff]);
1174            i++;
1175            echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
1176                    aecm->xfaHistory[i][delayDiff]);
1177            i++;
1178        }
1179        echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
1180                aecm->xfaHistory[i][delayDiff]);
1181#endif
1182    } else
1183    {
1184        if (aecm->farLogEnergy[0] < aecm->farEnergyMSE)
1185        {
1186            aecm->mseChannelCount = 0;
1187            aecm->delayCount = 0;
1188        } else
1189        {
1190            aecm->mseChannelCount++;
1191            aecm->delayCount++;
1192        }
1193        // Enough data for validation. Store channel if we can.
1194        if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10))
1195        {
1196            // We have enough data.
1197            // Calculate MSE of "Adapt" and "Stored" versions.
1198            // It is actually not MSE, but average absolute error.
1199            mseStored = 0;
1200            mseAdapt = 0;
1201            for (i = 0; i < MIN_MSE_COUNT; i++)
1202            {
1203                tmp32no1 = ((WebRtc_Word32)aecm->echoStoredLogEnergy[i]
1204                        - (WebRtc_Word32)aecm->nearLogEnergy[i]);
1205                tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
1206                mseStored += tmp32no2;
1207
1208                tmp32no1 = ((WebRtc_Word32)aecm->echoAdaptLogEnergy[i]
1209                        - (WebRtc_Word32)aecm->nearLogEnergy[i]);
1210                tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1);
1211                mseAdapt += tmp32no2;
1212            }
1213            if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt))
1214                    & ((aecm->mseStoredOld << MSE_RESOLUTION) < (MIN_MSE_DIFF
1215                            * aecm->mseAdaptOld)))
1216            {
1217                // The stored channel has a significantly lower MSE than the adaptive one for
1218                // two consecutive calculations. Reset the adaptive channel.
1219                memcpy(aecm->channelAdapt16, aecm->channelStored,
1220                       sizeof(WebRtc_Word16) * PART_LEN1);
1221                // Restore the W32 channel
1222#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
1223                for (i = 0; i < PART_LEN1; i++)
1224                {
1225                    aecm->channelAdapt32[i]
1226                            = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16);
1227                }
1228#else
1229                for (i = 0; i < PART_LEN; ) //assume PART_LEN is 4's multiples
1230
1231                {
1232                    aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16);
1233                    i++;
1234                    aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16);
1235                    i++;
1236                    aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16);
1237                    i++;
1238                    aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16);
1239                    i++;
1240                }
1241                aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16);
1242#endif
1243
1244            } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & (mseAdapt
1245                    < aecm->mseThreshold) & (aecm->mseAdaptOld < aecm->mseThreshold))
1246            {
1247                // The adaptive channel has a significantly lower MSE than the stored one.
1248                // The MSE for the adaptive channel has also been low for two consecutive
1249                // calculations. Store the adaptive channel.
1250                memcpy(aecm->channelStored, aecm->channelAdapt16,
1251                       sizeof(WebRtc_Word16) * PART_LEN1);
1252                // TODO(bjornv): Will be removed in final version.
1253#ifdef STORE_CHANNEL_DATA
1254                fwrite(aecm->channelStored, sizeof(WebRtc_Word16), PART_LEN1,
1255                       aecm->channel_file);
1256#endif
1257// Recalculate echo estimate
1258#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
1259                for (i = 0; i < PART_LEN1; i++)
1260                {
1261                    echoEst[i]
1262                            = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]);
1263                }
1264#else
1265                for (i = 0; i < PART_LEN; ) //assume PART_LEN is 4's multiples
1266
1267                {
1268                    echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]);
1269                    i++;
1270                    echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]);
1271                    i++;
1272                    echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]);
1273                    i++;
1274                    echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]);
1275                    i++;
1276                }
1277                echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]);
1278#endif
1279                // Update threshold
1280                if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX)
1281                {
1282                    aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld);
1283                } else
1284                {
1285                    aecm->mseThreshold += WEBRTC_SPL_MUL_16_16_RSFT(mseAdapt
1286                            - WEBRTC_SPL_MUL_16_16_RSFT(aecm->mseThreshold, 5, 3), 205, 8);
1287                }
1288
1289            }
1290
1291            // Reset counter
1292            aecm->mseChannelCount = 0;
1293
1294            // Store the MSE values.
1295            aecm->mseStoredOld = mseStored;
1296            aecm->mseAdaptOld = mseAdapt;
1297        }
1298    }
1299    // END: Determine if we should store or reset channel estimate.
1300}
1301
1302// WebRtcAecm_CalcSuppressionGain(...)
1303//
1304// This function calculates the suppression gain that is used in the Wiener filter.
1305//
1306//
1307// @param  aecm     [i/n]   Handle of the AECM instance.
1308// @param  supGain  [out]   (Return value) Suppression gain with which to scale the noise
1309//                          level (Q14).
1310//
1311//
1312WebRtc_Word16 WebRtcAecm_CalcSuppressionGain(AecmCore_t * const aecm)
1313{
1314    WebRtc_Word32 tmp32no1;
1315
1316    WebRtc_Word16 supGain;
1317    WebRtc_Word16 tmp16no1;
1318    WebRtc_Word16 dE = 0;
1319
1320    // Determine suppression gain used in the Wiener filter. The gain is based on a mix of far
1321    // end energy and echo estimation error.
1322    supGain = SUPGAIN_DEFAULT;
1323    // Adjust for the far end signal level. A low signal level indicates no far end signal,
1324    // hence we set the suppression gain to 0
1325    if (!aecm->currentVADValue)
1326    {
1327        supGain = 0;
1328    } else
1329    {
1330        // Adjust for possible double talk. If we have large variations in estimation error we
1331        // likely have double talk (or poor channel).
1332        tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - ENERGY_DEV_OFFSET);
1333        dE = WEBRTC_SPL_ABS_W16(tmp16no1);
1334
1335        if (dE < ENERGY_DEV_TOL)
1336        {
1337            // Likely no double talk. The better estimation, the more we can suppress signal.
1338            // Update counters
1339            if (dE < SUPGAIN_EPC_DT)
1340            {
1341                tmp32no1 = WEBRTC_SPL_MUL_16_16(aecm->supGainErrParamDiffAB, dE);
1342                tmp32no1 += (SUPGAIN_EPC_DT >> 1);
1343                tmp16no1 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT);
1344                supGain = aecm->supGainErrParamA - tmp16no1;
1345            } else
1346            {
1347                tmp32no1 = WEBRTC_SPL_MUL_16_16(aecm->supGainErrParamDiffBD,
1348                                                (ENERGY_DEV_TOL - dE));
1349                tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1);
1350                tmp16no1 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL
1351                        - SUPGAIN_EPC_DT));
1352                supGain = aecm->supGainErrParamD + tmp16no1;
1353            }
1354        } else
1355        {
1356            // Likely in double talk. Use default value
1357            supGain = aecm->supGainErrParamD;
1358        }
1359    }
1360
1361    if (supGain > aecm->supGainOld)
1362    {
1363        tmp16no1 = supGain;
1364    } else
1365    {
1366        tmp16no1 = aecm->supGainOld;
1367    }
1368    aecm->supGainOld = supGain;
1369    if (tmp16no1 < aecm->supGain)
1370    {
1371        aecm->supGain += (WebRtc_Word16)((tmp16no1 - aecm->supGain) >> 4);
1372    } else
1373    {
1374        aecm->supGain += (WebRtc_Word16)((tmp16no1 - aecm->supGain) >> 4);
1375    }
1376
1377    // END: Update suppression gain
1378
1379    return aecm->supGain;
1380}
1381
1382// WebRtcAecm_DelayCompensation(...)
1383//
1384// Secondary delay estimation that can be used as a backup or for validation. This function is
1385// still under construction and not activated in current version.
1386//
1387//
1388// @param  aecm  [i/o]   Handle of the AECM instance.
1389//
1390//
1391void WebRtcAecm_DelayCompensation(AecmCore_t * const aecm)
1392{
1393    int i, j;
1394    WebRtc_Word32 delayMeanEcho[CORR_BUF_LEN];
1395    WebRtc_Word32 delayMeanNear[CORR_BUF_LEN];
1396    WebRtc_Word16 sumBitPattern, bitPatternEcho, bitPatternNear, maxPos, maxValue,
1397            maxValueLeft, maxValueRight;
1398
1399    // Check delay (calculate the delay offset (if we can)).
1400    if ((aecm->startupState > 0) & (aecm->delayCount >= CORR_MAX_BUF) & aecm->delayOffsetFlag)
1401    {
1402        // Calculate mean values
1403        for (i = 0; i < CORR_BUF_LEN; i++)
1404        {
1405            delayMeanEcho[i] = 0;
1406            delayMeanNear[i] = 0;
1407#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
1408            for (j = 0; j < CORR_WIDTH; j++)
1409            {
1410                delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j];
1411                delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j];
1412            }
1413#else
1414            for (j = 0; j < CORR_WIDTH -1; )
1415            {
1416                delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j];
1417                delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j];
1418                j++;
1419                delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j];
1420                delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j];
1421                j++;
1422            }
1423            delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j];
1424            delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j];
1425#endif
1426        }
1427        // Calculate correlation values
1428        for (i = 0; i < CORR_BUF_LEN; i++)
1429        {
1430            sumBitPattern = 0;
1431#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
1432            for (j = 0; j < CORR_WIDTH; j++)
1433            {
1434                bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i
1435                        + j] * CORR_WIDTH > delayMeanEcho[i]);
1436                bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX
1437                        + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]);
1438                sumBitPattern += !(bitPatternEcho ^ bitPatternNear);
1439            }
1440#else
1441            for (j = 0; j < CORR_WIDTH -1; )
1442            {
1443                bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i
1444                    + j] * CORR_WIDTH > delayMeanEcho[i]);
1445                bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX
1446                    + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]);
1447                sumBitPattern += !(bitPatternEcho ^ bitPatternNear);
1448                j++;
1449                bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i
1450                    + j] * CORR_WIDTH > delayMeanEcho[i]);
1451                bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX
1452                    + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]);
1453                sumBitPattern += !(bitPatternEcho ^ bitPatternNear);
1454                j++;
1455            }
1456            bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]
1457                    * CORR_WIDTH > delayMeanEcho[i]);
1458            bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX + j]
1459                    * CORR_WIDTH > delayMeanNear[CORR_MAX]);
1460            sumBitPattern += !(bitPatternEcho ^ bitPatternNear);
1461#endif
1462            aecm->delayCorrelation[i] = sumBitPattern;
1463        }
1464        aecm->newDelayCorrData = 1; // Indicate we have new correlation data to evaluate
1465    }
1466    if ((aecm->startupState == 2) & (aecm->lastDelayUpdateCount > (CORR_WIDTH << 1))
1467            & aecm->newDelayCorrData)
1468    {
1469        // Find maximum value and maximum position as well as values on the sides.
1470        maxPos = 0;
1471        maxValue = aecm->delayCorrelation[0];
1472        maxValueLeft = maxValue;
1473        maxValueRight = aecm->delayCorrelation[CORR_DEV];
1474        for (i = 1; i < CORR_BUF_LEN; i++)
1475        {
1476            if (aecm->delayCorrelation[i] > maxValue)
1477            {
1478                maxValue = aecm->delayCorrelation[i];
1479                maxPos = i;
1480                if (maxPos < CORR_DEV)
1481                {
1482                    maxValueLeft = aecm->delayCorrelation[0];
1483                    maxValueRight = aecm->delayCorrelation[i + CORR_DEV];
1484                } else if (maxPos > (CORR_MAX << 1) - CORR_DEV)
1485                {
1486                    maxValueLeft = aecm->delayCorrelation[i - CORR_DEV];
1487                    maxValueRight = aecm->delayCorrelation[(CORR_MAX << 1)];
1488                } else
1489                {
1490                    maxValueLeft = aecm->delayCorrelation[i - CORR_DEV];
1491                    maxValueRight = aecm->delayCorrelation[i + CORR_DEV];
1492                }
1493            }
1494        }
1495        if ((maxPos > 0) & (maxPos < (CORR_MAX << 1)))
1496        {
1497            // Avoid maximum at boundaries. The maximum peak has to be higher than
1498            // CORR_MAX_LEVEL. It also has to be sharp, i.e. the value CORR_DEV bins off should
1499            // be CORR_MAX_LOW lower than the maximum.
1500            if ((maxValue > CORR_MAX_LEVEL) & (maxValueLeft < maxValue - CORR_MAX_LOW)
1501                    & (maxValueRight < maxValue - CORR_MAX_LOW))
1502            {
1503                aecm->delayAdjust += CORR_MAX - maxPos;
1504                aecm->newDelayCorrData = 0;
1505                aecm->lastDelayUpdateCount = 0;
1506            }
1507        }
1508    }
1509    // END: "Check delay"
1510}
1511
1512void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * const farend,
1513                             const WebRtc_Word16 * const nearendNoisy,
1514                             const WebRtc_Word16 * const nearendClean,
1515                             WebRtc_Word16 * const output)
1516{
1517    int i, j;
1518
1519    WebRtc_UWord32 xfaSum;
1520    WebRtc_UWord32 dfaNoisySum;
1521    WebRtc_UWord32 echoEst32Gained;
1522    WebRtc_UWord32 tmpU32;
1523
1524    WebRtc_Word32 tmp32no1;
1525    WebRtc_Word32 tmp32no2;
1526    WebRtc_Word32 echoEst32[PART_LEN1];
1527
1528    WebRtc_UWord16 xfa[PART_LEN1];
1529    WebRtc_UWord16 dfaNoisy[PART_LEN1];
1530    WebRtc_UWord16 dfaClean[PART_LEN1];
1531    WebRtc_UWord16* ptrDfaClean = dfaClean;
1532
1533    int outCFFT;
1534
1535    WebRtc_Word16 fft[PART_LEN4];
1536#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
1537    WebRtc_Word16 postFft[PART_LEN4];
1538#else
1539    WebRtc_Word16 postFft[PART_LEN2];
1540#endif
1541    WebRtc_Word16 dfwReal[PART_LEN1];
1542    WebRtc_Word16 dfwImag[PART_LEN1];
1543    WebRtc_Word16 xfwReal[PART_LEN1];
1544    WebRtc_Word16 xfwImag[PART_LEN1];
1545    WebRtc_Word16 efwReal[PART_LEN1];
1546    WebRtc_Word16 efwImag[PART_LEN1];
1547    WebRtc_Word16 hnl[PART_LEN1];
1548    WebRtc_Word16 numPosCoef;
1549    WebRtc_Word16 nlpGain;
1550    WebRtc_Word16 delay, diff, diffMinusOne;
1551    WebRtc_Word16 tmp16no1;
1552    WebRtc_Word16 tmp16no2;
1553#ifdef AECM_WITH_ABS_APPROX
1554    WebRtc_Word16 maxValue;
1555    WebRtc_Word16 minValue;
1556#endif
1557    WebRtc_Word16 mu;
1558    WebRtc_Word16 supGain;
1559    WebRtc_Word16 zeros32, zeros16;
1560    WebRtc_Word16 zerosDBufNoisy, zerosDBufClean, zerosXBuf;
1561    WebRtc_Word16 resolutionDiff, qDomainDiff;
1562
1563#ifdef ARM_WINM_LOG_
1564    DWORD temp;
1565    static int flag0 = 0;
1566    __int64 freq, start, end, diff__;
1567    unsigned int milliseconds;
1568#endif
1569
1570#ifdef AECM_WITH_ABS_APPROX
1571    WebRtc_UWord16 alpha, beta;
1572#endif
1573
1574    // Determine startup state. There are three states:
1575    // (0) the first CONV_LEN blocks
1576    // (1) another CONV_LEN blocks
1577    // (2) the rest
1578
1579    if (aecm->startupState < 2)
1580    {
1581        aecm->startupState = (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2);
1582    }
1583    // END: Determine startup state
1584
1585    // Buffer near and far end signals
1586    memcpy(aecm->xBuf + PART_LEN, farend, sizeof(WebRtc_Word16) * PART_LEN);
1587    memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(WebRtc_Word16) * PART_LEN);
1588    if (nearendClean != NULL)
1589    {
1590        memcpy(aecm->dBufClean + PART_LEN, nearendClean, sizeof(WebRtc_Word16) * PART_LEN);
1591    }
1592    // TODO(bjornv): Will be removed in final version.
1593#ifdef VAD_DATA
1594    fwrite(aecm->xBuf, sizeof(WebRtc_Word16), PART_LEN, aecm->far_file);
1595#endif
1596
1597#ifdef AECM_DYNAMIC_Q
1598    tmp16no1 = WebRtcSpl_MaxAbsValueW16(aecm->dBufNoisy, PART_LEN2);
1599    tmp16no2 = WebRtcSpl_MaxAbsValueW16(aecm->xBuf, PART_LEN2);
1600    zerosDBufNoisy = WebRtcSpl_NormW16(tmp16no1);
1601    zerosXBuf = WebRtcSpl_NormW16(tmp16no2);
1602#else
1603    zerosDBufNoisy = 0;
1604    zerosXBuf = 0;
1605#endif
1606    aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
1607    aecm->dfaNoisyQDomain = zerosDBufNoisy;
1608
1609    if (nearendClean != NULL)
1610    {
1611#ifdef AECM_DYNAMIC_Q
1612        tmp16no1 = WebRtcSpl_MaxAbsValueW16(aecm->dBufClean, PART_LEN2);
1613        zerosDBufClean = WebRtcSpl_NormW16(tmp16no1);
1614#else
1615        zerosDBufClean = 0;
1616#endif
1617        aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
1618        aecm->dfaCleanQDomain = zerosDBufClean;
1619    } else
1620    {
1621        zerosDBufClean = zerosDBufNoisy;
1622        aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
1623        aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
1624    }
1625
1626#ifdef ARM_WINM_LOG_
1627    // measure tick start
1628    QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
1629    QueryPerformanceCounter((LARGE_INTEGER*)&start);
1630#endif
1631
1632    // FFT of noisy near end signal
1633    for (i = 0; i < PART_LEN; i++)
1634    {
1635        j = WEBRTC_SPL_LSHIFT_W32(i, 1);
1636        // Window near end
1637        fft[j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufNoisy[i]
1638                        << zerosDBufNoisy), kSqrtHanning[i], 14);
1639        fft[PART_LEN2 + j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
1640                (aecm->dBufNoisy[PART_LEN + i] << zerosDBufNoisy),
1641                kSqrtHanning[PART_LEN - i], 14);
1642        // Inserting zeros in imaginary parts
1643        fft[j + 1] = 0;
1644        fft[PART_LEN2 + j + 1] = 0;
1645    }
1646
1647    // Fourier transformation of near end signal.
1648    // The result is scaled with 1/PART_LEN2, that is, the result is in Q(-6) for PART_LEN = 32
1649
1650#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
1651    outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1);
1652
1653    // The imaginary part has to switch sign
1654    for(i = 1; i < PART_LEN2-1;)
1655    {
1656        postFft[i] = -postFft[i];
1657        i += 2;
1658        postFft[i] = -postFft[i];
1659        i += 2;
1660    }
1661#else
1662    WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
1663    outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
1664
1665    // Take only the first PART_LEN2 samples
1666    for (i = 0; i < PART_LEN2; i++)
1667    {
1668        postFft[i] = fft[i];
1669    }
1670    // The imaginary part has to switch sign
1671    for (i = 1; i < PART_LEN2;)
1672    {
1673        postFft[i] = -postFft[i];
1674        i += 2;
1675    }
1676#endif
1677
1678    // Extract imaginary and real part, calculate the magnitude for all frequency bins
1679    dfwImag[0] = 0;
1680    dfwImag[PART_LEN] = 0;
1681    dfwReal[0] = postFft[0];
1682#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
1683    dfwReal[PART_LEN] = postFft[PART_LEN2];
1684#else
1685    dfwReal[PART_LEN] = fft[PART_LEN2];
1686#endif
1687    dfaNoisy[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[0]);
1688    dfaNoisy[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[PART_LEN]);
1689    dfaNoisySum = (WebRtc_UWord32)(dfaNoisy[0]);
1690    dfaNoisySum += (WebRtc_UWord32)(dfaNoisy[PART_LEN]);
1691
1692    for (i = 1; i < PART_LEN; i++)
1693    {
1694        j = WEBRTC_SPL_LSHIFT_W32(i, 1);
1695        dfwReal[i] = postFft[j];
1696        dfwImag[i] = postFft[j + 1];
1697
1698        if (dfwReal[i] == 0 || dfwImag[i] == 0)
1699        {
1700            dfaNoisy[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[i] + dfwImag[i]);
1701        } else
1702        {
1703            // Approximation for magnitude of complex fft output
1704            // magn = sqrt(real^2 + imag^2)
1705            // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
1706            //
1707            // The parameters alpha and beta are stored in Q15
1708
1709            tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]);
1710            tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]);
1711
1712#ifdef AECM_WITH_ABS_APPROX
1713            if(tmp16no1 > tmp16no2)
1714            {
1715                maxValue = tmp16no1;
1716                minValue = tmp16no2;
1717            } else
1718            {
1719                maxValue = tmp16no2;
1720                minValue = tmp16no1;
1721            }
1722
1723            // Magnitude in Q-6
1724            if ((maxValue >> 2) > minValue)
1725            {
1726                alpha = kAlpha1;
1727                beta = kBeta1;
1728            } else if ((maxValue >> 1) > minValue)
1729            {
1730                alpha = kAlpha2;
1731                beta = kBeta2;
1732            } else
1733            {
1734                alpha = kAlpha3;
1735                beta = kBeta3;
1736            }
1737            tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15);
1738            tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15);
1739            dfaNoisy[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2;
1740#else
1741            tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
1742            tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
1743            tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2);
1744            tmp32no1 = WebRtcSpl_Sqrt(tmp32no2);
1745            dfaNoisy[i] = (WebRtc_UWord16)tmp32no1;
1746#endif
1747        }
1748        dfaNoisySum += (WebRtc_UWord32)dfaNoisy[i];
1749    }
1750    // END: FFT of noisy near end signal
1751
1752    if (nearendClean == NULL)
1753    {
1754        ptrDfaClean = dfaNoisy;
1755    } else
1756    {
1757        // FFT of clean near end signal
1758        for (i = 0; i < PART_LEN; i++)
1759        {
1760            j = WEBRTC_SPL_LSHIFT_W32(i, 1);
1761            // Window near end
1762            fft[j]
1763                    = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufClean[i] << zerosDBufClean), kSqrtHanning[i], 14);
1764            fft[PART_LEN2 + j]
1765                    = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufClean[PART_LEN + i] << zerosDBufClean), kSqrtHanning[PART_LEN - i], 14);
1766            // Inserting zeros in imaginary parts
1767            fft[j + 1] = 0;
1768            fft[PART_LEN2 + j + 1] = 0;
1769        }
1770
1771        // Fourier transformation of near end signal.
1772        // The result is scaled with 1/PART_LEN2, that is, in Q(-6) for PART_LEN = 32
1773
1774#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
1775        outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1);
1776
1777        // The imaginary part has to switch sign
1778        for(i = 1; i < PART_LEN2-1;)
1779        {
1780            postFft[i] = -postFft[i];
1781            i += 2;
1782            postFft[i] = -postFft[i];
1783            i += 2;
1784        }
1785#else
1786        WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
1787        outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
1788
1789        // Take only the first PART_LEN2 samples
1790        for (i = 0; i < PART_LEN2; i++)
1791        {
1792            postFft[i] = fft[i];
1793        }
1794        // The imaginary part has to switch sign
1795        for (i = 1; i < PART_LEN2;)
1796        {
1797            postFft[i] = -postFft[i];
1798            i += 2;
1799        }
1800#endif
1801
1802        // Extract imaginary and real part, calculate the magnitude for all frequency bins
1803        dfwImag[0] = 0;
1804        dfwImag[PART_LEN] = 0;
1805        dfwReal[0] = postFft[0];
1806#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
1807        dfwReal[PART_LEN] = postFft[PART_LEN2];
1808#else
1809        dfwReal[PART_LEN] = fft[PART_LEN2];
1810#endif
1811        dfaClean[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[0]);
1812        dfaClean[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[PART_LEN]);
1813
1814        for (i = 1; i < PART_LEN; i++)
1815        {
1816            j = WEBRTC_SPL_LSHIFT_W32(i, 1);
1817            dfwReal[i] = postFft[j];
1818            dfwImag[i] = postFft[j + 1];
1819
1820            if (dfwReal[i] == 0 || dfwImag[i] == 0)
1821            {
1822                dfaClean[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[i] + dfwImag[i]);
1823            } else
1824            {
1825                // Approximation for magnitude of complex fft output
1826                // magn = sqrt(real^2 + imag^2)
1827                // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
1828                //
1829                // The parameters alpha and beta are stored in Q15
1830
1831                tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]);
1832                tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]);
1833
1834#ifdef AECM_WITH_ABS_APPROX
1835                if(tmp16no1 > tmp16no2)
1836                {
1837                    maxValue = tmp16no1;
1838                    minValue = tmp16no2;
1839                } else
1840                {
1841                    maxValue = tmp16no2;
1842                    minValue = tmp16no1;
1843                }
1844
1845                // Magnitude in Q-6
1846                if ((maxValue >> 2) > minValue)
1847                {
1848                    alpha = kAlpha1;
1849                    beta = kBeta1;
1850                } else if ((maxValue >> 1) > minValue)
1851                {
1852                    alpha = kAlpha2;
1853                    beta = kBeta2;
1854                } else
1855                {
1856                    alpha = kAlpha3;
1857                    beta = kBeta3;
1858                }
1859                tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15);
1860                tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15);
1861                dfaClean[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2;
1862#else
1863                tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
1864                tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
1865                tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2);
1866                tmp32no1 = WebRtcSpl_Sqrt(tmp32no2);
1867                dfaClean[i] = (WebRtc_UWord16)tmp32no1;
1868#endif
1869            }
1870        }
1871    }
1872    // END: FFT of clean near end signal
1873
1874    // FFT of far end signal
1875    for (i = 0; i < PART_LEN; i++)
1876    {
1877        j = WEBRTC_SPL_LSHIFT_W32(i, 1);
1878        // Window farend
1879        fft[j]
1880                = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->xBuf[i] << zerosXBuf), kSqrtHanning[i], 14);
1881        fft[PART_LEN2 + j]
1882                = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->xBuf[PART_LEN + i] << zerosXBuf), kSqrtHanning[PART_LEN - i], 14);
1883        // Inserting zeros in imaginary parts
1884        fft[j + 1] = 0;
1885        fft[PART_LEN2 + j + 1] = 0;
1886    }
1887    // Fourier transformation of far end signal.
1888    // The result is scaled with 1/PART_LEN2, that is the result is in Q(-6) for PART_LEN = 32
1889#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
1890    outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1);
1891
1892    // The imaginary part has to switch sign
1893    for(i = 1; i < PART_LEN2-1;)
1894    {
1895        postFft[i] = -postFft[i];
1896        i += 2;
1897        postFft[i] = -postFft[i];
1898        i += 2;
1899    }
1900#else
1901    WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
1902    outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
1903
1904    // Take only the first PART_LEN2 samples
1905    for (i = 0; i < PART_LEN2; i++)
1906    {
1907        postFft[i] = fft[i];
1908    }
1909    // The imaginary part has to switch sign
1910    for (i = 1; i < PART_LEN2;)
1911    {
1912        postFft[i] = -postFft[i];
1913        i += 2;
1914    }
1915#endif
1916
1917    // Extract imaginary and real part, calculate the magnitude for all frequency bins
1918    xfwImag[0] = 0;
1919    xfwImag[PART_LEN] = 0;
1920    xfwReal[0] = postFft[0];
1921#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT)
1922    xfwReal[PART_LEN] = postFft[PART_LEN2];
1923#else
1924    xfwReal[PART_LEN] = fft[PART_LEN2];
1925#endif
1926    xfa[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[0]);
1927    xfa[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[PART_LEN]);
1928    xfaSum = (WebRtc_UWord32)(xfa[0]) + (WebRtc_UWord32)(xfa[PART_LEN]);
1929
1930    for (i = 1; i < PART_LEN; i++)
1931    {
1932        j = WEBRTC_SPL_LSHIFT_W32(i,1);
1933        xfwReal[i] = postFft[j];
1934        xfwImag[i] = postFft[j + 1];
1935
1936        if (xfwReal[i] == 0 || xfwImag[i] == 0)
1937        {
1938            xfa[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[i] + xfwImag[i]);
1939        } else
1940        {
1941            // Approximation for magnitude of complex fft output
1942            // magn = sqrt(real^2 + imag^2)
1943            // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
1944            //
1945            // The parameters alpha and beta are stored in Q15
1946
1947            tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]);
1948            tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]);
1949
1950#ifdef AECM_WITH_ABS_APPROX
1951            if(tmp16no1 > xfwImag[i])
1952            {
1953                maxValue = tmp16no1;
1954                minValue = tmp16no2;
1955            } else
1956            {
1957                maxValue = tmp16no2;
1958                minValue = tmp16no1;
1959            }
1960            // Magnitude in Q-6
1961            if ((maxValue >> 2) > minValue)
1962            {
1963                alpha = kAlpha1;
1964                beta = kBeta1;
1965            } else if ((maxValue >> 1) > minValue)
1966            {
1967                alpha = kAlpha2;
1968                beta = kBeta2;
1969            } else
1970            {
1971                alpha = kAlpha3;
1972                beta = kBeta3;
1973            }
1974            tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15);
1975            tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15);
1976            xfa[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2;
1977#else
1978            tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
1979            tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
1980            tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2);
1981            tmp32no1 = WebRtcSpl_Sqrt(tmp32no2);
1982            xfa[i] = (WebRtc_UWord16)tmp32no1;
1983#endif
1984        }
1985        xfaSum += (WebRtc_UWord32)xfa[i];
1986    }
1987
1988#ifdef ARM_WINM_LOG_
1989    // measure tick end
1990    QueryPerformanceCounter((LARGE_INTEGER*)&end);
1991    diff__ = ((end - start) * 1000) / (freq/1000);
1992    milliseconds = (unsigned int)(diff__ & 0xffffffff);
1993    WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL);
1994#endif
1995    // END: FFT of far end signal
1996
1997    // Get the delay
1998
1999    // Fixed delay estimation
2000    // input: dfaFIX, xfaFIX in Q-stages
2001    // output: delay in Q0
2002    //
2003    // comment on the fixed point accuracy of estimate_delayFIX
2004    // -> due to rounding the fixed point variables xfa and dfa contain a lot more zeros
2005    // than the corresponding floating point variables this results in big differences
2006    // between the floating point and the fixed point logarithmic spectra for small values
2007#ifdef ARM_WINM_LOG_
2008    // measure tick start
2009    QueryPerformanceCounter((LARGE_INTEGER*)&start);
2010#endif
2011
2012    // Save far-end history and estimate delay
2013    delay = WebRtcAecm_EstimateDelay(aecm, xfa, dfaNoisy, zerosXBuf);
2014
2015    if (aecm->fixedDelay >= 0)
2016    {
2017        // Use fixed delay
2018        delay = aecm->fixedDelay;
2019    }
2020
2021    aecm->currentDelay = delay;
2022
2023    if ((aecm->delayOffsetFlag) & (aecm->startupState > 0)) // If delay compensation is on
2024    {
2025        // If the delay estimate changed from previous block, update the offset
2026        if ((aecm->currentDelay != aecm->previousDelay) & !aecm->currentDelay
2027                & !aecm->previousDelay)
2028        {
2029            aecm->delayAdjust += (aecm->currentDelay - aecm->previousDelay);
2030        }
2031        // Compensate with the offset estimate
2032        aecm->currentDelay -= aecm->delayAdjust;
2033        aecm->previousDelay = delay;
2034    }
2035
2036    diff = aecm->delHistoryPos - aecm->currentDelay;
2037    if (diff < 0)
2038    {
2039        diff = diff + MAX_DELAY;
2040    }
2041
2042#ifdef ARM_WINM_LOG_
2043    // measure tick end
2044    QueryPerformanceCounter((LARGE_INTEGER*)&end);
2045    diff__ = ((end - start) * 1000) / (freq/1000);
2046    milliseconds = (unsigned int)(diff__ & 0xffffffff);
2047    WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL);
2048#endif
2049
2050    // END: Get the delay
2051
2052#ifdef ARM_WINM_LOG_
2053    // measure tick start
2054    QueryPerformanceCounter((LARGE_INTEGER*)&start);
2055#endif
2056    // Calculate log(energy) and update energy threshold levels
2057    WebRtcAecm_CalcEnergies(aecm, diff, dfaNoisySum, echoEst32);
2058
2059    // Calculate stepsize
2060    mu = WebRtcAecm_CalcStepSize(aecm);
2061
2062    // Update counters
2063    aecm->totCount++;
2064    aecm->lastDelayUpdateCount++;
2065
2066    // This is the channel estimation algorithm.
    // It is based on NLMS but has a variable step length, which was calculated above.
2068    WebRtcAecm_UpdateChannel(aecm, dfaNoisy, diff, mu, echoEst32);
2069    WebRtcAecm_DelayCompensation(aecm);
2070    supGain = WebRtcAecm_CalcSuppressionGain(aecm);
2071
2072#ifdef ARM_WINM_LOG_
2073    // measure tick end
2074    QueryPerformanceCounter((LARGE_INTEGER*)&end);
2075    diff__ = ((end - start) * 1000) / (freq/1000);
2076    milliseconds = (unsigned int)(diff__ & 0xffffffff);
2077    WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL);
2078#endif
2079
2080#ifdef ARM_WINM_LOG_
2081    // measure tick start
2082    QueryPerformanceCounter((LARGE_INTEGER*)&start);
2083#endif
2084
2085    // Calculate Wiener filter hnl[]
2086    numPosCoef = 0;
2087    diffMinusOne = diff - 1;
2088    if (diff == 0)
2089    {
2090        diffMinusOne = MAX_DELAY;
2091    }
2092    for (i = 0; i < PART_LEN1; i++)
2093    {
2094        // Far end signal through channel estimate in Q8
2095        // How much can we shift right to preserve resolution
2096        tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
2097        aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8);
2098
2099        zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
2100        zeros16 = WebRtcSpl_NormW16(supGain) + 1;
2101        if (zeros32 + zeros16 > 16)
2102        {
2103            // Multiplication is safe
2104            // Result in Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
2105            echoEst32Gained = WEBRTC_SPL_UMUL_32_16((WebRtc_UWord32)aecm->echoFilt[i],
2106                                                    (WebRtc_UWord16)supGain);
2107            resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
2108            resolutionDiff += (aecm->dfaCleanQDomain - aecm->xfaQDomainBuf[diff]);
2109        } else
2110        {
2111            tmp16no1 = 17 - zeros32 - zeros16;
2112            resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
2113            resolutionDiff += (aecm->dfaCleanQDomain - aecm->xfaQDomainBuf[diff]);
2114            if (zeros32 > tmp16no1)
2115            {
2116                echoEst32Gained = WEBRTC_SPL_UMUL_32_16((WebRtc_UWord32)aecm->echoFilt[i],
2117                        (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_W16(supGain,
2118                                tmp16no1)); // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
2119            } else
2120            {
2121                // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
2122                echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
2123                        (WebRtc_UWord32)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i], tmp16no1),
2124                        (WebRtc_UWord16)supGain);
2125            }
2126        }
2127
2128        zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
2129        if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld))
2130                & (aecm->nearFilt[i]))
2131        {
2132            tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16);
2133            qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld;
2134        } else
2135        {
2136            tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], aecm->dfaCleanQDomain
2137                                            - aecm->dfaCleanQDomainOld);
2138            qDomainDiff = 0;
2139        }
2140        tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff);
2141        tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no2 - tmp16no1, 1, 4);
2142        tmp16no2 += tmp16no1;
2143        zeros16 = WebRtcSpl_NormW16(tmp16no2);
2144        if ((tmp16no2) & (-qDomainDiff > zeros16))
2145        {
2146            aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
2147        } else
2148        {
2149            aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff);
2150        }
2151
2152        // Wiener filter coefficients, resulting hnl in Q14
2153        if (echoEst32Gained == 0)
2154        {
2155            hnl[i] = ONE_Q14;
2156        } else if (aecm->nearFilt[i] == 0)
2157        {
2158            hnl[i] = 0;
2159        } else
2160        {
2161            // Multiply the suppression gain
2162            // Rounding
2163            echoEst32Gained += (WebRtc_UWord32)(aecm->nearFilt[i] >> 1);
2164            tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, (WebRtc_UWord16)aecm->nearFilt[i]);
2165
2166            // Current resolution is
2167            // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN - max(0, 17 - zeros16 - zeros32))
2168            // Make sure we are in Q14
2169            tmp32no1 = (WebRtc_Word32)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
2170            if (tmp32no1 > ONE_Q14)
2171            {
2172                hnl[i] = 0;
2173            } else if (tmp32no1 < 0)
2174            {
2175                hnl[i] = ONE_Q14;
2176            } else
2177            {
2178                // 1-echoEst/dfa
2179#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
2180                hnl[i] = ONE_Q14 - (WebRtc_Word16)tmp32no1;
2181                if (hnl[i] < 0)
2182                {
2183                    hnl[i] = 0;
2184                }
2185#else
2186                hnl[i] = ((ONE_Q14 - (WebRtc_Word16)tmp32no1) > 0) ? (ONE_Q14 - (WebRtc_Word16)tmp32no1) : 0;
2187#endif
2188            }
2189        }
2190        if (hnl[i])
2191        {
2192            numPosCoef++;
2193        }
2194    }
2195
2196#ifdef ARM_WINM_LOG_
2197    // measure tick end
2198    QueryPerformanceCounter((LARGE_INTEGER*)&end);
2199    diff__ = ((end - start) * 1000) / (freq/1000);
2200    milliseconds = (unsigned int)(diff__ & 0xffffffff);
2201    WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL);
2202#endif
2203
2204#ifdef ARM_WINM_LOG_
2205    // measure tick start
2206    QueryPerformanceCounter((LARGE_INTEGER*)&start);
2207#endif
2208
2209    // Calculate NLP gain, result is in Q14
2210    for (i = 0; i < PART_LEN1; i++)
2211    {
2212        if (aecm->nlpFlag)
2213        {
2214            // Truncate values close to zero and one.
2215            if (hnl[i] > NLP_COMP_HIGH)
2216            {
2217                hnl[i] = ONE_Q14;
2218            } else if (hnl[i] < NLP_COMP_LOW)
2219            {
2220                hnl[i] = 0;
2221            }
2222
2223            // Remove outliers
2224            if (numPosCoef < 3)
2225            {
2226                nlpGain = 0;
2227            } else
2228            {
2229                nlpGain = ONE_Q14;
2230            }
2231            // NLP
2232            if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
2233            {
2234                hnl[i] = ONE_Q14;
2235            } else
2236            {
2237                hnl[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14);
2238            }
2239        }
2240
2241        // multiply with Wiener coefficients
2242        efwReal[i] = (WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfwReal[i], hnl[i],
2243                                                                          14));
2244        efwImag[i] = (WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfwImag[i], hnl[i],
2245                                                                          14));
2246    }
2247
2248    if (aecm->cngMode == AecmTrue)
2249    {
2250        WebRtcAecm_ComfortNoise(aecm, ptrDfaClean, efwReal, efwImag, hnl);
2251    }
2252
2253#ifdef ARM_WINM_LOG_
2254    // measure tick end
2255    QueryPerformanceCounter((LARGE_INTEGER*)&end);
2256    diff__ = ((end - start) * 1000) / (freq/1000);
2257    milliseconds = (unsigned int)(diff__ & 0xffffffff);
2258    WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL);
2259#endif
2260
2261#ifdef ARM_WINM_LOG_
2262    // measure tick start
2263    QueryPerformanceCounter((LARGE_INTEGER*)&start);
2264#endif
2265
2266    // Synthesis
2267    for (i = 1; i < PART_LEN; i++)
2268    {
2269        j = WEBRTC_SPL_LSHIFT_W32(i, 1);
2270        fft[j] = efwReal[i];
2271
2272        // mirrored data, even
2273        fft[PART_LEN4 - j] = efwReal[i];
2274        fft[j + 1] = -efwImag[i];
2275
2276        //mirrored data, odd
2277        fft[PART_LEN4 - (j - 1)] = efwImag[i];
2278    }
2279    fft[0] = efwReal[0];
2280    fft[1] = -efwImag[0];
2281
2282    fft[PART_LEN2] = efwReal[PART_LEN];
2283    fft[PART_LEN2 + 1] = -efwImag[PART_LEN];
2284
2285#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
2286    // inverse FFT, result should be scaled with outCFFT
2287    WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
2288    outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
2289
2290    //take only the real values and scale with outCFFT
2291    for (i = 0; i < PART_LEN2; i++)
2292    {
2293        j = WEBRTC_SPL_LSHIFT_W32(i, 1);
2294        fft[i] = fft[j];
2295    }
2296#else
2297    outCFFT = WebRtcSpl_ComplexIFFT2(fft, postFft, PART_LEN_SHIFT, 1);
2298
2299    //take only the real values and scale with outCFFT
2300    for(i = 0, j = 0; i < PART_LEN2;)
2301    {
2302        fft[i] = postFft[j];
2303        i += 1;
2304        j += 2;
2305        fft[i] = postFft[j];
2306        i += 1;
2307        j += 2;
2308    }
2309#endif
2310
2311    for (i = 0; i < PART_LEN; i++)
2312    {
2313        fft[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
2314                fft[i],
2315                kSqrtHanning[i],
2316                14);
2317        tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)fft[i],
2318                outCFFT - aecm->dfaCleanQDomain);
2319        fft[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
2320                tmp32no1 + aecm->outBuf[i],
2321                WEBRTC_SPL_WORD16_MIN);
2322        output[i] = fft[i];
2323
2324        tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(
2325                fft[PART_LEN + i],
2326                kSqrtHanning[PART_LEN - i],
2327                14);
2328        tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
2329                outCFFT - aecm->dfaCleanQDomain);
2330        aecm->outBuf[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(
2331                WEBRTC_SPL_WORD16_MAX,
2332                tmp32no1,
2333                WEBRTC_SPL_WORD16_MIN);
2334    }
2335
2336#ifdef ARM_WINM_LOG_
2337    // measure tick end
2338    QueryPerformanceCounter((LARGE_INTEGER*)&end);
2339    diff__ = ((end - start) * 1000) / (freq/1000);
2340    milliseconds = (unsigned int)(diff__ & 0xffffffff);
2341    WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL);
2342#endif
2343    // Copy the current block to the old position (outBuf is shifted elsewhere)
2344    memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(WebRtc_Word16) * PART_LEN);
2345    memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, sizeof(WebRtc_Word16) * PART_LEN);
2346    if (nearendClean != NULL)
2347    {
2348        memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, sizeof(WebRtc_Word16) * PART_LEN);
2349    }
2350}
2351
2352// Generate comfort noise and add to output signal.
2353//
2354// \param[in]     aecm     Handle of the AECM instance.
2355// \param[in]     dfa     Absolute value of the nearend signal (Q[aecm->dfaQDomain]).
2356// \param[in,out] outReal Real part of the output signal (Q[aecm->dfaQDomain]).
2357// \param[in,out] outImag Imaginary part of the output signal (Q[aecm->dfaQDomain]).
2358// \param[in]     lambda  Suppression gain with which to scale the noise level (Q14).
2359//
2360static void WebRtcAecm_ComfortNoise(AecmCore_t * const aecm, const WebRtc_UWord16 * const dfa,
2361                                    WebRtc_Word16 * const outReal,
2362                                    WebRtc_Word16 * const outImag,
2363                                    const WebRtc_Word16 * const lambda)
2364{
2365    WebRtc_Word16 i;
2366    WebRtc_Word16 tmp16;
2367    WebRtc_Word32 tmp32;
2368
2369    WebRtc_Word16 randW16[PART_LEN];
2370    WebRtc_Word16 uReal[PART_LEN1];
2371    WebRtc_Word16 uImag[PART_LEN1];
2372    WebRtc_Word32 outLShift32[PART_LEN1];
2373    WebRtc_Word16 noiseRShift16[PART_LEN1];
2374
2375    WebRtc_Word16 shiftFromNearToNoise[PART_LEN1];
2376    WebRtc_Word16 minTrackShift;
2377    WebRtc_Word32 upper32;
2378    WebRtc_Word32 lower32;
2379
2380    if (aecm->noiseEstCtr < 100)
2381    {
2382        // Track the minimum more quickly initially.
2383        aecm->noiseEstCtr++;
2384        minTrackShift = 7;
2385    } else
2386    {
2387        minTrackShift = 9;
2388    }
2389
2390    // Estimate noise power.
2391    for (i = 0; i < PART_LEN1; i++)
2392    {
2393        shiftFromNearToNoise[i] = aecm->noiseEstQDomain[i] - aecm->dfaCleanQDomain;
2394
2395        // Shift to the noise domain.
2396        tmp32 = (WebRtc_Word32)dfa[i];
2397        outLShift32[i] = WEBRTC_SPL_SHIFT_W32(tmp32, shiftFromNearToNoise[i]);
2398
2399        if (outLShift32[i] < aecm->noiseEst[i])
2400        {
2401            // Track the minimum.
2402            aecm->noiseEst[i] += ((outLShift32[i] - aecm->noiseEst[i]) >> minTrackShift);
2403        } else
2404        {
2405            // Ramp slowly upwards until we hit the minimum again.
2406
2407            // Avoid overflow.
2408            if (aecm->noiseEst[i] < 2146435583)
2409            {
2410                // Store the fractional portion.
2411                upper32 = (aecm->noiseEst[i] & 0xffff0000) >> 16;
2412                lower32 = aecm->noiseEst[i] & 0x0000ffff;
2413                upper32 = ((upper32 * 2049) >> 11);
2414                lower32 = ((lower32 * 2049) >> 11);
2415                aecm->noiseEst[i] = WEBRTC_SPL_ADD_SAT_W32(upper32 << 16, lower32);
2416            }
2417        }
2418    }
2419
2420    for (i = 0; i < PART_LEN1; i++)
2421    {
2422        tmp32 = WEBRTC_SPL_SHIFT_W32(aecm->noiseEst[i], -shiftFromNearToNoise[i]);
2423        if (tmp32 > 32767)
2424        {
2425            tmp32 = 32767;
2426            aecm->noiseEst[i] = WEBRTC_SPL_SHIFT_W32(tmp32, shiftFromNearToNoise[i]);
2427        }
2428        noiseRShift16[i] = (WebRtc_Word16)tmp32;
2429
2430        tmp16 = ONE_Q14 - lambda[i];
2431        noiseRShift16[i]
2432                = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, noiseRShift16[i], 14);
2433    }
2434
2435    // Generate a uniform random array on [0 2^15-1].
2436    WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
2437
2438    // Generate noise according to estimated energy.
2439    uReal[0] = 0; // Reject LF noise.
2440    uImag[0] = 0;
2441    for (i = 1; i < PART_LEN1; i++)
2442    {
2443        // Get a random index for the cos and sin tables over [0 359].
2444        tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15);
2445
2446        // Tables are in Q13.
2447        uReal[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i],
2448                WebRtcSpl_kCosTable[tmp16], 13);
2449        uImag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i],
2450                WebRtcSpl_kSinTable[tmp16], 13);
2451    }
2452    uImag[PART_LEN] = 0;
2453
2454#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
2455    for (i = 0; i < PART_LEN1; i++)
2456    {
2457        outReal[i] = WEBRTC_SPL_ADD_SAT_W16(outReal[i], uReal[i]);
2458        outImag[i] = WEBRTC_SPL_ADD_SAT_W16(outImag[i], uImag[i]);
2459    }
2460#else
2461    for (i = 0; i < PART_LEN1 -1; )
2462    {
2463        outReal[i] = WEBRTC_SPL_ADD_SAT_W16(outReal[i], uReal[i]);
2464        outImag[i] = WEBRTC_SPL_ADD_SAT_W16(outImag[i], uImag[i]);
2465        i++;
2466
2467        outReal[i] = WEBRTC_SPL_ADD_SAT_W16(outReal[i], uReal[i]);
2468        outImag[i] = WEBRTC_SPL_ADD_SAT_W16(outImag[i], uImag[i]);
2469        i++;
2470    }
2471    outReal[i] = WEBRTC_SPL_ADD_SAT_W16(outReal[i], uReal[i]);
2472    outImag[i] = WEBRTC_SPL_ADD_SAT_W16(outImag[i], uImag[i]);
2473#endif
2474}
2475
2476void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend,
2477                               const int farLen)
2478{
2479    int writeLen = farLen, writePos = 0;
2480
2481    // Check if the write position must be wrapped
2482    while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN)
2483    {
2484        // Write to remaining buffer space before wrapping
2485        writeLen = FAR_BUF_LEN - aecm->farBufWritePos;
2486        memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos,
2487               sizeof(WebRtc_Word16) * writeLen);
2488        aecm->farBufWritePos = 0;
2489        writePos = writeLen;
2490        writeLen = farLen - writeLen;
2491    }
2492
2493    memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos,
2494           sizeof(WebRtc_Word16) * writeLen);
2495    aecm->farBufWritePos += writeLen;
2496}
2497
2498void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, WebRtc_Word16 * const farend,
2499                              const int farLen, const int knownDelay)
2500{
2501    int readLen = farLen;
2502    int readPos = 0;
2503    int delayChange = knownDelay - aecm->lastKnownDelay;
2504
2505    aecm->farBufReadPos -= delayChange;
2506
2507    // Check if delay forces a read position wrap
2508    while (aecm->farBufReadPos < 0)
2509    {
2510        aecm->farBufReadPos += FAR_BUF_LEN;
2511    }
2512    while (aecm->farBufReadPos > FAR_BUF_LEN - 1)
2513    {
2514        aecm->farBufReadPos -= FAR_BUF_LEN;
2515    }
2516
2517    aecm->lastKnownDelay = knownDelay;
2518
2519    // Check if read position must be wrapped
2520    while (aecm->farBufReadPos + readLen > FAR_BUF_LEN)
2521    {
2522
2523        // Read from remaining buffer space before wrapping
2524        readLen = FAR_BUF_LEN - aecm->farBufReadPos;
2525        memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos,
2526               sizeof(WebRtc_Word16) * readLen);
2527        aecm->farBufReadPos = 0;
2528        readPos = readLen;
2529        readLen = farLen - readLen;
2530    }
2531    memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos,
2532           sizeof(WebRtc_Word16) * readLen);
2533    aecm->farBufReadPos += readLen;
2534}
2535