aec_core.c revision 3cfa756f371ffdcc0369d745e9832cfff55861ba
1/* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11/* 12 * The core AEC algorithm, which is presented with time-aligned signals. 13 */ 14 15#include "webrtc/modules/audio_processing/aec/aec_core.h" 16 17#ifdef WEBRTC_AEC_DEBUG_DUMP 18#include <stdio.h> 19#endif 20 21#include <assert.h> 22#include <math.h> 23#include <stddef.h> // size_t 24#include <stdlib.h> 25#include <string.h> 26 27#include "webrtc/common_audio/ring_buffer.h" 28#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 29#include "webrtc/modules/audio_processing/aec/aec_common.h" 30#include "webrtc/modules/audio_processing/aec/aec_core_internal.h" 31#include "webrtc/modules/audio_processing/aec/aec_rdft.h" 32#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" 33#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" 34#include "webrtc/typedefs.h" 35 36// Buffer size (samples) 37static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz. 38 39// Metrics 40static const int subCountLen = 4; 41static const int countLen = 50; 42static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. 43 44// Quantities to control H band scaling for SWB input 45static const int flagHbandCn = 1; // flag for adding comfort noise in H band 46static const float cnScaleHband = 47 (float)0.4; // scale for comfort noise in H band 48// Initial bin for averaging nlp gain in low band 49static const int freqAvgIc = PART_LEN / 2; 50 51// Matlab code to produce table: 52// win = sqrt(hanning(63)); win = [0 ; win(1:32)]; 53// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); 54ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { 55 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, 56 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, 57 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, 58 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, 59 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, 60 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, 61 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, 62 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, 63 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, 64 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, 65 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, 66 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, 67 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, 68 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, 69 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, 70 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, 71 1.00000000000000f}; 72 73// Matlab code to produce table: 74// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1]; 75// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve); 76ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65] = { 77 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f, 78 0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f, 79 0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f, 80 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f, 81 0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f, 82 0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f, 83 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f, 84 0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f, 85 0.4000f}; 86 87// Matlab code to produce table: 88// overDriveCurve = [sqrt(linspace(0,1,65))' + 1]; 89// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve); 90ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = { 91 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f, 92 1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f, 93 1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f, 94 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f, 95 1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f, 96 1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f, 97 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f, 98 1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f, 99 2.0000f}; 100 101// TODO(bjornv): These parameters will be tuned. 102static const float kDelayQualityThresholdMax = 0.07f; 103static const int kInitialShiftOffset = 5; 104 105// Target suppression levels for nlp modes. 106// log{0.001, 0.00001, 0.00000001} 107static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f}; 108 109// Two sets of parameters, one for the extended filter mode. 110static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f}; 111static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f}; 112const float WebRtcAec_kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, 113 {0.92f, 0.08f}}; 114const float WebRtcAec_kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, 115 {0.93f, 0.07f}}; 116 117// Number of partitions forming the NLP's "preferred" bands. 118enum { 119 kPrefBandSize = 24 120}; 121 122#ifdef WEBRTC_AEC_DEBUG_DUMP 123extern int webrtc_aec_instance_count; 124#endif 125 126WebRtcAecFilterFar WebRtcAec_FilterFar; 127WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; 128WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; 129WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; 130WebRtcAecComfortNoise WebRtcAec_ComfortNoise; 131WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; 132 133__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { 134 return aRe * bRe - aIm * bIm; 135} 136 137__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { 138 return aRe * bIm + aIm * bRe; 139} 140 141static int CmpFloat(const void* a, const void* b) { 142 const float* da = (const float*)a; 143 const float* db = (const float*)b; 144 145 return (*da > *db) - (*da < *db); 146} 147 148static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) { 149 int i; 150 for (i = 0; i < aec->num_partitions; i++) { 151 int j; 152 int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; 153 int pos = i * PART_LEN1; 154 // Check for wrap 155 if (i + aec->xfBufBlockPos >= aec->num_partitions) { 156 xPos -= aec->num_partitions * (PART_LEN1); 157 } 158 159 for (j = 0; j < PART_LEN1; j++) { 160 yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], 161 aec->xfBuf[1][xPos + j], 162 aec->wfBuf[0][pos + j], 163 aec->wfBuf[1][pos + j]); 164 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], 165 aec->xfBuf[1][xPos + j], 166 aec->wfBuf[0][pos + j], 167 aec->wfBuf[1][pos + j]); 168 } 169 } 170} 171 172static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) { 173 const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; 174 const float error_threshold = aec->extended_filter_enabled 175 ? kExtendedErrorThreshold 176 : aec->normal_error_threshold; 177 int i; 178 float abs_ef; 179 for (i = 0; i < (PART_LEN1); i++) { 180 ef[0][i] /= (aec->xPow[i] + 1e-10f); 181 ef[1][i] /= (aec->xPow[i] + 1e-10f); 182 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); 183 184 if (abs_ef > error_threshold) { 185 abs_ef = error_threshold / (abs_ef + 1e-10f); 186 ef[0][i] *= abs_ef; 187 ef[1][i] *= abs_ef; 188 } 189 190 // Stepsize factor 191 ef[0][i] *= mu; 192 ef[1][i] *= mu; 193 } 194} 195 196// Time-unconstrined filter adaptation. 197// TODO(andrew): consider for a low-complexity mode. 198// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft, 199// float ef[2][PART_LEN1]) { 200// int i, j; 201// for (i = 0; i < aec->num_partitions; i++) { 202// int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); 203// int pos; 204// // Check for wrap 205// if (i + aec->xfBufBlockPos >= aec->num_partitions) { 206// xPos -= aec->num_partitions * PART_LEN1; 207// } 208// 209// pos = i * PART_LEN1; 210// 211// for (j = 0; j < PART_LEN1; j++) { 212// aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j], 213// -aec->xfBuf[1][xPos + j], 214// ef[0][j], ef[1][j]); 215// aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j], 216// -aec->xfBuf[1][xPos + j], 217// ef[0][j], ef[1][j]); 218// } 219// } 220//} 221 222static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { 223 int i, j; 224 for (i = 0; i < aec->num_partitions; i++) { 225 int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); 226 int pos; 227 // Check for wrap 228 if (i + aec->xfBufBlockPos >= aec->num_partitions) { 229 xPos -= aec->num_partitions * PART_LEN1; 230 } 231 232 pos = i * PART_LEN1; 233 234 for (j = 0; j < PART_LEN; j++) { 235 236 fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j], 237 -aec->xfBuf[1][xPos + j], 238 ef[0][j], 239 ef[1][j]); 240 fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j], 241 -aec->xfBuf[1][xPos + j], 242 ef[0][j], 243 ef[1][j]); 244 } 245 fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], 246 -aec->xfBuf[1][xPos + PART_LEN], 247 ef[0][PART_LEN], 248 ef[1][PART_LEN]); 249 250 aec_rdft_inverse_128(fft); 251 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); 252 253 // fft scaling 254 { 255 float scale = 2.0f / PART_LEN2; 256 for (j = 0; j < PART_LEN; j++) { 257 fft[j] *= scale; 258 } 259 } 260 aec_rdft_forward_128(fft); 261 262 aec->wfBuf[0][pos] += fft[0]; 263 aec->wfBuf[0][pos + PART_LEN] += fft[1]; 264 265 for (j = 1; j < PART_LEN; j++) { 266 aec->wfBuf[0][pos + j] += fft[2 * j]; 267 aec->wfBuf[1][pos + j] += fft[2 * j + 1]; 268 } 269 } 270} 271 272static void OverdriveAndSuppress(AecCore* aec, 273 float hNl[PART_LEN1], 274 const float hNlFb, 275 float efw[2][PART_LEN1]) { 276 int i; 277 for (i = 0; i < PART_LEN1; i++) { 278 // Weight subbands 279 if (hNl[i] > hNlFb) { 280 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + 281 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; 282 } 283 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); 284 285 // Suppress error signal 286 efw[0][i] *= hNl[i]; 287 efw[1][i] *= hNl[i]; 288 289 // Ooura fft returns incorrect sign on imaginary component. It matters here 290 // because we are making an additive change with comfort noise. 291 efw[1][i] *= -1; 292 } 293} 294 295static int PartitionDelay(const AecCore* aec) { 296 // Measures the energy in each filter partition and returns the partition with 297 // highest energy. 298 // TODO(bjornv): Spread computational cost by computing one partition per 299 // block? 300 float wfEnMax = 0; 301 int i; 302 int delay = 0; 303 304 for (i = 0; i < aec->num_partitions; i++) { 305 int j; 306 int pos = i * PART_LEN1; 307 float wfEn = 0; 308 for (j = 0; j < PART_LEN1; j++) { 309 wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + 310 aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; 311 } 312 313 if (wfEn > wfEnMax) { 314 wfEnMax = wfEn; 315 delay = i; 316 } 317 } 318 return delay; 319} 320 321// Threshold to protect against the ill-effects of a zero far-end. 322const float WebRtcAec_kMinFarendPSD = 15; 323 324// Updates the following smoothed Power Spectral Densities (PSD): 325// - sd : near-end 326// - se : residual echo 327// - sx : far-end 328// - sde : cross-PSD of near-end and residual echo 329// - sxd : cross-PSD of near-end and far-end 330// 331// In addition to updating the PSDs, also the filter diverge state is determined 332// upon actions are taken. 333static void SmoothedPSD(AecCore* aec, 334 float efw[2][PART_LEN1], 335 float dfw[2][PART_LEN1], 336 float xfw[2][PART_LEN1]) { 337 // Power estimate smoothing coefficients. 338 const float* ptrGCoh = aec->extended_filter_enabled 339 ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] 340 : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; 341 int i; 342 float sdSum = 0, seSum = 0; 343 344 for (i = 0; i < PART_LEN1; i++) { 345 aec->sd[i] = ptrGCoh[0] * aec->sd[i] + 346 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); 347 aec->se[i] = ptrGCoh[0] * aec->se[i] + 348 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); 349 // We threshold here to protect against the ill-effects of a zero farend. 350 // The threshold is not arbitrarily chosen, but balances protection and 351 // adverse interaction with the algorithm's tuning. 352 // TODO(bjornv): investigate further why this is so sensitive. 353 aec->sx[i] = 354 ptrGCoh[0] * aec->sx[i] + 355 ptrGCoh[1] * WEBRTC_SPL_MAX( 356 xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 357 WebRtcAec_kMinFarendPSD); 358 359 aec->sde[i][0] = 360 ptrGCoh[0] * aec->sde[i][0] + 361 ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); 362 aec->sde[i][1] = 363 ptrGCoh[0] * aec->sde[i][1] + 364 ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); 365 366 aec->sxd[i][0] = 367 ptrGCoh[0] * aec->sxd[i][0] + 368 ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); 369 aec->sxd[i][1] = 370 ptrGCoh[0] * aec->sxd[i][1] + 371 ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); 372 373 sdSum += aec->sd[i]; 374 seSum += aec->se[i]; 375 } 376 377 // Divergent filter safeguard. 378 aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; 379 380 if (aec->divergeState) 381 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); 382 383 // Reset if error is significantly larger than nearend (13 dB). 384 if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) 385 memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); 386} 387 388// Window time domain data to be used by the fft. 389__inline static void WindowData(float* x_windowed, const float* x) { 390 int i; 391 for (i = 0; i < PART_LEN; i++) { 392 x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i]; 393 x_windowed[PART_LEN + i] = 394 x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; 395 } 396} 397 398// Puts fft output data into a complex valued array. 399__inline static void StoreAsComplex(const float* data, 400 float data_complex[2][PART_LEN1]) { 401 int i; 402 data_complex[0][0] = data[0]; 403 data_complex[1][0] = 0; 404 for (i = 1; i < PART_LEN; i++) { 405 data_complex[0][i] = data[2 * i]; 406 data_complex[1][i] = data[2 * i + 1]; 407 } 408 data_complex[0][PART_LEN] = data[1]; 409 data_complex[1][PART_LEN] = 0; 410} 411 412static void SubbandCoherence(AecCore* aec, 413 float efw[2][PART_LEN1], 414 float xfw[2][PART_LEN1], 415 float* fft, 416 float* cohde, 417 float* cohxd) { 418 float dfw[2][PART_LEN1]; 419 int i; 420 421 if (aec->delayEstCtr == 0) 422 aec->delayIdx = PartitionDelay(aec); 423 424 // Use delayed far. 425 memcpy(xfw, 426 aec->xfwBuf + aec->delayIdx * PART_LEN1, 427 sizeof(xfw[0][0]) * 2 * PART_LEN1); 428 429 // Windowed near fft 430 WindowData(fft, aec->dBuf); 431 aec_rdft_forward_128(fft); 432 StoreAsComplex(fft, dfw); 433 434 // Windowed error fft 435 WindowData(fft, aec->eBuf); 436 aec_rdft_forward_128(fft); 437 StoreAsComplex(fft, efw); 438 439 SmoothedPSD(aec, efw, dfw, xfw); 440 441 // Subband coherence 442 for (i = 0; i < PART_LEN1; i++) { 443 cohde[i] = 444 (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / 445 (aec->sd[i] * aec->se[i] + 1e-10f); 446 cohxd[i] = 447 (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / 448 (aec->sx[i] * aec->sd[i] + 1e-10f); 449 } 450} 451 452static void GetHighbandGain(const float* lambda, float* nlpGainHband) { 453 int i; 454 455 nlpGainHband[0] = (float)0.0; 456 for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { 457 nlpGainHband[0] += lambda[i]; 458 } 459 nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); 460} 461 462static void ComfortNoise(AecCore* aec, 463 float efw[2][PART_LEN1], 464 complex_t* comfortNoiseHband, 465 const float* noisePow, 466 const float* lambda) { 467 int i, num; 468 float rand[PART_LEN]; 469 float noise, noiseAvg, tmp, tmpAvg; 470 int16_t randW16[PART_LEN]; 471 complex_t u[PART_LEN1]; 472 473 const float pi2 = 6.28318530717959f; 474 475 // Generate a uniform random array on [0 1] 476 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); 477 for (i = 0; i < PART_LEN; i++) { 478 rand[i] = ((float)randW16[i]) / 32768; 479 } 480 481 // Reject LF noise 482 u[0][0] = 0; 483 u[0][1] = 0; 484 for (i = 1; i < PART_LEN1; i++) { 485 tmp = pi2 * rand[i - 1]; 486 487 noise = sqrtf(noisePow[i]); 488 u[i][0] = noise * cosf(tmp); 489 u[i][1] = -noise * sinf(tmp); 490 } 491 u[PART_LEN][1] = 0; 492 493 for (i = 0; i < PART_LEN1; i++) { 494 // This is the proper weighting to match the background noise power 495 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); 496 // tmp = 1 - lambda[i]; 497 efw[0][i] += tmp * u[i][0]; 498 efw[1][i] += tmp * u[i][1]; 499 } 500 501 // For H band comfort noise 502 // TODO: don't compute noise and "tmp" twice. Use the previous results. 503 noiseAvg = 0.0; 504 tmpAvg = 0.0; 505 num = 0; 506 if (aec->num_bands > 1 && flagHbandCn == 1) { 507 508 // average noise scale 509 // average over second half of freq spectrum (i.e., 4->8khz) 510 // TODO: we shouldn't need num. We know how many elements we're summing. 511 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { 512 num++; 513 noiseAvg += sqrtf(noisePow[i]); 514 } 515 noiseAvg /= (float)num; 516 517 // average nlp scale 518 // average over second half of freq spectrum (i.e., 4->8khz) 519 // TODO: we shouldn't need num. We know how many elements we're summing. 520 num = 0; 521 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { 522 num++; 523 tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); 524 } 525 tmpAvg /= (float)num; 526 527 // Use average noise for H band 528 // TODO: we should probably have a new random vector here. 529 // Reject LF noise 530 u[0][0] = 0; 531 u[0][1] = 0; 532 for (i = 1; i < PART_LEN1; i++) { 533 tmp = pi2 * rand[i - 1]; 534 535 // Use average noise for H band 536 u[i][0] = noiseAvg * (float)cos(tmp); 537 u[i][1] = -noiseAvg * (float)sin(tmp); 538 } 539 u[PART_LEN][1] = 0; 540 541 for (i = 0; i < PART_LEN1; i++) { 542 // Use average NLP weight for H band 543 comfortNoiseHband[i][0] = tmpAvg * u[i][0]; 544 comfortNoiseHband[i][1] = tmpAvg * u[i][1]; 545 } 546 } 547} 548 549static void InitLevel(PowerLevel* level) { 550 const float kBigFloat = 1E17f; 551 552 level->averagelevel = 0; 553 level->framelevel = 0; 554 level->minlevel = kBigFloat; 555 level->frsum = 0; 556 level->sfrsum = 0; 557 level->frcounter = 0; 558 level->sfrcounter = 0; 559} 560 561static void InitStats(Stats* stats) { 562 stats->instant = kOffsetLevel; 563 stats->average = kOffsetLevel; 564 stats->max = kOffsetLevel; 565 stats->min = kOffsetLevel * (-1); 566 stats->sum = 0; 567 stats->hisum = 0; 568 stats->himean = kOffsetLevel; 569 stats->counter = 0; 570 stats->hicounter = 0; 571} 572 573static void InitMetrics(AecCore* self) { 574 self->stateCounter = 0; 575 InitLevel(&self->farlevel); 576 InitLevel(&self->nearlevel); 577 InitLevel(&self->linoutlevel); 578 InitLevel(&self->nlpoutlevel); 579 580 InitStats(&self->erl); 581 InitStats(&self->erle); 582 InitStats(&self->aNlp); 583 InitStats(&self->rerl); 584} 585 586static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) { 587 // Do the energy calculation in the frequency domain. The FFT is performed on 588 // a segment of PART_LEN2 samples due to overlap, but we only want the energy 589 // of half that data (the last PART_LEN samples). Parseval's relation states 590 // that the energy is preserved according to 591 // 592 // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2 593 // = ENERGY, 594 // 595 // where N = PART_LEN2. Since we are only interested in calculating the energy 596 // for the last PART_LEN samples we approximate by calculating ENERGY and 597 // divide by 2, 598 // 599 // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2 600 // 601 // Since we deal with real valued time domain signals we only store frequency 602 // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we 603 // need to add the contribution from the missing part in 604 // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical 605 // with the values in [1, PART_LEN-1], hence multiply those values by 2. This 606 // is the values in the for loop below, but multiplication by 2 and division 607 // by 2 cancel. 608 609 // TODO(bjornv): Investigate reusing energy calculations performed at other 610 // places in the code. 611 int k = 1; 612 // Imaginary parts are zero at end points and left out of the calculation. 613 float energy = (in[0][0] * in[0][0]) / 2; 614 energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2; 615 616 for (k = 1; k < PART_LEN; k++) { 617 energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]); 618 } 619 energy /= PART_LEN2; 620 621 level->sfrsum += energy; 622 level->sfrcounter++; 623 624 if (level->sfrcounter > subCountLen) { 625 level->framelevel = level->sfrsum / (subCountLen * PART_LEN); 626 level->sfrsum = 0; 627 level->sfrcounter = 0; 628 if (level->framelevel > 0) { 629 if (level->framelevel < level->minlevel) { 630 level->minlevel = level->framelevel; // New minimum. 631 } else { 632 level->minlevel *= (1 + 0.001f); // Small increase. 633 } 634 } 635 level->frcounter++; 636 level->frsum += level->framelevel; 637 if (level->frcounter > countLen) { 638 level->averagelevel = level->frsum / countLen; 639 level->frsum = 0; 640 level->frcounter = 0; 641 } 642 } 643} 644 645static void UpdateMetrics(AecCore* aec) { 646 float dtmp, dtmp2; 647 648 const float actThresholdNoisy = 8.0f; 649 const float actThresholdClean = 40.0f; 650 const float safety = 0.99995f; 651 const float noisyPower = 300000.0f; 652 653 float actThreshold; 654 float echo, suppressedEcho; 655 656 if (aec->echoState) { // Check if echo is likely present 657 aec->stateCounter++; 658 } 659 660 if (aec->farlevel.frcounter == 0) { 661 662 if (aec->farlevel.minlevel < noisyPower) { 663 actThreshold = actThresholdClean; 664 } else { 665 actThreshold = actThresholdNoisy; 666 } 667 668 if ((aec->stateCounter > (0.5f * countLen * subCountLen)) && 669 (aec->farlevel.sfrcounter == 0) 670 671 // Estimate in active far-end segments only 672 && 673 (aec->farlevel.averagelevel > 674 (actThreshold * aec->farlevel.minlevel))) { 675 676 // Subtract noise power 677 echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; 678 679 // ERL 680 dtmp = 10 * (float)log10(aec->farlevel.averagelevel / 681 aec->nearlevel.averagelevel + 682 1e-10f); 683 dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f); 684 685 aec->erl.instant = dtmp; 686 if (dtmp > aec->erl.max) { 687 aec->erl.max = dtmp; 688 } 689 690 if (dtmp < aec->erl.min) { 691 aec->erl.min = dtmp; 692 } 693 694 aec->erl.counter++; 695 aec->erl.sum += dtmp; 696 aec->erl.average = aec->erl.sum / aec->erl.counter; 697 698 // Upper mean 699 if (dtmp > aec->erl.average) { 700 aec->erl.hicounter++; 701 aec->erl.hisum += dtmp; 702 aec->erl.himean = aec->erl.hisum / aec->erl.hicounter; 703 } 704 705 // A_NLP 706 dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / 707 (2 * aec->linoutlevel.averagelevel) + 708 1e-10f); 709 710 // subtract noise power 711 suppressedEcho = 2 * (aec->linoutlevel.averagelevel - 712 safety * aec->linoutlevel.minlevel); 713 714 dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); 715 716 aec->aNlp.instant = dtmp2; 717 if (dtmp > aec->aNlp.max) { 718 aec->aNlp.max = dtmp; 719 } 720 721 if (dtmp < aec->aNlp.min) { 722 aec->aNlp.min = dtmp; 723 } 724 725 aec->aNlp.counter++; 726 aec->aNlp.sum += dtmp; 727 aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter; 728 729 // Upper mean 730 if (dtmp > aec->aNlp.average) { 731 aec->aNlp.hicounter++; 732 aec->aNlp.hisum += dtmp; 733 aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter; 734 } 735 736 // ERLE 737 738 // subtract noise power 739 suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel - 740 safety * aec->nlpoutlevel.minlevel); 741 742 dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / 743 (2 * aec->nlpoutlevel.averagelevel) + 744 1e-10f); 745 dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); 746 747 dtmp = dtmp2; 748 aec->erle.instant = dtmp; 749 if (dtmp > aec->erle.max) { 750 aec->erle.max = dtmp; 751 } 752 753 if (dtmp < aec->erle.min) { 754 aec->erle.min = dtmp; 755 } 756 757 aec->erle.counter++; 758 aec->erle.sum += dtmp; 759 aec->erle.average = aec->erle.sum / aec->erle.counter; 760 761 // Upper mean 762 if (dtmp > aec->erle.average) { 763 aec->erle.hicounter++; 764 aec->erle.hisum += dtmp; 765 aec->erle.himean = aec->erle.hisum / aec->erle.hicounter; 766 } 767 } 768 769 aec->stateCounter = 0; 770 } 771} 772 773static void UpdateDelayMetrics(AecCore* self) { 774 int i = 0; 775 int delay_values = 0; 776 int median = 0; 777 int lookahead = WebRtc_lookahead(self->delay_estimator); 778 const int kMsPerBlock = PART_LEN / (self->mult * 8); 779 int64_t l1_norm = 0; 780 781 if (self->num_delay_values == 0) { 782 // We have no new delay value data. Even though -1 is a valid |median| in 783 // the sense that we allow negative values, it will practically never be 784 // used since multiples of |kMsPerBlock| will always be returned. 785 // We therefore use -1 to indicate in the logs that the delay estimator was 786 // not able to estimate the delay. 787 self->delay_median = -1; 788 self->delay_std = -1; 789 self->fraction_poor_delays = -1; 790 return; 791 } 792 793 // Start value for median count down. 794 delay_values = self->num_delay_values >> 1; 795 // Get median of delay values since last update. 796 for (i = 0; i < kHistorySizeBlocks; i++) { 797 delay_values -= self->delay_histogram[i]; 798 if (delay_values < 0) { 799 median = i; 800 break; 801 } 802 } 803 // Account for lookahead. 804 self->delay_median = (median - lookahead) * kMsPerBlock; 805 806 // Calculate the L1 norm, with median value as central moment. 807 for (i = 0; i < kHistorySizeBlocks; i++) { 808 l1_norm += abs(i - median) * self->delay_histogram[i]; 809 } 810 self->delay_std = (int)((l1_norm + self->num_delay_values / 2) / 811 self->num_delay_values) * kMsPerBlock; 812 813 // Determine fraction of delays that are out of bounds, that is, either 814 // negative (anti-causal system) or larger than the AEC filter length. 815 { 816 int num_delays_out_of_bounds = self->num_delay_values; 817 for (i = lookahead; i < lookahead + self->num_partitions; ++i) { 818 num_delays_out_of_bounds -= self->delay_histogram[i]; 819 } 820 self->fraction_poor_delays = (float)num_delays_out_of_bounds / 821 self->num_delay_values; 822 } 823 824 // Reset histogram. 825 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); 826 self->num_delay_values = 0; 827 828 return; 829} 830 831static void TimeToFrequency(float time_data[PART_LEN2], 832 float freq_data[2][PART_LEN1], 833 int window) { 834 int i = 0; 835 836 // TODO(bjornv): Should we have a different function/wrapper for windowed FFT? 837 if (window) { 838 for (i = 0; i < PART_LEN; i++) { 839 time_data[i] *= WebRtcAec_sqrtHanning[i]; 840 time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i]; 841 } 842 } 843 844 aec_rdft_forward_128(time_data); 845 // Reorder. 846 freq_data[1][0] = 0; 847 freq_data[1][PART_LEN] = 0; 848 freq_data[0][0] = time_data[0]; 849 freq_data[0][PART_LEN] = time_data[1]; 850 for (i = 1; i < PART_LEN; i++) { 851 freq_data[0][i] = time_data[2 * i]; 852 freq_data[1][i] = time_data[2 * i + 1]; 853 } 854} 855 856static int SignalBasedDelayCorrection(AecCore* self) { 857 int delay_correction = 0; 858 int last_delay = -2; 859 assert(self != NULL); 860 // 1. Check for non-negative delay estimate. Note that the estimates we get 861 // from the delay estimation are not compensated for lookahead. Hence, a 862 // negative |last_delay| is an invalid one. 863 // 2. Verify that there is a delay change. In addition, only allow a change 864 // if the delay is outside a certain region taking the AEC filter length 865 // into account. 866 // TODO(bjornv): Investigate if we can remove the non-zero delay change check. 867 // 3. Only allow delay correction if the delay estimation quality exceeds 868 // |delay_quality_threshold|. 869 // 4. Finally, verify that the proposed |delay_correction| is feasible by 870 // comparing with the size of the far-end buffer. 871 last_delay = WebRtc_last_delay(self->delay_estimator); 872 if ((last_delay >= 0) && 873 (last_delay != self->previous_delay) && 874 (WebRtc_last_delay_quality(self->delay_estimator) > 875 self->delay_quality_threshold)) { 876 int delay = last_delay - WebRtc_lookahead(self->delay_estimator); 877 // Allow for a slack in the actual delay. The adaptive echo cancellation 878 // filter is currently |num_partitions| (of 64 samples) long. If the 879 // delay estimate indicates a delay of at least one quarter of the filter 880 // length we open up for correction. 881 if (delay <= 0 || delay > (self->num_partitions / 4)) { 882 int available_read = (int)WebRtc_available_read(self->far_buf); 883 // Adjust w.r.t. a |shift_offset| to account for not as reliable estimates 884 // in the beginning, hence we are more conservative. 885 delay_correction = -(delay - self->shift_offset); 886 self->shift_offset--; 887 self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset); 888 if (delay_correction > available_read - self->mult - 1) { 889 // There is not enough data in the buffer to perform this shift. Hence, 890 // we do not rely on the delay estimate and do nothing. 891 delay_correction = 0; 892 } else { 893 self->previous_delay = last_delay; 894 ++self->delay_correction_count; 895 } 896 } 897 } 898 // Update the |delay_quality_threshold| once we have our first delay 899 // correction. 900 if (self->delay_correction_count > 0) { 901 float delay_quality = WebRtc_last_delay_quality(self->delay_estimator); 902 delay_quality = (delay_quality > kDelayQualityThresholdMax ? 903 kDelayQualityThresholdMax : delay_quality); 904 self->delay_quality_threshold = 905 (delay_quality > self->delay_quality_threshold ? delay_quality : 906 self->delay_quality_threshold); 907 } 908 return delay_correction; 909} 910 911static void NonLinearProcessing(AecCore* aec, 912 float* output, 913 float* const* outputH) { 914 float efw[2][PART_LEN1], xfw[2][PART_LEN1]; 915 complex_t comfortNoiseHband[PART_LEN1]; 916 float fft[PART_LEN2]; 917 float scale, dtmp; 918 float nlpGainHband; 919 int i, j; 920 921 // Coherence and non-linear filter 922 float cohde[PART_LEN1], cohxd[PART_LEN1]; 923 float hNlDeAvg, hNlXdAvg; 924 float hNl[PART_LEN1]; 925 float hNlPref[kPrefBandSize]; 926 float hNlFb = 0, hNlFbLow = 0; 927 const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f; 928 const int prefBandSize = kPrefBandSize / aec->mult; 929 const int minPrefBand = 4 / aec->mult; 930 // Power estimate smoothing coefficients. 931 const float* min_overdrive = aec->extended_filter_enabled 932 ? kExtendedMinOverDrive 933 : kNormalMinOverDrive; 934 935 // Filter energy 936 const int delayEstInterval = 10 * aec->mult; 937 938 float* xfw_ptr = NULL; 939 940 aec->delayEstCtr++; 941 if (aec->delayEstCtr == delayEstInterval) { 942 aec->delayEstCtr = 0; 943 } 944 945 // initialize comfort noise for H band 946 memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband)); 947 nlpGainHband = (float)0.0; 948 dtmp = (float)0.0; 949 950 // We should always have at least one element stored in |far_buf|. 951 assert(WebRtc_available_read(aec->far_buf_windowed) > 0); 952 // NLP 953 WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); 954 955 // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of 956 // |xfwBuf|. 957 // Buffer far. 958 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); 959 960 WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd); 961 962 hNlXdAvg = 0; 963 for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { 964 hNlXdAvg += cohxd[i]; 965 } 966 hNlXdAvg /= prefBandSize; 967 hNlXdAvg = 1 - hNlXdAvg; 968 969 hNlDeAvg = 0; 970 for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { 971 hNlDeAvg += cohde[i]; 972 } 973 hNlDeAvg /= prefBandSize; 974 975 if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) { 976 aec->hNlXdAvgMin = hNlXdAvg; 977 } 978 979 if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) { 980 aec->stNearState = 1; 981 } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) { 982 aec->stNearState = 0; 983 } 984 985 if (aec->hNlXdAvgMin == 1) { 986 aec->echoState = 0; 987 aec->overDrive = min_overdrive[aec->nlp_mode]; 988 989 if (aec->stNearState == 1) { 990 memcpy(hNl, cohde, sizeof(hNl)); 991 hNlFb = hNlDeAvg; 992 hNlFbLow = hNlDeAvg; 993 } else { 994 for (i = 0; i < PART_LEN1; i++) { 995 hNl[i] = 1 - cohxd[i]; 996 } 997 hNlFb = hNlXdAvg; 998 hNlFbLow = hNlXdAvg; 999 } 1000 } else { 1001 1002 if (aec->stNearState == 1) { 1003 aec->echoState = 0; 1004 memcpy(hNl, cohde, sizeof(hNl)); 1005 hNlFb = hNlDeAvg; 1006 hNlFbLow = hNlDeAvg; 1007 } else { 1008 aec->echoState = 1; 1009 for (i = 0; i < PART_LEN1; i++) { 1010 hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]); 1011 } 1012 1013 // Select an order statistic from the preferred bands. 1014 // TODO: Using quicksort now, but a selection algorithm may be preferred. 1015 memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); 1016 qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); 1017 hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))]; 1018 hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))]; 1019 } 1020 } 1021 1022 // Track the local filter minimum to determine suppression overdrive. 1023 if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { 1024 aec->hNlFbLocalMin = hNlFbLow; 1025 aec->hNlFbMin = hNlFbLow; 1026 aec->hNlNewMin = 1; 1027 aec->hNlMinCtr = 0; 1028 } 1029 aec->hNlFbLocalMin = 1030 WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); 1031 aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); 1032 1033 if (aec->hNlNewMin == 1) { 1034 aec->hNlMinCtr++; 1035 } 1036 if (aec->hNlMinCtr == 2) { 1037 aec->hNlNewMin = 0; 1038 aec->hNlMinCtr = 0; 1039 aec->overDrive = 1040 WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] / 1041 ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), 1042 min_overdrive[aec->nlp_mode]); 1043 } 1044 1045 // Smooth the overdrive. 1046 if (aec->overDrive < aec->overDriveSm) { 1047 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; 1048 } else { 1049 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; 1050 } 1051 1052 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); 1053 1054 // Add comfort noise. 1055 WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); 1056 1057 // TODO(bjornv): Investigate how to take the windowing below into account if 1058 // needed. 1059 if (aec->metricsMode == 1) { 1060 // Note that we have a scaling by two in the time domain |eBuf|. 1061 // In addition the time domain signal is windowed before transformation, 1062 // losing half the energy on the average. We take care of the first 1063 // scaling only in UpdateMetrics(). 1064 UpdateLevel(&aec->nlpoutlevel, efw); 1065 } 1066 // Inverse error fft. 1067 fft[0] = efw[0][0]; 1068 fft[1] = efw[0][PART_LEN]; 1069 for (i = 1; i < PART_LEN; i++) { 1070 fft[2 * i] = efw[0][i]; 1071 // Sign change required by Ooura fft. 1072 fft[2 * i + 1] = -efw[1][i]; 1073 } 1074 aec_rdft_inverse_128(fft); 1075 1076 // Overlap and add to obtain output. 1077 scale = 2.0f / PART_LEN2; 1078 for (i = 0; i < PART_LEN; i++) { 1079 fft[i] *= scale; // fft scaling 1080 fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i]; 1081 1082 fft[PART_LEN + i] *= scale; // fft scaling 1083 aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; 1084 1085 // Saturate output to keep it in the allowed range. 1086 output[i] = WEBRTC_SPL_SAT( 1087 WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN); 1088 } 1089 1090 // For H band 1091 if (aec->num_bands > 1) { 1092 1093 // H band gain 1094 // average nlp over low band: average over second half of freq spectrum 1095 // (4->8khz) 1096 GetHighbandGain(hNl, &nlpGainHband); 1097 1098 // Inverse comfort_noise 1099 if (flagHbandCn == 1) { 1100 fft[0] = comfortNoiseHband[0][0]; 1101 fft[1] = comfortNoiseHband[PART_LEN][0]; 1102 for (i = 1; i < PART_LEN; i++) { 1103 fft[2 * i] = comfortNoiseHband[i][0]; 1104 fft[2 * i + 1] = comfortNoiseHband[i][1]; 1105 } 1106 aec_rdft_inverse_128(fft); 1107 scale = 2.0f / PART_LEN2; 1108 } 1109 1110 // compute gain factor 1111 for (j = 0; j < aec->num_bands - 1; ++j) { 1112 for (i = 0; i < PART_LEN; i++) { 1113 dtmp = aec->dBufH[j][i]; 1114 dtmp = dtmp * nlpGainHband; // for variable gain 1115 1116 // add some comfort noise where Hband is attenuated 1117 if (flagHbandCn == 1 && j == 0) { 1118 fft[i] *= scale; // fft scaling 1119 dtmp += cnScaleHband * fft[i]; 1120 } 1121 1122 // Saturate output to keep it in the allowed range. 1123 outputH[j][i] = WEBRTC_SPL_SAT( 1124 WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN); 1125 } 1126 } 1127 } 1128 1129 // Copy the current block to the old position. 1130 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); 1131 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); 1132 1133 // Copy the current block to the old position for H band 1134 for (i = 0; i < aec->num_bands - 1; ++i) { 1135 memcpy(aec->dBufH[i], aec->dBufH[i] + PART_LEN, sizeof(float) * PART_LEN); 1136 } 1137 1138 memmove(aec->xfwBuf + PART_LEN1, 1139 aec->xfwBuf, 1140 sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1); 1141} 1142 1143static void ProcessBlock(AecCore* aec) { 1144 int i; 1145 float y[PART_LEN], e[PART_LEN]; 1146 float scale; 1147 1148 float fft[PART_LEN2]; 1149 float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; 1150 float df[2][PART_LEN1]; 1151 float far_spectrum = 0.0f; 1152 float near_spectrum = 0.0f; 1153 float abs_far_spectrum[PART_LEN1]; 1154 float abs_near_spectrum[PART_LEN1]; 1155 1156 const float gPow[2] = {0.9f, 0.1f}; 1157 1158 // Noise estimate constants. 1159 const int noiseInitBlocks = 500 * aec->mult; 1160 const float step = 0.1f; 1161 const float ramp = 1.0002f; 1162 const float gInitNoise[2] = {0.999f, 0.001f}; 1163 1164 float nearend[PART_LEN]; 1165 float* nearend_ptr = NULL; 1166 float output[PART_LEN]; 1167 float outputH[NUM_HIGH_BANDS_MAX][PART_LEN]; 1168 float* outputH_ptr[NUM_HIGH_BANDS_MAX]; 1169 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1170 outputH_ptr[i] = outputH[i]; 1171 } 1172 1173 float* xf_ptr = NULL; 1174 1175 // Concatenate old and new nearend blocks. 1176 for (i = 0; i < aec->num_bands - 1; ++i) { 1177 WebRtc_ReadBuffer(aec->nearFrBufH[i], 1178 (void**)&nearend_ptr, 1179 nearend, 1180 PART_LEN); 1181 memcpy(aec->dBufH[i] + PART_LEN, nearend_ptr, sizeof(nearend)); 1182 } 1183 WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN); 1184 memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend)); 1185 1186 // ---------- Ooura fft ---------- 1187 1188#ifdef WEBRTC_AEC_DEBUG_DUMP 1189 { 1190 float farend[PART_LEN]; 1191 float* farend_ptr = NULL; 1192 WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); 1193 rtc_WavWriteSamples(aec->farFile, farend_ptr, PART_LEN); 1194 rtc_WavWriteSamples(aec->nearFile, nearend_ptr, PART_LEN); 1195 } 1196#endif 1197 1198 // We should always have at least one element stored in |far_buf|. 1199 assert(WebRtc_available_read(aec->far_buf) > 0); 1200 WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1); 1201 1202 // Near fft 1203 memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); 1204 TimeToFrequency(fft, df, 0); 1205 1206 // Power smoothing 1207 for (i = 0; i < PART_LEN1; i++) { 1208 far_spectrum = (xf_ptr[i] * xf_ptr[i]) + 1209 (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); 1210 aec->xPow[i] = 1211 gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; 1212 // Calculate absolute spectra 1213 abs_far_spectrum[i] = sqrtf(far_spectrum); 1214 1215 near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; 1216 aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum; 1217 // Calculate absolute spectra 1218 abs_near_spectrum[i] = sqrtf(near_spectrum); 1219 } 1220 1221 // Estimate noise power. Wait until dPow is more stable. 1222 if (aec->noiseEstCtr > 50) { 1223 for (i = 0; i < PART_LEN1; i++) { 1224 if (aec->dPow[i] < aec->dMinPow[i]) { 1225 aec->dMinPow[i] = 1226 (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp; 1227 } else { 1228 aec->dMinPow[i] *= ramp; 1229 } 1230 } 1231 } 1232 1233 // Smooth increasing noise power from zero at the start, 1234 // to avoid a sudden burst of comfort noise. 1235 if (aec->noiseEstCtr < noiseInitBlocks) { 1236 aec->noiseEstCtr++; 1237 for (i = 0; i < PART_LEN1; i++) { 1238 if (aec->dMinPow[i] > aec->dInitMinPow[i]) { 1239 aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + 1240 gInitNoise[1] * aec->dMinPow[i]; 1241 } else { 1242 aec->dInitMinPow[i] = aec->dMinPow[i]; 1243 } 1244 } 1245 aec->noisePow = aec->dInitMinPow; 1246 } else { 1247 aec->noisePow = aec->dMinPow; 1248 } 1249 1250 // Block wise delay estimation used for logging 1251 if (aec->delay_logging_enabled) { 1252 if (WebRtc_AddFarSpectrumFloat( 1253 aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) { 1254 int delay_estimate = WebRtc_DelayEstimatorProcessFloat( 1255 aec->delay_estimator, abs_near_spectrum, PART_LEN1); 1256 if (delay_estimate >= 0) { 1257 // Update delay estimate buffer. 1258 aec->delay_histogram[delay_estimate]++; 1259 aec->num_delay_values++; 1260 } 1261 if (aec->delay_metrics_delivered == 1 && 1262 aec->num_delay_values >= kDelayMetricsAggregationWindow) { 1263 UpdateDelayMetrics(aec); 1264 } 1265 } 1266 } 1267 1268 // Update the xfBuf block position. 1269 aec->xfBufBlockPos--; 1270 if (aec->xfBufBlockPos == -1) { 1271 aec->xfBufBlockPos = aec->num_partitions - 1; 1272 } 1273 1274 // Buffer xf 1275 memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, 1276 xf_ptr, 1277 sizeof(float) * PART_LEN1); 1278 memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, 1279 &xf_ptr[PART_LEN1], 1280 sizeof(float) * PART_LEN1); 1281 1282 memset(yf, 0, sizeof(yf)); 1283 1284 // Filter far 1285 WebRtcAec_FilterFar(aec, yf); 1286 1287 // Inverse fft to obtain echo estimate and error. 1288 fft[0] = yf[0][0]; 1289 fft[1] = yf[0][PART_LEN]; 1290 for (i = 1; i < PART_LEN; i++) { 1291 fft[2 * i] = yf[0][i]; 1292 fft[2 * i + 1] = yf[1][i]; 1293 } 1294 aec_rdft_inverse_128(fft); 1295 1296 scale = 2.0f / PART_LEN2; 1297 for (i = 0; i < PART_LEN; i++) { 1298 y[i] = fft[PART_LEN + i] * scale; // fft scaling 1299 } 1300 1301 for (i = 0; i < PART_LEN; i++) { 1302 e[i] = nearend_ptr[i] - y[i]; 1303 } 1304 1305 // Error fft 1306 memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); 1307 memset(fft, 0, sizeof(float) * PART_LEN); 1308 memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); 1309 // TODO(bjornv): Change to use TimeToFrequency(). 1310 aec_rdft_forward_128(fft); 1311 1312 ef[1][0] = 0; 1313 ef[1][PART_LEN] = 0; 1314 ef[0][0] = fft[0]; 1315 ef[0][PART_LEN] = fft[1]; 1316 for (i = 1; i < PART_LEN; i++) { 1317 ef[0][i] = fft[2 * i]; 1318 ef[1][i] = fft[2 * i + 1]; 1319 } 1320 1321 if (aec->metricsMode == 1) { 1322 // Note that the first PART_LEN samples in fft (before transformation) are 1323 // zero. Hence, the scaling by two in UpdateLevel() should not be 1324 // performed. That scaling is taken care of in UpdateMetrics() instead. 1325 UpdateLevel(&aec->linoutlevel, ef); 1326 } 1327 1328 // Scale error signal inversely with far power. 1329 WebRtcAec_ScaleErrorSignal(aec, ef); 1330 WebRtcAec_FilterAdaptation(aec, fft, ef); 1331 NonLinearProcessing(aec, output, outputH_ptr); 1332 1333 if (aec->metricsMode == 1) { 1334 // Update power levels and echo metrics 1335 UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr); 1336 UpdateLevel(&aec->nearlevel, df); 1337 UpdateMetrics(aec); 1338 } 1339 1340 // Store the output block. 1341 WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); 1342 // For high bands 1343 for (i = 0; i < aec->num_bands - 1; ++i) { 1344 WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); 1345 } 1346 1347#ifdef WEBRTC_AEC_DEBUG_DUMP 1348 rtc_WavWriteSamples(aec->outLinearFile, e, PART_LEN); 1349 rtc_WavWriteSamples(aec->outFile, output, PART_LEN); 1350#endif 1351} 1352 1353int WebRtcAec_CreateAec(AecCore** aecInst) { 1354 int i; 1355 AecCore* aec = malloc(sizeof(AecCore)); 1356 *aecInst = aec; 1357 if (aec == NULL) { 1358 return -1; 1359 } 1360 1361 aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float)); 1362 if (!aec->nearFrBuf) { 1363 WebRtcAec_FreeAec(aec); 1364 aec = NULL; 1365 return -1; 1366 } 1367 1368 aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float)); 1369 if (!aec->outFrBuf) { 1370 WebRtcAec_FreeAec(aec); 1371 aec = NULL; 1372 return -1; 1373 } 1374 1375 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1376 aec->nearFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, 1377 sizeof(float)); 1378 if (!aec->nearFrBufH[i]) { 1379 WebRtcAec_FreeAec(aec); 1380 aec = NULL; 1381 return -1; 1382 } 1383 aec->outFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, 1384 sizeof(float)); 1385 if (!aec->outFrBufH[i]) { 1386 WebRtcAec_FreeAec(aec); 1387 aec = NULL; 1388 return -1; 1389 } 1390 } 1391 1392 // Create far-end buffers. 1393 aec->far_buf = 1394 WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); 1395 if (!aec->far_buf) { 1396 WebRtcAec_FreeAec(aec); 1397 aec = NULL; 1398 return -1; 1399 } 1400 aec->far_buf_windowed = 1401 WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); 1402 if (!aec->far_buf_windowed) { 1403 WebRtcAec_FreeAec(aec); 1404 aec = NULL; 1405 return -1; 1406 } 1407#ifdef WEBRTC_AEC_DEBUG_DUMP 1408 aec->instance_index = webrtc_aec_instance_count; 1409 aec->far_time_buf = 1410 WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN); 1411 if (!aec->far_time_buf) { 1412 WebRtcAec_FreeAec(aec); 1413 aec = NULL; 1414 return -1; 1415 } 1416 aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL; 1417 aec->debug_dump_count = 0; 1418#endif 1419 aec->delay_estimator_farend = 1420 WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks); 1421 if (aec->delay_estimator_farend == NULL) { 1422 WebRtcAec_FreeAec(aec); 1423 aec = NULL; 1424 return -1; 1425 } 1426 // We create the delay_estimator with the same amount of maximum lookahead as 1427 // the delay history size (kHistorySizeBlocks) for symmetry reasons. 1428 aec->delay_estimator = WebRtc_CreateDelayEstimator( 1429 aec->delay_estimator_farend, kHistorySizeBlocks); 1430 if (aec->delay_estimator == NULL) { 1431 WebRtcAec_FreeAec(aec); 1432 aec = NULL; 1433 return -1; 1434 } 1435#ifdef WEBRTC_ANDROID 1436 // DA-AEC assumes the system is causal from the beginning and will self adjust 1437 // the lookahead when shifting is required. 1438 WebRtc_set_lookahead(aec->delay_estimator, 0); 1439#else 1440 WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks); 1441#endif 1442 1443 // Assembly optimization 1444 WebRtcAec_FilterFar = FilterFar; 1445 WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; 1446 WebRtcAec_FilterAdaptation = FilterAdaptation; 1447 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; 1448 WebRtcAec_ComfortNoise = ComfortNoise; 1449 WebRtcAec_SubbandCoherence = SubbandCoherence; 1450 1451#if defined(WEBRTC_ARCH_X86_FAMILY) 1452 if (WebRtc_GetCPUInfo(kSSE2)) { 1453 WebRtcAec_InitAec_SSE2(); 1454 } 1455#endif 1456 1457#if defined(MIPS_FPU_LE) 1458 WebRtcAec_InitAec_mips(); 1459#endif 1460 1461#if defined(WEBRTC_ARCH_ARM_NEON) 1462 WebRtcAec_InitAec_neon(); 1463#elif defined(WEBRTC_DETECT_ARM_NEON) 1464 if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { 1465 WebRtcAec_InitAec_neon(); 1466 } 1467#endif 1468 1469 aec_rdft_init(); 1470 1471 return 0; 1472} 1473 1474void WebRtcAec_FreeAec(AecCore* aec) { 1475 int i; 1476 if (aec == NULL) { 1477 return; 1478 } 1479 1480 WebRtc_FreeBuffer(aec->nearFrBuf); 1481 WebRtc_FreeBuffer(aec->outFrBuf); 1482 1483 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1484 WebRtc_FreeBuffer(aec->nearFrBufH[i]); 1485 WebRtc_FreeBuffer(aec->outFrBufH[i]); 1486 } 1487 1488 WebRtc_FreeBuffer(aec->far_buf); 1489 WebRtc_FreeBuffer(aec->far_buf_windowed); 1490#ifdef WEBRTC_AEC_DEBUG_DUMP 1491 WebRtc_FreeBuffer(aec->far_time_buf); 1492 rtc_WavClose(aec->farFile); 1493 rtc_WavClose(aec->nearFile); 1494 rtc_WavClose(aec->outFile); 1495 rtc_WavClose(aec->outLinearFile); 1496#endif 1497 WebRtc_FreeDelayEstimator(aec->delay_estimator); 1498 WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); 1499 1500 free(aec); 1501} 1502 1503#ifdef WEBRTC_AEC_DEBUG_DUMP 1504// Open a new Wav file for writing. If it was already open with a different 1505// sample frequency, close it first. 1506static void ReopenWav(rtc_WavWriter** wav_file, 1507 const char* name, 1508 int seq1, 1509 int seq2, 1510 int sample_rate) { 1511 int written ATTRIBUTE_UNUSED; 1512 char filename[64]; 1513 if (*wav_file) { 1514 if (rtc_WavSampleRate(*wav_file) == sample_rate) 1515 return; 1516 rtc_WavClose(*wav_file); 1517 } 1518 written = snprintf(filename, sizeof(filename), "%s%d-%d.wav", 1519 name, seq1, seq2); 1520 assert(written >= 0); // no output error 1521 assert((size_t)written < sizeof(filename)); // buffer was large enough 1522 *wav_file = rtc_WavOpen(filename, sample_rate, 1); 1523} 1524#endif // WEBRTC_AEC_DEBUG_DUMP 1525 1526int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { 1527 int i; 1528 1529 aec->sampFreq = sampFreq; 1530 1531 if (sampFreq == 8000) { 1532 aec->normal_mu = 0.6f; 1533 aec->normal_error_threshold = 2e-6f; 1534 aec->num_bands = 1; 1535 } else { 1536 aec->normal_mu = 0.5f; 1537 aec->normal_error_threshold = 1.5e-6f; 1538 aec->num_bands = sampFreq / 16000; 1539 } 1540 1541 WebRtc_InitBuffer(aec->nearFrBuf); 1542 WebRtc_InitBuffer(aec->outFrBuf); 1543 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1544 WebRtc_InitBuffer(aec->nearFrBufH[i]); 1545 WebRtc_InitBuffer(aec->outFrBufH[i]); 1546 } 1547 1548 // Initialize far-end buffers. 1549 WebRtc_InitBuffer(aec->far_buf); 1550 WebRtc_InitBuffer(aec->far_buf_windowed); 1551#ifdef WEBRTC_AEC_DEBUG_DUMP 1552 WebRtc_InitBuffer(aec->far_time_buf); 1553 { 1554 int process_rate = sampFreq > 16000 ? 16000 : sampFreq; 1555 ReopenWav(&aec->farFile, "aec_far", 1556 aec->instance_index, aec->debug_dump_count, process_rate); 1557 ReopenWav(&aec->nearFile, "aec_near", 1558 aec->instance_index, aec->debug_dump_count, process_rate); 1559 ReopenWav(&aec->outFile, "aec_out", 1560 aec->instance_index, aec->debug_dump_count, process_rate); 1561 ReopenWav(&aec->outLinearFile, "aec_out_linear", 1562 aec->instance_index, aec->debug_dump_count, process_rate); 1563 } 1564 ++aec->debug_dump_count; 1565#endif 1566 aec->system_delay = 0; 1567 1568 if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) { 1569 return -1; 1570 } 1571 if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) { 1572 return -1; 1573 } 1574 aec->delay_logging_enabled = 0; 1575 aec->delay_metrics_delivered = 0; 1576 memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); 1577 aec->num_delay_values = 0; 1578 aec->delay_median = -1; 1579 aec->delay_std = -1; 1580 aec->fraction_poor_delays = -1.0f; 1581 1582 aec->signal_delay_correction = 0; 1583 aec->previous_delay = -2; // (-2): Uninitialized. 1584 aec->delay_correction_count = 0; 1585 aec->shift_offset = kInitialShiftOffset; 1586 aec->delay_quality_threshold = 0; 1587 1588#ifdef WEBRTC_ANDROID 1589 aec->reported_delay_enabled = 0; // Disabled by default. 1590#else 1591 aec->reported_delay_enabled = 1; 1592#endif 1593 aec->extended_filter_enabled = 0; 1594 aec->num_partitions = kNormalNumPartitions; 1595 1596 // Update the delay estimator with filter length. We use half the 1597 // |num_partitions| to take the echo path into account. In practice we say 1598 // that the echo has a duration of maximum half |num_partitions|, which is not 1599 // true, but serves as a crude measure. 1600 WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2); 1601 // TODO(bjornv): I currently hard coded the enable. Once we've established 1602 // that AECM has no performance regression, robust_validation will be enabled 1603 // all the time and the APIs to turn it on/off will be removed. Hence, remove 1604 // this line then. 1605 WebRtc_enable_robust_validation(aec->delay_estimator, 1); 1606 1607 // Default target suppression mode. 1608 aec->nlp_mode = 1; 1609 1610 // Sampling frequency multiplier w.r.t. 8 kHz. 1611 // In case of multiple bands we process the lower band in 16 kHz, hence the 1612 // multiplier is always 2. 1613 if (aec->num_bands > 1) { 1614 aec->mult = 2; 1615 } else { 1616 aec->mult = (short)aec->sampFreq / 8000; 1617 } 1618 1619 aec->farBufWritePos = 0; 1620 aec->farBufReadPos = 0; 1621 1622 aec->inSamples = 0; 1623 aec->outSamples = 0; 1624 aec->knownDelay = 0; 1625 1626 // Initialize buffers 1627 memset(aec->dBuf, 0, sizeof(aec->dBuf)); 1628 memset(aec->eBuf, 0, sizeof(aec->eBuf)); 1629 // For H bands 1630 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1631 memset(aec->dBufH[i], 0, sizeof(aec->dBufH[i])); 1632 } 1633 1634 memset(aec->xPow, 0, sizeof(aec->xPow)); 1635 memset(aec->dPow, 0, sizeof(aec->dPow)); 1636 memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); 1637 aec->noisePow = aec->dInitMinPow; 1638 aec->noiseEstCtr = 0; 1639 1640 // Initial comfort noise power 1641 for (i = 0; i < PART_LEN1; i++) { 1642 aec->dMinPow[i] = 1.0e6f; 1643 } 1644 1645 // Holds the last block written to 1646 aec->xfBufBlockPos = 0; 1647 // TODO: Investigate need for these initializations. Deleting them doesn't 1648 // change the output at all and yields 0.4% overall speedup. 1649 memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); 1650 memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); 1651 memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1); 1652 memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1); 1653 memset( 1654 aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); 1655 memset(aec->se, 0, sizeof(float) * PART_LEN1); 1656 1657 // To prevent numerical instability in the first block. 1658 for (i = 0; i < PART_LEN1; i++) { 1659 aec->sd[i] = 1; 1660 } 1661 for (i = 0; i < PART_LEN1; i++) { 1662 aec->sx[i] = 1; 1663 } 1664 1665 memset(aec->hNs, 0, sizeof(aec->hNs)); 1666 memset(aec->outBuf, 0, sizeof(float) * PART_LEN); 1667 1668 aec->hNlFbMin = 1; 1669 aec->hNlFbLocalMin = 1; 1670 aec->hNlXdAvgMin = 1; 1671 aec->hNlNewMin = 0; 1672 aec->hNlMinCtr = 0; 1673 aec->overDrive = 2; 1674 aec->overDriveSm = 2; 1675 aec->delayIdx = 0; 1676 aec->stNearState = 0; 1677 aec->echoState = 0; 1678 aec->divergeState = 0; 1679 1680 aec->seed = 777; 1681 aec->delayEstCtr = 0; 1682 1683 // Metrics disabled by default 1684 aec->metricsMode = 0; 1685 InitMetrics(aec); 1686 1687 return 0; 1688} 1689 1690void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { 1691 float fft[PART_LEN2]; 1692 float xf[2][PART_LEN1]; 1693 1694 // Check if the buffer is full, and in that case flush the oldest data. 1695 if (WebRtc_available_write(aec->far_buf) < 1) { 1696 WebRtcAec_MoveFarReadPtr(aec, 1); 1697 } 1698 // Convert far-end partition to the frequency domain without windowing. 1699 memcpy(fft, farend, sizeof(float) * PART_LEN2); 1700 TimeToFrequency(fft, xf, 0); 1701 WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1); 1702 1703 // Convert far-end partition to the frequency domain with windowing. 1704 memcpy(fft, farend, sizeof(float) * PART_LEN2); 1705 TimeToFrequency(fft, xf, 1); 1706 WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1); 1707} 1708 1709int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) { 1710 int elements_moved = WebRtc_MoveReadPtr(aec->far_buf_windowed, elements); 1711 WebRtc_MoveReadPtr(aec->far_buf, elements); 1712#ifdef WEBRTC_AEC_DEBUG_DUMP 1713 WebRtc_MoveReadPtr(aec->far_time_buf, elements); 1714#endif 1715 aec->system_delay -= elements_moved * PART_LEN; 1716 return elements_moved; 1717} 1718 1719void WebRtcAec_ProcessFrames(AecCore* aec, 1720 const float* const* nearend, 1721 int num_bands, 1722 int num_samples, 1723 int knownDelay, 1724 float* const* out) { 1725 int i, j; 1726 int out_elements = 0; 1727 1728 // For each frame the process is as follows: 1729 // 1) If the system_delay indicates on being too small for processing a 1730 // frame we stuff the buffer with enough data for 10 ms. 1731 // 2 a) Adjust the buffer to the system delay, by moving the read pointer. 1732 // b) Apply signal based delay correction, if we have detected poor AEC 1733 // performance. 1734 // 3) TODO(bjornv): Investigate if we need to add this: 1735 // If we can't move read pointer due to buffer size limitations we 1736 // flush/stuff the buffer. 1737 // 4) Process as many partitions as possible. 1738 // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN 1739 // samples. Even though we will have data left to process (we work with 1740 // partitions) we consider updating a whole frame, since that's the 1741 // amount of data we input and output in audio_processing. 1742 // 6) Update the outputs. 1743 1744 // The AEC has two different delay estimation algorithms built in. The 1745 // first relies on delay input values from the user and the amount of 1746 // shifted buffer elements is controlled by |knownDelay|. This delay will 1747 // give a guess on how much we need to shift far-end buffers to align with 1748 // the near-end signal. The other delay estimation algorithm uses the 1749 // far- and near-end signals to find the offset between them. This one 1750 // (called "signal delay") is then used to fine tune the alignment, or 1751 // simply compensate for errors in the system based one. 1752 // Note that the two algorithms operate independently. Currently, we only 1753 // allow one algorithm to be turned on. 1754 1755 assert(aec->num_bands == num_bands); 1756 1757 for (j = 0; j < num_samples; j+= FRAME_LEN) { 1758 // TODO(bjornv): Change the near-end buffer handling to be the same as for 1759 // far-end, that is, with a near_pre_buf. 1760 // Buffer the near-end frame. 1761 WebRtc_WriteBuffer(aec->nearFrBuf, &nearend[0][j], FRAME_LEN); 1762 // For H band 1763 for (i = 1; i < num_bands; ++i) { 1764 WebRtc_WriteBuffer(aec->nearFrBufH[i - 1], &nearend[i][j], FRAME_LEN); 1765 } 1766 1767 // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we 1768 // have enough far-end data for that by stuffing the buffer if the 1769 // |system_delay| indicates others. 1770 if (aec->system_delay < FRAME_LEN) { 1771 // We don't have enough data so we rewind 10 ms. 1772 WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1)); 1773 } 1774 1775 if (aec->reported_delay_enabled) { 1776 // 2 a) Compensate for a possible change in the system delay. 1777 1778 // TODO(bjornv): Investigate how we should round the delay difference; 1779 // right now we know that incoming |knownDelay| is underestimated when 1780 // it's less than |aec->knownDelay|. We therefore, round (-32) in that 1781 // direction. In the other direction, we don't have this situation, but 1782 // might flush one partition too little. This can cause non-causality, 1783 // which should be investigated. Maybe, allow for a non-symmetric 1784 // rounding, like -16. 1785 int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; 1786 int moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements); 1787 WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements); 1788 aec->knownDelay -= moved_elements * PART_LEN; 1789 #ifdef WEBRTC_AEC_DEBUG_DUMP 1790 WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); 1791 #endif 1792 } else { 1793 // 2 b) Apply signal based delay correction. 1794 int move_elements = SignalBasedDelayCorrection(aec); 1795 int moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements); 1796 WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements); 1797 #ifdef WEBRTC_AEC_DEBUG_DUMP 1798 WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); 1799 #endif 1800 WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements); 1801 WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend, 1802 moved_elements); 1803 aec->signal_delay_correction += moved_elements; 1804 // TODO(bjornv): Investigate if this is reasonable. I had to add this 1805 // guard when the signal based delay correction replaces the system based 1806 // one. Otherwise there was a buffer underrun in the "qa-new/01/" 1807 // recording when adding 44 ms extra delay. This was not seen if we kept 1808 // both delay correction algorithms running in parallel. 1809 // A first investigation showed that we have a drift in this case that 1810 // causes the buffer underrun. Compared to when delay correction was 1811 // turned off, we get buffer underrun as well which was triggered in 1) 1812 // above. In addition there was a shift in |knownDelay| later increasing 1813 // the buffer. When running in parallel, this if statement was not 1814 // triggered. This suggests two alternatives; (a) use both algorithms, or 1815 // (b) allow for smaller delay corrections when we operate close to the 1816 // buffer limit. At the time of testing we required a change of 6 blocks, 1817 // but could change it to, e.g., 2 blocks. It requires some testing 1818 // though. 1819 if ((int)WebRtc_available_read(aec->far_buf) < (aec->mult + 1)) { 1820 // We don't have enough data so we stuff the far-end buffers. 1821 WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1)); 1822 } 1823 } 1824 1825 // 4) Process as many blocks as possible. 1826 while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) { 1827 ProcessBlock(aec); 1828 } 1829 1830 // 5) Update system delay with respect to the entire frame. 1831 aec->system_delay -= FRAME_LEN; 1832 1833 // 6) Update output frame. 1834 // Stuff the out buffer if we have less than a frame to output. 1835 // This should only happen for the first frame. 1836 out_elements = (int)WebRtc_available_read(aec->outFrBuf); 1837 if (out_elements < FRAME_LEN) { 1838 WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN); 1839 for (i = 0; i < num_bands - 1; ++i) { 1840 WebRtc_MoveReadPtr(aec->outFrBufH[i], out_elements - FRAME_LEN); 1841 } 1842 } 1843 // Obtain an output frame. 1844 WebRtc_ReadBuffer(aec->outFrBuf, NULL, &out[0][j], FRAME_LEN); 1845 // For H bands. 1846 for (i = 1; i < num_bands; ++i) { 1847 WebRtc_ReadBuffer(aec->outFrBufH[i - 1], NULL, &out[i][j], FRAME_LEN); 1848 } 1849 } 1850} 1851 1852int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std, 1853 float* fraction_poor_delays) { 1854 assert(self != NULL); 1855 assert(median != NULL); 1856 assert(std != NULL); 1857 1858 if (self->delay_logging_enabled == 0) { 1859 // Logging disabled. 1860 return -1; 1861 } 1862 1863 if (self->delay_metrics_delivered == 0) { 1864 UpdateDelayMetrics(self); 1865 self->delay_metrics_delivered = 1; 1866 } 1867 *median = self->delay_median; 1868 *std = self->delay_std; 1869 *fraction_poor_delays = self->fraction_poor_delays; 1870 1871 return 0; 1872} 1873 1874int WebRtcAec_echo_state(AecCore* self) { return self->echoState; } 1875 1876void WebRtcAec_GetEchoStats(AecCore* self, 1877 Stats* erl, 1878 Stats* erle, 1879 Stats* a_nlp) { 1880 assert(erl != NULL); 1881 assert(erle != NULL); 1882 assert(a_nlp != NULL); 1883 *erl = self->erl; 1884 *erle = self->erle; 1885 *a_nlp = self->aNlp; 1886} 1887 1888#ifdef WEBRTC_AEC_DEBUG_DUMP 1889void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; } 1890#endif 1891 1892void WebRtcAec_SetConfigCore(AecCore* self, 1893 int nlp_mode, 1894 int metrics_mode, 1895 int delay_logging) { 1896 assert(nlp_mode >= 0 && nlp_mode < 3); 1897 self->nlp_mode = nlp_mode; 1898 self->metricsMode = metrics_mode; 1899 if (self->metricsMode) { 1900 InitMetrics(self); 1901 } 1902 // Turn on delay logging if it is either set explicitly or if delay agnostic 1903 // AEC is enabled (which requires delay estimates). 1904 self->delay_logging_enabled = delay_logging || !self->reported_delay_enabled; 1905 if (self->delay_logging_enabled) { 1906 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); 1907 } 1908} 1909 1910void WebRtcAec_enable_reported_delay(AecCore* self, int enable) { 1911 self->reported_delay_enabled = enable; 1912} 1913 1914int WebRtcAec_reported_delay_enabled(AecCore* self) { 1915 return self->reported_delay_enabled; 1916} 1917 1918void WebRtcAec_enable_delay_correction(AecCore* self, int enable) { 1919 self->extended_filter_enabled = enable; 1920 self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions; 1921 // Update the delay estimator with filter length. See InitAEC() for details. 1922 WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2); 1923} 1924 1925int WebRtcAec_delay_correction_enabled(AecCore* self) { 1926 return self->extended_filter_enabled; 1927} 1928 1929int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } 1930 1931void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { 1932 assert(delay >= 0); 1933 self->system_delay = delay; 1934} 1935