aec_core.c revision 5fc829200c451ac1033523a7ceaf2d9c5289c51d
1/* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11/* 12 * The core AEC algorithm, which is presented with time-aligned signals. 13 */ 14 15#include "webrtc/modules/audio_processing/aec/aec_core.h" 16 17#include <assert.h> 18#include <math.h> 19#include <stddef.h> // size_t 20#include <stdlib.h> 21#include <string.h> 22 23#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 24#include "webrtc/modules/audio_processing/aec/aec_rdft.h" 25#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" 26#include "webrtc/modules/audio_processing/utility/ring_buffer.h" 27#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" 28#include "webrtc/typedefs.h" 29 30// Buffer size (samples) 31static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz. 
// Noise suppression
static const int converged = 250;

// Metrics
static const int subCountLen = 4;
static const int countLen = 50;

// Quantities to control H band scaling for SWB input
static const int flagHbandCn = 1; // flag for adding comfort noise in H band
static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band
// Initial bin for averaging nlp gain in low band
static const int freqAvgIc = PART_LEN / 2;

// Square-root Hanning window (65 points). Used for the overlapping analysis
// windows in NonLinearProcessing(); applied forward for the first half-block
// and mirrored (index PART_LEN - i) for the second.
// Matlab code to produce table:
// win = sqrt(hanning(63)); win = [0 ; win(1:32)];
// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
static const float sqrtHanning[65] = {
    0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
    0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
    0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
    0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
    0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
    0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
    0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
    0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
    0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
    0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
    0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
    0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
    0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
    0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
    0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
    0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
    0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
    0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
    0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
    0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
    0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
    0.99969881869620f, 1.00000000000000f
};

// Per-bin weighting used when pulling subband suppression gains toward the
// full-band average in OverdriveAndSuppress(); higher bins get more weight.
// Matlab code to produce table:
// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
const float WebRtcAec_weightCurve[65] = {
    0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
    0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
    0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f,
    0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
    0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f,
    0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f,
    0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f,
    0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
    0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f,
    0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f,
    0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
};

// Per-bin overdrive exponent curve used in OverdriveAndSuppress(); higher
// bins are suppressed more aggressively (exponent grows from 1 to 2).
// Matlab code to produce table:
// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
const float WebRtcAec_overDriveCurve[65] = {
    1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
    1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
    1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f,
    1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
    1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f,
    1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f,
    1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f,
    1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
    1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f,
    1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f,
    1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f
};

// Target suppression levels for nlp modes, indexed by |nlp_mode|
// (0 = mild, 1 = moderate, 2 = aggressive — presumably; confirm against the
// public AEC API).
// log{0.001, 0.00001, 0.00000001}
static const float kTargetSupp[3] = { -6.9f, -11.5f, -18.4f };
// Lower bound for the overdrive factor, indexed the same way as kTargetSupp.
static const float kMinOverDrive[3] = { 1.0f, 2.0f, 5.0f };

#ifdef WEBRTC_AEC_DEBUG_DUMP
// Shared counter used to give each AEC instance a unique debug-dump filename.
extern int webrtc_aec_instance_count;
#endif

// "Private" function prototypes.
118static void ProcessBlock(aec_t* aec); 119 120static void NonLinearProcessing(aec_t *aec, short *output, short *outputH); 121 122static void GetHighbandGain(const float *lambda, float *nlpGainHband); 123 124// Comfort_noise also computes noise for H band returned in comfortNoiseHband 125static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1], 126 complex_t *comfortNoiseHband, 127 const float *noisePow, const float *lambda); 128 129static void WebRtcAec_InitLevel(power_level_t *level); 130static void InitStats(Stats* stats); 131static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]); 132static void UpdateMetrics(aec_t *aec); 133// Convert from time domain to frequency domain. Note that |time_data| are 134// overwritten. 135static void TimeToFrequency(float time_data[PART_LEN2], 136 float freq_data[2][PART_LEN1], 137 int window); 138 139__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) 140{ 141 return aRe * bRe - aIm * bIm; 142} 143 144__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) 145{ 146 return aRe * bIm + aIm * bRe; 147} 148 149static int CmpFloat(const void *a, const void *b) 150{ 151 const float *da = (const float *)a; 152 const float *db = (const float *)b; 153 154 return (*da > *db) - (*da < *db); 155} 156 157int WebRtcAec_CreateAec(aec_t **aecInst) 158{ 159 aec_t *aec = malloc(sizeof(aec_t)); 160 *aecInst = aec; 161 if (aec == NULL) { 162 return -1; 163 } 164 165 if (WebRtc_CreateBuffer(&aec->nearFrBuf, 166 FRAME_LEN + PART_LEN, 167 sizeof(int16_t)) == -1) { 168 WebRtcAec_FreeAec(aec); 169 aec = NULL; 170 return -1; 171 } 172 173 if (WebRtc_CreateBuffer(&aec->outFrBuf, 174 FRAME_LEN + PART_LEN, 175 sizeof(int16_t)) == -1) { 176 WebRtcAec_FreeAec(aec); 177 aec = NULL; 178 return -1; 179 } 180 181 if (WebRtc_CreateBuffer(&aec->nearFrBufH, 182 FRAME_LEN + PART_LEN, 183 sizeof(int16_t)) == -1) { 184 WebRtcAec_FreeAec(aec); 185 aec = NULL; 186 return -1; 187 } 188 189 if 
(WebRtc_CreateBuffer(&aec->outFrBufH, 190 FRAME_LEN + PART_LEN, 191 sizeof(int16_t)) == -1) { 192 WebRtcAec_FreeAec(aec); 193 aec = NULL; 194 return -1; 195 } 196 197 // Create far-end buffers. 198 if (WebRtc_CreateBuffer(&aec->far_buf, kBufSizePartitions, 199 sizeof(float) * 2 * PART_LEN1) == -1) { 200 WebRtcAec_FreeAec(aec); 201 aec = NULL; 202 return -1; 203 } 204 if (WebRtc_CreateBuffer(&aec->far_buf_windowed, kBufSizePartitions, 205 sizeof(float) * 2 * PART_LEN1) == -1) { 206 WebRtcAec_FreeAec(aec); 207 aec = NULL; 208 return -1; 209 } 210#ifdef WEBRTC_AEC_DEBUG_DUMP 211 if (WebRtc_CreateBuffer(&aec->far_time_buf, kBufSizePartitions, 212 sizeof(int16_t) * PART_LEN) == -1) { 213 WebRtcAec_FreeAec(aec); 214 aec = NULL; 215 return -1; 216 } 217 { 218 char filename[64]; 219 sprintf(filename, "aec_far%d.pcm", webrtc_aec_instance_count); 220 aec->farFile = fopen(filename, "wb"); 221 sprintf(filename, "aec_near%d.pcm", webrtc_aec_instance_count); 222 aec->nearFile = fopen(filename, "wb"); 223 sprintf(filename, "aec_out%d.pcm", webrtc_aec_instance_count); 224 aec->outFile = fopen(filename, "wb"); 225 sprintf(filename, "aec_out_linear%d.pcm", webrtc_aec_instance_count); 226 aec->outLinearFile = fopen(filename, "wb"); 227 } 228#endif 229 aec->delay_estimator_farend = 230 WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks); 231 if (aec->delay_estimator_farend == NULL) { 232 WebRtcAec_FreeAec(aec); 233 aec = NULL; 234 return -1; 235 } 236 aec->delay_estimator = 237 WebRtc_CreateDelayEstimator(aec->delay_estimator_farend, 238 kLookaheadBlocks); 239 if (aec->delay_estimator == NULL) { 240 WebRtcAec_FreeAec(aec); 241 aec = NULL; 242 return -1; 243 } 244 245 return 0; 246} 247 248int WebRtcAec_FreeAec(aec_t *aec) 249{ 250 if (aec == NULL) { 251 return -1; 252 } 253 254 WebRtc_FreeBuffer(aec->nearFrBuf); 255 WebRtc_FreeBuffer(aec->outFrBuf); 256 257 WebRtc_FreeBuffer(aec->nearFrBufH); 258 WebRtc_FreeBuffer(aec->outFrBufH); 259 260 WebRtc_FreeBuffer(aec->far_buf); 
261 WebRtc_FreeBuffer(aec->far_buf_windowed); 262#ifdef WEBRTC_AEC_DEBUG_DUMP 263 WebRtc_FreeBuffer(aec->far_time_buf); 264 fclose(aec->farFile); 265 fclose(aec->nearFile); 266 fclose(aec->outFile); 267 fclose(aec->outLinearFile); 268#endif 269 WebRtc_FreeDelayEstimator(aec->delay_estimator); 270 WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); 271 272 free(aec); 273 return 0; 274} 275 276static void FilterFar(aec_t *aec, float yf[2][PART_LEN1]) 277{ 278 int i; 279 for (i = 0; i < NR_PART; i++) { 280 int j; 281 int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; 282 int pos = i * PART_LEN1; 283 // Check for wrap 284 if (i + aec->xfBufBlockPos >= NR_PART) { 285 xPos -= NR_PART*(PART_LEN1); 286 } 287 288 for (j = 0; j < PART_LEN1; j++) { 289 yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], 290 aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); 291 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], 292 aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); 293 } 294 } 295} 296 297static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1]) 298{ 299 int i; 300 float absEf; 301 for (i = 0; i < (PART_LEN1); i++) { 302 ef[0][i] /= (aec->xPow[i] + 1e-10f); 303 ef[1][i] /= (aec->xPow[i] + 1e-10f); 304 absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); 305 306 if (absEf > aec->errThresh) { 307 absEf = aec->errThresh / (absEf + 1e-10f); 308 ef[0][i] *= absEf; 309 ef[1][i] *= absEf; 310 } 311 312 // Stepsize factor 313 ef[0][i] *= aec->mu; 314 ef[1][i] *= aec->mu; 315 } 316} 317 318// Time-unconstrined filter adaptation. 319// TODO(andrew): consider for a low-complexity mode. 
//static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
//                                          float ef[2][PART_LEN1]) {
//  int i, j;
//  for (i = 0; i < NR_PART; i++) {
//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
//    int pos;
//    // Check for wrap
//    if (i + aec->xfBufBlockPos >= NR_PART) {
//      xPos -= NR_PART * PART_LEN1;
//    }
//
//    pos = i * PART_LEN1;
//
//    for (j = 0; j < PART_LEN1; j++) {
//      aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
//                                      -aec->xfBuf[xPos + j][1],
//                                      ef[j][0], ef[j][1]);
//      aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
//                                      -aec->xfBuf[xPos + j][1],
//                                      ef[j][0], ef[j][1]);
//    }
//  }
//}

// NLMS weight update for the partitioned-block adaptive filter. For each
// partition, the conjugated far-end spectrum is multiplied with the scaled
// error spectrum, the product is taken to the time domain, its second half is
// zeroed (the "constraint" that keeps the filter causal per partition), and
// the result is transformed back and accumulated into |wfBuf|. |fft| is a
// caller-provided PART_LEN2 scratch buffer.
static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
  int i, j;
  for (i = 0; i < NR_PART; i++) {
    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
    int pos;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= NR_PART) {
      xPos -= NR_PART * PART_LEN1;
    }

    pos = i * PART_LEN1;

    for (j = 0; j < PART_LEN; j++) {

      fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
                         -aec->xfBuf[1][xPos + j],
                         ef[0][j], ef[1][j]);
      fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
                             -aec->xfBuf[1][xPos + j],
                             ef[0][j], ef[1][j]);
    }
    // Ooura packs the real Nyquist bin into fft[1].
    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
                   -aec->xfBuf[1][xPos + PART_LEN],
                   ef[0][PART_LEN], ef[1][PART_LEN]);

    aec_rdft_inverse_128(fft);
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

    // fft scaling
    {
      float scale = 2.0f / PART_LEN2;
      for (j = 0; j < PART_LEN; j++) {
        fft[j] *= scale;
      }
    }
    aec_rdft_forward_128(fft);

    aec->wfBuf[0][pos] += fft[0];
    aec->wfBuf[0][pos + PART_LEN] += fft[1];

    for (j = 1; j < PART_LEN; j++) {
      aec->wfBuf[0][pos + j] += fft[2 * j];
      aec->wfBuf[1][pos + j] += fft[2 * j + 1];
    }
  }
}

// Applies the per-bin suppression gains |hNl| to the error spectrum |efw|.
// Gains above the full-band feedback level |hNlFb| are pulled toward it using
// WebRtcAec_weightCurve, then sharpened by the overdrive exponent
// (overDriveSm * WebRtcAec_overDriveCurve[i]).
static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1],
                                 const float hNlFb,
                                 float efw[2][PART_LEN1]) {
  int i;
  for (i = 0; i < PART_LEN1; i++) {
    // Weight subbands
    if (hNl[i] > hNlFb) {
      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
          (1 - WebRtcAec_weightCurve[i]) * hNl[i];
    }
    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);

    // Suppress error signal
    efw[0][i] *= hNl[i];
    efw[1][i] *= hNl[i];

    // Ooura fft returns incorrect sign on imaginary component. It matters here
    // because we are making an additive change with comfort noise.
    efw[1][i] *= -1;
  }
}

// Function pointers for the hot kernels above; WebRtcAec_InitAec() points
// them at the plain-C implementations and may then swap in SSE2 versions.
WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;

// (Re)initializes an AEC instance for the given sample rate. Resets all
// buffers, state variables, statistics and the delay estimators, selects the
// step size / error threshold for the rate, and binds the (possibly
// SIMD-optimized) kernel function pointers. Returns 0 on success, -1 on
// failure.
int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
{
  int i;

  aec->sampFreq = sampFreq;

  // Narrowband uses a more aggressive step size and threshold.
  if (sampFreq == 8000) {
    aec->mu = 0.6f;
    aec->errThresh = 2e-6f;
  }
  else {
    aec->mu = 0.5f;
    aec->errThresh = 1.5e-6f;
  }

  if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
    return -1;
  }

  if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
    return -1;
  }

  if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
    return -1;
  }

  if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
    return -1;
  }

  // Initialize far-end buffers.
  if (WebRtc_InitBuffer(aec->far_buf) == -1) {
    return -1;
  }
  if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
    return -1;
  }
#ifdef WEBRTC_AEC_DEBUG_DUMP
  if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
    return -1;
  }
#endif
  aec->system_delay = 0;

  if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) {
    return -1;
  }
  if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
    return -1;
  }
  aec->delay_logging_enabled = 0;
  memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));

  // Default target suppression mode.
  aec->nlp_mode = 1;

  // Sampling frequency multiplier
  // SWB is processed as 160 frame size
  if (aec->sampFreq == 32000) {
    aec->mult = (short)aec->sampFreq / 16000;
  }
  else {
    aec->mult = (short)aec->sampFreq / 8000;
  }

  aec->farBufWritePos = 0;
  aec->farBufReadPos = 0;

  aec->inSamples = 0;
  aec->outSamples = 0;
  aec->knownDelay = 0;

  // Initialize buffers
  memset(aec->dBuf, 0, sizeof(aec->dBuf));
  memset(aec->eBuf, 0, sizeof(aec->eBuf));
  // For H band
  memset(aec->dBufH, 0, sizeof(aec->dBufH));

  memset(aec->xPow, 0, sizeof(aec->xPow));
  memset(aec->dPow, 0, sizeof(aec->dPow));
  memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
  aec->noisePow = aec->dInitMinPow;
  aec->noiseEstCtr = 0;

  // Initial comfort noise power
  for (i = 0; i < PART_LEN1; i++) {
    aec->dMinPow[i] = 1.0e6f;
  }

  // Holds the last block written to
  aec->xfBufBlockPos = 0;
  // TODO: Investigate need for these initializations. Deleting them doesn't
  // change the output at all and yields 0.4% overall speedup.
  memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
  memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
  memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
  memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
  memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
  memset(aec->se, 0, sizeof(float) * PART_LEN1);

  // To prevent numerical instability in the first block.
  for (i = 0; i < PART_LEN1; i++) {
    aec->sd[i] = 1;
  }
  for (i = 0; i < PART_LEN1; i++) {
    aec->sx[i] = 1;
  }

  memset(aec->hNs, 0, sizeof(aec->hNs));
  memset(aec->outBuf, 0, sizeof(float) * PART_LEN);

  aec->hNlFbMin = 1;
  aec->hNlFbLocalMin = 1;
  aec->hNlXdAvgMin = 1;
  aec->hNlNewMin = 0;
  aec->hNlMinCtr = 0;
  aec->overDrive = 2;
  aec->overDriveSm = 2;
  aec->delayIdx = 0;
  aec->stNearState = 0;
  aec->echoState = 0;
  aec->divergeState = 0;

  aec->seed = 777;
  aec->delayEstCtr = 0;

  // Metrics disabled by default
  aec->metricsMode = 0;
  WebRtcAec_InitMetrics(aec);

  // Assembly optimization
  WebRtcAec_FilterFar = FilterFar;
  WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
  WebRtcAec_FilterAdaptation = FilterAdaptation;
  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;

#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (WebRtc_GetCPUInfo(kSSE2)) {
    WebRtcAec_InitAec_SSE2();
  }
#endif

  aec_rdft_init();

  return 0;
}

// Resets all level and echo-metric statistics.
void WebRtcAec_InitMetrics(aec_t *aec)
{
  aec->stateCounter = 0;
  WebRtcAec_InitLevel(&aec->farlevel);
  WebRtcAec_InitLevel(&aec->nearlevel);
  WebRtcAec_InitLevel(&aec->linoutlevel);
  WebRtcAec_InitLevel(&aec->nlpoutlevel);

  InitStats(&aec->erl);
  InitStats(&aec->erle);
  InitStats(&aec->aNlp);
  InitStats(&aec->rerl);
}

// Transforms one PART_LEN2 far-end partition to the frequency domain twice -
// once unwindowed (for the adaptive filter, into |far_buf|) and once windowed
// (for the NLP, into |far_buf_windowed|). If the buffers are full the oldest
// partition is flushed first.
void WebRtcAec_BufferFarendPartition(aec_t *aec, const float* farend) {
  float fft[PART_LEN2];
  float xf[2][PART_LEN1];

  // Check if the buffer is full, and in that case flush the oldest data.
  if (WebRtc_available_write(aec->far_buf) < 1) {
    WebRtcAec_MoveFarReadPtr(aec, 1);
  }
  // Convert far-end partition to the frequency domain without windowing.
  memcpy(fft, farend, sizeof(float) * PART_LEN2);
  TimeToFrequency(fft, xf, 0);
  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);

  // Convert far-end partition to the frequency domain with windowing.
  memcpy(fft, farend, sizeof(float) * PART_LEN2);
  TimeToFrequency(fft, xf, 1);
  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
}

// Moves the read pointers of all far-end buffers by |elements| partitions
// (negative values rewind) and updates |system_delay| accordingly. Returns
// the number of elements actually moved (buffer limits may clip the request).
int WebRtcAec_MoveFarReadPtr(aec_t *aec, int elements) {
  int elements_moved = WebRtc_MoveReadPtr(aec->far_buf_windowed, elements);
  WebRtc_MoveReadPtr(aec->far_buf, elements);
#ifdef WEBRTC_AEC_DEBUG_DUMP
  WebRtc_MoveReadPtr(aec->far_time_buf, elements);
#endif
  aec->system_delay -= elements_moved * PART_LEN;
  return elements_moved;
}

// Processes one FRAME_LEN frame of near-end (and optional H-band) audio:
// buffers the input, aligns the far-end read pointer to |knownDelay|, then
// runs ProcessBlock() once per available PART_LEN block.
void WebRtcAec_ProcessFrame(aec_t *aec,
                            const short *nearend,
                            const short *nearendH,
                            int knownDelay)
{
  // For each frame the process is as follows:
  // 1) If the system_delay indicates on being too small for processing a
  //    frame we stuff the buffer with enough data for 10 ms.
  // 2) Adjust the buffer to the system delay, by moving the read pointer.
  // 3) If we can't move read pointer due to buffer size limitations we
  //    flush/stuff the buffer.
  // 4) Process as many partitions as possible.
  // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
  //    samples. Even though we will have data left to process (we work with
  //    partitions) we consider updating a whole frame, since that's the
  //    amount of data we input and output in audio_processing.

  // TODO(bjornv): Investigate how we should round the delay difference; right
  // now we know that incoming |knownDelay| is underestimated when it's less
  // than |aec->knownDelay|. We therefore, round (-32) in that direction. In
  // the other direction, we don't have this situation, but might flush one
  // partition too little. This can cause non-causality, which should be
  // investigated. Maybe, allow for a non-symmetric rounding, like -16.
  int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
  int moved_elements = 0;

  // TODO(bjornv): Change the near-end buffer handling to be the same as for
  // far-end, that is, with a near_pre_buf.
  // Buffer the near-end frame.
  WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
  // For H band
  if (aec->sampFreq == 32000) {
    WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
  }

  // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
  //    have enough far-end data for that by stuffing the buffer if the
  //    |system_delay| indicates others.
  if (aec->system_delay < FRAME_LEN) {
    // We don't have enough data so we rewind 10 ms.
    WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
  }

  // 2) Compensate for a possible change in the system delay.
  WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
  moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
  aec->knownDelay -= moved_elements * PART_LEN;
#ifdef WEBRTC_AEC_DEBUG_DUMP
  WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
#endif

  // 4) Process as many blocks as possible.
  while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
    ProcessBlock(aec);
  }

  // 5) Update system delay with respect to the entire frame.
  aec->system_delay -= FRAME_LEN;
}

// Computes the median and an L1-based spread ("std") of the logged delay
// histogram, in milliseconds, then resets the histogram. Returns 0 on
// success; -1 if delay logging is disabled. With an empty histogram both
// outputs are set to -1.
int WebRtcAec_GetDelayMetricsCore(aec_t* self, int* median, int* std) {
  int i = 0;
  int delay_values = 0;
  int num_delay_values = 0;
  int my_median = 0;
  // NOTE(review): |self| is dereferenced here, before the assert below; a
  // NULL |self| would already have crashed. Consider asserting first.
  const int kMsPerBlock = PART_LEN / (self->mult * 8);
  float l1_norm = 0;

  assert(self != NULL);
  assert(median != NULL);
  assert(std != NULL);

  if (self->delay_logging_enabled == 0) {
    // Logging disabled.
    return -1;
  }

  // Get number of delay values since last update.
  for (i = 0; i < kHistorySizeBlocks; i++) {
    num_delay_values += self->delay_histogram[i];
  }
  if (num_delay_values == 0) {
    // We have no new delay value data. Even though -1 is a valid estimate, it
    // will practically never be used since multiples of |kMsPerBlock| will
    // always be returned.
    *median = -1;
    *std = -1;
    return 0;
  }

  delay_values = num_delay_values >> 1; // Start value for median count down.
  // Get median of delay values since last update.
  for (i = 0; i < kHistorySizeBlocks; i++) {
    delay_values -= self->delay_histogram[i];
    if (delay_values < 0) {
      my_median = i;
      break;
    }
  }
  // Account for lookahead.
  *median = (my_median - kLookaheadBlocks) * kMsPerBlock;

  // Calculate the L1 norm, with median value as central moment.
  for (i = 0; i < kHistorySizeBlocks; i++) {
    l1_norm += (float) (fabs(i - my_median) * self->delay_histogram[i]);
  }
  *std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock;

  // Reset histogram.
  memset(self->delay_histogram, 0, sizeof(self->delay_histogram));

  return 0;
}

// Returns the current binary echo-presence state (0 or 1).
int WebRtcAec_echo_state(aec_t* self) {
  assert(self != NULL);
  return self->echoState;
}

// Copies the current ERL, ERLE and A-NLP statistics to the caller.
void WebRtcAec_GetEchoStats(aec_t* self, Stats* erl, Stats* erle,
                            Stats* a_nlp) {
  assert(self != NULL);
  assert(erl != NULL);
  assert(erle != NULL);
  assert(a_nlp != NULL);
  *erl = self->erl;
  *erle = self->erle;
  *a_nlp = self->aNlp;
}

// Core per-block (PART_LEN samples) processing: reads one near-end block and
// one far-end partition, updates smoothed power spectra and the minimum-
// statistics noise estimate, feeds the delay-logging estimator, runs the
// linear adaptive filter (FilterFar + ScaleErrorSignal + FilterAdaptation),
// and finally applies NonLinearProcessing() to produce the output block.
static void ProcessBlock(aec_t* aec) {
  int i;
  float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
  float scale;

  float fft[PART_LEN2];
  float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
  float df[2][PART_LEN1];
  float far_spectrum = 0.0f;
  float near_spectrum = 0.0f;
  float abs_far_spectrum[PART_LEN1];
  float abs_near_spectrum[PART_LEN1];

  // Smoothing coefficients for the power spectra (old, new).
  const float gPow[2] = {0.9f, 0.1f};

  // Noise estimate constants.
  const int noiseInitBlocks = 500 * aec->mult;
  const float step = 0.1f;
  const float ramp = 1.0002f;
  const float gInitNoise[2] = {0.999f, 0.001f};

  int16_t nearend[PART_LEN];
  int16_t* nearend_ptr = NULL;
  int16_t output[PART_LEN];
  int16_t outputH[PART_LEN];

  float* xf_ptr = NULL;

  memset(dH, 0, sizeof(dH));
  if (aec->sampFreq == 32000) {
    // Get the upper band first so we can reuse |nearend|.
    WebRtc_ReadBuffer(aec->nearFrBufH,
                      (void**) &nearend_ptr,
                      nearend,
                      PART_LEN);
    for (i = 0; i < PART_LEN; i++) {
      dH[i] = (float) (nearend_ptr[i]);
    }
    memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
  }
  WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN);

  // ---------- Ooura fft ----------
  // Concatenate old and new nearend blocks.
  for (i = 0; i < PART_LEN; i++) {
    d[i] = (float) (nearend_ptr[i]);
  }
  memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);

#ifdef WEBRTC_AEC_DEBUG_DUMP
  {
    int16_t farend[PART_LEN];
    int16_t* farend_ptr = NULL;
    WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1);
    (void)fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
    (void)fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
  }
#endif

  // We should always have at least one element stored in |far_buf|.
  assert(WebRtc_available_read(aec->far_buf) > 0);
  WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1);

  // Near fft
  memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
  TimeToFrequency(fft, df, 0);

  // Power smoothing
  for (i = 0; i < PART_LEN1; i++) {
    // |xf_ptr| holds real parts in [0, PART_LEN1) and imaginary parts in
    // [PART_LEN1, 2 * PART_LEN1).
    far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
        (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
    aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
    // Calculate absolute spectra
    abs_far_spectrum[i] = sqrtf(far_spectrum);

    near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
    aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
    // Calculate absolute spectra
    abs_near_spectrum[i] = sqrtf(near_spectrum);
  }

  // Estimate noise power. Wait until dPow is more stable.
  if (aec->noiseEstCtr > 50) {
    for (i = 0; i < PART_LEN1; i++) {
      if (aec->dPow[i] < aec->dMinPow[i]) {
        // Track the minimum, with |ramp| slowly inflating it so the estimate
        // can recover when the noise floor rises.
        aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] -
                                                  aec->dPow[i])) * ramp;
      }
      else {
        aec->dMinPow[i] *= ramp;
      }
    }
  }

  // Smooth increasing noise power from zero at the start,
  // to avoid a sudden burst of comfort noise.
  if (aec->noiseEstCtr < noiseInitBlocks) {
    aec->noiseEstCtr++;
    for (i = 0; i < PART_LEN1; i++) {
      if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
        aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
            gInitNoise[1] * aec->dMinPow[i];
      }
      else {
        aec->dInitMinPow[i] = aec->dMinPow[i];
      }
    }
    aec->noisePow = aec->dInitMinPow;
  }
  else {
    aec->noisePow = aec->dMinPow;
  }

  // Block wise delay estimation used for logging
  if (aec->delay_logging_enabled) {
    int delay_estimate = 0;
    if (WebRtc_AddFarSpectrumFloat(aec->delay_estimator_farend,
                                   abs_far_spectrum, PART_LEN1) == 0) {
      delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
                                                         abs_near_spectrum,
                                                         PART_LEN1);
      if (delay_estimate >= 0) {
        // Update delay estimate buffer.
        aec->delay_histogram[delay_estimate]++;
      }
    }
  }

  // Update the xfBuf block position.
  aec->xfBufBlockPos--;
  if (aec->xfBufBlockPos == -1) {
    aec->xfBufBlockPos = NR_PART - 1;
  }

  // Buffer xf
  memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr,
         sizeof(float) * PART_LEN1);
  memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1],
         sizeof(float) * PART_LEN1);

  memset(yf, 0, sizeof(yf));

  // Filter far
  WebRtcAec_FilterFar(aec, yf);

  // Inverse fft to obtain echo estimate and error.
  fft[0] = yf[0][0];
  fft[1] = yf[0][PART_LEN];
  for (i = 1; i < PART_LEN; i++) {
    fft[2 * i] = yf[0][i];
    fft[2 * i + 1] = yf[1][i];
  }
  aec_rdft_inverse_128(fft);

  scale = 2.0f / PART_LEN2;
  for (i = 0; i < PART_LEN; i++) {
    y[i] = fft[PART_LEN + i] * scale; // fft scaling
  }

  // Error = near-end minus linear echo estimate.
  for (i = 0; i < PART_LEN; i++) {
    e[i] = d[i] - y[i];
  }

  // Error fft
  memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
  memset(fft, 0, sizeof(float) * PART_LEN);
  memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
  // TODO(bjornv): Change to use TimeToFrequency().
  aec_rdft_forward_128(fft);

  ef[1][0] = 0;
  ef[1][PART_LEN] = 0;
  ef[0][0] = fft[0];
  ef[0][PART_LEN] = fft[1];
  for (i = 1; i < PART_LEN; i++) {
    ef[0][i] = fft[2 * i];
    ef[1][i] = fft[2 * i + 1];
  }

  if (aec->metricsMode == 1) {
    // Note that the first PART_LEN samples in fft (before transformation) are
    // zero. Hence, the scaling by two in UpdateLevel() should not be
    // performed. That scaling is taken care of in UpdateMetrics() instead.
    UpdateLevel(&aec->linoutlevel, ef);
  }

  // Scale error signal inversely with far power.
  WebRtcAec_ScaleErrorSignal(aec, ef);
  WebRtcAec_FilterAdaptation(aec, fft, ef);
  NonLinearProcessing(aec, output, outputH);

  if (aec->metricsMode == 1) {
    // Update power levels and echo metrics
    UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr);
    UpdateLevel(&aec->nearlevel, df);
    UpdateMetrics(aec);
  }

  // Store the output block.
  WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
  // For H band
  if (aec->sampFreq == 32000) {
    WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
  }

#ifdef WEBRTC_AEC_DEBUG_DUMP
  {
    int16_t eInt16[PART_LEN];
    for (i = 0; i < PART_LEN; i++) {
      eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i],
                                          WEBRTC_SPL_WORD16_MIN);
    }

    (void)fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
    (void)fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
  }
#endif
}

// Echo suppression (NLP) stage: estimates subband coherence between near-end,
// error and far-end spectra, derives per-bin suppression gains and applies
// them (plus comfort noise) to produce |output| and, for SWB, |outputH|.
// NOTE: this definition continues beyond the visible portion of the file.
static void NonLinearProcessing(aec_t *aec, short *output, short *outputH)
{
  float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
  complex_t comfortNoiseHband[PART_LEN1];
  float fft[PART_LEN2];
  float scale, dtmp;
  float nlpGainHband;
  int i, j, pos;

  // Coherence and non-linear filter
  float cohde[PART_LEN1], cohxd[PART_LEN1];
  float hNlDeAvg, hNlXdAvg;
  float hNl[PART_LEN1];
  float hNlPref[PREF_BAND_SIZE];
  float hNlFb = 0, hNlFbLow = 0;
  const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
  const int prefBandSize = PREF_BAND_SIZE / aec->mult;
  const int minPrefBand = 4 / aec->mult;

  // Near and error power sums
  float sdSum = 0, seSum = 0;

  // Power estimate smoothing coefficients
  const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}};
  const float *ptrGCoh = gCoh[aec->mult - 1];

  // Filter energy
  float wfEnMax = 0, wfEn = 0;
  const int delayEstInterval = 10 * aec->mult;

  float* xfw_ptr = NULL;

  aec->delayEstCtr++;
  if (aec->delayEstCtr == delayEstInterval) {
    aec->delayEstCtr = 0;
  }

  // initialize comfort noise for H band
  memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
  nlpGainHband = (float)0.0;
  dtmp = (float)0.0;

  // Measure energy in each filter partition to determine delay.
  // TODO: Spread by computing one partition per block?
  if (aec->delayEstCtr == 0) {
    wfEnMax = 0;
    aec->delayIdx = 0;
    for (i = 0; i < NR_PART; i++) {
      pos = i * PART_LEN1;
      wfEn = 0;
      for (j = 0; j < PART_LEN1; j++) {
        wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
            aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
      }

      if (wfEn > wfEnMax) {
        wfEnMax = wfEn;
        aec->delayIdx = i;
      }
    }
  }

  // We should always have at least one element stored in |far_buf|.
  assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
  // NLP
  WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1);

  // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
  // |xfwBuf|.
  // Buffer far.
  memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);

  // Use delayed far.
  memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));

  // Windowed near fft
  for (i = 0; i < PART_LEN; i++) {
    fft[i] = aec->dBuf[i] * sqrtHanning[i];
    fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
  }
  aec_rdft_forward_128(fft);

  dfw[1][0] = 0;
  dfw[1][PART_LEN] = 0;
  dfw[0][0] = fft[0];
  dfw[0][PART_LEN] = fft[1];
  for (i = 1; i < PART_LEN; i++) {
    dfw[0][i] = fft[2 * i];
    dfw[1][i] = fft[2 * i + 1];
  }

  // Windowed error fft
  for (i = 0; i < PART_LEN; i++) {
    fft[i] = aec->eBuf[i] * sqrtHanning[i];
    fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
  }
  aec_rdft_forward_128(fft);
  efw[1][0] = 0;
  efw[1][PART_LEN] = 0;
  efw[0][0] = fft[0];
  efw[0][PART_LEN] = fft[1];
  for (i = 1; i < PART_LEN; i++) {
    efw[0][i] = fft[2 * i];
    efw[1][i] = fft[2 * i + 1];
  }

  // Smoothed PSD
  for (i = 0; i < PART_LEN1; i++) {
    aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] *
        (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] *
        (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero farend.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO: investigate further why this is so sensitive.
    aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] *
        WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);

    aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] *
        (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] *
        (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

    aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] *
        (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] *
        (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

    sdSum += aec->sd[i];
    seSum += aec->se[i];
  }

  // Divergent filter safeguard.
  if (aec->divergeState == 0) {
    if (seSum > sdSum) {
      aec->divergeState = 1;
    }
  }
  else {
    if (seSum * 1.05f < sdSum) {
      aec->divergeState = 0;
    }
  }

  // While diverged, pass the near-end spectrum through instead of the error.
  if (aec->divergeState == 1) {
    memcpy(efw, dfw, sizeof(efw));
  }

  // Reset if error is significantly larger than nearend (13 dB).
  if (seSum > (19.95f * sdSum)) {
    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
  }

  // Subband coherence
  for (i = 0; i < PART_LEN1; i++) {
    cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
        (aec->sd[i] * aec->se[i] + 1e-10f);
    cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
        (aec->sx[i] * aec->sd[i] + 1e-10f);
  }

  // Average the coherences over the preferred band.
  hNlXdAvg = 0;
  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
    hNlXdAvg += cohxd[i];
  }
  hNlXdAvg /= prefBandSize;
  hNlXdAvg = 1 - hNlXdAvg;

  hNlDeAvg = 0;
  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
    hNlDeAvg += cohde[i];
  }
  hNlDeAvg /= prefBandSize;

  if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
    aec->hNlXdAvgMin = hNlXdAvg;
  }

  // Near-end speech state machine (hysteresis between the two thresholds).
  if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
    aec->stNearState = 1;
  }
  else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
    aec->stNearState = 0;
  }

  if (aec->hNlXdAvgMin == 1) {
    // No echo observed yet: minimum suppression.
    aec->echoState = 0;
    aec->overDrive = kMinOverDrive[aec->nlp_mode];

    if (aec->stNearState == 1) {
      memcpy(hNl, cohde, sizeof(hNl));
      hNlFb = hNlDeAvg;
      hNlFbLow = hNlDeAvg;
    }
    else {
      for (i = 0; i < PART_LEN1; i++) {
        hNl[i] = 1 - cohxd[i];
      }
      hNlFb = hNlXdAvg;
      hNlFbLow = hNlXdAvg;
    }
  }
  else {

    if (aec->stNearState == 1) {
      aec->echoState = 0;
      memcpy(hNl, cohde, sizeof(hNl));
      hNlFb = hNlDeAvg;
      hNlFbLow = hNlDeAvg;
    }
    else {
      aec->echoState = 1;
      for (i = 0; i < PART_LEN1; i++) {
        hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
      }

      // Select an order statistic from the preferred bands.
      // TODO: Using quicksort now, but a selection algorithm may be preferred.
1177 memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); 1178 qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); 1179 hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))]; 1180 hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))]; 1181 } 1182 } 1183 1184 // Track the local filter minimum to determine suppression overdrive. 1185 if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { 1186 aec->hNlFbLocalMin = hNlFbLow; 1187 aec->hNlFbMin = hNlFbLow; 1188 aec->hNlNewMin = 1; 1189 aec->hNlMinCtr = 0; 1190 } 1191 aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); 1192 aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); 1193 1194 if (aec->hNlNewMin == 1) { 1195 aec->hNlMinCtr++; 1196 } 1197 if (aec->hNlMinCtr == 2) { 1198 aec->hNlNewMin = 0; 1199 aec->hNlMinCtr = 0; 1200 aec->overDrive = WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] / 1201 ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), 1202 kMinOverDrive[aec->nlp_mode]); 1203 } 1204 1205 // Smooth the overdrive. 1206 if (aec->overDrive < aec->overDriveSm) { 1207 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; 1208 } 1209 else { 1210 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; 1211 } 1212 1213 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); 1214 1215 // Add comfort noise. 1216 ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); 1217 1218 // TODO(bjornv): Investigate how to take the windowing below into account if 1219 // needed. 1220 if (aec->metricsMode == 1) { 1221 // Note that we have a scaling by two in the time domain |eBuf|. 1222 // In addition the time domain signal is windowed before transformation, 1223 // losing half the energy on the average. We take care of the first 1224 // scaling only in UpdateMetrics(). 1225 UpdateLevel(&aec->nlpoutlevel, efw); 1226 } 1227 // Inverse error fft. 
1228 fft[0] = efw[0][0]; 1229 fft[1] = efw[0][PART_LEN]; 1230 for (i = 1; i < PART_LEN; i++) { 1231 fft[2*i] = efw[0][i]; 1232 // Sign change required by Ooura fft. 1233 fft[2*i + 1] = -efw[1][i]; 1234 } 1235 aec_rdft_inverse_128(fft); 1236 1237 // Overlap and add to obtain output. 1238 scale = 2.0f / PART_LEN2; 1239 for (i = 0; i < PART_LEN; i++) { 1240 fft[i] *= scale; // fft scaling 1241 fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i]; 1242 1243 // Saturation protection 1244 output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i], 1245 WEBRTC_SPL_WORD16_MIN); 1246 1247 fft[PART_LEN + i] *= scale; // fft scaling 1248 aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i]; 1249 } 1250 1251 // For H band 1252 if (aec->sampFreq == 32000) { 1253 1254 // H band gain 1255 // average nlp over low band: average over second half of freq spectrum 1256 // (4->8khz) 1257 GetHighbandGain(hNl, &nlpGainHband); 1258 1259 // Inverse comfort_noise 1260 if (flagHbandCn == 1) { 1261 fft[0] = comfortNoiseHband[0][0]; 1262 fft[1] = comfortNoiseHband[PART_LEN][0]; 1263 for (i = 1; i < PART_LEN; i++) { 1264 fft[2*i] = comfortNoiseHband[i][0]; 1265 fft[2*i + 1] = comfortNoiseHband[i][1]; 1266 } 1267 aec_rdft_inverse_128(fft); 1268 scale = 2.0f / PART_LEN2; 1269 } 1270 1271 // compute gain factor 1272 for (i = 0; i < PART_LEN; i++) { 1273 dtmp = (float)aec->dBufH[i]; 1274 dtmp = (float)dtmp * nlpGainHband; // for variable gain 1275 1276 // add some comfort noise where Hband is attenuated 1277 if (flagHbandCn == 1) { 1278 fft[i] *= scale; // fft scaling 1279 dtmp += cnScaleHband * fft[i]; 1280 } 1281 1282 // Saturation protection 1283 outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp, 1284 WEBRTC_SPL_WORD16_MIN); 1285 } 1286 } 1287 1288 // Copy the current block to the old position. 
1289 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); 1290 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); 1291 1292 // Copy the current block to the old position for H band 1293 if (aec->sampFreq == 32000) { 1294 memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN); 1295 } 1296 1297 memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) - 1298 sizeof(complex_t) * PART_LEN1); 1299} 1300 1301static void GetHighbandGain(const float *lambda, float *nlpGainHband) 1302{ 1303 int i; 1304 1305 nlpGainHband[0] = (float)0.0; 1306 for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { 1307 nlpGainHband[0] += lambda[i]; 1308 } 1309 nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); 1310} 1311 1312static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1], 1313 complex_t *comfortNoiseHband, const float *noisePow, const float *lambda) 1314{ 1315 int i, num; 1316 float rand[PART_LEN]; 1317 float noise, noiseAvg, tmp, tmpAvg; 1318 WebRtc_Word16 randW16[PART_LEN]; 1319 complex_t u[PART_LEN1]; 1320 1321 const float pi2 = 6.28318530717959f; 1322 1323 // Generate a uniform random array on [0 1] 1324 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); 1325 for (i = 0; i < PART_LEN; i++) { 1326 rand[i] = ((float)randW16[i]) / 32768; 1327 } 1328 1329 // Reject LF noise 1330 u[0][0] = 0; 1331 u[0][1] = 0; 1332 for (i = 1; i < PART_LEN1; i++) { 1333 tmp = pi2 * rand[i - 1]; 1334 1335 noise = sqrtf(noisePow[i]); 1336 u[i][0] = noise * cosf(tmp); 1337 u[i][1] = -noise * sinf(tmp); 1338 } 1339 u[PART_LEN][1] = 0; 1340 1341 for (i = 0; i < PART_LEN1; i++) { 1342 // This is the proper weighting to match the background noise power 1343 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); 1344 //tmp = 1 - lambda[i]; 1345 efw[0][i] += tmp * u[i][0]; 1346 efw[1][i] += tmp * u[i][1]; 1347 } 1348 1349 // For H band comfort noise 1350 // TODO: don't compute noise and "tmp" twice. Use the previous results. 
1351 noiseAvg = 0.0; 1352 tmpAvg = 0.0; 1353 num = 0; 1354 if (aec->sampFreq == 32000 && flagHbandCn == 1) { 1355 1356 // average noise scale 1357 // average over second half of freq spectrum (i.e., 4->8khz) 1358 // TODO: we shouldn't need num. We know how many elements we're summing. 1359 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { 1360 num++; 1361 noiseAvg += sqrtf(noisePow[i]); 1362 } 1363 noiseAvg /= (float)num; 1364 1365 // average nlp scale 1366 // average over second half of freq spectrum (i.e., 4->8khz) 1367 // TODO: we shouldn't need num. We know how many elements we're summing. 1368 num = 0; 1369 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { 1370 num++; 1371 tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); 1372 } 1373 tmpAvg /= (float)num; 1374 1375 // Use average noise for H band 1376 // TODO: we should probably have a new random vector here. 1377 // Reject LF noise 1378 u[0][0] = 0; 1379 u[0][1] = 0; 1380 for (i = 1; i < PART_LEN1; i++) { 1381 tmp = pi2 * rand[i - 1]; 1382 1383 // Use average noise for H band 1384 u[i][0] = noiseAvg * (float)cos(tmp); 1385 u[i][1] = -noiseAvg * (float)sin(tmp); 1386 } 1387 u[PART_LEN][1] = 0; 1388 1389 for (i = 0; i < PART_LEN1; i++) { 1390 // Use average NLP weight for H band 1391 comfortNoiseHband[i][0] = tmpAvg * u[i][0]; 1392 comfortNoiseHband[i][1] = tmpAvg * u[i][1]; 1393 } 1394 } 1395} 1396 1397static void WebRtcAec_InitLevel(power_level_t *level) 1398{ 1399 const float bigFloat = 1E17f; 1400 1401 level->averagelevel = 0; 1402 level->framelevel = 0; 1403 level->minlevel = bigFloat; 1404 level->frsum = 0; 1405 level->sfrsum = 0; 1406 level->frcounter = 0; 1407 level->sfrcounter = 0; 1408} 1409 1410static void InitStats(Stats* stats) { 1411 stats->instant = offsetLevel; 1412 stats->average = offsetLevel; 1413 stats->max = offsetLevel; 1414 stats->min = offsetLevel * (-1); 1415 stats->sum = 0; 1416 stats->hisum = 0; 1417 stats->himean = offsetLevel; 1418 stats->counter = 0; 1419 stats->hicounter = 
0; 1420} 1421 1422static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]) { 1423 // Do the energy calculation in the frequency domain. The FFT is performed on 1424 // a segment of PART_LEN2 samples due to overlap, but we only want the energy 1425 // of half that data (the last PART_LEN samples). Parseval's relation states 1426 // that the energy is preserved according to 1427 // 1428 // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2 1429 // = ENERGY, 1430 // 1431 // where N = PART_LEN2. Since we are only interested in calculating the energy 1432 // for the last PART_LEN samples we approximate by calculating ENERGY and 1433 // divide by 2, 1434 // 1435 // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2 1436 // 1437 // Since we deal with real valued time domain signals we only store frequency 1438 // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we 1439 // need to add the contribution from the missing part in 1440 // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical 1441 // with the values in [1, PART_LEN-1], hence multiply those values by 2. This 1442 // is the values in the for loop below, but multiplication by 2 and division 1443 // by 2 cancel. 1444 1445 // TODO(bjornv): Investigate reusing energy calculations performed at other 1446 // places in the code. 1447 int k = 1; 1448 // Imaginary parts are zero at end points and left out of the calculation. 
1449 float energy = (in[0][0] * in[0][0]) / 2; 1450 energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2; 1451 1452 for (k = 1; k < PART_LEN; k++) { 1453 energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]); 1454 } 1455 energy /= PART_LEN2; 1456 1457 level->sfrsum += energy; 1458 level->sfrcounter++; 1459 1460 if (level->sfrcounter > subCountLen) { 1461 level->framelevel = level->sfrsum / (subCountLen * PART_LEN); 1462 level->sfrsum = 0; 1463 level->sfrcounter = 0; 1464 if (level->framelevel > 0) { 1465 if (level->framelevel < level->minlevel) { 1466 level->minlevel = level->framelevel; // New minimum. 1467 } else { 1468 level->minlevel *= (1 + 0.001f); // Small increase. 1469 } 1470 } 1471 level->frcounter++; 1472 level->frsum += level->framelevel; 1473 if (level->frcounter > countLen) { 1474 level->averagelevel = level->frsum / countLen; 1475 level->frsum = 0; 1476 level->frcounter = 0; 1477 } 1478 } 1479} 1480 1481static void UpdateMetrics(aec_t *aec) 1482{ 1483 float dtmp, dtmp2; 1484 1485 const float actThresholdNoisy = 8.0f; 1486 const float actThresholdClean = 40.0f; 1487 const float safety = 0.99995f; 1488 const float noisyPower = 300000.0f; 1489 1490 float actThreshold; 1491 float echo, suppressedEcho; 1492 1493 if (aec->echoState) { // Check if echo is likely present 1494 aec->stateCounter++; 1495 } 1496 1497 if (aec->farlevel.frcounter == 0) { 1498 1499 if (aec->farlevel.minlevel < noisyPower) { 1500 actThreshold = actThresholdClean; 1501 } 1502 else { 1503 actThreshold = actThresholdNoisy; 1504 } 1505 1506 if ((aec->stateCounter > (0.5f * countLen * subCountLen)) 1507 && (aec->farlevel.sfrcounter == 0) 1508 1509 // Estimate in active far-end segments only 1510 && (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel)) 1511 ) { 1512 1513 // Subtract noise power 1514 echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; 1515 1516 // ERL 1517 dtmp = 10 * (float)log10(aec->farlevel.averagelevel / 1518 
aec->nearlevel.averagelevel + 1e-10f); 1519 dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f); 1520 1521 aec->erl.instant = dtmp; 1522 if (dtmp > aec->erl.max) { 1523 aec->erl.max = dtmp; 1524 } 1525 1526 if (dtmp < aec->erl.min) { 1527 aec->erl.min = dtmp; 1528 } 1529 1530 aec->erl.counter++; 1531 aec->erl.sum += dtmp; 1532 aec->erl.average = aec->erl.sum / aec->erl.counter; 1533 1534 // Upper mean 1535 if (dtmp > aec->erl.average) { 1536 aec->erl.hicounter++; 1537 aec->erl.hisum += dtmp; 1538 aec->erl.himean = aec->erl.hisum / aec->erl.hicounter; 1539 } 1540 1541 // A_NLP 1542 dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / 1543 (2 * aec->linoutlevel.averagelevel) + 1e-10f); 1544 1545 // subtract noise power 1546 suppressedEcho = 2 * (aec->linoutlevel.averagelevel - 1547 safety * aec->linoutlevel.minlevel); 1548 1549 dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); 1550 1551 aec->aNlp.instant = dtmp2; 1552 if (dtmp > aec->aNlp.max) { 1553 aec->aNlp.max = dtmp; 1554 } 1555 1556 if (dtmp < aec->aNlp.min) { 1557 aec->aNlp.min = dtmp; 1558 } 1559 1560 aec->aNlp.counter++; 1561 aec->aNlp.sum += dtmp; 1562 aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter; 1563 1564 // Upper mean 1565 if (dtmp > aec->aNlp.average) { 1566 aec->aNlp.hicounter++; 1567 aec->aNlp.hisum += dtmp; 1568 aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter; 1569 } 1570 1571 // ERLE 1572 1573 // subtract noise power 1574 suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel - 1575 safety * aec->nlpoutlevel.minlevel); 1576 1577 dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / 1578 (2 * aec->nlpoutlevel.averagelevel) + 1e-10f); 1579 dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); 1580 1581 dtmp = dtmp2; 1582 aec->erle.instant = dtmp; 1583 if (dtmp > aec->erle.max) { 1584 aec->erle.max = dtmp; 1585 } 1586 1587 if (dtmp < aec->erle.min) { 1588 aec->erle.min = dtmp; 1589 } 1590 1591 aec->erle.counter++; 1592 aec->erle.sum += dtmp; 1593 
aec->erle.average = aec->erle.sum / aec->erle.counter; 1594 1595 // Upper mean 1596 if (dtmp > aec->erle.average) { 1597 aec->erle.hicounter++; 1598 aec->erle.hisum += dtmp; 1599 aec->erle.himean = aec->erle.hisum / aec->erle.hicounter; 1600 } 1601 } 1602 1603 aec->stateCounter = 0; 1604 } 1605} 1606 1607static void TimeToFrequency(float time_data[PART_LEN2], 1608 float freq_data[2][PART_LEN1], 1609 int window) { 1610 int i = 0; 1611 1612 // TODO(bjornv): Should we have a different function/wrapper for windowed FFT? 1613 if (window) { 1614 for (i = 0; i < PART_LEN; i++) { 1615 time_data[i] *= sqrtHanning[i]; 1616 time_data[PART_LEN + i] *= sqrtHanning[PART_LEN - i]; 1617 } 1618 } 1619 1620 aec_rdft_forward_128(time_data); 1621 // Reorder. 1622 freq_data[1][0] = 0; 1623 freq_data[1][PART_LEN] = 0; 1624 freq_data[0][0] = time_data[0]; 1625 freq_data[0][PART_LEN] = time_data[1]; 1626 for (i = 1; i < PART_LEN; i++) { 1627 freq_data[0][i] = time_data[2 * i]; 1628 freq_data[1][i] = time_data[2 * i + 1]; 1629 } 1630} 1631