1/* Copyright (C) 2003-2008 Jean-Marc Valin 2 3 File: mdf.c 4 Echo canceller based on the MDF algorithm (see below) 5 6 Redistribution and use in source and binary forms, with or without 7 modification, are permitted provided that the following conditions are 8 met: 9 10 1. Redistributions of source code must retain the above copyright notice, 11 this list of conditions and the following disclaimer. 12 13 2. Redistributions in binary form must reproduce the above copyright 14 notice, this list of conditions and the following disclaimer in the 15 documentation and/or other materials provided with the distribution. 16 17 3. The name of the author may not be used to endorse or promote products 18 derived from this software without specific prior written permission. 19 20 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 24 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 POSSIBILITY OF SUCH DAMAGE. 31*/ 32 33/* 34 The echo canceller is based on the MDF algorithm described in: 35 36 J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 37 IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 38 February 1990. 39 40 We use the Alternatively Updated MDF (AUMDF) variant. Robustness to 41 double-talk is achieved using a variable learning rate as described in: 42 43 Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo 44 Cancellation With Double-Talk. IEEE Transactions on Audio, 45 Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007. 46 http://people.xiph.org/~jm/papers/valin_taslp2006.pdf 47 48 There is no explicit double-talk detection, but a continuous variation 49 in the learning rate based on residual echo, double-talk and background 50 noise. 51 52 About the fixed-point version: 53 All the signals are represented with 16-bit words. The filter weights 54 are represented with 32-bit words, but only the top 16 bits are used 55 in most cases. The lower 16 bits are completely unreliable (due to the 56 fact that the update is done only on the top bits), but help in the 57 adaptation -- probably by removing a "threshold effect" due to 58 quantization (rounding going to zero) when the gradient is small. 59 60 Another kludge that seems to work good: when performing the weight 61 update, we only move half the way toward the "goal" this seems to 62 reduce the effect of quantization noise in the update phase. This 63 can be seen as applying a gradient descent on a "soft constraint" 64 instead of having a hard constraint. 65 66*/ 67 68#ifdef HAVE_CONFIG_H 69#include "config.h" 70#endif 71 72#include "arch.h" 73#include "speex/speex_echo.h" 74#include "fftwrap.h" 75#include "pseudofloat.h" 76#include "math_approx.h" 77#include "os_support.h" 78 79#ifndef M_PI 80#define M_PI 3.14159265358979323846 81#endif 82 83#ifdef FIXED_POINT 84#define WEIGHT_SHIFT 11 85#define NORMALIZE_SCALEDOWN 5 86#define NORMALIZE_SCALEUP 3 87#else 88#define WEIGHT_SHIFT 0 89#endif 90 91#ifdef FIXED_POINT 92#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x))) 93#else 94#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x)))) 95#endif 96 97/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk 98 and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */ 99#define TWO_PATH 100 101#ifdef FIXED_POINT 102static const spx_float_t MIN_LEAK = {20972, -22}; 103 104/* Constants for the two-path filter */ 105static const spx_float_t VAR1_SMOOTH = {23593, -16}; 106static const spx_float_t VAR2_SMOOTH = {23675, -15}; 107static const spx_float_t VAR1_UPDATE = {16384, -15}; 108static const spx_float_t VAR2_UPDATE = {16384, -16}; 109static const spx_float_t VAR_BACKTRACK = {16384, -12}; 110#define TOP16(x) ((x)>>16) 111 112#else 113 114static const spx_float_t MIN_LEAK = .005f; 115 116/* Constants for the two-path filter */ 117static const spx_float_t VAR1_SMOOTH = .36f; 118static const spx_float_t VAR2_SMOOTH = .7225f; 119static const spx_float_t VAR1_UPDATE = .5f; 120static const spx_float_t VAR2_UPDATE = .25f; 121static const spx_float_t VAR_BACKTRACK = 4.f; 122#define TOP16(x) (x) 123#endif 124 125 126#define PLAYBACK_DELAY 2 127 128void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len); 129 130 131/** Speex echo cancellation state. */ 132struct SpeexEchoState_ { 133 int frame_size; /**< Number of samples processed each time */ 134 int window_size; 135 int M; 136 int cancel_count; 137 int adapted; 138 int saturated; 139 int screwed_up; 140 int C; /** Number of input channels (microphones) */ 141 int K; /** Number of output channels (loudspeakers) */ 142 spx_int32_t sampling_rate; 143 spx_word16_t spec_average; 144 spx_word16_t beta0; 145 spx_word16_t beta_max; 146 spx_word32_t sum_adapt; 147 spx_word16_t leak_estimate; 148 149 spx_word16_t *e; /* scratch */ 150 spx_word16_t *x; /* Far-end input buffer (2N) */ 151 spx_word16_t *X; /* Far-end buffer (M+1 frames) in frequency domain */ 152 spx_word16_t *input; /* scratch */ 153 spx_word16_t *y; /* scratch */ 154 spx_word16_t *last_y; 155 spx_word16_t *Y; /* scratch */ 156 spx_word16_t *E; 157 spx_word32_t *PHI; /* scratch */ 158 spx_word32_t *W; /* (Background) filter weights */ 159#ifdef TWO_PATH 160 spx_word16_t *foreground; /* Foreground filter weights */ 161 spx_word32_t Davg1; /* 1st recursive average of the residual power difference */ 162 spx_word32_t Davg2; /* 2nd recursive average of the residual power difference */ 163 spx_float_t Dvar1; /* Estimated variance of 1st estimator */ 164 spx_float_t Dvar2; /* Estimated variance of 2nd estimator */ 165#endif 166 spx_word32_t *power; /* Power of the far-end signal */ 167 spx_float_t *power_1;/* Inverse power of far-end */ 168 spx_word16_t *wtmp; /* scratch */ 169#ifdef FIXED_POINT 170 spx_word16_t *wtmp2; /* scratch */ 171#endif 172 spx_word32_t *Rf; /* scratch */ 173 spx_word32_t *Yf; /* scratch */ 174 spx_word32_t *Xf; /* scratch */ 175 spx_word32_t *Eh; 176 spx_word32_t *Yh; 177 spx_float_t Pey; 178 spx_float_t Pyy; 179 spx_word16_t *window; 180 spx_word16_t *prop; 181 void *fft_table; 182 spx_word16_t *memX, *memD, *memE; 183 spx_word16_t preemph; 184 spx_word16_t notch_radius; 185 spx_mem_t *notch_mem; 186 187 /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */ 188 spx_int16_t *play_buf; 189 int play_buf_pos; 190 int play_buf_started; 191}; 192 193static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride) 194{ 195 int i; 196 spx_word16_t den2; 197#ifdef FIXED_POINT 198 den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius)); 199#else 200 den2 = radius*radius + .7*(1-radius)*(1-radius); 201#endif 202 /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/ 203 for (i=0;i<len;i++) 204 { 205 spx_word16_t vin = in[i*stride]; 206 spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15); 207#ifdef FIXED_POINT 208 mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1); 209#else 210 mem[0] = mem[1] + 2*(-vin + radius*vout); 211#endif 212 mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout); 213 out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767); 214 } 215} 216 217/* This inner product is slightly different from the codec version because of fixed-point */ 218static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len) 219{ 220 spx_word32_t sum=0; 221 len >>= 1; 222 while(len--) 223 { 224 spx_word32_t part=0; 225 part = MAC16_16(part,*x++,*y++); 226 part = MAC16_16(part,*x++,*y++); 227 /* HINT: If you had a 40-bit accumulator, you could shift only at the end */ 228 sum = ADD32(sum,SHR32(part,6)); 229 } 230 return sum; 231} 232 233/** Compute power spectrum of a half-complex (packed) vector */ 234static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N) 235{ 236 int i, j; 237 ps[0]=MULT16_16(X[0],X[0]); 238 for (i=1,j=1;i<N-1;i+=2,j++) 239 { 240 ps[j] = MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); 241 } 242 ps[j]=MULT16_16(X[i],X[i]); 243} 244 245/** Compute power spectrum of a half-complex (packed) vector and accumulate */ 246static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N) 247{ 248 int i, j; 249 ps[0]+=MULT16_16(X[0],X[0]); 250 for (i=1,j=1;i<N-1;i+=2,j++) 251 { 252 ps[j] += MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]); 253 } 254 ps[j]+=MULT16_16(X[i],X[i]); 255} 256 257/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */ 258#ifdef FIXED_POINT 259static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) 260{ 261 int i,j; 262 spx_word32_t tmp1=0,tmp2=0; 263 for (j=0;j<M;j++) 264 { 265 tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N])); 266 } 267 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); 268 for (i=1;i<N-1;i+=2) 269 { 270 tmp1 = tmp2 = 0; 271 for (j=0;j<M;j++) 272 { 273 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1]))); 274 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1])); 275 } 276 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); 277 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); 278 } 279 tmp1 = tmp2 = 0; 280 for (j=0;j<M;j++) 281 { 282 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1])); 283 } 284 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); 285} 286static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M) 287{ 288 int i,j; 289 spx_word32_t tmp1=0,tmp2=0; 290 for (j=0;j<M;j++) 291 { 292 tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]); 293 } 294 acc[0] = PSHR32(tmp1,WEIGHT_SHIFT); 295 for (i=1;i<N-1;i+=2) 296 { 297 tmp1 = tmp2 = 0; 298 for (j=0;j<M;j++) 299 { 300 tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1])); 301 tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]); 302 } 303 acc[i] = PSHR32(tmp1,WEIGHT_SHIFT); 304 acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT); 305 } 306 tmp1 = tmp2 = 0; 307 for (j=0;j<M;j++) 308 { 309 tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]); 310 } 311 acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT); 312} 313 314#else 315static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M) 316{ 317 int i,j; 318 for (i=0;i<N;i++) 319 acc[i] = 0; 320 for (j=0;j<M;j++) 321 { 322 acc[0] += X[0]*Y[0]; 323 for (i=1;i<N-1;i+=2) 324 { 325 acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]); 326 acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]); 327 } 328 acc[i] += X[i]*Y[i]; 329 X += N; 330 Y += N; 331 } 332} 333#define spectral_mul_accum16 spectral_mul_accum 334#endif 335 336/** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */ 337static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N) 338{ 339 int i, j; 340 spx_float_t W; 341 W = FLOAT_AMULT(p, w[0]); 342 prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0])); 343 for (i=1,j=1;i<N-1;i+=2,j++) 344 { 345 W = FLOAT_AMULT(p, w[j]); 346 prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1])); 347 prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1])); 348 } 349 W = FLOAT_AMULT(p, w[j]); 350 prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i])); 351} 352 353static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop) 354{ 355 int i, j, p; 356 spx_word16_t max_sum = 1; 357 spx_word32_t prop_sum = 1; 358 for (i=0;i<M;i++) 359 { 360 spx_word32_t tmp = 1; 361 for (p=0;p<P;p++) 362 for (j=0;j<N;j++) 363 tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18))); 364#ifdef FIXED_POINT 365 /* Just a security in case an overflow were to occur */ 366 tmp = MIN32(ABS32(tmp), 536870912); 367#endif 368 prop[i] = spx_sqrt(tmp); 369 if (prop[i] > max_sum) 370 max_sum = prop[i]; 371 } 372 for (i=0;i<M;i++) 373 { 374 prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum); 375 prop_sum += EXTEND32(prop[i]); 376 } 377 for (i=0;i<M;i++) 378 { 379 prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum); 380 /*printf ("%f ", prop[i]);*/ 381 } 382 /*printf ("\n");*/ 383} 384 385#ifdef DUMP_ECHO_CANCEL_DATA 386#include <stdio.h> 387static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL; 388 389static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len) 390{ 391 if (!(rFile && pFile && oFile)) 392 { 393 speex_fatal("Dump files not open"); 394 } 395 fwrite(rec, sizeof(spx_int16_t), len, rFile); 396 fwrite(play, sizeof(spx_int16_t), len, pFile); 397 fwrite(out, sizeof(spx_int16_t), len, oFile); 398} 399#endif 400 401/** Creates a new echo canceller state */ 402EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length) 403{ 404 return speex_echo_state_init_mc(frame_size, filter_length, 1, 1); 405} 406 407EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers) 408{ 409 int i,N,M, C, K; 410 SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState)); 411 412 st->K = nb_speakers; 413 st->C = nb_mic; 414 C=st->C; 415 K=st->K; 416#ifdef DUMP_ECHO_CANCEL_DATA 417 if (rFile || pFile || oFile) 418 speex_fatal("Opening dump files twice"); 419 rFile = fopen("aec_rec.sw", "wb"); 420 pFile = fopen("aec_play.sw", "wb"); 421 oFile = fopen("aec_out.sw", "wb"); 422#endif 423 424 st->frame_size = frame_size; 425 st->window_size = 2*frame_size; 426 N = st->window_size; 427 M = st->M = (filter_length+st->frame_size-1)/frame_size; 428 st->cancel_count=0; 429 st->sum_adapt = 0; 430 st->saturated = 0; 431 st->screwed_up = 0; 432 /* This is the default sampling rate */ 433 st->sampling_rate = 8000; 434 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); 435#ifdef FIXED_POINT 436 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); 437 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); 438#else 439 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; 440 st->beta_max = (.5f*st->frame_size)/st->sampling_rate; 441#endif 442 st->leak_estimate = 0; 443 444 st->fft_table = spx_fft_init(N); 445 446 st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 447 st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t)); 448 st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t)); 449 st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 450 st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 451 st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 452 st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 453 st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 454 st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 455 st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t)); 456 457 st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t)); 458 st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 459 st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t)); 460 st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t)); 461#ifdef TWO_PATH 462 st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t)); 463#endif 464 st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t)); 465 st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t)); 466 st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t)); 467 st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 468 st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t)); 469 st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 470#ifdef FIXED_POINT 471 st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t)); 472 for (i=0;i<N>>1;i++) 473 { 474 st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1)); 475 st->window[N-i-1] = st->window[i]; 476 } 477#else 478 for (i=0;i<N;i++) 479 st->window[i] = .5-.5*cos(2*M_PI*i/N); 480#endif 481 for (i=0;i<=st->frame_size;i++) 482 st->power_1[i] = FLOAT_ONE; 483 for (i=0;i<N*M*K*C;i++) 484 st->W[i] = 0; 485 { 486 spx_word32_t sum = 0; 487 /* Ratio of ~10 between adaptation rate of first and last block */ 488 spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1); 489 st->prop[0] = QCONST16(.7, 15); 490 sum = EXTEND32(st->prop[0]); 491 for (i=1;i<M;i++) 492 { 493 st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay); 494 sum = ADD32(sum, EXTEND32(st->prop[i])); 495 } 496 for (i=M-1;i>=0;i--) 497 { 498 st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum); 499 } 500 } 501 502 st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t)); 503 st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 504 st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t)); 505 st->preemph = QCONST16(.9,15); 506 if (st->sampling_rate<12000) 507 st->notch_radius = QCONST16(.9, 15); 508 else if (st->sampling_rate<24000) 509 st->notch_radius = QCONST16(.982, 15); 510 else 511 st->notch_radius = QCONST16(.992, 15); 512 513 st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t)); 514 st->adapted = 0; 515 st->Pey = st->Pyy = FLOAT_ONE; 516 517#ifdef TWO_PATH 518 st->Davg1 = st->Davg2 = 0; 519 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 520#endif 521 522 st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t)); 523 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 524 st->play_buf_started = 0; 525 526 return st; 527} 528 529/** Resets echo canceller state */ 530EXPORT void speex_echo_state_reset(SpeexEchoState *st) 531{ 532 int i, M, N, C, K; 533 st->cancel_count=0; 534 st->screwed_up = 0; 535 N = st->window_size; 536 M = st->M; 537 C=st->C; 538 K=st->K; 539 for (i=0;i<N*M;i++) 540 st->W[i] = 0; 541#ifdef TWO_PATH 542 for (i=0;i<N*M;i++) 543 st->foreground[i] = 0; 544#endif 545 for (i=0;i<N*(M+1);i++) 546 st->X[i] = 0; 547 for (i=0;i<=st->frame_size;i++) 548 { 549 st->power[i] = 0; 550 st->power_1[i] = FLOAT_ONE; 551 st->Eh[i] = 0; 552 st->Yh[i] = 0; 553 } 554 for (i=0;i<st->frame_size;i++) 555 { 556 st->last_y[i] = 0; 557 } 558 for (i=0;i<N*C;i++) 559 { 560 st->E[i] = 0; 561 } 562 for (i=0;i<N*K;i++) 563 { 564 st->x[i] = 0; 565 } 566 for (i=0;i<2*C;i++) 567 st->notch_mem[i] = 0; 568 for (i=0;i<C;i++) 569 st->memD[i]=st->memE[i]=0; 570 for (i=0;i<K;i++) 571 st->memX[i]=0; 572 573 st->saturated = 0; 574 st->adapted = 0; 575 st->sum_adapt = 0; 576 st->Pey = st->Pyy = FLOAT_ONE; 577#ifdef TWO_PATH 578 st->Davg1 = st->Davg2 = 0; 579 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 580#endif 581 for (i=0;i<3*st->frame_size;i++) 582 st->play_buf[i] = 0; 583 st->play_buf_pos = PLAYBACK_DELAY*st->frame_size; 584 st->play_buf_started = 0; 585 586} 587 588/** Destroys an echo canceller state */ 589EXPORT void speex_echo_state_destroy(SpeexEchoState *st) 590{ 591 spx_fft_destroy(st->fft_table); 592 593 speex_free(st->e); 594 speex_free(st->x); 595 speex_free(st->input); 596 speex_free(st->y); 597 speex_free(st->last_y); 598 speex_free(st->Yf); 599 speex_free(st->Rf); 600 speex_free(st->Xf); 601 speex_free(st->Yh); 602 speex_free(st->Eh); 603 604 speex_free(st->X); 605 speex_free(st->Y); 606 speex_free(st->E); 607 speex_free(st->W); 608#ifdef TWO_PATH 609 speex_free(st->foreground); 610#endif 611 speex_free(st->PHI); 612 speex_free(st->power); 613 speex_free(st->power_1); 614 speex_free(st->window); 615 speex_free(st->prop); 616 speex_free(st->wtmp); 617#ifdef FIXED_POINT 618 speex_free(st->wtmp2); 619#endif 620 speex_free(st->memX); 621 speex_free(st->memD); 622 speex_free(st->memE); 623 speex_free(st->notch_mem); 624 625 speex_free(st->play_buf); 626 speex_free(st); 627 628#ifdef DUMP_ECHO_CANCEL_DATA 629 fclose(rFile); 630 fclose(pFile); 631 fclose(oFile); 632 rFile = pFile = oFile = NULL; 633#endif 634} 635 636EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out) 637{ 638 int i; 639 /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/ 640 st->play_buf_started = 1; 641 if (st->play_buf_pos>=st->frame_size) 642 { 643 speex_echo_cancellation(st, rec, st->play_buf, out); 644 st->play_buf_pos -= st->frame_size; 645 for (i=0;i<st->play_buf_pos;i++) 646 st->play_buf[i] = st->play_buf[i+st->frame_size]; 647 } else { 648 speex_warning("No playback frame available (your application is buggy and/or got xruns)"); 649 if (st->play_buf_pos!=0) 650 { 651 speex_warning("internal playback buffer corruption?"); 652 st->play_buf_pos = 0; 653 } 654 for (i=0;i<st->frame_size;i++) 655 out[i] = rec[i]; 656 } 657} 658 659EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play) 660{ 661 /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/ 662 if (!st->play_buf_started) 663 { 664 speex_warning("discarded first playback frame"); 665 return; 666 } 667 if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size) 668 { 669 int i; 670 for (i=0;i<st->frame_size;i++) 671 st->play_buf[st->play_buf_pos+i] = play[i]; 672 st->play_buf_pos += st->frame_size; 673 if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size) 674 { 675 speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)"); 676 for (i=0;i<st->frame_size;i++) 677 st->play_buf[st->play_buf_pos+i] = play[i]; 678 st->play_buf_pos += st->frame_size; 679 } 680 } else { 681 speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)"); 682 } 683} 684 685/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */ 686EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout) 687{ 688 speex_echo_cancellation(st, in, far_end, out); 689} 690 691/** Performs echo cancellation on a frame */ 692EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out) 693{ 694 int i,j, chan, speak; 695 int N,M, C, K; 696 spx_word32_t Syy,See,Sxx,Sdd, Sff; 697#ifdef TWO_PATH 698 spx_word32_t Dbf; 699 int update_foreground; 700#endif 701 spx_word32_t Sey; 702 spx_word16_t ss, ss_1; 703 spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE; 704 spx_float_t alpha, alpha_1; 705 spx_word16_t RER; 706 spx_word32_t tmp32; 707 708 N = st->window_size; 709 M = st->M; 710 C = st->C; 711 K = st->K; 712 713 st->cancel_count++; 714#ifdef FIXED_POINT 715 ss=DIV32_16(11469,M); 716 ss_1 = SUB16(32767,ss); 717#else 718 ss=.35/M; 719 ss_1 = 1-ss; 720#endif 721 722 for (chan = 0; chan < C; chan++) 723 { 724 /* Apply a notch filter to make sure DC doesn't end up causing problems */ 725 filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C); 726 /* Copy input data to buffer and apply pre-emphasis */ 727 /* Copy input data to buffer */ 728 for (i=0;i<st->frame_size;i++) 729 { 730 spx_word32_t tmp32; 731 /* FIXME: This core has changed a bit, need to merge properly */ 732 tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan]))); 733#ifdef FIXED_POINT 734 if (tmp32 > 32767) 735 { 736 tmp32 = 32767; 737 if (st->saturated == 0) 738 st->saturated = 1; 739 } 740 if (tmp32 < -32767) 741 { 742 tmp32 = -32767; 743 if (st->saturated == 0) 744 st->saturated = 1; 745 } 746#endif 747 st->memD[chan] = st->input[chan*st->frame_size+i]; 748 st->input[chan*st->frame_size+i] = EXTRACT16(tmp32); 749 } 750 } 751 752 for (speak = 0; speak < K; speak++) 753 { 754 for (i=0;i<st->frame_size;i++) 755 { 756 spx_word32_t tmp32; 757 st->x[speak*N+i] = st->x[speak*N+i+st->frame_size]; 758 tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak]))); 759#ifdef FIXED_POINT 760 /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */ 761 if (tmp32 > 32767) 762 { 763 tmp32 = 32767; 764 st->saturated = M+1; 765 } 766 if (tmp32 < -32767) 767 { 768 tmp32 = -32767; 769 st->saturated = M+1; 770 } 771#endif 772 st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32); 773 st->memX[speak] = far_end[i*K+speak]; 774 } 775 } 776 777 for (speak = 0; speak < K; speak++) 778 { 779 /* Shift memory: this could be optimized eventually*/ 780 for (j=M-1;j>=0;j--) 781 { 782 for (i=0;i<N;i++) 783 st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i]; 784 } 785 /* Convert x (echo input) to frequency domain */ 786 spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]); 787 } 788 789 Sxx = 0; 790 for (speak = 0; speak < K; speak++) 791 { 792 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 793 power_spectrum_accum(st->X+speak*N, st->Xf, N); 794 } 795 796 Sff = 0; 797 for (chan = 0; chan < C; chan++) 798 { 799#ifdef TWO_PATH 800 /* Compute foreground filter */ 801 spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K); 802 spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N); 803 for (i=0;i<st->frame_size;i++) 804 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]); 805 Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 806#endif 807 } 808 809 /* Adjust proportional adaption rate */ 810 /* FIXME: Adjust that for C, K*/ 811 if (st->adapted) 812 mdf_adjust_prop (st->W, N, M, C*K, st->prop); 813 /* Compute weight gradient */ 814 if (st->saturated == 0) 815 { 816 for (chan = 0; chan < C; chan++) 817 { 818 for (speak = 0; speak < K; speak++) 819 { 820 for (j=M-1;j>=0;j--) 821 { 822 weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N); 823 for (i=0;i<N;i++) 824 st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i]; 825 } 826 } 827 } 828 } else { 829 st->saturated--; 830 } 831 832 /* FIXME: MC conversion required */ 833 /* Update weight to prevent circular convolution (MDF / AUMDF) */ 834 for (chan = 0; chan < C; chan++) 835 { 836 for (speak = 0; speak < K; speak++) 837 { 838 for (j=0;j<M;j++) 839 { 840 /* This is a variant of the Alternatively Updated MDF (AUMDF) */ 841 /* Remove the "if" to make this an MDF filter */ 842 if (j==0 || st->cancel_count%(M-1) == j-1) 843 { 844#ifdef FIXED_POINT 845 for (i=0;i<N;i++) 846 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16)); 847 spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 848 for (i=0;i<st->frame_size;i++) 849 { 850 st->wtmp[i]=0; 851 } 852 for (i=st->frame_size;i<N;i++) 853 { 854 st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP); 855 } 856 spx_fft(st->fft_table, st->wtmp, st->wtmp2); 857 /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */ 858 for (i=0;i<N;i++) 859 st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1); 860#else 861 spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp); 862 for (i=st->frame_size;i<N;i++) 863 { 864 st->wtmp[i]=0; 865 } 866 spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]); 867#endif 868 } 869 } 870 } 871 } 872 873 /* So we can use power_spectrum_accum */ 874 for (i=0;i<=st->frame_size;i++) 875 st->Rf[i] = st->Yf[i] = st->Xf[i] = 0; 876 877 Dbf = 0; 878 See = 0; 879#ifdef TWO_PATH 880 /* Difference in response, this is used to estimate the variance of our residual power estimate */ 881 for (chan = 0; chan < C; chan++) 882 { 883 spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K); 884 spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N); 885 for (i=0;i<st->frame_size;i++) 886 st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]); 887 Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 888 for (i=0;i<st->frame_size;i++) 889 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 890 See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size); 891 } 892#endif 893 894#ifndef TWO_PATH 895 Sff = See; 896#endif 897 898#ifdef TWO_PATH 899 /* Logic for updating the foreground filter */ 900 901 /* For two time windows, compute the mean of the energy difference, as well as the variance */ 902 st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See))); 903 st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See))); 904 st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf))); 905 st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf))); 906 907 /* Equivalent float code: 908 st->Davg1 = .6*st->Davg1 + .4*(Sff-See); 909 st->Davg2 = .85*st->Davg2 + .15*(Sff-See); 910 st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf; 911 st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf; 912 */ 913 914 update_foreground = 0; 915 /* Check if we have a statistically significant reduction in the residual echo */ 916 /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */ 917 if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf))) 918 update_foreground = 1; 919 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1)))) 920 update_foreground = 1; 921 else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2)))) 922 update_foreground = 1; 923 924 /* Do we update? */ 925 if (update_foreground) 926 { 927 st->Davg1 = st->Davg2 = 0; 928 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 929 /* Copy background filter to foreground filter */ 930 for (i=0;i<N*M*C*K;i++) 931 st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16)); 932 /* Apply a smooth transition so as to not introduce blocking artifacts */ 933 for (chan = 0; chan < C; chan++) 934 for (i=0;i<st->frame_size;i++) 935 st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]); 936 } else { 937 int reset_background=0; 938 /* Otherwise, check if the background filter is significantly worse */ 939 if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf)))) 940 reset_background = 1; 941 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1))) 942 reset_background = 1; 943 if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2))) 944 reset_background = 1; 945 if (reset_background) 946 { 947 /* Copy foreground filter to background filter */ 948 for (i=0;i<N*M*C*K;i++) 949 st->W[i] = SHL32(EXTEND32(st->foreground[i]),16); 950 /* We also need to copy the output so as to get correct adaptation */ 951 for (chan = 0; chan < C; chan++) 952 { 953 for (i=0;i<st->frame_size;i++) 954 st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size]; 955 for (i=0;i<st->frame_size;i++) 956 st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]); 957 } 958 See = Sff; 959 st->Davg1 = st->Davg2 = 0; 960 st->Dvar1 = st->Dvar2 = FLOAT_ZERO; 961 } 962 } 963#endif 964 965 Sey = Syy = Sdd = 0; 966 for (chan = 0; chan < C; chan++) 967 { 968 /* Compute error signal (for the output with de-emphasis) */ 969 for (i=0;i<st->frame_size;i++) 970 { 971 spx_word32_t tmp_out; 972#ifdef TWO_PATH 973 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size])); 974#else 975 tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size])); 976#endif 977 tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan]))); 978 /* This is an arbitrary test for saturation in the microphone signal */ 979 if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000) 980 { 981 if (st->saturated == 0) 982 st->saturated = 1; 983 } 984 out[i*C+chan] = WORD2INT(tmp_out); 985 st->memE[chan] = tmp_out; 986 } 987 988#ifdef DUMP_ECHO_CANCEL_DATA 989 dump_audio(in, far_end, out, st->frame_size); 990#endif 991 992 /* Compute error signal (filter update version) */ 993 for (i=0;i<st->frame_size;i++) 994 { 995 st->e[chan*N+i+st->frame_size] = st->e[chan*N+i]; 996 st->e[chan*N+i] = 0; 997 } 998 999 /* Compute a bunch of correlations */ 1000 /* FIXME: bad merge */ 1001 Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 1002 Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size); 1003 Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size); 1004 1005 /* Convert error to frequency domain */ 1006 spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N); 1007 for (i=0;i<st->frame_size;i++) 1008 st->y[i+chan*N] = 0; 1009 spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N); 1010 1011 /* Compute power spectrum of echo (X), error (E) and filter response (Y) */ 1012 power_spectrum_accum(st->E+chan*N, st->Rf, N); 1013 power_spectrum_accum(st->Y+chan*N, st->Yf, N); 1014 1015 } 1016 1017 /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/ 1018 1019 /* Do some sanity check */ 1020 if (!(Syy>=0 && Sxx>=0 && See >= 0) 1021#ifndef FIXED_POINT 1022 || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9) 1023#endif 1024 ) 1025 { 1026 /* Things have gone really bad */ 1027 st->screwed_up += 50; 1028 for (i=0;i<st->frame_size*C;i++) 1029 out[i] = 0; 1030 } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6))) 1031 { 1032 /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */ 1033 st->screwed_up++; 1034 } else { 1035 /* Everything's fine */ 1036 st->screwed_up=0; 1037 } 1038 if (st->screwed_up>=50) 1039 { 1040 speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now."); 1041 speex_echo_state_reset(st); 1042 return; 1043 } 1044 1045 /* Add a small noise floor to make sure not to have problems when dividing */ 1046 See = MAX32(See, SHR32(MULT16_16(N, 100),6)); 1047 1048 for (speak = 0; speak < K; speak++) 1049 { 1050 Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size); 1051 power_spectrum_accum(st->X+speak*N, st->Xf, N); 1052 } 1053 1054 1055 /* Smooth far end energy estimate over time */ 1056 for (j=0;j<=st->frame_size;j++) 1057 st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]); 1058 1059 /* Compute filtered spectra and (cross-)correlations */ 1060 for (j=st->frame_size;j>=0;j--) 1061 { 1062 spx_float_t Eh, Yh; 1063 Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]); 1064 Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]); 1065 Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh)); 1066 Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh)); 1067#ifdef FIXED_POINT 1068 st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]); 1069 st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]); 1070#else 1071 st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j]; 1072 st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j]; 1073#endif 1074 } 1075 1076 Pyy = FLOAT_SQRT(Pyy); 1077 Pey = FLOAT_DIVU(Pey,Pyy); 1078 1079 /* Compute correlation updatete rate */ 1080 tmp32 = MULT16_32_Q15(st->beta0,Syy); 1081 if (tmp32 > MULT16_32_Q15(st->beta_max,See)) 1082 tmp32 = MULT16_32_Q15(st->beta_max,See); 1083 alpha = FLOAT_DIV32(tmp32, See); 1084 alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha); 1085 /* Update correlations (recursive average) */ 1086 st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey)); 1087 st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy)); 1088 if (FLOAT_LT(st->Pyy, FLOAT_ONE)) 1089 st->Pyy = FLOAT_ONE; 1090 /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */ 1091 if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy))) 1092 st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy); 1093 if (FLOAT_GT(st->Pey, st->Pyy)) 1094 st->Pey = st->Pyy; 1095 /* leak_estimate is the linear regression result */ 1096 st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14)); 1097 /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */ 1098 if (st->leak_estimate > 16383) 1099 st->leak_estimate = 32767; 1100 else 1101 st->leak_estimate = SHL16(st->leak_estimate,1); 1102 /*printf ("%f\n", st->leak_estimate);*/ 1103 1104 /* Compute Residual to Error Ratio */ 1105#ifdef FIXED_POINT 1106 tmp32 = MULT16_32_Q15(st->leak_estimate,Syy); 1107 tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1))); 1108 /* Check for y in e (lower bound on RER) */ 1109 { 1110 spx_float_t bound = PSEUDOFLOAT(Sey); 1111 bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy))); 1112 if (FLOAT_GT(bound, PSEUDOFLOAT(See))) 1113 tmp32 = See; 1114 else if (tmp32 < FLOAT_EXTRACT32(bound)) 1115 tmp32 = FLOAT_EXTRACT32(bound); 1116 } 1117 if (tmp32 > SHR32(See,1)) 1118 tmp32 = SHR32(See,1); 1119 RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15)); 1120#else 1121 RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See; 1122 /* Check for y in e (lower bound on RER) */ 1123 if (RER < Sey*Sey/(1+See*Syy)) 1124 RER = Sey*Sey/(1+See*Syy); 1125 if (RER > .5) 1126 RER = .5; 1127#endif 1128 1129 /* We consider that the filter has had minimal adaptation if the following is true*/ 1130 if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy)) 1131 { 1132 st->adapted = 1; 1133 } 1134 1135 if (st->adapted) 1136 { 1137 /* Normal learning rate calculation once we're past the minimal adaptation phase */ 1138 for (i=0;i<=st->frame_size;i++) 1139 { 1140 spx_word32_t r, e; 1141 /* Compute frequency-domain adaptation mask */ 1142 r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3)); 1143 e = SHL32(st->Rf[i],3)+1; 1144#ifdef FIXED_POINT 1145 if (r>SHR32(e,1)) 1146 r = SHR32(e,1); 1147#else 1148 if (r>.5*e) 1149 r = .5*e; 1150#endif 1151 r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e))); 1152 /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/ 1153 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16); 1154 } 1155 } else { 1156 /* Temporary adaption rate if filter is not yet adapted enough */ 1157 spx_word16_t adapt_rate=0; 1158 1159 if (Sxx > SHR32(MULT16_16(N, 1000),6)) 1160 { 1161 tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx); 1162#ifdef FIXED_POINT 1163 if (tmp32 > SHR32(See,2)) 1164 tmp32 = SHR32(See,2); 1165#else 1166 if (tmp32 > .25*See) 1167 tmp32 = .25*See; 1168#endif 1169 adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15)); 1170 } 1171 for (i=0;i<=st->frame_size;i++) 1172 st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1); 1173 1174 1175 /* How much have we adapted so far? */ 1176 st->sum_adapt = ADD32(st->sum_adapt,adapt_rate); 1177 } 1178 1179 /* FIXME: MC conversion required */ 1180 for (i=0;i<st->frame_size;i++) 1181 st->last_y[i] = st->last_y[st->frame_size+i]; 1182 if (st->adapted) 1183 { 1184 /* If the filter is adapted, take the filtered echo */ 1185 for (i=0;i<st->frame_size;i++) 1186 st->last_y[st->frame_size+i] = in[i]-out[i]; 1187 } else { 1188 /* If filter isn't adapted yet, all we can do is take the far end signal directly */ 1189 /* moved earlier: for (i=0;i<N;i++) 1190 st->last_y[i] = st->x[i];*/ 1191 } 1192 1193} 1194 1195/* Compute spectrum of estimated echo for use in an echo post-filter */ 1196void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len) 1197{ 1198 int i; 1199 spx_word16_t leak2; 1200 int N; 1201 1202 N = st->window_size; 1203 1204 /* Apply hanning window (should pre-compute it)*/ 1205 for (i=0;i<N;i++) 1206 st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]); 1207 1208 /* Compute power spectrum of the echo */ 1209 spx_fft(st->fft_table, st->y, st->Y); 1210 power_spectrum(st->Y, residual_echo, N); 1211 1212#ifdef FIXED_POINT 1213 if (st->leak_estimate > 16383) 1214 leak2 = 32767; 1215 else 1216 leak2 = SHL16(st->leak_estimate, 1); 1217#else 1218 if (st->leak_estimate>.5) 1219 leak2 = 1; 1220 else 1221 leak2 = 2*st->leak_estimate; 1222#endif 1223 /* Estimate residual echo */ 1224 for (i=0;i<=st->frame_size;i++) 1225 residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]); 1226 1227} 1228 1229EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr) 1230{ 1231 switch(request) 1232 { 1233 1234 case SPEEX_ECHO_GET_FRAME_SIZE: 1235 (*(int*)ptr) = st->frame_size; 1236 break; 1237 case SPEEX_ECHO_SET_SAMPLING_RATE: 1238 st->sampling_rate = (*(int*)ptr); 1239 st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate); 1240#ifdef FIXED_POINT 1241 st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate); 1242 st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate); 1243#else 1244 st->beta0 = (2.0f*st->frame_size)/st->sampling_rate; 1245 st->beta_max = (.5f*st->frame_size)/st->sampling_rate; 1246#endif 1247 if (st->sampling_rate<12000) 1248 st->notch_radius = QCONST16(.9, 15); 1249 else if (st->sampling_rate<24000) 1250 st->notch_radius = QCONST16(.982, 15); 1251 else 1252 st->notch_radius = QCONST16(.992, 15); 1253 break; 1254 case SPEEX_ECHO_GET_SAMPLING_RATE: 1255 (*(int*)ptr) = st->sampling_rate; 1256 break; 1257 case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE: 1258 /*FIXME: Implement this for multiple channels */ 1259 *((spx_int32_t *)ptr) = st->M * st->frame_size; 1260 break; 1261 case SPEEX_ECHO_GET_IMPULSE_RESPONSE: 1262 { 1263 int M = st->M, N = st->window_size, n = st->frame_size, i, j; 1264 spx_int32_t *filt = (spx_int32_t *) ptr; 1265 for(j=0;j<M;j++) 1266 { 1267 /*FIXME: Implement this for multiple channels */ 1268#ifdef FIXED_POINT 1269 for (i=0;i<N;i++) 1270 st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN)); 1271 spx_ifft(st->fft_table, st->wtmp2, st->wtmp); 1272#else 1273 spx_ifft(st->fft_table, &st->W[j*N], st->wtmp); 1274#endif 1275 for(i=0;i<n;i++) 1276 filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN); 1277 } 1278 } 1279 break; 1280 default: 1281 speex_warning_int("Unknown speex_echo_ctl request: ", request); 1282 return -1; 1283 } 1284 return 0; 1285} 1286