1/* Copyright (C) 2003-2008 Jean-Marc Valin
2
3   File: mdf.c
4   Echo canceller based on the MDF algorithm (see below)
5
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are
8   met:
9
10   1. Redistributions of source code must retain the above copyright notice,
11   this list of conditions and the following disclaimer.
12
13   2. Redistributions in binary form must reproduce the above copyright
14   notice, this list of conditions and the following disclaimer in the
15   documentation and/or other materials provided with the distribution.
16
17   3. The name of the author may not be used to endorse or promote products
18   derived from this software without specific prior written permission.
19
20   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
24   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
28   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31*/
32
33/*
34   The echo canceller is based on the MDF algorithm described in:
35
36   J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
37   IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
38   February 1990.
39
40   We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
41   double-talk is achieved using a variable learning rate as described in:
42
43   Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
44   Cancellation With Double-Talk. IEEE Transactions on Audio,
45   Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
46   http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
47
48   There is no explicit double-talk detection, but a continuous variation
49   in the learning rate based on residual echo, double-talk and background
50   noise.
51
52   About the fixed-point version:
53   All the signals are represented with 16-bit words. The filter weights
54   are represented with 32-bit words, but only the top 16 bits are used
55   in most cases. The lower 16 bits are completely unreliable (due to the
56   fact that the update is done only on the top bits), but help in the
57   adaptation -- probably by removing a "threshold effect" due to
58   quantization (rounding going to zero) when the gradient is small.
59
60   Another kludge that seems to work good: when performing the weight
61   update, we only move half the way toward the "goal" this seems to
62   reduce the effect of quantization noise in the update phase. This
63   can be seen as applying a gradient descent on a "soft constraint"
64   instead of having a hard constraint.
65
66*/
67
68#ifdef HAVE_CONFIG_H
69#include "config.h"
70#endif
71
72#include "arch.h"
73#include "speex/speex_echo.h"
74#include "fftwrap.h"
75#include "pseudofloat.h"
76#include "math_approx.h"
77#include "os_support.h"
78
79#ifndef M_PI
80#define M_PI 3.14159265358979323846
81#endif
82
83#ifdef FIXED_POINT
84#define WEIGHT_SHIFT 11
85#define NORMALIZE_SCALEDOWN 5
86#define NORMALIZE_SCALEUP 3
87#else
88#define WEIGHT_SHIFT 0
89#endif
90
91#ifdef FIXED_POINT
92#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
93#else
94#define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
95#endif
96
97/* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
98   and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
99#define TWO_PATH
100
101#ifdef FIXED_POINT
102static const spx_float_t MIN_LEAK = {20972, -22};
103
104/* Constants for the two-path filter */
105static const spx_float_t VAR1_SMOOTH = {23593, -16};
106static const spx_float_t VAR2_SMOOTH = {23675, -15};
107static const spx_float_t VAR1_UPDATE = {16384, -15};
108static const spx_float_t VAR2_UPDATE = {16384, -16};
109static const spx_float_t VAR_BACKTRACK = {16384, -12};
110#define TOP16(x) ((x)>>16)
111
112#else
113
114static const spx_float_t MIN_LEAK = .005f;
115
116/* Constants for the two-path filter */
117static const spx_float_t VAR1_SMOOTH = .36f;
118static const spx_float_t VAR2_SMOOTH = .7225f;
119static const spx_float_t VAR1_UPDATE = .5f;
120static const spx_float_t VAR2_UPDATE = .25f;
121static const spx_float_t VAR_BACKTRACK = 4.f;
122#define TOP16(x) (x)
123#endif
124
125
126#define PLAYBACK_DELAY 2
127
128void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
129
130
131/** Speex echo cancellation state. */
132struct SpeexEchoState_ {
133   int frame_size;           /**< Number of samples processed each time */
134   int window_size;
135   int M;
136   int cancel_count;
137   int adapted;
138   int saturated;
139   int screwed_up;
140   int C;                    /** Number of input channels (microphones) */
141   int K;                    /** Number of output channels (loudspeakers) */
142   spx_int32_t sampling_rate;
143   spx_word16_t spec_average;
144   spx_word16_t beta0;
145   spx_word16_t beta_max;
146   spx_word32_t sum_adapt;
147   spx_word16_t leak_estimate;
148
149   spx_word16_t *e;      /* scratch */
150   spx_word16_t *x;      /* Far-end input buffer (2N) */
151   spx_word16_t *X;      /* Far-end buffer (M+1 frames) in frequency domain */
152   spx_word16_t *input;  /* scratch */
153   spx_word16_t *y;      /* scratch */
154   spx_word16_t *last_y;
155   spx_word16_t *Y;      /* scratch */
156   spx_word16_t *E;
157   spx_word32_t *PHI;    /* scratch */
158   spx_word32_t *W;      /* (Background) filter weights */
159#ifdef TWO_PATH
160   spx_word16_t *foreground; /* Foreground filter weights */
161   spx_word32_t  Davg1;  /* 1st recursive average of the residual power difference */
162   spx_word32_t  Davg2;  /* 2nd recursive average of the residual power difference */
163   spx_float_t   Dvar1;  /* Estimated variance of 1st estimator */
164   spx_float_t   Dvar2;  /* Estimated variance of 2nd estimator */
165#endif
166   spx_word32_t *power;  /* Power of the far-end signal */
167   spx_float_t  *power_1;/* Inverse power of far-end */
168   spx_word16_t *wtmp;   /* scratch */
169#ifdef FIXED_POINT
170   spx_word16_t *wtmp2;  /* scratch */
171#endif
172   spx_word32_t *Rf;     /* scratch */
173   spx_word32_t *Yf;     /* scratch */
174   spx_word32_t *Xf;     /* scratch */
175   spx_word32_t *Eh;
176   spx_word32_t *Yh;
177   spx_float_t   Pey;
178   spx_float_t   Pyy;
179   spx_word16_t *window;
180   spx_word16_t *prop;
181   void *fft_table;
182   spx_word16_t *memX, *memD, *memE;
183   spx_word16_t preemph;
184   spx_word16_t notch_radius;
185   spx_mem_t *notch_mem;
186
187   /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
188   spx_int16_t *play_buf;
189   int play_buf_pos;
190   int play_buf_started;
191};
192
193static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
194{
195   int i;
196   spx_word16_t den2;
197#ifdef FIXED_POINT
198   den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
199#else
200   den2 = radius*radius + .7*(1-radius)*(1-radius);
201#endif
202   /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
203   for (i=0;i<len;i++)
204   {
205      spx_word16_t vin = in[i*stride];
206      spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
207#ifdef FIXED_POINT
208      mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
209#else
210      mem[0] = mem[1] + 2*(-vin + radius*vout);
211#endif
212      mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
213      out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
214   }
215}
216
217/* This inner product is slightly different from the codec version because of fixed-point */
218static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
219{
220   spx_word32_t sum=0;
221   len >>= 1;
222   while(len--)
223   {
224      spx_word32_t part=0;
225      part = MAC16_16(part,*x++,*y++);
226      part = MAC16_16(part,*x++,*y++);
227      /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
228      sum = ADD32(sum,SHR32(part,6));
229   }
230   return sum;
231}
232
233/** Compute power spectrum of a half-complex (packed) vector */
234static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
235{
236   int i, j;
237   ps[0]=MULT16_16(X[0],X[0]);
238   for (i=1,j=1;i<N-1;i+=2,j++)
239   {
240      ps[j] =  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
241   }
242   ps[j]=MULT16_16(X[i],X[i]);
243}
244
245/** Compute power spectrum of a half-complex (packed) vector and accumulate */
246static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
247{
248   int i, j;
249   ps[0]+=MULT16_16(X[0],X[0]);
250   for (i=1,j=1;i<N-1;i+=2,j++)
251   {
252      ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
253   }
254   ps[j]+=MULT16_16(X[i],X[i]);
255}
256
257/** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
258#ifdef FIXED_POINT
259static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
260{
261   int i,j;
262   spx_word32_t tmp1=0,tmp2=0;
263   for (j=0;j<M;j++)
264   {
265      tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
266   }
267   acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
268   for (i=1;i<N-1;i+=2)
269   {
270      tmp1 = tmp2 = 0;
271      for (j=0;j<M;j++)
272      {
273         tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
274         tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
275      }
276      acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
277      acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
278   }
279   tmp1 = tmp2 = 0;
280   for (j=0;j<M;j++)
281   {
282      tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
283   }
284   acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
285}
286static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
287{
288   int i,j;
289   spx_word32_t tmp1=0,tmp2=0;
290   for (j=0;j<M;j++)
291   {
292      tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
293   }
294   acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
295   for (i=1;i<N-1;i+=2)
296   {
297      tmp1 = tmp2 = 0;
298      for (j=0;j<M;j++)
299      {
300         tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
301         tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
302      }
303      acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
304      acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
305   }
306   tmp1 = tmp2 = 0;
307   for (j=0;j<M;j++)
308   {
309      tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
310   }
311   acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
312}
313
314#else
315static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
316{
317   int i,j;
318   for (i=0;i<N;i++)
319      acc[i] = 0;
320   for (j=0;j<M;j++)
321   {
322      acc[0] += X[0]*Y[0];
323      for (i=1;i<N-1;i+=2)
324      {
325         acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
326         acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
327      }
328      acc[i] += X[i]*Y[i];
329      X += N;
330      Y += N;
331   }
332}
333#define spectral_mul_accum16 spectral_mul_accum
334#endif
335
336/** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
337static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
338{
339   int i, j;
340   spx_float_t W;
341   W = FLOAT_AMULT(p, w[0]);
342   prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
343   for (i=1,j=1;i<N-1;i+=2,j++)
344   {
345      W = FLOAT_AMULT(p, w[j]);
346      prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
347      prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
348   }
349   W = FLOAT_AMULT(p, w[j]);
350   prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
351}
352
353static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
354{
355   int i, j, p;
356   spx_word16_t max_sum = 1;
357   spx_word32_t prop_sum = 1;
358   for (i=0;i<M;i++)
359   {
360      spx_word32_t tmp = 1;
361      for (p=0;p<P;p++)
362         for (j=0;j<N;j++)
363            tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
364#ifdef FIXED_POINT
365      /* Just a security in case an overflow were to occur */
366      tmp = MIN32(ABS32(tmp), 536870912);
367#endif
368      prop[i] = spx_sqrt(tmp);
369      if (prop[i] > max_sum)
370         max_sum = prop[i];
371   }
372   for (i=0;i<M;i++)
373   {
374      prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
375      prop_sum += EXTEND32(prop[i]);
376   }
377   for (i=0;i<M;i++)
378   {
379      prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
380      /*printf ("%f ", prop[i]);*/
381   }
382   /*printf ("\n");*/
383}
384
385#ifdef DUMP_ECHO_CANCEL_DATA
386#include <stdio.h>
387static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
388
389static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
390{
391   if (!(rFile && pFile && oFile))
392   {
393      speex_fatal("Dump files not open");
394   }
395   fwrite(rec, sizeof(spx_int16_t), len, rFile);
396   fwrite(play, sizeof(spx_int16_t), len, pFile);
397   fwrite(out, sizeof(spx_int16_t), len, oFile);
398}
399#endif
400
401/** Creates a new echo canceller state */
402EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
403{
404   return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
405}
406
407EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
408{
409   int i,N,M, C, K;
410   SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
411
412   st->K = nb_speakers;
413   st->C = nb_mic;
414   C=st->C;
415   K=st->K;
416#ifdef DUMP_ECHO_CANCEL_DATA
417   if (rFile || pFile || oFile)
418      speex_fatal("Opening dump files twice");
419   rFile = fopen("aec_rec.sw", "wb");
420   pFile = fopen("aec_play.sw", "wb");
421   oFile = fopen("aec_out.sw", "wb");
422#endif
423
424   st->frame_size = frame_size;
425   st->window_size = 2*frame_size;
426   N = st->window_size;
427   M = st->M = (filter_length+st->frame_size-1)/frame_size;
428   st->cancel_count=0;
429   st->sum_adapt = 0;
430   st->saturated = 0;
431   st->screwed_up = 0;
432   /* This is the default sampling rate */
433   st->sampling_rate = 8000;
434   st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
435#ifdef FIXED_POINT
436   st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
437   st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
438#else
439   st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
440   st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
441#endif
442   st->leak_estimate = 0;
443
444   st->fft_table = spx_fft_init(N);
445
446   st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
447   st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
448   st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
449   st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
450   st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
451   st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
452   st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
453   st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
454   st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
455   st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
456
457   st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
458   st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
459   st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
460   st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
461#ifdef TWO_PATH
462   st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
463#endif
464   st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
465   st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
466   st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
467   st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
468   st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
469   st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
470#ifdef FIXED_POINT
471   st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
472   for (i=0;i<N>>1;i++)
473   {
474      st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
475      st->window[N-i-1] = st->window[i];
476   }
477#else
478   for (i=0;i<N;i++)
479      st->window[i] = .5-.5*cos(2*M_PI*i/N);
480#endif
481   for (i=0;i<=st->frame_size;i++)
482      st->power_1[i] = FLOAT_ONE;
483   for (i=0;i<N*M*K*C;i++)
484      st->W[i] = 0;
485   {
486      spx_word32_t sum = 0;
487      /* Ratio of ~10 between adaptation rate of first and last block */
488      spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
489      st->prop[0] = QCONST16(.7, 15);
490      sum = EXTEND32(st->prop[0]);
491      for (i=1;i<M;i++)
492      {
493         st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
494         sum = ADD32(sum, EXTEND32(st->prop[i]));
495      }
496      for (i=M-1;i>=0;i--)
497      {
498         st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
499      }
500   }
501
502   st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
503   st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
504   st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
505   st->preemph = QCONST16(.9,15);
506   if (st->sampling_rate<12000)
507      st->notch_radius = QCONST16(.9, 15);
508   else if (st->sampling_rate<24000)
509      st->notch_radius = QCONST16(.982, 15);
510   else
511      st->notch_radius = QCONST16(.992, 15);
512
513   st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
514   st->adapted = 0;
515   st->Pey = st->Pyy = FLOAT_ONE;
516
517#ifdef TWO_PATH
518   st->Davg1 = st->Davg2 = 0;
519   st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
520#endif
521
522   st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
523   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
524   st->play_buf_started = 0;
525
526   return st;
527}
528
529/** Resets echo canceller state */
530EXPORT void speex_echo_state_reset(SpeexEchoState *st)
531{
532   int i, M, N, C, K;
533   st->cancel_count=0;
534   st->screwed_up = 0;
535   N = st->window_size;
536   M = st->M;
537   C=st->C;
538   K=st->K;
539   for (i=0;i<N*M;i++)
540      st->W[i] = 0;
541#ifdef TWO_PATH
542   for (i=0;i<N*M;i++)
543      st->foreground[i] = 0;
544#endif
545   for (i=0;i<N*(M+1);i++)
546      st->X[i] = 0;
547   for (i=0;i<=st->frame_size;i++)
548   {
549      st->power[i] = 0;
550      st->power_1[i] = FLOAT_ONE;
551      st->Eh[i] = 0;
552      st->Yh[i] = 0;
553   }
554   for (i=0;i<st->frame_size;i++)
555   {
556      st->last_y[i] = 0;
557   }
558   for (i=0;i<N*C;i++)
559   {
560      st->E[i] = 0;
561   }
562   for (i=0;i<N*K;i++)
563   {
564      st->x[i] = 0;
565   }
566   for (i=0;i<2*C;i++)
567      st->notch_mem[i] = 0;
568   for (i=0;i<C;i++)
569      st->memD[i]=st->memE[i]=0;
570   for (i=0;i<K;i++)
571      st->memX[i]=0;
572
573   st->saturated = 0;
574   st->adapted = 0;
575   st->sum_adapt = 0;
576   st->Pey = st->Pyy = FLOAT_ONE;
577#ifdef TWO_PATH
578   st->Davg1 = st->Davg2 = 0;
579   st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
580#endif
581   for (i=0;i<3*st->frame_size;i++)
582      st->play_buf[i] = 0;
583   st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
584   st->play_buf_started = 0;
585
586}
587
588/** Destroys an echo canceller state */
589EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
590{
591   spx_fft_destroy(st->fft_table);
592
593   speex_free(st->e);
594   speex_free(st->x);
595   speex_free(st->input);
596   speex_free(st->y);
597   speex_free(st->last_y);
598   speex_free(st->Yf);
599   speex_free(st->Rf);
600   speex_free(st->Xf);
601   speex_free(st->Yh);
602   speex_free(st->Eh);
603
604   speex_free(st->X);
605   speex_free(st->Y);
606   speex_free(st->E);
607   speex_free(st->W);
608#ifdef TWO_PATH
609   speex_free(st->foreground);
610#endif
611   speex_free(st->PHI);
612   speex_free(st->power);
613   speex_free(st->power_1);
614   speex_free(st->window);
615   speex_free(st->prop);
616   speex_free(st->wtmp);
617#ifdef FIXED_POINT
618   speex_free(st->wtmp2);
619#endif
620   speex_free(st->memX);
621   speex_free(st->memD);
622   speex_free(st->memE);
623   speex_free(st->notch_mem);
624
625   speex_free(st->play_buf);
626   speex_free(st);
627
628#ifdef DUMP_ECHO_CANCEL_DATA
629   fclose(rFile);
630   fclose(pFile);
631   fclose(oFile);
632   rFile = pFile = oFile = NULL;
633#endif
634}
635
636EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
637{
638   int i;
639   /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
640   st->play_buf_started = 1;
641   if (st->play_buf_pos>=st->frame_size)
642   {
643      speex_echo_cancellation(st, rec, st->play_buf, out);
644      st->play_buf_pos -= st->frame_size;
645      for (i=0;i<st->play_buf_pos;i++)
646         st->play_buf[i] = st->play_buf[i+st->frame_size];
647   } else {
648      speex_warning("No playback frame available (your application is buggy and/or got xruns)");
649      if (st->play_buf_pos!=0)
650      {
651         speex_warning("internal playback buffer corruption?");
652         st->play_buf_pos = 0;
653      }
654      for (i=0;i<st->frame_size;i++)
655         out[i] = rec[i];
656   }
657}
658
659EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
660{
661   /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
662   if (!st->play_buf_started)
663   {
664      speex_warning("discarded first playback frame");
665      return;
666   }
667   if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
668   {
669      int i;
670      for (i=0;i<st->frame_size;i++)
671         st->play_buf[st->play_buf_pos+i] = play[i];
672      st->play_buf_pos += st->frame_size;
673      if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
674      {
675         speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
676         for (i=0;i<st->frame_size;i++)
677            st->play_buf[st->play_buf_pos+i] = play[i];
678         st->play_buf_pos += st->frame_size;
679      }
680   } else {
681      speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
682   }
683}
684
685/** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
686EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
687{
688   speex_echo_cancellation(st, in, far_end, out);
689}
690
691/** Performs echo cancellation on a frame */
692EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
693{
694   int i,j, chan, speak;
695   int N,M, C, K;
696   spx_word32_t Syy,See,Sxx,Sdd, Sff;
697#ifdef TWO_PATH
698   spx_word32_t Dbf;
699   int update_foreground;
700#endif
701   spx_word32_t Sey;
702   spx_word16_t ss, ss_1;
703   spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
704   spx_float_t alpha, alpha_1;
705   spx_word16_t RER;
706   spx_word32_t tmp32;
707
708   N = st->window_size;
709   M = st->M;
710   C = st->C;
711   K = st->K;
712
713   st->cancel_count++;
714#ifdef FIXED_POINT
715   ss=DIV32_16(11469,M);
716   ss_1 = SUB16(32767,ss);
717#else
718   ss=.35/M;
719   ss_1 = 1-ss;
720#endif
721
722   for (chan = 0; chan < C; chan++)
723   {
724      /* Apply a notch filter to make sure DC doesn't end up causing problems */
725      filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
726      /* Copy input data to buffer and apply pre-emphasis */
727      /* Copy input data to buffer */
728      for (i=0;i<st->frame_size;i++)
729      {
730         spx_word32_t tmp32;
731         /* FIXME: This core has changed a bit, need to merge properly */
732         tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
733#ifdef FIXED_POINT
734         if (tmp32 > 32767)
735         {
736            tmp32 = 32767;
737            if (st->saturated == 0)
738               st->saturated = 1;
739         }
740         if (tmp32 < -32767)
741         {
742            tmp32 = -32767;
743            if (st->saturated == 0)
744               st->saturated = 1;
745         }
746#endif
747         st->memD[chan] = st->input[chan*st->frame_size+i];
748         st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
749      }
750   }
751
752   for (speak = 0; speak < K; speak++)
753   {
754      for (i=0;i<st->frame_size;i++)
755      {
756         spx_word32_t tmp32;
757         st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
758         tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
759#ifdef FIXED_POINT
760         /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
761         if (tmp32 > 32767)
762         {
763            tmp32 = 32767;
764            st->saturated = M+1;
765         }
766         if (tmp32 < -32767)
767         {
768            tmp32 = -32767;
769            st->saturated = M+1;
770         }
771#endif
772         st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
773         st->memX[speak] = far_end[i*K+speak];
774      }
775   }
776
777   for (speak = 0; speak < K; speak++)
778   {
779      /* Shift memory: this could be optimized eventually*/
780      for (j=M-1;j>=0;j--)
781      {
782         for (i=0;i<N;i++)
783            st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
784      }
785      /* Convert x (echo input) to frequency domain */
786      spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
787   }
788
789   Sxx = 0;
790   for (speak = 0; speak < K; speak++)
791   {
792      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
793      power_spectrum_accum(st->X+speak*N, st->Xf, N);
794   }
795
796   Sff = 0;
797   for (chan = 0; chan < C; chan++)
798   {
799#ifdef TWO_PATH
800      /* Compute foreground filter */
801      spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
802      spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
803      for (i=0;i<st->frame_size;i++)
804         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
805      Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
806#endif
807   }
808
809   /* Adjust proportional adaption rate */
810   /* FIXME: Adjust that for C, K*/
811   if (st->adapted)
812      mdf_adjust_prop (st->W, N, M, C*K, st->prop);
813   /* Compute weight gradient */
814   if (st->saturated == 0)
815   {
816      for (chan = 0; chan < C; chan++)
817      {
818         for (speak = 0; speak < K; speak++)
819         {
820            for (j=M-1;j>=0;j--)
821            {
822               weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
823               for (i=0;i<N;i++)
824                  st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
825            }
826         }
827      }
828   } else {
829      st->saturated--;
830   }
831
832   /* FIXME: MC conversion required */
833   /* Update weight to prevent circular convolution (MDF / AUMDF) */
834   for (chan = 0; chan < C; chan++)
835   {
836      for (speak = 0; speak < K; speak++)
837      {
838         for (j=0;j<M;j++)
839         {
840            /* This is a variant of the Alternatively Updated MDF (AUMDF) */
841            /* Remove the "if" to make this an MDF filter */
842            if (j==0 || st->cancel_count%(M-1) == j-1)
843            {
844#ifdef FIXED_POINT
845               for (i=0;i<N;i++)
846                  st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
847               spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
848               for (i=0;i<st->frame_size;i++)
849               {
850                  st->wtmp[i]=0;
851               }
852               for (i=st->frame_size;i<N;i++)
853               {
854                  st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
855               }
856               spx_fft(st->fft_table, st->wtmp, st->wtmp2);
857               /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
858               for (i=0;i<N;i++)
859                  st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
860#else
861               spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
862               for (i=st->frame_size;i<N;i++)
863               {
864                  st->wtmp[i]=0;
865               }
866               spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
867#endif
868            }
869         }
870      }
871   }
872
873   /* So we can use power_spectrum_accum */
874   for (i=0;i<=st->frame_size;i++)
875      st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
876
877   Dbf = 0;
878   See = 0;
879#ifdef TWO_PATH
880   /* Difference in response, this is used to estimate the variance of our residual power estimate */
881   for (chan = 0; chan < C; chan++)
882   {
883      spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
884      spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
885      for (i=0;i<st->frame_size;i++)
886         st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
887      Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
888      for (i=0;i<st->frame_size;i++)
889         st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
890      See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
891   }
892#endif
893
894#ifndef TWO_PATH
895   Sff = See;
896#endif
897
898#ifdef TWO_PATH
899   /* Logic for updating the foreground filter */
900
901   /* For two time windows, compute the mean of the energy difference, as well as the variance */
902   st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
903   st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
904   st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
905   st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
906
907   /* Equivalent float code:
908   st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
909   st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
910   st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
911   st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
912   */
913
914   update_foreground = 0;
915   /* Check if we have a statistically significant reduction in the residual echo */
916   /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
917   if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
918      update_foreground = 1;
919   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
920      update_foreground = 1;
921   else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
922      update_foreground = 1;
923
924   /* Do we update? */
925   if (update_foreground)
926   {
927      st->Davg1 = st->Davg2 = 0;
928      st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
929      /* Copy background filter to foreground filter */
930      for (i=0;i<N*M*C*K;i++)
931         st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
932      /* Apply a smooth transition so as to not introduce blocking artifacts */
933      for (chan = 0; chan < C; chan++)
934         for (i=0;i<st->frame_size;i++)
935            st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
936   } else {
937      int reset_background=0;
938      /* Otherwise, check if the background filter is significantly worse */
939      if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
940         reset_background = 1;
941      if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
942         reset_background = 1;
943      if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
944         reset_background = 1;
945      if (reset_background)
946      {
947         /* Copy foreground filter to background filter */
948         for (i=0;i<N*M*C*K;i++)
949            st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
950         /* We also need to copy the output so as to get correct adaptation */
951         for (chan = 0; chan < C; chan++)
952         {
953            for (i=0;i<st->frame_size;i++)
954               st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
955            for (i=0;i<st->frame_size;i++)
956               st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
957         }
958         See = Sff;
959         st->Davg1 = st->Davg2 = 0;
960         st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
961      }
962   }
963#endif
964
965   Sey = Syy = Sdd = 0;
966   for (chan = 0; chan < C; chan++)
967   {
968      /* Compute error signal (for the output with de-emphasis) */
969      for (i=0;i<st->frame_size;i++)
970      {
971         spx_word32_t tmp_out;
972#ifdef TWO_PATH
973         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
974#else
975         tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
976#endif
977         tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
978      /* This is an arbitrary test for saturation in the microphone signal */
979         if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
980         {
981         if (st->saturated == 0)
982            st->saturated = 1;
983         }
984         out[i*C+chan] = WORD2INT(tmp_out);
985         st->memE[chan] = tmp_out;
986      }
987
988#ifdef DUMP_ECHO_CANCEL_DATA
989      dump_audio(in, far_end, out, st->frame_size);
990#endif
991
992      /* Compute error signal (filter update version) */
993      for (i=0;i<st->frame_size;i++)
994      {
995         st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
996         st->e[chan*N+i] = 0;
997      }
998
999      /* Compute a bunch of correlations */
1000      /* FIXME: bad merge */
1001      Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1002      Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
1003      Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
1004
1005      /* Convert error to frequency domain */
1006      spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
1007      for (i=0;i<st->frame_size;i++)
1008         st->y[i+chan*N] = 0;
1009      spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
1010
1011      /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
1012      power_spectrum_accum(st->E+chan*N, st->Rf, N);
1013      power_spectrum_accum(st->Y+chan*N, st->Yf, N);
1014
1015   }
1016
1017   /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
1018
1019   /* Do some sanity check */
1020   if (!(Syy>=0 && Sxx>=0 && See >= 0)
1021#ifndef FIXED_POINT
1022       || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
1023#endif
1024      )
1025   {
1026      /* Things have gone really bad */
1027      st->screwed_up += 50;
1028      for (i=0;i<st->frame_size*C;i++)
1029         out[i] = 0;
1030   } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
1031   {
1032      /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
1033      st->screwed_up++;
1034   } else {
1035      /* Everything's fine */
1036      st->screwed_up=0;
1037   }
1038   if (st->screwed_up>=50)
1039   {
1040      speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
1041      speex_echo_state_reset(st);
1042      return;
1043   }
1044
1045   /* Add a small noise floor to make sure not to have problems when dividing */
1046   See = MAX32(See, SHR32(MULT16_16(N, 100),6));
1047
1048   for (speak = 0; speak < K; speak++)
1049   {
1050      Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
1051      power_spectrum_accum(st->X+speak*N, st->Xf, N);
1052   }
1053
1054
1055   /* Smooth far end energy estimate over time */
1056   for (j=0;j<=st->frame_size;j++)
1057      st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
1058
1059   /* Compute filtered spectra and (cross-)correlations */
1060   for (j=st->frame_size;j>=0;j--)
1061   {
1062      spx_float_t Eh, Yh;
1063      Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
1064      Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
1065      Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
1066      Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
1067#ifdef FIXED_POINT
1068      st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
1069      st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
1070#else
1071      st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
1072      st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
1073#endif
1074   }
1075
1076   Pyy = FLOAT_SQRT(Pyy);
1077   Pey = FLOAT_DIVU(Pey,Pyy);
1078
1079   /* Compute correlation updatete rate */
1080   tmp32 = MULT16_32_Q15(st->beta0,Syy);
1081   if (tmp32 > MULT16_32_Q15(st->beta_max,See))
1082      tmp32 = MULT16_32_Q15(st->beta_max,See);
1083   alpha = FLOAT_DIV32(tmp32, See);
1084   alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
1085   /* Update correlations (recursive average) */
1086   st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
1087   st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
1088   if (FLOAT_LT(st->Pyy, FLOAT_ONE))
1089      st->Pyy = FLOAT_ONE;
1090   /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
1091   if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
1092      st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
1093   if (FLOAT_GT(st->Pey, st->Pyy))
1094      st->Pey = st->Pyy;
1095   /* leak_estimate is the linear regression result */
1096   st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
1097   /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
1098   if (st->leak_estimate > 16383)
1099      st->leak_estimate = 32767;
1100   else
1101      st->leak_estimate = SHL16(st->leak_estimate,1);
1102   /*printf ("%f\n", st->leak_estimate);*/
1103
1104   /* Compute Residual to Error Ratio */
1105#ifdef FIXED_POINT
1106   tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
1107   tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
1108   /* Check for y in e (lower bound on RER) */
1109   {
1110      spx_float_t bound = PSEUDOFLOAT(Sey);
1111      bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
1112      if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
1113         tmp32 = See;
1114      else if (tmp32 < FLOAT_EXTRACT32(bound))
1115         tmp32 = FLOAT_EXTRACT32(bound);
1116   }
1117   if (tmp32 > SHR32(See,1))
1118      tmp32 = SHR32(See,1);
1119   RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
1120#else
1121   RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
1122   /* Check for y in e (lower bound on RER) */
1123   if (RER < Sey*Sey/(1+See*Syy))
1124      RER = Sey*Sey/(1+See*Syy);
1125   if (RER > .5)
1126      RER = .5;
1127#endif
1128
1129   /* We consider that the filter has had minimal adaptation if the following is true*/
1130   if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
1131   {
1132      st->adapted = 1;
1133   }
1134
1135   if (st->adapted)
1136   {
1137      /* Normal learning rate calculation once we're past the minimal adaptation phase */
1138      for (i=0;i<=st->frame_size;i++)
1139      {
1140         spx_word32_t r, e;
1141         /* Compute frequency-domain adaptation mask */
1142         r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
1143         e = SHL32(st->Rf[i],3)+1;
1144#ifdef FIXED_POINT
1145         if (r>SHR32(e,1))
1146            r = SHR32(e,1);
1147#else
1148         if (r>.5*e)
1149            r = .5*e;
1150#endif
1151         r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
1152         /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
1153         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
1154      }
1155   } else {
1156      /* Temporary adaption rate if filter is not yet adapted enough */
1157      spx_word16_t adapt_rate=0;
1158
1159      if (Sxx > SHR32(MULT16_16(N, 1000),6))
1160      {
1161         tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
1162#ifdef FIXED_POINT
1163         if (tmp32 > SHR32(See,2))
1164            tmp32 = SHR32(See,2);
1165#else
1166         if (tmp32 > .25*See)
1167            tmp32 = .25*See;
1168#endif
1169         adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
1170      }
1171      for (i=0;i<=st->frame_size;i++)
1172         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
1173
1174
1175      /* How much have we adapted so far? */
1176      st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
1177   }
1178
1179   /* FIXME: MC conversion required */
1180      for (i=0;i<st->frame_size;i++)
1181         st->last_y[i] = st->last_y[st->frame_size+i];
1182   if (st->adapted)
1183   {
1184      /* If the filter is adapted, take the filtered echo */
1185      for (i=0;i<st->frame_size;i++)
1186         st->last_y[st->frame_size+i] = in[i]-out[i];
1187   } else {
1188      /* If filter isn't adapted yet, all we can do is take the far end signal directly */
1189      /* moved earlier: for (i=0;i<N;i++)
1190      st->last_y[i] = st->x[i];*/
1191   }
1192
1193}
1194
1195/* Compute spectrum of estimated echo for use in an echo post-filter */
1196void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
1197{
1198   int i;
1199   spx_word16_t leak2;
1200   int N;
1201
1202   N = st->window_size;
1203
1204   /* Apply hanning window (should pre-compute it)*/
1205   for (i=0;i<N;i++)
1206      st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
1207
1208   /* Compute power spectrum of the echo */
1209   spx_fft(st->fft_table, st->y, st->Y);
1210   power_spectrum(st->Y, residual_echo, N);
1211
1212#ifdef FIXED_POINT
1213   if (st->leak_estimate > 16383)
1214      leak2 = 32767;
1215   else
1216      leak2 = SHL16(st->leak_estimate, 1);
1217#else
1218   if (st->leak_estimate>.5)
1219      leak2 = 1;
1220   else
1221      leak2 = 2*st->leak_estimate;
1222#endif
1223   /* Estimate residual echo */
1224   for (i=0;i<=st->frame_size;i++)
1225      residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
1226
1227}
1228
1229EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
1230{
1231   switch(request)
1232   {
1233
1234      case SPEEX_ECHO_GET_FRAME_SIZE:
1235         (*(int*)ptr) = st->frame_size;
1236         break;
1237      case SPEEX_ECHO_SET_SAMPLING_RATE:
1238         st->sampling_rate = (*(int*)ptr);
1239         st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
1240#ifdef FIXED_POINT
1241         st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
1242         st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
1243#else
1244         st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
1245         st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
1246#endif
1247         if (st->sampling_rate<12000)
1248            st->notch_radius = QCONST16(.9, 15);
1249         else if (st->sampling_rate<24000)
1250            st->notch_radius = QCONST16(.982, 15);
1251         else
1252            st->notch_radius = QCONST16(.992, 15);
1253         break;
1254      case SPEEX_ECHO_GET_SAMPLING_RATE:
1255         (*(int*)ptr) = st->sampling_rate;
1256         break;
1257      case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
1258         /*FIXME: Implement this for multiple channels */
1259         *((spx_int32_t *)ptr) = st->M * st->frame_size;
1260         break;
1261      case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
1262      {
1263         int M = st->M, N = st->window_size, n = st->frame_size, i, j;
1264         spx_int32_t *filt = (spx_int32_t *) ptr;
1265         for(j=0;j<M;j++)
1266         {
1267            /*FIXME: Implement this for multiple channels */
1268#ifdef FIXED_POINT
1269            for (i=0;i<N;i++)
1270               st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
1271            spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
1272#else
1273            spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
1274#endif
1275            for(i=0;i<n;i++)
1276               filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
1277         }
1278      }
1279         break;
1280      default:
1281         speex_warning_int("Unknown speex_echo_ctl request: ", request);
1282         return -1;
1283   }
1284   return 0;
1285}
1286