1/* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin)
2   Copyright (C) 2004-2006 Epic Games
3
4   File: preprocess.c
5   Preprocessor with denoising based on the algorithm by Ephraim and Malah
6
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions are
9   met:
10
11   1. Redistributions of source code must retain the above copyright notice,
12   this list of conditions and the following disclaimer.
13
14   2. Redistributions in binary form must reproduce the above copyright
15   notice, this list of conditions and the following disclaimer in the
16   documentation and/or other materials provided with the distribution.
17
18   3. The name of the author may not be used to endorse or promote products
19   derived from this software without specific prior written permission.
20
21   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
25   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
29   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31   POSSIBILITY OF SUCH DAMAGE.
32*/
33
34
35/*
36   Recommended papers:
37
38   Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
39   short-time spectral amplitude estimator". IEEE Transactions on Acoustics,
40   Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984.
41
42   Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
43   log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and
44   Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985.
45
46   I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments".
47   Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001.
48
49   Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic
50   approach to combined acoustic echo cancellation and noise reduction". IEEE
51   Transactions on Speech and Audio Processing, 2002.
52
53   J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation
54   of simultaneous non-stationary sources". In Proceedings IEEE International
55   Conference on Acoustics, Speech, and Signal Processing, 2004.
56*/
57
58#ifdef HAVE_CONFIG_H
59#include "config.h"
60#endif
61
62#include <math.h>
63#include "speex/speex_preprocess.h"
64#include "speex/speex_echo.h"
65#include "arch.h"
66#include "fftwrap.h"
67#include "filterbank.h"
68#include "math_approx.h"
69#include "os_support.h"
70
71#ifndef M_PI
72#define M_PI 3.14159263
73#endif
74
75#define LOUDNESS_EXP 5.f
76#define AMP_SCALE .001f
77#define AMP_SCALE_1 1000.f
78
79#define NB_BANDS 24
80
81#define SPEECH_PROB_START_DEFAULT       QCONST16(0.35f,15)
82#define SPEECH_PROB_CONTINUE_DEFAULT    QCONST16(0.20f,15)
83#define NOISE_SUPPRESS_DEFAULT       -15
84#define ECHO_SUPPRESS_DEFAULT        -40
85#define ECHO_SUPPRESS_ACTIVE_DEFAULT -15
86
87#ifndef NULL
88#define NULL 0
89#endif
90
91#define SQR(x) ((x)*(x))
92#define SQR16(x) (MULT16_16((x),(x)))
93#define SQR16_Q15(x) (MULT16_16_Q15((x),(x)))
94
95#ifdef FIXED_POINT
96static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b)
97{
98   if (SHR32(a,7) >= b)
99   {
100      return 32767;
101   } else {
102      if (b>=QCONST32(1,23))
103      {
104         a = SHR32(a,8);
105         b = SHR32(b,8);
106      }
107      if (b>=QCONST32(1,19))
108      {
109         a = SHR32(a,4);
110         b = SHR32(b,4);
111      }
112      if (b>=QCONST32(1,15))
113      {
114         a = SHR32(a,4);
115         b = SHR32(b,4);
116      }
117      a = SHL32(a,8);
118      return PDIV32_16(a,b);
119   }
120
121}
122static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b)
123{
124   if (SHR32(a,15) >= b)
125   {
126      return 32767;
127   } else {
128      if (b>=QCONST32(1,23))
129      {
130         a = SHR32(a,8);
131         b = SHR32(b,8);
132      }
133      if (b>=QCONST32(1,19))
134      {
135         a = SHR32(a,4);
136         b = SHR32(b,4);
137      }
138      if (b>=QCONST32(1,15))
139      {
140         a = SHR32(a,4);
141         b = SHR32(b,4);
142      }
143      a = SHL32(a,15)-a;
144      return DIV32_16(a,b);
145   }
146}
147#define SNR_SCALING 256.f
148#define SNR_SCALING_1 0.0039062f
149#define SNR_SHIFT 8
150
151#define FRAC_SCALING 32767.f
152#define FRAC_SCALING_1 3.0518e-05
153#define FRAC_SHIFT 1
154
155#define EXPIN_SCALING 2048.f
156#define EXPIN_SCALING_1 0.00048828f
157#define EXPIN_SHIFT 11
158#define EXPOUT_SCALING_1 1.5259e-05
159
160#define NOISE_SHIFT 7
161
162#else
163
164#define DIV32_16_Q8(a,b) ((a)/(b))
165#define DIV32_16_Q15(a,b) ((a)/(b))
166#define SNR_SCALING 1.f
167#define SNR_SCALING_1 1.f
168#define SNR_SHIFT 0
169#define FRAC_SCALING 1.f
170#define FRAC_SCALING_1 1.f
171#define FRAC_SHIFT 0
172#define NOISE_SHIFT 0
173
174#define EXPIN_SCALING 1.f
175#define EXPIN_SCALING_1 1.f
176#define EXPOUT_SCALING_1 1.f
177
178#endif
179
180/** Speex pre-processor state. */
181struct SpeexPreprocessState_ {
182   /* Basic info */
183   int    frame_size;        /**< Number of samples processed each time */
184   int    ps_size;           /**< Number of points in the power spectrum */
185   int    sampling_rate;     /**< Sampling rate of the input/output */
186   int    nbands;
187   FilterBank *bank;
188
189   /* Parameters */
190   int    denoise_enabled;
191   int    vad_enabled;
192   int    dereverb_enabled;
193   spx_word16_t  reverb_decay;
194   spx_word16_t  reverb_level;
195   spx_word16_t speech_prob_start;
196   spx_word16_t speech_prob_continue;
197   int    noise_suppress;
198   int    echo_suppress;
199   int    echo_suppress_active;
200   SpeexEchoState *echo_state;
201
202   spx_word16_t	speech_prob;  /**< Probability last frame was speech */
203
204   /* DSP-related arrays */
205   spx_word16_t *frame;      /**< Processing frame (2*ps_size) */
206   spx_word16_t *ft;         /**< Processing frame in freq domain (2*ps_size) */
207   spx_word32_t *ps;         /**< Current power spectrum */
208   spx_word16_t *gain2;      /**< Adjusted gains */
209   spx_word16_t *gain_floor; /**< Minimum gain allowed */
210   spx_word16_t *window;     /**< Analysis/Synthesis window */
211   spx_word32_t *noise;      /**< Noise estimate */
212   spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
213   spx_word32_t *old_ps;     /**< Power spectrum for last frame */
214   spx_word16_t *gain;       /**< Ephraim Malah gain */
215   spx_word16_t *prior;      /**< A-priori SNR */
216   spx_word16_t *post;       /**< A-posteriori SNR */
217
218   spx_word32_t *S;          /**< Smoothed power spectrum */
219   spx_word32_t *Smin;       /**< See Cohen paper */
220   spx_word32_t *Stmp;       /**< See Cohen paper */
221   int *update_prob;         /**< Probability of speech presence for noise update */
222
223   spx_word16_t *zeta;       /**< Smoothed a priori SNR */
224   spx_word32_t *echo_noise;
225   spx_word32_t *residual_echo;
226
227   /* Misc */
228   spx_word16_t *inbuf;      /**< Input buffer (overlapped analysis) */
229   spx_word16_t *outbuf;     /**< Output buffer (for overlap and add) */
230
231   /* AGC stuff, only for floating point for now */
232#ifndef FIXED_POINT
233   int    agc_enabled;
234   float  agc_level;
235   float  loudness_accum;
236   float *loudness_weight;   /**< Perceptual loudness curve */
237   float  loudness;          /**< Loudness estimate */
238   float  agc_gain;          /**< Current AGC gain */
239   float  max_gain;          /**< Maximum gain allowed */
240   float  max_increase_step; /**< Maximum increase in gain from one frame to another */
241   float  max_decrease_step; /**< Maximum decrease in gain from one frame to another */
242   float  prev_loudness;     /**< Loudness of previous frame */
243   float  init_max;          /**< Current gain limit during initialisation */
244#endif
245   int    nb_adapt;          /**< Number of frames used for adaptation so far */
246   int    was_speech;
247   int    min_count;         /**< Number of frames processed so far */
248   void  *fft_lookup;        /**< Lookup table for the FFT */
249#ifdef FIXED_POINT
250   int    frame_shift;
251#endif
252};
253
254
255static void conj_window(spx_word16_t *w, int len)
256{
257   int i;
258   for (i=0;i<len;i++)
259   {
260      spx_word16_t tmp;
261#ifdef FIXED_POINT
262      spx_word16_t x = DIV32_16(MULT16_16(32767,i),len);
263#else
264      spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len);
265#endif
266      int inv=0;
267      if (x<QCONST16(1.f,13))
268      {
269      } else if (x<QCONST16(2.f,13))
270      {
271         x=QCONST16(2.f,13)-x;
272         inv=1;
273      } else if (x<QCONST16(3.f,13))
274      {
275         x=x-QCONST16(2.f,13);
276         inv=1;
277      } else {
278         x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */
279      }
280      x = MULT16_16_Q14(QCONST16(1.271903f,14), x);
281      tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2))));
282      if (inv)
283         tmp=SUB16(Q15_ONE,tmp);
284      w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15));
285   }
286}
287
288
289#ifdef FIXED_POINT
290/* This function approximates the gain function
291   y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
292   which multiplied by xi/(1+xi) is the optimal gain
293   in the loudness domain ( sqrt[amplitude] )
294   Input in Q11 format, output in Q15
295*/
296static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
297{
298   int ind;
299   spx_word16_t frac;
300   /* Q13 table */
301   static const spx_word16_t table[21] = {
302       6730,  8357,  9868, 11267, 12563, 13770, 14898,
303      15959, 16961, 17911, 18816, 19682, 20512, 21311,
304      22082, 22827, 23549, 24250, 24931, 25594, 26241};
305      ind = SHR32(xx,10);
306      if (ind<0)
307         return Q15_ONE;
308      if (ind>19)
309         return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT))));
310      frac = SHL32(xx-SHL32(ind,10),5);
311      return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7);
312}
313
314static inline spx_word16_t qcurve(spx_word16_t x)
315{
316   x = MAX16(x, 1);
317   return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x))));
318}
319
320/* Compute the gain floor based on different floors for the background noise and residual echo */
321static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
322{
323   int i;
324
325   if (noise_suppress > effective_echo_suppress)
326   {
327      spx_word16_t noise_gain, gain_ratio;
328      noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1)));
329      gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1)));
330
331      /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
332      for (i=0;i<len;i++)
333         gain_floor[i] = MULT16_16_Q15(noise_gain,
334                                       spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]),
335                                             (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
336   } else {
337      spx_word16_t echo_gain, gain_ratio;
338      echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1)));
339      gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1)));
340
341      /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
342      for (i=0;i<len;i++)
343         gain_floor[i] = MULT16_16_Q15(echo_gain,
344                                       spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i],
345                                             (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
346   }
347}
348
349#else
350/* This function approximates the gain function
351   y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
352   which multiplied by xi/(1+xi) is the optimal gain
353   in the loudness domain ( sqrt[amplitude] )
354*/
355static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
356{
357   int ind;
358   float integer, frac;
359   float x;
360   static const float table[21] = {
361      0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
362      1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
363      2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
364      x = EXPIN_SCALING_1*xx;
365      integer = floor(2*x);
366      ind = (int)integer;
367      if (ind<0)
368         return FRAC_SCALING;
369      if (ind>19)
370         return FRAC_SCALING*(1+.1296/x);
371      frac = 2*x-integer;
372      return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
373}
374
375static inline spx_word16_t qcurve(spx_word16_t x)
376{
377   return 1.f/(1.f+.15f/(SNR_SCALING_1*x));
378}
379
380static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
381{
382   int i;
383   float echo_floor;
384   float noise_floor;
385
386   noise_floor = exp(.2302585f*noise_suppress);
387   echo_floor = exp(.2302585f*effective_echo_suppress);
388
389   /* Compute the gain floor based on different floors for the background noise and residual echo */
390   for (i=0;i<len;i++)
391      gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]);
392}
393
394#endif
395EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
396{
397   int i;
398   int N, N3, N4, M;
399
400   SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState));
401   st->frame_size = frame_size;
402
403   /* Round ps_size down to the nearest power of two */
404#if 0
405   i=1;
406   st->ps_size = st->frame_size;
407   while(1)
408   {
409      if (st->ps_size & ~i)
410      {
411         st->ps_size &= ~i;
412         i<<=1;
413      } else {
414         break;
415      }
416   }
417
418
419   if (st->ps_size < 3*st->frame_size/4)
420      st->ps_size = st->ps_size * 3 / 2;
421#else
422   st->ps_size = st->frame_size;
423#endif
424
425   N = st->ps_size;
426   N3 = 2*N - st->frame_size;
427   N4 = st->frame_size - N3;
428
429   st->sampling_rate = sampling_rate;
430   st->denoise_enabled = 1;
431   st->vad_enabled = 0;
432   st->dereverb_enabled = 0;
433   st->reverb_decay = 0;
434   st->reverb_level = 0;
435   st->noise_suppress = NOISE_SUPPRESS_DEFAULT;
436   st->echo_suppress = ECHO_SUPPRESS_DEFAULT;
437   st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;
438
439   st->speech_prob_start = SPEECH_PROB_START_DEFAULT;
440   st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;
441
442   st->echo_state = NULL;
443
444   st->nbands = NB_BANDS;
445   M = st->nbands;
446   st->bank = filterbank_new(M, sampling_rate, N, 1);
447
448   st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
449   st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
450   st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
451
452   st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
453   st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
454   st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
455   st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
456   st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
457   st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
458   st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
459   st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
460   st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
461   st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
462   st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
463   st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
464
465   st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
466   st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
467   st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
468   st->update_prob = (int*)speex_alloc(N*sizeof(int));
469
470   st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
471   st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
472
473   conj_window(st->window, 2*N3);
474   for (i=2*N3;i<2*st->ps_size;i++)
475      st->window[i]=Q15_ONE;
476
477   if (N4>0)
478   {
479      for (i=N3-1;i>=0;i--)
480      {
481         st->window[i+N3+N4]=st->window[i+N3];
482         st->window[i+N3]=1;
483      }
484   }
485   for (i=0;i<N+M;i++)
486   {
487      st->noise[i]=QCONST32(1.f,NOISE_SHIFT);
488      st->reverb_estimate[i]=0;
489      st->old_ps[i]=1;
490      st->gain[i]=Q15_ONE;
491      st->post[i]=SHL16(1, SNR_SHIFT);
492      st->prior[i]=SHL16(1, SNR_SHIFT);
493   }
494
495   for (i=0;i<N;i++)
496      st->update_prob[i] = 1;
497   for (i=0;i<N3;i++)
498   {
499      st->inbuf[i]=0;
500      st->outbuf[i]=0;
501   }
502#ifndef FIXED_POINT
503   st->agc_enabled = 0;
504   st->agc_level = 8000;
505   st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
506   for (i=0;i<N;i++)
507   {
508      float ff=((float)i)*.5*sampling_rate/((float)N);
509      /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/
510      st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);
511      if (st->loudness_weight[i]<.01f)
512         st->loudness_weight[i]=.01f;
513      st->loudness_weight[i] *= st->loudness_weight[i];
514   }
515   /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/
516   st->loudness = 1e-15;
517   st->agc_gain = 1;
518   st->max_gain = 30;
519   st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);
520   st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);
521   st->prev_loudness = 1;
522   st->init_max = 1;
523#endif
524   st->was_speech = 0;
525
526   st->fft_lookup = spx_fft_init(2*N);
527
528   st->nb_adapt=0;
529   st->min_count=0;
530   return st;
531}
532
533EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st)
534{
535   speex_free(st->frame);
536   speex_free(st->ft);
537   speex_free(st->ps);
538   speex_free(st->gain2);
539   speex_free(st->gain_floor);
540   speex_free(st->window);
541   speex_free(st->noise);
542   speex_free(st->reverb_estimate);
543   speex_free(st->old_ps);
544   speex_free(st->gain);
545   speex_free(st->prior);
546   speex_free(st->post);
547#ifndef FIXED_POINT
548   speex_free(st->loudness_weight);
549#endif
550   speex_free(st->echo_noise);
551   speex_free(st->residual_echo);
552
553   speex_free(st->S);
554   speex_free(st->Smin);
555   speex_free(st->Stmp);
556   speex_free(st->update_prob);
557   speex_free(st->zeta);
558
559   speex_free(st->inbuf);
560   speex_free(st->outbuf);
561
562   spx_fft_destroy(st->fft_lookup);
563   filterbank_destroy(st->bank);
564   speex_free(st);
565}
566
567/* FIXME: The AGC doesn't work yet with fixed-point*/
568#ifndef FIXED_POINT
569static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft)
570{
571   int i;
572   int N = st->ps_size;
573   float target_gain;
574   float loudness=1.f;
575   float rate;
576
577   for (i=2;i<N;i++)
578   {
579      loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];
580   }
581   loudness=sqrt(loudness);
582      /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
583   loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
584   if (Pframe>.3f)
585   {
586      /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/
587      rate = .03*Pframe*Pframe;
588      st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);
589      st->loudness_accum = (1-rate)*st->loudness_accum + rate;
590      if (st->init_max < st->max_gain && st->nb_adapt > 20)
591         st->init_max *= 1.f + .1f*Pframe*Pframe;
592   }
593   /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/
594
595   target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);
596
597   if ((Pframe>.5  && st->nb_adapt > 20) || target_gain < st->agc_gain)
598   {
599      if (target_gain > st->max_increase_step*st->agc_gain)
600         target_gain = st->max_increase_step*st->agc_gain;
601      if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)
602         target_gain = st->max_decrease_step*st->agc_gain;
603      if (target_gain > st->max_gain)
604         target_gain = st->max_gain;
605      if (target_gain > st->init_max)
606         target_gain = st->init_max;
607
608      st->agc_gain = target_gain;
609   }
610   /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/
611
612   for (i=0;i<2*N;i++)
613      ft[i] *= st->agc_gain;
614   st->prev_loudness = loudness;
615}
616#endif
617
618static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
619{
620   int i;
621   int N = st->ps_size;
622   int N3 = 2*N - st->frame_size;
623   int N4 = st->frame_size - N3;
624   spx_word32_t *ps=st->ps;
625
626   /* 'Build' input frame */
627   for (i=0;i<N3;i++)
628      st->frame[i]=st->inbuf[i];
629   for (i=0;i<st->frame_size;i++)
630      st->frame[N3+i]=x[i];
631
632   /* Update inbuf */
633   for (i=0;i<N3;i++)
634      st->inbuf[i]=x[N4+i];
635
636   /* Windowing */
637   for (i=0;i<2*N;i++)
638      st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
639
640#ifdef FIXED_POINT
641   {
642      spx_word16_t max_val=0;
643      for (i=0;i<2*N;i++)
644         max_val = MAX16(max_val, ABS16(st->frame[i]));
645      st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
646      for (i=0;i<2*N;i++)
647         st->frame[i] = SHL16(st->frame[i], st->frame_shift);
648   }
649#endif
650
651   /* Perform FFT */
652   spx_fft(st->fft_lookup, st->frame, st->ft);
653
654   /* Power spectrum */
655   ps[0]=MULT16_16(st->ft[0],st->ft[0]);
656   for (i=1;i<N;i++)
657      ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
658   for (i=0;i<N;i++)
659      st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
660
661   filterbank_compute_bank32(st->bank, ps, ps+N);
662}
663
664static void update_noise_prob(SpeexPreprocessState *st)
665{
666   int i;
667   int min_range;
668   int N = st->ps_size;
669
670   for (i=1;i<N-1;i++)
671      st->S[i] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])
672                      + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);
673   st->S[0] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);
674   st->S[N-1] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);
675
676   if (st->nb_adapt==1)
677   {
678      for (i=0;i<N;i++)
679         st->Smin[i] = st->Stmp[i] = 0;
680   }
681
682   if (st->nb_adapt < 100)
683      min_range = 15;
684   else if (st->nb_adapt < 1000)
685      min_range = 50;
686   else if (st->nb_adapt < 10000)
687      min_range = 150;
688   else
689      min_range = 300;
690   if (st->min_count > min_range)
691   {
692      st->min_count = 0;
693      for (i=0;i<N;i++)
694      {
695         st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);
696         st->Stmp[i] = st->S[i];
697      }
698   } else {
699      for (i=0;i<N;i++)
700      {
701         st->Smin[i] = MIN32(st->Smin[i], st->S[i]);
702         st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);
703      }
704   }
705   for (i=0;i<N;i++)
706   {
707      if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i])
708         st->update_prob[i] = 1;
709      else
710         st->update_prob[i] = 0;
711      /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/
712      /*fprintf (stderr, "%f ", st->update_prob[i]);*/
713   }
714
715}
716
717#define NOISE_OVERCOMPENS 1.
718
719void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
720
721EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
722{
723   return speex_preprocess_run(st, x);
724}
725
726EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x)
727{
728   int i;
729   int M;
730   int N = st->ps_size;
731   int N3 = 2*N - st->frame_size;
732   int N4 = st->frame_size - N3;
733   spx_word32_t *ps=st->ps;
734   spx_word32_t Zframe;
735   spx_word16_t Pframe;
736   spx_word16_t beta, beta_1;
737   spx_word16_t effective_echo_suppress;
738
739   st->nb_adapt++;
740   if (st->nb_adapt>20000)
741      st->nb_adapt = 20000;
742   st->min_count++;
743
744   beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
745   beta_1 = Q15_ONE-beta;
746   M = st->nbands;
747   /* Deal with residual echo if provided */
748   if (st->echo_state)
749   {
750      speex_echo_get_residual(st->echo_state, st->residual_echo, N);
751#ifndef FIXED_POINT
752      /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */
753      if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))
754      {
755         for (i=0;i<N;i++)
756            st->residual_echo[i] = 0;
757      }
758#endif
759      for (i=0;i<N;i++)
760         st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);
761      filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);
762   } else {
763      for (i=0;i<N+M;i++)
764         st->echo_noise[i] = 0;
765   }
766   preprocess_analysis(st, x);
767
768   update_noise_prob(st);
769
770   /* Noise estimation always updated for the 10 first frames */
771   /*if (st->nb_adapt<10)
772   {
773      for (i=1;i<N-1;i++)
774         st->update_prob[i] = 0;
775   }
776   */
777
778   /* Update the noise estimate for the frequencies where it can be */
779   for (i=0;i<N;i++)
780   {
781      if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
782         st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
783   }
784   filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
785
786   /* Special case for first frame */
787   if (st->nb_adapt==1)
788      for (i=0;i<N+M;i++)
789         st->old_ps[i] = ps[i];
790
791   /* Compute a posteriori SNR */
792   for (i=0;i<N+M;i++)
793   {
794      spx_word16_t gamma;
795
796      /* Total noise estimate including residual echo and reverberation */
797      spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
798
799      /* A posteriori SNR = ps/noise - 1*/
800      st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
801      st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
802
803      /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
804      gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
805
806      /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
807      st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
808      st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
809   }
810
811   /*print_vec(st->post, N+M, "");*/
812
813   /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
814   st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
815   for (i=1;i<N-1;i++)
816      st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
817                           MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
818   for (i=N-1;i<N+M;i++)
819      st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
820
821   /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
822   Zframe = 0;
823   for (i=N;i<N+M;i++)
824      Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
825   Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
826
827   effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
828
829   compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
830
831   /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
832      Technically this is actually wrong because the EM gaim assumes a slightly different probability
833      distribution */
834   for (i=N;i<N+M;i++)
835   {
836      /* See EM and Cohen papers*/
837      spx_word32_t theta;
838      /* Gain from hypergeometric function */
839      spx_word32_t MM;
840      /* Weiner filter gain */
841      spx_word16_t prior_ratio;
842      /* a priority probability of speech presence based on Bark sub-band alone */
843      spx_word16_t P1;
844      /* Speech absence a priori probability (considering sub-band and frame) */
845      spx_word16_t q;
846#ifdef FIXED_POINT
847      spx_word16_t tmp;
848#endif
849
850      prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
851      theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
852
853      MM = hypergeom_gain(theta);
854      /* Gain with bound */
855      st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
856      /* Save old Bark power spectrum */
857      st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
858
859      P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
860      q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
861#ifdef FIXED_POINT
862      theta = MIN32(theta, EXTEND32(32767));
863/*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
864      tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
865/*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
866      st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
867#else
868      st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
869#endif
870   }
871   /* Convert the EM gains and speech prob to linear frequency */
872   filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
873   filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
874
875   /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
876   if (1)
877   {
878      filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
879
880      /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
881      for (i=0;i<N;i++)
882      {
883         spx_word32_t MM;
884         spx_word32_t theta;
885         spx_word16_t prior_ratio;
886         spx_word16_t tmp;
887         spx_word16_t p;
888         spx_word16_t g;
889
890         /* Wiener filter gain */
891         prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
892         theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
893
894         /* Optimal estimator for loudness domain */
895         MM = hypergeom_gain(theta);
896         /* EM gain with bound */
897         g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
898         /* Interpolated speech probability of presence */
899         p = st->gain2[i];
900
901         /* Constrain the gain to be close to the Bark scale gain */
902         if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
903            g = MULT16_16(3,st->gain[i]);
904         st->gain[i] = g;
905
906         /* Save old power spectrum */
907         st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
908
909         /* Apply gain floor */
910         if (st->gain[i] < st->gain_floor[i])
911            st->gain[i] = st->gain_floor[i];
912
913         /* Exponential decay model for reverberation (unused) */
914         /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
915
916         /* Take into account speech probability of presence (loudness domain MMSE estimator) */
917         /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
918         tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
919         st->gain2[i]=SQR16_Q15(tmp);
920
921         /* Use this if you want a log-domain MMSE estimator instead */
922         /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
923      }
924   } else {
925      for (i=N;i<N+M;i++)
926      {
927         spx_word16_t tmp;
928         spx_word16_t p = st->gain2[i];
929         st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
930         tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
931         st->gain2[i]=SQR16_Q15(tmp);
932      }
933      filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
934   }
935
936   /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
937   if (!st->denoise_enabled)
938   {
939      for (i=0;i<N+M;i++)
940         st->gain2[i]=Q15_ONE;
941   }
942
943   /* Apply computed gain */
944   for (i=1;i<N;i++)
945   {
946      st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
947      st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
948   }
949   st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
950   st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
951
952   /*FIXME: This *will* not work for fixed-point */
953#ifndef FIXED_POINT
954   if (st->agc_enabled)
955      speex_compute_agc(st, Pframe, st->ft);
956#endif
957
958   /* Inverse FFT with 1/N scaling */
959   spx_ifft(st->fft_lookup, st->ft, st->frame);
960   /* Scale back to original (lower) amplitude */
961   for (i=0;i<2*N;i++)
962      st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
963
964   /*FIXME: This *will* not work for fixed-point */
965#ifndef FIXED_POINT
966   if (st->agc_enabled)
967   {
968      float max_sample=0;
969      for (i=0;i<2*N;i++)
970         if (fabs(st->frame[i])>max_sample)
971            max_sample = fabs(st->frame[i]);
972      if (max_sample>28000.f)
973      {
974         float damp = 28000.f/max_sample;
975         for (i=0;i<2*N;i++)
976            st->frame[i] *= damp;
977      }
978   }
979#endif
980
981   /* Synthesis window (for WOLA) */
982   for (i=0;i<2*N;i++)
983      st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
984
985   /* Perform overlap and add */
986   for (i=0;i<N3;i++)
987      x[i] = st->outbuf[i] + st->frame[i];
988   for (i=0;i<N4;i++)
989      x[N3+i] = st->frame[N3+i];
990
991   /* Update outbuf */
992   for (i=0;i<N3;i++)
993      st->outbuf[i] = st->frame[st->frame_size+i];
994
995   /* FIXME: This VAD is a kludge */
996   st->speech_prob = Pframe;
997   if (st->vad_enabled)
998   {
999      if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue))
1000      {
1001         st->was_speech=1;
1002         return 1;
1003      } else
1004      {
1005         st->was_speech=0;
1006         return 0;
1007      }
1008   } else {
1009      return 1;
1010   }
1011}
1012
1013EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
1014{
1015   int i;
1016   int N = st->ps_size;
1017   int N3 = 2*N - st->frame_size;
1018   int M;
1019   spx_word32_t *ps=st->ps;
1020
1021   M = st->nbands;
1022   st->min_count++;
1023
1024   preprocess_analysis(st, x);
1025
1026   update_noise_prob(st);
1027
1028   for (i=1;i<N-1;i++)
1029   {
1030      if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
1031      {
1032         st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
1033      }
1034   }
1035
1036   for (i=0;i<N3;i++)
1037      st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
1038
1039   /* Save old power spectrum */
1040   for (i=0;i<N+M;i++)
1041      st->old_ps[i] = ps[i];
1042
1043   for (i=0;i<N;i++)
1044      st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
1045}
1046
1047
1048EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
1049{
1050   int i;
1051   SpeexPreprocessState *st;
1052   st=(SpeexPreprocessState*)state;
1053   switch(request)
1054   {
1055   case SPEEX_PREPROCESS_SET_DENOISE:
1056      st->denoise_enabled = (*(spx_int32_t*)ptr);
1057      break;
1058   case SPEEX_PREPROCESS_GET_DENOISE:
1059      (*(spx_int32_t*)ptr) = st->denoise_enabled;
1060      break;
1061#ifndef FIXED_POINT
1062   case SPEEX_PREPROCESS_SET_AGC:
1063      st->agc_enabled = (*(spx_int32_t*)ptr);
1064      break;
1065   case SPEEX_PREPROCESS_GET_AGC:
1066      (*(spx_int32_t*)ptr) = st->agc_enabled;
1067      break;
1068#ifndef DISABLE_FLOAT_API
1069   case SPEEX_PREPROCESS_SET_AGC_LEVEL:
1070      st->agc_level = (*(float*)ptr);
1071      if (st->agc_level<1)
1072         st->agc_level=1;
1073      if (st->agc_level>32768)
1074         st->agc_level=32768;
1075      break;
1076   case SPEEX_PREPROCESS_GET_AGC_LEVEL:
1077      (*(float*)ptr) = st->agc_level;
1078      break;
1079#endif /* #ifndef DISABLE_FLOAT_API */
1080   case SPEEX_PREPROCESS_SET_AGC_INCREMENT:
1081      st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1082      break;
1083   case SPEEX_PREPROCESS_GET_AGC_INCREMENT:
1084      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);
1085      break;
1086   case SPEEX_PREPROCESS_SET_AGC_DECREMENT:
1087      st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
1088      break;
1089   case SPEEX_PREPROCESS_GET_AGC_DECREMENT:
1090      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);
1091      break;
1092   case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:
1093      st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));
1094      break;
1095   case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:
1096      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));
1097      break;
1098#endif
1099   case SPEEX_PREPROCESS_SET_VAD:
1100      // Disabled by mlebeau, we don't want this in the launched Google Mobile App for iPhone.
1101      //speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
1102      st->vad_enabled = (*(spx_int32_t*)ptr);
1103      break;
1104   case SPEEX_PREPROCESS_GET_VAD:
1105      (*(spx_int32_t*)ptr) = st->vad_enabled;
1106      break;
1107
1108   case SPEEX_PREPROCESS_SET_DEREVERB:
1109      st->dereverb_enabled = (*(spx_int32_t*)ptr);
1110      for (i=0;i<st->ps_size;i++)
1111         st->reverb_estimate[i]=0;
1112      break;
1113   case SPEEX_PREPROCESS_GET_DEREVERB:
1114      (*(spx_int32_t*)ptr) = st->dereverb_enabled;
1115      break;
1116
1117   case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
1118      /* FIXME: Re-enable when de-reverberation is actually enabled again */
1119      /*st->reverb_level = (*(float*)ptr);*/
1120      break;
1121   case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
1122      /* FIXME: Re-enable when de-reverberation is actually enabled again */
1123      /*(*(float*)ptr) = st->reverb_level;*/
1124      break;
1125
1126   case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
1127      /* FIXME: Re-enable when de-reverberation is actually enabled again */
1128      /*st->reverb_decay = (*(float*)ptr);*/
1129      break;
1130   case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
1131      /* FIXME: Re-enable when de-reverberation is actually enabled again */
1132      /*(*(float*)ptr) = st->reverb_decay;*/
1133      break;
1134
1135   case SPEEX_PREPROCESS_SET_PROB_START:
1136      *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1137      st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1138      break;
1139   case SPEEX_PREPROCESS_GET_PROB_START:
1140      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
1141      break;
1142
1143   case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
1144      *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
1145      st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
1146      break;
1147   case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
1148      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
1149      break;
1150
1151   case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
1152      st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
1153      break;
1154   case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
1155      (*(spx_int32_t*)ptr) = st->noise_suppress;
1156      break;
1157   case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
1158      st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
1159      break;
1160   case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
1161      (*(spx_int32_t*)ptr) = st->echo_suppress;
1162      break;
1163   case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
1164      st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
1165      break;
1166   case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
1167      (*(spx_int32_t*)ptr) = st->echo_suppress_active;
1168      break;
1169   case SPEEX_PREPROCESS_SET_ECHO_STATE:
1170      st->echo_state = (SpeexEchoState*)ptr;
1171      break;
1172   case SPEEX_PREPROCESS_GET_ECHO_STATE:
1173      (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state;
1174      break;
1175#ifndef FIXED_POINT
1176   case SPEEX_PREPROCESS_GET_AGC_LOUDNESS:
1177      (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP);
1178      break;
1179   case SPEEX_PREPROCESS_GET_AGC_GAIN:
1180      (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain));
1181      break;
1182#endif
1183   case SPEEX_PREPROCESS_GET_PSD_SIZE:
1184   case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE:
1185      (*(spx_int32_t*)ptr) = st->ps_size;
1186      break;
1187   case SPEEX_PREPROCESS_GET_PSD:
1188      for(i=0;i<st->ps_size;i++)
1189      	((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i];
1190      break;
1191   case SPEEX_PREPROCESS_GET_NOISE_PSD:
1192      for(i=0;i<st->ps_size;i++)
1193      	((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT);
1194      break;
1195   case SPEEX_PREPROCESS_GET_PROB:
1196      (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100);
1197      break;
1198#ifndef FIXED_POINT
1199   case SPEEX_PREPROCESS_SET_AGC_TARGET:
1200      st->agc_level = (*(spx_int32_t*)ptr);
1201      if (st->agc_level<1)
1202         st->agc_level=1;
1203      if (st->agc_level>32768)
1204         st->agc_level=32768;
1205      break;
1206   case SPEEX_PREPROCESS_GET_AGC_TARGET:
1207      (*(spx_int32_t*)ptr) = st->agc_level;
1208      break;
1209#endif
1210   default:
1211      speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
1212      return -1;
1213   }
1214   return 0;
1215}
1216
1217#ifdef FIXED_DEBUG
1218long long spx_mips=0;
1219#endif
1220
1221