1/* Copyright (C) 2002-2006 Jean-Marc Valin
2   File: filters.c
3   Various analysis/synthesis filters
4
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions
7   are met:
8
9   - Redistributions of source code must retain the above copyright
10   notice, this list of conditions and the following disclaimer.
11
12   - Redistributions in binary form must reproduce the above copyright
13   notice, this list of conditions and the following disclaimer in the
14   documentation and/or other materials provided with the distribution.
15
16   - Neither the name of the Xiph.org Foundation nor the names of its
17   contributors may be used to endorse or promote products derived from
18   this software without specific prior written permission.
19
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
24   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#include "filters.h"
38#include "stack_alloc.h"
39#include "arch.h"
40#include "math_approx.h"
41#include "ltp.h"
42#include <math.h>
43
44#ifdef _USE_SSE
45#include "filters_sse.h"
46#elif defined (ARM4_ASM) || defined(ARM5E_ASM)
47#include "filters_arm4.h"
48#elif defined (BFIN_ASM)
49#include "filters_bfin.h"
50#endif
51
52
53
54void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order)
55{
56   int i;
57   spx_word16_t tmp=gamma;
58   for (i=0;i<order;i++)
59   {
60      lpc_out[i] = MULT16_16_P15(tmp,lpc_in[i]);
61      tmp = MULT16_16_P15(tmp, gamma);
62   }
63}
64
65void sanitize_values32(spx_word32_t *vec, spx_word32_t min_val, spx_word32_t max_val, int len)
66{
67   int i;
68   for (i=0;i<len;i++)
69   {
70      /* It's important we do the test that way so we can catch NaNs, which are neither greater nor smaller */
71      if (!(vec[i]>=min_val && vec[i] <= max_val))
72      {
73         if (vec[i] < min_val)
74            vec[i] = min_val;
75         else if (vec[i] > max_val)
76            vec[i] = max_val;
77         else /* Has to be NaN */
78            vec[i] = 0;
79      }
80   }
81}
82
83void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_mem_t *mem)
84{
85   int i;
86#ifdef FIXED_POINT
87   const spx_word16_t Pcoef[5][3] = {{16384, -31313, 14991}, {16384, -31569, 15249}, {16384, -31677, 15328}, {16384, -32313, 15947}, {16384, -22446, 6537}};
88   const spx_word16_t Zcoef[5][3] = {{15672, -31344, 15672}, {15802, -31601, 15802}, {15847, -31694, 15847}, {16162, -32322, 16162}, {14418, -28836, 14418}};
89#else
90   const spx_word16_t Pcoef[5][3] = {{1.00000f, -1.91120f, 0.91498f}, {1.00000f, -1.92683f, 0.93071f}, {1.00000f, -1.93338f, 0.93553f}, {1.00000f, -1.97226f, 0.97332f}, {1.00000f, -1.37000f, 0.39900f}};
91   const spx_word16_t Zcoef[5][3] = {{0.95654f, -1.91309f, 0.95654f}, {0.96446f, -1.92879f, 0.96446f}, {0.96723f, -1.93445f, 0.96723f}, {0.98645f, -1.97277f, 0.98645f}, {0.88000f, -1.76000f, 0.88000f}};
92#endif
93   const spx_word16_t *den, *num;
94   if (filtID>4)
95      filtID=4;
96
97   den = Pcoef[filtID]; num = Zcoef[filtID];
98   /*return;*/
99   for (i=0;i<len;i++)
100   {
101      spx_word16_t yi;
102      spx_word32_t vout = ADD32(MULT16_16(num[0], x[i]),mem[0]);
103      yi = EXTRACT16(SATURATE(PSHR32(vout,14),32767));
104      mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), SHL32(MULT16_32_Q15(-den[1],vout),1));
105      mem[1] = ADD32(MULT16_16(num[2],x[i]), SHL32(MULT16_32_Q15(-den[2],vout),1));
106      y[i] = yi;
107   }
108}
109
110#ifdef FIXED_POINT
111
112/* FIXME: These functions are ugly and probably introduce too much error */
113void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
114{
115   int i;
116   for (i=0;i<len;i++)
117   {
118      y[i] = SHL32(MULT16_32_Q14(EXTRACT16(SHR32(x[i],7)),scale),7);
119   }
120}
121
122void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len)
123{
124   int i;
125   if (scale > SHL32(EXTEND32(SIG_SCALING), 8))
126   {
127      spx_word16_t scale_1;
128      scale = PSHR32(scale, SIG_SHIFT);
129      scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
130      for (i=0;i<len;i++)
131      {
132         y[i] = MULT16_16_P15(scale_1, x[i]);
133      }
134   } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) {
135      spx_word16_t scale_1;
136      scale = PSHR32(scale, SIG_SHIFT-5);
137      scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
138      for (i=0;i<len;i++)
139      {
140         y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8);
141      }
142   } else {
143      spx_word16_t scale_1;
144      scale = PSHR32(scale, SIG_SHIFT-7);
145      if (scale < 5)
146         scale = 5;
147      scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
148      for (i=0;i<len;i++)
149      {
150         y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6);
151      }
152   }
153}
154
155#else
156
157void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
158{
159   int i;
160   for (i=0;i<len;i++)
161      y[i] = scale*x[i];
162}
163
164void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
165{
166   int i;
167   float scale_1 = 1/scale;
168   for (i=0;i<len;i++)
169      y[i] = scale_1*x[i];
170}
171#endif
172
173
174
175#ifdef FIXED_POINT
176
177
178
179spx_word16_t compute_rms(const spx_sig_t *x, int len)
180{
181   int i;
182   spx_word32_t sum=0;
183   spx_sig_t max_val=1;
184   int sig_shift;
185
186   for (i=0;i<len;i++)
187   {
188      spx_sig_t tmp = x[i];
189      if (tmp<0)
190         tmp = -tmp;
191      if (tmp > max_val)
192         max_val = tmp;
193   }
194
195   sig_shift=0;
196   while (max_val>16383)
197   {
198      sig_shift++;
199      max_val >>= 1;
200   }
201
202   for (i=0;i<len;i+=4)
203   {
204      spx_word32_t sum2=0;
205      spx_word16_t tmp;
206      tmp = EXTRACT16(SHR32(x[i],sig_shift));
207      sum2 = MAC16_16(sum2,tmp,tmp);
208      tmp = EXTRACT16(SHR32(x[i+1],sig_shift));
209      sum2 = MAC16_16(sum2,tmp,tmp);
210      tmp = EXTRACT16(SHR32(x[i+2],sig_shift));
211      sum2 = MAC16_16(sum2,tmp,tmp);
212      tmp = EXTRACT16(SHR32(x[i+3],sig_shift));
213      sum2 = MAC16_16(sum2,tmp,tmp);
214      sum = ADD32(sum,SHR32(sum2,6));
215   }
216
217   return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
218}
219
220spx_word16_t compute_rms16(const spx_word16_t *x, int len)
221{
222   int i;
223   spx_word16_t max_val=10;
224
225   for (i=0;i<len;i++)
226   {
227      spx_sig_t tmp = x[i];
228      if (tmp<0)
229         tmp = -tmp;
230      if (tmp > max_val)
231         max_val = tmp;
232   }
233   if (max_val>16383)
234   {
235      spx_word32_t sum=0;
236      for (i=0;i<len;i+=4)
237      {
238         spx_word32_t sum2=0;
239         sum2 = MAC16_16(sum2,SHR16(x[i],1),SHR16(x[i],1));
240         sum2 = MAC16_16(sum2,SHR16(x[i+1],1),SHR16(x[i+1],1));
241         sum2 = MAC16_16(sum2,SHR16(x[i+2],1),SHR16(x[i+2],1));
242         sum2 = MAC16_16(sum2,SHR16(x[i+3],1),SHR16(x[i+3],1));
243         sum = ADD32(sum,SHR32(sum2,6));
244      }
245      return SHL16(spx_sqrt(DIV32(sum,len)),4);
246   } else {
247      spx_word32_t sum=0;
248      int sig_shift=0;
249      if (max_val < 8192)
250         sig_shift=1;
251      if (max_val < 4096)
252         sig_shift=2;
253      if (max_val < 2048)
254         sig_shift=3;
255      for (i=0;i<len;i+=4)
256      {
257         spx_word32_t sum2=0;
258         sum2 = MAC16_16(sum2,SHL16(x[i],sig_shift),SHL16(x[i],sig_shift));
259         sum2 = MAC16_16(sum2,SHL16(x[i+1],sig_shift),SHL16(x[i+1],sig_shift));
260         sum2 = MAC16_16(sum2,SHL16(x[i+2],sig_shift),SHL16(x[i+2],sig_shift));
261         sum2 = MAC16_16(sum2,SHL16(x[i+3],sig_shift),SHL16(x[i+3],sig_shift));
262         sum = ADD32(sum,SHR32(sum2,6));
263      }
264      return SHL16(spx_sqrt(DIV32(sum,len)),3-sig_shift);
265   }
266}
267
268#ifndef OVERRIDE_NORMALIZE16
269int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
270{
271   int i;
272   spx_sig_t max_val=1;
273   int sig_shift;
274
275   for (i=0;i<len;i++)
276   {
277      spx_sig_t tmp = x[i];
278      if (tmp<0)
279         tmp = NEG32(tmp);
280      if (tmp >= max_val)
281         max_val = tmp;
282   }
283
284   sig_shift=0;
285   while (max_val>max_scale)
286   {
287      sig_shift++;
288      max_val >>= 1;
289   }
290
291   for (i=0;i<len;i++)
292      y[i] = EXTRACT16(SHR32(x[i], sig_shift));
293
294   return sig_shift;
295}
296#endif
297
298#else
299
300spx_word16_t compute_rms(const spx_sig_t *x, int len)
301{
302   int i;
303   float sum=0;
304   for (i=0;i<len;i++)
305   {
306      sum += x[i]*x[i];
307   }
308   return sqrt(.1+sum/len);
309}
310spx_word16_t compute_rms16(const spx_word16_t *x, int len)
311{
312   return compute_rms(x, len);
313}
314#endif
315
316
317
318#ifndef OVERRIDE_FILTER_MEM16
319void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
320{
321   int i,j;
322   spx_word16_t xi,yi,nyi;
323   for (i=0;i<N;i++)
324   {
325      xi= x[i];
326      yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
327      nyi = NEG16(yi);
328      for (j=0;j<ord-1;j++)
329      {
330         mem[j] = MAC16_16(MAC16_16(mem[j+1], num[j],xi), den[j],nyi);
331      }
332      mem[ord-1] = ADD32(MULT16_16(num[ord-1],xi), MULT16_16(den[ord-1],nyi));
333      y[i] = yi;
334   }
335}
336#endif
337
338#ifndef OVERRIDE_IIR_MEM16
339void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
340{
341   int i,j;
342   spx_word16_t yi,nyi;
343
344   for (i=0;i<N;i++)
345   {
346      yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
347      nyi = NEG16(yi);
348      for (j=0;j<ord-1;j++)
349      {
350         mem[j] = MAC16_16(mem[j+1],den[j],nyi);
351      }
352      mem[ord-1] = MULT16_16(den[ord-1],nyi);
353      y[i] = yi;
354   }
355}
356#endif
357
358#ifndef OVERRIDE_FIR_MEM16
359void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
360{
361   int i,j;
362   spx_word16_t xi,yi;
363
364   for (i=0;i<N;i++)
365   {
366      xi=x[i];
367      yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
368      for (j=0;j<ord-1;j++)
369      {
370         mem[j] = MAC16_16(mem[j+1], num[j],xi);
371      }
372      mem[ord-1] = MULT16_16(num[ord-1],xi);
373      y[i] = yi;
374   }
375}
376#endif
377
378
379void syn_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
380{
381   int i;
382   VARDECL(spx_mem_t *mem);
383   ALLOC(mem, ord, spx_mem_t);
384   for (i=0;i<ord;i++)
385      mem[i]=0;
386   iir_mem16(xx, ak, y, N, ord, mem, stack);
387   for (i=0;i<ord;i++)
388      mem[i]=0;
389   filter_mem16(y, awk1, awk2, y, N, ord, mem, stack);
390}
391void residue_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
392{
393   int i;
394   VARDECL(spx_mem_t *mem);
395   ALLOC(mem, ord, spx_mem_t);
396   for (i=0;i<ord;i++)
397      mem[i]=0;
398   filter_mem16(xx, ak, awk1, y, N, ord, mem, stack);
399   for (i=0;i<ord;i++)
400      mem[i]=0;
401   fir_mem16(y, awk2, y, N, ord, mem, stack);
402}
403
404
405#ifndef OVERRIDE_COMPUTE_IMPULSE_RESPONSE
406void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
407{
408   int i,j;
409   spx_word16_t y1, ny1i, ny2i;
410   VARDECL(spx_mem_t *mem1);
411   VARDECL(spx_mem_t *mem2);
412   ALLOC(mem1, ord, spx_mem_t);
413   ALLOC(mem2, ord, spx_mem_t);
414
415   y[0] = LPC_SCALING;
416   for (i=0;i<ord;i++)
417      y[i+1] = awk1[i];
418   i++;
419   for (;i<N;i++)
420      y[i] = VERY_SMALL;
421   for (i=0;i<ord;i++)
422      mem1[i] = mem2[i] = 0;
423   for (i=0;i<N;i++)
424   {
425      y1 = ADD16(y[i], EXTRACT16(PSHR32(mem1[0],LPC_SHIFT)));
426      ny1i = NEG16(y1);
427      y[i] = PSHR32(ADD32(SHL32(EXTEND32(y1),LPC_SHIFT+1),mem2[0]),LPC_SHIFT);
428      ny2i = NEG16(y[i]);
429      for (j=0;j<ord-1;j++)
430      {
431         mem1[j] = MAC16_16(mem1[j+1], awk2[j],ny1i);
432         mem2[j] = MAC16_16(mem2[j+1], ak[j],ny2i);
433      }
434      mem1[ord-1] = MULT16_16(awk2[ord-1],ny1i);
435      mem2[ord-1] = MULT16_16(ak[ord-1],ny2i);
436   }
437}
438#endif
439
440/* Decomposes a signal into low-band and high-band using a QMF */
441void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1, spx_word16_t *y2, int N, int M, spx_word16_t *mem, char *stack)
442{
443   int i,j,k,M2;
444   VARDECL(spx_word16_t *a);
445   VARDECL(spx_word16_t *x);
446   spx_word16_t *x2;
447
448   ALLOC(a, M, spx_word16_t);
449   ALLOC(x, N+M-1, spx_word16_t);
450   x2=x+M-1;
451   M2=M>>1;
452   for (i=0;i<M;i++)
453      a[M-i-1]= aa[i];
454   for (i=0;i<M-1;i++)
455      x[i]=mem[M-i-2];
456   for (i=0;i<N;i++)
457      x[i+M-1]=SHR16(xx[i],1);
458   for (i=0;i<M-1;i++)
459      mem[i]=SHR16(xx[N-i-1],1);
460   for (i=0,k=0;i<N;i+=2,k++)
461   {
462      spx_word32_t y1k=0, y2k=0;
463      for (j=0;j<M2;j++)
464      {
465         y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
466         y2k=SUB32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
467         j++;
468         y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
469         y2k=ADD32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
470      }
471      y1[k] = EXTRACT16(SATURATE(PSHR32(y1k,15),32767));
472      y2[k] = EXTRACT16(SATURATE(PSHR32(y2k,15),32767));
473   }
474}
475
476/* Re-synthesised a signal from the QMF low-band and high-band signals */
477void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word16_t *mem1, spx_word16_t *mem2, char *stack)
478   /* assumptions:
479      all odd x[i] are zero -- well, actually they are left out of the array now
480      N and M are multiples of 4 */
481{
482   int i, j;
483   int M2, N2;
484   VARDECL(spx_word16_t *xx1);
485   VARDECL(spx_word16_t *xx2);
486
487   M2 = M>>1;
488   N2 = N>>1;
489   ALLOC(xx1, M2+N2, spx_word16_t);
490   ALLOC(xx2, M2+N2, spx_word16_t);
491
492   for (i = 0; i < N2; i++)
493      xx1[i] = x1[N2-1-i];
494   for (i = 0; i < M2; i++)
495      xx1[N2+i] = mem1[2*i+1];
496   for (i = 0; i < N2; i++)
497      xx2[i] = x2[N2-1-i];
498   for (i = 0; i < M2; i++)
499      xx2[N2+i] = mem2[2*i+1];
500
501   for (i = 0; i < N2; i += 2) {
502      spx_sig_t y0, y1, y2, y3;
503      spx_word16_t x10, x20;
504
505      y0 = y1 = y2 = y3 = 0;
506      x10 = xx1[N2-2-i];
507      x20 = xx2[N2-2-i];
508
509      for (j = 0; j < M2; j += 2) {
510         spx_word16_t x11, x21;
511         spx_word16_t a0, a1;
512
513         a0 = a[2*j];
514         a1 = a[2*j+1];
515         x11 = xx1[N2-1+j-i];
516         x21 = xx2[N2-1+j-i];
517
518#ifdef FIXED_POINT
519         /* We multiply twice by the same coef to avoid overflows */
520         y0 = MAC16_16(MAC16_16(y0, a0, x11), NEG16(a0), x21);
521         y1 = MAC16_16(MAC16_16(y1, a1, x11), a1, x21);
522         y2 = MAC16_16(MAC16_16(y2, a0, x10), NEG16(a0), x20);
523         y3 = MAC16_16(MAC16_16(y3, a1, x10), a1, x20);
524#else
525         y0 = ADD32(y0,MULT16_16(a0, x11-x21));
526         y1 = ADD32(y1,MULT16_16(a1, x11+x21));
527         y2 = ADD32(y2,MULT16_16(a0, x10-x20));
528         y3 = ADD32(y3,MULT16_16(a1, x10+x20));
529#endif
530         a0 = a[2*j+2];
531         a1 = a[2*j+3];
532         x10 = xx1[N2+j-i];
533         x20 = xx2[N2+j-i];
534
535#ifdef FIXED_POINT
536         /* We multiply twice by the same coef to avoid overflows */
537         y0 = MAC16_16(MAC16_16(y0, a0, x10), NEG16(a0), x20);
538         y1 = MAC16_16(MAC16_16(y1, a1, x10), a1, x20);
539         y2 = MAC16_16(MAC16_16(y2, a0, x11), NEG16(a0), x21);
540         y3 = MAC16_16(MAC16_16(y3, a1, x11), a1, x21);
541#else
542         y0 = ADD32(y0,MULT16_16(a0, x10-x20));
543         y1 = ADD32(y1,MULT16_16(a1, x10+x20));
544         y2 = ADD32(y2,MULT16_16(a0, x11-x21));
545         y3 = ADD32(y3,MULT16_16(a1, x11+x21));
546#endif
547      }
548#ifdef FIXED_POINT
549      y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767));
550      y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767));
551      y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767));
552      y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767));
553#else
554      /* Normalize up explicitly if we're in float */
555      y[2*i] = 2.f*y0;
556      y[2*i+1] = 2.f*y1;
557      y[2*i+2] = 2.f*y2;
558      y[2*i+3] = 2.f*y3;
559#endif
560   }
561
562   for (i = 0; i < M2; i++)
563      mem1[2*i+1] = xx1[i];
564   for (i = 0; i < M2; i++)
565      mem2[2*i+1] = xx2[i];
566}
567
568#ifdef FIXED_POINT
569#if 0
570const spx_word16_t shift_filt[3][7] = {{-33,    1043,   -4551,   19959,   19959,   -4551,    1043},
571                                 {-98,    1133,   -4425,   29179,    8895,   -2328,     444},
572                                 {444,   -2328,    8895,   29179,   -4425,    1133,     -98}};
573#else
574const spx_word16_t shift_filt[3][7] = {{-390,    1540,   -4993,   20123,   20123,   -4993,    1540},
575                                {-1064,    2817,   -6694,   31589,    6837,    -990,    -209},
576                                 {-209,    -990,    6837,   31589,   -6694,    2817,   -1064}};
577#endif
578#else
579#if 0
580const float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02},
581                          {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403},
582                          {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613,  -0.0029937}};
583#else
584const float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f},
585                          {-0.0324855f, 0.0859768f, -0.2042986f, 0.9640297f, 0.2086420f, -0.0302054f, -0.0063646f},
586                          {-0.0063646f, -0.0302054f, 0.2086420f, 0.9640297f, -0.2042986f, 0.0859768f, -0.0324855f}};
587#endif
588#endif
589
590int interp_pitch(
591spx_word16_t *exc,          /*decoded excitation*/
592spx_word16_t *interp,          /*decoded excitation*/
593int pitch,               /*pitch period*/
594int len
595)
596{
597   int i,j,k;
598   spx_word32_t corr[4][7];
599   spx_word32_t maxcorr;
600   int maxi, maxj;
601   for (i=0;i<7;i++)
602   {
603      corr[0][i] = inner_prod(exc, exc-pitch-3+i, len);
604   }
605   for (i=0;i<3;i++)
606   {
607      for (j=0;j<7;j++)
608      {
609         int i1, i2;
610         spx_word32_t tmp=0;
611         i1 = 3-j;
612         if (i1<0)
613            i1 = 0;
614         i2 = 10-j;
615         if (i2>7)
616            i2 = 7;
617         for (k=i1;k<i2;k++)
618            tmp += MULT16_32_Q15(shift_filt[i][k],corr[0][j+k-3]);
619         corr[i+1][j] = tmp;
620      }
621   }
622   maxi=maxj=0;
623   maxcorr = corr[0][0];
624   for (i=0;i<4;i++)
625   {
626      for (j=0;j<7;j++)
627      {
628         if (corr[i][j] > maxcorr)
629         {
630            maxcorr = corr[i][j];
631            maxi=i;
632            maxj=j;
633         }
634      }
635   }
636   for (i=0;i<len;i++)
637   {
638      spx_word32_t tmp = 0;
639      if (maxi>0)
640      {
641         for (k=0;k<7;k++)
642         {
643            tmp += MULT16_16(exc[i-(pitch-maxj+3)+k-3],shift_filt[maxi-1][k]);
644         }
645      } else {
646         tmp = SHL32(exc[i-(pitch-maxj+3)],15);
647      }
648      interp[i] = PSHR32(tmp,15);
649   }
650   return pitch-maxj+3;
651}
652
653void multicomb(
654spx_word16_t *exc,          /*decoded excitation*/
655spx_word16_t *new_exc,      /*enhanced excitation*/
656spx_coef_t *ak,           /*LPC filter coefs*/
657int p,               /*LPC order*/
658int nsf,             /*sub-frame size*/
659int pitch,           /*pitch period*/
660int max_pitch,
661spx_word16_t  comb_gain,    /*gain of comb filter*/
662char *stack
663)
664{
665   int i;
666   VARDECL(spx_word16_t *iexc);
667   spx_word16_t old_ener, new_ener;
668   int corr_pitch;
669
670   spx_word16_t iexc0_mag, iexc1_mag, exc_mag;
671   spx_word32_t corr0, corr1;
672   spx_word16_t gain0, gain1;
673   spx_word16_t pgain1, pgain2;
674   spx_word16_t c1, c2;
675   spx_word16_t g1, g2;
676   spx_word16_t ngain;
677   spx_word16_t gg1, gg2;
678#ifdef FIXED_POINT
679   int scaledown=0;
680#endif
681#if 0 /* Set to 1 to enable full pitch search */
682   int nol_pitch[6];
683   spx_word16_t nol_pitch_coef[6];
684   spx_word16_t ol_pitch_coef;
685   open_loop_nbest_pitch(exc, 20, 120, nsf,
686                         nol_pitch, nol_pitch_coef, 6, stack);
687   corr_pitch=nol_pitch[0];
688   ol_pitch_coef = nol_pitch_coef[0];
689   /*Try to remove pitch multiples*/
690   for (i=1;i<6;i++)
691   {
692#ifdef FIXED_POINT
693      if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) &&
694#else
695      if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) &&
696#endif
697         (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 ||
698         ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5))
699      {
700         corr_pitch = nol_pitch[i];
701      }
702   }
703#else
704   corr_pitch = pitch;
705#endif
706
707   ALLOC(iexc, 2*nsf, spx_word16_t);
708
709   interp_pitch(exc, iexc, corr_pitch, 80);
710   if (corr_pitch>max_pitch)
711      interp_pitch(exc, iexc+nsf, 2*corr_pitch, 80);
712   else
713      interp_pitch(exc, iexc+nsf, -corr_pitch, 80);
714
715#ifdef FIXED_POINT
716   for (i=0;i<nsf;i++)
717   {
718      if (ABS16(exc[i])>16383)
719      {
720         scaledown = 1;
721         break;
722      }
723   }
724   if (scaledown)
725   {
726      for (i=0;i<nsf;i++)
727         exc[i] = SHR16(exc[i],1);
728      for (i=0;i<2*nsf;i++)
729         iexc[i] = SHR16(iexc[i],1);
730   }
731#endif
732   /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/
733
734   /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/
735   iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf));
736   iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf));
737   exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf));
738   corr0  = inner_prod(iexc,exc,nsf);
739   if (corr0<0)
740      corr0=0;
741   corr1 = inner_prod(iexc+nsf,exc,nsf);
742   if (corr1<0)
743      corr1=0;
744#ifdef FIXED_POINT
745   /* Doesn't cost much to limit the ratio and it makes the rest easier */
746   if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag))
747      iexc0_mag = ADD16(1,PSHR16(exc_mag,6));
748   if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag))
749      iexc1_mag = ADD16(1,PSHR16(exc_mag,6));
750#endif
751   if (corr0 > MULT16_16(iexc0_mag,exc_mag))
752      pgain1 = QCONST16(1., 14);
753   else
754      pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag);
755   if (corr1 > MULT16_16(iexc1_mag,exc_mag))
756      pgain2 = QCONST16(1., 14);
757   else
758      pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag);
759   gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag);
760   gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag);
761   if (comb_gain>0)
762   {
763#ifdef FIXED_POINT
764      c1 = (MULT16_16_Q15(QCONST16(.4,15),comb_gain)+QCONST16(.07,15));
765      c2 = QCONST16(.5,15)+MULT16_16_Q14(QCONST16(1.72,14),(c1-QCONST16(.07,15)));
766#else
767      c1 = .4*comb_gain+.07;
768      c2 = .5+1.72*(c1-.07);
769#endif
770   } else
771   {
772      c1=c2=0;
773   }
774#ifdef FIXED_POINT
775   g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1);
776   g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2);
777#else
778   g1 = 1-c2*pgain1*pgain1;
779   g2 = 1-c2*pgain2*pgain2;
780#endif
781   if (g1<c1)
782      g1 = c1;
783   if (g2<c1)
784      g2 = c1;
785   g1 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g1);
786   g2 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g2);
787   if (corr_pitch>max_pitch)
788   {
789      gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1));
790      gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2));
791   } else {
792      gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1));
793      gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2));
794   }
795   for (i=0;i<nsf;i++)
796      new_exc[i] = ADD16(exc[i], EXTRACT16(PSHR32(ADD32(MULT16_16(gain0,iexc[i]), MULT16_16(gain1,iexc[i+nsf])),8)));
797   /* FIXME: compute_rms16 is currently not quite accurate enough (but close) */
798   new_ener = compute_rms16(new_exc, nsf);
799   old_ener = compute_rms16(exc, nsf);
800
801   if (old_ener < 1)
802      old_ener = 1;
803   if (new_ener < 1)
804      new_ener = 1;
805   if (old_ener > new_ener)
806      old_ener = new_ener;
807   ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener);
808
809   for (i=0;i<nsf;i++)
810      new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]);
811#ifdef FIXED_POINT
812   if (scaledown)
813   {
814      for (i=0;i<nsf;i++)
815         exc[i] = SHL16(exc[i],1);
816      for (i=0;i<nsf;i++)
817         new_exc[i] = SHL16(SATURATE16(new_exc[i],16383),1);
818   }
819#endif
820}
821
822