1/* Copyright (C) 2002-2006 Jean-Marc Valin
2   File: ltp.c
3   Long-Term Prediction functions
4
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions
7   are met:
8
9   - Redistributions of source code must retain the above copyright
10   notice, this list of conditions and the following disclaimer.
11
12   - Redistributions in binary form must reproduce the above copyright
13   notice, this list of conditions and the following disclaimer in the
14   documentation and/or other materials provided with the distribution.
15
16   - Neither the name of the Xiph.org Foundation nor the names of its
17   contributors may be used to endorse or promote products derived from
18   this software without specific prior written permission.
19
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
24   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31*/
32
33#ifdef HAVE_CONFIG_H
34#include "config.h"
35#endif
36
37#include <math.h>
38#include "ltp.h"
39#include "stack_alloc.h"
40#include "filters.h"
41#include <speex/speex_bits.h>
42#include "math_approx.h"
43#include "os_support.h"
44
45#ifndef NULL
46#define NULL 0
47#endif
48
49
50#ifdef _USE_SSE
51#include "ltp_sse.h"
52#elif defined (ARM4_ASM) || defined(ARM5E_ASM)
53#include "ltp_arm4.h"
54#elif defined (BFIN_ASM)
55#include "ltp_bfin.h"
56#endif
57
58#ifndef OVERRIDE_INNER_PROD
59spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
60{
61   spx_word32_t sum=0;
62   len >>= 2;
63   while(len--)
64   {
65      spx_word32_t part=0;
66      part = MAC16_16(part,*x++,*y++);
67      part = MAC16_16(part,*x++,*y++);
68      part = MAC16_16(part,*x++,*y++);
69      part = MAC16_16(part,*x++,*y++);
70      /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
71      sum = ADD32(sum,SHR32(part,6));
72   }
73   return sum;
74}
75#endif
76
77#ifndef OVERRIDE_PITCH_XCORR
78#if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */
79void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
80{
81   int i,j;
82   for (i=0;i<nb_pitch;i+=4)
83   {
84      /* Compute correlation*/
85      /*corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);*/
86      spx_word32_t sum1=0;
87      spx_word32_t sum2=0;
88      spx_word32_t sum3=0;
89      spx_word32_t sum4=0;
90      const spx_word16_t *y = _y+i;
91      const spx_word16_t *x = _x;
92      spx_word16_t y0, y1, y2, y3;
93      /*y0=y[0];y1=y[1];y2=y[2];y3=y[3];*/
94      y0=*y++;
95      y1=*y++;
96      y2=*y++;
97      y3=*y++;
98      for (j=0;j<len;j+=4)
99      {
100         spx_word32_t part1;
101         spx_word32_t part2;
102         spx_word32_t part3;
103         spx_word32_t part4;
104         part1 = MULT16_16(*x,y0);
105         part2 = MULT16_16(*x,y1);
106         part3 = MULT16_16(*x,y2);
107         part4 = MULT16_16(*x,y3);
108         x++;
109         y0=*y++;
110         part1 = MAC16_16(part1,*x,y1);
111         part2 = MAC16_16(part2,*x,y2);
112         part3 = MAC16_16(part3,*x,y3);
113         part4 = MAC16_16(part4,*x,y0);
114         x++;
115         y1=*y++;
116         part1 = MAC16_16(part1,*x,y2);
117         part2 = MAC16_16(part2,*x,y3);
118         part3 = MAC16_16(part3,*x,y0);
119         part4 = MAC16_16(part4,*x,y1);
120         x++;
121         y2=*y++;
122         part1 = MAC16_16(part1,*x,y3);
123         part2 = MAC16_16(part2,*x,y0);
124         part3 = MAC16_16(part3,*x,y1);
125         part4 = MAC16_16(part4,*x,y2);
126         x++;
127         y3=*y++;
128
129         sum1 = ADD32(sum1,SHR32(part1,6));
130         sum2 = ADD32(sum2,SHR32(part2,6));
131         sum3 = ADD32(sum3,SHR32(part3,6));
132         sum4 = ADD32(sum4,SHR32(part4,6));
133      }
134      corr[nb_pitch-1-i]=sum1;
135      corr[nb_pitch-2-i]=sum2;
136      corr[nb_pitch-3-i]=sum3;
137      corr[nb_pitch-4-i]=sum4;
138   }
139
140}
141#else
142void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
143{
144   int i;
145   for (i=0;i<nb_pitch;i++)
146   {
147      /* Compute correlation*/
148      corr[nb_pitch-1-i]=inner_prod(_x, _y+i, len);
149   }
150
151}
152#endif
153#endif
154
155#ifndef OVERRIDE_COMPUTE_PITCH_ERROR
156static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g, spx_word16_t pitch_control)
157{
158   spx_word32_t sum = 0;
159   sum = ADD32(sum,MULT16_16(MULT16_16_16(g[0],pitch_control),C[0]));
160   sum = ADD32(sum,MULT16_16(MULT16_16_16(g[1],pitch_control),C[1]));
161   sum = ADD32(sum,MULT16_16(MULT16_16_16(g[2],pitch_control),C[2]));
162   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[1]),C[3]));
163   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[1]),C[4]));
164   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[0]),C[5]));
165   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[0],g[0]),C[6]));
166   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[1],g[1]),C[7]));
167   sum = SUB32(sum,MULT16_16(MULT16_16_16(g[2],g[2]),C[8]));
168   return sum;
169}
170#endif
171
172#ifndef OVERRIDE_OPEN_LOOP_NBEST_PITCH
173void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
174{
175   int i,j,k;
176   VARDECL(spx_word32_t *best_score);
177   VARDECL(spx_word32_t *best_ener);
178   spx_word32_t e0;
179   VARDECL(spx_word32_t *corr);
180#ifdef FIXED_POINT
181   /* In fixed-point, we need only one (temporary) array of 32-bit values and two (corr16, ener16)
182      arrays for (normalized) 16-bit values */
183   VARDECL(spx_word16_t *corr16);
184   VARDECL(spx_word16_t *ener16);
185   spx_word32_t *energy;
186   int cshift=0, eshift=0;
187   int scaledown = 0;
188   ALLOC(corr16, end-start+1, spx_word16_t);
189   ALLOC(ener16, end-start+1, spx_word16_t);
190   ALLOC(corr, end-start+1, spx_word32_t);
191   energy = corr;
192#else
193   /* In floating-point, we need to float arrays and no normalized copies */
194   VARDECL(spx_word32_t *energy);
195   spx_word16_t *corr16;
196   spx_word16_t *ener16;
197   ALLOC(energy, end-start+2, spx_word32_t);
198   ALLOC(corr, end-start+1, spx_word32_t);
199   corr16 = corr;
200   ener16 = energy;
201#endif
202
203   ALLOC(best_score, N, spx_word32_t);
204   ALLOC(best_ener, N, spx_word32_t);
205   for (i=0;i<N;i++)
206   {
207        best_score[i]=-1;
208        best_ener[i]=0;
209        pitch[i]=start;
210   }
211
212#ifdef FIXED_POINT
213   for (i=-end;i<len;i++)
214   {
215      if (ABS16(sw[i])>16383)
216      {
217         scaledown=1;
218         break;
219      }
220   }
221   /* If the weighted input is close to saturation, then we scale it down */
222   if (scaledown)
223   {
224      for (i=-end;i<len;i++)
225      {
226         sw[i]=SHR16(sw[i],1);
227      }
228   }
229#endif
230   energy[0]=inner_prod(sw-start, sw-start, len);
231   e0=inner_prod(sw, sw, len);
232   for (i=start;i<end;i++)
233   {
234      /* Update energy for next pitch*/
235      energy[i-start+1] = SUB32(ADD32(energy[i-start],SHR32(MULT16_16(sw[-i-1],sw[-i-1]),6)), SHR32(MULT16_16(sw[-i+len-1],sw[-i+len-1]),6));
236      if (energy[i-start+1] < 0)
237         energy[i-start+1] = 0;
238   }
239
240#ifdef FIXED_POINT
241   eshift = normalize16(energy, ener16, 32766, end-start+1);
242#endif
243
244   /* In fixed-point, this actually overrites the energy array (aliased to corr) */
245   pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
246
247#ifdef FIXED_POINT
248   /* Normalize to 180 so we can square it and it still fits in 16 bits */
249   cshift = normalize16(corr, corr16, 180, end-start+1);
250   /* If we scaled weighted input down, we need to scale it up again (OK, so we've just lost the LSB, who cares?) */
251   if (scaledown)
252   {
253      for (i=-end;i<len;i++)
254      {
255         sw[i]=SHL16(sw[i],1);
256      }
257   }
258#endif
259
260   /* Search for the best pitch prediction gain */
261   for (i=start;i<=end;i++)
262   {
263      spx_word16_t tmp = MULT16_16_16(corr16[i-start],corr16[i-start]);
264      /* Instead of dividing the tmp by the energy, we multiply on the other side */
265      if (MULT16_16(tmp,best_ener[N-1])>MULT16_16(best_score[N-1],ADD16(1,ener16[i-start])))
266      {
267         /* We can safely put it last and then check */
268         best_score[N-1]=tmp;
269         best_ener[N-1]=ener16[i-start]+1;
270         pitch[N-1]=i;
271         /* Check if it comes in front of others */
272         for (j=0;j<N-1;j++)
273         {
274            if (MULT16_16(tmp,best_ener[j])>MULT16_16(best_score[j],ADD16(1,ener16[i-start])))
275            {
276               for (k=N-1;k>j;k--)
277               {
278                  best_score[k]=best_score[k-1];
279                  best_ener[k]=best_ener[k-1];
280                  pitch[k]=pitch[k-1];
281               }
282               best_score[j]=tmp;
283               best_ener[j]=ener16[i-start]+1;
284               pitch[j]=i;
285               break;
286            }
287         }
288      }
289   }
290
291   /* Compute open-loop gain if necessary */
292   if (gain)
293   {
294      for (j=0;j<N;j++)
295      {
296         spx_word16_t g;
297         i=pitch[j];
298         g = DIV32(SHL32(EXTEND32(corr16[i-start]),cshift), 10+SHR32(MULT16_16(spx_sqrt(e0),spx_sqrt(SHL32(EXTEND32(ener16[i-start]),eshift))),6));
299         /* FIXME: g = max(g,corr/energy) */
300         if (g<0)
301            g = 0;
302         gain[j]=g;
303      }
304   }
305
306
307}
308#endif
309
310#ifndef OVERRIDE_PITCH_GAIN_SEARCH_3TAP_VQ
311static int pitch_gain_search_3tap_vq(
312  const signed char *gain_cdbk,
313  int                gain_cdbk_size,
314  spx_word16_t      *C16,
315  spx_word16_t       max_gain
316)
317{
318  const signed char *ptr=gain_cdbk;
319  int                best_cdbk=0;
320  spx_word32_t       best_sum=-VERY_LARGE32;
321  spx_word32_t       sum=0;
322  spx_word16_t       g[3];
323  spx_word16_t       pitch_control=64;
324  spx_word16_t       gain_sum;
325  int                i;
326
327  for (i=0;i<gain_cdbk_size;i++) {
328
329    ptr = gain_cdbk+4*i;
330    g[0]=ADD16((spx_word16_t)ptr[0],32);
331    g[1]=ADD16((spx_word16_t)ptr[1],32);
332    g[2]=ADD16((spx_word16_t)ptr[2],32);
333    gain_sum = (spx_word16_t)ptr[3];
334
335    sum = compute_pitch_error(C16, g, pitch_control);
336
337    if (sum>best_sum && gain_sum<=max_gain) {
338      best_sum=sum;
339      best_cdbk=i;
340    }
341  }
342
343  return best_cdbk;
344}
345#endif
346
347/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
348static spx_word32_t pitch_gain_search_3tap(
349const spx_word16_t target[],       /* Target vector */
350const spx_coef_t ak[],          /* LPCs for this subframe */
351const spx_coef_t awk1[],        /* Weighted LPCs #1 for this subframe */
352const spx_coef_t awk2[],        /* Weighted LPCs #2 for this subframe */
353spx_sig_t exc[],                /* Excitation */
354const signed char *gain_cdbk,
355int gain_cdbk_size,
356int   pitch,                    /* Pitch value */
357int   p,                        /* Number of LPC coeffs */
358int   nsf,                      /* Number of samples in subframe */
359SpeexBits *bits,
360char *stack,
361const spx_word16_t *exc2,
362const spx_word16_t *r,
363spx_word16_t *new_target,
364int  *cdbk_index,
365int plc_tuning,
366spx_word32_t cumul_gain,
367int scaledown
368)
369{
370   int i,j;
371   VARDECL(spx_word16_t *tmp1);
372   VARDECL(spx_word16_t *e);
373   spx_word16_t *x[3];
374   spx_word32_t corr[3];
375   spx_word32_t A[3][3];
376   spx_word16_t gain[3];
377   spx_word32_t err;
378   spx_word16_t max_gain=128;
379   int          best_cdbk=0;
380
381   ALLOC(tmp1, 3*nsf, spx_word16_t);
382   ALLOC(e, nsf, spx_word16_t);
383
384   if (cumul_gain > 262144)
385      max_gain = 31;
386
387   x[0]=tmp1;
388   x[1]=tmp1+nsf;
389   x[2]=tmp1+2*nsf;
390
391   for (j=0;j<nsf;j++)
392      new_target[j] = target[j];
393
394   {
395      VARDECL(spx_mem_t *mm);
396      int pp=pitch-1;
397      ALLOC(mm, p, spx_mem_t);
398      for (j=0;j<nsf;j++)
399      {
400         if (j-pp<0)
401            e[j]=exc2[j-pp];
402         else if (j-pp-pitch<0)
403            e[j]=exc2[j-pp-pitch];
404         else
405            e[j]=0;
406      }
407#ifdef FIXED_POINT
408      /* Scale target and excitation down if needed (avoiding overflow) */
409      if (scaledown)
410      {
411         for (j=0;j<nsf;j++)
412            e[j] = SHR16(e[j],1);
413         for (j=0;j<nsf;j++)
414            new_target[j] = SHR16(new_target[j],1);
415      }
416#endif
417      for (j=0;j<p;j++)
418         mm[j] = 0;
419      iir_mem16(e, ak, e, nsf, p, mm, stack);
420      for (j=0;j<p;j++)
421         mm[j] = 0;
422      filter_mem16(e, awk1, awk2, e, nsf, p, mm, stack);
423      for (j=0;j<nsf;j++)
424         x[2][j] = e[j];
425   }
426   for (i=1;i>=0;i--)
427   {
428      spx_word16_t e0=exc2[-pitch-1+i];
429#ifdef FIXED_POINT
430      /* Scale excitation down if needed (avoiding overflow) */
431      if (scaledown)
432         e0 = SHR16(e0,1);
433#endif
434      x[i][0]=MULT16_16_Q14(r[0], e0);
435      for (j=0;j<nsf-1;j++)
436         x[i][j+1]=ADD32(x[i+1][j],MULT16_16_P14(r[j+1], e0));
437   }
438
439   for (i=0;i<3;i++)
440      corr[i]=inner_prod(x[i],new_target,nsf);
441   for (i=0;i<3;i++)
442      for (j=0;j<=i;j++)
443         A[i][j]=A[j][i]=inner_prod(x[i],x[j],nsf);
444
445   {
446      spx_word32_t C[9];
447#ifdef FIXED_POINT
448      spx_word16_t C16[9];
449#else
450      spx_word16_t *C16=C;
451#endif
452      C[0]=corr[2];
453      C[1]=corr[1];
454      C[2]=corr[0];
455      C[3]=A[1][2];
456      C[4]=A[0][1];
457      C[5]=A[0][2];
458      C[6]=A[2][2];
459      C[7]=A[1][1];
460      C[8]=A[0][0];
461
462      /*plc_tuning *= 2;*/
463      if (plc_tuning<2)
464         plc_tuning=2;
465      if (plc_tuning>30)
466         plc_tuning=30;
467#ifdef FIXED_POINT
468      C[0] = SHL32(C[0],1);
469      C[1] = SHL32(C[1],1);
470      C[2] = SHL32(C[2],1);
471      C[3] = SHL32(C[3],1);
472      C[4] = SHL32(C[4],1);
473      C[5] = SHL32(C[5],1);
474      C[6] = MAC16_32_Q15(C[6],MULT16_16_16(plc_tuning,655),C[6]);
475      C[7] = MAC16_32_Q15(C[7],MULT16_16_16(plc_tuning,655),C[7]);
476      C[8] = MAC16_32_Q15(C[8],MULT16_16_16(plc_tuning,655),C[8]);
477      normalize16(C, C16, 32767, 9);
478#else
479      C[6]*=.5*(1+.02*plc_tuning);
480      C[7]*=.5*(1+.02*plc_tuning);
481      C[8]*=.5*(1+.02*plc_tuning);
482#endif
483
484      best_cdbk = pitch_gain_search_3tap_vq(gain_cdbk, gain_cdbk_size, C16, max_gain);
485
486#ifdef FIXED_POINT
487      gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4]);
488      gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+1]);
489      gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[best_cdbk*4+2]);
490      /*printf ("%d %d %d %d\n",gain[0],gain[1],gain[2], best_cdbk);*/
491#else
492      gain[0] = 0.015625*gain_cdbk[best_cdbk*4]  + .5;
493      gain[1] = 0.015625*gain_cdbk[best_cdbk*4+1]+ .5;
494      gain[2] = 0.015625*gain_cdbk[best_cdbk*4+2]+ .5;
495#endif
496      *cdbk_index=best_cdbk;
497   }
498
499   SPEEX_MEMSET(exc, 0, nsf);
500   for (i=0;i<3;i++)
501   {
502      int j;
503      int tmp1, tmp3;
504      int pp=pitch+1-i;
505      tmp1=nsf;
506      if (tmp1>pp)
507         tmp1=pp;
508      for (j=0;j<tmp1;j++)
509         exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp]);
510      tmp3=nsf;
511      if (tmp3>pp+pitch)
512         tmp3=pp+pitch;
513      for (j=tmp1;j<tmp3;j++)
514         exc[j]=MAC16_16(exc[j],SHL16(gain[2-i],7),exc2[j-pp-pitch]);
515   }
516   for (i=0;i<nsf;i++)
517   {
518      spx_word32_t tmp = ADD32(ADD32(MULT16_16(gain[0],x[2][i]),MULT16_16(gain[1],x[1][i])),
519                            MULT16_16(gain[2],x[0][i]));
520      new_target[i] = SUB16(new_target[i], EXTRACT16(PSHR32(tmp,6)));
521   }
522   err = inner_prod(new_target, new_target, nsf);
523
524   return err;
525}
526
527/** Finds the best quantized 3-tap pitch predictor by analysis by synthesis */
528int pitch_search_3tap(
529spx_word16_t target[],                 /* Target vector */
530spx_word16_t *sw,
531spx_coef_t ak[],                     /* LPCs for this subframe */
532spx_coef_t awk1[],                   /* Weighted LPCs #1 for this subframe */
533spx_coef_t awk2[],                   /* Weighted LPCs #2 for this subframe */
534spx_sig_t exc[],                    /* Excitation */
535const void *par,
536int   start,                    /* Smallest pitch value allowed */
537int   end,                      /* Largest pitch value allowed */
538spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
539int   p,                        /* Number of LPC coeffs */
540int   nsf,                      /* Number of samples in subframe */
541SpeexBits *bits,
542char *stack,
543spx_word16_t *exc2,
544spx_word16_t *r,
545int complexity,
546int cdbk_offset,
547int plc_tuning,
548spx_word32_t *cumul_gain
549)
550{
551   int i;
552   int cdbk_index, pitch=0, best_gain_index=0;
553   VARDECL(spx_sig_t *best_exc);
554   VARDECL(spx_word16_t *new_target);
555   VARDECL(spx_word16_t *best_target);
556   int best_pitch=0;
557   spx_word32_t err, best_err=-1;
558   int N;
559   const ltp_params *params;
560   const signed char *gain_cdbk;
561   int   gain_cdbk_size;
562   int scaledown=0;
563
564   VARDECL(int *nbest);
565
566   params = (const ltp_params*) par;
567   gain_cdbk_size = 1<<params->gain_bits;
568   gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
569
570   N=complexity;
571   if (N>10)
572      N=10;
573   if (N<1)
574      N=1;
575
576   ALLOC(nbest, N, int);
577   params = (const ltp_params*) par;
578
579   if (end<start)
580   {
581      speex_bits_pack(bits, 0, params->pitch_bits);
582      speex_bits_pack(bits, 0, params->gain_bits);
583      SPEEX_MEMSET(exc, 0, nsf);
584      return start;
585   }
586
587#ifdef FIXED_POINT
588   /* Check if we need to scale everything down in the pitch search to avoid overflows */
589   for (i=0;i<nsf;i++)
590   {
591      if (ABS16(target[i])>16383)
592      {
593         scaledown=1;
594         break;
595      }
596   }
597   for (i=-end;i<nsf;i++)
598   {
599      if (ABS16(exc2[i])>16383)
600      {
601         scaledown=1;
602         break;
603      }
604   }
605#endif
606   if (N>end-start+1)
607      N=end-start+1;
608   if (end != start)
609      open_loop_nbest_pitch(sw, start, end, nsf, nbest, NULL, N, stack);
610   else
611      nbest[0] = start;
612
613   ALLOC(best_exc, nsf, spx_sig_t);
614   ALLOC(new_target, nsf, spx_word16_t);
615   ALLOC(best_target, nsf, spx_word16_t);
616
617   for (i=0;i<N;i++)
618   {
619      pitch=nbest[i];
620      SPEEX_MEMSET(exc, 0, nsf);
621      err=pitch_gain_search_3tap(target, ak, awk1, awk2, exc, gain_cdbk, gain_cdbk_size, pitch, p, nsf,
622                                 bits, stack, exc2, r, new_target, &cdbk_index, plc_tuning, *cumul_gain, scaledown);
623      if (err<best_err || best_err<0)
624      {
625         SPEEX_COPY(best_exc, exc, nsf);
626         SPEEX_COPY(best_target, new_target, nsf);
627         best_err=err;
628         best_pitch=pitch;
629         best_gain_index=cdbk_index;
630      }
631   }
632   /*printf ("pitch: %d %d\n", best_pitch, best_gain_index);*/
633   speex_bits_pack(bits, best_pitch-start, params->pitch_bits);
634   speex_bits_pack(bits, best_gain_index, params->gain_bits);
635#ifdef FIXED_POINT
636   *cumul_gain = MULT16_32_Q13(SHL16(params->gain_cdbk[4*best_gain_index+3],8), MAX32(1024,*cumul_gain));
637#else
638   *cumul_gain = 0.03125*MAX32(1024,*cumul_gain)*params->gain_cdbk[4*best_gain_index+3];
639#endif
640   /*printf ("%f\n", cumul_gain);*/
641   /*printf ("encode pitch: %d %d\n", best_pitch, best_gain_index);*/
642   SPEEX_COPY(exc, best_exc, nsf);
643   SPEEX_COPY(target, best_target, nsf);
644#ifdef FIXED_POINT
645   /* Scale target back up if needed */
646   if (scaledown)
647   {
648      for (i=0;i<nsf;i++)
649         target[i]=SHL16(target[i],1);
650   }
651#endif
652   return pitch;
653}
654
655void pitch_unquant_3tap(
656spx_word16_t exc[],             /* Input excitation */
657spx_word32_t exc_out[],         /* Output excitation */
658int   start,                    /* Smallest pitch value allowed */
659int   end,                      /* Largest pitch value allowed */
660spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
661const void *par,
662int   nsf,                      /* Number of samples in subframe */
663int *pitch_val,
664spx_word16_t *gain_val,
665SpeexBits *bits,
666char *stack,
667int count_lost,
668int subframe_offset,
669spx_word16_t last_pitch_gain,
670int cdbk_offset
671)
672{
673   int i;
674   int pitch;
675   int gain_index;
676   spx_word16_t gain[3];
677   const signed char *gain_cdbk;
678   int gain_cdbk_size;
679   const ltp_params *params;
680
681   params = (const ltp_params*) par;
682   gain_cdbk_size = 1<<params->gain_bits;
683   gain_cdbk = params->gain_cdbk + 4*gain_cdbk_size*cdbk_offset;
684
685   pitch = speex_bits_unpack_unsigned(bits, params->pitch_bits);
686   pitch += start;
687   gain_index = speex_bits_unpack_unsigned(bits, params->gain_bits);
688   /*printf ("decode pitch: %d %d\n", pitch, gain_index);*/
689#ifdef FIXED_POINT
690   gain[0] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4]);
691   gain[1] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+1]);
692   gain[2] = ADD16(32,(spx_word16_t)gain_cdbk[gain_index*4+2]);
693#else
694   gain[0] = 0.015625*gain_cdbk[gain_index*4]+.5;
695   gain[1] = 0.015625*gain_cdbk[gain_index*4+1]+.5;
696   gain[2] = 0.015625*gain_cdbk[gain_index*4+2]+.5;
697#endif
698
699   if (count_lost && pitch > subframe_offset)
700   {
701      spx_word16_t gain_sum;
702      if (1) {
703#ifdef FIXED_POINT
704         spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : SHR16(last_pitch_gain,1);
705         if (tmp>62)
706            tmp=62;
707#else
708         spx_word16_t tmp = count_lost < 4 ? last_pitch_gain : 0.5 * last_pitch_gain;
709         if (tmp>.95)
710            tmp=.95;
711#endif
712         gain_sum = gain_3tap_to_1tap(gain);
713
714         if (gain_sum > tmp)
715         {
716            spx_word16_t fact = DIV32_16(SHL32(EXTEND32(tmp),14),gain_sum);
717            for (i=0;i<3;i++)
718               gain[i]=MULT16_16_Q14(fact,gain[i]);
719         }
720
721      }
722
723   }
724
725   *pitch_val = pitch;
726   gain_val[0]=gain[0];
727   gain_val[1]=gain[1];
728   gain_val[2]=gain[2];
729   gain[0] = SHL16(gain[0],7);
730   gain[1] = SHL16(gain[1],7);
731   gain[2] = SHL16(gain[2],7);
732   SPEEX_MEMSET(exc_out, 0, nsf);
733   for (i=0;i<3;i++)
734   {
735      int j;
736      int tmp1, tmp3;
737      int pp=pitch+1-i;
738      tmp1=nsf;
739      if (tmp1>pp)
740         tmp1=pp;
741      for (j=0;j<tmp1;j++)
742         exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp]);
743      tmp3=nsf;
744      if (tmp3>pp+pitch)
745         tmp3=pp+pitch;
746      for (j=tmp1;j<tmp3;j++)
747         exc_out[j]=MAC16_16(exc_out[j],gain[2-i],exc[j-pp-pitch]);
748   }
749   /*for (i=0;i<nsf;i++)
750   exc[i]=PSHR32(exc32[i],13);*/
751}
752
753
754/** Forced pitch delay and gain */
755int forced_pitch_quant(
756spx_word16_t target[],                 /* Target vector */
757spx_word16_t *sw,
758spx_coef_t ak[],                     /* LPCs for this subframe */
759spx_coef_t awk1[],                   /* Weighted LPCs #1 for this subframe */
760spx_coef_t awk2[],                   /* Weighted LPCs #2 for this subframe */
761spx_sig_t exc[],                    /* Excitation */
762const void *par,
763int   start,                    /* Smallest pitch value allowed */
764int   end,                      /* Largest pitch value allowed */
765spx_word16_t pitch_coef,               /* Voicing (pitch) coefficient */
766int   p,                        /* Number of LPC coeffs */
767int   nsf,                      /* Number of samples in subframe */
768SpeexBits *bits,
769char *stack,
770spx_word16_t *exc2,
771spx_word16_t *r,
772int complexity,
773int cdbk_offset,
774int plc_tuning,
775spx_word32_t *cumul_gain
776)
777{
778   int i;
779   VARDECL(spx_word16_t *res);
780   ALLOC(res, nsf, spx_word16_t);
781#ifdef FIXED_POINT
782   if (pitch_coef>63)
783      pitch_coef=63;
784#else
785   if (pitch_coef>.99)
786      pitch_coef=.99;
787#endif
788   for (i=0;i<nsf&&i<start;i++)
789   {
790      exc[i]=MULT16_16(SHL16(pitch_coef, 7),exc2[i-start]);
791   }
792   for (;i<nsf;i++)
793   {
794      exc[i]=MULT16_32_Q15(SHL16(pitch_coef, 9),exc[i-start]);
795   }
796   for (i=0;i<nsf;i++)
797      res[i] = EXTRACT16(PSHR32(exc[i], SIG_SHIFT-1));
798   syn_percep_zero16(res, ak, awk1, awk2, res, nsf, p, stack);
799   for (i=0;i<nsf;i++)
800      target[i]=EXTRACT16(SATURATE(SUB32(EXTEND32(target[i]),EXTEND32(res[i])),32700));
801   return start;
802}
803
804/** Unquantize forced pitch delay and gain */
805void forced_pitch_unquant(
806spx_word16_t exc[],             /* Input excitation */
807spx_word32_t exc_out[],         /* Output excitation */
808int   start,                    /* Smallest pitch value allowed */
809int   end,                      /* Largest pitch value allowed */
810spx_word16_t pitch_coef,        /* Voicing (pitch) coefficient */
811const void *par,
812int   nsf,                      /* Number of samples in subframe */
813int *pitch_val,
814spx_word16_t *gain_val,
815SpeexBits *bits,
816char *stack,
817int count_lost,
818int subframe_offset,
819spx_word16_t last_pitch_gain,
820int cdbk_offset
821)
822{
823   int i;
824#ifdef FIXED_POINT
825   if (pitch_coef>63)
826      pitch_coef=63;
827#else
828   if (pitch_coef>.99)
829      pitch_coef=.99;
830#endif
831   for (i=0;i<nsf;i++)
832   {
833      exc_out[i]=MULT16_16(exc[i-start],SHL16(pitch_coef,7));
834      exc[i] = EXTRACT16(PSHR32(exc_out[i],13));
835   }
836   *pitch_val = start;
837   gain_val[0]=gain_val[2]=0;
838   gain_val[1] = pitch_coef;
839}
840