celt_lpc.c revision 2bd8b54017b5320bc0c1df9bf86f4cdc9f8db242
1/* Copyright (c) 2009-2010 Xiph.Org Foundation
2   Written by Jean-Marc Valin */
3/*
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions
6   are met:
7
8   - Redistributions of source code must retain the above copyright
9   notice, this list of conditions and the following disclaimer.
10
11   - Redistributions in binary form must reproduce the above copyright
12   notice, this list of conditions and the following disclaimer in the
13   documentation and/or other materials provided with the distribution.
14
15   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "celt_lpc.h"
33#include "stack_alloc.h"
34#include "mathops.h"
35#include "pitch.h"
36
37void _celt_lpc(
38      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
39const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
40int          p
41)
42{
43   int i, j;
44   opus_val32 r;
45   opus_val32 error = ac[0];
46#ifdef FIXED_POINT
47   opus_val32 lpc[LPC_ORDER];
48#else
49   float *lpc = _lpc;
50#endif
51
52   for (i = 0; i < p; i++)
53      lpc[i] = 0;
54   if (ac[0] != 0)
55   {
56      for (i = 0; i < p; i++) {
57         /* Sum up this iteration's reflection coefficient */
58         opus_val32 rr = 0;
59         for (j = 0; j < i; j++)
60            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
61         rr += SHR32(ac[i + 1],3);
62         r = -frac_div32(SHL32(rr,3), error);
63         /*  Update LPC coefficients and total error */
64         lpc[i] = SHR32(r,3);
65         for (j = 0; j < (i+1)>>1; j++)
66         {
67            opus_val32 tmp1, tmp2;
68            tmp1 = lpc[j];
69            tmp2 = lpc[i-1-j];
70            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
71            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
72         }
73
74         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
75         /* Bail out once we get 30 dB gain */
76#ifdef FIXED_POINT
77         if (error<SHR32(ac[0],10))
78            break;
79#else
80         if (error<.001f*ac[0])
81            break;
82#endif
83      }
84   }
85#ifdef FIXED_POINT
86   for (i=0;i<p;i++)
87      _lpc[i] = ROUND16(lpc[i],16);
88#endif
89}
90
91void celt_fir(const opus_val16 *_x,
92         const opus_val16 *num,
93         opus_val16 *_y,
94         int N,
95         int ord,
96         opus_val16 *mem)
97{
98   int i,j;
99   VARDECL(opus_val16, rnum);
100   VARDECL(opus_val16, x);
101   SAVE_STACK;
102
103   ALLOC(rnum, ord, opus_val16);
104   ALLOC(x, N+ord, opus_val16);
105   for(i=0;i<ord;i++)
106      rnum[i] = num[ord-i-1];
107   for(i=0;i<ord;i++)
108      x[i] = mem[ord-i-1];
109   for (i=0;i<N;i++)
110      x[i+ord]=_x[i];
111   for(i=0;i<ord;i++)
112      mem[i] = _x[N-i-1];
113#ifdef SMALL_FOOTPRINT
114   for (i=0;i<N;i++)
115   {
116      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
117      for (j=0;j<ord;j++)
118      {
119         sum = MAC16_16(sum,rnum[j],x[i+j]);
120      }
121      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
122   }
123#else
124   for (i=0;i<N-3;i+=4)
125   {
126      opus_val32 sum[4]={0,0,0,0};
127      xcorr_kernel(rnum, x+i, sum, ord);
128      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
129      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
130      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
131      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
132   }
133   for (;i<N;i++)
134   {
135      opus_val32 sum = 0;
136      for (j=0;j<ord;j++)
137         sum = MAC16_16(sum,rnum[j],x[i+j]);
138      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
139   }
140#endif
141   RESTORE_STACK;
142}
143
144void celt_iir(const opus_val32 *_x,
145         const opus_val16 *den,
146         opus_val32 *_y,
147         int N,
148         int ord,
149         opus_val16 *mem)
150{
151#ifdef SMALL_FOOTPRINT
152   int i,j;
153   for (i=0;i<N;i++)
154   {
155      opus_val32 sum = _x[i];
156      for (j=0;j<ord;j++)
157      {
158         sum -= MULT16_16(den[j],mem[j]);
159      }
160      for (j=ord-1;j>=1;j--)
161      {
162         mem[j]=mem[j-1];
163      }
164      mem[0] = ROUND16(sum,SIG_SHIFT);
165      _y[i] = sum;
166   }
167#else
168   int i,j;
169   VARDECL(opus_val16, rden);
170   VARDECL(opus_val16, y);
171   SAVE_STACK;
172
173   celt_assert((ord&3)==0);
174   ALLOC(rden, ord, opus_val16);
175   ALLOC(y, N+ord, opus_val16);
176   for(i=0;i<ord;i++)
177      rden[i] = den[ord-i-1];
178   for(i=0;i<ord;i++)
179      y[i] = -mem[ord-i-1];
180   for(;i<N+ord;i++)
181      y[i]=0;
182   for (i=0;i<N-3;i+=4)
183   {
184      /* Unroll by 4 as if it were an FIR filter */
185      opus_val32 sum[4];
186      sum[0]=_x[i];
187      sum[1]=_x[i+1];
188      sum[2]=_x[i+2];
189      sum[3]=_x[i+3];
190      xcorr_kernel(rden, y+i, sum, ord);
191
192      /* Patch up the result to compensate for the fact that this is an IIR */
193      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
194      _y[i  ] = sum[0];
195      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
196      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
197      _y[i+1] = sum[1];
198      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
199      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
200      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
201      _y[i+2] = sum[2];
202
203      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
204      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
205      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
206      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
207      _y[i+3] = sum[3];
208   }
209   for (;i<N;i++)
210   {
211      opus_val32 sum = _x[i];
212      for (j=0;j<ord;j++)
213         sum -= MULT16_16(rden[j],y[i+j]);
214      y[i+ord] = ROUND16(sum,SIG_SHIFT);
215      _y[i] = sum;
216   }
217   for(i=0;i<ord;i++)
218      mem[i] = _y[N-i-1];
219   RESTORE_STACK;
220#endif
221}
222
223int _celt_autocorr(
224                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
225                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
226                   const opus_val16       *window,
227                   int          overlap,
228                   int          lag,
229                   int          n,
230                   int          arch
231                  )
232{
233   opus_val32 d;
234   int i, k;
235   int fastN=n-lag;
236   int shift;
237   const opus_val16 *xptr;
238   VARDECL(opus_val16, xx);
239   SAVE_STACK;
240   ALLOC(xx, n, opus_val16);
241   celt_assert(n>0);
242   celt_assert(overlap>=0);
243   if (overlap == 0)
244   {
245      xptr = x;
246   } else {
247      for (i=0;i<n;i++)
248         xx[i] = x[i];
249      for (i=0;i<overlap;i++)
250      {
251         xx[i] = MULT16_16_Q15(x[i],window[i]);
252         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
253      }
254      xptr = xx;
255   }
256   shift=0;
257#ifdef FIXED_POINT
258   {
259      opus_val32 ac0;
260      ac0 = 1+(n<<7);
261      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
262      for(i=(n&1);i<n;i+=2)
263      {
264         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
265         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
266      }
267
268      shift = celt_ilog2(ac0)-30+10;
269      shift = (shift)/2;
270      if (shift>0)
271      {
272         for(i=0;i<n;i++)
273            xx[i] = PSHR32(xptr[i], shift);
274         xptr = xx;
275      } else
276         shift = 0;
277   }
278#endif
279   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
280   for (k=0;k<=lag;k++)
281   {
282      for (i = k+fastN, d = 0; i < n; i++)
283         d = MAC16_16(d, xptr[i], xptr[i-k]);
284      ac[k] += d;
285   }
286#ifdef FIXED_POINT
287   shift = 2*shift;
288   if (shift<=0)
289      ac[0] += SHL32((opus_int32)1, -shift);
290   if (ac[0] < 268435456)
291   {
292      int shift2 = 29 - EC_ILOG(ac[0]);
293      for (i=0;i<=lag;i++)
294         ac[i] = SHL32(ac[i], shift2);
295      shift -= shift2;
296   } else if (ac[0] >= 536870912)
297   {
298      int shift2=1;
299      if (ac[0] >= 1073741824)
300         shift2++;
301      for (i=0;i<=lag;i++)
302         ac[i] = SHR32(ac[i], shift2);
303      shift += shift2;
304   }
305#endif
306
307   RESTORE_STACK;
308   return shift;
309}
310