1/* Copyright (c) 2009-2010 Xiph.Org Foundation
2   Written by Jean-Marc Valin */
3/*
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions
6   are met:
7
8   - Redistributions of source code must retain the above copyright
9   notice, this list of conditions and the following disclaimer.
10
11   - Redistributions in binary form must reproduce the above copyright
12   notice, this list of conditions and the following disclaimer in the
13   documentation and/or other materials provided with the distribution.
14
15   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31
32#include "celt_lpc.h"
33#include "stack_alloc.h"
34#include "mathops.h"
35#include "pitch.h"
36
37void _celt_lpc(
38      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
39const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
40int          p
41)
42{
43   int i, j;
44   opus_val32 r;
45   opus_val32 error = ac[0];
46#ifdef FIXED_POINT
47   opus_val32 lpc[LPC_ORDER];
48#else
49   float *lpc = _lpc;
50#endif
51
52   OPUS_CLEAR(lpc, p);
53   if (ac[0] != 0)
54   {
55      for (i = 0; i < p; i++) {
56         /* Sum up this iteration's reflection coefficient */
57         opus_val32 rr = 0;
58         for (j = 0; j < i; j++)
59            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
60         rr += SHR32(ac[i + 1],3);
61         r = -frac_div32(SHL32(rr,3), error);
62         /*  Update LPC coefficients and total error */
63         lpc[i] = SHR32(r,3);
64         for (j = 0; j < (i+1)>>1; j++)
65         {
66            opus_val32 tmp1, tmp2;
67            tmp1 = lpc[j];
68            tmp2 = lpc[i-1-j];
69            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
70            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
71         }
72
73         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
74         /* Bail out once we get 30 dB gain */
75#ifdef FIXED_POINT
76         if (error<SHR32(ac[0],10))
77            break;
78#else
79         if (error<.001f*ac[0])
80            break;
81#endif
82      }
83   }
84#ifdef FIXED_POINT
85   for (i=0;i<p;i++)
86      _lpc[i] = ROUND16(lpc[i],16);
87#endif
88}
89
90
91void celt_fir_c(
92         const opus_val16 *_x,
93         const opus_val16 *num,
94         opus_val16 *_y,
95         int N,
96         int ord,
97         opus_val16 *mem,
98         int arch)
99{
100   int i,j;
101   VARDECL(opus_val16, rnum);
102   VARDECL(opus_val16, x);
103   SAVE_STACK;
104
105   ALLOC(rnum, ord, opus_val16);
106   ALLOC(x, N+ord, opus_val16);
107   for(i=0;i<ord;i++)
108      rnum[i] = num[ord-i-1];
109   for(i=0;i<ord;i++)
110      x[i] = mem[ord-i-1];
111   for (i=0;i<N;i++)
112      x[i+ord]=_x[i];
113   for(i=0;i<ord;i++)
114      mem[i] = _x[N-i-1];
115#ifdef SMALL_FOOTPRINT
116   (void)arch;
117   for (i=0;i<N;i++)
118   {
119      opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
120      for (j=0;j<ord;j++)
121      {
122         sum = MAC16_16(sum,rnum[j],x[i+j]);
123      }
124      _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
125   }
126#else
127   for (i=0;i<N-3;i+=4)
128   {
129      opus_val32 sum[4]={0,0,0,0};
130      xcorr_kernel(rnum, x+i, sum, ord, arch);
131      _y[i  ] = SATURATE16(ADD32(EXTEND32(_x[i  ]), PSHR32(sum[0], SIG_SHIFT)));
132      _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
133      _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
134      _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
135   }
136   for (;i<N;i++)
137   {
138      opus_val32 sum = 0;
139      for (j=0;j<ord;j++)
140         sum = MAC16_16(sum,rnum[j],x[i+j]);
141      _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
142   }
143#endif
144   RESTORE_STACK;
145}
146
147void celt_iir(const opus_val32 *_x,
148         const opus_val16 *den,
149         opus_val32 *_y,
150         int N,
151         int ord,
152         opus_val16 *mem,
153         int arch)
154{
155#ifdef SMALL_FOOTPRINT
156   int i,j;
157   (void)arch;
158   for (i=0;i<N;i++)
159   {
160      opus_val32 sum = _x[i];
161      for (j=0;j<ord;j++)
162      {
163         sum -= MULT16_16(den[j],mem[j]);
164      }
165      for (j=ord-1;j>=1;j--)
166      {
167         mem[j]=mem[j-1];
168      }
169      mem[0] = ROUND16(sum,SIG_SHIFT);
170      _y[i] = sum;
171   }
172#else
173   int i,j;
174   VARDECL(opus_val16, rden);
175   VARDECL(opus_val16, y);
176   SAVE_STACK;
177
178   celt_assert((ord&3)==0);
179   ALLOC(rden, ord, opus_val16);
180   ALLOC(y, N+ord, opus_val16);
181   for(i=0;i<ord;i++)
182      rden[i] = den[ord-i-1];
183   for(i=0;i<ord;i++)
184      y[i] = -mem[ord-i-1];
185   for(;i<N+ord;i++)
186      y[i]=0;
187   for (i=0;i<N-3;i+=4)
188   {
189      /* Unroll by 4 as if it were an FIR filter */
190      opus_val32 sum[4];
191      sum[0]=_x[i];
192      sum[1]=_x[i+1];
193      sum[2]=_x[i+2];
194      sum[3]=_x[i+3];
195      xcorr_kernel(rden, y+i, sum, ord, arch);
196
197      /* Patch up the result to compensate for the fact that this is an IIR */
198      y[i+ord  ] = -ROUND16(sum[0],SIG_SHIFT);
199      _y[i  ] = sum[0];
200      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
201      y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
202      _y[i+1] = sum[1];
203      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
204      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
205      y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
206      _y[i+2] = sum[2];
207
208      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
209      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
210      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
211      y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
212      _y[i+3] = sum[3];
213   }
214   for (;i<N;i++)
215   {
216      opus_val32 sum = _x[i];
217      for (j=0;j<ord;j++)
218         sum -= MULT16_16(rden[j],y[i+j]);
219      y[i+ord] = ROUND16(sum,SIG_SHIFT);
220      _y[i] = sum;
221   }
222   for(i=0;i<ord;i++)
223      mem[i] = _y[N-i-1];
224   RESTORE_STACK;
225#endif
226}
227
228int _celt_autocorr(
229                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
230                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
231                   const opus_val16       *window,
232                   int          overlap,
233                   int          lag,
234                   int          n,
235                   int          arch
236                  )
237{
238   opus_val32 d;
239   int i, k;
240   int fastN=n-lag;
241   int shift;
242   const opus_val16 *xptr;
243   VARDECL(opus_val16, xx);
244   SAVE_STACK;
245   ALLOC(xx, n, opus_val16);
246   celt_assert(n>0);
247   celt_assert(overlap>=0);
248   if (overlap == 0)
249   {
250      xptr = x;
251   } else {
252      for (i=0;i<n;i++)
253         xx[i] = x[i];
254      for (i=0;i<overlap;i++)
255      {
256         xx[i] = MULT16_16_Q15(x[i],window[i]);
257         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
258      }
259      xptr = xx;
260   }
261   shift=0;
262#ifdef FIXED_POINT
263   {
264      opus_val32 ac0;
265      ac0 = 1+(n<<7);
266      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
267      for(i=(n&1);i<n;i+=2)
268      {
269         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
270         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
271      }
272
273      shift = celt_ilog2(ac0)-30+10;
274      shift = (shift)/2;
275      if (shift>0)
276      {
277         for(i=0;i<n;i++)
278            xx[i] = PSHR32(xptr[i], shift);
279         xptr = xx;
280      } else
281         shift = 0;
282   }
283#endif
284   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
285   for (k=0;k<=lag;k++)
286   {
287      for (i = k+fastN, d = 0; i < n; i++)
288         d = MAC16_16(d, xptr[i], xptr[i-k]);
289      ac[k] += d;
290   }
291#ifdef FIXED_POINT
292   shift = 2*shift;
293   if (shift<=0)
294      ac[0] += SHL32((opus_int32)1, -shift);
295   if (ac[0] < 268435456)
296   {
297      int shift2 = 29 - EC_ILOG(ac[0]);
298      for (i=0;i<=lag;i++)
299         ac[i] = SHL32(ac[i], shift2);
300      shift -= shift2;
301   } else if (ac[0] >= 536870912)
302   {
303      int shift2=1;
304      if (ac[0] >= 1073741824)
305         shift2++;
306      for (i=0;i<=lag;i++)
307         ac[i] = SHR32(ac[i], shift2);
308      shift += shift2;
309   }
310#endif
311
312   RESTORE_STACK;
313   return shift;
314}
315