1/* Copyright (c) 2009-2010 Xiph.Org Foundation 2 Written by Jean-Marc Valin */ 3/* 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 8 - Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 - Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer in the 13 documentation and/or other materials provided with the distribution. 14 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26*/ 27 28#ifdef HAVE_CONFIG_H 29#include "config.h" 30#endif 31 32#include "celt_lpc.h" 33#include "stack_alloc.h" 34#include "mathops.h" 35#include "pitch.h" 36 37void _celt_lpc( 38 opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ 39const opus_val32 *ac, /* in: [0...p] autocorrelation values */ 40int p 41) 42{ 43 int i, j; 44 opus_val32 r; 45 opus_val32 error = ac[0]; 46#ifdef FIXED_POINT 47 opus_val32 lpc[LPC_ORDER]; 48#else 49 float *lpc = _lpc; 50#endif 51 52 OPUS_CLEAR(lpc, p); 53 if (ac[0] != 0) 54 { 55 for (i = 0; i < p; i++) { 56 /* Sum up this iteration's reflection coefficient */ 57 opus_val32 rr = 0; 58 for (j = 0; j < i; j++) 59 rr += MULT32_32_Q31(lpc[j],ac[i - j]); 60 rr += SHR32(ac[i + 1],3); 61 r = -frac_div32(SHL32(rr,3), error); 62 /* Update LPC coefficients and total error */ 63 lpc[i] = SHR32(r,3); 64 for (j = 0; j < (i+1)>>1; j++) 65 { 66 opus_val32 tmp1, tmp2; 67 tmp1 = lpc[j]; 68 tmp2 = lpc[i-1-j]; 69 lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); 70 lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); 71 } 72 73 error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); 74 /* Bail out once we get 30 dB gain */ 75#ifdef FIXED_POINT 76 if (error<SHR32(ac[0],10)) 77 break; 78#else 79 if (error<.001f*ac[0]) 80 break; 81#endif 82 } 83 } 84#ifdef FIXED_POINT 85 for (i=0;i<p;i++) 86 _lpc[i] = ROUND16(lpc[i],16); 87#endif 88} 89 90 91void celt_fir_c( 92 const opus_val16 *_x, 93 const opus_val16 *num, 94 opus_val16 *_y, 95 int N, 96 int ord, 97 opus_val16 *mem, 98 int arch) 99{ 100 int i,j; 101 VARDECL(opus_val16, rnum); 102 VARDECL(opus_val16, x); 103 SAVE_STACK; 104 105 ALLOC(rnum, ord, opus_val16); 106 ALLOC(x, N+ord, opus_val16); 107 for(i=0;i<ord;i++) 108 rnum[i] = num[ord-i-1]; 109 for(i=0;i<ord;i++) 110 x[i] = mem[ord-i-1]; 111 for (i=0;i<N;i++) 112 x[i+ord]=_x[i]; 113 for(i=0;i<ord;i++) 114 mem[i] = _x[N-i-1]; 115#ifdef SMALL_FOOTPRINT 116 (void)arch; 117 for (i=0;i<N;i++) 118 { 119 opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); 120 for (j=0;j<ord;j++) 121 { 122 sum = MAC16_16(sum,rnum[j],x[i+j]); 123 } 124 _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); 125 } 126#else 127 for (i=0;i<N-3;i+=4) 128 { 129 opus_val32 sum[4]={0,0,0,0}; 130 xcorr_kernel(rnum, x+i, sum, ord, arch); 131 _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); 132 _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); 133 _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); 134 _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); 135 } 136 for (;i<N;i++) 137 { 138 opus_val32 sum = 0; 139 for (j=0;j<ord;j++) 140 sum = MAC16_16(sum,rnum[j],x[i+j]); 141 _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); 142 } 143#endif 144 RESTORE_STACK; 145} 146 147void celt_iir(const opus_val32 *_x, 148 const opus_val16 *den, 149 opus_val32 *_y, 150 int N, 151 int ord, 152 opus_val16 *mem, 153 int arch) 154{ 155#ifdef SMALL_FOOTPRINT 156 int i,j; 157 (void)arch; 158 for (i=0;i<N;i++) 159 { 160 opus_val32 sum = _x[i]; 161 for (j=0;j<ord;j++) 162 { 163 sum -= MULT16_16(den[j],mem[j]); 164 } 165 for (j=ord-1;j>=1;j--) 166 { 167 mem[j]=mem[j-1]; 168 } 169 mem[0] = ROUND16(sum,SIG_SHIFT); 170 _y[i] = sum; 171 } 172#else 173 int i,j; 174 VARDECL(opus_val16, rden); 175 VARDECL(opus_val16, y); 176 SAVE_STACK; 177 178 celt_assert((ord&3)==0); 179 ALLOC(rden, ord, opus_val16); 180 ALLOC(y, N+ord, opus_val16); 181 for(i=0;i<ord;i++) 182 rden[i] = den[ord-i-1]; 183 for(i=0;i<ord;i++) 184 y[i] = -mem[ord-i-1]; 185 for(;i<N+ord;i++) 186 y[i]=0; 187 for (i=0;i<N-3;i+=4) 188 { 189 /* Unroll by 4 as if it were an FIR filter */ 190 opus_val32 sum[4]; 191 sum[0]=_x[i]; 192 sum[1]=_x[i+1]; 193 sum[2]=_x[i+2]; 194 sum[3]=_x[i+3]; 195 xcorr_kernel(rden, y+i, sum, ord, arch); 196 197 /* Patch up the result to compensate for the fact that this is an IIR */ 198 y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT); 199 _y[i ] = sum[0]; 200 sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); 201 y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT); 202 _y[i+1] = sum[1]; 203 sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); 204 sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); 205 y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT); 206 _y[i+2] = sum[2]; 207 208 sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); 209 sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); 210 sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); 211 y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT); 212 _y[i+3] = sum[3]; 213 } 214 for (;i<N;i++) 215 { 216 opus_val32 sum = _x[i]; 217 for (j=0;j<ord;j++) 218 sum -= MULT16_16(rden[j],y[i+j]); 219 y[i+ord] = ROUND16(sum,SIG_SHIFT); 220 _y[i] = sum; 221 } 222 for(i=0;i<ord;i++) 223 mem[i] = _y[N-i-1]; 224 RESTORE_STACK; 225#endif 226} 227 228int _celt_autocorr( 229 const opus_val16 *x, /* in: [0...n-1] samples x */ 230 opus_val32 *ac, /* out: [0...lag-1] ac values */ 231 const opus_val16 *window, 232 int overlap, 233 int lag, 234 int n, 235 int arch 236 ) 237{ 238 opus_val32 d; 239 int i, k; 240 int fastN=n-lag; 241 int shift; 242 const opus_val16 *xptr; 243 VARDECL(opus_val16, xx); 244 SAVE_STACK; 245 ALLOC(xx, n, opus_val16); 246 celt_assert(n>0); 247 celt_assert(overlap>=0); 248 if (overlap == 0) 249 { 250 xptr = x; 251 } else { 252 for (i=0;i<n;i++) 253 xx[i] = x[i]; 254 for (i=0;i<overlap;i++) 255 { 256 xx[i] = MULT16_16_Q15(x[i],window[i]); 257 xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]); 258 } 259 xptr = xx; 260 } 261 shift=0; 262#ifdef FIXED_POINT 263 { 264 opus_val32 ac0; 265 ac0 = 1+(n<<7); 266 if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9); 267 for(i=(n&1);i<n;i+=2) 268 { 269 ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9); 270 ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9); 271 } 272 273 shift = celt_ilog2(ac0)-30+10; 274 shift = (shift)/2; 275 if (shift>0) 276 { 277 for(i=0;i<n;i++) 278 xx[i] = PSHR32(xptr[i], shift); 279 xptr = xx; 280 } else 281 shift = 0; 282 } 283#endif 284 celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); 285 for (k=0;k<=lag;k++) 286 { 287 for (i = k+fastN, d = 0; i < n; i++) 288 d = MAC16_16(d, xptr[i], xptr[i-k]); 289 ac[k] += d; 290 } 291#ifdef FIXED_POINT 292 shift = 2*shift; 293 if (shift<=0) 294 ac[0] += SHL32((opus_int32)1, -shift); 295 if (ac[0] < 268435456) 296 { 297 int shift2 = 29 - EC_ILOG(ac[0]); 298 for (i=0;i<=lag;i++) 299 ac[i] = SHL32(ac[i], shift2); 300 shift -= shift2; 301 } else if (ac[0] >= 536870912) 302 { 303 int shift2=1; 304 if (ac[0] >= 1073741824) 305 shift2++; 306 for (i=0;i<=lag;i++) 307 ac[i] = SHR32(ac[i], shift2); 308 shift += shift2; 309 } 310#endif 311 312 RESTORE_STACK; 313 return shift; 314} 315