1/* Copyright (c) 2007-2008 CSIRO
2   Copyright (c) 2007-2009 Xiph.Org Foundation
3   Written by Jean-Marc Valin */
4/**
5   @file pitch.c
6   @brief Pitch analysis
7 */
8
9/*
10   Redistribution and use in source and binary forms, with or without
11   modification, are permitted provided that the following conditions
12   are met:
13
14   - Redistributions of source code must retain the above copyright
15   notice, this list of conditions and the following disclaimer.
16
17   - Redistributions in binary form must reproduce the above copyright
18   notice, this list of conditions and the following disclaimer in the
19   documentation and/or other materials provided with the distribution.
20
21   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*/
33
34#ifdef HAVE_CONFIG_H
35#include "config.h"
36#endif
37
38#include "pitch.h"
39#include "os_support.h"
40#include "modes.h"
41#include "stack_alloc.h"
42#include "mathops.h"
43#include "celt_lpc.h"
44
45static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
46                            int max_pitch, int *best_pitch
47#ifdef FIXED_POINT
48                            , int yshift, opus_val32 maxcorr
49#endif
50                            )
51{
52   int i, j;
53   opus_val32 Syy=1;
54   opus_val16 best_num[2];
55   opus_val32 best_den[2];
56#ifdef FIXED_POINT
57   int xshift;
58
59   xshift = celt_ilog2(maxcorr)-14;
60#endif
61
62   best_num[0] = -1;
63   best_num[1] = -1;
64   best_den[0] = 0;
65   best_den[1] = 0;
66   best_pitch[0] = 0;
67   best_pitch[1] = 1;
68   for (j=0;j<len;j++)
69      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
70   for (i=0;i<max_pitch;i++)
71   {
72      if (xcorr[i]>0)
73      {
74         opus_val16 num;
75         opus_val32 xcorr16;
76         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
77#ifndef FIXED_POINT
78         /* Considering the range of xcorr16, this should avoid both underflows
79            and overflows (inf) when squaring xcorr16 */
80         xcorr16 *= 1e-12f;
81#endif
82         num = MULT16_16_Q15(xcorr16,xcorr16);
83         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
84         {
85            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
86            {
87               best_num[1] = best_num[0];
88               best_den[1] = best_den[0];
89               best_pitch[1] = best_pitch[0];
90               best_num[0] = num;
91               best_den[0] = Syy;
92               best_pitch[0] = i;
93            } else {
94               best_num[1] = num;
95               best_den[1] = Syy;
96               best_pitch[1] = i;
97            }
98         }
99      }
100      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
101      Syy = MAX32(1, Syy);
102   }
103}
104
105static void celt_fir5(const opus_val16 *x,
106         const opus_val16 *num,
107         opus_val16 *y,
108         int N,
109         opus_val16 *mem)
110{
111   int i;
112   opus_val16 num0, num1, num2, num3, num4;
113   opus_val32 mem0, mem1, mem2, mem3, mem4;
114   num0=num[0];
115   num1=num[1];
116   num2=num[2];
117   num3=num[3];
118   num4=num[4];
119   mem0=mem[0];
120   mem1=mem[1];
121   mem2=mem[2];
122   mem3=mem[3];
123   mem4=mem[4];
124   for (i=0;i<N;i++)
125   {
126      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
127      sum = MAC16_16(sum,num0,mem0);
128      sum = MAC16_16(sum,num1,mem1);
129      sum = MAC16_16(sum,num2,mem2);
130      sum = MAC16_16(sum,num3,mem3);
131      sum = MAC16_16(sum,num4,mem4);
132      mem4 = mem3;
133      mem3 = mem2;
134      mem2 = mem1;
135      mem1 = mem0;
136      mem0 = x[i];
137      y[i] = ROUND16(sum, SIG_SHIFT);
138   }
139   mem[0]=mem0;
140   mem[1]=mem1;
141   mem[2]=mem2;
142   mem[3]=mem3;
143   mem[4]=mem4;
144}
145
146
147void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
148      int len, int C, int arch)
149{
150   int i;
151   opus_val32 ac[5];
152   opus_val16 tmp=Q15ONE;
153   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
154   opus_val16 lpc2[5];
155   opus_val16 c1 = QCONST16(.8f,15);
156#ifdef FIXED_POINT
157   int shift;
158   opus_val32 maxabs = celt_maxabs32(x[0], len);
159   if (C==2)
160   {
161      opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
162      maxabs = MAX32(maxabs, maxabs_1);
163   }
164   if (maxabs<1)
165      maxabs=1;
166   shift = celt_ilog2(maxabs)-10;
167   if (shift<0)
168      shift=0;
169   if (C==2)
170      shift++;
171#endif
172   for (i=1;i<len>>1;i++)
173      x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
174   x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
175   if (C==2)
176   {
177      for (i=1;i<len>>1;i++)
178         x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
179      x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
180   }
181
182   _celt_autocorr(x_lp, ac, NULL, 0,
183                  4, len>>1, arch);
184
185   /* Noise floor -40 dB */
186#ifdef FIXED_POINT
187   ac[0] += SHR32(ac[0],13);
188#else
189   ac[0] *= 1.0001f;
190#endif
191   /* Lag windowing */
192   for (i=1;i<=4;i++)
193   {
194      /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
195#ifdef FIXED_POINT
196      ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
197#else
198      ac[i] -= ac[i]*(.008f*i)*(.008f*i);
199#endif
200   }
201
202   _celt_lpc(lpc, ac, 4);
203   for (i=0;i<4;i++)
204   {
205      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
206      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
207   }
208   /* Add a zero */
209   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
210   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
211   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
212   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
213   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
214   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
215}
216
217/* Pure C implementation. */
218#ifdef FIXED_POINT
219opus_val32
220#else
221void
222#endif
223#if defined(OVERRIDE_PITCH_XCORR)
224celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
225      opus_val32 *xcorr, int len, int max_pitch)
226#else
227celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
228      opus_val32 *xcorr, int len, int max_pitch, int arch)
229#endif
230{
231
232#if 0 /* This is a simple version of the pitch correlation that should work
233         well on DSPs like Blackfin and TI C5x/C6x */
234   int i, j;
235#ifdef FIXED_POINT
236   opus_val32 maxcorr=1;
237#endif
238#if !defined(OVERRIDE_PITCH_XCORR)
239   (void)arch;
240#endif
241   for (i=0;i<max_pitch;i++)
242   {
243      opus_val32 sum = 0;
244      for (j=0;j<len;j++)
245         sum = MAC16_16(sum, _x[j], _y[i+j]);
246      xcorr[i] = sum;
247#ifdef FIXED_POINT
248      maxcorr = MAX32(maxcorr, sum);
249#endif
250   }
251#ifdef FIXED_POINT
252   return maxcorr;
253#endif
254
255#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
256   int i;
257   /*The EDSP version requires that max_pitch is at least 1, and that _x is
258      32-bit aligned.
259     Since it's hard to put asserts in assembly, put them here.*/
260#ifdef FIXED_POINT
261   opus_val32 maxcorr=1;
262#endif
263   celt_assert(max_pitch>0);
264   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
265   for (i=0;i<max_pitch-3;i+=4)
266   {
267      opus_val32 sum[4]={0,0,0,0};
268#if defined(OVERRIDE_PITCH_XCORR)
269      xcorr_kernel_c(_x, _y+i, sum, len);
270#else
271      xcorr_kernel(_x, _y+i, sum, len, arch);
272#endif
273      xcorr[i]=sum[0];
274      xcorr[i+1]=sum[1];
275      xcorr[i+2]=sum[2];
276      xcorr[i+3]=sum[3];
277#ifdef FIXED_POINT
278      sum[0] = MAX32(sum[0], sum[1]);
279      sum[2] = MAX32(sum[2], sum[3]);
280      sum[0] = MAX32(sum[0], sum[2]);
281      maxcorr = MAX32(maxcorr, sum[0]);
282#endif
283   }
284   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
285   for (;i<max_pitch;i++)
286   {
287      opus_val32 sum;
288#if defined(OVERRIDE_PITCH_XCORR)
289      sum = celt_inner_prod_c(_x, _y+i, len);
290#else
291      sum = celt_inner_prod(_x, _y+i, len, arch);
292#endif
293      xcorr[i] = sum;
294#ifdef FIXED_POINT
295      maxcorr = MAX32(maxcorr, sum);
296#endif
297   }
298#ifdef FIXED_POINT
299   return maxcorr;
300#endif
301#endif
302}
303
304void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
305                  int len, int max_pitch, int *pitch, int arch)
306{
307   int i, j;
308   int lag;
309   int best_pitch[2]={0,0};
310   VARDECL(opus_val16, x_lp4);
311   VARDECL(opus_val16, y_lp4);
312   VARDECL(opus_val32, xcorr);
313#ifdef FIXED_POINT
314   opus_val32 maxcorr;
315   opus_val32 xmax, ymax;
316   int shift=0;
317#endif
318   int offset;
319
320   SAVE_STACK;
321
322   celt_assert(len>0);
323   celt_assert(max_pitch>0);
324   lag = len+max_pitch;
325
326   ALLOC(x_lp4, len>>2, opus_val16);
327   ALLOC(y_lp4, lag>>2, opus_val16);
328   ALLOC(xcorr, max_pitch>>1, opus_val32);
329
330   /* Downsample by 2 again */
331   for (j=0;j<len>>2;j++)
332      x_lp4[j] = x_lp[2*j];
333   for (j=0;j<lag>>2;j++)
334      y_lp4[j] = y[2*j];
335
336#ifdef FIXED_POINT
337   xmax = celt_maxabs16(x_lp4, len>>2);
338   ymax = celt_maxabs16(y_lp4, lag>>2);
339   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
340   if (shift>0)
341   {
342      for (j=0;j<len>>2;j++)
343         x_lp4[j] = SHR16(x_lp4[j], shift);
344      for (j=0;j<lag>>2;j++)
345         y_lp4[j] = SHR16(y_lp4[j], shift);
346      /* Use double the shift for a MAC */
347      shift *= 2;
348   } else {
349      shift = 0;
350   }
351#endif
352
353   /* Coarse search with 4x decimation */
354
355#ifdef FIXED_POINT
356   maxcorr =
357#endif
358   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
359
360   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
361#ifdef FIXED_POINT
362                   , 0, maxcorr
363#endif
364                   );
365
366   /* Finer search with 2x decimation */
367#ifdef FIXED_POINT
368   maxcorr=1;
369#endif
370   for (i=0;i<max_pitch>>1;i++)
371   {
372      opus_val32 sum;
373      xcorr[i] = 0;
374      if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
375         continue;
376#ifdef FIXED_POINT
377      sum = 0;
378      for (j=0;j<len>>1;j++)
379         sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
380#else
381      sum = celt_inner_prod_c(x_lp, y+i, len>>1);
382#endif
383      xcorr[i] = MAX32(-1, sum);
384#ifdef FIXED_POINT
385      maxcorr = MAX32(maxcorr, sum);
386#endif
387   }
388   find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
389#ifdef FIXED_POINT
390                   , shift+1, maxcorr
391#endif
392                   );
393
394   /* Refine by pseudo-interpolation */
395   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
396   {
397      opus_val32 a, b, c;
398      a = xcorr[best_pitch[0]-1];
399      b = xcorr[best_pitch[0]];
400      c = xcorr[best_pitch[0]+1];
401      if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
402         offset = 1;
403      else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
404         offset = -1;
405      else
406         offset = 0;
407   } else {
408      offset = 0;
409   }
410   *pitch = 2*best_pitch[0]-offset;
411
412   RESTORE_STACK;
413}
414
415#ifdef FIXED_POINT
416static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
417{
418   opus_val32 x2y2;
419   int sx, sy, shift;
420   opus_val32 g;
421   opus_val16 den;
422   if (xy == 0 || xx == 0 || yy == 0)
423      return 0;
424   sx = celt_ilog2(xx)-14;
425   sy = celt_ilog2(yy)-14;
426   shift = sx + sy;
427   x2y2 = MULT16_16_Q14(VSHR32(xx, sx), VSHR32(yy, sy));
428   if (shift & 1) {
429      if (x2y2 < 32768)
430      {
431         x2y2 <<= 1;
432         shift--;
433      } else {
434         x2y2 >>= 1;
435         shift++;
436      }
437   }
438   den = celt_rsqrt_norm(x2y2);
439   g = MULT16_32_Q15(den, xy);
440   g = VSHR32(g, (shift>>1)-1);
441   return EXTRACT16(MIN32(g, Q15ONE));
442}
443#else
444static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
445{
446   return xy/celt_sqrt(1+xx*yy);
447}
448#endif
449
450static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
451opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
452      int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
453{
454   int k, i, T, T0;
455   opus_val16 g, g0;
456   opus_val16 pg;
457   opus_val32 xy,xx,yy,xy2;
458   opus_val32 xcorr[3];
459   opus_val32 best_xy, best_yy;
460   int offset;
461   int minperiod0;
462   VARDECL(opus_val32, yy_lookup);
463   SAVE_STACK;
464
465   minperiod0 = minperiod;
466   maxperiod /= 2;
467   minperiod /= 2;
468   *T0_ /= 2;
469   prev_period /= 2;
470   N /= 2;
471   x += maxperiod;
472   if (*T0_>=maxperiod)
473      *T0_=maxperiod-1;
474
475   T = T0 = *T0_;
476   ALLOC(yy_lookup, maxperiod+1, opus_val32);
477   dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
478   yy_lookup[0] = xx;
479   yy=xx;
480   for (i=1;i<=maxperiod;i++)
481   {
482      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
483      yy_lookup[i] = MAX32(0, yy);
484   }
485   yy = yy_lookup[T0];
486   best_xy = xy;
487   best_yy = yy;
488   g = g0 = compute_pitch_gain(xy, xx, yy);
489   /* Look for any pitch at T/k */
490   for (k=2;k<=15;k++)
491   {
492      int T1, T1b;
493      opus_val16 g1;
494      opus_val16 cont=0;
495      opus_val16 thresh;
496      T1 = celt_udiv(2*T0+k, 2*k);
497      if (T1 < minperiod)
498         break;
499      /* Look for another strong correlation at T1b */
500      if (k==2)
501      {
502         if (T1+T0>maxperiod)
503            T1b = T0;
504         else
505            T1b = T0+T1;
506      } else
507      {
508         T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
509      }
510      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
511      xy = HALF32(xy + xy2);
512      yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
513      g1 = compute_pitch_gain(xy, xx, yy);
514      if (abs(T1-prev_period)<=1)
515         cont = prev_gain;
516      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
517         cont = HALF16(prev_gain);
518      else
519         cont = 0;
520      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
521      /* Bias against very high pitch (very short period) to avoid false-positives
522         due to short-term correlation */
523      if (T1<3*minperiod)
524         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
525      else if (T1<2*minperiod)
526         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
527      if (g1 > thresh)
528      {
529         best_xy = xy;
530         best_yy = yy;
531         T = T1;
532         g = g1;
533      }
534   }
535   best_xy = MAX32(0, best_xy);
536   if (best_yy <= best_xy)
537      pg = Q15ONE;
538   else
539      pg = SHR32(frac_div32(best_xy,best_yy+1),16);
540
541   for (k=0;k<3;k++)
542      xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
543   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
544      offset = 1;
545   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
546      offset = -1;
547   else
548      offset = 0;
549   if (pg > g)
550      pg = g;
551   *T0_ = 2*T+offset;
552
553   if (*T0_<minperiod0)
554      *T0_=minperiod0;
555   RESTORE_STACK;
556   return pg;
557}
558