1/* Copyright (c) 2007-2008 CSIRO
2   Copyright (c) 2007-2009 Xiph.Org Foundation
3   Written by Jean-Marc Valin */
4/**
5   @file pitch.c
6   @brief Pitch analysis
7 */
8
9/*
10   Redistribution and use in source and binary forms, with or without
11   modification, are permitted provided that the following conditions
12   are met:
13
14   - Redistributions of source code must retain the above copyright
15   notice, this list of conditions and the following disclaimer.
16
17   - Redistributions in binary form must reproduce the above copyright
18   notice, this list of conditions and the following disclaimer in the
19   documentation and/or other materials provided with the distribution.
20
21   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*/
33
34#ifdef HAVE_CONFIG_H
35#include "config.h"
36#endif
37
38#include "pitch.h"
39#include "os_support.h"
40#include "modes.h"
41#include "stack_alloc.h"
42#include "mathops.h"
43#include "celt_lpc.h"
44
45static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
46                            int max_pitch, int *best_pitch
47#ifdef FIXED_POINT
48                            , int yshift, opus_val32 maxcorr
49#endif
50                            )
51{
52   int i, j;
53   opus_val32 Syy=1;
54   opus_val16 best_num[2];
55   opus_val32 best_den[2];
56#ifdef FIXED_POINT
57   int xshift;
58
59   xshift = celt_ilog2(maxcorr)-14;
60#endif
61
62   best_num[0] = -1;
63   best_num[1] = -1;
64   best_den[0] = 0;
65   best_den[1] = 0;
66   best_pitch[0] = 0;
67   best_pitch[1] = 1;
68   for (j=0;j<len;j++)
69      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
70   for (i=0;i<max_pitch;i++)
71   {
72      if (xcorr[i]>0)
73      {
74         opus_val16 num;
75         opus_val32 xcorr16;
76         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
77#ifndef FIXED_POINT
78         /* Considering the range of xcorr16, this should avoid both underflows
79            and overflows (inf) when squaring xcorr16 */
80         xcorr16 *= 1e-12f;
81#endif
82         num = MULT16_16_Q15(xcorr16,xcorr16);
83         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
84         {
85            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
86            {
87               best_num[1] = best_num[0];
88               best_den[1] = best_den[0];
89               best_pitch[1] = best_pitch[0];
90               best_num[0] = num;
91               best_den[0] = Syy;
92               best_pitch[0] = i;
93            } else {
94               best_num[1] = num;
95               best_den[1] = Syy;
96               best_pitch[1] = i;
97            }
98         }
99      }
100      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
101      Syy = MAX32(1, Syy);
102   }
103}
104
105static void celt_fir5(const opus_val16 *x,
106         const opus_val16 *num,
107         opus_val16 *y,
108         int N,
109         opus_val16 *mem)
110{
111   int i;
112   opus_val16 num0, num1, num2, num3, num4;
113   opus_val32 mem0, mem1, mem2, mem3, mem4;
114   num0=num[0];
115   num1=num[1];
116   num2=num[2];
117   num3=num[3];
118   num4=num[4];
119   mem0=mem[0];
120   mem1=mem[1];
121   mem2=mem[2];
122   mem3=mem[3];
123   mem4=mem[4];
124   for (i=0;i<N;i++)
125   {
126      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
127      sum = MAC16_16(sum,num0,mem0);
128      sum = MAC16_16(sum,num1,mem1);
129      sum = MAC16_16(sum,num2,mem2);
130      sum = MAC16_16(sum,num3,mem3);
131      sum = MAC16_16(sum,num4,mem4);
132      mem4 = mem3;
133      mem3 = mem2;
134      mem2 = mem1;
135      mem1 = mem0;
136      mem0 = x[i];
137      y[i] = ROUND16(sum, SIG_SHIFT);
138   }
139   mem[0]=mem0;
140   mem[1]=mem1;
141   mem[2]=mem2;
142   mem[3]=mem3;
143   mem[4]=mem4;
144}
145
146
147void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
148      int len, int C)
149{
150   int i;
151   opus_val32 ac[5];
152   opus_val16 tmp=Q15ONE;
153   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
154   opus_val16 lpc2[5];
155   opus_val16 c1 = QCONST16(.8f,15);
156#ifdef FIXED_POINT
157   int shift;
158   opus_val32 maxabs = celt_maxabs32(x[0], len);
159   if (C==2)
160   {
161      opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
162      maxabs = MAX32(maxabs, maxabs_1);
163   }
164   if (maxabs<1)
165      maxabs=1;
166   shift = celt_ilog2(maxabs)-10;
167   if (shift<0)
168      shift=0;
169   if (C==2)
170      shift++;
171#endif
172   for (i=1;i<len>>1;i++)
173      x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
174   x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
175   if (C==2)
176   {
177      for (i=1;i<len>>1;i++)
178         x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
179      x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
180   }
181
182   _celt_autocorr(x_lp, ac, NULL, 0,
183                  4, len>>1);
184
185   /* Noise floor -40 dB */
186#ifdef FIXED_POINT
187   ac[0] += SHR32(ac[0],13);
188#else
189   ac[0] *= 1.0001f;
190#endif
191   /* Lag windowing */
192   for (i=1;i<=4;i++)
193   {
194      /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
195#ifdef FIXED_POINT
196      ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
197#else
198      ac[i] -= ac[i]*(.008f*i)*(.008f*i);
199#endif
200   }
201
202   _celt_lpc(lpc, ac, 4);
203   for (i=0;i<4;i++)
204   {
205      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
206      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
207   }
208   /* Add a zero */
209   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
210   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
211   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
212   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
213   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
214   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
215}
216
217#if 0 /* This is a simple version of the pitch correlation that should work
218         well on DSPs like Blackfin and TI C5x/C6x */
219
220#ifdef FIXED_POINT
221opus_val32
222#else
223void
224#endif
225celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
226{
227   int i, j;
228#ifdef FIXED_POINT
229   opus_val32 maxcorr=1;
230#endif
231   for (i=0;i<max_pitch;i++)
232   {
233      opus_val32 sum = 0;
234      for (j=0;j<len;j++)
235         sum = MAC16_16(sum, x[j],y[i+j]);
236      xcorr[i] = sum;
237#ifdef FIXED_POINT
238      maxcorr = MAX32(maxcorr, sum);
239#endif
240   }
241#ifdef FIXED_POINT
242   return maxcorr;
243#endif
244}
245
246#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
247
248#ifdef FIXED_POINT
249opus_val32
250#else
251void
252#endif
253celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
254{
255   int i,j;
256#ifdef FIXED_POINT
257   opus_val32 maxcorr=1;
258#endif
259   for (i=0;i<max_pitch-3;i+=4)
260   {
261      opus_val32 sum[4]={0,0,0,0};
262      xcorr_kernel(_x, _y+i, sum, len);
263      xcorr[i]=sum[0];
264      xcorr[i+1]=sum[1];
265      xcorr[i+2]=sum[2];
266      xcorr[i+3]=sum[3];
267#ifdef FIXED_POINT
268      sum[0] = MAX32(sum[0], sum[1]);
269      sum[2] = MAX32(sum[2], sum[3]);
270      sum[0] = MAX32(sum[0], sum[2]);
271      maxcorr = MAX32(maxcorr, sum[0]);
272#endif
273   }
274   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
275   for (;i<max_pitch;i++)
276   {
277      opus_val32 sum = 0;
278      for (j=0;j<len;j++)
279         sum = MAC16_16(sum, _x[j],_y[i+j]);
280      xcorr[i] = sum;
281#ifdef FIXED_POINT
282      maxcorr = MAX32(maxcorr, sum);
283#endif
284   }
285#ifdef FIXED_POINT
286   return maxcorr;
287#endif
288}
289
290#endif
291void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
292                  int len, int max_pitch, int *pitch)
293{
294   int i, j;
295   int lag;
296   int best_pitch[2]={0,0};
297   VARDECL(opus_val16, x_lp4);
298   VARDECL(opus_val16, y_lp4);
299   VARDECL(opus_val32, xcorr);
300#ifdef FIXED_POINT
301   opus_val32 maxcorr;
302   opus_val32 xmax, ymax;
303   int shift=0;
304#endif
305   int offset;
306
307   SAVE_STACK;
308
309   celt_assert(len>0);
310   celt_assert(max_pitch>0);
311   lag = len+max_pitch;
312
313   ALLOC(x_lp4, len>>2, opus_val16);
314   ALLOC(y_lp4, lag>>2, opus_val16);
315   ALLOC(xcorr, max_pitch>>1, opus_val32);
316
317   /* Downsample by 2 again */
318   for (j=0;j<len>>2;j++)
319      x_lp4[j] = x_lp[2*j];
320   for (j=0;j<lag>>2;j++)
321      y_lp4[j] = y[2*j];
322
323#ifdef FIXED_POINT
324   xmax = celt_maxabs16(x_lp4, len>>2);
325   ymax = celt_maxabs16(y_lp4, lag>>2);
326   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
327   if (shift>0)
328   {
329      for (j=0;j<len>>2;j++)
330         x_lp4[j] = SHR16(x_lp4[j], shift);
331      for (j=0;j<lag>>2;j++)
332         y_lp4[j] = SHR16(y_lp4[j], shift);
333      /* Use double the shift for a MAC */
334      shift *= 2;
335   } else {
336      shift = 0;
337   }
338#endif
339
340   /* Coarse search with 4x decimation */
341
342#ifdef FIXED_POINT
343   maxcorr =
344#endif
345   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
346
347   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
348#ifdef FIXED_POINT
349                   , 0, maxcorr
350#endif
351                   );
352
353   /* Finer search with 2x decimation */
354#ifdef FIXED_POINT
355   maxcorr=1;
356#endif
357   for (i=0;i<max_pitch>>1;i++)
358   {
359      opus_val32 sum=0;
360      xcorr[i] = 0;
361      if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
362         continue;
363      for (j=0;j<len>>1;j++)
364         sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
365      xcorr[i] = MAX32(-1, sum);
366#ifdef FIXED_POINT
367      maxcorr = MAX32(maxcorr, sum);
368#endif
369   }
370   find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
371#ifdef FIXED_POINT
372                   , shift+1, maxcorr
373#endif
374                   );
375
376   /* Refine by pseudo-interpolation */
377   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
378   {
379      opus_val32 a, b, c;
380      a = xcorr[best_pitch[0]-1];
381      b = xcorr[best_pitch[0]];
382      c = xcorr[best_pitch[0]+1];
383      if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
384         offset = 1;
385      else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
386         offset = -1;
387      else
388         offset = 0;
389   } else {
390      offset = 0;
391   }
392   *pitch = 2*best_pitch[0]-offset;
393
394   RESTORE_STACK;
395}
396
397static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
398opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
399      int N, int *T0_, int prev_period, opus_val16 prev_gain)
400{
401   int k, i, T, T0;
402   opus_val16 g, g0;
403   opus_val16 pg;
404   opus_val32 xy,xx,yy,xy2;
405   opus_val32 xcorr[3];
406   opus_val32 best_xy, best_yy;
407   int offset;
408   int minperiod0;
409   VARDECL(opus_val32, yy_lookup);
410   SAVE_STACK;
411
412   minperiod0 = minperiod;
413   maxperiod /= 2;
414   minperiod /= 2;
415   *T0_ /= 2;
416   prev_period /= 2;
417   N /= 2;
418   x += maxperiod;
419   if (*T0_>=maxperiod)
420      *T0_=maxperiod-1;
421
422   T = T0 = *T0_;
423   ALLOC(yy_lookup, maxperiod+1, opus_val32);
424   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
425   yy_lookup[0] = xx;
426   yy=xx;
427   for (i=1;i<=maxperiod;i++)
428   {
429      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
430      yy_lookup[i] = MAX32(0, yy);
431   }
432   yy = yy_lookup[T0];
433   best_xy = xy;
434   best_yy = yy;
435#ifdef FIXED_POINT
436      {
437         opus_val32 x2y2;
438         int sh, t;
439         x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
440         sh = celt_ilog2(x2y2)>>1;
441         t = VSHR32(x2y2, 2*(sh-7));
442         g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
443      }
444#else
445      g = g0 = xy/celt_sqrt(1+xx*yy);
446#endif
447   /* Look for any pitch at T/k */
448   for (k=2;k<=15;k++)
449   {
450      int T1, T1b;
451      opus_val16 g1;
452      opus_val16 cont=0;
453      opus_val16 thresh;
454      T1 = (2*T0+k)/(2*k);
455      if (T1 < minperiod)
456         break;
457      /* Look for another strong correlation at T1b */
458      if (k==2)
459      {
460         if (T1+T0>maxperiod)
461            T1b = T0;
462         else
463            T1b = T0+T1;
464      } else
465      {
466         T1b = (2*second_check[k]*T0+k)/(2*k);
467      }
468      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
469      xy += xy2;
470      yy = yy_lookup[T1] + yy_lookup[T1b];
471#ifdef FIXED_POINT
472      {
473         opus_val32 x2y2;
474         int sh, t;
475         x2y2 = 1+MULT32_32_Q31(xx,yy);
476         sh = celt_ilog2(x2y2)>>1;
477         t = VSHR32(x2y2, 2*(sh-7));
478         g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
479      }
480#else
481      g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
482#endif
483      if (abs(T1-prev_period)<=1)
484         cont = prev_gain;
485      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
486         cont = HALF32(prev_gain);
487      else
488         cont = 0;
489      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
490      /* Bias against very high pitch (very short period) to avoid false-positives
491         due to short-term correlation */
492      if (T1<3*minperiod)
493         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
494      else if (T1<2*minperiod)
495         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
496      if (g1 > thresh)
497      {
498         best_xy = xy;
499         best_yy = yy;
500         T = T1;
501         g = g1;
502      }
503   }
504   best_xy = MAX32(0, best_xy);
505   if (best_yy <= best_xy)
506      pg = Q15ONE;
507   else
508      pg = SHR32(frac_div32(best_xy,best_yy+1),16);
509
510   for (k=0;k<3;k++)
511   {
512      int T1 = T+k-1;
513      xy = 0;
514      for (i=0;i<N;i++)
515         xy = MAC16_16(xy, x[i], x[i-T1]);
516      xcorr[k] = xy;
517   }
518   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
519      offset = 1;
520   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
521      offset = -1;
522   else
523      offset = 0;
524   if (pg > g)
525      pg = g;
526   *T0_ = 2*T+offset;
527
528   if (*T0_<minperiod0)
529      *T0_=minperiod0;
530   RESTORE_STACK;
531   return pg;
532}
533