1/* Copyright (c) 2007-2008 CSIRO
2   Copyright (c) 2007-2009 Xiph.Org Foundation
3   Written by Jean-Marc Valin */
4/**
5   @file pitch.c
6   @brief Pitch analysis
7 */
8
9/*
10   Redistribution and use in source and binary forms, with or without
11   modification, are permitted provided that the following conditions
12   are met:
13
14   - Redistributions of source code must retain the above copyright
15   notice, this list of conditions and the following disclaimer.
16
17   - Redistributions in binary form must reproduce the above copyright
18   notice, this list of conditions and the following disclaimer in the
19   documentation and/or other materials provided with the distribution.
20
21   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*/
33
34#ifdef HAVE_CONFIG_H
35#include "config.h"
36#endif
37
38#include "pitch.h"
39#include "os_support.h"
40#include "modes.h"
41#include "stack_alloc.h"
42#include "mathops.h"
43#include "celt_lpc.h"
44
45static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
46                            int max_pitch, int *best_pitch
47#ifdef FIXED_POINT
48                            , int yshift, opus_val32 maxcorr
49#endif
50                            )
51{
52   int i, j;
53   opus_val32 Syy=1;
54   opus_val16 best_num[2];
55   opus_val32 best_den[2];
56#ifdef FIXED_POINT
57   int xshift;
58
59   xshift = celt_ilog2(maxcorr)-14;
60#endif
61
62   best_num[0] = -1;
63   best_num[1] = -1;
64   best_den[0] = 0;
65   best_den[1] = 0;
66   best_pitch[0] = 0;
67   best_pitch[1] = 1;
68   for (j=0;j<len;j++)
69      Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
70   for (i=0;i<max_pitch;i++)
71   {
72      if (xcorr[i]>0)
73      {
74         opus_val16 num;
75         opus_val32 xcorr16;
76         xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
77#ifndef FIXED_POINT
78         /* Considering the range of xcorr16, this should avoid both underflows
79            and overflows (inf) when squaring xcorr16 */
80         xcorr16 *= 1e-12f;
81#endif
82         num = MULT16_16_Q15(xcorr16,xcorr16);
83         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
84         {
85            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
86            {
87               best_num[1] = best_num[0];
88               best_den[1] = best_den[0];
89               best_pitch[1] = best_pitch[0];
90               best_num[0] = num;
91               best_den[0] = Syy;
92               best_pitch[0] = i;
93            } else {
94               best_num[1] = num;
95               best_den[1] = Syy;
96               best_pitch[1] = i;
97            }
98         }
99      }
100      Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
101      Syy = MAX32(1, Syy);
102   }
103}
104
105static void celt_fir5(const opus_val16 *x,
106         const opus_val16 *num,
107         opus_val16 *y,
108         int N,
109         opus_val16 *mem)
110{
111   int i;
112   opus_val16 num0, num1, num2, num3, num4;
113   opus_val32 mem0, mem1, mem2, mem3, mem4;
114   num0=num[0];
115   num1=num[1];
116   num2=num[2];
117   num3=num[3];
118   num4=num[4];
119   mem0=mem[0];
120   mem1=mem[1];
121   mem2=mem[2];
122   mem3=mem[3];
123   mem4=mem[4];
124   for (i=0;i<N;i++)
125   {
126      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
127      sum = MAC16_16(sum,num0,mem0);
128      sum = MAC16_16(sum,num1,mem1);
129      sum = MAC16_16(sum,num2,mem2);
130      sum = MAC16_16(sum,num3,mem3);
131      sum = MAC16_16(sum,num4,mem4);
132      mem4 = mem3;
133      mem3 = mem2;
134      mem2 = mem1;
135      mem1 = mem0;
136      mem0 = x[i];
137      y[i] = ROUND16(sum, SIG_SHIFT);
138   }
139   mem[0]=mem0;
140   mem[1]=mem1;
141   mem[2]=mem2;
142   mem[3]=mem3;
143   mem[4]=mem4;
144}
145
146
147void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
148      int len, int C, int arch)
149{
150   int i;
151   opus_val32 ac[5];
152   opus_val16 tmp=Q15ONE;
153   opus_val16 lpc[4], mem[5]={0,0,0,0,0};
154   opus_val16 lpc2[5];
155   opus_val16 c1 = QCONST16(.8f,15);
156#ifdef FIXED_POINT
157   int shift;
158   opus_val32 maxabs = celt_maxabs32(x[0], len);
159   if (C==2)
160   {
161      opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
162      maxabs = MAX32(maxabs, maxabs_1);
163   }
164   if (maxabs<1)
165      maxabs=1;
166   shift = celt_ilog2(maxabs)-10;
167   if (shift<0)
168      shift=0;
169   if (C==2)
170      shift++;
171#endif
172   for (i=1;i<len>>1;i++)
173      x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
174   x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
175   if (C==2)
176   {
177      for (i=1;i<len>>1;i++)
178         x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
179      x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
180   }
181
182   _celt_autocorr(x_lp, ac, NULL, 0,
183                  4, len>>1, arch);
184
185   /* Noise floor -40 dB */
186#ifdef FIXED_POINT
187   ac[0] += SHR32(ac[0],13);
188#else
189   ac[0] *= 1.0001f;
190#endif
191   /* Lag windowing */
192   for (i=1;i<=4;i++)
193   {
194      /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
195#ifdef FIXED_POINT
196      ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
197#else
198      ac[i] -= ac[i]*(.008f*i)*(.008f*i);
199#endif
200   }
201
202   _celt_lpc(lpc, ac, 4);
203   for (i=0;i<4;i++)
204   {
205      tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
206      lpc[i] = MULT16_16_Q15(lpc[i], tmp);
207   }
208   /* Add a zero */
209   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
210   lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
211   lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
212   lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
213   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
214   celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
215}
216
217#if 0 /* This is a simple version of the pitch correlation that should work
218         well on DSPs like Blackfin and TI C5x/C6x */
219
220#ifdef FIXED_POINT
221opus_val32
222#else
223void
224#endif
225celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
226{
227   int i, j;
228#ifdef FIXED_POINT
229   opus_val32 maxcorr=1;
230#endif
231   for (i=0;i<max_pitch;i++)
232   {
233      opus_val32 sum = 0;
234      for (j=0;j<len;j++)
235         sum = MAC16_16(sum, x[j],y[i+j]);
236      xcorr[i] = sum;
237#ifdef FIXED_POINT
238      maxcorr = MAX32(maxcorr, sum);
239#endif
240   }
241#ifdef FIXED_POINT
242   return maxcorr;
243#endif
244}
245
246#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
247
248#ifdef FIXED_POINT
249opus_val32
250#else
251void
252#endif
253celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
254{
255   int i,j;
256   /*The EDSP version requires that max_pitch is at least 1, and that _x is
257      32-bit aligned.
258     Since it's hard to put asserts in assembly, put them here.*/
259   celt_assert(max_pitch>0);
260   celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
261#ifdef FIXED_POINT
262   opus_val32 maxcorr=1;
263#endif
264   for (i=0;i<max_pitch-3;i+=4)
265   {
266      opus_val32 sum[4]={0,0,0,0};
267      xcorr_kernel(_x, _y+i, sum, len);
268      xcorr[i]=sum[0];
269      xcorr[i+1]=sum[1];
270      xcorr[i+2]=sum[2];
271      xcorr[i+3]=sum[3];
272#ifdef FIXED_POINT
273      sum[0] = MAX32(sum[0], sum[1]);
274      sum[2] = MAX32(sum[2], sum[3]);
275      sum[0] = MAX32(sum[0], sum[2]);
276      maxcorr = MAX32(maxcorr, sum[0]);
277#endif
278   }
279   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
280   for (;i<max_pitch;i++)
281   {
282      opus_val32 sum = 0;
283      for (j=0;j<len;j++)
284         sum = MAC16_16(sum, _x[j],_y[i+j]);
285      xcorr[i] = sum;
286#ifdef FIXED_POINT
287      maxcorr = MAX32(maxcorr, sum);
288#endif
289   }
290#ifdef FIXED_POINT
291   return maxcorr;
292#endif
293}
294
295#endif
296void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
297                  int len, int max_pitch, int *pitch, int arch)
298{
299   int i, j;
300   int lag;
301   int best_pitch[2]={0,0};
302   VARDECL(opus_val16, x_lp4);
303   VARDECL(opus_val16, y_lp4);
304   VARDECL(opus_val32, xcorr);
305#ifdef FIXED_POINT
306   opus_val32 maxcorr;
307   opus_val32 xmax, ymax;
308   int shift=0;
309#endif
310   int offset;
311
312   SAVE_STACK;
313
314   celt_assert(len>0);
315   celt_assert(max_pitch>0);
316   lag = len+max_pitch;
317
318   ALLOC(x_lp4, len>>2, opus_val16);
319   ALLOC(y_lp4, lag>>2, opus_val16);
320   ALLOC(xcorr, max_pitch>>1, opus_val32);
321
322   /* Downsample by 2 again */
323   for (j=0;j<len>>2;j++)
324      x_lp4[j] = x_lp[2*j];
325   for (j=0;j<lag>>2;j++)
326      y_lp4[j] = y[2*j];
327
328#ifdef FIXED_POINT
329   xmax = celt_maxabs16(x_lp4, len>>2);
330   ymax = celt_maxabs16(y_lp4, lag>>2);
331   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
332   if (shift>0)
333   {
334      for (j=0;j<len>>2;j++)
335         x_lp4[j] = SHR16(x_lp4[j], shift);
336      for (j=0;j<lag>>2;j++)
337         y_lp4[j] = SHR16(y_lp4[j], shift);
338      /* Use double the shift for a MAC */
339      shift *= 2;
340   } else {
341      shift = 0;
342   }
343#endif
344
345   /* Coarse search with 4x decimation */
346
347#ifdef FIXED_POINT
348   maxcorr =
349#endif
350   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
351
352   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
353#ifdef FIXED_POINT
354                   , 0, maxcorr
355#endif
356                   );
357
358   /* Finer search with 2x decimation */
359#ifdef FIXED_POINT
360   maxcorr=1;
361#endif
362   for (i=0;i<max_pitch>>1;i++)
363   {
364      opus_val32 sum=0;
365      xcorr[i] = 0;
366      if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
367         continue;
368      for (j=0;j<len>>1;j++)
369         sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
370      xcorr[i] = MAX32(-1, sum);
371#ifdef FIXED_POINT
372      maxcorr = MAX32(maxcorr, sum);
373#endif
374   }
375   find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
376#ifdef FIXED_POINT
377                   , shift+1, maxcorr
378#endif
379                   );
380
381   /* Refine by pseudo-interpolation */
382   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
383   {
384      opus_val32 a, b, c;
385      a = xcorr[best_pitch[0]-1];
386      b = xcorr[best_pitch[0]];
387      c = xcorr[best_pitch[0]+1];
388      if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
389         offset = 1;
390      else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
391         offset = -1;
392      else
393         offset = 0;
394   } else {
395      offset = 0;
396   }
397   *pitch = 2*best_pitch[0]-offset;
398
399   RESTORE_STACK;
400}
401
402static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
403opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
404      int N, int *T0_, int prev_period, opus_val16 prev_gain)
405{
406   int k, i, T, T0;
407   opus_val16 g, g0;
408   opus_val16 pg;
409   opus_val32 xy,xx,yy,xy2;
410   opus_val32 xcorr[3];
411   opus_val32 best_xy, best_yy;
412   int offset;
413   int minperiod0;
414   VARDECL(opus_val32, yy_lookup);
415   SAVE_STACK;
416
417   minperiod0 = minperiod;
418   maxperiod /= 2;
419   minperiod /= 2;
420   *T0_ /= 2;
421   prev_period /= 2;
422   N /= 2;
423   x += maxperiod;
424   if (*T0_>=maxperiod)
425      *T0_=maxperiod-1;
426
427   T = T0 = *T0_;
428   ALLOC(yy_lookup, maxperiod+1, opus_val32);
429   dual_inner_prod(x, x, x-T0, N, &xx, &xy);
430   yy_lookup[0] = xx;
431   yy=xx;
432   for (i=1;i<=maxperiod;i++)
433   {
434      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
435      yy_lookup[i] = MAX32(0, yy);
436   }
437   yy = yy_lookup[T0];
438   best_xy = xy;
439   best_yy = yy;
440#ifdef FIXED_POINT
441      {
442         opus_val32 x2y2;
443         int sh, t;
444         x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
445         sh = celt_ilog2(x2y2)>>1;
446         t = VSHR32(x2y2, 2*(sh-7));
447         g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
448      }
449#else
450      g = g0 = xy/celt_sqrt(1+xx*yy);
451#endif
452   /* Look for any pitch at T/k */
453   for (k=2;k<=15;k++)
454   {
455      int T1, T1b;
456      opus_val16 g1;
457      opus_val16 cont=0;
458      opus_val16 thresh;
459      T1 = (2*T0+k)/(2*k);
460      if (T1 < minperiod)
461         break;
462      /* Look for another strong correlation at T1b */
463      if (k==2)
464      {
465         if (T1+T0>maxperiod)
466            T1b = T0;
467         else
468            T1b = T0+T1;
469      } else
470      {
471         T1b = (2*second_check[k]*T0+k)/(2*k);
472      }
473      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
474      xy += xy2;
475      yy = yy_lookup[T1] + yy_lookup[T1b];
476#ifdef FIXED_POINT
477      {
478         opus_val32 x2y2;
479         int sh, t;
480         x2y2 = 1+MULT32_32_Q31(xx,yy);
481         sh = celt_ilog2(x2y2)>>1;
482         t = VSHR32(x2y2, 2*(sh-7));
483         g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
484      }
485#else
486      g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
487#endif
488      if (abs(T1-prev_period)<=1)
489         cont = prev_gain;
490      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
491         cont = HALF32(prev_gain);
492      else
493         cont = 0;
494      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
495      /* Bias against very high pitch (very short period) to avoid false-positives
496         due to short-term correlation */
497      if (T1<3*minperiod)
498         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
499      else if (T1<2*minperiod)
500         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
501      if (g1 > thresh)
502      {
503         best_xy = xy;
504         best_yy = yy;
505         T = T1;
506         g = g1;
507      }
508   }
509   best_xy = MAX32(0, best_xy);
510   if (best_yy <= best_xy)
511      pg = Q15ONE;
512   else
513      pg = SHR32(frac_div32(best_xy,best_yy+1),16);
514
515   for (k=0;k<3;k++)
516   {
517      int T1 = T+k-1;
518      xy = 0;
519      for (i=0;i<N;i++)
520         xy = MAC16_16(xy, x[i], x[i-T1]);
521      xcorr[k] = xy;
522   }
523   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
524      offset = 1;
525   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
526      offset = -1;
527   else
528      offset = 0;
529   if (pg > g)
530      pg = g;
531   *T0_ = 2*T+offset;
532
533   if (*T0_<minperiod0)
534      *T0_=minperiod0;
535   RESTORE_STACK;
536   return pg;
537}
538