1/********************************************************************
2 *                                                                  *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7 *                                                                  *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
9 * by the Xiph.Org Foundation http://www.xiph.org/                  *
10 *                                                                  *
11 ********************************************************************
12
13  function: LSP (also called LSF) conversion routines
14  last mod: $Id: lsp.c 16227 2009-07-08 06:58:46Z xiphmont $
15
16  The LSP generation code is taken (with minimal modification and a
17  few bugfixes) from "On the Computation of the LSP Frequencies" by
18  Joseph Rothweiler (see http://www.rothweiler.us for contact info).
19  The paper is available at:
20
21  http://www.myown1.com/joe/lsf
22
23 ********************************************************************/
24
25/* Note that the lpc-lsp conversion finds the roots of polynomial with
26   an iterative root polisher (CACM algorithm 283).  It *is* possible
27   to confuse this algorithm into not converging; that should only
28   happen with absurdly closely spaced roots (very sharp peaks in the
29   LPC f response) which in turn should be impossible in our use of
30   the code.  If this *does* happen anyway, it's a bug in the floor
31   finder; find the cause of the confusion (probably a single bin
32   spike or accidental near-float-limit resolution problems) and
33   correct it. */
34
35#include <math.h>
36#include <string.h>
37#include <stdlib.h>
38#include "lsp.h"
39#include "os.h"
40#include "misc.h"
41#include "lookup.h"
42#include "scales.h"
43
44/* three possible LSP to f curve functions; the exact computation
45   (float), a lookup based float implementation, and an integer
46   implementation.  The float lookup is likely the optimal choice on
47   any machine with an FPU.  The integer implementation is *not* fixed
48   point (due to the need for a large dynamic range and thus a
49   seperately tracked exponent) and thus much more complex than the
50   relatively simple float implementations. It's mostly for future
51   work on a fully fixed point implementation for processors like the
52   ARM family. */
53
54/* define either of these (preferably FLOAT_LOOKUP) to have faster
55   but less precise implementation. */
56#undef FLOAT_LOOKUP
57#undef INT_LOOKUP
58
59#ifdef FLOAT_LOOKUP
60#include "lookup.c" /* catch this in the build system; we #include for
61                       compilers (like gcc) that can't inline across
62                       modules */
63
64/* side effect: changes *lsp to cosines of lsp */
65void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m,
66                            float amp,float ampoffset){
67  int i;
68  float wdel=M_PI/ln;
69  vorbis_fpu_control fpu;
70
71  vorbis_fpu_setround(&fpu);
72  for(i=0;i<m;i++)lsp[i]=vorbis_coslook(lsp[i]);
73
74  i=0;
75  while(i<n){
76    int k=map[i];
77    int qexp;
78    float p=.7071067812f;
79    float q=.7071067812f;
80    float w=vorbis_coslook(wdel*k);
81    float *ftmp=lsp;
82    int c=m>>1;
83
84    do{
85      q*=ftmp[0]-w;
86      p*=ftmp[1]-w;
87      ftmp+=2;
88    }while(--c);
89
90    if(m&1){
91      /* odd order filter; slightly assymetric */
92      /* the last coefficient */
93      q*=ftmp[0]-w;
94      q*=q;
95      p*=p*(1.f-w*w);
96    }else{
97      /* even order filter; still symmetric */
98      q*=q*(1.f+w);
99      p*=p*(1.f-w);
100    }
101
102    q=frexp(p+q,&qexp);
103    q=vorbis_fromdBlook(amp*
104                        vorbis_invsqlook(q)*
105                        vorbis_invsq2explook(qexp+m)-
106                        ampoffset);
107
108    do{
109      curve[i++]*=q;
110    }while(map[i]==k);
111  }
112  vorbis_fpu_restore(fpu);
113}
114
115#else
116
117#ifdef INT_LOOKUP
118#include "lookup.c" /* catch this in the build system; we #include for
119                       compilers (like gcc) that can't inline across
120                       modules */
121
122static const int MLOOP_1[64]={
123   0,10,11,11, 12,12,12,12, 13,13,13,13, 13,13,13,13,
124  14,14,14,14, 14,14,14,14, 14,14,14,14, 14,14,14,14,
125  15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
126  15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
127};
128
129static const int MLOOP_2[64]={
130  0,4,5,5, 6,6,6,6, 7,7,7,7, 7,7,7,7,
131  8,8,8,8, 8,8,8,8, 8,8,8,8, 8,8,8,8,
132  9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9,
133  9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9,
134};
135
136static const int MLOOP_3[8]={0,1,2,2,3,3,3,3};
137
138
139/* side effect: changes *lsp to cosines of lsp */
140void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m,
141                            float amp,float ampoffset){
142
143  /* 0 <= m < 256 */
144
145  /* set up for using all int later */
146  int i;
147  int ampoffseti=rint(ampoffset*4096.f);
148  int ampi=rint(amp*16.f);
149  long *ilsp=alloca(m*sizeof(*ilsp));
150  for(i=0;i<m;i++)ilsp[i]=vorbis_coslook_i(lsp[i]/M_PI*65536.f+.5f);
151
152  i=0;
153  while(i<n){
154    int j,k=map[i];
155    unsigned long pi=46341; /* 2**-.5 in 0.16 */
156    unsigned long qi=46341;
157    int qexp=0,shift;
158    long wi=vorbis_coslook_i(k*65536/ln);
159
160    qi*=labs(ilsp[0]-wi);
161    pi*=labs(ilsp[1]-wi);
162
163    for(j=3;j<m;j+=2){
164      if(!(shift=MLOOP_1[(pi|qi)>>25]))
165        if(!(shift=MLOOP_2[(pi|qi)>>19]))
166          shift=MLOOP_3[(pi|qi)>>16];
167      qi=(qi>>shift)*labs(ilsp[j-1]-wi);
168      pi=(pi>>shift)*labs(ilsp[j]-wi);
169      qexp+=shift;
170    }
171    if(!(shift=MLOOP_1[(pi|qi)>>25]))
172      if(!(shift=MLOOP_2[(pi|qi)>>19]))
173        shift=MLOOP_3[(pi|qi)>>16];
174
175    /* pi,qi normalized collectively, both tracked using qexp */
176
177    if(m&1){
178      /* odd order filter; slightly assymetric */
179      /* the last coefficient */
180      qi=(qi>>shift)*labs(ilsp[j-1]-wi);
181      pi=(pi>>shift)<<14;
182      qexp+=shift;
183
184      if(!(shift=MLOOP_1[(pi|qi)>>25]))
185        if(!(shift=MLOOP_2[(pi|qi)>>19]))
186          shift=MLOOP_3[(pi|qi)>>16];
187
188      pi>>=shift;
189      qi>>=shift;
190      qexp+=shift-14*((m+1)>>1);
191
192      pi=((pi*pi)>>16);
193      qi=((qi*qi)>>16);
194      qexp=qexp*2+m;
195
196      pi*=(1<<14)-((wi*wi)>>14);
197      qi+=pi>>14;
198
199    }else{
200      /* even order filter; still symmetric */
201
202      /* p*=p(1-w), q*=q(1+w), let normalization drift because it isn't
203         worth tracking step by step */
204
205      pi>>=shift;
206      qi>>=shift;
207      qexp+=shift-7*m;
208
209      pi=((pi*pi)>>16);
210      qi=((qi*qi)>>16);
211      qexp=qexp*2+m;
212
213      pi*=(1<<14)-wi;
214      qi*=(1<<14)+wi;
215      qi=(qi+pi)>>14;
216
217    }
218
219
220    /* we've let the normalization drift because it wasn't important;
221       however, for the lookup, things must be normalized again.  We
222       need at most one right shift or a number of left shifts */
223
224    if(qi&0xffff0000){ /* checks for 1.xxxxxxxxxxxxxxxx */
225      qi>>=1; qexp++;
226    }else
227      while(qi && !(qi&0x8000)){ /* checks for 0.0xxxxxxxxxxxxxxx or less*/
228        qi<<=1; qexp--;
229      }
230
231    amp=vorbis_fromdBlook_i(ampi*                     /*  n.4         */
232                            vorbis_invsqlook_i(qi,qexp)-
233                                                      /*  m.8, m+n<=8 */
234                            ampoffseti);              /*  8.12[0]     */
235
236    curve[i]*=amp;
237    while(map[++i]==k)curve[i]*=amp;
238  }
239}
240
241#else
242
243/* old, nonoptimized but simple version for any poor sap who needs to
244   figure out what the hell this code does, or wants the other
245   fraction of a dB precision */
246
247/* side effect: changes *lsp to cosines of lsp */
248void vorbis_lsp_to_curve(float *curve,int *map,int n,int ln,float *lsp,int m,
249                            float amp,float ampoffset){
250  int i;
251  float wdel=M_PI/ln;
252  for(i=0;i<m;i++)lsp[i]=2.f*cos(lsp[i]);
253
254  i=0;
255  while(i<n){
256    int j,k=map[i];
257    float p=.5f;
258    float q=.5f;
259    float w=2.f*cos(wdel*k);
260    for(j=1;j<m;j+=2){
261      q *= w-lsp[j-1];
262      p *= w-lsp[j];
263    }
264    if(j==m){
265      /* odd order filter; slightly assymetric */
266      /* the last coefficient */
267      q*=w-lsp[j-1];
268      p*=p*(4.f-w*w);
269      q*=q;
270    }else{
271      /* even order filter; still symmetric */
272      p*=p*(2.f-w);
273      q*=q*(2.f+w);
274    }
275
276    q=fromdB(amp/sqrt(p+q)-ampoffset);
277
278    curve[i]*=q;
279    while(map[++i]==k)curve[i]*=q;
280  }
281}
282
283#endif
284#endif
285
286static void cheby(float *g, int ord) {
287  int i, j;
288
289  g[0] *= .5f;
290  for(i=2; i<= ord; i++) {
291    for(j=ord; j >= i; j--) {
292      g[j-2] -= g[j];
293      g[j] += g[j];
294    }
295  }
296}
297
298static int comp(const void *a,const void *b){
299  return (*(float *)a<*(float *)b)-(*(float *)a>*(float *)b);
300}
301
302/* Newton-Raphson-Maehly actually functioned as a decent root finder,
303   but there are root sets for which it gets into limit cycles
304   (exacerbated by zero suppression) and fails.  We can't afford to
305   fail, even if the failure is 1 in 100,000,000, so we now use
306   Laguerre and later polish with Newton-Raphson (which can then
307   afford to fail) */
308
309#define EPSILON 10e-7
310static int Laguerre_With_Deflation(float *a,int ord,float *r){
311  int i,m;
312  double lastdelta=0.f;
313  double *defl=alloca(sizeof(*defl)*(ord+1));
314  for(i=0;i<=ord;i++)defl[i]=a[i];
315
316  for(m=ord;m>0;m--){
317    double new=0.f,delta;
318
319    /* iterate a root */
320    while(1){
321      double p=defl[m],pp=0.f,ppp=0.f,denom;
322
323      /* eval the polynomial and its first two derivatives */
324      for(i=m;i>0;i--){
325        ppp = new*ppp + pp;
326        pp  = new*pp  + p;
327        p   = new*p   + defl[i-1];
328      }
329
330      /* Laguerre's method */
331      denom=(m-1) * ((m-1)*pp*pp - m*p*ppp);
332      if(denom<0)
333        return(-1);  /* complex root!  The LPC generator handed us a bad filter */
334
335      if(pp>0){
336        denom = pp + sqrt(denom);
337        if(denom<EPSILON)denom=EPSILON;
338      }else{
339        denom = pp - sqrt(denom);
340        if(denom>-(EPSILON))denom=-(EPSILON);
341      }
342
343      delta  = m*p/denom;
344      new   -= delta;
345
346      if(delta<0.f)delta*=-1;
347
348      if(fabs(delta/new)<10e-12)break;
349      lastdelta=delta;
350    }
351
352    r[m-1]=new;
353
354    /* forward deflation */
355
356    for(i=m;i>0;i--)
357      defl[i-1]+=new*defl[i];
358    defl++;
359
360  }
361  return(0);
362}
363
364
365/* for spit-and-polish only */
366static int Newton_Raphson(float *a,int ord,float *r){
367  int i, k, count=0;
368  double error=1.f;
369  double *root=alloca(ord*sizeof(*root));
370
371  for(i=0; i<ord;i++) root[i] = r[i];
372
373  while(error>1e-20){
374    error=0;
375
376    for(i=0; i<ord; i++) { /* Update each point. */
377      double pp=0.,delta;
378      double rooti=root[i];
379      double p=a[ord];
380      for(k=ord-1; k>= 0; k--) {
381
382        pp= pp* rooti + p;
383        p = p * rooti + a[k];
384      }
385
386      delta = p/pp;
387      root[i] -= delta;
388      error+= delta*delta;
389    }
390
391    if(count>40)return(-1);
392
393    count++;
394  }
395
396  /* Replaced the original bubble sort with a real sort.  With your
397     help, we can eliminate the bubble sort in our lifetime. --Monty */
398
399  for(i=0; i<ord;i++) r[i] = root[i];
400  return(0);
401}
402
403
404/* Convert lpc coefficients to lsp coefficients */
405int vorbis_lpc_to_lsp(float *lpc,float *lsp,int m){
406  int order2=(m+1)>>1;
407  int g1_order,g2_order;
408  float *g1=alloca(sizeof(*g1)*(order2+1));
409  float *g2=alloca(sizeof(*g2)*(order2+1));
410  float *g1r=alloca(sizeof(*g1r)*(order2+1));
411  float *g2r=alloca(sizeof(*g2r)*(order2+1));
412  int i;
413
414  /* even and odd are slightly different base cases */
415  g1_order=(m+1)>>1;
416  g2_order=(m)  >>1;
417
418  /* Compute the lengths of the x polynomials. */
419  /* Compute the first half of K & R F1 & F2 polynomials. */
420  /* Compute half of the symmetric and antisymmetric polynomials. */
421  /* Remove the roots at +1 and -1. */
422
423  g1[g1_order] = 1.f;
424  for(i=1;i<=g1_order;i++) g1[g1_order-i] = lpc[i-1]+lpc[m-i];
425  g2[g2_order] = 1.f;
426  for(i=1;i<=g2_order;i++) g2[g2_order-i] = lpc[i-1]-lpc[m-i];
427
428  if(g1_order>g2_order){
429    for(i=2; i<=g2_order;i++) g2[g2_order-i] += g2[g2_order-i+2];
430  }else{
431    for(i=1; i<=g1_order;i++) g1[g1_order-i] -= g1[g1_order-i+1];
432    for(i=1; i<=g2_order;i++) g2[g2_order-i] += g2[g2_order-i+1];
433  }
434
435  /* Convert into polynomials in cos(alpha) */
436  cheby(g1,g1_order);
437  cheby(g2,g2_order);
438
439  /* Find the roots of the 2 even polynomials.*/
440  if(Laguerre_With_Deflation(g1,g1_order,g1r) ||
441     Laguerre_With_Deflation(g2,g2_order,g2r))
442    return(-1);
443
444  Newton_Raphson(g1,g1_order,g1r); /* if it fails, it leaves g1r alone */
445  Newton_Raphson(g2,g2_order,g2r); /* if it fails, it leaves g2r alone */
446
447  qsort(g1r,g1_order,sizeof(*g1r),comp);
448  qsort(g2r,g2_order,sizeof(*g2r),comp);
449
450  for(i=0;i<g1_order;i++)
451    lsp[i*2] = acos(g1r[i]);
452
453  for(i=0;i<g2_order;i++)
454    lsp[i*2+1] = acos(g2r[i]);
455  return(0);
456}
457