1/********************************************************************
2 *                                                                  *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7 *                                                                  *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
9 * by the Xiph.Org Foundation http://www.xiph.org/                  *
10 *                                                                  *
11 ********************************************************************
12
13 function: normalized modified discrete cosine transform
14           power of two length transform only [64 <= n ]
15 last mod: $Id: mdct.c 16227 2009-07-08 06:58:46Z xiphmont $
16
17 Original algorithm adapted long ago from _The use of multirate filter
18 banks for coding of high quality digital audio_, by T. Sporer,
19 K. Brandenburg and B. Edler, collection of the European Signal
20 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
21 211-214
22
23 The below code implements an algorithm that no longer looks much like
24 that presented in the paper, but the basic structure remains if you
25 dig deep enough to see it.
26
27 This module DOES NOT INCLUDE code to generate/apply the window
28 function.  Everybody has their own weird favorite including me... I
29 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
30 vehemently disagree.
31
32 ********************************************************************/
33
34/* this can also be run as an integer transform by uncommenting a
35   define in mdct.h; the integerization is a first pass and although
36   it's likely stable for Vorbis, the dynamic range is constrained and
37   roundoff isn't done (so it's noisy).  Consider it functional, but
38   only a starting point.  There's no point on a machine with an FPU */
39
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <math.h>
44#include "vorbis/codec.h"
45#include "mdct.h"
46#include "os.h"
47#include "misc.h"
48
49/* build lookups for trig functions; also pre-figure scaling and
50   some window function algebra. */
51
52void mdct_init(mdct_lookup *lookup,int n){
53  int   *bitrev=_ogg_malloc(sizeof(*bitrev)*(n/4));
54  DATA_TYPE *T=_ogg_malloc(sizeof(*T)*(n+n/4));
55
56  int i;
57  int n2=n>>1;
58  int log2n=lookup->log2n=rint(log((float)n)/log(2.f));
59  lookup->n=n;
60  lookup->trig=T;
61  lookup->bitrev=bitrev;
62
63/* trig lookups... */
64
65  for(i=0;i<n/4;i++){
66    T[i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i)));
67    T[i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i)));
68    T[n2+i*2]=FLOAT_CONV(cos((M_PI/(2*n))*(2*i+1)));
69    T[n2+i*2+1]=FLOAT_CONV(sin((M_PI/(2*n))*(2*i+1)));
70  }
71  for(i=0;i<n/8;i++){
72    T[n+i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i+2))*.5);
73    T[n+i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i+2))*.5);
74  }
75
76  /* bitreverse lookup... */
77
78  {
79    int mask=(1<<(log2n-1))-1,i,j;
80    int msb=1<<(log2n-2);
81    for(i=0;i<n/8;i++){
82      int acc=0;
83      for(j=0;msb>>j;j++)
84        if((msb>>j)&i)acc|=1<<j;
85      bitrev[i*2]=((~acc)&mask)-1;
86      bitrev[i*2+1]=acc;
87
88    }
89  }
90  lookup->scale=FLOAT_CONV(4.f/n);
91}
92
93/* 8 point butterfly (in place, 4 register) */
94STIN void mdct_butterfly_8(DATA_TYPE *x){
95  REG_TYPE r0   = x[6] + x[2];
96  REG_TYPE r1   = x[6] - x[2];
97  REG_TYPE r2   = x[4] + x[0];
98  REG_TYPE r3   = x[4] - x[0];
99
100           x[6] = r0   + r2;
101           x[4] = r0   - r2;
102
103           r0   = x[5] - x[1];
104           r2   = x[7] - x[3];
105           x[0] = r1   + r0;
106           x[2] = r1   - r0;
107
108           r0   = x[5] + x[1];
109           r1   = x[7] + x[3];
110           x[3] = r2   + r3;
111           x[1] = r2   - r3;
112           x[7] = r1   + r0;
113           x[5] = r1   - r0;
114
115}
116
117/* 16 point butterfly (in place, 4 register) */
118STIN void mdct_butterfly_16(DATA_TYPE *x){
119  REG_TYPE r0     = x[1]  - x[9];
120  REG_TYPE r1     = x[0]  - x[8];
121
122           x[8]  += x[0];
123           x[9]  += x[1];
124           x[0]   = MULT_NORM((r0   + r1) * cPI2_8);
125           x[1]   = MULT_NORM((r0   - r1) * cPI2_8);
126
127           r0     = x[3]  - x[11];
128           r1     = x[10] - x[2];
129           x[10] += x[2];
130           x[11] += x[3];
131           x[2]   = r0;
132           x[3]   = r1;
133
134           r0     = x[12] - x[4];
135           r1     = x[13] - x[5];
136           x[12] += x[4];
137           x[13] += x[5];
138           x[4]   = MULT_NORM((r0   - r1) * cPI2_8);
139           x[5]   = MULT_NORM((r0   + r1) * cPI2_8);
140
141           r0     = x[14] - x[6];
142           r1     = x[15] - x[7];
143           x[14] += x[6];
144           x[15] += x[7];
145           x[6]  = r0;
146           x[7]  = r1;
147
148           mdct_butterfly_8(x);
149           mdct_butterfly_8(x+8);
150}
151
152/* 32 point butterfly (in place, 4 register) */
153STIN void mdct_butterfly_32(DATA_TYPE *x){
154  REG_TYPE r0     = x[30] - x[14];
155  REG_TYPE r1     = x[31] - x[15];
156
157           x[30] +=         x[14];
158           x[31] +=         x[15];
159           x[14]  =         r0;
160           x[15]  =         r1;
161
162           r0     = x[28] - x[12];
163           r1     = x[29] - x[13];
164           x[28] +=         x[12];
165           x[29] +=         x[13];
166           x[12]  = MULT_NORM( r0 * cPI1_8  -  r1 * cPI3_8 );
167           x[13]  = MULT_NORM( r0 * cPI3_8  +  r1 * cPI1_8 );
168
169           r0     = x[26] - x[10];
170           r1     = x[27] - x[11];
171           x[26] +=         x[10];
172           x[27] +=         x[11];
173           x[10]  = MULT_NORM(( r0  - r1 ) * cPI2_8);
174           x[11]  = MULT_NORM(( r0  + r1 ) * cPI2_8);
175
176           r0     = x[24] - x[8];
177           r1     = x[25] - x[9];
178           x[24] += x[8];
179           x[25] += x[9];
180           x[8]   = MULT_NORM( r0 * cPI3_8  -  r1 * cPI1_8 );
181           x[9]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
182
183           r0     = x[22] - x[6];
184           r1     = x[7]  - x[23];
185           x[22] += x[6];
186           x[23] += x[7];
187           x[6]   = r1;
188           x[7]   = r0;
189
190           r0     = x[4]  - x[20];
191           r1     = x[5]  - x[21];
192           x[20] += x[4];
193           x[21] += x[5];
194           x[4]   = MULT_NORM( r1 * cPI1_8  +  r0 * cPI3_8 );
195           x[5]   = MULT_NORM( r1 * cPI3_8  -  r0 * cPI1_8 );
196
197           r0     = x[2]  - x[18];
198           r1     = x[3]  - x[19];
199           x[18] += x[2];
200           x[19] += x[3];
201           x[2]   = MULT_NORM(( r1  + r0 ) * cPI2_8);
202           x[3]   = MULT_NORM(( r1  - r0 ) * cPI2_8);
203
204           r0     = x[0]  - x[16];
205           r1     = x[1]  - x[17];
206           x[16] += x[0];
207           x[17] += x[1];
208           x[0]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
209           x[1]   = MULT_NORM( r1 * cPI1_8  -  r0 * cPI3_8 );
210
211           mdct_butterfly_16(x);
212           mdct_butterfly_16(x+16);
213
214}
215
216/* N point first stage butterfly (in place, 2 register) */
217STIN void mdct_butterfly_first(DATA_TYPE *T,
218                                        DATA_TYPE *x,
219                                        int points){
220
221  DATA_TYPE *x1        = x          + points      - 8;
222  DATA_TYPE *x2        = x          + (points>>1) - 8;
223  REG_TYPE   r0;
224  REG_TYPE   r1;
225
226  do{
227
228               r0      = x1[6]      -  x2[6];
229               r1      = x1[7]      -  x2[7];
230               x1[6]  += x2[6];
231               x1[7]  += x2[7];
232               x2[6]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
233               x2[7]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
234
235               r0      = x1[4]      -  x2[4];
236               r1      = x1[5]      -  x2[5];
237               x1[4]  += x2[4];
238               x1[5]  += x2[5];
239               x2[4]   = MULT_NORM(r1 * T[5]  +  r0 * T[4]);
240               x2[5]   = MULT_NORM(r1 * T[4]  -  r0 * T[5]);
241
242               r0      = x1[2]      -  x2[2];
243               r1      = x1[3]      -  x2[3];
244               x1[2]  += x2[2];
245               x1[3]  += x2[3];
246               x2[2]   = MULT_NORM(r1 * T[9]  +  r0 * T[8]);
247               x2[3]   = MULT_NORM(r1 * T[8]  -  r0 * T[9]);
248
249               r0      = x1[0]      -  x2[0];
250               r1      = x1[1]      -  x2[1];
251               x1[0]  += x2[0];
252               x1[1]  += x2[1];
253               x2[0]   = MULT_NORM(r1 * T[13] +  r0 * T[12]);
254               x2[1]   = MULT_NORM(r1 * T[12] -  r0 * T[13]);
255
256    x1-=8;
257    x2-=8;
258    T+=16;
259
260  }while(x2>=x);
261}
262
263/* N/stage point generic N stage butterfly (in place, 2 register) */
264STIN void mdct_butterfly_generic(DATA_TYPE *T,
265                                          DATA_TYPE *x,
266                                          int points,
267                                          int trigint){
268
269  DATA_TYPE *x1        = x          + points      - 8;
270  DATA_TYPE *x2        = x          + (points>>1) - 8;
271  REG_TYPE   r0;
272  REG_TYPE   r1;
273
274  do{
275
276               r0      = x1[6]      -  x2[6];
277               r1      = x1[7]      -  x2[7];
278               x1[6]  += x2[6];
279               x1[7]  += x2[7];
280               x2[6]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
281               x2[7]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
282
283               T+=trigint;
284
285               r0      = x1[4]      -  x2[4];
286               r1      = x1[5]      -  x2[5];
287               x1[4]  += x2[4];
288               x1[5]  += x2[5];
289               x2[4]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
290               x2[5]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
291
292               T+=trigint;
293
294               r0      = x1[2]      -  x2[2];
295               r1      = x1[3]      -  x2[3];
296               x1[2]  += x2[2];
297               x1[3]  += x2[3];
298               x2[2]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
299               x2[3]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
300
301               T+=trigint;
302
303               r0      = x1[0]      -  x2[0];
304               r1      = x1[1]      -  x2[1];
305               x1[0]  += x2[0];
306               x1[1]  += x2[1];
307               x2[0]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
308               x2[1]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
309
310               T+=trigint;
311    x1-=8;
312    x2-=8;
313
314  }while(x2>=x);
315}
316
317STIN void mdct_butterflies(mdct_lookup *init,
318                             DATA_TYPE *x,
319                             int points){
320
321  DATA_TYPE *T=init->trig;
322  int stages=init->log2n-5;
323  int i,j;
324
325  if(--stages>0){
326    mdct_butterfly_first(T,x,points);
327  }
328
329  for(i=1;--stages>0;i++){
330    for(j=0;j<(1<<i);j++)
331      mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i);
332  }
333
334  for(j=0;j<points;j+=32)
335    mdct_butterfly_32(x+j);
336
337}
338
339void mdct_clear(mdct_lookup *l){
340  if(l){
341    if(l->trig)_ogg_free(l->trig);
342    if(l->bitrev)_ogg_free(l->bitrev);
343    memset(l,0,sizeof(*l));
344  }
345}
346
347STIN void mdct_bitreverse(mdct_lookup *init,
348                            DATA_TYPE *x){
349  int        n       = init->n;
350  int       *bit     = init->bitrev;
351  DATA_TYPE *w0      = x;
352  DATA_TYPE *w1      = x = w0+(n>>1);
353  DATA_TYPE *T       = init->trig+n;
354
355  do{
356    DATA_TYPE *x0    = x+bit[0];
357    DATA_TYPE *x1    = x+bit[1];
358
359    REG_TYPE  r0     = x0[1]  - x1[1];
360    REG_TYPE  r1     = x0[0]  + x1[0];
361    REG_TYPE  r2     = MULT_NORM(r1     * T[0]   + r0 * T[1]);
362    REG_TYPE  r3     = MULT_NORM(r1     * T[1]   - r0 * T[0]);
363
364              w1    -= 4;
365
366              r0     = HALVE(x0[1] + x1[1]);
367              r1     = HALVE(x0[0] - x1[0]);
368
369              w0[0]  = r0     + r2;
370              w1[2]  = r0     - r2;
371              w0[1]  = r1     + r3;
372              w1[3]  = r3     - r1;
373
374              x0     = x+bit[2];
375              x1     = x+bit[3];
376
377              r0     = x0[1]  - x1[1];
378              r1     = x0[0]  + x1[0];
379              r2     = MULT_NORM(r1     * T[2]   + r0 * T[3]);
380              r3     = MULT_NORM(r1     * T[3]   - r0 * T[2]);
381
382              r0     = HALVE(x0[1] + x1[1]);
383              r1     = HALVE(x0[0] - x1[0]);
384
385              w0[2]  = r0     + r2;
386              w1[0]  = r0     - r2;
387              w0[3]  = r1     + r3;
388              w1[1]  = r3     - r1;
389
390              T     += 4;
391              bit   += 4;
392              w0    += 4;
393
394  }while(w0<w1);
395}
396
397void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
398  int n=init->n;
399  int n2=n>>1;
400  int n4=n>>2;
401
402  /* rotate */
403
404  DATA_TYPE *iX = in+n2-7;
405  DATA_TYPE *oX = out+n2+n4;
406  DATA_TYPE *T  = init->trig+n4;
407
408  do{
409    oX         -= 4;
410    oX[0]       = MULT_NORM(-iX[2] * T[3] - iX[0]  * T[2]);
411    oX[1]       = MULT_NORM (iX[0] * T[3] - iX[2]  * T[2]);
412    oX[2]       = MULT_NORM(-iX[6] * T[1] - iX[4]  * T[0]);
413    oX[3]       = MULT_NORM (iX[4] * T[1] - iX[6]  * T[0]);
414    iX         -= 8;
415    T          += 4;
416  }while(iX>=in);
417
418  iX            = in+n2-8;
419  oX            = out+n2+n4;
420  T             = init->trig+n4;
421
422  do{
423    T          -= 4;
424    oX[0]       =  MULT_NORM (iX[4] * T[3] + iX[6] * T[2]);
425    oX[1]       =  MULT_NORM (iX[4] * T[2] - iX[6] * T[3]);
426    oX[2]       =  MULT_NORM (iX[0] * T[1] + iX[2] * T[0]);
427    oX[3]       =  MULT_NORM (iX[0] * T[0] - iX[2] * T[1]);
428    iX         -= 8;
429    oX         += 4;
430  }while(iX>=in);
431
432  mdct_butterflies(init,out+n2,n2);
433  mdct_bitreverse(init,out);
434
435  /* roatate + window */
436
437  {
438    DATA_TYPE *oX1=out+n2+n4;
439    DATA_TYPE *oX2=out+n2+n4;
440    DATA_TYPE *iX =out;
441    T             =init->trig+n2;
442
443    do{
444      oX1-=4;
445
446      oX1[3]  =  MULT_NORM (iX[0] * T[1] - iX[1] * T[0]);
447      oX2[0]  = -MULT_NORM (iX[0] * T[0] + iX[1] * T[1]);
448
449      oX1[2]  =  MULT_NORM (iX[2] * T[3] - iX[3] * T[2]);
450      oX2[1]  = -MULT_NORM (iX[2] * T[2] + iX[3] * T[3]);
451
452      oX1[1]  =  MULT_NORM (iX[4] * T[5] - iX[5] * T[4]);
453      oX2[2]  = -MULT_NORM (iX[4] * T[4] + iX[5] * T[5]);
454
455      oX1[0]  =  MULT_NORM (iX[6] * T[7] - iX[7] * T[6]);
456      oX2[3]  = -MULT_NORM (iX[6] * T[6] + iX[7] * T[7]);
457
458      oX2+=4;
459      iX    +=   8;
460      T     +=   8;
461    }while(iX<oX1);
462
463    iX=out+n2+n4;
464    oX1=out+n4;
465    oX2=oX1;
466
467    do{
468      oX1-=4;
469      iX-=4;
470
471      oX2[0] = -(oX1[3] = iX[3]);
472      oX2[1] = -(oX1[2] = iX[2]);
473      oX2[2] = -(oX1[1] = iX[1]);
474      oX2[3] = -(oX1[0] = iX[0]);
475
476      oX2+=4;
477    }while(oX2<iX);
478
479    iX=out+n2+n4;
480    oX1=out+n2+n4;
481    oX2=out+n2;
482    do{
483      oX1-=4;
484      oX1[0]= iX[3];
485      oX1[1]= iX[2];
486      oX1[2]= iX[1];
487      oX1[3]= iX[0];
488      iX+=4;
489    }while(oX1>oX2);
490  }
491}
492
493void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
494  int n=init->n;
495  int n2=n>>1;
496  int n4=n>>2;
497  int n8=n>>3;
498  DATA_TYPE *w=alloca(n*sizeof(*w)); /* forward needs working space */
499  DATA_TYPE *w2=w+n2;
500
501  /* rotate */
502
503  /* window + rotate + step 1 */
504
505  REG_TYPE r0;
506  REG_TYPE r1;
507  DATA_TYPE *x0=in+n2+n4;
508  DATA_TYPE *x1=x0+1;
509  DATA_TYPE *T=init->trig+n2;
510
511  int i=0;
512
513  for(i=0;i<n8;i+=2){
514    x0 -=4;
515    T-=2;
516    r0= x0[2] + x1[0];
517    r1= x0[0] + x1[2];
518    w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
519    w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
520    x1 +=4;
521  }
522
523  x1=in+1;
524
525  for(;i<n2-n8;i+=2){
526    T-=2;
527    x0 -=4;
528    r0= x0[2] - x1[0];
529    r1= x0[0] - x1[2];
530    w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
531    w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
532    x1 +=4;
533  }
534
535  x0=in+n;
536
537  for(;i<n2;i+=2){
538    T-=2;
539    x0 -=4;
540    r0= -x0[2] - x1[0];
541    r1= -x0[0] - x1[2];
542    w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
543    w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
544    x1 +=4;
545  }
546
547
548  mdct_butterflies(init,w+n2,n2);
549  mdct_bitreverse(init,w);
550
551  /* roatate + window */
552
553  T=init->trig+n2;
554  x0=out+n2;
555
556  for(i=0;i<n4;i++){
557    x0--;
558    out[i] =MULT_NORM((w[0]*T[0]+w[1]*T[1])*init->scale);
559    x0[0]  =MULT_NORM((w[0]*T[1]-w[1]*T[0])*init->scale);
560    w+=2;
561    T+=2;
562  }
563}
564