1/************************************************************************
2 * Copyright (C) 2002-2009, Xiph.org Foundation
3 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *     * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *     * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 *     * Neither the names of the Xiph.org Foundation nor Pinknoise
17 * Productions Ltd nor the names of its contributors may be used to
18 * endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ************************************************************************
33
34 function: normalized modified discrete cosine transform
35           power of two length transform only [64 <= n ]
36 last mod: $Id: mdct.c,v 1.9.6.5 2003/04/29 04:03:27 xiphmont Exp $
37
38 Original algorithm adapted long ago from _The use of multirate filter
39 banks for coding of high quality digital audio_, by T. Sporer,
40 K. Brandenburg and B. Edler, collection of the European Signal
41 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
42 211-214
43
44 The below code implements an algorithm that no longer looks much like
45 that presented in the paper, but the basic structure remains if you
46 dig deep enough to see it.
47
48 This module DOES NOT INCLUDE code to generate/apply the window
49 function.  Everybody has their own weird favorite including me... I
50 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
51 vehemently disagree.
52
53 ************************************************************************/
54
55#include "ivorbiscodec.h"
56#include "os.h"
57#include "misc.h"
58#include "mdct.h"
59#include "mdct_lookup.h"
60
61#include <stdio.h>
62
63#if defined(ONLY_C)
64STIN void presymmetry(DATA_TYPE *in,int n2,int step){
65  DATA_TYPE *aX;
66  DATA_TYPE *bX;
67  LOOKUP_T *T;
68  int n4=n2>>1;
69
70  aX            = in+n2-3;
71  T             = sincos_lookup0;
72
73  do{
74    REG_TYPE  s0= aX[0];
75    REG_TYPE  s2= aX[2];
76    XPROD31( s0, s2, T[0], T[1], &aX[0], &aX[2] ); T+=step;
77    aX-=4;
78  }while(aX>=in+n4);
79  do{
80    REG_TYPE  s0= aX[0];
81    REG_TYPE  s2= aX[2];
82    XPROD31( s0, s2, T[1], T[0], &aX[0], &aX[2] ); T-=step;
83    aX-=4;
84  }while(aX>=in);
85
86  aX            = in+n2-4;
87  bX            = in;
88  T             = sincos_lookup0;
89  do{
90    REG_TYPE  ri0= aX[0];
91    REG_TYPE  ri2= aX[2];
92    REG_TYPE  ro0= bX[0];
93    REG_TYPE  ro2= bX[2];
94
95    XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step;
96    XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] );
97
98    aX-=4;
99    bX+=4;
100  }while(aX>=bX);
101}
102
103/* 8 point butterfly (in place) */
104STIN void mdct_butterfly_8(DATA_TYPE *x){
105
106  REG_TYPE s0   = x[0] + x[1];
107  REG_TYPE s1   = x[0] - x[1];
108  REG_TYPE s2   = x[2] + x[3];
109  REG_TYPE s3   = x[2] - x[3];
110  REG_TYPE s4   = x[4] + x[5];
111  REG_TYPE s5   = x[4] - x[5];
112  REG_TYPE s6   = x[6] + x[7];
113  REG_TYPE s7   = x[6] - x[7];
114
115	   x[0] = s5   + s3;
116	   x[1] = s7   - s1;
117	   x[2] = s5   - s3;
118	   x[3] = s7   + s1;
119           x[4] = s4   - s0;
120	   x[5] = s6   - s2;
121           x[6] = s4   + s0;
122	   x[7] = s6   + s2;
123	   MB();
124}
125
126/* 16 point butterfly (in place, 4 register) */
127STIN void mdct_butterfly_16(DATA_TYPE *x){
128
129  REG_TYPE s0, s1, s2, s3;
130
131	   s0 = x[ 8] - x[ 9]; x[ 8] += x[ 9];
132	   s1 = x[10] - x[11]; x[10] += x[11];
133	   s2 = x[ 1] - x[ 0]; x[ 9]  = x[ 1] + x[0];
134	   s3 = x[ 3] - x[ 2]; x[11]  = x[ 3] + x[2];
135	   x[ 0] = MULT31((s0 - s1) , cPI2_8);
136	   x[ 1] = MULT31((s2 + s3) , cPI2_8);
137	   x[ 2] = MULT31((s0 + s1) , cPI2_8);
138	   x[ 3] = MULT31((s3 - s2) , cPI2_8);
139	   MB();
140
141	   s2 = x[12] - x[13]; x[12] += x[13];
142	   s3 = x[14] - x[15]; x[14] += x[15];
143	   s0 = x[ 4] - x[ 5]; x[13]  = x[ 5] + x[ 4];
144	   s1 = x[ 7] - x[ 6]; x[15]  = x[ 7] + x[ 6];
145	   x[ 4] = s2; x[ 5] = s1;
146	   x[ 6] = s3; x[ 7] = s0;
147	   MB();
148
149	   mdct_butterfly_8(x);
150	   mdct_butterfly_8(x+8);
151}
152
153/* 32 point butterfly (in place, 4 register) */
154STIN void mdct_butterfly_32(DATA_TYPE *x){
155
156  REG_TYPE s0, s1, s2, s3;
157
158	   s0 = x[16] - x[17]; x[16] += x[17];
159	   s1 = x[18] - x[19]; x[18] += x[19];
160	   s2 = x[ 1] - x[ 0]; x[17]  = x[ 1] + x[ 0];
161	   s3 = x[ 3] - x[ 2]; x[19]  = x[ 3] + x[ 2];
162	   XNPROD31( s0, s1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] );
163	   XPROD31 ( s2, s3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] );
164	   MB();
165
166	   s0 = x[20] - x[21]; x[20] += x[21];
167	   s1 = x[22] - x[23]; x[22] += x[23];
168	   s2 = x[ 5] - x[ 4]; x[21]  = x[ 5] + x[ 4];
169	   s3 = x[ 7] - x[ 6]; x[23]  = x[ 7] + x[ 6];
170	   x[ 4] = MULT31((s0 - s1) , cPI2_8);
171	   x[ 5] = MULT31((s3 + s2) , cPI2_8);
172	   x[ 6] = MULT31((s0 + s1) , cPI2_8);
173	   x[ 7] = MULT31((s3 - s2) , cPI2_8);
174	   MB();
175
176	   s0 = x[24] - x[25]; x[24] += x[25];
177	   s1 = x[26] - x[27]; x[26] += x[27];
178	   s2 = x[ 9] - x[ 8]; x[25]  = x[ 9] + x[ 8];
179	   s3 = x[11] - x[10]; x[27]  = x[11] + x[10];
180	   XNPROD31( s0, s1, cPI1_8, cPI3_8, &x[ 8], &x[10] );
181	   XPROD31 ( s2, s3, cPI3_8, cPI1_8, &x[ 9], &x[11] );
182	   MB();
183
184	   s0 = x[28] - x[29]; x[28] += x[29];
185	   s1 = x[30] - x[31]; x[30] += x[31];
186	   s2 = x[12] - x[13]; x[29]  = x[13] + x[12];
187	   s3 = x[15] - x[14]; x[31]  = x[15] + x[14];
188	   x[12] = s0; x[13] = s3;
189	   x[14] = s1; x[15] = s2;
190	   MB();
191
192	   mdct_butterfly_16(x);
193	   mdct_butterfly_16(x+16);
194}
195
196/* N/stage point generic N stage butterfly (in place, 2 register) */
197STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
198  LOOKUP_T   *T  = sincos_lookup0;
199  DATA_TYPE *x1  = x + points - 4;
200  DATA_TYPE *x2  = x + (points>>1) - 4;
201  REG_TYPE   s0, s1, s2, s3;
202
203  do{
204    s0 = x1[0] - x1[1]; x1[0] += x1[1];
205    s1 = x1[3] - x1[2]; x1[2] += x1[3];
206    s2 = x2[1] - x2[0]; x1[1]  = x2[1] + x2[0];
207    s3 = x2[3] - x2[2]; x1[3]  = x2[3] + x2[2];
208    XPROD31( s1, s0, T[0], T[1], &x2[0], &x2[2] );
209    XPROD31( s2, s3, T[0], T[1], &x2[1], &x2[3] ); T+=step;
210    x1-=4;
211    x2-=4;
212  }while(T<sincos_lookup0+1024);
213  x1 = x + (points>>1) + (points>>2) - 4;
214  x2 = x +               (points>>2) - 4;
215  T = sincos_lookup0+1024;
216  do{
217    s0 = x1[0] - x1[1]; x1[0] += x1[1];
218    s1 = x1[2] - x1[3]; x1[2] += x1[3];
219    s2 = x2[0] - x2[1]; x1[1]  = x2[1] + x2[0];
220    s3 = x2[3] - x2[2]; x1[3]  = x2[3] + x2[2];
221    XNPROD31( s0, s1, T[0], T[1], &x2[0], &x2[2] );
222    XNPROD31( s3, s2, T[0], T[1], &x2[1], &x2[3] ); T-=step;
223    x1-=4;
224    x2-=4;
225  }while(T>sincos_lookup0);
226}
227
228STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
229
230  int stages=7-shift;
231  int i,j;
232
233  for(i=0;--stages>=0;i++){
234    for(j=0;j<(1<<i);j++)
235    {
236        mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
237    }
238  }
239
240  for(j=0;j<points;j+=32)
241    mdct_butterfly_32(x+j);
242}
243
244static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
245
246STIN int bitrev12(int x){
247  return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
248}
249
250STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){
251  int          bit   = 0;
252  DATA_TYPE   *w     = x+(n>>1);
253
254  do{
255    DATA_TYPE  b     = bitrev12(bit++);
256    DATA_TYPE *xx    = x + (b>>shift);
257    REG_TYPE  r;
258
259               w    -= 2;
260
261	       if(w>xx){
262
263		 r      = xx[0];
264		 xx[0]  = w[0];
265		 w[0]   = r;
266
267		 r      = xx[1];
268		 xx[1]  = w[1];
269		 w[1]   = r;
270	       }
271  }while(w>x);
272}
273
274STIN void mdct_step7(DATA_TYPE *x,int n,int step){
275  DATA_TYPE   *w0    = x;
276  DATA_TYPE   *w1    = x+(n>>1);
277  LOOKUP_T    *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
278  LOOKUP_T    *Ttop  = T+1024;
279  REG_TYPE     s0, s1, s2, s3;
280
281  do{
282	      w1    -= 2;
283
284              s0     = w0[0]  + w1[0];
285              s1     = w1[1]  - w0[1];
286	      s2     = MULT32(s0, T[1]) + MULT32(s1, T[0]);
287	      s3     = MULT32(s1, T[1]) - MULT32(s0, T[0]);
288	      T+=step;
289
290	      s0     = (w0[1] + w1[1])>>1;
291              s1     = (w0[0] - w1[0])>>1;
292	      w0[0]  = s0     + s2;
293	      w0[1]  = s1     + s3;
294	      w1[0]  = s0     - s2;
295	      w1[1]  = s3     - s1;
296
297	      w0    += 2;
298  }while(T<Ttop);
299  do{
300	      w1    -= 2;
301
302              s0     = w0[0]  + w1[0];
303              s1     = w1[1]  - w0[1];
304	      T-=step;
305	      s2     = MULT32(s0, T[0]) + MULT32(s1, T[1]);
306	      s3     = MULT32(s1, T[0]) - MULT32(s0, T[1]);
307
308	      s0     = (w0[1] + w1[1])>>1;
309              s1     = (w0[0] - w1[0])>>1;
310	      w0[0]  = s0     + s2;
311	      w0[1]  = s1     + s3;
312	      w1[0]  = s0     - s2;
313	      w1[1]  = s3     - s1;
314
315	      w0    += 2;
316  }while(w0<w1);
317}
318#endif
319
320STIN void mdct_step8(DATA_TYPE *x, int n, int step){
321  LOOKUP_T *T;
322  LOOKUP_T *V;
323  DATA_TYPE *iX =x+(n>>1);
324
325  switch(step) {
326#if defined(ONLY_C)
327  default:
328    T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
329    do{
330      REG_TYPE     s0  =  x[0];
331      REG_TYPE     s1  = -x[1];
332                   XPROD31( s0, s1, T[0], T[1], x, x+1); T+=step;
333                   x  +=2;
334    }while(x<iX);
335    break;
336#endif
337
338  case 1:
339    {
340      /* linear interpolation between table values: offset=0.5, step=1 */
341      REG_TYPE    t0,t1,v0,v1,s0,s1;
342      T         = sincos_lookup0;
343      V         = sincos_lookup1;
344      t0        = (*T++)>>1;
345      t1        = (*T++)>>1;
346      do{
347	    s0  =  x[0];
348	    s1  = -x[1];
349	    t0 += (v0 = (*V++)>>1);
350	    t1 += (v1 = (*V++)>>1);
351	    XPROD31( s0, s1, t0, t1, x, x+1 );
352
353	    s0  =  x[2];
354	    s1  = -x[3];
355	    v0 += (t0 = (*T++)>>1);
356	    v1 += (t1 = (*T++)>>1);
357	    XPROD31( s0, s1, v0, v1, x+2, x+3 );
358
359	    x += 4;
360      }while(x<iX);
361      break;
362    }
363
364  case 0:
365    {
366      /* linear interpolation between table values: offset=0.25, step=0.5 */
367      REG_TYPE    t0,t1,v0,v1,q0,q1,s0,s1;
368      T         = sincos_lookup0;
369      V         = sincos_lookup1;
370      t0        = *T++;
371      t1        = *T++;
372      do{
373
374
375	v0  = *V++;
376	v1  = *V++;
377	t0 +=  (q0 = (v0-t0)>>2);
378	t1 +=  (q1 = (v1-t1)>>2);
379	s0  =  x[0];
380	s1  = -x[1];
381	XPROD31( s0, s1, t0, t1, x, x+1 );
382	t0  = v0-q0;
383	t1  = v1-q1;
384	s0  =  x[2];
385	s1  = -x[3];
386	XPROD31( s0, s1, t0, t1, x+2, x+3 );
387
388	t0  = *T++;
389	t1  = *T++;
390	v0 += (q0 = (t0-v0)>>2);
391	v1 += (q1 = (t1-v1)>>2);
392	s0  =  x[4];
393	s1  = -x[5];
394	XPROD31( s0, s1, v0, v1, x+4, x+5 );
395	v0  = t0-q0;
396	v1  = t1-q1;
397	s0  =  x[6];
398	s1  = -x[7];
399	XPROD31( s0, s1, v0, v1, x+5, x+6 );
400
401	x+=8;
402      }while(x<iX);
403      break;
404    }
405  }
406}
407
408extern int mdct_backwardARM(int n, DATA_TYPE *in);
409
410/* partial; doesn't perform last-step deinterleave/unrolling.  That
411   can be done more efficiently during pcm output */
412void mdct_backward(int n, DATA_TYPE *in){
413  int step;
414
415#if defined(ONLY_C)
416  int shift;
417
418  for (shift=4;!(n&(1<<shift));shift++);
419  shift=13-shift;
420  step=2<<shift;
421
422  presymmetry(in,n>>1,step);
423  mdct_butterflies(in,n>>1,shift);
424  mdct_bitreverse(in,n,shift);
425  mdct_step7(in,n,step);
426  mdct_step8(in,n,step>>2);
427#else
428  step = mdct_backwardARM(n, in);
429  if (step <= 1)
430    mdct_step8(in,n,step);
431#endif
432}
433
434#if defined(ONLY_C)
435void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){
436  int i;
437  n>>=2;
438  in+=1;
439
440  for(i=0;i<n;i++)
441    right[i]=in[i<<1];
442}
443#endif
444
445extern ogg_int16_t *mdct_unroll_prelap(ogg_int16_t *out,
446                                       DATA_TYPE   *post,
447                                       DATA_TYPE   *l,
448                                       int          step);
449extern ogg_int16_t *mdct_unroll_part2(ogg_int16_t *out,
450                                      DATA_TYPE   *post,
451                                      DATA_TYPE   *l,
452                                      DATA_TYPE   *r,
453                                      int          step,
454                                      LOOKUP_T    *wL,
455                                      LOOKUP_T    *wR);
456extern ogg_int16_t *mdct_unroll_part3(ogg_int16_t *out,
457                                      DATA_TYPE   *post,
458                                      DATA_TYPE   *l,
459                                      DATA_TYPE   *r,
460                                      int          step,
461                                      LOOKUP_T    *wL,
462                                      LOOKUP_T    *wR);
463extern ogg_int16_t *mdct_unroll_postlap(ogg_int16_t *out,
464                                        DATA_TYPE   *post,
465                                        DATA_TYPE   *l,
466                                        int          step);
467
468void mdct_unroll_lap(int n0,int n1,
469		     int lW,int W,
470		     DATA_TYPE *in,
471		     DATA_TYPE *right,
472		     LOOKUP_T *w0,
473		     LOOKUP_T *w1,
474		     ogg_int16_t *out,
475		     int step,
476		     int start, /* samples, this frame */
477		     int end    /* samples, this frame */){
478
479  DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1);
480  DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2);
481  DATA_TYPE *post;
482  LOOKUP_T *wR=(W && lW ? w1+(n1>>1) : w0+(n0>>1));
483  LOOKUP_T *wL=(W && lW ? w1         : w0        );
484
485  int preLap=(lW && !W ? (n1>>2)-(n0>>2) : 0 );
486  int halfLap=(lW && W ? (n1>>2) : (n0>>2) );
487  int postLap=(!lW && W ? (n1>>2)-(n0>>2) : 0 );
488  int n,off;
489
490  /* preceeding direct-copy lapping from previous frame, if any */
491  if(preLap){
492    n      = (end<preLap?end:preLap);
493    off    = (start<preLap?start:preLap);
494    post   = r-n;
495    r     -= off;
496    start -= off;
497    end   -= n;
498#if defined(ONLY_C)
499    while(r>post){
500      *out = CLIP_TO_15((*--r)>>9);
501      out+=step;
502    }
503#else
504    out = mdct_unroll_prelap(out,post,r,step);
505    n -= off;
506    if (n < 0)
507      n = 0;
508    r -= n;
509#endif
510  }
511
512  /* cross-lap; two halves due to wrap-around */
513  n      = (end<halfLap?end:halfLap);
514  off    = (start<halfLap?start:halfLap);
515  post   = r-n;
516  r     -= off;
517  l     -= off*2;
518  start -= off;
519  wR    -= off;
520  wL    += off;
521  end   -= n;
522#if defined(ONLY_C)
523  while(r>post){
524    l-=2;
525    *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9);
526    out+=step;
527  }
528#else
529  out = mdct_unroll_part2(out, post, l, r, step, wL, wR);
530  n -= off;
531  if (n < 0)
532      n = 0;
533  l -= 2*n;
534  r -= n;
535  wR -= n;
536  wL += n;
537#endif
538
539  n      = (end<halfLap?end:halfLap);
540  off    = (start<halfLap?start:halfLap);
541  post   = r+n;
542  r     += off;
543  l     += off*2;
544  start -= off;
545  end   -= n;
546  wR    -= off;
547  wL    += off;
548#if defined(ONLY_C)
549  while(r<post){
550    *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9);
551    out+=step;
552    l+=2;
553  }
554#else
555  out = mdct_unroll_part3(out, post, l, r, step, wL, wR);
556  n -= off;
557  if (n < 0)
558      n = 0;
559  l += 2*n;
560  r += n;
561  wR -= n;
562  wL += n;
563#endif
564
565  /* preceeding direct-copy lapping from previous frame, if any */
566  if(postLap){
567    n      = (end<postLap?end:postLap);
568    off    = (start<postLap?start:postLap);
569    post   = l+n*2;
570    l     += off*2;
571#if defined(ONLY_C)
572    while(l<post){
573      *out = CLIP_TO_15((-*l)>>9);
574      out+=step;
575      l+=2;
576    }
577#else
578    out = mdct_unroll_postlap(out,post,l,step);
579#endif
580  }
581}
582
583