12a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/************************************************************************
22a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Copyright (C) 2002-2009, Xiph.org Foundation
32a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) * All rights reserved.
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *
62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Redistribution and use in source and binary forms, with or without
72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * modification, are permitted provided that the following conditions
82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * are met:
92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *
102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *     * Redistributions of source code must retain the above copyright
112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * notice, this list of conditions and the following disclaimer.
122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *     * Redistributions in binary form must reproduce the above
132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * copyright notice, this list of conditions and the following disclaimer
14868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) * in the documentation and/or other materials provided with the
15ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch * distribution.
162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *     * Neither the names of the Xiph.org Foundation nor Pinknoise
172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * Productions Ltd nor the names of its contributors may be used to
182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * endorse or promote products derived from this software without
192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * specific prior written permission.
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) *
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ************************************************************************
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) function: normalized modified discrete cosine transform
352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)           power of two length transform only [64 <= n ]
362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) last mod: $Id: mdct.c,v 1.9.6.5 2003/04/29 04:03:27 xiphmont Exp $
372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Original algorithm adapted long ago from _The use of multirate filter
392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) banks for coding of high quality digital audio_, by T. Sporer,
402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) K. Brandenburg and B. Edler, collection of the European Signal
412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 211-214
432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
44c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) The below code implements an algorithm that no longer looks much like
452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) that presented in the paper, but the basic structure remains if you
462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) dig deep enough to see it.
472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
48ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch This module DOES NOT INCLUDE code to generate/apply the window
492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) function.  Everybody has their own weird favorite including me... I
502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) vehemently disagree.
522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ************************************************************************/
542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "ivorbiscodec.h"
567d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "os.h"
577d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)#include "misc.h"
58ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "mdct.h"
5990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)#include "mdct_lookup.h"
6090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <stdio.h>
62c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
63c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)#if defined(ONLY_C)
64ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben MurdochSTIN void presymmetry(DATA_TYPE *in,int n2,int step){
65a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  DATA_TYPE *aX;
66c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  DATA_TYPE *bX;
67c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  LOOKUP_T *T;
68a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  int n4=n2>>1;
69a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
70a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  aX            = in+n2-3;
71a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  T             = sincos_lookup0;
72a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
73c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  do{
74c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    REG_TYPE  s0= aX[0];
75c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    REG_TYPE  s2= aX[2];
76c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    XPROD31( s0, s2, T[0], T[1], &aX[0], &aX[2] ); T+=step;
77c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    aX-=4;
78c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  }while(aX>=in+n4);
79c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  do{
80c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)    REG_TYPE  s0= aX[0];
817d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    REG_TYPE  s2= aX[2];
827d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    XPROD31( s0, s2, T[1], T[0], &aX[0], &aX[2] ); T-=step;
83a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    aX-=4;
84a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  }while(aX>=in);
85a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
86ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  aX            = in+n2-4;
8790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  bX            = in;
8890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  T             = sincos_lookup0;
89c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  do{
90a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    REG_TYPE  ri0= aX[0];
91a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    REG_TYPE  ri2= aX[2];
92a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    REG_TYPE  ro0= bX[0];
93a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    REG_TYPE  ro2= bX[2];
94a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
95a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step;
96a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] );
97a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
98a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    aX-=4;
99a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch    bX+=4;
100a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  }while(aX>=bX);
101a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch}
10290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/* 8 point butterfly (in place) */
1047d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)STIN void mdct_butterfly_8(DATA_TYPE *x){
1052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  REG_TYPE s0   = x[0] + x[1];
1072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  REG_TYPE s1   = x[0] - x[1];
1082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  REG_TYPE s2   = x[2] + x[3];
10990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  REG_TYPE s3   = x[2] - x[3];
1102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  REG_TYPE s4   = x[4] + x[5];
1117d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  REG_TYPE s5   = x[4] - x[5];
1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  REG_TYPE s6   = x[6] + x[7];
1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  REG_TYPE s7   = x[6] - x[7];
1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[0] = s5   + s3;
1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[1] = s7   - s1;
1177d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)	   x[2] = s5   - s3;
1182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[3] = s7   + s1;
119ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch           x[4] = s4   - s0;
12090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)	   x[5] = s6   - s2;
12190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)           x[6] = s4   + s0;
12290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)	   x[7] = s6   + s2;
12390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)	   MB();
1242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
1252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/* 16 point butterfly (in place, 4 register) */
1272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STIN void mdct_butterfly_16(DATA_TYPE *x){
1282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
12990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  REG_TYPE s0, s1, s2, s3;
13090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
13190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)	   s0 = x[ 8] - x[ 9]; x[ 8] += x[ 9];
13290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)	   s1 = x[10] - x[11]; x[10] += x[11];
1332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s2 = x[ 1] - x[ 0]; x[ 9]  = x[ 1] + x[0];
1342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s3 = x[ 3] - x[ 2]; x[11]  = x[ 3] + x[2];
1352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 0] = MULT31((s0 - s1) , cPI2_8);
1362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 1] = MULT31((s2 + s3) , cPI2_8);
1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 2] = MULT31((s0 + s1) , cPI2_8);
1382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 3] = MULT31((s3 - s2) , cPI2_8);
1392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   MB();
1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s2 = x[12] - x[13]; x[12] += x[13];
1422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s3 = x[14] - x[15]; x[14] += x[15];
1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s0 = x[ 4] - x[ 5]; x[13]  = x[ 5] + x[ 4];
1442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s1 = x[ 7] - x[ 6]; x[15]  = x[ 7] + x[ 6];
1452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 4] = s2; x[ 5] = s1;
1467d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)	   x[ 6] = s3; x[ 7] = s0;
1477d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)	   MB();
148ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch
14990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)	   mdct_butterfly_8(x);
15090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)	   mdct_butterfly_8(x+8);
1512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
1522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/* 32 point butterfly (in place, 4 register) */
1542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STIN void mdct_butterfly_32(DATA_TYPE *x){
1552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
156ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  REG_TYPE s0, s1, s2, s3;
1572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s0 = x[16] - x[17]; x[16] += x[17];
1592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s1 = x[18] - x[19]; x[18] += x[19];
1602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s2 = x[ 1] - x[ 0]; x[17]  = x[ 1] + x[ 0];
1612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s3 = x[ 3] - x[ 2]; x[19]  = x[ 3] + x[ 2];
1622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   XNPROD31( s0, s1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] );
1632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   XPROD31 ( s2, s3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] );
1642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   MB();
1652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s0 = x[20] - x[21]; x[20] += x[21];
1672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s1 = x[22] - x[23]; x[22] += x[23];
1682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s2 = x[ 5] - x[ 4]; x[21]  = x[ 5] + x[ 4];
1692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s3 = x[ 7] - x[ 6]; x[23]  = x[ 7] + x[ 6];
1702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 4] = MULT31((s0 - s1) , cPI2_8);
1712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 5] = MULT31((s3 + s2) , cPI2_8);
1722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 6] = MULT31((s0 + s1) , cPI2_8);
1732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[ 7] = MULT31((s3 - s2) , cPI2_8);
1742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   MB();
1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1767d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)	   s0 = x[24] - x[25]; x[24] += x[25];
1777d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)	   s1 = x[26] - x[27]; x[26] += x[27];
178ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch	   s2 = x[ 9] - x[ 8]; x[25]  = x[ 9] + x[ 8];
1792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s3 = x[11] - x[10]; x[27]  = x[11] + x[10];
1802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   XNPROD31( s0, s1, cPI1_8, cPI3_8, &x[ 8], &x[10] );
1812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   XPROD31 ( s2, s3, cPI3_8, cPI1_8, &x[ 9], &x[11] );
1822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   MB();
1832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s0 = x[28] - x[29]; x[28] += x[29];
185ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch	   s1 = x[30] - x[31]; x[30] += x[31];
1862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s2 = x[12] - x[13]; x[29]  = x[13] + x[12];
1872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   s3 = x[15] - x[14]; x[31]  = x[15] + x[14];
1882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[12] = s0; x[13] = s3;
1892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   x[14] = s1; x[15] = s2;
1902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   MB();
1912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   mdct_butterfly_16(x);
1932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)	   mdct_butterfly_16(x+16);
1942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
1952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/* N/stage point generic N stage butterfly (in place, 2 register) */
1972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
1982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  LOOKUP_T   *T  = sincos_lookup0;
1992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DATA_TYPE *x1  = x + points - 4;
2002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DATA_TYPE *x2  = x + (points>>1) - 4;
2012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  REG_TYPE   s0, s1, s2, s3;
2022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
20390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  do{
20490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    s0 = x1[0] - x1[1]; x1[0] += x1[1];
2052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    s1 = x1[3] - x1[2]; x1[2] += x1[3];
2062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    s2 = x2[1] - x2[0]; x1[1]  = x2[1] + x2[0];
20790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    s3 = x2[3] - x2[2]; x1[3]  = x2[3] + x2[2];
20890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    XPROD31( s1, s0, T[0], T[1], &x2[0], &x2[2] );
20990dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    XPROD31( s2, s3, T[0], T[1], &x2[1], &x2[3] ); T+=step;
2102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    x1-=4;
2112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    x2-=4;
2122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }while(T<sincos_lookup0+1024);
2137d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  x1 = x + (points>>1) + (points>>2) - 4;
2147d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  x2 = x +               (points>>2) - 4;
215ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  T = sincos_lookup0+1024;
216  do{
217    s0 = x1[0] - x1[1]; x1[0] += x1[1];
218    s1 = x1[2] - x1[3]; x1[2] += x1[3];
219    s2 = x2[0] - x2[1]; x1[1]  = x2[1] + x2[0];
220    s3 = x2[3] - x2[2]; x1[3]  = x2[3] + x2[2];
221    XNPROD31( s0, s1, T[0], T[1], &x2[0], &x2[2] );
222    XNPROD31( s3, s2, T[0], T[1], &x2[1], &x2[3] ); T-=step;
223    x1-=4;
224    x2-=4;
225  }while(T>sincos_lookup0);
226}
227
228STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
229
230  int stages=7-shift;
231  int i,j;
232
233  for(i=0;--stages>=0;i++){
234    for(j=0;j<(1<<i);j++)
235    {
236        mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
237    }
238  }
239
240  for(j=0;j<points;j+=32)
241    mdct_butterfly_32(x+j);
242}
243
244static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
245
246STIN int bitrev12(int x){
247  return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
248}
249
250STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){
251  int          bit   = 0;
252  DATA_TYPE   *w     = x+(n>>1);
253
254  do{
255    DATA_TYPE  b     = bitrev12(bit++);
256    DATA_TYPE *xx    = x + (b>>shift);
257    REG_TYPE  r;
258
259               w    -= 2;
260
261	       if(w>xx){
262
263		 r      = xx[0];
264		 xx[0]  = w[0];
265		 w[0]   = r;
266
267		 r      = xx[1];
268		 xx[1]  = w[1];
269		 w[1]   = r;
270	       }
271  }while(w>x);
272}
273
274STIN void mdct_step7(DATA_TYPE *x,int n,int step){
275  DATA_TYPE   *w0    = x;
276  DATA_TYPE   *w1    = x+(n>>1);
277  LOOKUP_T    *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
278  LOOKUP_T    *Ttop  = T+1024;
279  REG_TYPE     s0, s1, s2, s3;
280
281  do{
282	      w1    -= 2;
283
284              s0     = w0[0]  + w1[0];
285              s1     = w1[1]  - w0[1];
286	      s2     = MULT32(s0, T[1]) + MULT32(s1, T[0]);
287	      s3     = MULT32(s1, T[1]) - MULT32(s0, T[0]);
288	      T+=step;
289
290	      s0     = (w0[1] + w1[1])>>1;
291              s1     = (w0[0] - w1[0])>>1;
292	      w0[0]  = s0     + s2;
293	      w0[1]  = s1     + s3;
294	      w1[0]  = s0     - s2;
295	      w1[1]  = s3     - s1;
296
297	      w0    += 2;
298  }while(T<Ttop);
299  do{
300	      w1    -= 2;
301
302              s0     = w0[0]  + w1[0];
303              s1     = w1[1]  - w0[1];
304	      T-=step;
305	      s2     = MULT32(s0, T[0]) + MULT32(s1, T[1]);
306	      s3     = MULT32(s1, T[0]) - MULT32(s0, T[1]);
307
308	      s0     = (w0[1] + w1[1])>>1;
309              s1     = (w0[0] - w1[0])>>1;
310	      w0[0]  = s0     + s2;
311	      w0[1]  = s1     + s3;
312	      w1[0]  = s0     - s2;
313	      w1[1]  = s3     - s1;
314
315	      w0    += 2;
316  }while(w0<w1);
317}
318#endif
319
320STIN void mdct_step8(DATA_TYPE *x, int n, int step){
321  LOOKUP_T *T;
322  LOOKUP_T *V;
323  DATA_TYPE *iX =x+(n>>1);
324
325  switch(step) {
326#if defined(ONLY_C)
327  default:
328    T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
329    do{
330      REG_TYPE     s0  =  x[0];
331      REG_TYPE     s1  = -x[1];
332                   XPROD31( s0, s1, T[0], T[1], x, x+1); T+=step;
333                   x  +=2;
334    }while(x<iX);
335    break;
336#endif
337
338  case 1:
339    {
340      /* linear interpolation between table values: offset=0.5, step=1 */
341      REG_TYPE    t0,t1,v0,v1,s0,s1;
342      T         = sincos_lookup0;
343      V         = sincos_lookup1;
344      t0        = (*T++)>>1;
345      t1        = (*T++)>>1;
346      do{
347	    s0  =  x[0];
348	    s1  = -x[1];
349	    t0 += (v0 = (*V++)>>1);
350	    t1 += (v1 = (*V++)>>1);
351	    XPROD31( s0, s1, t0, t1, x, x+1 );
352
353	    s0  =  x[2];
354	    s1  = -x[3];
355	    v0 += (t0 = (*T++)>>1);
356	    v1 += (t1 = (*T++)>>1);
357	    XPROD31( s0, s1, v0, v1, x+2, x+3 );
358
359	    x += 4;
360      }while(x<iX);
361      break;
362    }
363
364  case 0:
365    {
366      /* linear interpolation between table values: offset=0.25, step=0.5 */
367      REG_TYPE    t0,t1,v0,v1,q0,q1,s0,s1;
368      T         = sincos_lookup0;
369      V         = sincos_lookup1;
370      t0        = *T++;
371      t1        = *T++;
372      do{
373
374
375	v0  = *V++;
376	v1  = *V++;
377	t0 +=  (q0 = (v0-t0)>>2);
378	t1 +=  (q1 = (v1-t1)>>2);
379	s0  =  x[0];
380	s1  = -x[1];
381	XPROD31( s0, s1, t0, t1, x, x+1 );
382	t0  = v0-q0;
383	t1  = v1-q1;
384	s0  =  x[2];
385	s1  = -x[3];
386	XPROD31( s0, s1, t0, t1, x+2, x+3 );
387
388	t0  = *T++;
389	t1  = *T++;
390	v0 += (q0 = (t0-v0)>>2);
391	v1 += (q1 = (t1-v1)>>2);
392	s0  =  x[4];
393	s1  = -x[5];
394	XPROD31( s0, s1, v0, v1, x+4, x+5 );
395	v0  = t0-q0;
396	v1  = t1-q1;
397	s0  =  x[6];
398	s1  = -x[7];
399	XPROD31( s0, s1, v0, v1, x+5, x+6 );
400
401	x+=8;
402      }while(x<iX);
403      break;
404    }
405  }
406}
407
408extern int mdct_backwardARM(int n, DATA_TYPE *in);
409
410/* partial; doesn't perform last-step deinterleave/unrolling.  That
411   can be done more efficiently during pcm output */
412void mdct_backward(int n, DATA_TYPE *in){
413  int step;
414
415#if defined(ONLY_C)
416  int shift;
417
418  for (shift=4;!(n&(1<<shift));shift++);
419  shift=13-shift;
420  step=2<<shift;
421
422  presymmetry(in,n>>1,step);
423  mdct_butterflies(in,n>>1,shift);
424  mdct_bitreverse(in,n,shift);
425  mdct_step7(in,n,step);
426  mdct_step8(in,n,step>>2);
427#else
428  step = mdct_backwardARM(n, in);
429  if (step <= 1)
430    mdct_step8(in,n,step);
431#endif
432}
433
434#if defined(ONLY_C)
435void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){
436  int i;
437  n>>=2;
438  in+=1;
439
440  for(i=0;i<n;i++)
441    right[i]=in[i<<1];
442}
443#endif
444
445extern ogg_int16_t *mdct_unroll_prelap(ogg_int16_t *out,
446                                       DATA_TYPE   *post,
447                                       DATA_TYPE   *l,
448                                       int          step);
449extern ogg_int16_t *mdct_unroll_part2(ogg_int16_t *out,
450                                      DATA_TYPE   *post,
451                                      DATA_TYPE   *l,
452                                      DATA_TYPE   *r,
453                                      int          step,
454                                      LOOKUP_T    *wL,
455                                      LOOKUP_T    *wR);
456extern ogg_int16_t *mdct_unroll_part3(ogg_int16_t *out,
457                                      DATA_TYPE   *post,
458                                      DATA_TYPE   *l,
459                                      DATA_TYPE   *r,
460                                      int          step,
461                                      LOOKUP_T    *wL,
462                                      LOOKUP_T    *wR);
463extern ogg_int16_t *mdct_unroll_postlap(ogg_int16_t *out,
464                                        DATA_TYPE   *post,
465                                        DATA_TYPE   *l,
466                                        int          step);
467
468void mdct_unroll_lap(int n0,int n1,
469		     int lW,int W,
470		     DATA_TYPE *in,
471		     DATA_TYPE *right,
472		     LOOKUP_T *w0,
473		     LOOKUP_T *w1,
474		     ogg_int16_t *out,
475		     int step,
476		     int start, /* samples, this frame */
477		     int end    /* samples, this frame */){
478
479  DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1);
480  DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2);
481  DATA_TYPE *post;
482  LOOKUP_T *wR=(W && lW ? w1+(n1>>1) : w0+(n0>>1));
483  LOOKUP_T *wL=(W && lW ? w1         : w0        );
484
485  int preLap=(lW && !W ? (n1>>2)-(n0>>2) : 0 );
486  int halfLap=(lW && W ? (n1>>2) : (n0>>2) );
487  int postLap=(!lW && W ? (n1>>2)-(n0>>2) : 0 );
488  int n,off;
489
490  /* preceeding direct-copy lapping from previous frame, if any */
491  if(preLap){
492    n      = (end<preLap?end:preLap);
493    off    = (start<preLap?start:preLap);
494    post   = r-n;
495    r     -= off;
496    start -= off;
497    end   -= n;
498#if defined(ONLY_C)
499    while(r>post){
500      *out = CLIP_TO_15((*--r)>>9);
501      out+=step;
502    }
503#else
504    out = mdct_unroll_prelap(out,post,r,step);
505    n -= off;
506    if (n < 0)
507      n = 0;
508    r -= n;
509#endif
510  }
511
512  /* cross-lap; two halves due to wrap-around */
513  n      = (end<halfLap?end:halfLap);
514  off    = (start<halfLap?start:halfLap);
515  post   = r-n;
516  r     -= off;
517  l     -= off*2;
518  start -= off;
519  wR    -= off;
520  wL    += off;
521  end   -= n;
522#if defined(ONLY_C)
523  while(r>post){
524    l-=2;
525    *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9);
526    out+=step;
527  }
528#else
529  out = mdct_unroll_part2(out, post, l, r, step, wL, wR);
530  n -= off;
531  if (n < 0)
532      n = 0;
533  l -= 2*n;
534  r -= n;
535  wR -= n;
536  wL += n;
537#endif
538
539  n      = (end<halfLap?end:halfLap);
540  off    = (start<halfLap?start:halfLap);
541  post   = r+n;
542  r     += off;
543  l     += off*2;
544  start -= off;
545  end   -= n;
546  wR    -= off;
547  wL    += off;
548#if defined(ONLY_C)
549  while(r<post){
550    *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9);
551    out+=step;
552    l+=2;
553  }
554#else
555  out = mdct_unroll_part3(out, post, l, r, step, wL, wR);
556  n -= off;
557  if (n < 0)
558      n = 0;
559  l += 2*n;
560  r += n;
561  wR -= n;
562  wL += n;
563#endif
564
565  /* preceeding direct-copy lapping from previous frame, if any */
566  if(postLap){
567    n      = (end<postLap?end:postLap);
568    off    = (start<postLap?start:postLap);
569    post   = l+n*2;
570    l     += off*2;
571#if defined(ONLY_C)
572    while(l<post){
573      *out = CLIP_TO_15((-*l)>>9);
574      out+=step;
575      l+=2;
576    }
577#else
578    out = mdct_unroll_postlap(out,post,l,step);
579#endif
580  }
581}
582
583