12bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Copyright (c) 2007-2008 CSIRO 22bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian Copyright (c) 2007-2008 Xiph.Org Foundation 32bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian Written by Jean-Marc Valin */ 42bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* 52bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian Redistribution and use in source and binary forms, with or without 62bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian modification, are permitted provided that the following conditions 72bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian are met: 82bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 92bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian - Redistributions of source code must retain the above copyright 102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian notice, this list of conditions and the following disclaimer. 112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian - Redistributions in binary form must reproduce the above copyright 132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian notice, this list of conditions and the following disclaimer in the 142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian documentation and/or other materials provided with the distribution. 152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/ 282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* This is a simple MDCT implementation that uses a N/4 complex FFT 302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian to do most of the work. It should be relatively straightforward to 312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian plug in pretty much and FFT here. 322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian This replaces the Vorbis FFT (and uses the exact same API), which 342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian was a bit too messy and that was ending up duplicating code 352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian (might as well use the same FFT everywhere). 362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian The algorithm is similar to (and inspired from) Fabrice Bellard's 382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian MDCT implementation in FFMPEG, but has differences in signs, ordering 392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian and scaling in many places. 402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/ 412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifndef SKIP_CONFIG_H 432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef HAVE_CONFIG_H 442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "config.h" 452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mdct.h" 492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "kiss_fft.h" 502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "_kiss_fft_guts.h" 512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include <math.h> 522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "os_support.h" 532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mathops.h" 542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "stack_alloc.h" 552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef CUSTOM_MODES 572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianint clt_mdct_init(mdct_lookup *l,int N, int maxshift) 592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{ 602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int i; 612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int N4; 622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_twiddle_scalar *trig; 632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#if defined(FIXED_POINT) 642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int N2=N>>1; 652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian l->n = N; 672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N4 = N>>2; 682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian l->maxshift = maxshift; 692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for (i=0;i<=maxshift;i++) 702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian if (i==0) 722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); 732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian else 742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); 752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifndef ENABLE_TI_DSPLIB55 762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian if (l->kfft[i]==NULL) 772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian return 0; 782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); 812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian if (l->trig==NULL) 822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian return 0; 832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* We have enough points that sine isn't necessary */ 842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#if defined(FIXED_POINT) 852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for (i=0;i<=N4;i++) 862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); 872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#else 882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for (i=0;i<=N4;i++) 892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); 902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian return 1; 922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian} 932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianvoid clt_mdct_clear(mdct_lookup *l) 952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{ 962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int i; 972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for (i=0;i<=l->maxshift;i++) 982bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian opus_fft_free(l->kfft[i]); 992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian opus_free((kiss_twiddle_scalar*)l->trig); 1002bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian} 1012bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 1022bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif /* CUSTOM_MODES */ 1032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 1042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Forward MDCT trashes the input array */ 1052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianvoid clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 1062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 *window, int overlap, int shift, int stride) 1072bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{ 1082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int i; 1092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int N, N2, N4; 1102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_twiddle_scalar sine; 1112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian VARDECL(kiss_fft_scalar, f); 1122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian VARDECL(kiss_fft_scalar, f2); 1132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian SAVE_STACK; 1142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N = l->n; 1152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N >>= shift; 1162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N2 = N>>1; 1172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N4 = N>>2; 1182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian ALLOC(f, N2, kiss_fft_scalar); 1192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian ALLOC(f2, N2, kiss_fft_scalar); 1202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* sin(x) ~= x here */ 1212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef FIXED_POINT 1222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 1232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#else 1242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; 1252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 1262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 1272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Consider the input to be composed of four blocks: [a, b, c, d] */ 1282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Window, shuffle, fold */ 1292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Temp pointers to make it really clear to the compiler what we're doing */ 1312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); 1322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); 1332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp = f; 1342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); 1352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; 1362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<((overlap+3)>>2);i++) 1372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ 1392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); 1402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); 1412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp1+=2; 1422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp2-=2; 1432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp1+=2; 1442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp2-=2; 1452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp1 = window; 1472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp2 = window+overlap-1; 1482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(;i<N4-((overlap+3)>>2);i++) 1492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 1512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = *xp2; 1522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = *xp1; 1532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp1+=2; 1542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp2-=2; 1552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(;i<N4;i++) 1572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 1592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); 1602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); 1612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp1+=2; 1622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp2-=2; 1632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp1+=2; 1642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp2-=2; 1652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Pre-rotation */ 1682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp = f; 1702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_twiddle_scalar *t = &l->trig[0]; 1712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<N4;i++) 1722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar re, im, yr, yi; 1742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian re = yp[0]; 1752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian im = yp[1]; 1762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); 1772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); 1782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* works because the cos is nearly one */ 1792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = yr + S_MUL(yi,sine); 1802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = yi - S_MUL(yr,sine); 1812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 1842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* N/4 complex FFT, down-scales by 4/N */ 1852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); 1862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 1872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Post-rotate */ 1882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Temp pointers to make it really clear to the compiler what we're doing */ 1902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT fp = f2; 1912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 1922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 1932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_twiddle_scalar *t = &l->trig[0]; 1942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Temp pointers to make it really clear to the compiler what we're doing */ 1952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<N4;i++) 1962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar yr, yi; 1982bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); 1992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); 2002bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* works because the cos is nearly one */ 2012bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp1 = yr - S_MUL(yi,sine); 2022bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp2 = yi + S_MUL(yr,sine);; 2032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian fp += 2; 2042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp1 += 2*stride; 2052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp2 -= 2*stride; 2062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2072bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian RESTORE_STACK; 2092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian} 2102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianvoid clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 2122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) 2132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{ 2142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int i; 2152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int N, N2, N4; 2162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_twiddle_scalar sine; 2172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian VARDECL(kiss_fft_scalar, f2); 2182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian SAVE_STACK; 2192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N = l->n; 2202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N >>= shift; 2212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N2 = N>>1; 2222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N4 = N>>2; 2232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian ALLOC(f2, N2, kiss_fft_scalar); 2242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* sin(x) ~= x here */ 2252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef FIXED_POINT 2262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 2272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#else 2282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; 2292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 2302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Pre-rotate */ 2322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Temp pointers to make it really clear to the compiler what we're doing */ 2342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 2352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 2362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp = f2; 2372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_twiddle_scalar *t = &l->trig[0]; 2382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<N4;i++) 2392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar yr, yi; 2412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); 2422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); 2432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* works because the cos is nearly one */ 2442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = yr - S_MUL(yi,sine); 2452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = yi + S_MUL(yr,sine); 2462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp1+=2*stride; 2472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp2-=2*stride; 2482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ 2522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); 2532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Post-rotate and de-shuffle from both ends of the buffer at once to make 2552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian it in-place. */ 2562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); 2582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; 2592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_twiddle_scalar *t = &l->trig[0]; 2602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the 2612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian middle pair will be computed twice. */ 2622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<(N4+1)>>1;i++) 2632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar re, im, yr, yi; 2652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_twiddle_scalar t0, t1; 2662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian re = yp0[0]; 2672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian im = yp0[1]; 2682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian t0 = t[i<<shift]; 2692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian t1 = t[(N4-i)<<shift]; 2702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 2712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yr = S_MUL(re,t0) - S_MUL(im,t1); 2722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yi = S_MUL(im,t0) + S_MUL(re,t1); 2732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian re = yp1[0]; 2742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian im = yp1[1]; 2752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* works because the cos is nearly one */ 2762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp0[0] = -(yr - S_MUL(yi,sine)); 2772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp1[1] = yi + S_MUL(yr,sine); 2782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian t0 = t[(N4-i-1)<<shift]; 2802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian t1 = t[(i+1)<<shift]; 2812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 2822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yr = S_MUL(re,t0) - S_MUL(im,t1); 2832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yi = S_MUL(im,t0) + S_MUL(re,t1); 2842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* works because the cos is nearly one */ 2852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp1[0] = -(yr - S_MUL(yi,sine)); 2862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp0[1] = yi + S_MUL(yr,sine); 2872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp0 += 2; 2882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp1 -= 2; 2892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Mirror on both sides for TDAC */ 2932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; 2952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 2962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT wp1 = window; 2972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; 2982bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i = 0; i < overlap/2; i++) 3002bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 3012bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar x1, x2; 3022bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian x1 = *xp1; 3032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian x2 = *yp1; 3042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); 3052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); 3062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp1++; 3072bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp2--; 3082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 3092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 3102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian RESTORE_STACK; 3112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian} 312