12bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Copyright (c) 2007-2008 CSIRO 22bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian Copyright (c) 2007-2008 Xiph.Org Foundation 32bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian Written by Jean-Marc Valin */ 42bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* 52bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian Redistribution and use in source and binary forms, with or without 62bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian modification, are permitted provided that the following conditions 72bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian are met: 82bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 92bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian - Redistributions of source code must retain the above copyright 102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian notice, this list of conditions and the following disclaimer. 112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian - Redistributions in binary form must reproduce the above copyright 132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian notice, this list of conditions and the following disclaimer in the 142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian documentation and/or other materials provided with the distribution. 152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/ 282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* This is a simple MDCT implementation that uses a N/4 complex FFT 302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian to do most of the work. It should be relatively straightforward to 312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian plug in pretty much and FFT here. 322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian This replaces the Vorbis FFT (and uses the exact same API), which 342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian was a bit too messy and that was ending up duplicating code 352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian (might as well use the same FFT everywhere). 362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian The algorithm is similar to (and inspired from) Fabrice Bellard's 382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian MDCT implementation in FFMPEG, but has differences in signs, ordering 392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian and scaling in many places. 402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/ 412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifndef SKIP_CONFIG_H 432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef HAVE_CONFIG_H 442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "config.h" 452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mdct.h" 492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "kiss_fft.h" 502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "_kiss_fft_guts.h" 512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include <math.h> 522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "os_support.h" 532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mathops.h" 542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "stack_alloc.h" 552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 56c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#if defined(MIPSr1_ASM) 57c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "mips/mdct_mipsr1.h" 58c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif 59c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 60c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef CUSTOM_MODES 622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 63c91ee5b5642fcc4969150f73d5f6848f88bf1638flimint clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch) 642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{ 652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int i; 662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_twiddle_scalar *trig; 67c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int shift; 682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int N2=N>>1; 692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian l->n = N; 702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian l->maxshift = maxshift; 712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for (i=0;i<=maxshift;i++) 722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian if (i==0) 74c91ee5b5642fcc4969150f73d5f6848f88bf1638flim l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch); 752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian else 76c91ee5b5642fcc4969150f73d5f6848f88bf1638flim l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch); 772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifndef ENABLE_TI_DSPLIB55 782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian if (l->kfft[i]==NULL) 792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian return 0; 802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 82c91ee5b5642fcc4969150f73d5f6848f88bf1638flim l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar)); 832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian if (l->trig==NULL) 842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian return 0; 85c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for (shift=0;shift<=maxshift;shift++) 86c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 87c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* We have enough points that sine isn't necessary */ 882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#if defined(FIXED_POINT) 89c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#if 1 90c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for (i=0;i<N2;i++) 91c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N)); 922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#else 93c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for (i=0;i<N2;i++) 94c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N)))); 952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif 96c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#else 97c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for (i=0;i<N2;i++) 98c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N); 99c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif 100c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig += N2; 101c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N2 >>= 1; 102c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N >>= 1; 103c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 1042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian return 1; 1052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian} 1062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 107c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid clt_mdct_clear(mdct_lookup *l, int arch) 1082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{ 1092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int i; 1102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for (i=0;i<=l->maxshift;i++) 111c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_fft_free(l->kfft[i], arch); 1122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian opus_free((kiss_twiddle_scalar*)l->trig); 1132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian} 1142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 1152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif /* CUSTOM_MODES */ 1162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 1172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Forward MDCT trashes the input array */ 118c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifndef OVERRIDE_clt_mdct_forward 119c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 120c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_val16 *window, int overlap, int shift, int stride, int arch) 1212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{ 1222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int i; 1232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int N, N2, N4; 1242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian VARDECL(kiss_fft_scalar, f); 125c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL(kiss_fft_cpx, f2); 126c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_state *st = l->kfft[shift]; 127c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *trig; 128c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_val16 scale; 129c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifdef FIXED_POINT 130c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Allows us to scale with MULT16_32_Q16(), which is faster than 131c91ee5b5642fcc4969150f73d5f6848f88bf1638flim MULT16_32_Q15() on ARM. */ 132c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int scale_shift = st->scale_shift-1; 133c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif 1342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian SAVE_STACK; 135c91ee5b5642fcc4969150f73d5f6848f88bf1638flim (void)arch; 136c91ee5b5642fcc4969150f73d5f6848f88bf1638flim scale = st->scale; 137c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 1382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N = l->n; 139c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig = l->trig; 140c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for (i=0;i<shift;i++) 141c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 142c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N >>= 1; 143c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig += N; 144c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 1452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N2 = N>>1; 1462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N4 = N>>2; 147c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 1482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian ALLOC(f, N2, kiss_fft_scalar); 149c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC(f2, N4, kiss_fft_cpx); 1502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 1512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Consider the input to be composed of four blocks: [a, b, c, d] */ 1522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Window, shuffle, fold */ 1532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Temp pointers to make it really clear to the compiler what we're doing */ 1552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); 1562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); 1572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp = f; 1582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); 1592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; 1602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<((overlap+3)>>2);i++) 1612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ 1632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); 1642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); 1652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp1+=2; 1662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp2-=2; 1672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp1+=2; 1682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp2-=2; 1692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp1 = window; 1712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp2 = window+overlap-1; 1722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(;i<N4-((overlap+3)>>2);i++) 1732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 1752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = *xp2; 1762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = *xp1; 1772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp1+=2; 1782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp2-=2; 1792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(;i<N4;i++) 1812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 1832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); 1842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); 1852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp1+=2; 1862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp2-=2; 1872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp1+=2; 1882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp2-=2; 1892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 1912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Pre-rotation */ 1922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 1932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp = f; 194c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *t = &trig[0]; 1952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<N4;i++) 1962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 197c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_cpx yc; 198c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_twiddle_scalar t0, t1; 1992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar re, im, yr, yi; 200c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t0 = t[i]; 201c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t1 = t[N4+i]; 202c91ee5b5642fcc4969150f73d5f6848f88bf1638flim re = *yp++; 203c91ee5b5642fcc4969150f73d5f6848f88bf1638flim im = *yp++; 204c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yr = S_MUL(re,t0) - S_MUL(im,t1); 205c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yi = S_MUL(im,t0) + S_MUL(re,t1); 206c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yc.r = yr; 207c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yc.i = yi; 208c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift); 209c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift); 210c91ee5b5642fcc4969150f73d5f6848f88bf1638flim f2[st->bitrev[i]] = yc; 2112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 214c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* N/4 complex FFT, does not downscale anymore */ 215c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_fft_impl(st, f2); 2162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Post-rotate */ 2182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Temp pointers to make it really clear to the compiler what we're doing */ 220c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_cpx * OPUS_RESTRICT fp = f2; 2212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 2222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 223c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *t = &trig[0]; 2242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Temp pointers to make it really clear to the compiler what we're doing */ 2252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<N4;i++) 2262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar yr, yi; 228c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); 229c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); 230c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp1 = yr; 231c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp2 = yi; 232c91ee5b5642fcc4969150f73d5f6848f88bf1638flim fp++; 2332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp1 += 2*stride; 2342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp2 -= 2*stride; 2352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian RESTORE_STACK; 2382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian} 239c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif /* OVERRIDE_clt_mdct_forward */ 2402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 241c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifndef OVERRIDE_clt_mdct_backward 242c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 243c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch) 2442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{ 2452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int i; 2462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian int N, N2, N4; 247c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *trig; 248c91ee5b5642fcc4969150f73d5f6848f88bf1638flim (void) arch; 249c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 2502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N = l->n; 251c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig = l->trig; 252c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for (i=0;i<shift;i++) 253c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 254c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N >>= 1; 255c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig += N; 256c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 2572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N2 = N>>1; 2582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian N4 = N>>2; 2592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Pre-rotate */ 2612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Temp pointers to make it really clear to the compiler what we're doing */ 2632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 2642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 265c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1); 266c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; 267c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev; 2682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<N4;i++) 2692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 270c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int rev; 2712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar yr, yi; 272c91ee5b5642fcc4969150f73d5f6848f88bf1638flim rev = *bitrev++; 2730c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i])); 2740c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i])); 275c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* We swap real and imag because we use an FFT instead of an IFFT. */ 276c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp[2*rev+1] = yr; 277c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp[2*rev] = yi; 278c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Storing the pre-rotation directly in the bitrev order. */ 2792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp1+=2*stride; 2802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian xp2-=2*stride; 2812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 2832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 284c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1))); 2852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 2862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Post-rotate and de-shuffle from both ends of the buffer at once to make 2872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian it in-place. */ 2882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 289c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * yp0 = out+(overlap>>1); 290c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; 291c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *t = &trig[0]; 2922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the 2932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian middle pair will be computed twice. */ 2942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i=0;i<(N4+1)>>1;i++) 2952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 2962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar re, im, yr, yi; 2972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_twiddle_scalar t0, t1; 298c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* We swap real and imag because we're using an FFT instead of an IFFT. */ 299c91ee5b5642fcc4969150f73d5f6848f88bf1638flim re = yp0[1]; 300c91ee5b5642fcc4969150f73d5f6848f88bf1638flim im = yp0[0]; 301c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t0 = t[i]; 302c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t1 = t[N4+i]; 3032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 3040c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)); 3050c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)); 306c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* We swap real and imag because we're using an FFT instead of an IFFT. */ 307c91ee5b5642fcc4969150f73d5f6848f88bf1638flim re = yp1[1]; 308c91ee5b5642fcc4969150f73d5f6848f88bf1638flim im = yp1[0]; 309c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp0[0] = yr; 310c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp1[1] = yi; 3112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 312c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t0 = t[(N4-i-1)]; 313c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t1 = t[(N2-i-1)]; 3142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 3150c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)); 3160c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)); 317c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp1[0] = yr; 318c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp0[1] = yi; 3192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp0 += 2; 3202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian yp1 -= 2; 3212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 3222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 3232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 3242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian /* Mirror on both sides for TDAC */ 3252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 3262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; 3272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 3282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT wp1 = window; 3292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; 3302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian 3312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian for(i = 0; i < overlap/2; i++) 3322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian { 3332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian kiss_fft_scalar x1, x2; 3342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian x1 = *xp1; 3352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian x2 = *yp1; 3360c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim *yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1)); 3370c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim *xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1)); 3382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp1++; 3392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian wp2--; 3402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 3412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian } 3422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian} 343c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif /* OVERRIDE_clt_mdct_backward */ 344