12bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Copyright (c) 2007-2008 CSIRO
22bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Copyright (c) 2007-2008 Xiph.Org Foundation
32bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Written by Jean-Marc Valin */
42bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/*
52bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Redistribution and use in source and binary forms, with or without
62bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   modification, are permitted provided that the following conditions
72bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   are met:
82bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
92bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   - Redistributions of source code must retain the above copyright
102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   notice, this list of conditions and the following disclaimer.
112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   - Redistributions in binary form must reproduce the above copyright
132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   notice, this list of conditions and the following disclaimer in the
142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   documentation and/or other materials provided with the distribution.
152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/
282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* This is a simple MDCT implementation that uses a N/4 complex FFT
302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   to do most of the work. It should be relatively straightforward to
312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   plug in pretty much and FFT here.
322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   This replaces the Vorbis FFT (and uses the exact same API), which
342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   was a bit too messy and that was ending up duplicating code
352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   (might as well use the same FFT everywhere).
362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   The algorithm is similar to (and inspired from) Fabrice Bellard's
382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   MDCT implementation in FFMPEG, but has differences in signs, ordering
392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   and scaling in many places.
402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/
412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifndef SKIP_CONFIG_H
432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef HAVE_CONFIG_H
442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "config.h"
452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mdct.h"
492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "kiss_fft.h"
502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "_kiss_fft_guts.h"
512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include <math.h>
522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "os_support.h"
532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mathops.h"
542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "stack_alloc.h"
552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef CUSTOM_MODES
572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianint clt_mdct_init(mdct_lookup *l,int N, int maxshift)
592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int N4;
622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   kiss_twiddle_scalar *trig;
632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#if defined(FIXED_POINT)
642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int N2=N>>1;
652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   l->n = N;
672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N4 = N>>2;
682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   l->maxshift = maxshift;
692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   for (i=0;i<=maxshift;i++)
702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      if (i==0)
722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      else
742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifndef ENABLE_TI_DSPLIB55
762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      if (l->kfft[i]==NULL)
772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         return 0;
782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   if (l->trig==NULL)
822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian     return 0;
832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* We have enough points that sine isn't necessary */
842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#if defined(FIXED_POINT)
852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   for (i=0;i<=N4;i++)
862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#else
882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   for (i=0;i<=N4;i++)
892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   return 1;
922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianvoid clt_mdct_clear(mdct_lookup *l)
952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   for (i=0;i<=l->maxshift;i++)
982bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      opus_fft_free(l->kfft[i]);
992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_free((kiss_twiddle_scalar*)l->trig);
1002bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
1012bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1022bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif /* CUSTOM_MODES */
1032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Forward MDCT trashes the input array */
1052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianvoid clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
1062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 *window, int overlap, int shift, int stride)
1072bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
1082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
1092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int N, N2, N4;
1102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   kiss_twiddle_scalar sine;
1112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   VARDECL(kiss_fft_scalar, f);
1122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   VARDECL(kiss_fft_scalar, f2);
1132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   SAVE_STACK;
1142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N = l->n;
1152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N >>= shift;
1162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N2 = N>>1;
1172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N4 = N>>2;
1182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   ALLOC(f, N2, kiss_fft_scalar);
1192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   ALLOC(f2, N2, kiss_fft_scalar);
1202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* sin(x) ~= x here */
1212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef FIXED_POINT
1222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
1232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#else
1242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
1252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
1262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Consider the input to be composed of four blocks: [a, b, c, d] */
1282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Window, shuffle, fold */
1292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
1302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Temp pointers to make it really clear to the compiler what we're doing */
1312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
1322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
1332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp = f;
1342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
1352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
1362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<((overlap+3)>>2);i++)
1372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
1392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
1402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
1412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp1+=2;
1422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp2-=2;
1432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp1+=2;
1442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp2-=2;
1452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
1462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      wp1 = window;
1472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      wp2 = window+overlap-1;
1482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(;i<N4-((overlap+3)>>2);i++)
1492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
1512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = *xp2;
1522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = *xp1;
1532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp1+=2;
1542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp2-=2;
1552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
1562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(;i<N4;i++)
1572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
1592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
1602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
1612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp1+=2;
1622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp2-=2;
1632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp1+=2;
1642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp2-=2;
1652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
1662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
1672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Pre-rotation */
1682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
1692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp = f;
1702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_twiddle_scalar *t = &l->trig[0];
1712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<N4;i++)
1722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar re, im, yr, yi;
1742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         re = yp[0];
1752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         im = yp[1];
1762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yr = -S_MUL(re,t[i<<shift])  -  S_MUL(im,t[(N4-i)<<shift]);
1772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yi = -S_MUL(im,t[i<<shift])  +  S_MUL(re,t[(N4-i)<<shift]);
1782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* works because the cos is nearly one */
1792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = yr + S_MUL(yi,sine);
1802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = yi - S_MUL(yr,sine);
1812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
1822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
1832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* N/4 complex FFT, down-scales by 4/N */
1852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
1862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Post-rotate */
1882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
1892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Temp pointers to make it really clear to the compiler what we're doing */
1902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
1912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
1922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
1932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_twiddle_scalar *t = &l->trig[0];
1942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Temp pointers to make it really clear to the compiler what we're doing */
1952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<N4;i++)
1962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar yr, yi;
1982bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
1992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
2002bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* works because the cos is nearly one */
2012bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp1 = yr - S_MUL(yi,sine);
2022bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp2 = yi + S_MUL(yr,sine);;
2032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         fp += 2;
2042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp1 += 2*stride;
2052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp2 -= 2*stride;
2062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
2072bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
2082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   RESTORE_STACK;
2092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
2102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianvoid clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
2122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
2132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
2142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
2152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int N, N2, N4;
2162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   kiss_twiddle_scalar sine;
2172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   VARDECL(kiss_fft_scalar, f2);
2182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   SAVE_STACK;
2192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N = l->n;
2202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N >>= shift;
2212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N2 = N>>1;
2222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N4 = N>>2;
2232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   ALLOC(f2, N2, kiss_fft_scalar);
2242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* sin(x) ~= x here */
2252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef FIXED_POINT
2262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
2272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#else
2282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
2292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
2302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Pre-rotate */
2322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
2332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Temp pointers to make it really clear to the compiler what we're doing */
2342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
2352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
2362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
2372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_twiddle_scalar *t = &l->trig[0];
2382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<N4;i++)
2392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
2402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar yr, yi;
2412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
2422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
2432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* works because the cos is nearly one */
2442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = yr - S_MUL(yi,sine);
2452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = yi + S_MUL(yr,sine);
2462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp1+=2*stride;
2472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp2-=2*stride;
2482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
2492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
2502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
2522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));
2532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
2552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      it in-place. */
2562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
2572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
2582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
2592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_twiddle_scalar *t = &l->trig[0];
2602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
2612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         middle pair will be computed twice. */
2622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<(N4+1)>>1;i++)
2632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
2642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar re, im, yr, yi;
2652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_twiddle_scalar t0, t1;
2662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         re = yp0[0];
2672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         im = yp0[1];
2682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         t0 = t[i<<shift];
2692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         t1 = t[(N4-i)<<shift];
2702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
2712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yr = S_MUL(re,t0) - S_MUL(im,t1);
2722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yi = S_MUL(im,t0) + S_MUL(re,t1);
2732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         re = yp1[0];
2742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         im = yp1[1];
2752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* works because the cos is nearly one */
2762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp0[0] = -(yr - S_MUL(yi,sine));
2772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp1[1] = yi + S_MUL(yr,sine);
2782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         t0 = t[(N4-i-1)<<shift];
2802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         t1 = t[(i+1)<<shift];
2812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
2822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yr = S_MUL(re,t0) - S_MUL(im,t1);
2832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yi = S_MUL(im,t0) + S_MUL(re,t1);
2842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* works because the cos is nearly one */
2852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp1[0] = -(yr - S_MUL(yi,sine));
2862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp0[1] = yi + S_MUL(yr,sine);
2872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp0 += 2;
2882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp1 -= 2;
2892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
2902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
2912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Mirror on both sides for TDAC */
2932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
2942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
2952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
2962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT wp1 = window;
2972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
2982bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i = 0; i < overlap/2; i++)
3002bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
3012bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar x1, x2;
3022bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         x1 = *xp1;
3032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         x2 = *yp1;
3042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
3052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
3062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp1++;
3072bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp2--;
3082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
3092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
3102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   RESTORE_STACK;
3112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
312