12bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Copyright (c) 2007-2008 CSIRO
22bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Copyright (c) 2007-2008 Xiph.Org Foundation
32bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Written by Jean-Marc Valin */
42bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/*
52bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Redistribution and use in source and binary forms, with or without
62bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   modification, are permitted provided that the following conditions
72bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   are met:
82bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
92bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   - Redistributions of source code must retain the above copyright
102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   notice, this list of conditions and the following disclaimer.
112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   - Redistributions in binary form must reproduce the above copyright
132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   notice, this list of conditions and the following disclaimer in the
142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   documentation and/or other materials provided with the distribution.
152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/
282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
/* This is a simple MDCT implementation that uses a N/4 complex FFT
   to do most of the work. It should be relatively straightforward to
   plug in pretty much any FFT here.

   This replaces the Vorbis FFT (and uses the exact same API), which
   was a bit too messy and ended up duplicating code
   (might as well use the same FFT everywhere).

   The algorithm is similar to (and inspired by) Fabrice Bellard's
   MDCT implementation in FFmpeg, but has differences in signs, ordering
   and scaling in many places.
*/
412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifndef SKIP_CONFIG_H
432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef HAVE_CONFIG_H
442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "config.h"
452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mdct.h"
492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "kiss_fft.h"
502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "_kiss_fft_guts.h"
512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include <math.h>
522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "os_support.h"
532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mathops.h"
542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "stack_alloc.h"
552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
56c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#if defined(MIPSr1_ASM)
57c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "mips/mdct_mipsr1.h"
58c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif
59c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
60c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef CUSTOM_MODES
622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
63c91ee5b5642fcc4969150f73d5f6848f88bf1638flimint clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   kiss_twiddle_scalar *trig;
67c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   int shift;
682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int N2=N>>1;
692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   l->n = N;
702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   l->maxshift = maxshift;
712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   for (i=0;i<=maxshift;i++)
722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      if (i==0)
74c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      else
76c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifndef ENABLE_TI_DSPLIB55
782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      if (l->kfft[i]==NULL)
792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         return 0;
802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
82c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   if (l->trig==NULL)
842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian     return 0;
85c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   for (shift=0;shift<=maxshift;shift++)
86c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   {
87c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      /* We have enough points that sine isn't necessary */
882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#if defined(FIXED_POINT)
89c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#if 1
90c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      for (i=0;i<N2;i++)
91c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#else
93c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      for (i=0;i<N2;i++)
94c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
96c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#else
97c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      for (i=0;i<N2;i++)
98c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
99c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif
100c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      trig += N2;
101c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      N2 >>= 1;
102c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      N >>= 1;
103c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   }
1042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   return 1;
1052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
1062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
107c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid clt_mdct_clear(mdct_lookup *l, int arch)
1082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
1092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
1102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   for (i=0;i<=l->maxshift;i++)
111c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      opus_fft_free(l->kfft[i], arch);
1122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_free((kiss_twiddle_scalar*)l->trig);
1132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
1142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif /* CUSTOM_MODES */
1162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Forward MDCT trashes the input array */
118c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifndef OVERRIDE_clt_mdct_forward
119c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
120c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      const opus_val16 *window, int overlap, int shift, int stride, int arch)
1212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
1222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
1232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int N, N2, N4;
1242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   VARDECL(kiss_fft_scalar, f);
125c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   VARDECL(kiss_fft_cpx, f2);
126c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   const kiss_fft_state *st = l->kfft[shift];
127c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   const kiss_twiddle_scalar *trig;
128c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   opus_val16 scale;
129c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifdef FIXED_POINT
130c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   /* Allows us to scale with MULT16_32_Q16(), which is faster than
131c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      MULT16_32_Q15() on ARM. */
132c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   int scale_shift = st->scale_shift-1;
133c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif
1342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   SAVE_STACK;
135c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   (void)arch;
136c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   scale = st->scale;
137c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
1382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N = l->n;
139c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   trig = l->trig;
140c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   for (i=0;i<shift;i++)
141c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   {
142c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      N >>= 1;
143c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      trig += N;
144c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   }
1452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N2 = N>>1;
1462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N4 = N>>2;
147c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
1482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   ALLOC(f, N2, kiss_fft_scalar);
149c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   ALLOC(f2, N4, kiss_fft_cpx);
1502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Consider the input to be composed of four blocks: [a, b, c, d] */
1522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Window, shuffle, fold */
1532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
1542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Temp pointers to make it really clear to the compiler what we're doing */
1552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
1562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
1572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp = f;
1582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
1592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
1602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<((overlap+3)>>2);i++)
1612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
1632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
1642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
1652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp1+=2;
1662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp2-=2;
1672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp1+=2;
1682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp2-=2;
1692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
1702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      wp1 = window;
1712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      wp2 = window+overlap-1;
1722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(;i<N4-((overlap+3)>>2);i++)
1732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
1752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = *xp2;
1762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = *xp1;
1772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp1+=2;
1782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp2-=2;
1792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
1802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(;i<N4;i++)
1812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
1832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
1842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
1852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp1+=2;
1862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp2-=2;
1872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp1+=2;
1882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp2-=2;
1892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
1902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
1912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Pre-rotation */
1922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
1932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp = f;
194c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      const kiss_twiddle_scalar *t = &trig[0];
1952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<N4;i++)
1962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
197c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         kiss_fft_cpx yc;
198c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         kiss_twiddle_scalar t0, t1;
1992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar re, im, yr, yi;
200c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         t0 = t[i];
201c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         t1 = t[N4+i];
202c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         re = *yp++;
203c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         im = *yp++;
204c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
205c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
206c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yc.r = yr;
207c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yc.i = yi;
208c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
209c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
210c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         f2[st->bitrev[i]] = yc;
2112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
2122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
2132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
214c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   /* N/4 complex FFT, does not downscale anymore */
215c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   opus_fft_impl(st, f2);
2162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Post-rotate */
2182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
2192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Temp pointers to make it really clear to the compiler what we're doing */
220c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
2212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
2222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
223c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      const kiss_twiddle_scalar *t = &trig[0];
2242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Temp pointers to make it really clear to the compiler what we're doing */
2252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<N4;i++)
2262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
2272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar yr, yi;
228c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
229c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
230c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         *yp1 = yr;
231c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         *yp2 = yi;
232c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         fp++;
2332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp1 += 2*stride;
2342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp2 -= 2*stride;
2352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
2362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
2372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   RESTORE_STACK;
2382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
239c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif /* OVERRIDE_clt_mdct_forward */
2402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
241c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifndef OVERRIDE_clt_mdct_backward
242c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
243c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
2442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
2452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
2462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int N, N2, N4;
247c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   const kiss_twiddle_scalar *trig;
248c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   (void) arch;
249c91ee5b5642fcc4969150f73d5f6848f88bf1638flim
2502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N = l->n;
251c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   trig = l->trig;
252c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   for (i=0;i<shift;i++)
253c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   {
254c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      N >>= 1;
255c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      trig += N;
256c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   }
2572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N2 = N>>1;
2582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   N4 = N>>2;
2592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Pre-rotate */
2612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
2622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Temp pointers to make it really clear to the compiler what we're doing */
2632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
2642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
265c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
266c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
267c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
2682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<N4;i++)
2692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
270c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         int rev;
2712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar yr, yi;
272c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         rev = *bitrev++;
2730c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim         yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
2740c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim         yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
275c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         /* We swap real and imag because we use an FFT instead of an IFFT. */
276c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yp[2*rev+1] = yr;
277c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yp[2*rev] = yi;
278c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         /* Storing the pre-rotation directly in the bitrev order. */
2792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp1+=2*stride;
2802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         xp2-=2*stride;
2812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
2822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
2832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
284c91ee5b5642fcc4969150f73d5f6848f88bf1638flim   opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
2852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
2872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      it in-place. */
2882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
289c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      kiss_fft_scalar * yp0 = out+(overlap>>1);
290c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
291c91ee5b5642fcc4969150f73d5f6848f88bf1638flim      const kiss_twiddle_scalar *t = &trig[0];
2922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
2932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         middle pair will be computed twice. */
2942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i=0;i<(N4+1)>>1;i++)
2952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
2962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar re, im, yr, yi;
2972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_twiddle_scalar t0, t1;
298c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         /* We swap real and imag because we're using an FFT instead of an IFFT. */
299c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         re = yp0[1];
300c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         im = yp0[0];
301c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         t0 = t[i];
302c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         t1 = t[N4+i];
3032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
3040c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim         yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
3050c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim         yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
306c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         /* We swap real and imag because we're using an FFT instead of an IFFT. */
307c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         re = yp1[1];
308c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         im = yp1[0];
309c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yp0[0] = yr;
310c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yp1[1] = yi;
3112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
312c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         t0 = t[(N4-i-1)];
313c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         t1 = t[(N2-i-1)];
3142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
3150c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim         yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
3160c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim         yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
317c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yp1[0] = yr;
318c91ee5b5642fcc4969150f73d5f6848f88bf1638flim         yp0[1] = yi;
3192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp0 += 2;
3202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         yp1 -= 2;
3212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
3222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
3232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
3242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Mirror on both sides for TDAC */
3252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
3262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
3272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
3282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT wp1 = window;
3292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
3302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
3312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      for(i = 0; i < overlap/2; i++)
3322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
3332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         kiss_fft_scalar x1, x2;
3342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         x1 = *xp1;
3352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         x2 = *yp1;
3360c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim         *yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
3370c2090c324e4f2ba2a8621c8b083559bab74c7c5Felicia Lim         *xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
3382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp1++;
3392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         wp2--;
3402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
3412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
3422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
343c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif /* OVERRIDE_clt_mdct_backward */
344