1c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/* Copyright (c) 2015 Xiph.Org Foundation 2c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Written by Viswanath Puttagunta */ 3c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/** 4c91ee5b5642fcc4969150f73d5f6848f88bf1638flim @file celt_ne10_mdct.c 5c91ee5b5642fcc4969150f73d5f6848f88bf1638flim @brief ARM Neon optimizations for mdct using NE10 library 6c91ee5b5642fcc4969150f73d5f6848f88bf1638flim */ 7c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 8c91ee5b5642fcc4969150f73d5f6848f88bf1638flim/* 9c91ee5b5642fcc4969150f73d5f6848f88bf1638flim Redistribution and use in source and binary forms, with or without 10c91ee5b5642fcc4969150f73d5f6848f88bf1638flim modification, are permitted provided that the following conditions 11c91ee5b5642fcc4969150f73d5f6848f88bf1638flim are met: 12c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 13c91ee5b5642fcc4969150f73d5f6848f88bf1638flim - Redistributions of source code must retain the above copyright 14c91ee5b5642fcc4969150f73d5f6848f88bf1638flim notice, this list of conditions and the following disclaimer. 15c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 16c91ee5b5642fcc4969150f73d5f6848f88bf1638flim - Redistributions in binary form must reproduce the above copyright 17c91ee5b5642fcc4969150f73d5f6848f88bf1638flim notice, this list of conditions and the following disclaimer in the 18c91ee5b5642fcc4969150f73d5f6848f88bf1638flim documentation and/or other materials provided with the distribution. 19c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 20c91ee5b5642fcc4969150f73d5f6848f88bf1638flim THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23c91ee5b5642fcc4969150f73d5f6848f88bf1638flim A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 24c91ee5b5642fcc4969150f73d5f6848f88bf1638flim OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 25c91ee5b5642fcc4969150f73d5f6848f88bf1638flim EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 26c91ee5b5642fcc4969150f73d5f6848f88bf1638flim PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 27c91ee5b5642fcc4969150f73d5f6848f88bf1638flim PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 28c91ee5b5642fcc4969150f73d5f6848f88bf1638flim LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 29c91ee5b5642fcc4969150f73d5f6848f88bf1638flim NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30c91ee5b5642fcc4969150f73d5f6848f88bf1638flim SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31c91ee5b5642fcc4969150f73d5f6848f88bf1638flim*/ 32c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 33c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifndef SKIP_CONFIG_H 34c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#ifdef HAVE_CONFIG_H 35c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "config.h" 36c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif 37c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#endif 38c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 39c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "kiss_fft.h" 40c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "_kiss_fft_guts.h" 41c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "mdct.h" 42c91ee5b5642fcc4969150f73d5f6848f88bf1638flim#include "stack_alloc.h" 43c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 44c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid clt_mdct_forward_neon(const mdct_lookup *l, 45c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar *in, 46c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT out, 47c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_val16 *window, 48c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int overlap, int shift, int stride, int arch) 49c91ee5b5642fcc4969150f73d5f6848f88bf1638flim{ 50c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int i; 51c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int N, N2, N4; 52c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL(kiss_fft_scalar, f); 53c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL(kiss_fft_cpx, f2); 54c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_state *st = l->kfft[shift]; 55c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *trig; 56c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 57c91ee5b5642fcc4969150f73d5f6848f88bf1638flim SAVE_STACK; 58c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 59c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N = l->n; 60c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig = l->trig; 61c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for (i=0;i<shift;i++) 62c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 63c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N >>= 1; 64c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig += N; 65c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 66c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N2 = N>>1; 67c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N4 = N>>2; 68c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 69c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC(f, N2, kiss_fft_scalar); 70c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC(f2, N4, kiss_fft_cpx); 71c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 72c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Consider the input to be composed of four blocks: [a, b, c, d] */ 73c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Window, shuffle, fold */ 74c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 75c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Temp pointers to make it really clear to the compiler what we're doing */ 76c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); 77c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); 78c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT yp = f; 79c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); 80c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; 81c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for(i=0;i<((overlap+3)>>2);i++) 82c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 83c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ 84c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); 85c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); 86c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xp1+=2; 87c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xp2-=2; 88c91ee5b5642fcc4969150f73d5f6848f88bf1638flim wp1+=2; 89c91ee5b5642fcc4969150f73d5f6848f88bf1638flim wp2-=2; 90c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 91c91ee5b5642fcc4969150f73d5f6848f88bf1638flim wp1 = window; 92c91ee5b5642fcc4969150f73d5f6848f88bf1638flim wp2 = window+overlap-1; 93c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for(;i<N4-((overlap+3)>>2);i++) 94c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 95c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 96c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp++ = *xp2; 97c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp++ = *xp1; 98c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xp1+=2; 99c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xp2-=2; 100c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 101c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for(;i<N4;i++) 102c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 103c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 104c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); 105c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); 106c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xp1+=2; 107c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xp2-=2; 108c91ee5b5642fcc4969150f73d5f6848f88bf1638flim wp1+=2; 109c91ee5b5642fcc4969150f73d5f6848f88bf1638flim wp2-=2; 110c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 111c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 112c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Pre-rotation */ 113c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 114c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT yp = f; 115c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *t = &trig[0]; 116c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for(i=0;i<N4;i++) 117c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 118c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_cpx yc; 119c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_twiddle_scalar t0, t1; 120c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar re, im, yr, yi; 121c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t0 = t[i]; 122c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t1 = t[N4+i]; 123c91ee5b5642fcc4969150f73d5f6848f88bf1638flim re = *yp++; 124c91ee5b5642fcc4969150f73d5f6848f88bf1638flim im = *yp++; 125c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yr = S_MUL(re,t0) - S_MUL(im,t1); 126c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yi = S_MUL(im,t0) + S_MUL(re,t1); 127c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yc.r = yr; 128c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yc.i = yi; 129c91ee5b5642fcc4969150f73d5f6848f88bf1638flim f2[i] = yc; 130c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 131c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 132c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 133c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_fft(st, f2, (kiss_fft_cpx *)f, arch); 134c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 135c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Post-rotate */ 136c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 137c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Temp pointers to make it really clear to the compiler what we're doing */ 138c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f; 139c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 140c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 141c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *t = &trig[0]; 142c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Temp pointers to make it really clear to the compiler what we're doing */ 143c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for(i=0;i<N4;i++) 144c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 145c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar yr, yi; 146c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); 147c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); 148c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp1 = yr; 149c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp2 = yi; 150c91ee5b5642fcc4969150f73d5f6848f88bf1638flim fp++; 151c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp1 += 2*stride; 152c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp2 -= 2*stride; 153c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 154c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 155c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RESTORE_STACK; 156c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} 157c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 158c91ee5b5642fcc4969150f73d5f6848f88bf1638flimvoid clt_mdct_backward_neon(const mdct_lookup *l, 159c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar *in, 160c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT out, 161c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_val16 * OPUS_RESTRICT window, 162c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int overlap, int shift, int stride, int arch) 163c91ee5b5642fcc4969150f73d5f6848f88bf1638flim{ 164c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int i; 165c91ee5b5642fcc4969150f73d5f6848f88bf1638flim int N, N2, N4; 166c91ee5b5642fcc4969150f73d5f6848f88bf1638flim VARDECL(kiss_fft_scalar, f); 167c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *trig; 168c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_state *st = l->kfft[shift]; 169c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 170c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N = l->n; 171c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig = l->trig; 172c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for (i=0;i<shift;i++) 173c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 174c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N >>= 1; 175c91ee5b5642fcc4969150f73d5f6848f88bf1638flim trig += N; 176c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 177c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N2 = N>>1; 178c91ee5b5642fcc4969150f73d5f6848f88bf1638flim N4 = N>>2; 179c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 180c91ee5b5642fcc4969150f73d5f6848f88bf1638flim ALLOC(f, N2, kiss_fft_scalar); 181c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 182c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Pre-rotate */ 183c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 184c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Temp pointers to make it really clear to the compiler what we're doing */ 185c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 186c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 187c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT yp = f; 188c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0]; 189c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for(i=0;i<N4;i++) 190c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 191c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar yr, yi; 192c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); 193c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]); 194c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp[2*i] = yr; 195c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp[2*i+1] = yi; 196c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xp1+=2*stride; 197c91ee5b5642fcc4969150f73d5f6848f88bf1638flim xp2-=2*stride; 198c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 199c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 200c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 201c91ee5b5642fcc4969150f73d5f6848f88bf1638flim opus_ifft(st, (kiss_fft_cpx *)f, (kiss_fft_cpx*)(out+(overlap>>1)), arch); 202c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 203c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Post-rotate and de-shuffle from both ends of the buffer at once to make 204c91ee5b5642fcc4969150f73d5f6848f88bf1638flim it in-place. */ 205c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 206c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * yp0 = out+(overlap>>1); 207c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2; 208c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const kiss_twiddle_scalar *t = &trig[0]; 209c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the 210c91ee5b5642fcc4969150f73d5f6848f88bf1638flim middle pair will be computed twice. */ 211c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for(i=0;i<(N4+1)>>1;i++) 212c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 213c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar re, im, yr, yi; 214c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_twiddle_scalar t0, t1; 215c91ee5b5642fcc4969150f73d5f6848f88bf1638flim re = yp0[0]; 216c91ee5b5642fcc4969150f73d5f6848f88bf1638flim im = yp0[1]; 217c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t0 = t[i]; 218c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t1 = t[N4+i]; 219c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 220c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yr = S_MUL(re,t0) + S_MUL(im,t1); 221c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yi = S_MUL(re,t1) - S_MUL(im,t0); 222c91ee5b5642fcc4969150f73d5f6848f88bf1638flim re = yp1[0]; 223c91ee5b5642fcc4969150f73d5f6848f88bf1638flim im = yp1[1]; 224c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp0[0] = yr; 225c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp1[1] = yi; 226c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 227c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t0 = t[(N4-i-1)]; 228c91ee5b5642fcc4969150f73d5f6848f88bf1638flim t1 = t[(N2-i-1)]; 229c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 230c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yr = S_MUL(re,t0) + S_MUL(im,t1); 231c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yi = S_MUL(re,t1) - S_MUL(im,t0); 232c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp1[0] = yr; 233c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp0[1] = yi; 234c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp0 += 2; 235c91ee5b5642fcc4969150f73d5f6848f88bf1638flim yp1 -= 2; 236c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 237c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 238c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 239c91ee5b5642fcc4969150f73d5f6848f88bf1638flim /* Mirror on both sides for TDAC */ 240c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 241c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; 242c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 243c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_val16 * OPUS_RESTRICT wp1 = window; 244c91ee5b5642fcc4969150f73d5f6848f88bf1638flim const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; 245c91ee5b5642fcc4969150f73d5f6848f88bf1638flim 246c91ee5b5642fcc4969150f73d5f6848f88bf1638flim for(i = 0; i < overlap/2; i++) 247c91ee5b5642fcc4969150f73d5f6848f88bf1638flim { 248c91ee5b5642fcc4969150f73d5f6848f88bf1638flim kiss_fft_scalar x1, x2; 249c91ee5b5642fcc4969150f73d5f6848f88bf1638flim x1 = *xp1; 250c91ee5b5642fcc4969150f73d5f6848f88bf1638flim x2 = *yp1; 251c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); 252c91ee5b5642fcc4969150f73d5f6848f88bf1638flim *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); 253c91ee5b5642fcc4969150f73d5f6848f88bf1638flim wp1++; 254c91ee5b5642fcc4969150f73d5f6848f88bf1638flim wp2--; 255c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 256c91ee5b5642fcc4969150f73d5f6848f88bf1638flim } 257c91ee5b5642fcc4969150f73d5f6848f88bf1638flim RESTORE_STACK; 258c91ee5b5642fcc4969150f73d5f6848f88bf1638flim} 259