1/*
2Copyright (c) 2003-2004, Mark Borgerding
3
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13*/
14
15#ifdef HAVE_CONFIG_H
16#include "config.h"
17#endif
18
19#include "os_support.h"
20#include "kiss_fftr.h"
21#include "_kiss_fft_guts.h"
22
23struct kiss_fftr_state{
24    kiss_fft_cfg substate;
25    kiss_fft_cpx * tmpbuf;
26    kiss_fft_cpx * super_twiddles;
27#ifdef USE_SIMD
28    long pad;
29#endif
30};
31
32kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
33{
34    int i;
35    kiss_fftr_cfg st = NULL;
36    size_t subsize, memneeded;
37
38    if (nfft & 1) {
39        speex_warning("Real FFT optimization must be even.\n");
40        return NULL;
41    }
42    nfft >>= 1;
43
44    kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
45    memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2);
46
47    if (lenmem == NULL) {
48        st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
49    } else {
50        if (*lenmem >= memneeded)
51            st = (kiss_fftr_cfg) mem;
52        *lenmem = memneeded;
53    }
54    if (!st)
55        return NULL;
56
57    st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
58    st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
59    st->super_twiddles = st->tmpbuf + nfft;
60    kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
61
62#ifdef FIXED_POINT
63    for (i=0;i<nfft;++i) {
64       spx_word32_t phase = i+(nfft>>1);
65       if (!inverse_fft)
66          phase = -phase;
67       kf_cexp2(st->super_twiddles+i, DIV32(SHL32(phase,16),nfft));
68    }
69#else
70    for (i=0;i<nfft;++i) {
71       const double pi=3.14159265358979323846264338327;
72       double phase = pi*(((double)i) /nfft + .5);
73       if (!inverse_fft)
74          phase = -phase;
75       kf_cexp(st->super_twiddles+i, phase );
76    }
77#endif
78    return st;
79}
80
81void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
82{
83    /* input buffer timedata is stored row-wise */
84    int k,ncfft;
85    kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
86
87    if ( st->substate->inverse) {
88        speex_fatal("kiss fft usage error: improper alloc\n");
89    }
90
91    ncfft = st->substate->nfft;
92
93    /*perform the parallel fft of two real signals packed in real,imag*/
94    kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
95    /* The real part of the DC element of the frequency spectrum in st->tmpbuf
96     * contains the sum of the even-numbered elements of the input time sequence
97     * The imag part is the sum of the odd-numbered elements
98     *
99     * The sum of tdc.r and tdc.i is the sum of the input time sequence.
100     *      yielding DC of input time sequence
101     * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
102     *      yielding Nyquist bin of input time sequence
103     */
104
105    tdc.r = st->tmpbuf[0].r;
106    tdc.i = st->tmpbuf[0].i;
107    C_FIXDIV(tdc,2);
108    CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
109    CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
110    freqdata[0].r = tdc.r + tdc.i;
111    freqdata[ncfft].r = tdc.r - tdc.i;
112#ifdef USE_SIMD
113    freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
114#else
115    freqdata[ncfft].i = freqdata[0].i = 0;
116#endif
117
118    for ( k=1;k <= ncfft/2 ; ++k ) {
119        fpk    = st->tmpbuf[k];
120        fpnk.r =   st->tmpbuf[ncfft-k].r;
121        fpnk.i = - st->tmpbuf[ncfft-k].i;
122        C_FIXDIV(fpk,2);
123        C_FIXDIV(fpnk,2);
124
125        C_ADD( f1k, fpk , fpnk );
126        C_SUB( f2k, fpk , fpnk );
127        C_MUL( tw , f2k , st->super_twiddles[k]);
128
129        freqdata[k].r = HALF_OF(f1k.r + tw.r);
130        freqdata[k].i = HALF_OF(f1k.i + tw.i);
131        freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
132        freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
133    }
134}
135
136void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata, kiss_fft_scalar *timedata)
137{
138    /* input buffer timedata is stored row-wise */
139    int k, ncfft;
140
141    if (st->substate->inverse == 0) {
142        speex_fatal("kiss fft usage error: improper alloc\n");
143    }
144
145    ncfft = st->substate->nfft;
146
147    st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
148    st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
149    /*C_FIXDIV(st->tmpbuf[0],2);*/
150
151    for (k = 1; k <= ncfft / 2; ++k) {
152        kiss_fft_cpx fk, fnkc, fek, fok, tmp;
153        fk = freqdata[k];
154        fnkc.r = freqdata[ncfft - k].r;
155        fnkc.i = -freqdata[ncfft - k].i;
156        /*C_FIXDIV( fk , 2 );
157        C_FIXDIV( fnkc , 2 );*/
158
159        C_ADD (fek, fk, fnkc);
160        C_SUB (tmp, fk, fnkc);
161        C_MUL (fok, tmp, st->super_twiddles[k]);
162        C_ADD (st->tmpbuf[k],     fek, fok);
163        C_SUB (st->tmpbuf[ncfft - k], fek, fok);
164#ifdef USE_SIMD
165        st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
166#else
167        st->tmpbuf[ncfft - k].i *= -1;
168#endif
169    }
170    kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
171}
172
173void kiss_fftr2(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata)
174{
175   /* input buffer timedata is stored row-wise */
176   int k,ncfft;
177   kiss_fft_cpx f2k,tdc;
178   spx_word32_t f1kr, f1ki, twr, twi;
179
180   if ( st->substate->inverse) {
181      speex_fatal("kiss fft usage error: improper alloc\n");
182   }
183
184   ncfft = st->substate->nfft;
185
186   /*perform the parallel fft of two real signals packed in real,imag*/
187   kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
188    /* The real part of the DC element of the frequency spectrum in st->tmpbuf
189   * contains the sum of the even-numbered elements of the input time sequence
190   * The imag part is the sum of the odd-numbered elements
191   *
192   * The sum of tdc.r and tdc.i is the sum of the input time sequence.
193   *      yielding DC of input time sequence
194   * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
195   *      yielding Nyquist bin of input time sequence
196    */
197
198   tdc.r = st->tmpbuf[0].r;
199   tdc.i = st->tmpbuf[0].i;
200   C_FIXDIV(tdc,2);
201   CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
202   CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
203   freqdata[0] = tdc.r + tdc.i;
204   freqdata[2*ncfft-1] = tdc.r - tdc.i;
205
206   for ( k=1;k <= ncfft/2 ; ++k )
207   {
208      /*fpk    = st->tmpbuf[k];
209      fpnk.r =   st->tmpbuf[ncfft-k].r;
210      fpnk.i = - st->tmpbuf[ncfft-k].i;
211      C_FIXDIV(fpk,2);
212      C_FIXDIV(fpnk,2);
213
214      C_ADD( f1k, fpk , fpnk );
215      C_SUB( f2k, fpk , fpnk );
216
217      C_MUL( tw , f2k , st->super_twiddles[k]);
218
219      freqdata[2*k-1] = HALF_OF(f1k.r + tw.r);
220      freqdata[2*k] = HALF_OF(f1k.i + tw.i);
221      freqdata[2*(ncfft-k)-1] = HALF_OF(f1k.r - tw.r);
222      freqdata[2*(ncfft-k)] = HALF_OF(tw.i - f1k.i);
223      */
224
225      /*f1k.r = PSHR32(ADD32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
226      f1k.i = PSHR32(SUB32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
227      f2k.r = PSHR32(SUB32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
228      f2k.i = SHR32(ADD32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
229
230      C_MUL( tw , f2k , st->super_twiddles[k]);
231
232      freqdata[2*k-1] = HALF_OF(f1k.r + tw.r);
233      freqdata[2*k] = HALF_OF(f1k.i + tw.i);
234      freqdata[2*(ncfft-k)-1] = HALF_OF(f1k.r - tw.r);
235      freqdata[2*(ncfft-k)] = HALF_OF(tw.i - f1k.i);
236   */
237      f2k.r = SHR32(SUB32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
238      f2k.i = PSHR32(ADD32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
239
240      f1kr = SHL32(ADD32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),13);
241      f1ki = SHL32(SUB32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),13);
242
243      twr = SHR32(SUB32(MULT16_16(f2k.r,st->super_twiddles[k].r),MULT16_16(f2k.i,st->super_twiddles[k].i)), 1);
244      twi = SHR32(ADD32(MULT16_16(f2k.i,st->super_twiddles[k].r),MULT16_16(f2k.r,st->super_twiddles[k].i)), 1);
245
246#ifdef FIXED_POINT
247      freqdata[2*k-1] = PSHR32(f1kr + twr, 15);
248      freqdata[2*k] = PSHR32(f1ki + twi, 15);
249      freqdata[2*(ncfft-k)-1] = PSHR32(f1kr - twr, 15);
250      freqdata[2*(ncfft-k)] = PSHR32(twi - f1ki, 15);
251#else
252      freqdata[2*k-1] = .5f*(f1kr + twr);
253      freqdata[2*k] = .5f*(f1ki + twi);
254      freqdata[2*(ncfft-k)-1] = .5f*(f1kr - twr);
255      freqdata[2*(ncfft-k)] = .5f*(twi - f1ki);
256
257#endif
258   }
259}
260
261void kiss_fftri2(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar *timedata)
262{
263   /* input buffer timedata is stored row-wise */
264   int k, ncfft;
265
266   if (st->substate->inverse == 0) {
267      speex_fatal ("kiss fft usage error: improper alloc\n");
268   }
269
270   ncfft = st->substate->nfft;
271
272   st->tmpbuf[0].r = freqdata[0] + freqdata[2*ncfft-1];
273   st->tmpbuf[0].i = freqdata[0] - freqdata[2*ncfft-1];
274   /*C_FIXDIV(st->tmpbuf[0],2);*/
275
276   for (k = 1; k <= ncfft / 2; ++k) {
277      kiss_fft_cpx fk, fnkc, fek, fok, tmp;
278      fk.r = freqdata[2*k-1];
279      fk.i = freqdata[2*k];
280      fnkc.r = freqdata[2*(ncfft - k)-1];
281      fnkc.i = -freqdata[2*(ncfft - k)];
282        /*C_FIXDIV( fk , 2 );
283      C_FIXDIV( fnkc , 2 );*/
284
285      C_ADD (fek, fk, fnkc);
286      C_SUB (tmp, fk, fnkc);
287      C_MUL (fok, tmp, st->super_twiddles[k]);
288      C_ADD (st->tmpbuf[k],     fek, fok);
289      C_SUB (st->tmpbuf[ncfft - k], fek, fok);
290#ifdef USE_SIMD
291      st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
292#else
293      st->tmpbuf[ncfft - k].i *= -1;
294#endif
295   }
296   kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
297}
298