1
2/* -----------------------------------------------------------------------------------------------------------
3Software License for The Fraunhofer FDK AAC Codec Library for Android
4
5� Copyright  1995 - 2013 Fraunhofer-Gesellschaft zur F�rderung der angewandten Forschung e.V.
6  All rights reserved.
7
8 1.    INTRODUCTION
9The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
10the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
11This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
12
13AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
14audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
15independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
16of the MPEG specifications.
17
18Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
19may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
20individually for the purpose of encoding or decoding bit streams in products that are compliant with
21the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
22these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
23software may already be covered under those patent licenses when it is used for those licensed purposes only.
24
25Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
26are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
27applications information and documentation.
28
292.    COPYRIGHT LICENSE
30
31Redistribution and use in source and binary forms, with or without modification, are permitted without
32payment of copyright license fees provided that you satisfy the following conditions:
33
34You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
35your modifications thereto in source code form.
36
37You must retain the complete text of this software license in the documentation and/or other materials
38provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
39You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
40modifications thereto to recipients of copies in binary form.
41
42The name of Fraunhofer may not be used to endorse or promote products derived from this library without
43prior written permission.
44
45You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
46software or your modifications thereto.
47
48Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
49and the date of any change. For modified versions of the FDK AAC Codec, the term
50"Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
51"Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
52
533.    NO PATENT LICENSE
54
55NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
56ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
57respect to this software.
58
59You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
60by appropriate patent licenses.
61
624.    DISCLAIMER
63
64This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
65"AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
66of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
67CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
68including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
69or business interruption, however caused and on any theory of liability, whether in contract, strict
70liability, or tort (including negligence), arising in any way out of the use of this software, even if
71advised of the possibility of such damage.
72
735.    CONTACT INFORMATION
74
75Fraunhofer Institute for Integrated Circuits IIS
76Attention: Audio and Multimedia Departments - FDK AAC LL
77Am Wolfsmantel 33
7891058 Erlangen, Germany
79
80www.iis.fraunhofer.de/amm
81amm-info@iis.fraunhofer.de
82----------------------------------------------------------------------------------------------------------- */
83
84/***************************  Fraunhofer IIS FDK Tools  **********************
85
86   Author(s):   Josef Hoepfl, DSP Solutions
87   Description: Fix point FFT
88
89******************************************************************************/
90
91#include "fft.h"
92
93#include "fft_rad2.h"
94#include "FDK_tools_rom.h"
95
96
97
98
99
100#define F3C(x) STC(x)
101
102#define     C31       (F3C(0x91261468))      /* FL2FXCONST_DBL(-0.86602540)   */
103
104/* Performs the FFT of length 3 according to the algorithm after winograd.
105   No scaling of the input vector because the scaling is already done in the rotation vector. */
106static FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat)
107{
108  FIXP_DBL r1,r2;
109  FIXP_DBL s1,s2;
110  /* real part */
111  r1      = pDat[2] + pDat[4];
112  r2      = fMult((pDat[2] - pDat[4]), C31);
113  pDat[0] = pDat[0] + r1;
114  r1      = pDat[0] - r1 - (r1>>1);
115
116  /* imaginary part */
117  s1      = pDat[3] + pDat[5];
118  s2      = fMult((pDat[3] - pDat[5]), C31);
119  pDat[1] = pDat[1] + s1;
120  s1      = pDat[1] - s1 - (s1>>1);
121
122  /* combination */
123  pDat[2] = r1 - s2;
124  pDat[4] = r1 + s2;
125  pDat[3] = s1 + r2;
126  pDat[5] = s1 - r2;
127}
128
129
130#define F5C(x) STC(x)
131
132#define     C51       (F5C(0x79bc3854))      /* FL2FXCONST_DBL( 0.95105652)   */
133#define     C52       (F5C(0x9d839db0))      /* FL2FXCONST_DBL(-1.53884180/2) */
134#define     C53       (F5C(0xd18053ce))      /* FL2FXCONST_DBL(-0.36327126)   */
135#define     C54       (F5C(0x478dde64))      /* FL2FXCONST_DBL( 0.55901699)   */
136#define     C55       (F5C(0xb0000001))      /* FL2FXCONST_DBL(-1.25/2)       */
137
138/* performs the FFT of length 5 according to the algorithm after winograd */
139static FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat)
140{
141  FIXP_DBL r1,r2,r3,r4;
142  FIXP_DBL s1,s2,s3,s4;
143  FIXP_DBL t;
144
145  /* real part */
146  r1      = pDat[2] + pDat[8];
147  r4      = pDat[2] - pDat[8];
148  r3      = pDat[4] + pDat[6];
149  r2      = pDat[4] - pDat[6];
150  t       = fMult((r1-r3), C54);
151  r1      = r1 + r3;
152  pDat[0] = pDat[0] + r1;
153  /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
154     the values as fracts */
155  r1      = pDat[0] + (fMultDiv2(r1, C55) <<(2));
156  r3      = r1 - t;
157  r1      = r1 + t;
158  t       = fMult((r4 + r2), C51);
159  /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
160     the values as fracts */
161  r4      = t + (fMultDiv2(r4, C52) <<(2));
162  r2      = t + fMult(r2, C53);
163
164  /* imaginary part */
165  s1      = pDat[3] + pDat[9];
166  s4      = pDat[3] - pDat[9];
167  s3      = pDat[5] + pDat[7];
168  s2      = pDat[5] - pDat[7];
169  t       = fMult((s1 - s3), C54);
170  s1      = s1 + s3;
171  pDat[1] = pDat[1] + s1;
172  /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
173     the values as fracts */
174  s1      = pDat[1] + (fMultDiv2(s1, C55) <<(2));
175  s3      = s1 - t;
176  s1      = s1 + t;
177  t       = fMult((s4 + s2), C51);
178  /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
179     the values as fracts */
180  s4      = t + (fMultDiv2(s4, C52) <<(2));
181  s2      = t + fMult(s2, C53);
182
183  /* combination */
184  pDat[2] = r1 + s2;
185  pDat[8] = r1 - s2;
186  pDat[4] = r3 - s4;
187  pDat[6] = r3 + s4;
188
189  pDat[3] = s1 - r2;
190  pDat[9] = s1 + r2;
191  pDat[5] = s3 + r4;
192  pDat[7] = s3 - r4;
193}
194
195
196
197
198#define N3                    3
199#define N5                    5
200#define N6                    6
201#define N15                   15
202
203/* Performs the FFT of length 15. It is split into FFTs of length 3 and length 5. */
204static inline void fft15(FIXP_DBL *pInput)
205{
206  FIXP_DBL  aDst[2*N15];
207  FIXP_DBL  aDst1[2*N15];
208  int    i,k,l;
209
210  /* Sort input vector for fft's of length 3
211  input3(0:2)   = [input(0) input(5) input(10)];
212  input3(3:5)   = [input(3) input(8) input(13)];
213  input3(6:8)   = [input(6) input(11) input(1)];
214  input3(9:11)  = [input(9) input(14) input(4)];
215  input3(12:14) = [input(12) input(2) input(7)]; */
216  {
217    const FIXP_DBL *pSrc = pInput;
218    FIXP_DBL *RESTRICT pDst = aDst;
219    /* Merge 3 loops into one, skip call of fft3 */
220    for(i=0,l=0,k=0; i<N5; i++, k+=6)
221    {
222      pDst[k+0] = pSrc[l];
223      pDst[k+1] = pSrc[l+1];
224      l += 2*N5;
225      if (l >= (2*N15))
226          l -= (2*N15);
227
228      pDst[k+2] = pSrc[l];
229      pDst[k+3] = pSrc[l+1];
230      l += 2*N5;
231      if (l >= (2*N15))
232          l -= (2*N15);
233      pDst[k+4] = pSrc[l];
234      pDst[k+5] = pSrc[l+1];
235      l += (2*N5) + (2*N3);
236      if (l >= (2*N15))
237          l -= (2*N15);
238
239      /* fft3 merged with shift right by 2 loop */
240      FIXP_DBL r1,r2,r3;
241      FIXP_DBL s1,s2;
242      /* real part */
243      r1      = pDst[k+2] + pDst[k+4];
244      r2      = fMult((pDst[k+2] - pDst[k+4]), C31);
245      s1      = pDst[k+0];
246      pDst[k+0] = (s1 + r1)>>2;
247      r1      = s1 - (r1>>1);
248
249      /* imaginary part */
250      s1      = pDst[k+3] + pDst[k+5];
251      s2      = fMult((pDst[k+3] - pDst[k+5]), C31);
252      r3      = pDst[k+1];
253      pDst[k+1] = (r3 + s1)>>2;
254      s1      = r3 - (s1>>1);
255
256      /* combination */
257      pDst[k+2] = (r1 - s2)>>2;
258      pDst[k+4] = (r1 + s2)>>2;
259      pDst[k+3] = (s1 + r2)>>2;
260      pDst[k+5] = (s1 - r2)>>2;
261    }
262  }
263  /* Sort input vector for fft's of length 5
264  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
265  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
266  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
267  /* Merge 2 loops into one, brings about 10% */
268  {
269    const FIXP_DBL *pSrc = aDst;
270    FIXP_DBL *RESTRICT pDst = aDst1;
271    for(i=0,l=0,k=0; i<N3; i++, k+=10)
272    {
273      l = 2*i;
274      pDst[k+0] = pSrc[l+0];
275      pDst[k+1] = pSrc[l+1];
276      pDst[k+2] = pSrc[l+0+(2*N3)];
277      pDst[k+3] = pSrc[l+1+(2*N3)];
278      pDst[k+4] = pSrc[l+0+(4*N3)];
279      pDst[k+5] = pSrc[l+1+(4*N3)];
280      pDst[k+6] = pSrc[l+0+(6*N3)];
281      pDst[k+7] = pSrc[l+1+(6*N3)];
282      pDst[k+8] = pSrc[l+0+(8*N3)];
283      pDst[k+9] = pSrc[l+1+(8*N3)];
284      fft5(&pDst[k]);
285    }
286  }
287  /* Sort output vector of length 15
288  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
289            out5(10) out5(1)  out5(7)  out5(13) out5(4)
290            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
291  /* optimize clumsy loop, brings about 5% */
292  {
293    const FIXP_DBL *pSrc = aDst1;
294    FIXP_DBL *RESTRICT pDst = pInput;
295    for(i=0,l=0,k=0; i<N3; i++, k+=10)
296    {
297      pDst[k+0] = pSrc[l];
298      pDst[k+1] = pSrc[l+1];
299      l += (2*N6);
300      if (l >= (2*N15))
301          l -= (2*N15);
302      pDst[k+2] = pSrc[l];
303      pDst[k+3] = pSrc[l+1];
304      l += (2*N6);
305      if (l >= (2*N15))
306          l -= (2*N15);
307      pDst[k+4] = pSrc[l];
308      pDst[k+5] = pSrc[l+1];
309      l += (2*N6);
310      if (l >= (2*N15))
311          l -= (2*N15);
312      pDst[k+6] = pSrc[l];
313      pDst[k+7] = pSrc[l+1];
314      l += (2*N6);
315      if (l >= (2*N15))
316          l -= (2*N15);
317      pDst[k+8] = pSrc[l];
318      pDst[k+9] = pSrc[l+1];
319      l += 2;    /* no modulo check needed, it cannot occur */
320    }
321  }
322}
323
324#define W_PiFOURTH STC(0x5a82799a)
325#ifndef SUMDIFF_PIFOURTH
326#define SUMDIFF_PIFOURTH(diff,sum,a,b) \
327  { \
328    FIXP_DBL wa, wb;\
329    wa = fMultDiv2(a, W_PiFOURTH);\
330    wb = fMultDiv2(b, W_PiFOURTH);\
331    diff = wb - wa;\
332    sum  = wb + wa;\
333  }
334#endif
335
336/* This version is more overflow save, but less cycle optimal. */
337#define SUMDIFF_EIGTH(x, y, ix, iy, vr, vi, ur, ui) \
338  vr = (x[ 0 + ix]>>1) + (x[16 + ix]>>1);  /* Re A + Re B */ \
339  vi = (x[ 8 + ix]>>1) + (x[24 + ix]>>1);     /* Re C + Re D */ \
340  ur = (x[ 1 + ix]>>1) + (x[17 + ix]>>1);  /* Im A + Im B */ \
341  ui = (x[ 9 + ix]>>1) + (x[25 + ix]>>1);     /* Im C + Im D */ \
342  y[ 0 + iy] = vr + vi;     /* Re A' = ReA + ReB +ReC + ReD */    \
343  y[ 4 + iy] = vr - vi;     /* Re C' = -(ReC+ReD) + (ReA+ReB) */  \
344  y[ 1 + iy] = ur + ui;     /* Im A' = sum of imag values */      \
345  y[ 5 + iy] = ur - ui;     /* Im C' = -Im C -Im D +Im A +Im B */ \
346  vr -= x[16 + ix];              /* Re A - Re B */ \
347  vi = vi - x[24 + ix];          /* Re C - Re D */ \
348  ur -= x[17 + ix];              /* Im A - Im B */ \
349  ui = ui - x[25 + ix];          /* Im C - Im D */ \
350  y[ 2 + iy] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */ \
351  y[ 6 + iy] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */ \
352  y[ 3 + iy] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */  \
353  y[ 7 + iy] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
354
355static const FIXP_STP fft16_w16[2] =  { STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d) };
356
357LNK_SECTION_CODE_L1
358inline void fft_16(FIXP_DBL *RESTRICT x)
359{
360  FIXP_DBL vr, vi, ur, ui;
361  FIXP_DBL y[32];
362
363  SUMDIFF_EIGTH(x, y, 0,  0, vr, vi, ur, ui);
364  SUMDIFF_EIGTH(x, y, 4,  8, vr, vi, ur, ui);
365  SUMDIFF_EIGTH(x, y, 2, 16, vr, vi, ur, ui);
366  SUMDIFF_EIGTH(x, y, 6, 24, vr, vi, ur, ui);
367
368// xt1 =  0
369// xt2 =  8
370  vr = y[ 8];
371  vi = y[ 9];
372  ur = y[ 0]>>1;
373  ui = y[ 1]>>1;
374  x[ 0] = ur + (vr>>1);
375  x[ 1] = ui + (vi>>1);
376  x[ 8] = ur - (vr>>1);
377  x[ 9] = ui - (vi>>1);
378
379// xt1 =  4
380// xt2 = 12
381  vr = y[13];
382  vi = y[12];
383  ur = y[ 4]>>1;
384  ui = y[ 5]>>1;
385  x[ 4] = ur + (vr>>1);
386  x[ 5] = ui - (vi>>1);
387  x[12] = ur - (vr>>1);
388  x[13] = ui + (vi>>1);
389
390// xt1 = 16
391// xt2 = 24
392  vr = y[24];
393  vi = y[25];
394  ur = y[16]>>1;
395  ui = y[17]>>1;
396  x[16] = ur + (vr>>1);
397  x[17] = ui + (vi>>1);
398  x[24] = ur - (vr>>1);
399  x[25] = ui - (vi>>1);
400
401// xt1 = 20
402// xt2 = 28
403  vr = y[29];
404  vi = y[28];
405  ur = y[20]>>1;
406  ui = y[21]>>1;
407  x[20] = ur + (vr>>1);
408  x[21] = ui - (vi>>1);
409  x[28] = ur - (vr>>1);
410  x[29] = ui + (vi>>1);
411
412  // xt1 =  2
413// xt2 = 10
414  SUMDIFF_PIFOURTH(vi, vr, y[10], y[11])
415  //vr = fMultDiv2((y[11] + y[10]),W_PiFOURTH);
416  //vi = fMultDiv2((y[11] - y[10]),W_PiFOURTH);
417  ur = y[ 2];
418  ui = y[ 3];
419  x[ 2] = (ur>>1) + vr;
420  x[ 3] = (ui>>1) + vi;
421  x[10] = (ur>>1) - vr;
422  x[11] = (ui>>1) - vi;
423
424// xt1 =  6
425// xt2 = 14
426  SUMDIFF_PIFOURTH(vr, vi, y[14], y[15])
427  ur = y[ 6];
428  ui = y[ 7];
429  x[ 6] = (ur>>1) + vr;
430  x[ 7] = (ui>>1) - vi;
431  x[14] = (ur>>1) - vr;
432  x[15] = (ui>>1) + vi;
433
434// xt1 = 18
435// xt2 = 26
436  SUMDIFF_PIFOURTH(vi, vr, y[26], y[27])
437  ur = y[18];
438  ui = y[19];
439  x[18] = (ur>>1) + vr;
440  x[19] = (ui>>1) + vi;
441  x[26] = (ur>>1) - vr;
442  x[27] = (ui>>1) - vi;
443
444// xt1 = 22
445// xt2 = 30
446  SUMDIFF_PIFOURTH(vr, vi, y[30], y[31])
447  ur = y[22];
448  ui = y[23];
449  x[22] = (ur>>1) + vr;
450  x[23] = (ui>>1) - vi;
451  x[30] = (ur>>1) - vr;
452  x[31] = (ui>>1) + vi;
453
454// xt1 =  0
455// xt2 = 16
456  vr = x[16];
457  vi = x[17];
458  ur = x[ 0]>>1;
459  ui = x[ 1]>>1;
460  x[ 0] = ur + (vr>>1);
461  x[ 1] = ui + (vi>>1);
462  x[16] = ur - (vr>>1);
463  x[17] = ui - (vi>>1);
464
465// xt1 =  8
466// xt2 = 24
467  vi = x[24];
468  vr = x[25];
469  ur = x[ 8]>>1;
470  ui = x[ 9]>>1;
471  x[ 8] = ur + (vr>>1);
472  x[ 9] = ui - (vi>>1);
473  x[24] = ur - (vr>>1);
474  x[25] = ui + (vi>>1);
475
476// xt1 =  2
477// xt2 = 18
478  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
479  ur = x[ 2];
480  ui = x[ 3];
481  x[ 2] = (ur>>1) + vr;
482  x[ 3] = (ui>>1) + vi;
483  x[18] = (ur>>1) - vr;
484  x[19] = (ui>>1) - vi;
485
486// xt1 = 10
487// xt2 = 26
488  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
489  ur = x[10];
490  ui = x[11];
491  x[10] = (ur>>1) + vr;
492  x[11] = (ui>>1) - vi;
493  x[26] = (ur>>1) - vr;
494  x[27] = (ui>>1) + vi;
495
496// xt1 =  4
497// xt2 = 20
498  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
499  ur = x[ 4];
500  ui = x[ 5];
501  x[ 4] = (ur>>1) + vr;
502  x[ 5] = (ui>>1) + vi;
503  x[20] = (ur>>1) - vr;
504  x[21] = (ui>>1) - vi;
505
506// xt1 = 12
507// xt2 = 28
508  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
509  ur = x[12];
510  ui = x[13];
511  x[12] = (ur>>1) + vr;
512  x[13] = (ui>>1) - vi;
513  x[28] = (ur>>1) - vr;
514  x[29] = (ui>>1) + vi;
515
516// xt1 =  6
517// xt2 = 22
518  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
519  ur = x[ 6];
520  ui = x[ 7];
521  x[ 6] = (ur>>1) + vr;
522  x[ 7] = (ui>>1) + vi;
523  x[22] = (ur>>1) - vr;
524  x[23] = (ui>>1) - vi;
525
526// xt1 = 14
527// xt2 = 30
528  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
529  ur = x[14];
530  ui = x[15];
531  x[14] = (ur>>1) + vr;
532  x[15] = (ui>>1) - vi;
533  x[30] = (ur>>1) - vr;
534  x[31] = (ui>>1) + vi;
535}
536
537#ifndef FUNCTION_fft_32
538static const FIXP_STP fft32_w32[6] =
539{
540  STCP (0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d), STCP(0x7d8a5f40, 0x18f8b83c),
541  STCP (0x6a6d98a4, 0x471cece7), STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)
542};
543
544LNK_SECTION_CODE_L1
545inline void fft_32(FIXP_DBL *x)
546{
547
548#define W_PiFOURTH STC(0x5a82799a)
549
550  FIXP_DBL vr,vi,ur,ui;
551  FIXP_DBL y[64];
552
553  /*
554   * 1+2 stage radix 4
555   */
556
557/////////////////////////////////////////////////////////////////////////////////////////
558
559  // i = 0
560  vr = (x[ 0] + x[32])>>1;  /* Re A + Re B */
561  vi = (x[16] + x[48]);     /* Re C + Re D */
562  ur = (x[ 1] + x[33])>>1;  /* Im A + Im B */
563  ui = (x[17] + x[49]);     /* Im C + Im D */
564
565  y[ 0] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
566  y[ 4] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
567  y[ 1] = ur + (ui>>1);     /* Im A' = sum of imag values */
568  y[ 5] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
569
570  vr -= x[32];              /* Re A - Re B */
571  vi = (vi>>1) - x[48];     /* Re C - Re D */
572  ur -= x[33];              /* Im A - Im B */
573  ui = (ui>>1) - x[49];     /* Im C - Im D */
574
575  y[ 2] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
576  y[ 6] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
577  y[ 3] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
578  y[ 7] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
579
580  //i=8
581  vr = (x[ 8] + x[40])>>1;  /* Re A + Re B */
582  vi = (x[24] + x[56]);     /* Re C + Re D */
583  ur = (x[ 9] + x[41])>>1;  /* Im A + Im B */
584  ui = (x[25] + x[57]);     /* Im C + Im D */
585
586  y[ 8] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
587  y[12] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
588  y[ 9] = ur + (ui>>1);     /* Im A' = sum of imag values */
589  y[13] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
590
591  vr -= x[40];              /* Re A - Re B */
592  vi = (vi>>1) - x[56];     /* Re C - Re D */
593  ur -= x[41];              /* Im A - Im B */
594  ui = (ui>>1) - x[57];     /* Im C - Im D */
595
596  y[10] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
597  y[14] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
598  y[11] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
599  y[15] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
600
601  //i=16
602  vr = (x[ 4] + x[36])>>1;  /* Re A + Re B */
603  vi = (x[20] + x[52]);     /* Re C + Re D */
604  ur = (x[ 5] + x[37])>>1;  /* Im A + Im B */
605  ui = (x[21] + x[53]);     /* Im C + Im D */
606
607  y[16] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
608  y[20] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
609  y[17] = ur + (ui>>1);     /* Im A' = sum of imag values */
610  y[21] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
611
612  vr -= x[36];              /* Re A - Re B */
613  vi = (vi>>1) - x[52];     /* Re C - Re D */
614  ur -= x[37];              /* Im A - Im B */
615  ui = (ui>>1) - x[53];     /* Im C - Im D */
616
617  y[18] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
618  y[22] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
619  y[19] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
620  y[23] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
621
622  //i=24
623  vr = (x[12] + x[44])>>1;  /* Re A + Re B */
624  vi = (x[28] + x[60]);     /* Re C + Re D */
625  ur = (x[13] + x[45])>>1;  /* Im A + Im B */
626  ui = (x[29] + x[61]);     /* Im C + Im D */
627
628  y[24] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
629  y[28] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
630  y[25] = ur + (ui>>1);     /* Im A' = sum of imag values */
631  y[29] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
632
633  vr -= x[44];              /* Re A - Re B */
634  vi = (vi>>1) - x[60];     /* Re C - Re D */
635  ur -= x[45];              /* Im A - Im B */
636  ui = (ui>>1) - x[61];     /* Im C - Im D */
637
638  y[26] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
639  y[30] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
640  y[27] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
641  y[31] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
642
643  // i = 32
644  vr = (x[ 2] + x[34])>>1;  /* Re A + Re B */
645  vi = (x[18] + x[50]);     /* Re C + Re D */
646  ur = (x[ 3] + x[35])>>1;  /* Im A + Im B */
647  ui = (x[19] + x[51]);     /* Im C + Im D */
648
649  y[32] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
650  y[36] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
651  y[33] = ur + (ui>>1);     /* Im A' = sum of imag values */
652  y[37] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
653
654  vr -= x[34];              /* Re A - Re B */
655  vi = (vi>>1) - x[50];     /* Re C - Re D */
656  ur -= x[35];              /* Im A - Im B */
657  ui = (ui>>1) - x[51];     /* Im C - Im D */
658
659  y[34] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
660  y[38] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
661  y[35] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
662  y[39] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
663
664  //i=40
665  vr = (x[10] + x[42])>>1;  /* Re A + Re B */
666  vi = (x[26] + x[58]);     /* Re C + Re D */
667  ur = (x[11] + x[43])>>1;  /* Im A + Im B */
668  ui = (x[27] + x[59]);     /* Im C + Im D */
669
670  y[40] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
671  y[44] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
672  y[41] = ur + (ui>>1);     /* Im A' = sum of imag values */
673  y[45] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
674
675  vr -= x[42];              /* Re A - Re B */
676  vi = (vi>>1) - x[58];     /* Re C - Re D */
677  ur -= x[43];              /* Im A - Im B */
678  ui = (ui>>1) - x[59];     /* Im C - Im D */
679
680  y[42] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
681  y[46] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
682  y[43] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
683  y[47] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
684
685  //i=48
686  vr = (x[ 6] + x[38])>>1;  /* Re A + Re B */
687  vi = (x[22] + x[54]);     /* Re C + Re D */
688  ur = (x[ 7] + x[39])>>1;  /* Im A + Im B */
689  ui = (x[23] + x[55]);     /* Im C + Im D */
690
691  y[48] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
692  y[52] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
693  y[49] = ur + (ui>>1);     /* Im A' = sum of imag values */
694  y[53] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
695
696  vr -= x[38];              /* Re A - Re B */
697  vi = (vi>>1) - x[54];     /* Re C - Re D */
698  ur -= x[39];              /* Im A - Im B */
699  ui = (ui>>1) - x[55];     /* Im C - Im D */
700
701  y[50] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
702  y[54] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
703  y[51] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
704  y[55] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
705
706  //i=56
707  vr = (x[14] + x[46])>>1;  /* Re A + Re B */
708  vi = (x[30] + x[62]);     /* Re C + Re D */
709  ur = (x[15] + x[47])>>1;  /* Im A + Im B */
710  ui = (x[31] + x[63]);     /* Im C + Im D */
711
712  y[56] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
713  y[60] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
714  y[57] = ur + (ui>>1);     /* Im A' = sum of imag values */
715  y[61] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
716
717  vr -= x[46];              /* Re A - Re B */
718  vi = (vi>>1) - x[62];     /* Re C - Re D */
719  ur -= x[47];              /* Im A - Im B */
720  ui = (ui>>1) - x[63];     /* Im C - Im D */
721
722  y[58] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
723  y[62] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
724  y[59] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
725  y[63] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
726
727
728  FIXP_DBL *xt = &x[0];
729  FIXP_DBL *yt = &y[0];
730
731  int j = 4;
732  do
733  {
734    vr = yt[8];
735    vi = yt[9];
736    ur = yt[0]>>1;
737    ui = yt[1]>>1;
738    xt[ 0] = ur + (vr>>1);
739    xt[ 1] = ui + (vi>>1);
740    xt[ 8] = ur - (vr>>1);
741    xt[ 9] = ui - (vi>>1);
742
743    vr = yt[13];
744    vi = yt[12];
745    ur = yt[4]>>1;
746    ui = yt[5]>>1;
747    xt[ 4] = ur + (vr>>1);
748    xt[ 5] = ui - (vi>>1);
749    xt[12] = ur - (vr>>1);
750    xt[13] = ui + (vi>>1);
751
752    SUMDIFF_PIFOURTH(vi, vr, yt[10], yt[11])
753    ur = yt[2];
754    ui = yt[3];
755    xt[ 2] = (ur>>1) + vr;
756    xt[ 3] = (ui>>1) + vi;
757    xt[10] = (ur>>1) - vr;
758    xt[11] = (ui>>1) - vi;
759
760    SUMDIFF_PIFOURTH(vr, vi, yt[14], yt[15])
761    ur = yt[6];
762    ui = yt[7];
763
764    xt[ 6] = (ur>>1) + vr;
765    xt[ 7] = (ui>>1) - vi;
766    xt[14] = (ur>>1) - vr;
767    xt[15] = (ui>>1) + vi;
768    xt += 16;
769    yt += 16;
770  } while (--j != 0);
771
772  vr = x[16];
773  vi = x[17];
774  ur = x[ 0]>>1;
775  ui = x[ 1]>>1;
776  x[ 0] = ur + (vr>>1);
777  x[ 1] = ui + (vi>>1);
778  x[16] = ur - (vr>>1);
779  x[17] = ui - (vi>>1);
780
781  vi = x[24];
782  vr = x[25];
783  ur = x[ 8]>>1;
784  ui = x[ 9]>>1;
785  x[ 8] = ur + (vr>>1);
786  x[ 9] = ui - (vi>>1);
787  x[24] = ur - (vr>>1);
788  x[25] = ui + (vi>>1);
789
790  vr = x[48];
791  vi = x[49];
792  ur = x[32]>>1;
793  ui = x[33]>>1;
794  x[32] = ur + (vr>>1);
795  x[33] = ui + (vi>>1);
796  x[48] = ur - (vr>>1);
797  x[49] = ui - (vi>>1);
798
799  vi = x[56];
800  vr = x[57];
801  ur = x[40]>>1;
802  ui = x[41]>>1;
803  x[40] = ur + (vr>>1);
804  x[41] = ui - (vi>>1);
805  x[56] = ur - (vr>>1);
806  x[57] = ui + (vi>>1);
807
808  cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
809  ur = x[ 2];
810  ui = x[ 3];
811  x[ 2] = (ur>>1) + vr;
812  x[ 3] = (ui>>1) + vi;
813  x[18] = (ur>>1) - vr;
814  x[19] = (ui>>1) - vi;
815
816  cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
817  ur = x[10];
818  ui = x[11];
819  x[10] = (ur>>1) + vr;
820  x[11] = (ui>>1) - vi;
821  x[26] = (ur>>1) - vr;
822  x[27] = (ui>>1) + vi;
823
824  cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
825  ur = x[34];
826  ui = x[35];
827  x[34] = (ur>>1) + vr;
828  x[35] = (ui>>1) + vi;
829  x[50] = (ur>>1) - vr;
830  x[51] = (ui>>1) - vi;
831
832  cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
833  ur = x[42];
834  ui = x[43];
835  x[42] = (ur>>1) + vr;
836  x[43] = (ui>>1) - vi;
837  x[58] = (ur>>1) - vr;
838  x[59] = (ui>>1) + vi;
839
840  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
841  ur = x[ 4];
842  ui = x[ 5];
843  x[ 4] = (ur>>1) + vr;
844  x[ 5] = (ui>>1) + vi;
845  x[20] = (ur>>1) - vr;
846  x[21] = (ui>>1) - vi;
847
848  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
849  ur = x[12];
850  ui = x[13];
851  x[12] = (ur>>1) + vr;
852  x[13] = (ui>>1) - vi;
853  x[28] = (ur>>1) - vr;
854  x[29] = (ui>>1) + vi;
855
856  SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
857  ur = x[36];
858  ui = x[37];
859  x[36] = (ur>>1) + vr;
860  x[37] = (ui>>1) + vi;
861  x[52] = (ur>>1) - vr;
862  x[53] = (ui>>1) - vi;
863
864  SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
865  ur = x[44];
866  ui = x[45];
867  x[44] = (ur>>1) + vr;
868  x[45] = (ui>>1) - vi;
869  x[60] = (ur>>1) - vr;
870  x[61] = (ui>>1) + vi;
871
872
873  cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
874  ur = x[ 6];
875  ui = x[ 7];
876  x[ 6] = (ur>>1) + vr;
877  x[ 7] = (ui>>1) + vi;
878  x[22] = (ur>>1) - vr;
879  x[23] = (ui>>1) - vi;
880
881  cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
882  ur = x[14];
883  ui = x[15];
884  x[14] = (ur>>1) + vr;
885  x[15] = (ui>>1) - vi;
886  x[30] = (ur>>1) - vr;
887  x[31] = (ui>>1) + vi;
888
889  cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
890  ur = x[38];
891  ui = x[39];
892  x[38] = (ur>>1) + vr;
893  x[39] = (ui>>1) + vi;
894  x[54] = (ur>>1) - vr;
895  x[55] = (ui>>1) - vi;
896
897  cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
898  ur = x[46];
899  ui = x[47];
900
901  x[46] = (ur>>1) + vr;
902  x[47] = (ui>>1) - vi;
903  x[62] = (ur>>1) - vr;
904  x[63] = (ui>>1) + vi;
905
906  vr = x[32];
907  vi = x[33];
908  ur = x[ 0]>>1;
909  ui = x[ 1]>>1;
910  x[ 0] = ur + (vr>>1);
911  x[ 1] = ui + (vi>>1);
912  x[32] = ur - (vr>>1);
913  x[33] = ui - (vi>>1);
914
915  vi = x[48];
916  vr = x[49];
917  ur = x[16]>>1;
918  ui = x[17]>>1;
919  x[16] = ur + (vr>>1);
920  x[17] = ui - (vi>>1);
921  x[48] = ur - (vr>>1);
922  x[49] = ui + (vi>>1);
923
924  cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
925  ur = x[ 2];
926  ui = x[ 3];
927  x[ 2] = (ur>>1) + vr;
928  x[ 3] = (ui>>1) + vi;
929  x[34] = (ur>>1) - vr;
930  x[35] = (ui>>1) - vi;
931
932  cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
933  ur = x[18];
934  ui = x[19];
935  x[18] = (ur>>1) + vr;
936  x[19] = (ui>>1) - vi;
937  x[50] = (ur>>1) - vr;
938  x[51] = (ui>>1) + vi;
939
940  cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
941  ur = x[ 4];
942  ui = x[ 5];
943  x[ 4] = (ur>>1) + vr;
944  x[ 5] = (ui>>1) + vi;
945  x[36] = (ur>>1) - vr;
946  x[37] = (ui>>1) - vi;
947
948  cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
949  ur = x[20];
950  ui = x[21];
951  x[20] = (ur>>1) + vr;
952  x[21] = (ui>>1) - vi;
953  x[52] = (ur>>1) - vr;
954  x[53] = (ui>>1) + vi;
955
956  cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
957  ur = x[ 6];
958  ui = x[ 7];
959  x[ 6] = (ur>>1) + vr;
960  x[ 7] = (ui>>1) + vi;
961  x[38] = (ur>>1) - vr;
962  x[39] = (ui>>1) - vi;
963
964  cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
965  ur = x[22];
966  ui = x[23];
967  x[22] = (ur>>1) + vr;
968  x[23] = (ui>>1) - vi;
969  x[54] = (ur>>1) - vr;
970  x[55] = (ui>>1) + vi;
971
972  SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
973  ur = x[ 8];
974  ui = x[ 9];
975  x[ 8] = (ur>>1) + vr;
976  x[ 9] = (ui>>1) + vi;
977  x[40] = (ur>>1) - vr;
978  x[41] = (ui>>1) - vi;
979
980  SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
981  ur = x[24];
982  ui = x[25];
983  x[24] = (ur>>1) + vr;
984  x[25] = (ui>>1) - vi;
985  x[56] = (ur>>1) - vr;
986  x[57] = (ui>>1) + vi;
987
988  cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
989  ur = x[10];
990  ui = x[11];
991
992  x[10] = (ur>>1) + vr;
993  x[11] = (ui>>1) + vi;
994  x[42] = (ur>>1) - vr;
995  x[43] = (ui>>1) - vi;
996
997  cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
998  ur = x[26];
999  ui = x[27];
1000  x[26] = (ur>>1) + vr;
1001  x[27] = (ui>>1) - vi;
1002  x[58] = (ur>>1) - vr;
1003  x[59] = (ui>>1) + vi;
1004
1005  cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
1006  ur = x[12];
1007  ui = x[13];
1008  x[12] = (ur>>1) + vr;
1009  x[13] = (ui>>1) + vi;
1010  x[44] = (ur>>1) - vr;
1011  x[45] = (ui>>1) - vi;
1012
1013  cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
1014  ur = x[28];
1015  ui = x[29];
1016  x[28] = (ur>>1) + vr;
1017  x[29] = (ui>>1) - vi;
1018  x[60] = (ur>>1) - vr;
1019  x[61] = (ui>>1) + vi;
1020
1021  cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
1022  ur = x[14];
1023  ui = x[15];
1024  x[14] = (ur>>1) + vr;
1025  x[15] = (ui>>1) + vi;
1026  x[46] = (ur>>1) - vr;
1027  x[47] = (ui>>1) - vi;
1028
1029  cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
1030  ur = x[30];
1031  ui = x[31];
1032  x[30] = (ur>>1) + vr;
1033  x[31] = (ui>>1) - vi;
1034  x[62] = (ur>>1) - vr;
1035  x[63] = (ui>>1) + vi;
1036}
1037#endif /* #ifndef FUNCTION_fft_32 */
1038
1039
1040/**
1041 * \brief Apply rotation vectors to a data buffer.
1042 * \param cl length of each row of input data.
1043 * \param l total length of input data.
1044 * \param pVecRe real part of rotation ceofficient vector.
1045 * \param pVecIm imaginary part of rotation ceofficient vector.
1046 */
1047static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, const int l, const FIXP_STB *pVecRe, const FIXP_STB *pVecIm)
1048{
1049  FIXP_DBL re, im;
1050  FIXP_STB vre, vim;
1051
1052  int i, c;
1053
1054  for(i=0; i<cl; i++) {
1055    re  = pData[2*i];
1056    im  = pData[2*i+1];
1057
1058    pData[2*i]   = re>>2; /* * 0.25 */
1059    pData[2*i+1] = im>>2; /* * 0.25 */
1060  }
1061  for(; i<l; i+=cl)
1062  {
1063    re  = pData[2*i];
1064    im  = pData[2*i+1];
1065
1066    pData[2*i]   = re>>2; /* * 0.25 */
1067    pData[2*i+1] = im>>2; /* * 0.25 */
1068
1069    for (c=i+1; c<i+cl; c++)
1070    {
1071      re  = pData[2*c]>>1;
1072      im  = pData[2*c+1]>>1;
1073      vre = *pVecRe++;
1074      vim = *pVecIm++;
1075
1076      cplxMultDiv2(&pData[2*c+1], &pData[2*c], im, re, vre, vim);
1077    }
1078  }
1079}
1080
1081#define FFT_TWO_STAGE_MACRO_ENABLE
1082
1083
1084#ifdef FFT_TWO_STAGE_MACRO_ENABLE
1085
1086#define fftN2(pInput, length, dim1, dim2, fft_func1, fft_func2, RotVectorReal, RotVectorImag) \
1087{ \
1088  int       i, j; \
1089 \
1090  C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2); \
1091  C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2); \
1092 \
1093  FDK_ASSERT(length == dim1*dim2); \
1094 \
1095  /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the \
1096  output samples are at the address of pDst. The input vector for the fft of length dim1 is built \
1097  of the interleaved samples in pSrc, the output samples are stored consecutively. \
1098  */ \
1099  { \
1100    const FIXP_DBL* pSrc = pInput; \
1101    FIXP_DBL  *RESTRICT pDst = aDst; \
1102    \
1103    for(i=0; i<dim2; i++) \
1104    { \
1105      for(j=0; j<dim1; j++) \
1106      { \
1107        pDst[2*j]   = pSrc[2*j*dim2]; \
1108        pDst[2*j+1] = pSrc[2*j*dim2+1]; \
1109      } \
1110      \
1111      fft_func1(pDst); \
1112      pSrc += 2; \
1113      pDst = pDst + 2*dim1; \
1114    } \
1115  } \
1116  \
1117  /* Perform the modulation of the output of the fft of length dim1 */ \
1118  fft_apply_rot_vector(aDst, dim1, length, RotVectorReal, RotVectorImag); \
1119  \
1120  /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the \
1121  output samples are at the address of pInput. The input vector for the fft of length dim2 is built \
1122  of the interleaved samples in aDst, the output samples are stored consecutively at the address \
1123  of pInput. \
1124  */ \
1125  { \
1126    const FIXP_DBL* pSrc       = aDst; \
1127    FIXP_DBL *RESTRICT pDst    = aDst2; \
1128    FIXP_DBL *RESTRICT pDstOut = pInput; \
1129    \
1130    for(i=0; i<dim1; i++) \
1131    { \
1132      for(j=0; j<dim2; j++) \
1133      { \
1134        pDst[2*j]   = pSrc[2*j*dim1]; \
1135        pDst[2*j+1] = pSrc[2*j*dim1+1]; \
1136      } \
1137      \
1138      fft_func2(pDst); \
1139      \
1140      for(j=0; j<dim2; j++) \
1141      { \
1142        pDstOut[2*j*dim1]   = pDst[2*j]; \
1143        pDstOut[2*j*dim1+1] = pDst[2*j+1]; \
1144      } \
1145      pSrc += 2; \
1146      pDstOut += 2; \
1147    } \
1148  } \
1149  \
1150  C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2); \
1151  C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2); \
1152} \
1153
1154#else /* FFT_TWO_STAGE_MACRO_ENABLE */
1155
1156/* select either switch case of function pointer. */
1157//#define FFT_TWO_STAGE_SWITCH_CASE
1158
1159static inline void fftN2(
1160        FIXP_DBL *pInput,
1161        const int length,
1162        const int dim1,
1163        const int dim2,
1164        void (* const fft1)(FIXP_DBL *),
1165        void (* const fft2)(FIXP_DBL *),
1166        const FIXP_STB *RotVectorReal,
1167        const FIXP_STB *RotVectorImag
1168        )
1169{
1170  /* The real part of the input samples are at the addresses with even indices and the imaginary
1171  part of the input samples are at the addresses with odd indices. The output samples are stored
1172  at the address of pInput
1173  */
1174  FIXP_DBL  *pSrc, *pDst, *pDstOut;
1175  int       i, j;
1176
1177  C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2);
1178  C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2);
1179
1180  FDK_ASSERT(length == dim1*dim2);
1181
1182  /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the
1183  output samples are at the address of pDst. The input vector for the fft of length dim1 is built
1184  of the interleaved samples in pSrc, the output samples are stored consecutively.
1185  */
1186  pSrc = pInput;
1187  pDst = aDst;
1188  for(i=0; i<length/dim1; i++)
1189  {
1190    for(j=0; j<length/dim2; j++)
1191    {
1192      pDst[2*j]   = pSrc[2*j*dim2];
1193      pDst[2*j+1] = pSrc[2*j*dim2+1];
1194    }
1195
1196    /* fft of size dim1 */
1197#ifndef FFT_TWO_STAGE_SWITCH_CASE
1198    fft1(pDst);
1199#else
1200    switch (dim1) {
1201      case 3: fft3(pDst); break;
1202      case 4: fft_4(pDst); break;
1203      case 5: fft5(pDst); break;
1204      case 8: fft_8(pDst); break;
1205      case 15: fft15(pDst); break;
1206      case 16: fft_16(pDst); break;
1207      case 32: fft_32(pDst); break;
1208      /*case 64: fft_64(pDst); break;*/
1209      case 128: fft_128(pDst); break;
1210    }
1211#endif
1212    pSrc += 2;
1213    pDst = pDst + 2*length/dim2;
1214  }
1215
1216  /* Perform the modulation of the output of the fft of length dim1 */
1217  pSrc=aDst;
1218  fft_apply_rot_vector(pSrc, length/dim2, length, RotVectorReal, RotVectorImag);
1219
1220  /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the
1221  output samples are at the address of pInput. The input vector for the fft of length dim2 is built
1222  of the interleaved samples in aDst, the output samples are stored consecutively at the address
1223  of pInput.
1224  */
1225  pSrc    = aDst;
1226  pDst    = aDst2;
1227  pDstOut = pInput;
1228  for(i=0; i<length/dim2; i++)
1229  {
1230    for(j=0; j<length/dim1; j++)
1231    {
1232      pDst[2*j]   = pSrc[2*j*dim1];
1233      pDst[2*j+1] = pSrc[2*j*dim1+1];
1234    }
1235
1236#ifndef FFT_TWO_STAGE_SWITCH_CASE
1237    fft2(pDst);
1238#else
1239    switch (dim2) {
1240      case 3: fft3(pDst); break;
1241      case 4: fft_4(pDst); break;
1242      case 5: fft5(pDst); break;
1243      case 8: fft_8(pDst); break;
1244      case 15: fft15(pDst); break;
1245      case 16: fft_16(pDst); break;
1246      case 32: fft_32(pDst); break;
1247      /*case 64: fft_64(pDst); break;*/
1248      case 128: fft_128(pDst); break;
1249    }
1250#endif
1251
1252    for(j=0; j<length/dim1; j++)
1253    {
1254      pDstOut[2*j*dim1]   = pDst[2*j];
1255      pDstOut[2*j*dim1+1] = pDst[2*j+1];
1256    }
1257    pSrc += 2;
1258    pDstOut += 2;
1259  }
1260
1261  C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2);
1262  C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2);
1263}
1264
1265#endif /* FFT_TWO_STAGE_MACRO_ENABLE */
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278#define SCALEFACTOR60         5
1279/**
1280The function performs the fft of length 60. It is splittet into fft's of length 4 and fft's of
1281length 15. Between the fft's a modolation is calculated.
1282*/
1283static inline void fft60(FIXP_DBL *pInput, INT *pScalefactor)
1284{
1285  fftN2(
1286          pInput, 60, 4, 15,
1287          fft_4, fft15,
1288          RotVectorReal60, RotVectorImag60
1289          );
1290  *pScalefactor += SCALEFACTOR60;
1291}
1292
1293
1294
1295/* Fallback implementation in case of no better implementation available. */
1296
1297#define SCALEFACTOR240        7
1298
1299/**
1300The function performs the fft of length 240. It is splittet into fft's of length 16 and fft's of
1301length 15. Between the fft's a modulation is calculated.
1302*/
1303static inline void fft240(FIXP_DBL *pInput, INT *pScalefactor)
1304{
1305  fftN2(
1306          pInput, 240, 16, 15,
1307          fft_16, fft15,
1308          RotVectorReal240, RotVectorImag240
1309          );
1310  *pScalefactor += SCALEFACTOR240;
1311}
1312
1313
1314#define SCALEFACTOR480        8
1315#define N32                   32
1316#define TABLE_SIZE_16        (32/2)
1317
1318/**
1319The function performs the fft of length 480. It is splittet into fft's of length 32 and fft's of
1320length 15. Between the fft's a modulation is calculated.
1321*/
1322static inline void fft480(FIXP_DBL *pInput, INT *pScalefactor)
1323{
1324  fftN2(
1325          pInput, 480, 32, 15,
1326          fft_32, fft15,
1327          RotVectorReal480, RotVectorImag480
1328          );
1329  *pScalefactor += SCALEFACTOR480;
1330}
1331
1332void fft(int length, FIXP_DBL *pInput, INT *pScalefactor)
1333{
1334  if (length == 32)
1335  {
1336      fft_32(pInput);
1337      *pScalefactor += SCALEFACTOR32;
1338  }
1339  else
1340  {
1341
1342  switch (length) {
1343    case 16:
1344      fft_16(pInput);
1345      *pScalefactor += SCALEFACTOR16;
1346      break;
1347    case 8:
1348      fft_8(pInput);
1349      *pScalefactor += SCALEFACTOR8;
1350      break;
1351    case 3:
1352      fft3(pInput);
1353      break;
1354    case 4:
1355      fft_4(pInput);
1356      *pScalefactor += SCALEFACTOR4;
1357      break;
1358    case 5:
1359      fft5(pInput);
1360      break;
1361    case 15:
1362      fft15(pInput);
1363      *pScalefactor += 2;
1364      break;
1365    case 60:
1366      fft60(pInput, pScalefactor);
1367      break;
1368    case 64:
1369      dit_fft(pInput, 6, SineTable512, 512);
1370      *pScalefactor += SCALEFACTOR64;
1371      break;
1372    case 240:
1373      fft240(pInput, pScalefactor);
1374      break;
1375    case 256:
1376      dit_fft(pInput, 8, SineTable512, 512);
1377      *pScalefactor += SCALEFACTOR256;
1378      break;
1379    case 480:
1380      fft480(pInput, pScalefactor);
1381      break;
1382    case 512:
1383      dit_fft(pInput, 9, SineTable512, 512);
1384      *pScalefactor += SCALEFACTOR512;
1385      break;
1386    default:
1387      FDK_ASSERT(0); /* FFT length not supported! */
1388      break;
1389  }
1390  }
1391}
1392
1393
1394void ifft(int length, FIXP_DBL *pInput, INT *scalefactor)
1395{
1396  switch (length) {
1397    default:
1398      FDK_ASSERT(0); /* IFFT length not supported! */
1399      break;
1400  }
1401}
1402
1403
1404