1/*---------------------------------------------------------------------------*
2 *  sp_fft.h  *
3 *                                                                           *
4 *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5 *                                                                           *
6 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7 *  you may not use this file except in compliance with the License.         *
8 *                                                                           *
9 *  You may obtain a copy of the License at                                  *
10 *      http://www.apache.org/licenses/LICENSE-2.0                           *
11 *                                                                           *
12 *  Unless required by applicable law or agreed to in writing, software      *
13 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 *  See the License for the specific language governing permissions and      *
16 *  limitations under the License.                                           *
17 *                                                                           *
18 *---------------------------------------------------------------------------*/
19
20#ifndef _SPLIT_RADIX_FFT_H_
21#define _SPLIT_RADIX_FFT_H_
22
23/*
24////////////////////////////////////////////////////////////////////////////
25//
26//  FILE:         fft.h
27//
28//  CREATED:   11-September-99
29//
30//  DESCRIPTION:  Split-Radix FFT
31//
32//
33//
34//
35//  MODIFICATIONS:
36// Revision history log
37    VSS revision history.  Do not edit by hand.
38
39    $NoKeywords: $
40
41*/
42
43/*
44>>>>> Floating and Fixed Point plit-Radix FFT -- The Fastest FFT  <<<<<<<<
45
46The split-radix FFT improves the efficiency of the FFT implementation
47by mixing a radix-2 and a radix-4 FFT algorithms. Specifically,
48the radix-2 decomposes one Fourier transform into two half-sized Fourier
49transform at each stage. It is the FFT presented in almost every textbook
50and implemented. Unfortuantely, it is also the least efficient among the
51power-of-two FFT, The radix-4 decomposes one Fourier transform into
52four quarter-sized Fourier transform. It is much more
53efficient than the radix-2 FFT, but it requires a power-of-four as the data length.
54By combining the radix-2 and the radix-4 algorithms, one not only
55removes the power-of-four limitation of the radix-4 alogorithm, but also
56obtains the most efficient power-of-two FFT algorithm.
57
58The split-radix FFT decomposes a Fourier transform
59
60  X(k) = sum(n = 0 to N-1, x[n] * W**(n*k))
61
62successively into one length N/2 radix-2 and two length N/4 radix-4 FFT
63
64 X(2k) = sum(n = 0 to N/2 - 1, [x[n] + x(n + N/2)] * W**(2*n*k))
65 X(4k + 1) = sum(n = 0 to N/4 - 1, [x[n] - x(n + N/2) - j(x[n] - x(n + 3*N/4))] * W(n)*W**(4*n*k))
66 X(4k + 3) = sum(n = 0 to N/4 - 1, [x[n] - x(n + N/2) + j(x[n] - x(n + 3*N/4))] * W(n)*W**(4*n*k))
67
68where W(n) = exp(-j2*PI*n/N) is called a twiddle factor
69
70The detailed description of the algorithm (with bugs) can be found in
71
72  H.V. Sorensen, M.T. Heideman, and C. S. Burrus, "On Computing the Split-Radix
73  FFT,"IEEE Trans. Acoust., Speech, Signal Processing, pp. 152-200, Feb. 1986.
74
75The implementation of the split-radix is similar to that of the radix-2
76algorithm, except a smart index scheme is needed at each stage to determine
77which nodes need to be decomposed further and which ones do not, since
78a radix-2 decomposition is done at every stage while a radix-4 decomposition
79is done at every other stage. This implementation uses an index scheme designed
80by H.V. Sorensen, M.T. Heideman, and C. S. Burrus.
81
82You can use this implementation for both floating and fixed point FFT and
83inverse FFT computation by changing a compliler constant FIXED_POINT
84in file fft.h. Some addtional information can be found in following sections..
85
86 Usage
87 Efficiency
88 Examples:
89
90>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
91Usage:
92>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
93
94For floating point, you have to undefine the constant FIXED_POINT and
95use typedef to define the trigonomy function and input data types.
96
97For fixed point, the constant FIXED_POINT must be defined and a 32 bit
98representation of both input data and trigonomy function data are required.
99Furthermore, you have three choices to conduct fixed-point FFT.
100If you use the algorithm in both Microsoft C and I86 hardware environment, you
101should use an assemblyline implementation. To do this, you go to the file
102himul32.cpp and define constants MICROSOFTC_HIMUL32 and I86_HIMUL3.
103If you use the algorithm only in the Microsoft C environment,
104a Microsoft specific implementation should be used. You do this by
105going to the file himul32.cpp to undefine I86_HIMUL3, and
106define MICROSOFTC_HIMUL32.
107In any other situation, a stric C implementation should be used by
108undefining both MICROSOFTC_HIMUL32 and I86_HIMUL3.
109
110To use the algorithm, you need to constrcut an fft_info object
111
112 fft_info* my_fft_info = new_fft_info(log2Size);
113
114If you have a real data input data[] of size 2**(log2Size + 1), you
115use
116
117 do_real_fft(my_fft_info, size, data);
118
119The positive half frequency Fourier transform will be returned, in addition
120to the first and last elements of the transform that are stored in the
121first and second elements of the data array. If the data[] array is the
122Fourier transform of a real data input, you can also conduct the inverse
123Fourier transform to get the real data input by
124
125 do_real_ifft(my_fft_info, size, data);
126
127Finally, you should remember to remove my_fft_info object using
128
129 delete_fft_info(my_fft_info)
130
131when you are done with FFT.
132
133>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
134Efficiency:
135>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
136
137TIME: Let M = log2Size be the power-of-two length and N = 2**M be the complex
138  FFT input size. The following formulas give the comutation requirements
139  of the split-radix FFT algorithm.
140
141  Multiplications = (4/3)*M*N - (35/9)N + 4
142  Adds   = (8/3)*M*N - (16/9)N + 2
143
144  On a 266 MHz 64 MB RAM Pentium, using a release build,
145  500 runs of paired 256 point complex (512  point real) input FFT
146  (forward + inverse) have the following time counts:
147
148Floating Point:
149  Real:  0.160 sec.
150  Complex: 0.120 sec.
151
152
153Fixed Point:
154  Assembly:  Real 0.170 sec, Complex 0.140 sec.
155  Microsoft C: Real 0.250 sec, Complex 0.240 sec.
156  Stric C:  Real 0.540 sec, Complex 0.441 sec.
157
158
159SPACE: The computation is done in place so that there is no dynamic memory allocation
160  in do_fft (do_real_fft) and do_ifft (do_real_ifft) call, except some
161  temporary local variables.
162
163  The memory space is needed in fft_info struct to store the cosine (sine) tales
164  butterfly index table, and bit reverse table. Specifically,
165
166  cosine(sine) tables: 3*N (half can be saved if we only save cosine or sine tables)
167  butterfly index table: < N
168  bitrever index table: N
169
170>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
171Example:
172>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
173
174The examples of running this program is given in a compiled out main()
175in fft.cpp. You can run this program by compiling the fft.h, fft.cpp, and
176himul32.cpp with main() compiled.
177
178*/ /* end of the comment block*/
179
180#ifdef __cplusplus
181extern "C"
182{
183#endif
184
185
186#define DATA_BITS 16   /*number of significant bits in fftdata*/
187#define HAMMING_DATA_BITS 15
188
189  typedef fftdata     trigonomydata;
190
191  typedef struct
192  {
193    /* fft log length */
194    unsigned  m_logLength;
195
196    /* fft data length */
197    unsigned  m_length;
198
199    unsigned  *m_bitreverseTbl;
200
201    /* L shaped butterfly index table */
202    unsigned  *m_butterflyIndexTbl;
203
204    /* sine and cosine tables */
205    trigonomydata *m_sin1Tbl;
206    trigonomydata *m_cos1Tbl;
207    trigonomydata *m_cos2Tbl;
208    trigonomydata *m_sin2Tbl;
209    trigonomydata *m_sin3Tbl;
210    trigonomydata *m_cos3Tbl;
211
212
213  }
214  srfft;
215
216  typedef struct
217  {
218    srfft   *m_srfft;
219
220    fftdata *real;
221    fftdata *imag;
222
223    asr_uint32_t  size;
224    asr_uint32_t  size2;
225
226  }
227  fft_info;
228
229
230  /****************************************************************
231   new_srfft: create srfft object
232
233   Parameters:
234    log2Length -- the power-of-two length of the complex FFT
235
236   Returns:
237    the created srfft object
238  ****************************************************************/
239  srfft* new_srfft(unsigned log2Length);
240
241
242  /****************************************************************
243   delete_srfft: release all the memory allocated to srfft object
244
245   Parameters:
246    pthis -- a pointer to a srfft object created by new_srfft
247
248   Returns:
249    none
250  ****************************************************************/
251  void delete_srfft(srfft* pthis);
252
253
254  /******************************************************************
255   do_real_fft conducts a forward FFT of a real data array using
256   the split-radix algorithm
257
258   Parameters:
259    pthis  -- a pointer to the srfft object
260    length -- length of data array, must be a power-of-2 length
261       and must be equal to 2**(log2Length + 1)
262    data   -- A real data array, overwritten by the positive frequency
263       half of its Fourier transform.
264       Data[0] and data[1] store the first and last
265       component of the the Fourier transform. After that, the
266       even elements store the real component, while the odd
267       elements store the imaginary component of the transform.
268   Return:
269    none
270  *********************************************************************/
271  void do_real_fft(srfft* pthis, unsigned length, fftdata* data);
272
273
274  /******************************************************************
275   do_real_ifft conducts an inverse FFT of a real data array's FFT using
276   the split-radix algorithm
277
278   Parameters:
279    pthis  -- a pointer to the srfft object
280    length -- length of data array, must be a power-of-2 length
281       and must be equal to 2**(log2Length + 1)
282    data   -- FFT of an real data array, overwritten by the real data array.
283       For input, data[0] and data[1] store the first and last
284       component of the the Fourier transform. After that,
285       the even elements store the real component of the transform,
286       while the odd elements store the imaginary component of
287       the transform.
288   Return:
289    none
290  *********************************************************************/
291  void do_real_ifft(srfft* pthis, unsigned length, fftdata* data);
292
293
294  /* >>>>>>>>>>>>>> Private Methods <<<<<<<<<<<<<<<<<<<<<<<<< */
295
296  /******************************************************************
297   allocate_butterfly_index uses an index scheme developed by
298   Sorensen, Heideman, and Burrus to determine which L shaped
299   butterfiles needs to be computed at each stage and
300   store these indexes into a table
301
302   Parameters:
303    pthis -- a pointer to the srfft object
304
305   Returns:
306    none
307  *********************************************************************/
308  void allocate_butterfly_tbl(srfft* pthis);
309
310  /******************************************************************
311   allocate_trigonomy_tbl allocates trigonomy function tables
312   for FFT computation
313
314   Parameters:
315    pthis -- a pointer to the srfft object
316
317   Returns:
318    none
319  *********************************************************************/
320  void allocate_trigonomy_tbl(srfft* pthis);
321
322  /******************************************************************
323   allocate_bitreverse_tbl() allocates bit reverse index table
324
325   Parameters:
326    pthis -- a pointer to the srfft object
327
328   Returns:
329    none
330  *********************************************************************/
331  void allocate_bitreverse_tbl(srfft* pthis);
332
333#ifdef __cplusplus
334}
335#endif
336
337#endif
338