1/*
2 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/**
17 * @file picofftsg.c
18 *
19 * FFT/DCT related data types, constants and functions in Pico
20 *
21 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
22 * All rights reserved.
23 *
24 * History:
25 * - 2009-04-20 -- initial version
26 *
27*/
28
29#include "picoos.h"
30#include "picofftsg.h"
31#include "picodbg.h"
32
33#ifdef __cplusplus
34extern "C" {
35#endif
36
37/**
38 * @addtogroup picofft
39 * ---------------------------------------------------\n
40 * <b> Fast Fourier/Cosine/Sine Transform </b>\n
41 * Adapted from http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html (Copyright Takuya OOURA, 1996-2001)\n
42 * ---------------------------------------------------\n
43
44  Overall features
45  - dimension   :one
46  - data length :power of 2
47  - decimation  :frequency
48  - radix       :split-radix
49  - data        :inplace
50  - table       :not use
51
52  functions
53  - cdft: Complex Discrete Fourier Transform
54  - rdft: Real Discrete Fourier Transform
55  - ddct: Discrete Cosine Transform
56  - ddst: Discrete Sine Transform
57  - dfct: Cosine Transform of RDFT (Real Symmetric DFT)
58  - dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
59
60  function prototypes
61  - void cdft(picoos_int32, picoos_int32, PICOFFTSG_FFTTYPE *);
62  - void rdft(picoos_int32, picoos_int32, PICOFFTSG_FFTTYPE *);
63  - void ddct(picoos_int32, picoos_int32, PICOFFTSG_FFTTYPE *);
64  - void ddst(picoos_int32, picoos_int32, PICOFFTSG_FFTTYPE *);
65  - void dfct(picoos_int32, PICOFFTSG_FFTTYPE *);
66  - void dfst(picoos_int32, PICOFFTSG_FFTTYPE *);
67
68  <b>Complex DFT (Discrete Fourier Transform)</b>
69
70  [definition]
71  - <case1>
72      - X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
73  - <case2>
74      - X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
75  - (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
76
77  [usage]
78  - <case1>
79      - cdft(2*n, 1, a);
80  - <case2>
81      - cdft(2*n, -1, a);
82
83  [parameters]
84  - 2*n            :data length (picoos_int32)
85  - n >= 1, n = power of 2
86  - a[0...2*n-1]   :input/output data (PICOFFTSG_FFTTYPE *)
87  - input data
88      - a[2*j] = Re(x[j]),
89      - a[2*j+1] = Im(x[j]), 0<=j<n
90  - output data
91      - a[2*k] = Re(X[k]),
92      - a[2*k+1] = Im(X[k]), 0<=k<n
93
94  [remark]
95  - Inverse of cdft(2*n, -1, a); is
96      -cdft(2*n, 1, a);
97          - for (j = 0; j <= 2 * n - 1; j++) {
98              - a[j] *= 1.0 / n;
99          - }
100
101
102  <b> Real DFT / Inverse of Real DFT </b>
103
104  [definition]
105  - <case1> RDFT
106      - R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
107      - I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
108  -    <case2> IRDFT (excluding scale)
109      - a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
110      - sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
111      - sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
112
113  [usage]
114  - <case1>
115      - rdft(n, 1, a);
116  -    <case2>
117      - rdft(n, -1, a);
118
119  [parameters]
120  - n              :data length (picoos_int32)
121  - n >= 2, n = power of 2
122  - a[0...n-1]     :input/output data (PICOFFTSG_FFTTYPE *)
123  - <case1>
124      - output data
125          - a[2*k] = R[k], 0<=k<n/2
126          - a[2*k+1] = I[k], 0<k<n/2
127          - a[1] = R[n/2]
128  - <case2>
129      - input data
130          - a[2*j] = R[j], 0<=j<n/2
131          - a[2*j+1] = I[j], 0<j<n/2
132          - a[1] = R[n/2]
133
134  [remark]
135  - Inverse of rdft(n, 1, a); is
136  - rdft(n, -1, a);
137      - for (j = 0; j <= n - 1; j++) {
138          - a[j] *= 2.0 / n;
139     -}
140
141
142  <b> DCT (Discrete Cosine Transform) / Inverse of DCT</b>
143
144  [definition]
145  - <case1> IDCT (excluding scale)
146      - C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
147  - <case2> DCT
148      - C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
149
150  [usage]
151  - <case1>
152    - ddct(n, 1, a);
153  - <case2>
154    - ddct(n, -1, a);
155
156  [parameters]
157  - n              :data length (picoos_int32)
158  - n >= 2, n = power of 2
159  - a[0...n-1]     :input/output data (PICOFFTSG_FFTTYPE *)
160  - output data
161      - a[k] = C[k], 0<=k<n
162
163  [remark]
164  - Inverse of ddct(n, -1, a); is
165  - a[0] *= 0.5;
166  - ddct(n, 1, a);
167  - for (j = 0; j <= n - 1; j++) {
168      - a[j] *= 2.0 / n;
169  - }
170
171  <b> DST (Discrete Sine Transform) / Inverse of DST</b>
172
173  [definition]
174  - <case1> IDST (excluding scale)
175      - S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
176  - <case2> DST
177      - S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
178
179  [usage]
180  - <case1>
181      - ddst(n, 1, a);
182  - <case2>
183      - ddst(n, -1, a);
184
185  [parameters]
186  - n              :data length (picoos_int32)
187  - n >= 2, n = power of 2
188  - a[0...n-1]     :input/output data (PICOFFTSG_FFTTYPE *)
189  - <case1>
190      - input data
191          - a[j] = A[j], 0<j<n
192          - a[0] = A[n]
193      - output data
194          - a[k] = S[k], 0<=k<n
195  - <case2>
196    - output data
197        - a[k] = S[k], 0<k<n
198        - a[0] = S[n]
199
200  [remark]
201  - Inverse of ddst(n, -1, a); is
202  - a[0] *= 0.5;
203  - ddst(n, 1, a);
204  - for (j = 0; j <= n - 1; j++) {
205      - a[j] *= 2.0 / n;
206  - }
207
208  <b> Cosine Transform of RDFT (Real Symmetric DFT)</b>
209
210  [definition]
211  - C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
212
213  [usage]
214  - dfct(n, a);
215
216  [parameters]
217  - n              :data length - 1 (picoos_int32)
218  - n >= 2, n = power of 2
219  - a[0...n]       :input/output data (PICOFFTSG_FFTTYPE *)
220
221  - output data
222    - a[k] = C[k], 0<=k<=n
223
224  [remark]
225  - Inverse of a[0] *= 0.5; a[n] *= 0.5; dfct(n, a); is
226  - a[0] *= 0.5;
227  - a[n] *= 0.5;
228  - dfct(n, a);
229  - for (j = 0; j <= n; j++) {
230      - a[j] *= 2.0 / n;
231  - }
232
233  <b> Sine Transform of RDFT (Real Anti-symmetric DFT)</b>
234
235  [definition]
236  - S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
237
238  [usage]
239  - dfst(n, a);
240
241  [parameters]
242  - n              :data length + 1 (picoos_int32)
243  - n >= 2, n = power of 2
244  - a[0...n-1]     :input/output data (PICOFFTSG_FFTTYPE *)
245  - output data
246      - a[k] = S[k], 0<k<n
247      - (a[0] is used for work area)
248
249  [remark]
250  - Inverse of dfst(n, a); is
251  - dfst(n, a);
252  - for (j = 1; j <= n - 1; j++) {
253    - a[j] *= 2.0 / n;
254  - }
255
256*/
257
258/* fixed point multiplier for weights */
259#define PICODSP_WGT_SHIFT  (0x20000000)  /* 2^29 */
260#define PICOFFTSG_WGT_SHIFT2 (0x10000000)  /* PICODSP_WGT_SHIFT/2 */
261#define PICOFFTSG_WGT_N_SHIFT (29)  /* 2^29 */
262/* fixed point known constants */
263#ifndef WR5000  /* cos(M_PI_2*0.5000) */
264#define WR5000      (PICOFFTSG_FFTTYPE)(0.707106781186547524400844362104849039284835937688*PICODSP_WGT_SHIFT)
265#ifndef WR2500  /* cos(M_PI_2*0.2500) */
266#define WR2500      (PICOFFTSG_FFTTYPE)(0.923879532511286756128183189396788286822416625863*PICODSP_WGT_SHIFT)
267#endif
268#ifndef WI2500  /* sin(M_PI_2*0.2500) */
269#define WI2500      (PICOFFTSG_FFTTYPE)(0.382683432365089771728459984030398866761344562485*PICODSP_WGT_SHIFT)
270#endif
271#ifndef WR1250  /* cos(M_PI_2*0.1250) */
272#define WR1250      (PICOFFTSG_FFTTYPE)(0.980785280403230449126182236134239036973933730893*PICODSP_WGT_SHIFT)
273#endif
274#ifndef WI1250  /* sin(M_PI_2*0.1250) */
275#define WI1250      (PICOFFTSG_FFTTYPE)(0.195090322016128267848284868477022240927691617751*PICODSP_WGT_SHIFT)
276#endif
277#ifndef WR3750  /* cos(M_PI_2*0.3750) */
278#define WR3750      (PICOFFTSG_FFTTYPE)(0.831469612302545237078788377617905756738560811987*PICODSP_WGT_SHIFT)
279#endif
280#ifndef WI3750  /* sin(M_PI_2*0.3750) */
281#define WI3750      (PICOFFTSG_FFTTYPE)(0.555570233019602224742830813948532874374937190754*PICODSP_WGT_SHIFT)
282#endif
283
284#else
285
286 /*#ifndef M_PI_2
287   #define M_PI_2      (PICOFFTSG_FFTTYPE)1.570796326794896619231321691639751442098584699687
288   #endif*/
289#ifndef WR5000  /* cos(M_PI_2*0.5000) */
290#define WR5000      (PICOFFTSG_FFTTYPE)0.707106781186547524400844362104849039284835937688
291#endif
292#ifndef WR2500  /* cos(M_PI_2*0.2500) */
293#define WR2500      (PICOFFTSG_FFTTYPE)0.923879532511286756128183189396788286822416625863
294#endif
295#ifndef WI2500  /* sin(M_PI_2*0.2500) */
296#define WI2500      (PICOFFTSG_FFTTYPE)0.382683432365089771728459984030398866761344562485
297#endif
298#ifndef WR1250  /* cos(M_PI_2*0.1250) */
299#define WR1250      (PICOFFTSG_FFTTYPE)0.980785280403230449126182236134239036973933730893
300#endif
301#ifndef WI1250  /* sin(M_PI_2*0.1250) */
302#define WI1250      (PICOFFTSG_FFTTYPE)0.195090322016128267848284868477022240927691617751
303#endif
304#ifndef WR3750  /* cos(M_PI_2*0.3750) */
305#define WR3750      (PICOFFTSG_FFTTYPE)0.831469612302545237078788377617905756738560811987
306#endif
307#ifndef WI3750  /* sin(M_PI_2*0.3750) */
308#define WI3750      (PICOFFTSG_FFTTYPE)0.555570233019602224742830813948532874374937190754
309#endif
310
311#endif
312
313#ifndef CDFT_LOOP_DIV  /* control of the CDFT's speed & tolerance */
314#define CDFT_LOOP_DIV 32
315#define CDFT_LOOP_DIV_4 128
316#endif
317
318#ifndef RDFT_LOOP_DIV  /* control of the RDFT's speed & tolerance */
319#define RDFT_LOOP_DIV 64
320#define RDFT_LOOP_DIV4 256
321#define RDFT_LOOP_DIV_4 256
322#endif
323
324#ifndef DCST_LOOP_DIV  /* control of the DCT,DST's speed & tolerance */
325#define DCST_LOOP_DIV 64
326#define DCST_LOOP_DIV2 128
327#endif
328
329
330#define POW1 (0x1)
331#define POW2 (0x2)
332#define POW3 (0x4)
333#define POW4 (0x8)
334#define POW5 (0x10)
335#define POW6 (0x20)
336#define POW7 (0x40)
337#define POW8 (0x80)
338#define POW9 (0x100)
339#define POW10 (0x200)
340#define POW11 (0x400)
341#define POW12 (0x800)
342#define POW13 (0x1000)
343#define POW14 (0x2000)
344#define POW15 (0x4000)
345#define POW16 (0x8000)
346#define POW17 (0x10000)
347#define POW18 (0x20000)
348#define POW19 (0x40000)
349#define POW20 (0x80000)
350#define POW21 (0x100000)
351#define POW22 (0x200000)
352#define POW23 (0x400000)
353#define POW24 (0x800000)
354#define POW25 (0x1000000)
355#define POW26 (0x2000000)
356#define POW27 (0x4000000)
357#define POW28 (0x8000000)
358#define POW29 (0x10000000)
359#define POW30 (0x20000000)
360#define POW31 (0x40000000)
361/**************
362 * useful macros
363 ************** */
364#define picofftsg_highestBitPos(x) (x==0?0:(x>=POW17?(x>=POW25?(x>=POW29?(x>=POW31?31:(x>=POW30?30:29)):(x>=POW27?(x>=POW28?28:27):(x>=POW26?26:25))):(x>=POW21?(x>=POW23?(x>=POW24?24:23):(x>=POW22?22:21)):(x>=POW19?(x>=POW20?20:19):(x>=POW18?18:17)))):(x>=POW9?(x>=POW13?(x>=POW15?(x>=POW16?16:15):(x>=POW14?14:13)):(x>=POW11?(x>=POW12?12:11):(x>=POW10?10:9))):(x>=POW5?(x>=POW7?(x>=POW8?8:7):(x>=POW6?6:5)):(x>=POW3?(x>=POW4?4:3):(x>=POW2?2:1))))))
365#define picofftsg_highestBit(x) (x==0?0:(x<0?(zz=-x,(zz>=POW17?(zz>=POW25?(zz>=POW29?(zz>=POW31?31:(zz>=POW30?30:29)):(zz>=POW27?(zz>=POW28?28:27):(zz>=POW26?26:25))):(zz>=POW21?(zz>=POW23?(zz>=POW24?24:23):(zz>=POW22?22:21)):(zz>=POW19?(zz>=POW20?20:19):(zz>=POW18?18:17)))):(zz>=POW9?(zz>=POW13?(zz>=POW15?(zz>=POW16?16:15):(zz>=POW14?14:13)):(zz>=POW11?(zz>=POW12?12:11):(zz>=POW10?10:9))):(zz>=POW5?(zz>=POW7?(zz>=POW8?8:7):(zz>=POW6?6:5)):(zz>=POW3?(zz>=POW4?4:3):(zz>=POW2?2:1)))))):(x>=POW17?(x>=POW25?(x>=POW29?(x>=POW31?31:(x>=POW30?30:29)):(x>=POW27?(x>=POW28?28:27):(x>=POW26?26:25))):(x>=POW21?(x>=POW23?(x>=POW24?24:23):(x>=POW22?22:21)):(x>=POW19?(x>=POW20?20:19):(x>=POW18?18:17)))):(x>=POW9?(x>=POW13?(x>=POW15?(x>=POW16?16:15):(x>=POW14?14:13)):(x>=POW11?(x>=POW12?12:11):(x>=POW10?10:9))):(x>=POW5?(x>=POW7?(x>=POW8?8:7):(x>=POW6?6:5)):(x>=POW3?(x>=POW4?4:3):(x>=POW2?2:1)))))))
366#define Mult_W_W picofftsg_mult_w_w
367
368
369/* ***********************************************************************************************/
370/* forward declarations */
371/* ***********************************************************************************************/
372static PICOFFTSG_FFTTYPE picofftsg_mult_w_w(PICOFFTSG_FFTTYPE x1, PICOFFTSG_FFTTYPE y1);
373static PICOFFTSG_FFTTYPE picofftsg_mult_w_a(PICOFFTSG_FFTTYPE x1, PICOFFTSG_FFTTYPE y1);
374
375
376static void cftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
377static void cftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
378static void rftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
379static void rftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
380
381static void cftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
382static void cftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
383static void rftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
384static void rftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
385static void dctsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
386static void dctsub4(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
387
388static void ddct(picoos_int32 n, picoos_int32 isgn, PICOFFTSG_FFTTYPE *a);
389static void bitrv1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
390
391static void bitrv2(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
392static void bitrv216(PICOFFTSG_FFTTYPE *a);
393static void bitrv208(PICOFFTSG_FFTTYPE *a);
394static void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
395static void cftrec4(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
396static void cftleaf(picoos_int32 n, picoos_int32 isplt, PICOFFTSG_FFTTYPE *a);
397static void cftfx41(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
398static void cftf161(PICOFFTSG_FFTTYPE *a);
399static void cftf081(PICOFFTSG_FFTTYPE *a);
400static void cftf040(PICOFFTSG_FFTTYPE *a);
401static void cftx020(PICOFFTSG_FFTTYPE *a);
402
403void bitrv2conj(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
404void bitrv216neg(PICOFFTSG_FFTTYPE *a);
405void bitrv208neg(PICOFFTSG_FFTTYPE *a);
406void cftb1st(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
407void cftrec4(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
408void cftleaf(picoos_int32 n, picoos_int32 isplt, PICOFFTSG_FFTTYPE *a);
409void cftfx41(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
410void cftf161(PICOFFTSG_FFTTYPE *a);
411void cftf081(PICOFFTSG_FFTTYPE *a);
412void cftb040(PICOFFTSG_FFTTYPE *a);
413void cftx020(PICOFFTSG_FFTTYPE *a);
414
415static picoos_int32 cfttree(picoos_int32 n, picoos_int32 j, picoos_int32 k, PICOFFTSG_FFTTYPE *a);
416static void cftleaf(picoos_int32 n, picoos_int32 isplt, PICOFFTSG_FFTTYPE *a);
417static void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
418
419static void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
420static void cftmdl2(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
421
422static void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
423static void cftmdl2(picoos_int32 n, PICOFFTSG_FFTTYPE *a);
424static void cftf161(PICOFFTSG_FFTTYPE *a);
425static void cftf162(PICOFFTSG_FFTTYPE *a);
426static void cftf081(PICOFFTSG_FFTTYPE *a);
427static void cftf082(PICOFFTSG_FFTTYPE *a);
428
429static void cftf161(PICOFFTSG_FFTTYPE *a);
430static void cftf162(PICOFFTSG_FFTTYPE *a);
431static void cftf081(PICOFFTSG_FFTTYPE *a);
432static void cftf082(PICOFFTSG_FFTTYPE *a);
433
434/* ***********************************************************************************************/
435/* Exported functions */
436/* ***********************************************************************************************/
437void rdft(picoos_int32 n, picoos_int32 isgn, PICOFFTSG_FFTTYPE *a)
438{
439    PICOFFTSG_FFTTYPE xi;
440
441    if (isgn >= 0) {
442        if (n > 4) {
443            cftfsub(n, a);
444            rftfsub(n, a);
445        } else if (n == 4) {
446            cftfsub(n, a);
447        }
448        xi = a[0] - a[1];
449        a[0] += a[1];
450        a[1] = xi;
451    } else {
452        a[1] =  (a[0] - a[1]) / 2;
453        a[0] -= a[1];
454        if (n > 4) {
455            rftbsub(n, a);
456            cftbsub(n, a);
457        } else if (n == 4) {
458            cftbsub(n, a);
459        }
460    }
461
462}
463
464
465picoos_single norm_result(picoos_int32 m2, PICOFFTSG_FFTTYPE *tmpX, PICOFFTSG_FFTTYPE *norm_window)
466{
467    picoos_int16 nI;
468    PICOFFTSG_FFTTYPE a,b, E;
469
470    E = (picoos_int32)0;
471    for (nI=0; nI<m2; nI++) {
472        a = (norm_window[nI]>>18) * ((tmpX[nI]>0) ? tmpX[nI]>>11 : -((-tmpX[nI])>>11));
473        tmpX[nI] = a;
474        b = (a>=0?a:-a)  >> 18;
475        E += (b*b);
476    }
477
478    if (E>0) {
479        return (picoos_single)sqrt((double)E/16.0)/m2;
480    }
481    else {
482        return 0.0;
483    }
484}
485
486void ddct(picoos_int32 n, picoos_int32 isgn, PICOFFTSG_FFTTYPE *a)
487{
488    picoos_int32 j;
489    PICOFFTSG_FFTTYPE xr;
490
491    if (isgn < 0) {
492        xr = a[n - 1];
493        for (j = n - 2; j >= 2; j -= 2) {
494            a[j + 1] = a[j] - a[j - 1];
495            a[j] += a[j - 1];
496        }
497        a[1] = a[0] - xr;
498        a[0] += xr;
499        if (n > 4) {
500            rftbsub(n, a);
501            cftbsub(n, a);
502        } else if (n == 4) {
503            cftbsub(n, a);
504        }
505    }
506    if (n > 4) {
507        dctsub(n, a);
508    } else {
509        dctsub4(n, a);
510    }
511    if (isgn >= 0) {
512        if (n > 4) {
513            cftfsub(n, a);
514            rftfsub(n, a);
515        } else if (n == 4) {
516            cftfsub(n, a);
517        }
518
519
520        xr = a[0] - a[1];
521        a[0] += a[1];
522        for (j = 2; j < n; j += 2) {
523            a[j - 1] = a[j] - a[j + 1];
524            a[j] += a[j + 1];
525        }
526        a[n - 1] = xr;
527    }
528}
529
530void dfct_nmf(picoos_int32 n, picoos_int32 *a)
531{
532    picoos_int32 j, k, m, mh;
533    PICOFFTSG_FFTTYPE xr, xi, yr, yi, an;
534    PICOFFTSG_FFTTYPE *aj, *ak, *amj, *amk;
535
536    m = n >> 1;
537    for (j = 0; j < m; j++) {
538        k = n - j;
539        xr = a[j] + a[k];
540        a[j] -= a[k];
541        a[k] = xr;
542    }
543    an = a[n];
544    while (m >= 2) {
545        ddct(m, 1, a);
546        if (m > 2) {
547            bitrv1(m, a);
548        }
549        mh = m >> 1;
550        xi = a[m];
551        a[m] = a[0];
552        a[0] = an - xi;
553        an += xi;
554        k = m-1;
555        aj = a + 1; ak = a + k; amj = aj + m; amk = ak + m;
556        for (j = 1; j < mh; j++, aj++, ak--, amj++, amk--) {
557            xr = *amk;
558            xi = *amj;
559            yr = *aj;
560            yi = *ak;
561            *amj = yr;
562            *amk = yi;
563            *aj = xr - xi;
564            *ak = xr + xi;
565        }
566        xr = *aj;
567        *aj = *amj;
568        *amj = xr;
569
570        m = mh;
571    }
572
573    xi = a[1];
574    a[1] = a[0];
575    a[0] = an + xi;
576    a[n] = an - xi;
577    if (n > 2) {
578        bitrv1(n, a);
579    }
580
581}
582
583/* ***********************************************************************************************/
584/* internal routines */
585/* ***********************************************************************************************/
586/*
587  mult two numbers which are guaranteed to be in the range -1..1
588  shift right as little as possible before mult, and the rest after the mult
589  Also, shift bigger number more - lose less accuracy
590 */
591static PICOFFTSG_FFTTYPE picofftsg_mult_w_w(PICOFFTSG_FFTTYPE x1, PICOFFTSG_FFTTYPE y1)
592{
593    PICOFFTSG_FFTTYPE x, y;
594    x = x1>=0 ? x1>>15 : -((-x1)>>15);
595    y = y1>=0 ? y1>>14 : -((-y1)>>14);
596    return  x * y;
597}
598
599static PICOFFTSG_FFTTYPE picofftsg_mult_w_a(PICOFFTSG_FFTTYPE x1, PICOFFTSG_FFTTYPE y1)
600{
601    PICOFFTSG_FFTTYPE x, y;
602
603
604    x = x1>=0 ? x1>>15 : -((-x1)>>15);
605    y = y1>=0 ? y1>>14 : -((-y1)>>14);
606    return x * y;
607}
608
609static void cftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
610{
611
612    if (n > 8) {
613        if (n > 32) {
614            cftmdl1(n, a);
615                if (n > 512) {
616                    cftrec4(n, a);
617                } else if (n > 128) {
618                    cftleaf(n, 1, a);
619                } else {
620                    cftfx41(n, a);
621                }
622            bitrv2(n, a);
623        } else if (n == 32) {
624            cftf161(a);
625            bitrv216(a);
626        } else {
627            cftf081(a);
628            bitrv208(a);
629        }
630    } else if (n == 8) {
631        cftf040(a);
632    } else if (n == 4) {
633        cftx020(a);
634    }
635}
636
637
638void cftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
639{
640    if (n > 8) {
641        if (n > 32) {
642            cftb1st(n, a);
643                if (n > 512) {
644                    cftrec4(n, a);
645                } else if (n > 128) {
646                    cftleaf(n, 1, a);
647                } else {
648                    cftfx41(n, a);
649                }
650            bitrv2conj(n, a);
651        } else if (n == 32) {
652            cftf161(a);
653            bitrv216neg(a);
654        } else {
655            cftf081(a);
656            bitrv208neg(a);
657        }
658    } else if (n == 8) {
659        cftb040(a);
660    } else if (n == 4) {
661        cftx020(a);
662    }
663}
664
665/* **************************************************************************************************/
666
667/* **************************************************************************************************/
668void bitrv2(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
669{
670    picoos_int32 j0, k0, j1, k1, l, m, i, j, k, nh, m2;
671    PICOFFTSG_FFTTYPE xr, xi, yr, yi;
672
673    m = 4;
674    for (l = n >> 2; l > 8; l >>= 2) {
675        m <<= 1;
676    }
677    m2 = m + m;
678    nh = n >> 1;
679    if (l == 8) {
680        j0 = 0;
681        for (k0 = 0; k0 < m; k0 += 4) {
682            k = k0;
683            for (j = j0; j < j0 + k0; j += 4) {
684                xr = a[j];
685                xi = a[j + 1];
686                yr = a[k];
687                yi = a[k + 1];
688                a[j] = yr;
689                a[j + 1] = yi;
690                a[k] = xr;
691                a[k + 1] = xi;
692                j1 = j + m;
693                k1 = k + m2;
694                xr = a[j1];
695                xi = a[j1 + 1];
696                yr = a[k1];
697                yi = a[k1 + 1];
698                a[j1] = yr;
699                a[j1 + 1] = yi;
700                a[k1] = xr;
701                a[k1 + 1] = xi;
702                j1 += m;
703                k1 -= m;
704                xr = a[j1];
705                xi = a[j1 + 1];
706                yr = a[k1];
707                yi = a[k1 + 1];
708                a[j1] = yr;
709                a[j1 + 1] = yi;
710                a[k1] = xr;
711                a[k1 + 1] = xi;
712                j1 += m;
713                k1 += m2;
714                xr = a[j1];
715                xi = a[j1 + 1];
716                yr = a[k1];
717                yi = a[k1 + 1];
718                a[j1] = yr;
719                a[j1 + 1] = yi;
720                a[k1] = xr;
721                a[k1 + 1] = xi;
722                j1 += nh;
723                k1 += 2;
724                xr = a[j1];
725                xi = a[j1 + 1];
726                yr = a[k1];
727                yi = a[k1 + 1];
728                a[j1] = yr;
729                a[j1 + 1] = yi;
730                a[k1] = xr;
731                a[k1 + 1] = xi;
732                j1 -= m;
733                k1 -= m2;
734                xr = a[j1];
735                xi = a[j1 + 1];
736                yr = a[k1];
737                yi = a[k1 + 1];
738                a[j1] = yr;
739                a[j1 + 1] = yi;
740                a[k1] = xr;
741                a[k1 + 1] = xi;
742                j1 -= m;
743                k1 += m;
744                xr = a[j1];
745                xi = a[j1 + 1];
746                yr = a[k1];
747                yi = a[k1 + 1];
748                a[j1] = yr;
749                a[j1 + 1] = yi;
750                a[k1] = xr;
751                a[k1 + 1] = xi;
752                j1 -= m;
753                k1 -= m2;
754                xr = a[j1];
755                xi = a[j1 + 1];
756                yr = a[k1];
757                yi = a[k1 + 1];
758                a[j1] = yr;
759                a[j1 + 1] = yi;
760                a[k1] = xr;
761                a[k1 + 1] = xi;
762                j1 += 2;
763                k1 += nh;
764                xr = a[j1];
765                xi = a[j1 + 1];
766                yr = a[k1];
767                yi = a[k1 + 1];
768                a[j1] = yr;
769                a[j1 + 1] = yi;
770                a[k1] = xr;
771                a[k1 + 1] = xi;
772                j1 += m;
773                k1 += m2;
774                xr = a[j1];
775                xi = a[j1 + 1];
776                yr = a[k1];
777                yi = a[k1 + 1];
778                a[j1] = yr;
779                a[j1 + 1] = yi;
780                a[k1] = xr;
781                a[k1 + 1] = xi;
782                j1 += m;
783                k1 -= m;
784                xr = a[j1];
785                xi = a[j1 + 1];
786                yr = a[k1];
787                yi = a[k1 + 1];
788                a[j1] = yr;
789                a[j1 + 1] = yi;
790                a[k1] = xr;
791                a[k1 + 1] = xi;
792                j1 += m;
793                k1 += m2;
794                xr = a[j1];
795                xi = a[j1 + 1];
796                yr = a[k1];
797                yi = a[k1 + 1];
798                a[j1] = yr;
799                a[j1 + 1] = yi;
800                a[k1] = xr;
801                a[k1 + 1] = xi;
802                j1 -= nh;
803                k1 -= 2;
804                xr = a[j1];
805                xi = a[j1 + 1];
806                yr = a[k1];
807                yi = a[k1 + 1];
808                a[j1] = yr;
809                a[j1 + 1] = yi;
810                a[k1] = xr;
811                a[k1 + 1] = xi;
812                j1 -= m;
813                k1 -= m2;
814                xr = a[j1];
815                xi = a[j1 + 1];
816                yr = a[k1];
817                yi = a[k1 + 1];
818                a[j1] = yr;
819                a[j1 + 1] = yi;
820                a[k1] = xr;
821                a[k1 + 1] = xi;
822                j1 -= m;
823                k1 += m;
824                xr = a[j1];
825                xi = a[j1 + 1];
826                yr = a[k1];
827                yi = a[k1 + 1];
828                a[j1] = yr;
829                a[j1 + 1] = yi;
830                a[k1] = xr;
831                a[k1 + 1] = xi;
832                j1 -= m;
833                k1 -= m2;
834                xr = a[j1];
835                xi = a[j1 + 1];
836                yr = a[k1];
837                yi = a[k1 + 1];
838                a[j1] = yr;
839                a[j1 + 1] = yi;
840                a[k1] = xr;
841                a[k1 + 1] = xi;
842                for (i = nh >> 1; i > (k ^= i); i >>= 1) {
843                    /* Avoid warning*/
844                };
845            }
846            k1 = j0 + k0;
847            j1 = k1 + 2;
848            k1 += nh;
849            xr = a[j1];
850            xi = a[j1 + 1];
851            yr = a[k1];
852            yi = a[k1 + 1];
853            a[j1] = yr;
854            a[j1 + 1] = yi;
855            a[k1] = xr;
856            a[k1 + 1] = xi;
857            j1 += m;
858            k1 += m2;
859            xr = a[j1];
860            xi = a[j1 + 1];
861            yr = a[k1];
862            yi = a[k1 + 1];
863            a[j1] = yr;
864            a[j1 + 1] = yi;
865            a[k1] = xr;
866            a[k1 + 1] = xi;
867            j1 += m;
868            k1 -= m;
869            xr = a[j1];
870            xi = a[j1 + 1];
871            yr = a[k1];
872            yi = a[k1 + 1];
873            a[j1] = yr;
874            a[j1 + 1] = yi;
875            a[k1] = xr;
876            a[k1 + 1] = xi;
877            j1 -= 2;
878            k1 -= nh;
879            xr = a[j1];
880            xi = a[j1 + 1];
881            yr = a[k1];
882            yi = a[k1 + 1];
883            a[j1] = yr;
884            a[j1 + 1] = yi;
885            a[k1] = xr;
886            a[k1 + 1] = xi;
887            j1 += nh + 2;
888            k1 += nh + 2;
889            xr = a[j1];
890            xi = a[j1 + 1];
891            yr = a[k1];
892            yi = a[k1 + 1];
893            a[j1] = yr;
894            a[j1 + 1] = yi;
895            a[k1] = xr;
896            a[k1 + 1] = xi;
897            j1 -= nh - m;
898            k1 += m2 - 2;
899            xr = a[j1];
900            xi = a[j1 + 1];
901            yr = a[k1];
902            yi = a[k1 + 1];
903            a[j1] = yr;
904            a[j1 + 1] = yi;
905            a[k1] = xr;
906            a[k1 + 1] = xi;
907            for (i = nh >> 1; i > (j0 ^= i); i >>= 1) {
908                /* Avoid warning */
909            }
910        }
911    } else {
912        j0 = 0;
913        for (k0 = 0; k0 < m; k0 += 4) {
914            k = k0;
915            for (j = j0; j < j0 + k0; j += 4) {
916                xr = a[j];
917                xi = a[j + 1];
918                yr = a[k];
919                yi = a[k + 1];
920                a[j] = yr;
921                a[j + 1] = yi;
922                a[k] = xr;
923                a[k + 1] = xi;
924                j1 = j + m;
925                k1 = k + m;
926                xr = a[j1];
927                xi = a[j1 + 1];
928                yr = a[k1];
929                yi = a[k1 + 1];
930                a[j1] = yr;
931                a[j1 + 1] = yi;
932                a[k1] = xr;
933                a[k1 + 1] = xi;
934                j1 += nh;
935                k1 += 2;
936                xr = a[j1];
937                xi = a[j1 + 1];
938                yr = a[k1];
939                yi = a[k1 + 1];
940                a[j1] = yr;
941                a[j1 + 1] = yi;
942                a[k1] = xr;
943                a[k1 + 1] = xi;
944                j1 -= m;
945                k1 -= m;
946                xr = a[j1];
947                xi = a[j1 + 1];
948                yr = a[k1];
949                yi = a[k1 + 1];
950                a[j1] = yr;
951                a[j1 + 1] = yi;
952                a[k1] = xr;
953                a[k1 + 1] = xi;
954                j1 += 2;
955                k1 += nh;
956                xr = a[j1];
957                xi = a[j1 + 1];
958                yr = a[k1];
959                yi = a[k1 + 1];
960                a[j1] = yr;
961                a[j1 + 1] = yi;
962                a[k1] = xr;
963                a[k1 + 1] = xi;
964                j1 += m;
965                k1 += m;
966                xr = a[j1];
967                xi = a[j1 + 1];
968                yr = a[k1];
969                yi = a[k1 + 1];
970                a[j1] = yr;
971                a[j1 + 1] = yi;
972                a[k1] = xr;
973                a[k1 + 1] = xi;
974                j1 -= nh;
975                k1 -= 2;
976                xr = a[j1];
977                xi = a[j1 + 1];
978                yr = a[k1];
979                yi = a[k1 + 1];
980                a[j1] = yr;
981                a[j1 + 1] = yi;
982                a[k1] = xr;
983                a[k1 + 1] = xi;
984                j1 -= m;
985                k1 -= m;
986                xr = a[j1];
987                xi = a[j1 + 1];
988                yr = a[k1];
989                yi = a[k1 + 1];
990                a[j1] = yr;
991                a[j1 + 1] = yi;
992                a[k1] = xr;
993                a[k1 + 1] = xi;
994                for (i = nh >> 1; i > (k ^= i); i >>= 1){
995                    /* Avoid warning */
996                }
997            }
998            k1 = j0 + k0;
999            j1 = k1 + 2;
1000            k1 += nh;
1001            xr = a[j1];
1002            xi = a[j1 + 1];
1003            yr = a[k1];
1004            yi = a[k1 + 1];
1005            a[j1] = yr;
1006            a[j1 + 1] = yi;
1007            a[k1] = xr;
1008            a[k1 + 1] = xi;
1009            j1 += m;
1010            k1 += m;
1011            xr = a[j1];
1012            xi = a[j1 + 1];
1013            yr = a[k1];
1014            yi = a[k1 + 1];
1015            a[j1] = yr;
1016            a[j1 + 1] = yi;
1017            a[k1] = xr;
1018            a[k1 + 1] = xi;
1019            for (i = nh >> 1; i > (j0 ^= i); i >>= 1){
1020                /* Avoid  warning */
1021            }
1022        }
1023    }
1024}
1025
1026
1027void bitrv2conj(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
1028{
1029    picoos_int32 j0, k0, j1, k1, l, m, i, j, k, nh, m2;
1030    PICOFFTSG_FFTTYPE xr, xi, yr, yi;
1031
1032
1033    m = 4;
1034    for (l = n >> 2; l > 8; l >>= 2) {
1035        m <<= 1;
1036    }
1037    m2 = m + m;
1038    nh = n >> 1;
1039    if (l == 8) {
1040        j0 = 0;
1041        for (k0 = 0; k0 < m; k0 += 4) {
1042            k = k0;
1043            for (j = j0; j < j0 + k0; j += 4) {
1044                xr = a[j];
1045                xi = -a[j + 1];
1046                yr = a[k];
1047                yi = -a[k + 1];
1048                a[j] = yr;
1049                a[j + 1] = yi;
1050                a[k] = xr;
1051                a[k + 1] = xi;
1052                j1 = j + m;
1053                k1 = k + m2;
1054                xr = a[j1];
1055                xi = -a[j1 + 1];
1056                yr = a[k1];
1057                yi = -a[k1 + 1];
1058                a[j1] = yr;
1059                a[j1 + 1] = yi;
1060                a[k1] = xr;
1061                a[k1 + 1] = xi;
1062                j1 += m;
1063                k1 -= m;
1064                xr = a[j1];
1065                xi = -a[j1 + 1];
1066                yr = a[k1];
1067                yi = -a[k1 + 1];
1068                a[j1] = yr;
1069                a[j1 + 1] = yi;
1070                a[k1] = xr;
1071                a[k1 + 1] = xi;
1072                j1 += m;
1073                k1 += m2;
1074                xr = a[j1];
1075                xi = -a[j1 + 1];
1076                yr = a[k1];
1077                yi = -a[k1 + 1];
1078                a[j1] = yr;
1079                a[j1 + 1] = yi;
1080                a[k1] = xr;
1081                a[k1 + 1] = xi;
1082                j1 += nh;
1083                k1 += 2;
1084                xr = a[j1];
1085                xi = -a[j1 + 1];
1086                yr = a[k1];
1087                yi = -a[k1 + 1];
1088                a[j1] = yr;
1089                a[j1 + 1] = yi;
1090                a[k1] = xr;
1091                a[k1 + 1] = xi;
1092                j1 -= m;
1093                k1 -= m2;
1094                xr = a[j1];
1095                xi = -a[j1 + 1];
1096                yr = a[k1];
1097                yi = -a[k1 + 1];
1098                a[j1] = yr;
1099                a[j1 + 1] = yi;
1100                a[k1] = xr;
1101                a[k1 + 1] = xi;
1102                j1 -= m;
1103                k1 += m;
1104                xr = a[j1];
1105                xi = -a[j1 + 1];
1106                yr = a[k1];
1107                yi = -a[k1 + 1];
1108                a[j1] = yr;
1109                a[j1 + 1] = yi;
1110                a[k1] = xr;
1111                a[k1 + 1] = xi;
1112                j1 -= m;
1113                k1 -= m2;
1114                xr = a[j1];
1115                xi = -a[j1 + 1];
1116                yr = a[k1];
1117                yi = -a[k1 + 1];
1118                a[j1] = yr;
1119                a[j1 + 1] = yi;
1120                a[k1] = xr;
1121                a[k1 + 1] = xi;
1122                j1 += 2;
1123                k1 += nh;
1124                xr = a[j1];
1125                xi = -a[j1 + 1];
1126                yr = a[k1];
1127                yi = -a[k1 + 1];
1128                a[j1] = yr;
1129                a[j1 + 1] = yi;
1130                a[k1] = xr;
1131                a[k1 + 1] = xi;
1132                j1 += m;
1133                k1 += m2;
1134                xr = a[j1];
1135                xi = -a[j1 + 1];
1136                yr = a[k1];
1137                yi = -a[k1 + 1];
1138                a[j1] = yr;
1139                a[j1 + 1] = yi;
1140                a[k1] = xr;
1141                a[k1 + 1] = xi;
1142                j1 += m;
1143                k1 -= m;
1144                xr = a[j1];
1145                xi = -a[j1 + 1];
1146                yr = a[k1];
1147                yi = -a[k1 + 1];
1148                a[j1] = yr;
1149                a[j1 + 1] = yi;
1150                a[k1] = xr;
1151                a[k1 + 1] = xi;
1152                j1 += m;
1153                k1 += m2;
1154                xr = a[j1];
1155                xi = -a[j1 + 1];
1156                yr = a[k1];
1157                yi = -a[k1 + 1];
1158                a[j1] = yr;
1159                a[j1 + 1] = yi;
1160                a[k1] = xr;
1161                a[k1 + 1] = xi;
1162                j1 -= nh;
1163                k1 -= 2;
1164                xr = a[j1];
1165                xi = -a[j1 + 1];
1166                yr = a[k1];
1167                yi = -a[k1 + 1];
1168                a[j1] = yr;
1169                a[j1 + 1] = yi;
1170                a[k1] = xr;
1171                a[k1 + 1] = xi;
1172                j1 -= m;
1173                k1 -= m2;
1174                xr = a[j1];
1175                xi = -a[j1 + 1];
1176                yr = a[k1];
1177                yi = -a[k1 + 1];
1178                a[j1] = yr;
1179                a[j1 + 1] = yi;
1180                a[k1] = xr;
1181                a[k1 + 1] = xi;
1182                j1 -= m;
1183                k1 += m;
1184                xr = a[j1];
1185                xi = -a[j1 + 1];
1186                yr = a[k1];
1187                yi = -a[k1 + 1];
1188                a[j1] = yr;
1189                a[j1 + 1] = yi;
1190                a[k1] = xr;
1191                a[k1 + 1] = xi;
1192                j1 -= m;
1193                k1 -= m2;
1194                xr = a[j1];
1195                xi = -a[j1 + 1];
1196                yr = a[k1];
1197                yi = -a[k1 + 1];
1198                a[j1] = yr;
1199                a[j1 + 1] = yi;
1200                a[k1] = xr;
1201                a[k1 + 1] = xi;
1202                for (i = nh >> 1; i > (k ^= i); i >>= 1) {
1203                    /* Avoid warning */
1204                }
1205            }
1206            k1 = j0 + k0;
1207            j1 = k1 + 2;
1208            k1 += nh;
1209            a[j1 - 1] = -a[j1 - 1];
1210            xr = a[j1];
1211            xi = -a[j1 + 1];
1212            yr = a[k1];
1213            yi = -a[k1 + 1];
1214            a[j1] = yr;
1215            a[j1 + 1] = yi;
1216            a[k1] = xr;
1217            a[k1 + 1] = xi;
1218            a[k1 + 3] = -a[k1 + 3];
1219            j1 += m;
1220            k1 += m2;
1221            xr = a[j1];
1222            xi = -a[j1 + 1];
1223            yr = a[k1];
1224            yi = -a[k1 + 1];
1225            a[j1] = yr;
1226            a[j1 + 1] = yi;
1227            a[k1] = xr;
1228            a[k1 + 1] = xi;
1229            j1 += m;
1230            k1 -= m;
1231            xr = a[j1];
1232            xi = -a[j1 + 1];
1233            yr = a[k1];
1234            yi = -a[k1 + 1];
1235            a[j1] = yr;
1236            a[j1 + 1] = yi;
1237            a[k1] = xr;
1238            a[k1 + 1] = xi;
1239            j1 -= 2;
1240            k1 -= nh;
1241            xr = a[j1];
1242            xi = -a[j1 + 1];
1243            yr = a[k1];
1244            yi = -a[k1 + 1];
1245            a[j1] = yr;
1246            a[j1 + 1] = yi;
1247            a[k1] = xr;
1248            a[k1 + 1] = xi;
1249            j1 += nh + 2;
1250            k1 += nh + 2;
1251            xr = a[j1];
1252            xi = -a[j1 + 1];
1253            yr = a[k1];
1254            yi = -a[k1 + 1];
1255            a[j1] = yr;
1256            a[j1 + 1] = yi;
1257            a[k1] = xr;
1258            a[k1 + 1] = xi;
1259            j1 -= nh - m;
1260            k1 += m2 - 2;
1261            a[j1 - 1] = -a[j1 - 1];
1262            xr = a[j1];
1263            xi = -a[j1 + 1];
1264            yr = a[k1];
1265            yi = -a[k1 + 1];
1266            a[j1] = yr;
1267            a[j1 + 1] = yi;
1268            a[k1] = xr;
1269            a[k1 + 1] = xi;
1270            a[k1 + 3] = -a[k1 + 3];
1271            for (i = nh >> 1; i > (j0 ^= i); i >>= 1) { /* Avoid warning*/
1272
1273            }
1274        }
1275    } else {
1276        j0 = 0;
1277        for (k0 = 0; k0 < m; k0 += 4) {
1278            k = k0;
1279            for (j = j0; j < j0 + k0; j += 4) {
1280                xr = a[j];
1281                xi = -a[j + 1];
1282                yr = a[k];
1283                yi = -a[k + 1];
1284                a[j] = yr;
1285                a[j + 1] = yi;
1286                a[k] = xr;
1287                a[k + 1] = xi;
1288                j1 = j + m;
1289                k1 = k + m;
1290                xr = a[j1];
1291                xi = -a[j1 + 1];
1292                yr = a[k1];
1293                yi = -a[k1 + 1];
1294                a[j1] = yr;
1295                a[j1 + 1] = yi;
1296                a[k1] = xr;
1297                a[k1 + 1] = xi;
1298                j1 += nh;
1299                k1 += 2;
1300                xr = a[j1];
1301                xi = -a[j1 + 1];
1302                yr = a[k1];
1303                yi = -a[k1 + 1];
1304                a[j1] = yr;
1305                a[j1 + 1] = yi;
1306                a[k1] = xr;
1307                a[k1 + 1] = xi;
1308                j1 -= m;
1309                k1 -= m;
1310                xr = a[j1];
1311                xi = -a[j1 + 1];
1312                yr = a[k1];
1313                yi = -a[k1 + 1];
1314                a[j1] = yr;
1315                a[j1 + 1] = yi;
1316                a[k1] = xr;
1317                a[k1 + 1] = xi;
1318                j1 += 2;
1319                k1 += nh;
1320                xr = a[j1];
1321                xi = -a[j1 + 1];
1322                yr = a[k1];
1323                yi = -a[k1 + 1];
1324                a[j1] = yr;
1325                a[j1 + 1] = yi;
1326                a[k1] = xr;
1327                a[k1 + 1] = xi;
1328                j1 += m;
1329                k1 += m;
1330                xr = a[j1];
1331                xi = -a[j1 + 1];
1332                yr = a[k1];
1333                yi = -a[k1 + 1];
1334                a[j1] = yr;
1335                a[j1 + 1] = yi;
1336                a[k1] = xr;
1337                a[k1 + 1] = xi;
1338                j1 -= nh;
1339                k1 -= 2;
1340                xr = a[j1];
1341                xi = -a[j1 + 1];
1342                yr = a[k1];
1343                yi = -a[k1 + 1];
1344                a[j1] = yr;
1345                a[j1 + 1] = yi;
1346                a[k1] = xr;
1347                a[k1 + 1] = xi;
1348                j1 -= m;
1349                k1 -= m;
1350                xr = a[j1];
1351                xi = -a[j1 + 1];
1352                yr = a[k1];
1353                yi = -a[k1 + 1];
1354                a[j1] = yr;
1355                a[j1 + 1] = yi;
1356                a[k1] = xr;
1357                a[k1 + 1] = xi;
1358                for (i = nh >> 1; i > (k ^= i); i >>= 1) {
1359                    /* Avoid warning*/
1360                }
1361            }
1362            k1 = j0 + k0;
1363            j1 = k1 + 2;
1364            k1 += nh;
1365            a[j1 - 1] = -a[j1 - 1];
1366            xr = a[j1];
1367            xi = -a[j1 + 1];
1368            yr = a[k1];
1369            yi = -a[k1 + 1];
1370            a[j1] = yr;
1371            a[j1 + 1] = yi;
1372            a[k1] = xr;
1373            a[k1 + 1] = xi;
1374            a[k1 + 3] = -a[k1 + 3];
1375            j1 += m;
1376            k1 += m;
1377            a[j1 - 1] = -a[j1 - 1];
1378            xr = a[j1];
1379            xi = -a[j1 + 1];
1380            yr = a[k1];
1381            yi = -a[k1 + 1];
1382            a[j1] = yr;
1383            a[j1 + 1] = yi;
1384            a[k1] = xr;
1385            a[k1 + 1] = xi;
1386            a[k1 + 3] = -a[k1 + 3];
1387            for (i = nh >> 1; i > (j0 ^= i); i >>= 1) {
1388                /* Avoid warning*/
1389            }
1390        }
1391    }
1392}
1393
1394
1395void bitrv216(PICOFFTSG_FFTTYPE *a)
1396{
1397    PICOFFTSG_FFTTYPE x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1398        x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i,
1399        x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1400
1401    x1r = a[2];
1402    x1i = a[3];
1403    x2r = a[4];
1404    x2i = a[5];
1405    x3r = a[6];
1406    x3i = a[7];
1407    x4r = a[8];
1408    x4i = a[9];
1409    x5r = a[10];
1410    x5i = a[11];
1411    x7r = a[14];
1412    x7i = a[15];
1413    x8r = a[16];
1414    x8i = a[17];
1415    x10r = a[20];
1416    x10i = a[21];
1417    x11r = a[22];
1418    x11i = a[23];
1419    x12r = a[24];
1420    x12i = a[25];
1421    x13r = a[26];
1422    x13i = a[27];
1423    x14r = a[28];
1424    x14i = a[29];
1425    a[2] = x8r;
1426    a[3] = x8i;
1427    a[4] = x4r;
1428    a[5] = x4i;
1429    a[6] = x12r;
1430    a[7] = x12i;
1431    a[8] = x2r;
1432    a[9] = x2i;
1433    a[10] = x10r;
1434    a[11] = x10i;
1435    a[14] = x14r;
1436    a[15] = x14i;
1437    a[16] = x1r;
1438    a[17] = x1i;
1439    a[20] = x5r;
1440    a[21] = x5i;
1441    a[22] = x13r;
1442    a[23] = x13i;
1443    a[24] = x3r;
1444    a[25] = x3i;
1445    a[26] = x11r;
1446    a[27] = x11i;
1447    a[28] = x7r;
1448    a[29] = x7i;
1449}
1450
1451
1452void bitrv216neg(PICOFFTSG_FFTTYPE *a)
1453{
1454    PICOFFTSG_FFTTYPE x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1455        x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i,
1456        x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i,
1457        x13r, x13i, x14r, x14i, x15r, x15i;
1458
1459    x1r = a[2];
1460    x1i = a[3];
1461    x2r = a[4];
1462    x2i = a[5];
1463    x3r = a[6];
1464    x3i = a[7];
1465    x4r = a[8];
1466    x4i = a[9];
1467    x5r = a[10];
1468    x5i = a[11];
1469    x6r = a[12];
1470    x6i = a[13];
1471    x7r = a[14];
1472    x7i = a[15];
1473    x8r = a[16];
1474    x8i = a[17];
1475    x9r = a[18];
1476    x9i = a[19];
1477    x10r = a[20];
1478    x10i = a[21];
1479    x11r = a[22];
1480    x11i = a[23];
1481    x12r = a[24];
1482    x12i = a[25];
1483    x13r = a[26];
1484    x13i = a[27];
1485    x14r = a[28];
1486    x14i = a[29];
1487    x15r = a[30];
1488    x15i = a[31];
1489    a[2] = x15r;
1490    a[3] = x15i;
1491    a[4] = x7r;
1492    a[5] = x7i;
1493    a[6] = x11r;
1494    a[7] = x11i;
1495    a[8] = x3r;
1496    a[9] = x3i;
1497    a[10] = x13r;
1498    a[11] = x13i;
1499    a[12] = x5r;
1500    a[13] = x5i;
1501    a[14] = x9r;
1502    a[15] = x9i;
1503    a[16] = x1r;
1504    a[17] = x1i;
1505    a[18] = x14r;
1506    a[19] = x14i;
1507    a[20] = x6r;
1508    a[21] = x6i;
1509    a[22] = x10r;
1510    a[23] = x10i;
1511    a[24] = x2r;
1512    a[25] = x2i;
1513    a[26] = x12r;
1514    a[27] = x12i;
1515    a[28] = x4r;
1516    a[29] = x4i;
1517    a[30] = x8r;
1518    a[31] = x8i;
1519}
1520
1521
1522void bitrv208(PICOFFTSG_FFTTYPE *a)
1523{
1524    PICOFFTSG_FFTTYPE x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1525
1526    x1r = a[2];
1527    x1i = a[3];
1528    x3r = a[6];
1529    x3i = a[7];
1530    x4r = a[8];
1531    x4i = a[9];
1532    x6r = a[12];
1533    x6i = a[13];
1534    a[2] = x4r;
1535    a[3] = x4i;
1536    a[6] = x6r;
1537    a[7] = x6i;
1538    a[8] = x1r;
1539    a[9] = x1i;
1540    a[12] = x3r;
1541    a[13] = x3i;
1542}
1543
1544
1545void bitrv208neg(PICOFFTSG_FFTTYPE *a)
1546{
1547    PICOFFTSG_FFTTYPE x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1548        x5r, x5i, x6r, x6i, x7r, x7i;
1549
1550    x1r = a[2];
1551    x1i = a[3];
1552    x2r = a[4];
1553    x2i = a[5];
1554    x3r = a[6];
1555    x3i = a[7];
1556    x4r = a[8];
1557    x4i = a[9];
1558    x5r = a[10];
1559    x5i = a[11];
1560    x6r = a[12];
1561    x6i = a[13];
1562    x7r = a[14];
1563    x7i = a[15];
1564    a[2] = x7r;
1565    a[3] = x7i;
1566    a[4] = x3r;
1567    a[5] = x3i;
1568    a[6] = x5r;
1569    a[7] = x5i;
1570    a[8] = x1r;
1571    a[9] = x1i;
1572    a[10] = x6r;
1573    a[11] = x6i;
1574    a[12] = x2r;
1575    a[13] = x2i;
1576    a[14] = x4r;
1577    a[15] = x4i;
1578}
1579
1580
1581void bitrv1(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
1582{
1583    picoos_int32 j0, k0, j1, k1, l, m, i, j, k, nh;
1584    PICOFFTSG_FFTTYPE x;
1585    nh = n >> 1;
1586    x = a[1];
1587    a[1] = a[nh];
1588    a[nh] = x;
1589    m = 2;
1590    for (l = n >> 2; l > 2; l >>= 2) {
1591        m <<= 1;
1592    }
1593    if (l == 2) {
1594        j1 = m + 1;
1595        k1 = m + nh;
1596        x = a[j1];
1597        a[j1] = a[k1];
1598        a[k1] = x;
1599        j0 = 0;
1600        for (k0 = 2; k0 < m; k0 += 2) {
1601            for (i = nh >> 1; i > (j0 ^= i); i >>= 1) {
1602                /* Avoid warning*/
1603            }
1604            k = k0;
1605            for (j = j0; j < j0 + k0; j += 2) {
1606                x = a[j];
1607                a[j] = a[k];
1608                a[k] = x;
1609                j1 = j + m;
1610                k1 = k + m;
1611                x = a[j1];
1612                a[j1] = a[k1];
1613                a[k1] = x;
1614                j1 += nh;
1615                k1++;
1616                x = a[j1];
1617                a[j1] = a[k1];
1618                a[k1] = x;
1619                j1 -= m;
1620                k1 -= m;
1621                x = a[j1];
1622                a[j1] = a[k1];
1623                a[k1] = x;
1624                j1++;
1625                k1 += nh;
1626                x = a[j1];
1627                a[j1] = a[k1];
1628                a[k1] = x;
1629                j1 += m;
1630                k1 += m;
1631                x = a[j1];
1632                a[j1] = a[k1];
1633                a[k1] = x;
1634                j1 -= nh;
1635                k1--;
1636                x = a[j1];
1637                a[j1] = a[k1];
1638                a[k1] = x;
1639                j1 -= m;
1640                k1 -= m;
1641                x = a[j1];
1642                a[j1] = a[k1];
1643                a[k1] = x;
1644                for (i = nh >> 1; i > (k ^= i); i >>= 1) {
1645                    /* Avoid warning*/
1646                }
1647            }
1648            k1 = j0 + k0;
1649            j1 = k1 + 1;
1650            k1 += nh;
1651            x = a[j1];
1652            a[j1] = a[k1];
1653            a[k1] = x;
1654            j1 += m;
1655            k1 += m;
1656            x = a[j1];
1657            a[j1] = a[k1];
1658            a[k1] = x;
1659        }
1660    } else {
1661        j0 = 0;
1662        for (k0 = 2; k0 < m; k0 += 2) {
1663            for (i = nh >> 1; i > (j0 ^= i); i >>= 1) {
1664                /* Avoid warning*/
1665            }
1666            k = k0;
1667            for (j = j0; j < j0 + k0; j += 2) {
1668                x = a[j];
1669                a[j] = a[k];
1670                a[k] = x;
1671                j1 = j + nh;
1672                k1 = k + 1;
1673                x = a[j1];
1674                a[j1] = a[k1];
1675                a[k1] = x;
1676                j1++;
1677                k1 += nh;
1678                x = a[j1];
1679                a[j1] = a[k1];
1680                a[k1] = x;
1681                j1 -= nh;
1682                k1--;
1683                x = a[j1];
1684                a[j1] = a[k1];
1685                a[k1] = x;
1686                for (i = nh >> 1; i > (k ^= i); i >>= 1) {
1687                    /* Avoid warning*/
1688                }
1689            }
1690            k1 = j0 + k0;
1691            j1 = k1 + 1;
1692            k1 += nh;
1693            x = a[j1];
1694            a[j1] = a[k1];
1695            a[k1] = x;
1696        }
1697    }
1698}
1699
1700
1701/* **************************************************************************************************/
1702
1703/* **************************************************************************************************/
1704
1705void cftb1st(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
1706{
1707    picoos_int32 i, i0, j, j0, j1, j2, j3, m, mh;
1708    PICOFFTSG_FFTTYPE wk1r, wk1i, wk3r, wk3i,
1709        wd1r, wd1i, wd3r, wd3i, ss1, ss3;
1710    PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1711
1712    mh = n >> 3;
1713    m = 2 * mh;
1714    j1 = m;
1715    j2 = j1 + m;
1716    j3 = j2 + m;
1717    x0r = a[0] + a[j2];
1718    x0i = -a[1] - a[j2 + 1];
1719    x1r = a[0] - a[j2];
1720    x1i = -a[1] + a[j2 + 1];
1721    x2r = a[j1] + a[j3];
1722    x2i = a[j1 + 1] + a[j3 + 1];
1723    x3r = a[j1] - a[j3];
1724    x3i = a[j1 + 1] - a[j3 + 1];
1725    a[0] = x0r + x2r;
1726    a[1] = x0i - x2i;
1727    a[j1] = x0r - x2r;
1728    a[j1 + 1] = x0i + x2i;
1729    a[j2] = x1r + x3i;
1730    a[j2 + 1] = x1i + x3r;
1731    a[j3] = x1r - x3i;
1732    a[j3 + 1] = x1i - x3r;
1733    wd1r = PICODSP_WGT_SHIFT;
1734    wd1i = 0;
1735    wd3r = PICODSP_WGT_SHIFT;
1736    wd3i = 0;
1737
1738    wk1r  = (PICOFFTSG_FFTTYPE) (0.998795449734  *PICODSP_WGT_SHIFT);
1739    wk1i  = (PICOFFTSG_FFTTYPE) (0.049067676067  *PICODSP_WGT_SHIFT);
1740    ss1   = (PICOFFTSG_FFTTYPE) (0.098135352135  *PICODSP_WGT_SHIFT);
1741    wk3i  = (PICOFFTSG_FFTTYPE) (-0.146730467677 *PICODSP_WGT_SHIFT);
1742    wk3r  = (PICOFFTSG_FFTTYPE) (0.989176511765  *PICODSP_WGT_SHIFT);
1743    ss3   = (PICOFFTSG_FFTTYPE) (-0.293460935354 *PICODSP_WGT_SHIFT);
1744
1745    i = 0;
1746    for (;;) {
1747        i0 = i + CDFT_LOOP_DIV_4;
1748        if (i0 > mh - 4) {
1749            i0 = mh - 4;
1750        }
1751        for (j = i + 2; j < i0; j += 4) {
1752
1753            wd1r -= Mult_W_W(ss1, wk1i);
1754            wd1i += Mult_W_W(ss1, wk1r);
1755            wd3r -= Mult_W_W(ss3, wk3i);
1756            wd3i += Mult_W_W(ss3, wk3r);
1757
1758            j1 = j + m;
1759            j2 = j1 + m;
1760            j3 = j2 + m;
1761            x0r = a[j] + a[j2];
1762            x0i = -a[j + 1] - a[j2 + 1];
1763            x1r = a[j] - a[j2];
1764            x1i = -a[j + 1] + a[j2 + 1];
1765            x2r = a[j1] + a[j3];
1766            x2i = a[j1 + 1] + a[j3 + 1];
1767            x3r = a[j1] - a[j3];
1768            x3i = a[j1 + 1] - a[j3 + 1];
1769            a[j] = x0r + x2r;
1770            a[j + 1] = x0i - x2i;
1771            a[j1] = x0r - x2r;
1772            a[j1 + 1] = x0i + x2i;
1773            x0r = x1r + x3i;
1774            x0i = x1i + x3r;
1775            a[j2] = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
1776            a[j2 + 1] = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
1777            x0r = x1r - x3i;
1778            x0i = x1i - x3r;
1779            a[j3] = Mult_W_W(wk3r, x0r) + Mult_W_W(wk3i, x0i);
1780            a[j3 + 1] = Mult_W_W(wk3r, x0i) - Mult_W_W(wk3i, x0r);
1781            x0r = a[j + 2] + a[j2 + 2];
1782            x0i = -a[j + 3] - a[j2 + 3];
1783            x1r = a[j + 2] - a[j2 + 2];
1784            x1i = -a[j + 3] + a[j2 + 3];
1785            x2r = a[j1 + 2] + a[j3 + 2];
1786            x2i = a[j1 + 3] + a[j3 + 3];
1787            x3r = a[j1 + 2] - a[j3 + 2];
1788            x3i = a[j1 + 3] - a[j3 + 3];
1789            a[j + 2] = x0r + x2r;
1790            a[j + 3] = x0i - x2i;
1791            a[j1 + 2] = x0r - x2r;
1792            a[j1 + 3] = x0i + x2i;
1793            x0r = x1r + x3i;
1794            x0i = x1i + x3r;
1795            a[j2 + 2] = Mult_W_W(wd1r, x0r) - Mult_W_W(wd1i, x0i);
1796            a[j2 + 3] = Mult_W_W(wd1r, x0i) + Mult_W_W(wd1i, x0r);
1797            x0r = x1r - x3i;
1798            x0i = x1i - x3r;
1799            a[j3 + 2] = Mult_W_W(wd3r, x0r) + Mult_W_W(wd3i, x0i);
1800            a[j3 + 3] = Mult_W_W(wd3r, x0i) - Mult_W_W(wd3i, x0r);
1801            j0 = m - j;
1802            j1 = j0 + m;
1803            j2 = j1 + m;
1804            j3 = j2 + m;
1805            x0r = a[j0] + a[j2];
1806            x0i = -a[j0 + 1] - a[j2 + 1];
1807            x1r = a[j0] - a[j2];
1808            x1i = -a[j0 + 1] + a[j2 + 1];
1809            x2r = a[j1] + a[j3];
1810            x2i = a[j1 + 1] + a[j3 + 1];
1811            x3r = a[j1] - a[j3];
1812            x3i = a[j1 + 1] - a[j3 + 1];
1813            a[j0] = x0r + x2r;
1814            a[j0 + 1] = x0i - x2i;
1815            a[j1] = x0r - x2r;
1816            a[j1 + 1] = x0i + x2i;
1817            x0r = x1r + x3i;
1818            x0i = x1i + x3r;
1819            a[j2] = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
1820            a[j2 + 1] = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
1821            x0r = x1r - x3i;
1822            x0i = x1i - x3r;
1823            a[j3] = Mult_W_W(wk3i, x0r) + Mult_W_W(wk3r, x0i);
1824            a[j3 + 1] = Mult_W_W(wk3i, x0i) - Mult_W_W(wk3r, x0r);
1825            x0r = a[j0 - 2] + a[j2 - 2];
1826            x0i = -a[j0 - 1] - a[j2 - 1];
1827            x1r = a[j0 - 2] - a[j2 - 2];
1828            x1i = -a[j0 - 1] + a[j2 - 1];
1829            x2r = a[j1 - 2] + a[j3 - 2];
1830            x2i = a[j1 - 1] + a[j3 - 1];
1831            x3r = a[j1 - 2] - a[j3 - 2];
1832            x3i = a[j1 - 1] - a[j3 - 1];
1833            a[j0 - 2] = x0r + x2r;
1834            a[j0 - 1] = x0i - x2i;
1835            a[j1 - 2] = x0r - x2r;
1836            a[j1 - 1] = x0i + x2i;
1837            x0r = x1r + x3i;
1838            x0i = x1i + x3r;
1839            a[j2 - 2] = Mult_W_W(wd1i, x0r) - Mult_W_W(wd1r, x0i);
1840            a[j2 - 1] = Mult_W_W(wd1i, x0i) + Mult_W_W(wd1r, x0r);
1841            x0r = x1r - x3i;
1842            x0i = x1i - x3r;
1843            a[j3 - 2] = Mult_W_W(wd3i, x0r) + Mult_W_W(wd3r, x0i);
1844            a[j3 - 1] = Mult_W_W(wd3i, x0i) - Mult_W_W(wd3r, x0r);
1845            wk1r -= Mult_W_W(ss1, wd1i);
1846            wk1i += Mult_W_W(ss1, wd1r);
1847            wk3r -= Mult_W_W(ss3, wd3i);
1848            wk3i += Mult_W_W(ss3, wd3r);
1849        }
1850        if (i0 == mh - 4) {
1851            break;
1852        }
1853    }
1854    wd1r = WR5000;
1855    j0 = mh;
1856    j1 = j0 + m;
1857    j2 = j1 + m;
1858    j3 = j2 + m;
1859    x0r = a[j0 - 2] + a[j2 - 2];
1860    x0i = -a[j0 - 1] - a[j2 - 1];
1861    x1r = a[j0 - 2] - a[j2 - 2];
1862    x1i = -a[j0 - 1] + a[j2 - 1];
1863    x2r = a[j1 - 2] + a[j3 - 2];
1864    x2i = a[j1 - 1] + a[j3 - 1];
1865    x3r = a[j1 - 2] - a[j3 - 2];
1866    x3i = a[j1 - 1] - a[j3 - 1];
1867    a[j0 - 2] = x0r + x2r;
1868    a[j0 - 1] = x0i - x2i;
1869    a[j1 - 2] = x0r - x2r;
1870    a[j1 - 1] = x0i + x2i;
1871    x0r = x1r + x3i;
1872    x0i = x1i + x3r;
1873    a[j2 - 2] = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
1874    a[j2 - 1] = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
1875    x0r = x1r - x3i;
1876    x0i = x1i - x3r;
1877    a[j3 - 2] = Mult_W_W(wk3r, x0r) + Mult_W_W(wk3i, x0i);
1878    a[j3 - 1] = Mult_W_W(wk3r, x0i) - Mult_W_W(wk3i, x0r);
1879    x0r = a[j0] + a[j2];
1880    x0i = -a[j0 + 1] - a[j2 + 1];
1881    x1r = a[j0] - a[j2];
1882    x1i = -a[j0 + 1] + a[j2 + 1];
1883    x2r = a[j1] + a[j3];
1884    x2i = a[j1 + 1] + a[j3 + 1];
1885    x3r = a[j1] - a[j3];
1886    x3i = a[j1 + 1] - a[j3 + 1];
1887    a[j0] = x0r + x2r;
1888    a[j0 + 1] = x0i - x2i;
1889    a[j1] = x0r - x2r;
1890    a[j1 + 1] = x0i + x2i;
1891    x0r = x1r + x3i;
1892    x0i = x1i + x3r;
1893    a[j2] = picofftsg_mult_w_a(wd1r, (x0r - x0i));
1894    a[j2 + 1] = picofftsg_mult_w_a(wd1r, (x0i + x0r));
1895    x0r = x1r - x3i;
1896    x0i = x1i - x3r;
1897    a[j3] = -picofftsg_mult_w_a(wd1r, (x0r + x0i));
1898    a[j3 + 1] = -picofftsg_mult_w_a(wd1r, (x0i - x0r));
1899    x0r = a[j0 + 2] + a[j2 + 2];
1900    x0i = -a[j0 + 3] - a[j2 + 3];
1901    x1r = a[j0 + 2] - a[j2 + 2];
1902    x1i = -a[j0 + 3] + a[j2 + 3];
1903    x2r = a[j1 + 2] + a[j3 + 2];
1904    x2i = a[j1 + 3] + a[j3 + 3];
1905    x3r = a[j1 + 2] - a[j3 + 2];
1906    x3i = a[j1 + 3] - a[j3 + 3];
1907    a[j0 + 2] = x0r + x2r;
1908    a[j0 + 3] = x0i - x2i;
1909    a[j1 + 2] = x0r - x2r;
1910    a[j1 + 3] = x0i + x2i;
1911    x0r = x1r + x3i;
1912    x0i = x1i + x3r;
1913    a[j2 + 2] = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
1914    a[j2 + 3] = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
1915    x0r = x1r - x3i;
1916    x0i = x1i - x3r;
1917    a[j3 + 2] = Mult_W_W(wk3i, x0r) + Mult_W_W(wk3r, x0i);
1918    a[j3 + 3] = Mult_W_W(wk3i, x0i) - Mult_W_W(wk3r, x0r);
1919}
1920
1921void cftrec4(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
1922{
1923    picoos_int32 isplt, j, k, m;
1924
1925    m = n;
1926    while (m > 512) {
1927        m >>= 2;
1928        cftmdl1(m, &a[n - m]);
1929    }
1930    cftleaf(m, 1, &a[n - m]);
1931    k = 0;
1932    for (j = n - m; j > 0; j -= m) {
1933        k++;
1934        isplt = cfttree(m, j, k, a);
1935        cftleaf(m, isplt, &a[j - m]);
1936    }
1937}
1938
1939
1940picoos_int32 cfttree(picoos_int32 n, picoos_int32 j, picoos_int32 k, PICOFFTSG_FFTTYPE *a)
1941{
1942    picoos_int32 i, isplt, m;
1943
1944    if ((k & 3) != 0) {
1945        isplt = k & 1;
1946        if (isplt != 0) {
1947            cftmdl1(n, &a[j - n]);
1948        } else {
1949            cftmdl2(n, &a[j - n]);
1950        }
1951    } else {
1952        m = n;
1953        for (i = k; (i & 3) == 0; i >>= 2) {
1954            m <<= 2;
1955        }
1956        isplt = i & 1;
1957        if (isplt != 0) {
1958            while (m > 128) {
1959                cftmdl1(m, &a[j - m]);
1960                m >>= 2;
1961            }
1962        } else {
1963            while (m > 128) {
1964                cftmdl2(m, &a[j - m]);
1965                m >>= 2;
1966            }
1967        }
1968    }
1969    return isplt;
1970}
1971
1972
1973void cftleaf(picoos_int32 n, picoos_int32 isplt, PICOFFTSG_FFTTYPE *a)
1974{
1975
1976    if (n == 512) {
1977        cftmdl1(128, a);
1978        cftf161(a);
1979        cftf162(&a[32]);
1980        cftf161(&a[64]);
1981        cftf161(&a[96]);
1982        cftmdl2(128, &a[128]);
1983        cftf161(&a[128]);
1984        cftf162(&a[160]);
1985        cftf161(&a[192]);
1986        cftf162(&a[224]);
1987        cftmdl1(128, &a[256]);
1988        cftf161(&a[256]);
1989        cftf162(&a[288]);
1990        cftf161(&a[320]);
1991        cftf161(&a[352]);
1992        if (isplt != 0) {
1993            cftmdl1(128, &a[384]);
1994            cftf161(&a[480]);
1995        } else {
1996            cftmdl2(128, &a[384]);
1997            cftf162(&a[480]);
1998        }
1999        cftf161(&a[384]);
2000        cftf162(&a[416]);
2001        cftf161(&a[448]);
2002    } else {
2003        cftmdl1(64, a);
2004        cftf081(a);
2005        cftf082(&a[16]);
2006        cftf081(&a[32]);
2007        cftf081(&a[48]);
2008        cftmdl2(64, &a[64]);
2009        cftf081(&a[64]);
2010        cftf082(&a[80]);
2011        cftf081(&a[96]);
2012        cftf082(&a[112]);
2013        cftmdl1(64, &a[128]);
2014        cftf081(&a[128]);
2015        cftf082(&a[144]);
2016        cftf081(&a[160]);
2017        cftf081(&a[176]);
2018        if (isplt != 0) {
2019            cftmdl1(64, &a[192]);
2020            cftf081(&a[240]);
2021        } else {
2022            cftmdl2(64, &a[192]);
2023            cftf082(&a[240]);
2024        }
2025        cftf081(&a[192]);
2026        cftf082(&a[208]);
2027        cftf081(&a[224]);
2028    }
2029}
2030
2031
2032void cftmdl1(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
2033{
2034    picoos_int32 i, i0, j, j0, j1, j2, j3, m, mh;
2035    PICOFFTSG_FFTTYPE wk1r, wk1i, wk3r, wk3i,
2036        wd1r, wd1i, wd3r, wd3i, ss1, ss3;
2037    PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2038
2039    mh = n >> 3;
2040    m = 2 * mh;
2041    j1 = m;
2042    j2 = j1 + m;
2043    j3 = j2 + m;
2044    x0r = a[0] + a[j2];
2045    x0i = a[1] + a[j2 + 1];
2046    x1r = a[0] - a[j2];
2047    x1i = a[1] - a[j2 + 1];
2048    x2r = a[j1] + a[j3];
2049    x2i = a[j1 + 1] + a[j3 + 1];
2050    x3r = a[j1] - a[j3];
2051    x3i = a[j1 + 1] - a[j3 + 1];
2052    a[0] = x0r + x2r;
2053    a[1] = x0i + x2i;
2054    a[j1] = x0r - x2r;
2055    a[j1 + 1] = x0i - x2i;
2056    a[j2] = x1r - x3i;
2057    a[j2 + 1] = x1i + x3r;
2058    a[j3] = x1r + x3i;
2059    a[j3 + 1] = x1i - x3r;
2060    wd1r = (PICOFFTSG_FFTTYPE)(PICODSP_WGT_SHIFT);
2061    wd1i = 0;
2062    wd3r = (PICOFFTSG_FFTTYPE)(PICODSP_WGT_SHIFT);
2063    wd3i = 0;
2064    wk1r  =  (PICOFFTSG_FFTTYPE) (0.980785250664  *PICODSP_WGT_SHIFT);
2065    wk1i  =  (PICOFFTSG_FFTTYPE) (0.195090323687  *PICODSP_WGT_SHIFT);
2066    ss1   =  (PICOFFTSG_FFTTYPE) (0.390180647373  *PICODSP_WGT_SHIFT);
2067    wk3i  =  (PICOFFTSG_FFTTYPE) (-0.555570185184 *PICODSP_WGT_SHIFT);
2068    wk3r  =  (PICOFFTSG_FFTTYPE) (0.831469595432  *PICODSP_WGT_SHIFT);
2069    ss3   =  (PICOFFTSG_FFTTYPE) (-1.111140370369 *PICODSP_WGT_SHIFT);
2070
2071    i = 0;
2072    for (;;) {
2073        i0 = i + CDFT_LOOP_DIV_4;
2074        if (i0 > mh - 4) {
2075            i0 = mh - 4;
2076        }
2077        for (j = i + 2; j < i0; j += 4) {
2078            wd1r -= Mult_W_W(ss1, wk1i);
2079            wd1i += Mult_W_W(ss1, wk1r);
2080            wd3r -= Mult_W_W(ss3, wk3i);
2081            wd3i += Mult_W_W(ss3, wk3r);
2082            j1 = j + m;
2083            j2 = j1 + m;
2084            j3 = j2 + m;
2085            x0r = a[j] + a[j2];
2086            x0i = a[j + 1] + a[j2 + 1];
2087            x1r = a[j] - a[j2];
2088            x1i = a[j + 1] - a[j2 + 1];
2089            x2r = a[j1] + a[j3];
2090            x2i = a[j1 + 1] + a[j3 + 1];
2091            x3r = a[j1] - a[j3];
2092            x3i = a[j1 + 1] - a[j3 + 1];
2093            a[j] = x0r + x2r;
2094            a[j + 1] = x0i + x2i;
2095            a[j1] = x0r - x2r;
2096            a[j1 + 1] = x0i - x2i;
2097            x0r = x1r - x3i;
2098            x0i = x1i + x3r;
2099            a[j2] = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2100            a[j2 + 1] = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2101            x0r = x1r + x3i;
2102            x0i = x1i - x3r;
2103            a[j3] = Mult_W_W(wk3r, x0r) + Mult_W_W(wk3i, x0i);
2104            a[j3 + 1] = Mult_W_W(wk3r, x0i) - Mult_W_W(wk3i, x0r);
2105            x0r = a[j + 2] + a[j2 + 2];
2106            x0i = a[j + 3] + a[j2 + 3];
2107            x1r = a[j + 2] - a[j2 + 2];
2108            x1i = a[j + 3] - a[j2 + 3];
2109            x2r = a[j1 + 2] + a[j3 + 2];
2110            x2i = a[j1 + 3] + a[j3 + 3];
2111            x3r = a[j1 + 2] - a[j3 + 2];
2112            x3i = a[j1 + 3] - a[j3 + 3];
2113            a[j + 2] = x0r + x2r;
2114            a[j + 3] = x0i + x2i;
2115            a[j1 + 2] = x0r - x2r;
2116            a[j1 + 3] = x0i - x2i;
2117            x0r = x1r - x3i;
2118            x0i = x1i + x3r;
2119            a[j2 + 2] = Mult_W_W(wd1r, x0r) - Mult_W_W(wd1i, x0i);
2120            a[j2 + 3] = Mult_W_W(wd1r, x0i) + Mult_W_W(wd1i, x0r);
2121            x0r = x1r + x3i;
2122            x0i = x1i - x3r;
2123            a[j3 + 2] = Mult_W_W(wd3r, x0r) + Mult_W_W(wd3i, x0i);
2124            a[j3 + 3] = Mult_W_W(wd3r, x0i) - Mult_W_W(wd3i, x0r);
2125            j0 = m - j;
2126            j1 = j0 + m;
2127            j2 = j1 + m;
2128            j3 = j2 + m;
2129            x0r = a[j0] + a[j2];
2130            x0i = a[j0 + 1] + a[j2 + 1];
2131            x1r = a[j0] - a[j2];
2132            x1i = a[j0 + 1] - a[j2 + 1];
2133            x2r = a[j1] + a[j3];
2134            x2i = a[j1 + 1] + a[j3 + 1];
2135            x3r = a[j1] - a[j3];
2136            x3i = a[j1 + 1] - a[j3 + 1];
2137            a[j0] = x0r + x2r;
2138            a[j0 + 1] = x0i + x2i;
2139            a[j1] = x0r - x2r;
2140            a[j1 + 1] = x0i - x2i;
2141            x0r = x1r - x3i;
2142            x0i = x1i + x3r;
2143            a[j2] = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
2144            a[j2 + 1] = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
2145            x0r = x1r + x3i;
2146            x0i = x1i - x3r;
2147            a[j3] = Mult_W_W(wk3i, x0r) + Mult_W_W(wk3r, x0i);
2148            a[j3 + 1] = Mult_W_W(wk3i, x0i) - Mult_W_W(wk3r, x0r);
2149            x0r = a[j0 - 2] + a[j2 - 2];
2150            x0i = a[j0 - 1] + a[j2 - 1];
2151            x1r = a[j0 - 2] - a[j2 - 2];
2152            x1i = a[j0 - 1] - a[j2 - 1];
2153            x2r = a[j1 - 2] + a[j3 - 2];
2154            x2i = a[j1 - 1] + a[j3 - 1];
2155            x3r = a[j1 - 2] - a[j3 - 2];
2156            x3i = a[j1 - 1] - a[j3 - 1];
2157            a[j0 - 2] = x0r + x2r;
2158            a[j0 - 1] = x0i + x2i;
2159            a[j1 - 2] = x0r - x2r;
2160            a[j1 - 1] = x0i - x2i;
2161            x0r = x1r - x3i;
2162            x0i = x1i + x3r;
2163            a[j2 - 2] = Mult_W_W(wd1i, x0r) - Mult_W_W(wd1r, x0i);
2164            a[j2 - 1] = Mult_W_W(wd1i, x0i) + Mult_W_W(wd1r, x0r);
2165            x0r = x1r + x3i;
2166            x0i = x1i - x3r;
2167            a[j3 - 2] = Mult_W_W(wd3i, x0r) + Mult_W_W(wd3r, x0i);
2168            a[j3 - 1] = Mult_W_W(wd3i, x0i) - Mult_W_W(wd3r, x0r);
2169            wk1r -= Mult_W_W(ss1, wd1i);
2170            wk1i += Mult_W_W(ss1, wd1r);
2171            wk3r -= Mult_W_W(ss3, wd3i);
2172            wk3i += Mult_W_W(ss3, wd3r);
2173        }
2174        if (i0 == mh - 4) {
2175            break;
2176        }
2177    }
2178    wd1r = WR5000;
2179    j0 = mh;
2180    j1 = j0 + m;
2181    j2 = j1 + m;
2182    j3 = j2 + m;
2183    x0r = a[j0 - 2] + a[j2 - 2];
2184    x0i = a[j0 - 1] + a[j2 - 1];
2185    x1r = a[j0 - 2] - a[j2 - 2];
2186    x1i = a[j0 - 1] - a[j2 - 1];
2187    x2r = a[j1 - 2] + a[j3 - 2];
2188    x2i = a[j1 - 1] + a[j3 - 1];
2189    x3r = a[j1 - 2] - a[j3 - 2];
2190    x3i = a[j1 - 1] - a[j3 - 1];
2191    a[j0 - 2] = x0r + x2r;
2192    a[j0 - 1] = x0i + x2i;
2193    a[j1 - 2] = x0r - x2r;
2194    a[j1 - 1] = x0i - x2i;
2195    x0r = x1r - x3i;
2196    x0i = x1i + x3r;
2197    a[j2 - 2] = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2198    a[j2 - 1] = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2199    x0r = x1r + x3i;
2200    x0i = x1i - x3r;
2201    a[j3 - 2] = Mult_W_W(wk3r, x0r) + Mult_W_W(wk3i, x0i);
2202    a[j3 - 1] = Mult_W_W(wk3r, x0i) - Mult_W_W(wk3i, x0r);
2203    x0r = a[j0] + a[j2];
2204    x0i = a[j0 + 1] + a[j2 + 1];
2205    x1r = a[j0] - a[j2];
2206    x1i = a[j0 + 1] - a[j2 + 1];
2207    x2r = a[j1] + a[j3];
2208    x2i = a[j1 + 1] + a[j3 + 1];
2209    x3r = a[j1] - a[j3];
2210    x3i = a[j1 + 1] - a[j3 + 1];
2211    a[j0] = x0r + x2r;
2212    a[j0 + 1] = x0i + x2i;
2213    a[j1] = x0r - x2r;
2214    a[j1 + 1] = x0i - x2i;
2215    x0r = x1r - x3i;
2216    x0i = x1i + x3r;
2217    a[j2] = picofftsg_mult_w_a(wd1r, (x0r - x0i));
2218    a[j2 + 1] = picofftsg_mult_w_a(wd1r, (x0i + x0r));
2219    x0r = x1r + x3i;
2220    x0i = x1i - x3r;
2221    a[j3] = -picofftsg_mult_w_a(wd1r, (x0r + x0i));
2222    a[j3 + 1] = -picofftsg_mult_w_a(wd1r, (x0i - x0r));
2223    x0r = a[j0 + 2] + a[j2 + 2];
2224    x0i = a[j0 + 3] + a[j2 + 3];
2225    x1r = a[j0 + 2] - a[j2 + 2];
2226    x1i = a[j0 + 3] - a[j2 + 3];
2227    x2r = a[j1 + 2] + a[j3 + 2];
2228    x2i = a[j1 + 3] + a[j3 + 3];
2229    x3r = a[j1 + 2] - a[j3 + 2];
2230    x3i = a[j1 + 3] - a[j3 + 3];
2231    a[j0 + 2] = x0r + x2r;
2232    a[j0 + 3] = x0i + x2i;
2233    a[j1 + 2] = x0r - x2r;
2234    a[j1 + 3] = x0i - x2i;
2235    x0r = x1r - x3i;
2236    x0i = x1i + x3r;
2237    a[j2 + 2] = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
2238    a[j2 + 3] = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
2239    x0r = x1r + x3i;
2240    x0i = x1i - x3r;
2241    a[j3 + 2] = Mult_W_W(wk3i, x0r) + Mult_W_W(wk3r, x0i);
2242    a[j3 + 3] = Mult_W_W(wk3i, x0i) - Mult_W_W(wk3r, x0r);
2243}
2244
2245
2246void cftmdl2(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
2247{
2248    picoos_int32 i, i0, j, j0, j1, j2, j3, m, mh;
2249    PICOFFTSG_FFTTYPE wn4r, wk1r, wk1i, wk3r, wk3i,
2250        wl1r, wl1i, wl3r, wl3i, wd1r, wd1i, wd3r, wd3i,
2251        we1r, we1i, we3r, we3i, ss1, ss3;
2252    PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2253
2254    mh = n >> 3;
2255    m = 2 * mh;
2256    wn4r = WR5000;
2257    j1 = m;
2258    j2 = j1 + m;
2259    j3 = j2 + m;
2260    x0r = a[0] - a[j2 + 1];
2261    x0i = a[1] + a[j2];
2262    x1r = a[0] + a[j2 + 1];
2263    x1i = a[1] - a[j2];
2264    x2r = a[j1] - a[j3 + 1];
2265    x2i = a[j1 + 1] + a[j3];
2266    x3r = a[j1] + a[j3 + 1];
2267    x3i = a[j1 + 1] - a[j3];
2268    y0r = picofftsg_mult_w_a(wn4r, (x2r - x2i));
2269    y0i = picofftsg_mult_w_a(wn4r, (x2i + x2r));
2270    a[0] = x0r + y0r;
2271    a[1] = x0i + y0i;
2272    a[j1] = x0r - y0r;
2273    a[j1 + 1] = x0i - y0i;
2274    y0r = picofftsg_mult_w_a(wn4r, (x3r - x3i));
2275    y0i = picofftsg_mult_w_a(wn4r, (x3i + x3r));
2276    a[j2] = x1r - y0i;
2277    a[j2 + 1] = x1i + y0r;
2278    a[j3] = x1r + y0i;
2279    a[j3 + 1] = x1i - y0r;
2280    wl1r = PICODSP_WGT_SHIFT;
2281    wl1i = 0;
2282    wl3r = PICODSP_WGT_SHIFT;
2283    wl3i = 0;
2284    we1r = wn4r;
2285    we1i = wn4r;
2286    we3r = -wn4r;
2287    we3i = -wn4r;
2288
2289    wk1r  =  (PICOFFTSG_FFTTYPE)(0.995184719563  *PICODSP_WGT_SHIFT);
2290    wk1i  =  (PICOFFTSG_FFTTYPE)(0.098017141223  *PICODSP_WGT_SHIFT);
2291    wd1r  =  (PICOFFTSG_FFTTYPE)(0.634393274784  *PICODSP_WGT_SHIFT);
2292    wd1i  =  (PICOFFTSG_FFTTYPE)(0.773010432720  *PICODSP_WGT_SHIFT);
2293    ss1   =  (PICOFFTSG_FFTTYPE)(0.196034282446  *PICODSP_WGT_SHIFT);
2294    wk3i  =  (PICOFFTSG_FFTTYPE)(-0.290284663439 *PICODSP_WGT_SHIFT);
2295    wk3r  =  (PICOFFTSG_FFTTYPE)(0.956940352917  *PICODSP_WGT_SHIFT);
2296    ss3   =  (PICOFFTSG_FFTTYPE)(-0.580569326878 *PICODSP_WGT_SHIFT);
2297    wd3r  =  (PICOFFTSG_FFTTYPE)(-0.881921231747 *PICODSP_WGT_SHIFT);
2298    wd3i  =  (PICOFFTSG_FFTTYPE)(-0.471396744251 *PICODSP_WGT_SHIFT);
2299
2300    i = 0;
2301    for (;;) {
2302        i0 = i + 4 * CDFT_LOOP_DIV;
2303        if (i0 > mh - 4) {
2304            i0 = mh - 4;
2305        }
2306        for (j = i + 2; j < i0; j += 4) {
2307            wl1r -= Mult_W_W(ss1, wk1i);
2308            wl1i += Mult_W_W(ss1, wk1r);
2309            wl3r -= Mult_W_W(ss3, wk3i);
2310            wl3i += Mult_W_W(ss3, wk3r);
2311            we1r -= Mult_W_W(ss1, wd1i);
2312            we1i += Mult_W_W(ss1, wd1r);
2313            we3r -= Mult_W_W(ss3, wd3i);
2314            we3i += Mult_W_W(ss3, wd3r);
2315            j1 = j + m;
2316            j2 = j1 + m;
2317            j3 = j2 + m;
2318            x0r = a[j] - a[j2 + 1];
2319            x0i = a[j + 1] + a[j2];
2320            x1r = a[j] + a[j2 + 1];
2321            x1i = a[j + 1] - a[j2];
2322            x2r = a[j1] - a[j3 + 1];
2323            x2i = a[j1 + 1] + a[j3];
2324            x3r = a[j1] + a[j3 + 1];
2325            x3i = a[j1 + 1] - a[j3];
2326            y0r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2327            y0i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2328            y2r = Mult_W_W(wd1r, x2r) - Mult_W_W(wd1i, x2i);
2329            y2i = Mult_W_W(wd1r, x2i) + Mult_W_W(wd1i, x2r);
2330            a[j] = y0r + y2r;
2331            a[j + 1] = y0i + y2i;
2332            a[j1] = y0r - y2r;
2333            a[j1 + 1] = y0i - y2i;
2334            y0r = Mult_W_W(wk3r, x1r) + Mult_W_W(wk3i, x1i);
2335            y0i = Mult_W_W(wk3r, x1i) - Mult_W_W(wk3i, x1r);
2336            y2r = Mult_W_W(wd3r, x3r) + Mult_W_W(wd3i, x3i);
2337            y2i = Mult_W_W(wd3r, x3i) - Mult_W_W(wd3i, x3r);
2338            a[j2] = y0r + y2r;
2339            a[j2 + 1] = y0i + y2i;
2340            a[j3] = y0r - y2r;
2341            a[j3 + 1] = y0i - y2i;
2342            x0r = a[j + 2] - a[j2 + 3];
2343            x0i = a[j + 3] + a[j2 + 2];
2344            x1r = a[j + 2] + a[j2 + 3];
2345            x1i = a[j + 3] - a[j2 + 2];
2346            x2r = a[j1 + 2] - a[j3 + 3];
2347            x2i = a[j1 + 3] + a[j3 + 2];
2348            x3r = a[j1 + 2] + a[j3 + 3];
2349            x3i = a[j1 + 3] - a[j3 + 2];
2350            y0r = Mult_W_W(wl1r, x0r) - Mult_W_W(wl1i, x0i);
2351            y0i = Mult_W_W(wl1r, x0i) + Mult_W_W(wl1i, x0r);
2352            y2r = Mult_W_W(we1r, x2r) - Mult_W_W(we1i, x2i);
2353            y2i = Mult_W_W(we1r, x2i) + Mult_W_W(we1i, x2r);
2354            a[j + 2] = y0r + y2r;
2355            a[j + 3] = y0i + y2i;
2356            a[j1 + 2] = y0r - y2r;
2357            a[j1 + 3] = y0i - y2i;
2358            y0r = Mult_W_W(wl3r, x1r) + Mult_W_W(wl3i, x1i);
2359            y0i = Mult_W_W(wl3r, x1i) - Mult_W_W(wl3i, x1r);
2360            y2r = Mult_W_W(we3r, x3r) + Mult_W_W(we3i, x3i);
2361            y2i = Mult_W_W(we3r, x3i) - Mult_W_W(we3i, x3r);
2362            a[j2 + 2] = y0r + y2r;
2363            a[j2 + 3] = y0i + y2i;
2364            a[j3 + 2] = y0r - y2r;
2365            a[j3 + 3] = y0i - y2i;
2366            j0 = m - j;
2367            j1 = j0 + m;
2368            j2 = j1 + m;
2369            j3 = j2 + m;
2370            x0r = a[j0] - a[j2 + 1];
2371            x0i = a[j0 + 1] + a[j2];
2372            x1r = a[j0] + a[j2 + 1];
2373            x1i = a[j0 + 1] - a[j2];
2374            x2r = a[j1] - a[j3 + 1];
2375            x2i = a[j1 + 1] + a[j3];
2376            x3r = a[j1] + a[j3 + 1];
2377            x3i = a[j1 + 1] - a[j3];
2378            y0r = Mult_W_W(wd1i, x0r) - Mult_W_W(wd1r, x0i);
2379            y0i = Mult_W_W(wd1i, x0i) + Mult_W_W(wd1r, x0r);
2380            y2r = Mult_W_W(wk1i, x2r) - Mult_W_W(wk1r, x2i);
2381            y2i = Mult_W_W(wk1i, x2i) + Mult_W_W(wk1r, x2r);
2382            a[j0] = y0r + y2r;
2383            a[j0 + 1] = y0i + y2i;
2384            a[j1] = y0r - y2r;
2385            a[j1 + 1] = y0i - y2i;
2386            y0r = Mult_W_W(wd3i, x1r) + Mult_W_W(wd3r, x1i);
2387            y0i = Mult_W_W(wd3i, x1i) - Mult_W_W(wd3r, x1r);
2388            y2r = Mult_W_W(wk3i, x3r) + Mult_W_W(wk3r, x3i);
2389            y2i = Mult_W_W(wk3i, x3i) - Mult_W_W(wk3r, x3r);
2390            a[j2] = y0r + y2r;
2391            a[j2 + 1] = y0i + y2i;
2392            a[j3] = y0r - y2r;
2393            a[j3 + 1] = y0i - y2i;
2394            x0r = a[j0 - 2] - a[j2 - 1];
2395            x0i = a[j0 - 1] + a[j2 - 2];
2396            x1r = a[j0 - 2] + a[j2 - 1];
2397            x1i = a[j0 - 1] - a[j2 - 2];
2398            x2r = a[j1 - 2] - a[j3 - 1];
2399            x2i = a[j1 - 1] + a[j3 - 2];
2400            x3r = a[j1 - 2] + a[j3 - 1];
2401            x3i = a[j1 - 1] - a[j3 - 2];
2402            y0r = Mult_W_W(we1i, x0r) - Mult_W_W(we1r, x0i);
2403            y0i = Mult_W_W(we1i, x0i) + Mult_W_W(we1r, x0r);
2404            y2r = Mult_W_W(wl1i, x2r) - Mult_W_W(wl1r, x2i);
2405            y2i = Mult_W_W(wl1i, x2i) + Mult_W_W(wl1r, x2r);
2406            a[j0 - 2] = y0r + y2r;
2407            a[j0 - 1] = y0i + y2i;
2408            a[j1 - 2] = y0r - y2r;
2409            a[j1 - 1] = y0i - y2i;
2410            y0r = Mult_W_W(we3i, x1r) + Mult_W_W(we3r, x1i);
2411            y0i = Mult_W_W(we3i, x1i) - Mult_W_W(we3r, x1r);
2412            y2r = Mult_W_W(wl3i, x3r) + Mult_W_W(wl3r, x3i);
2413            y2i = Mult_W_W(wl3i, x3i) - Mult_W_W(wl3r, x3r);
2414            a[j2 - 2] = y0r + y2r;
2415            a[j2 - 1] = y0i + y2i;
2416            a[j3 - 2] = y0r - y2r;
2417            a[j3 - 1] = y0i - y2i;
2418            wk1r -= Mult_W_W(ss1, wl1i);
2419            wk1i += Mult_W_W(ss1, wl1r);
2420            wk3r -= Mult_W_W(ss3, wl3i);
2421            wk3i += Mult_W_W(ss3, wl3r);
2422            wd1r -= Mult_W_W(ss1, we1i);
2423            wd1i += Mult_W_W(ss1, we1r);
2424            wd3r -= Mult_W_W(ss3, we3i);
2425            wd3i += Mult_W_W(ss3, we3r);
2426        }
2427        if (i0 == mh - 4) {
2428            break;
2429        }
2430    }
2431    wl1r = WR2500;
2432    wl1i = WI2500;
2433    j0 = mh;
2434    j1 = j0 + m;
2435    j2 = j1 + m;
2436    j3 = j2 + m;
2437    x0r = a[j0 - 2] - a[j2 - 1];
2438    x0i = a[j0 - 1] + a[j2 - 2];
2439    x1r = a[j0 - 2] + a[j2 - 1];
2440    x1i = a[j0 - 1] - a[j2 - 2];
2441    x2r = a[j1 - 2] - a[j3 - 1];
2442    x2i = a[j1 - 1] + a[j3 - 2];
2443    x3r = a[j1 - 2] + a[j3 - 1];
2444    x3i = a[j1 - 1] - a[j3 - 2];
2445    y0r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2446    y0i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2447    y2r = Mult_W_W(wd1r, x2r) - Mult_W_W(wd1i, x2i);
2448    y2i = Mult_W_W(wd1r, x2i) + Mult_W_W(wd1i, x2r);
2449    a[j0 - 2] = y0r + y2r;
2450    a[j0 - 1] = y0i + y2i;
2451    a[j1 - 2] = y0r - y2r;
2452    a[j1 - 1] = y0i - y2i;
2453    y0r = Mult_W_W(wk3r, x1r) + Mult_W_W(wk3i, x1i);
2454    y0i = Mult_W_W(wk3r, x1i) - Mult_W_W(wk3i, x1r);
2455    y2r = Mult_W_W(wd3r, x3r) + Mult_W_W(wd3i, x3i);
2456    y2i = Mult_W_W(wd3r, x3i) - Mult_W_W(wd3i, x3r);
2457    a[j2 - 2] = y0r + y2r;
2458    a[j2 - 1] = y0i + y2i;
2459    a[j3 - 2] = y0r - y2r;
2460    a[j3 - 1] = y0i - y2i;
2461    x0r = a[j0] - a[j2 + 1];
2462    x0i = a[j0 + 1] + a[j2];
2463    x1r = a[j0] + a[j2 + 1];
2464    x1i = a[j0 + 1] - a[j2];
2465    x2r = a[j1] - a[j3 + 1];
2466    x2i = a[j1 + 1] + a[j3];
2467    x3r = a[j1] + a[j3 + 1];
2468    x3i = a[j1 + 1] - a[j3];
2469    y0r = Mult_W_W(wl1r, x0r) - Mult_W_W(wl1i, x0i);
2470    y0i = Mult_W_W(wl1r, x0i) + Mult_W_W(wl1i, x0r);
2471    y2r = Mult_W_W(wl1i, x2r) - Mult_W_W(wl1r, x2i);
2472    y2i = Mult_W_W(wl1i, x2i) + Mult_W_W(wl1r, x2r);
2473    a[j0] = y0r + y2r;
2474    a[j0 + 1] = y0i + y2i;
2475    a[j1] = y0r - y2r;
2476    a[j1 + 1] = y0i - y2i;
2477    y0r = Mult_W_W(wl1i, x1r) - Mult_W_W(wl1r, x1i);
2478    y0i = Mult_W_W(wl1i, x1i) + Mult_W_W(wl1r, x1r);
2479    y2r = Mult_W_W(wl1r, x3r) - Mult_W_W(wl1i, x3i);
2480    y2i = Mult_W_W(wl1r, x3i) + Mult_W_W(wl1i, x3r);
2481    a[j2] = y0r - y2r;
2482    a[j2 + 1] = y0i - y2i;
2483    a[j3] = y0r + y2r;
2484    a[j3 + 1] = y0i + y2i;
2485    x0r = a[j0 + 2] - a[j2 + 3];
2486    x0i = a[j0 + 3] + a[j2 + 2];
2487    x1r = a[j0 + 2] + a[j2 + 3];
2488    x1i = a[j0 + 3] - a[j2 + 2];
2489    x2r = a[j1 + 2] - a[j3 + 3];
2490    x2i = a[j1 + 3] + a[j3 + 2];
2491    x3r = a[j1 + 2] + a[j3 + 3];
2492    x3i = a[j1 + 3] - a[j3 + 2];
2493    y0r = Mult_W_W(wd1i, x0r) - Mult_W_W(wd1r, x0i);
2494    y0i = Mult_W_W(wd1i, x0i) + Mult_W_W(wd1r, x0r);
2495    y2r = Mult_W_W(wk1i, x2r) - Mult_W_W(wk1r, x2i);
2496    y2i = Mult_W_W(wk1i, x2i) + Mult_W_W(wk1r, x2r);
2497    a[j0 + 2] = y0r + y2r;
2498    a[j0 + 3] = y0i + y2i;
2499    a[j1 + 2] = y0r - y2r;
2500    a[j1 + 3] = y0i - y2i;
2501    y0r = Mult_W_W(wd3i, x1r) + Mult_W_W(wd3r, x1i);
2502    y0i = Mult_W_W(wd3i, x1i) - Mult_W_W(wd3r, x1r);
2503    y2r = Mult_W_W(wk3i, x3r) + Mult_W_W(wk3r, x3i);
2504    y2i = Mult_W_W(wk3i, x3i) - Mult_W_W(wk3r, x3r);
2505    a[j2 + 2] = y0r + y2r;
2506    a[j2 + 3] = y0i + y2i;
2507    a[j3 + 2] = y0r - y2r;
2508    a[j3 + 3] = y0i - y2i;
2509}
2510
2511
2512void cftfx41(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
2513{
2514
2515    if (n == 128) {
2516        cftf161(a);
2517        cftf162(&a[32]);
2518        cftf161(&a[64]);
2519        cftf161(&a[96]);
2520    } else {
2521        cftf081(a);
2522        cftf082(&a[16]);
2523        cftf081(&a[32]);
2524        cftf081(&a[48]);
2525    }
2526}
2527
2528
2529void cftf161(PICOFFTSG_FFTTYPE *a)
2530{
2531    PICOFFTSG_FFTTYPE wn4r, wk1r, wk1i,
2532        x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2533        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2534        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2535        y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2536        y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2537
2538    wn4r = WR5000;
2539    wk1r = WR2500;
2540    wk1i = WI2500;
2541    x0r = a[0] + a[16];
2542    x0i = a[1] + a[17];
2543    x1r = a[0] - a[16];
2544    x1i = a[1] - a[17];
2545    x2r = a[8] + a[24];
2546    x2i = a[9] + a[25];
2547    x3r = a[8] - a[24];
2548    x3i = a[9] - a[25];
2549    y0r = x0r + x2r;
2550    y0i = x0i + x2i;
2551    y4r = x0r - x2r;
2552    y4i = x0i - x2i;
2553    y8r = x1r - x3i;
2554    y8i = x1i + x3r;
2555    y12r = x1r + x3i;
2556    y12i = x1i - x3r;
2557    x0r = a[2] + a[18];
2558    x0i = a[3] + a[19];
2559    x1r = a[2] - a[18];
2560    x1i = a[3] - a[19];
2561    x2r = a[10] + a[26];
2562    x2i = a[11] + a[27];
2563    x3r = a[10] - a[26];
2564    x3i = a[11] - a[27];
2565    y1r = x0r + x2r;
2566    y1i = x0i + x2i;
2567    y5r = x0r - x2r;
2568    y5i = x0i - x2i;
2569    x0r = x1r - x3i;
2570    x0i = x1i + x3r;
2571    y9r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2572    y9i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2573    x0r = x1r + x3i;
2574    x0i = x1i - x3r;
2575    y13r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
2576    y13i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
2577    x0r = a[4] + a[20];
2578    x0i = a[5] + a[21];
2579    x1r = a[4] - a[20];
2580    x1i = a[5] - a[21];
2581    x2r = a[12] + a[28];
2582    x2i = a[13] + a[29];
2583    x3r = a[12] - a[28];
2584    x3i = a[13] - a[29];
2585    y2r = x0r + x2r;
2586    y2i = x0i + x2i;
2587    y6r = x0r - x2r;
2588    y6i = x0i - x2i;
2589    x0r = x1r - x3i;
2590    x0i = x1i + x3r;
2591    y10r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2592    y10i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2593    x0r = x1r + x3i;
2594    x0i = x1i - x3r;
2595    y14r = picofftsg_mult_w_a(wn4r, (x0r + x0i));
2596    y14i = picofftsg_mult_w_a(wn4r, (x0i - x0r));
2597    x0r = a[6] + a[22];
2598    x0i = a[7] + a[23];
2599    x1r = a[6] - a[22];
2600    x1i = a[7] - a[23];
2601    x2r = a[14] + a[30];
2602    x2i = a[15] + a[31];
2603    x3r = a[14] - a[30];
2604    x3i = a[15] - a[31];
2605    y3r = x0r + x2r;
2606    y3i = x0i + x2i;
2607    y7r = x0r - x2r;
2608    y7i = x0i - x2i;
2609    x0r = x1r - x3i;
2610    x0i = x1i + x3r;
2611    y11r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
2612    y11i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
2613    x0r = x1r + x3i;
2614    x0i = x1i - x3r;
2615    y15r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2616    y15i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2617    x0r = y12r - y14r;
2618    x0i = y12i - y14i;
2619    x1r = y12r + y14r;
2620    x1i = y12i + y14i;
2621    x2r = y13r - y15r;
2622    x2i = y13i - y15i;
2623    x3r = y13r + y15r;
2624    x3i = y13i + y15i;
2625    a[24] = x0r + x2r;
2626    a[25] = x0i + x2i;
2627    a[26] = x0r - x2r;
2628    a[27] = x0i - x2i;
2629    a[28] = x1r - x3i;
2630    a[29] = x1i + x3r;
2631    a[30] = x1r + x3i;
2632    a[31] = x1i - x3r;
2633    x0r = y8r + y10r;
2634    x0i = y8i + y10i;
2635    x1r = y8r - y10r;
2636    x1i = y8i - y10i;
2637    x2r = y9r + y11r;
2638    x2i = y9i + y11i;
2639    x3r = y9r - y11r;
2640    x3i = y9i - y11i;
2641    a[16] = x0r + x2r;
2642    a[17] = x0i + x2i;
2643    a[18] = x0r - x2r;
2644    a[19] = x0i - x2i;
2645    a[20] = x1r - x3i;
2646    a[21] = x1i + x3r;
2647    a[22] = x1r + x3i;
2648    a[23] = x1i - x3r;
2649    x0r = y5r - y7i;
2650    x0i = y5i + y7r;
2651    x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2652    x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2653    x0r = y5r + y7i;
2654    x0i = y5i - y7r;
2655    x3r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2656    x3i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2657    x0r = y4r - y6i;
2658    x0i = y4i + y6r;
2659    x1r = y4r + y6i;
2660    x1i = y4i - y6r;
2661    a[8] = x0r + x2r;
2662    a[9] = x0i + x2i;
2663    a[10] = x0r - x2r;
2664    a[11] = x0i - x2i;
2665    a[12] = x1r - x3i;
2666    a[13] = x1i + x3r;
2667    a[14] = x1r + x3i;
2668    a[15] = x1i - x3r;
2669    x0r = y0r + y2r;
2670    x0i = y0i + y2i;
2671    x1r = y0r - y2r;
2672    x1i = y0i - y2i;
2673    x2r = y1r + y3r;
2674    x2i = y1i + y3i;
2675    x3r = y1r - y3r;
2676    x3i = y1i - y3i;
2677    a[0] = x0r + x2r;
2678    a[1] = x0i + x2i;
2679    a[2] = x0r - x2r;
2680    a[3] = x0i - x2i;
2681    a[4] = x1r - x3i;
2682    a[5] = x1i + x3r;
2683    a[6] = x1r + x3i;
2684    a[7] = x1i - x3r;
2685}
2686
2687
2688void cftf162(PICOFFTSG_FFTTYPE *a)
2689{
2690    PICOFFTSG_FFTTYPE wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
2691        x0r, x0i, x1r, x1i, x2r, x2i,
2692        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2693        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2694        y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2695        y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2696
2697    wn4r = WR5000;
2698    wk1r = WR1250;
2699    wk1i = WI1250;
2700    wk2r = WR2500;
2701    wk2i = WI2500;
2702    wk3r = WR3750;
2703    wk3i = WI3750;
2704    x1r = a[0] - a[17];
2705    x1i = a[1] + a[16];
2706    x0r = a[8] - a[25];
2707    x0i = a[9] + a[24];
2708    x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2709    x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2710    y0r = x1r + x2r;
2711    y0i = x1i + x2i;
2712    y4r = x1r - x2r;
2713    y4i = x1i - x2i;
2714    x1r = a[0] + a[17];
2715    x1i = a[1] - a[16];
2716    x0r = a[8] + a[25];
2717    x0i = a[9] - a[24];
2718    x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2719    x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2720    y8r = x1r - x2i;
2721    y8i = x1i + x2r;
2722    y12r = x1r + x2i;
2723    y12i = x1i - x2r;
2724    x0r = a[2] - a[19];
2725    x0i = a[3] + a[18];
2726    x1r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2727    x1i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2728    x0r = a[10] - a[27];
2729    x0i = a[11] + a[26];
2730    x2r = Mult_W_W(wk3i, x0r) - Mult_W_W(wk3r, x0i);
2731    x2i = Mult_W_W(wk3i, x0i) + Mult_W_W(wk3r, x0r);
2732    y1r = x1r + x2r;
2733    y1i = x1i + x2i;
2734    y5r = x1r - x2r;
2735    y5i = x1i - x2i;
2736    x0r = a[2] + a[19];
2737    x0i = a[3] - a[18];
2738    x1r = Mult_W_W(wk3r, x0r) - Mult_W_W(wk3i, x0i);
2739    x1i = Mult_W_W(wk3r, x0i) + Mult_W_W(wk3i, x0r);
2740    x0r = a[10] + a[27];
2741    x0i = a[11] - a[26];
2742    x2r = Mult_W_W(wk1r, x0r) + Mult_W_W(wk1i, x0i);
2743    x2i = Mult_W_W(wk1r, x0i) - Mult_W_W(wk1i, x0r);
2744    y9r = x1r - x2r;
2745    y9i = x1i - x2i;
2746    y13r = x1r + x2r;
2747    y13i = x1i + x2i;
2748    x0r = a[4] - a[21];
2749    x0i = a[5] + a[20];
2750    x1r = Mult_W_W(wk2r, x0r) - Mult_W_W(wk2i, x0i);
2751    x1i = Mult_W_W(wk2r, x0i) + Mult_W_W(wk2i, x0r);
2752    x0r = a[12] - a[29];
2753    x0i = a[13] + a[28];
2754    x2r = Mult_W_W(wk2i, x0r) - Mult_W_W(wk2r, x0i);
2755    x2i = Mult_W_W(wk2i, x0i) + Mult_W_W(wk2r, x0r);
2756    y2r = x1r + x2r;
2757    y2i = x1i + x2i;
2758    y6r = x1r - x2r;
2759    y6i = x1i - x2i;
2760    x0r = a[4] + a[21];
2761    x0i = a[5] - a[20];
2762    x1r = Mult_W_W(wk2i, x0r) - Mult_W_W(wk2r, x0i);
2763    x1i = Mult_W_W(wk2i, x0i) + Mult_W_W(wk2r, x0r);
2764    x0r = a[12] + a[29];
2765    x0i = a[13] - a[28];
2766    x2r = Mult_W_W(wk2r, x0r) - Mult_W_W(wk2i, x0i);
2767    x2i = Mult_W_W(wk2r, x0i) + Mult_W_W(wk2i, x0r);
2768    y10r = x1r - x2r;
2769    y10i = x1i - x2i;
2770    y14r = x1r + x2r;
2771    y14i = x1i + x2i;
2772    x0r = a[6] - a[23];
2773    x0i = a[7] + a[22];
2774    x1r = Mult_W_W(wk3r, x0r) - Mult_W_W(wk3i, x0i);
2775    x1i = Mult_W_W(wk3r, x0i) + Mult_W_W(wk3i, x0r);
2776    x0r = a[14] - a[31];
2777    x0i = a[15] + a[30];
2778    x2r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
2779    x2i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
2780    y3r = x1r + x2r;
2781    y3i = x1i + x2i;
2782    y7r = x1r - x2r;
2783    y7i = x1i - x2i;
2784    x0r = a[6] + a[23];
2785    x0i = a[7] - a[22];
2786    x1r = Mult_W_W(wk1i, x0r) + Mult_W_W(wk1r, x0i);
2787    x1i = Mult_W_W(wk1i, x0i) - Mult_W_W(wk1r, x0r);
2788    x0r = a[14] + a[31];
2789    x0i = a[15] - a[30];
2790    x2r = Mult_W_W(wk3i, x0r) - Mult_W_W(wk3r, x0i);
2791    x2i = Mult_W_W(wk3i, x0i) + Mult_W_W(wk3r, x0r);
2792    y11r = x1r + x2r;
2793    y11i = x1i + x2i;
2794    y15r = x1r - x2r;
2795    y15i = x1i - x2i;
2796    x1r = y0r + y2r;
2797    x1i = y0i + y2i;
2798    x2r = y1r + y3r;
2799    x2i = y1i + y3i;
2800    a[0] = x1r + x2r;
2801    a[1] = x1i + x2i;
2802    a[2] = x1r - x2r;
2803    a[3] = x1i - x2i;
2804    x1r = y0r - y2r;
2805    x1i = y0i - y2i;
2806    x2r = y1r - y3r;
2807    x2i = y1i - y3i;
2808    a[4] = x1r - x2i;
2809    a[5] = x1i + x2r;
2810    a[6] = x1r + x2i;
2811    a[7] = x1i - x2r;
2812    x1r = y4r - y6i;
2813    x1i = y4i + y6r;
2814    x0r = y5r - y7i;
2815    x0i = y5i + y7r;
2816    x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2817    x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2818    a[8] = x1r + x2r;
2819    a[9] = x1i + x2i;
2820    a[10] = x1r - x2r;
2821    a[11] = x1i - x2i;
2822    x1r = y4r + y6i;
2823    x1i = y4i - y6r;
2824    x0r = y5r + y7i;
2825    x0i = y5i - y7r;
2826    x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2827    x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2828    a[12] = x1r - x2i;
2829    a[13] = x1i + x2r;
2830    a[14] = x1r + x2i;
2831    a[15] = x1i - x2r;
2832    x1r = y8r + y10r;
2833    x1i = y8i + y10i;
2834    x2r = y9r - y11r;
2835    x2i = y9i - y11i;
2836    a[16] = x1r + x2r;
2837    a[17] = x1i + x2i;
2838    a[18] = x1r - x2r;
2839    a[19] = x1i - x2i;
2840    x1r = y8r - y10r;
2841    x1i = y8i - y10i;
2842    x2r = y9r + y11r;
2843    x2i = y9i + y11i;
2844    a[20] = x1r - x2i;
2845    a[21] = x1i + x2r;
2846    a[22] = x1r + x2i;
2847    a[23] = x1i - x2r;
2848    x1r = y12r - y14i;
2849    x1i = y12i + y14r;
2850    x0r = y13r + y15i;
2851    x0i = y13i - y15r;
2852    x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2853    x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2854    a[24] = x1r + x2r;
2855    a[25] = x1i + x2i;
2856    a[26] = x1r - x2r;
2857    a[27] = x1i - x2i;
2858    x1r = y12r + y14i;
2859    x1i = y12i - y14r;
2860    x0r = y13r - y15i;
2861    x0i = y13i + y15r;
2862    x2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2863    x2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2864    a[28] = x1r - x2i;
2865    a[29] = x1i + x2r;
2866    a[30] = x1r + x2i;
2867    a[31] = x1i - x2r;
2868}
2869
2870
2871void cftf081(PICOFFTSG_FFTTYPE *a)
2872{
2873    PICOFFTSG_FFTTYPE wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2874        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2875        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
2876
2877    wn4r = WR5000;
2878    x0r = a[0] + a[8];
2879    x0i = a[1] + a[9];
2880    x1r = a[0] - a[8];
2881    x1i = a[1] - a[9];
2882    x2r = a[4] + a[12];
2883    x2i = a[5] + a[13];
2884    x3r = a[4] - a[12];
2885    x3i = a[5] - a[13];
2886    y0r = x0r + x2r;
2887    y0i = x0i + x2i;
2888    y2r = x0r - x2r;
2889    y2i = x0i - x2i;
2890    y1r = x1r - x3i;
2891    y1i = x1i + x3r;
2892    y3r = x1r + x3i;
2893    y3i = x1i - x3r;
2894    x0r = a[2] + a[10];
2895    x0i = a[3] + a[11];
2896    x1r = a[2] - a[10];
2897    x1i = a[3] - a[11];
2898    x2r = a[6] + a[14];
2899    x2i = a[7] + a[15];
2900    x3r = a[6] - a[14];
2901    x3i = a[7] - a[15];
2902    y4r = x0r + x2r;
2903    y4i = x0i + x2i;
2904    y6r = x0r - x2r;
2905    y6i = x0i - x2i;
2906    x0r = x1r - x3i;
2907    x0i = x1i + x3r;
2908    x2r = x1r + x3i;
2909    x2i = x1i - x3r;
2910    y5r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2911    y5i = picofftsg_mult_w_a(wn4r, (x0r + x0i));
2912    y7r = picofftsg_mult_w_a(wn4r, (x2r - x2i));
2913    y7i = picofftsg_mult_w_a(wn4r, (x2r + x2i));
2914    a[8] = y1r + y5r;
2915    a[9] = y1i + y5i;
2916    a[10] = y1r - y5r;
2917    a[11] = y1i - y5i;
2918    a[12] = y3r - y7i;
2919    a[13] = y3i + y7r;
2920    a[14] = y3r + y7i;
2921    a[15] = y3i - y7r;
2922    a[0] = y0r + y4r;
2923    a[1] = y0i + y4i;
2924    a[2] = y0r - y4r;
2925    a[3] = y0i - y4i;
2926    a[4] = y2r - y6i;
2927    a[5] = y2i + y6r;
2928    a[6] = y2r + y6i;
2929    a[7] = y2i - y6r;
2930}
2931
2932
2933void cftf082(PICOFFTSG_FFTTYPE *a)
2934{
2935    PICOFFTSG_FFTTYPE wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
2936        y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2937        y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
2938
2939    wn4r = WR5000;
2940    wk1r = WR2500;
2941    wk1i = WI2500;
2942    y0r = a[0] - a[9];
2943    y0i = a[1] + a[8];
2944    y1r = a[0] + a[9];
2945    y1i = a[1] - a[8];
2946    x0r = a[4] - a[13];
2947    x0i = a[5] + a[12];
2948    y2r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2949    y2i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2950    x0r = a[4] + a[13];
2951    x0i = a[5] - a[12];
2952    y3r = picofftsg_mult_w_a(wn4r, (x0r - x0i));
2953    y3i = picofftsg_mult_w_a(wn4r, (x0i + x0r));
2954    x0r = a[2] - a[11];
2955    x0i = a[3] + a[10];
2956    y4r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2957    y4i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2958    x0r = a[2] + a[11];
2959    x0i = a[3] - a[10];
2960    y5r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
2961    y5i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
2962    x0r = a[6] - a[15];
2963    x0i = a[7] + a[14];
2964    y6r = Mult_W_W(wk1i, x0r) - Mult_W_W(wk1r, x0i);
2965    y6i = Mult_W_W(wk1i, x0i) + Mult_W_W(wk1r, x0r);
2966    x0r = a[6] + a[15];
2967    x0i = a[7] - a[14];
2968    y7r = Mult_W_W(wk1r, x0r) - Mult_W_W(wk1i, x0i);
2969    y7i = Mult_W_W(wk1r, x0i) + Mult_W_W(wk1i, x0r);
2970    x0r = y0r + y2r;
2971    x0i = y0i + y2i;
2972    x1r = y4r + y6r;
2973    x1i = y4i + y6i;
2974    a[0] = x0r + x1r;
2975    a[1] = x0i + x1i;
2976    a[2] = x0r - x1r;
2977    a[3] = x0i - x1i;
2978    x0r = y0r - y2r;
2979    x0i = y0i - y2i;
2980    x1r = y4r - y6r;
2981    x1i = y4i - y6i;
2982    a[4] = x0r - x1i;
2983    a[5] = x0i + x1r;
2984    a[6] = x0r + x1i;
2985    a[7] = x0i - x1r;
2986    x0r = y1r - y3i;
2987    x0i = y1i + y3r;
2988    x1r = y5r - y7r;
2989    x1i = y5i - y7i;
2990    a[8] = x0r + x1r;
2991    a[9] = x0i + x1i;
2992    a[10] = x0r - x1r;
2993    a[11] = x0i - x1i;
2994    x0r = y1r + y3i;
2995    x0i = y1i - y3r;
2996    x1r = y5r + y7r;
2997    x1i = y5i + y7i;
2998    a[12] = x0r - x1i;
2999    a[13] = x0i + x1r;
3000    a[14] = x0r + x1i;
3001    a[15] = x0i - x1r;
3002}
3003
3004
3005void cftf040(PICOFFTSG_FFTTYPE *a)
3006{
3007    PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3008
3009    x0r = a[0] + a[4];
3010    x0i = a[1] + a[5];
3011    x1r = a[0] - a[4];
3012    x1i = a[1] - a[5];
3013    x2r = a[2] + a[6];
3014    x2i = a[3] + a[7];
3015    x3r = a[2] - a[6];
3016    x3i = a[3] - a[7];
3017    a[0] = x0r + x2r;
3018    a[1] = x0i + x2i;
3019    a[2] = x1r - x3i;
3020    a[3] = x1i + x3r;
3021    a[4] = x0r - x2r;
3022    a[5] = x0i - x2i;
3023    a[6] = x1r + x3i;
3024    a[7] = x1i - x3r;
3025}
3026
3027
3028void cftb040(PICOFFTSG_FFTTYPE *a)
3029{
3030    PICOFFTSG_FFTTYPE x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3031
3032    x0r = a[0] + a[4];
3033    x0i = a[1] + a[5];
3034    x1r = a[0] - a[4];
3035    x1i = a[1] - a[5];
3036    x2r = a[2] + a[6];
3037    x2i = a[3] + a[7];
3038    x3r = a[2] - a[6];
3039    x3i = a[3] - a[7];
3040    a[0] = x0r + x2r;
3041    a[1] = x0i + x2i;
3042    a[2] = x1r + x3i;
3043    a[3] = x1i - x3r;
3044    a[4] = x0r - x2r;
3045    a[5] = x0i - x2i;
3046    a[6] = x1r - x3i;
3047    a[7] = x1i + x3r;
3048}
3049
3050
3051void cftx020(PICOFFTSG_FFTTYPE *a)
3052{
3053    PICOFFTSG_FFTTYPE x0r, x0i;
3054
3055    x0r = a[0] - a[2];
3056    x0i = a[1] - a[3];
3057    a[0] += a[2];
3058    a[1] += a[3];
3059    a[2] = x0r;
3060    a[3] = x0i;
3061}
3062
3063
3064void rftfsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
3065{
3066    picoos_int32 i, i0, j, k;
3067    PICOFFTSG_FFTTYPE w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3068
3069    wkr = 0;
3070    wki = 0;
3071
3072    switch (n) {
3073        case 8 :
3074            wdi=(PICOFFTSG_FFTTYPE)(0.353553414345*PICODSP_WGT_SHIFT); wdr=(PICOFFTSG_FFTTYPE)(0.146446630359*PICODSP_WGT_SHIFT);
3075            w1r=(PICOFFTSG_FFTTYPE)(0.707106709480*PICODSP_WGT_SHIFT); w1i=(PICOFFTSG_FFTTYPE)(0.707106828690*PICODSP_WGT_SHIFT);
3076            ss =(PICOFFTSG_FFTTYPE)(1.414213657379*PICODSP_WGT_SHIFT); break;
3077        case 16 :
3078            wdi=(PICOFFTSG_FFTTYPE)(0.191341713071*PICODSP_WGT_SHIFT); wdr=(PICOFFTSG_FFTTYPE)(0.038060232997*PICODSP_WGT_SHIFT);
3079            w1r=(PICOFFTSG_FFTTYPE)(0.923879504204*PICODSP_WGT_SHIFT); w1i=(PICOFFTSG_FFTTYPE)(0.382683426142*PICODSP_WGT_SHIFT);
3080            ss =(PICOFFTSG_FFTTYPE)(0.765366852283*PICODSP_WGT_SHIFT); break;
3081        case 32 :
3082            wdi=(PICOFFTSG_FFTTYPE)(0.097545161843*PICODSP_WGT_SHIFT); wdr=(PICOFFTSG_FFTTYPE)(0.009607359767*PICODSP_WGT_SHIFT);
3083            w1r=(PICOFFTSG_FFTTYPE)(0.980785250664*PICODSP_WGT_SHIFT); w1i=(PICOFFTSG_FFTTYPE)(0.195090323687*PICODSP_WGT_SHIFT);
3084            ss =(PICOFFTSG_FFTTYPE)(0.390180647373*PICODSP_WGT_SHIFT); break;
3085        case 64 :
3086            wdi=(PICOFFTSG_FFTTYPE)(0.049008570611*PICODSP_WGT_SHIFT); wdr=(PICOFFTSG_FFTTYPE)(0.002407636726*PICODSP_WGT_SHIFT);
3087            w1r=(PICOFFTSG_FFTTYPE)(0.995184719563*PICODSP_WGT_SHIFT); w1i=(PICOFFTSG_FFTTYPE)(0.098017141223*PICODSP_WGT_SHIFT);
3088            ss =(PICOFFTSG_FFTTYPE)(0.196034282446*PICODSP_WGT_SHIFT); break;
3089        default :
3090            wdr = 0; wdi = 0; ss = 0;
3091            break;
3092    }
3093
3094    i = n >> 1;
3095    for (;;) {
3096        i0 = i - RDFT_LOOP_DIV_4;
3097        if (i0 < 4) {
3098            i0 = 4;
3099        }
3100        for (j = i - 4; j >= i0; j -= 4) {
3101            k = n - j;
3102            xr = a[j + 2] - a[k - 2];
3103            xi = a[j + 3] + a[k - 1];
3104            yr = Mult_W_W(wdr, xr) - Mult_W_W(wdi, xi);
3105            yi = Mult_W_W(wdr, xi) + Mult_W_W(wdi, xr);
3106            a[j + 2] -= yr;
3107            a[j + 3] -= yi;
3108            a[k - 2] += yr;
3109            a[k - 1] -= yi;
3110            wkr += Mult_W_W(ss, wdi);
3111            wki += picofftsg_mult_w_w(ss, (PICOFFTSG_WGT_SHIFT2 - wdr));
3112            xr = a[j] - a[k];
3113            xi = a[j + 1] + a[k + 1];
3114            yr = Mult_W_W(wkr, xr) - Mult_W_W(wki, xi);
3115            yi = Mult_W_W(wkr, xi) + Mult_W_W(wki, xr);
3116            a[j] -= yr;
3117            a[j + 1] -= yi;
3118            a[k] += yr;
3119            a[k + 1] -= yi;
3120            wdr += Mult_W_W(ss, wki);
3121            wdi += picofftsg_mult_w_w(ss, (PICOFFTSG_WGT_SHIFT2 - wkr));
3122        }
3123        if (i0 == 4) {
3124            break;
3125        }
3126    }
3127
3128    xr = a[2] - a[n - 2];
3129    xi = a[3] + a[n - 1];
3130    yr = Mult_W_W(wdr, xr) - Mult_W_W(wdi, xi);
3131    yi = Mult_W_W(wdr, xi) + Mult_W_W(wdi, xr);
3132
3133    a[2] -= yr;
3134    a[3] -= yi;
3135    a[n - 2] += yr;
3136    a[n - 1] -= yi;
3137
3138
3139}
3140
3141
3142void rftbsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
3143{
3144    picoos_int32 i, i0, j, k;
3145    PICOFFTSG_FFTTYPE w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3146    wkr = 0;
3147    wki = 0;
3148    wdi=(PICOFFTSG_FFTTYPE)(0.012270614505*PICODSP_WGT_SHIFT);
3149    wdr=(PICOFFTSG_FFTTYPE)(0.000150590655*PICODSP_WGT_SHIFT);
3150    w1r=(PICOFFTSG_FFTTYPE)(0.999698817730*PICODSP_WGT_SHIFT);
3151    w1i=(PICOFFTSG_FFTTYPE)(0.024541229010*PICODSP_WGT_SHIFT);
3152    ss=(PICOFFTSG_FFTTYPE)(0.049082458019*PICODSP_WGT_SHIFT);
3153
3154    i = n >> 1;
3155    for (;;) {
3156        i0 = i - RDFT_LOOP_DIV4;
3157        if (i0 < 4) {
3158            i0 = 4;
3159        }
3160        for (j = i - 4; j >= i0; j -= 4) {
3161            k = n - j;
3162            xr = a[j + 2] - a[k - 2];
3163            xi = a[j + 3] + a[k - 1];
3164            yr = Mult_W_W(wdr, xr) + Mult_W_W(wdi, xi);
3165            yi = Mult_W_W(wdr, xi) - Mult_W_W(wdi, xr);
3166            a[j + 2] -= yr;
3167            a[j + 3] -= yi;
3168            a[k - 2] += yr;
3169            a[k - 1] -= yi;
3170            wkr += Mult_W_W(ss, wdi);
3171            wki += picofftsg_mult_w_w(ss, (PICOFFTSG_WGT_SHIFT2 - wdr));
3172            xr = a[j] - a[k];
3173            xi = a[j + 1] + a[k + 1];
3174            yr = Mult_W_W(wkr, xr) + Mult_W_W(wki, xi);
3175            yi = Mult_W_W(wkr, xi) - Mult_W_W(wki, xr);
3176            a[j] -= yr;
3177            a[j + 1] -= yi;
3178            a[k] += yr;
3179            a[k + 1] -= yi;
3180            wdr += Mult_W_W(ss, wki);
3181            wdi += picofftsg_mult_w_w(ss, (PICOFFTSG_WGT_SHIFT2 - wkr));
3182        }
3183        if (i0 == 4) {
3184            break;
3185        }
3186    }
3187    xr = a[2] - a[n - 2];
3188    xi = a[3] + a[n - 1];
3189    yr = Mult_W_W(wdr, xr) + Mult_W_W(wdi, xi);
3190    yi = Mult_W_W(wdr, xi) - Mult_W_W(wdi, xr);
3191    a[2] -= yr;
3192    a[3] -= yi;
3193    a[n - 2] += yr;
3194    a[n - 1] -= yi;
3195}
3196
3197
3198void dctsub(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
3199{
3200    picoos_int32 i, i0, j, k, m;
3201    PICOFFTSG_FFTTYPE w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
3202    wkr = (PICOFFTSG_FFTTYPE)(0.5*PICODSP_WGT_SHIFT);
3203    wki = (PICOFFTSG_FFTTYPE)(0.5*PICODSP_WGT_SHIFT);
3204
3205    switch (n) {
3206        case 8 :  wdi=(PICOFFTSG_FFTTYPE)(0.587937772274*PICODSP_WGT_SHIFT);
3207            wdr=(PICOFFTSG_FFTTYPE)(0.392847478390*PICODSP_WGT_SHIFT); w1r=(PICOFFTSG_FFTTYPE)(0.980785250664*PICODSP_WGT_SHIFT);
3208            w1i=(PICOFFTSG_FFTTYPE)(0.195090323687*PICODSP_WGT_SHIFT); ss =(PICOFFTSG_FFTTYPE)(0.390180647373*PICODSP_WGT_SHIFT); break;
3209        case 16 : wdi=(PICOFFTSG_FFTTYPE)(0.546600937843*PICODSP_WGT_SHIFT);
3210            wdr=(PICOFFTSG_FFTTYPE)(0.448583781719*PICODSP_WGT_SHIFT); w1r=(PICOFFTSG_FFTTYPE)(0.995184719563*PICODSP_WGT_SHIFT);
3211            w1i=(PICOFFTSG_FFTTYPE)(0.098017141223*PICODSP_WGT_SHIFT); ss =(PICOFFTSG_FFTTYPE)(0.196034282446*PICODSP_WGT_SHIFT); break;
3212        case 32 : wdi=(PICOFFTSG_FFTTYPE)(0.523931562901*PICODSP_WGT_SHIFT);
3213            wdr=(PICOFFTSG_FFTTYPE)(0.474863886833*PICODSP_WGT_SHIFT); w1r=(PICOFFTSG_FFTTYPE)(0.998795449734*PICODSP_WGT_SHIFT);
3214            w1i=(PICOFFTSG_FFTTYPE)(0.049067676067*PICODSP_WGT_SHIFT); ss =(PICOFFTSG_FFTTYPE)(0.098135352135*PICODSP_WGT_SHIFT); break;
3215        case 64 : wdi=(PICOFFTSG_FFTTYPE)(0.512120008469*PICODSP_WGT_SHIFT);
3216            wdr=(PICOFFTSG_FFTTYPE)(0.487578809261*PICODSP_WGT_SHIFT); w1r=(PICOFFTSG_FFTTYPE)(0.999698817730*PICODSP_WGT_SHIFT);
3217            w1i=(PICOFFTSG_FFTTYPE)(0.024541229010*PICODSP_WGT_SHIFT); ss =(PICOFFTSG_FFTTYPE)(0.049082458019*PICODSP_WGT_SHIFT); break;
3218        default:
3219            wdr = 0; wdi = 0; ss = 0; break;
3220    }
3221
3222    m = n >> 1;
3223    i = 0;
3224    for (;;) {
3225        i0 = i + DCST_LOOP_DIV2;
3226        if (i0 > m - 2) {
3227            i0 = m - 2;
3228        }
3229        for (j = i + 2; j <= i0; j += 2) {
3230            k = n - j;
3231            xr = picofftsg_mult_w_a(wdi, a[j - 1]) - picofftsg_mult_w_a(wdr, a[k + 1]);
3232            xi = picofftsg_mult_w_a(wdr, a[j - 1]) + picofftsg_mult_w_a(wdi, a[k + 1]);
3233            wkr -= Mult_W_W(ss, wdi);
3234            wki += Mult_W_W(ss, wdr);
3235            yr = Mult_W_W(wki, a[j]) - Mult_W_W(wkr, a[k]);
3236            yi = Mult_W_W(wkr, a[j]) + Mult_W_W(wki, a[k]);
3237            wdr -= Mult_W_W(ss, wki);
3238            wdi += Mult_W_W(ss, wkr);
3239            a[k + 1] = xr;
3240            a[k] = yr;
3241            a[j - 1] = xi;
3242            a[j] = yi;
3243        }
3244        if (i0 == m - 2) {
3245            break;
3246        }
3247    }
3248    xr = picofftsg_mult_w_a(wdi, a[m - 1]) - picofftsg_mult_w_a(wdr, a[m + 1]);
3249    a[m - 1] = picofftsg_mult_w_a(wdr, a[m - 1]) + picofftsg_mult_w_a(wdi, a[m + 1]);
3250    a[m + 1] = xr;
3251    a[m] = Mult_W_W(WR5000, a[m]);
3252}
3253
3254
3255void dctsub4(picoos_int32 n, PICOFFTSG_FFTTYPE *a)
3256{
3257    picoos_int32 m;
3258    PICOFFTSG_FFTTYPE wki, wdr, wdi, xr;
3259
3260    wki = WR5000;
3261    m = n >> 1;
3262    if (m == 2) {
3263        wdr = Mult_W_W(wki, WI2500);
3264        wdi = Mult_W_W(wki, WR2500);
3265        xr = Mult_W_W(wdi, a[1]) - Mult_W_W(wdr, a[3]);
3266        a[1] = Mult_W_W(wdr, a[1]) + Mult_W_W(wdi, a[3]);
3267        a[3] = xr;
3268    }
3269    a[m] = Mult_W_W(wki, a[m]);
3270}
3271
3272#ifdef __cplusplus
3273}
3274#endif
3275