1/*
2 * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
3 * Copyright Takuya OOURA, 1996-2001
4 *
5 * You may use, copy, modify and distribute this code for any purpose (include
6 * commercial use) and without fee. Please refer to this package when you modify
7 * this code.
8 *
9 * Changes:
10 * Trivial type modifications by the WebRTC authors.
11 */
12
13/*
14Fast Fourier/Cosine/Sine Transform
15    dimension   :one
16    data length :power of 2
17    decimation  :frequency
18    radix       :4, 2
19    data        :inplace
20    table       :use
21functions
22    cdft: Complex Discrete Fourier Transform
23    rdft: Real Discrete Fourier Transform
24    ddct: Discrete Cosine Transform
25    ddst: Discrete Sine Transform
26    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
27    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
28function prototypes
29    void cdft(int, int, float *, int *, float *);
30    void rdft(int, int, float *, int *, float *);
31    void ddct(int, int, float *, int *, float *);
32    void ddst(int, int, float *, int *, float *);
33    void dfct(int, float *, float *, int *, float *);
34    void dfst(int, float *, float *, int *, float *);
35
36
37-------- Complex DFT (Discrete Fourier Transform) --------
38    [definition]
39        <case1>
40            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
41        <case2>
42            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
43        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
44    [usage]
45        <case1>
46            ip[0] = 0; // first time only
47            cdft(2*n, 1, a, ip, w);
48        <case2>
49            ip[0] = 0; // first time only
50            cdft(2*n, -1, a, ip, w);
51    [parameters]
52        2*n            :data length (int)
53                        n >= 1, n = power of 2
54        a[0...2*n-1]   :input/output data (float *)
55                        input data
56                            a[2*j] = Re(x[j]),
57                            a[2*j+1] = Im(x[j]), 0<=j<n
58                        output data
59                            a[2*k] = Re(X[k]),
60                            a[2*k+1] = Im(X[k]), 0<=k<n
61        ip[0...*]      :work area for bit reversal (int *)
62                        length of ip >= 2+sqrt(n)
63                        strictly,
64                        length of ip >=
65                            2+(1<<(int)(log(n+0.5)/log(2))/2).
66                        ip[0],ip[1] are pointers of the cos/sin table.
67        w[0...n/2-1]   :cos/sin table (float *)
68                        w[],ip[] are initialized if ip[0] == 0.
69    [remark]
70        Inverse of
71            cdft(2*n, -1, a, ip, w);
72        is
73            cdft(2*n, 1, a, ip, w);
74            for (j = 0; j <= 2 * n - 1; j++) {
75                a[j] *= 1.0 / n;
76            }
77        .
78
79
80-------- Real DFT / Inverse of Real DFT --------
81    [definition]
82        <case1> RDFT
83            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
84            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
85        <case2> IRDFT (excluding scale)
86            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
87                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
88                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
89    [usage]
90        <case1>
91            ip[0] = 0; // first time only
92            rdft(n, 1, a, ip, w);
93        <case2>
94            ip[0] = 0; // first time only
95            rdft(n, -1, a, ip, w);
96    [parameters]
97        n              :data length (int)
98                        n >= 2, n = power of 2
99        a[0...n-1]     :input/output data (float *)
100                        <case1>
101                            output data
102                                a[2*k] = R[k], 0<=k<n/2
103                                a[2*k+1] = I[k], 0<k<n/2
104                                a[1] = R[n/2]
105                        <case2>
106                            input data
107                                a[2*j] = R[j], 0<=j<n/2
108                                a[2*j+1] = I[j], 0<j<n/2
109                                a[1] = R[n/2]
110        ip[0...*]      :work area for bit reversal (int *)
111                        length of ip >= 2+sqrt(n/2)
112                        strictly,
113                        length of ip >=
114                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
115                        ip[0],ip[1] are pointers of the cos/sin table.
116        w[0...n/2-1]   :cos/sin table (float *)
117                        w[],ip[] are initialized if ip[0] == 0.
118    [remark]
119        Inverse of
120            rdft(n, 1, a, ip, w);
121        is
122            rdft(n, -1, a, ip, w);
123            for (j = 0; j <= n - 1; j++) {
124                a[j] *= 2.0 / n;
125            }
126        .
127
128
129-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
130    [definition]
131        <case1> IDCT (excluding scale)
132            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
133        <case2> DCT
134            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
135    [usage]
136        <case1>
137            ip[0] = 0; // first time only
138            ddct(n, 1, a, ip, w);
139        <case2>
140            ip[0] = 0; // first time only
141            ddct(n, -1, a, ip, w);
142    [parameters]
143        n              :data length (int)
144                        n >= 2, n = power of 2
145        a[0...n-1]     :input/output data (float *)
146                        output data
147                            a[k] = C[k], 0<=k<n
148        ip[0...*]      :work area for bit reversal (int *)
149                        length of ip >= 2+sqrt(n/2)
150                        strictly,
151                        length of ip >=
152                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
153                        ip[0],ip[1] are pointers of the cos/sin table.
154        w[0...n*5/4-1] :cos/sin table (float *)
155                        w[],ip[] are initialized if ip[0] == 0.
156    [remark]
157        Inverse of
158            ddct(n, -1, a, ip, w);
159        is
160            a[0] *= 0.5;
161            ddct(n, 1, a, ip, w);
162            for (j = 0; j <= n - 1; j++) {
163                a[j] *= 2.0 / n;
164            }
165        .
166
167
168-------- DST (Discrete Sine Transform) / Inverse of DST --------
169    [definition]
170        <case1> IDST (excluding scale)
171            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
172        <case2> DST
173            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
174    [usage]
175        <case1>
176            ip[0] = 0; // first time only
177            ddst(n, 1, a, ip, w);
178        <case2>
179            ip[0] = 0; // first time only
180            ddst(n, -1, a, ip, w);
181    [parameters]
182        n              :data length (int)
183                        n >= 2, n = power of 2
184        a[0...n-1]     :input/output data (float *)
185                        <case1>
186                            input data
187                                a[j] = A[j], 0<j<n
188                                a[0] = A[n]
189                            output data
190                                a[k] = S[k], 0<=k<n
191                        <case2>
192                            output data
193                                a[k] = S[k], 0<k<n
194                                a[0] = S[n]
195        ip[0...*]      :work area for bit reversal (int *)
196                        length of ip >= 2+sqrt(n/2)
197                        strictly,
198                        length of ip >=
199                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
200                        ip[0],ip[1] are pointers of the cos/sin table.
201        w[0...n*5/4-1] :cos/sin table (float *)
202                        w[],ip[] are initialized if ip[0] == 0.
203    [remark]
204        Inverse of
205            ddst(n, -1, a, ip, w);
206        is
207            a[0] *= 0.5;
208            ddst(n, 1, a, ip, w);
209            for (j = 0; j <= n - 1; j++) {
210                a[j] *= 2.0 / n;
211            }
212        .
213
214
215-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
216    [definition]
217        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
218    [usage]
219        ip[0] = 0; // first time only
220        dfct(n, a, t, ip, w);
221    [parameters]
222        n              :data length - 1 (int)
223                        n >= 2, n = power of 2
224        a[0...n]       :input/output data (float *)
225                        output data
226                            a[k] = C[k], 0<=k<=n
227        t[0...n/2]     :work area (float *)
228        ip[0...*]      :work area for bit reversal (int *)
229                        length of ip >= 2+sqrt(n/4)
230                        strictly,
231                        length of ip >=
232                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
233                        ip[0],ip[1] are pointers of the cos/sin table.
234        w[0...n*5/8-1] :cos/sin table (float *)
235                        w[],ip[] are initialized if ip[0] == 0.
236    [remark]
237        Inverse of
238            a[0] *= 0.5;
239            a[n] *= 0.5;
240            dfct(n, a, t, ip, w);
241        is
242            a[0] *= 0.5;
243            a[n] *= 0.5;
244            dfct(n, a, t, ip, w);
245            for (j = 0; j <= n; j++) {
246                a[j] *= 2.0 / n;
247            }
248        .
249
250
251-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
252    [definition]
253        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
254    [usage]
255        ip[0] = 0; // first time only
256        dfst(n, a, t, ip, w);
257    [parameters]
258        n              :data length + 1 (int)
259                        n >= 2, n = power of 2
260        a[0...n-1]     :input/output data (float *)
261                        output data
262                            a[k] = S[k], 0<k<n
263                        (a[0] is used for work area)
264        t[0...n/2-1]   :work area (float *)
265        ip[0...*]      :work area for bit reversal (int *)
266                        length of ip >= 2+sqrt(n/4)
267                        strictly,
268                        length of ip >=
269                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
270                        ip[0],ip[1] are pointers of the cos/sin table.
271        w[0...n*5/8-1] :cos/sin table (float *)
272                        w[],ip[] are initialized if ip[0] == 0.
273    [remark]
274        Inverse of
275            dfst(n, a, t, ip, w);
276        is
277            dfst(n, a, t, ip, w);
278            for (j = 1; j <= n - 1; j++) {
279                a[j] *= 2.0 / n;
280            }
281        .
282
283
284Appendix :
285    The cos/sin table is recalculated when the larger table required.
286    w[] and ip[] are compatible with all routines.
287*/
288
289static void makewt(int nw, int *ip, float *w);
290static void makect(int nc, int *ip, float *c);
291static void bitrv2(int n, int *ip, float *a);
292static void bitrv2conj(int n, int *ip, float *a);
293static void cftfsub(int n, float *a, float *w);
294static void cftbsub(int n, float *a, float *w);
295static void cft1st(int n, float *a, float *w);
296static void cftmdl(int n, int l, float *a, float *w);
297static void rftfsub(int n, float *a, int nc, float *c);
298static void rftbsub(int n, float *a, int nc, float *c);
299#if 0  // Not used.
300static void dctsub(int n, float *a, int nc, float *c)
301static void dstsub(int n, float *a, int nc, float *c)
302#endif
303
304
305void WebRtc_cdft(int n, int isgn, float *a, int *ip, float *w)
306{
307    if (n > (ip[0] << 2)) {
308        makewt(n >> 2, ip, w);
309    }
310    if (n > 4) {
311        if (isgn >= 0) {
312            bitrv2(n, ip + 2, a);
313            cftfsub(n, a, w);
314        } else {
315            bitrv2conj(n, ip + 2, a);
316            cftbsub(n, a, w);
317        }
318    } else if (n == 4) {
319        cftfsub(n, a, w);
320    }
321}
322
323
324void WebRtc_rdft(int n, int isgn, float *a, int *ip, float *w)
325{
326    int nw, nc;
327    float xi;
328
329    nw = ip[0];
330    if (n > (nw << 2)) {
331        nw = n >> 2;
332        makewt(nw, ip, w);
333    }
334    nc = ip[1];
335    if (n > (nc << 2)) {
336        nc = n >> 2;
337        makect(nc, ip, w + nw);
338    }
339    if (isgn >= 0) {
340        if (n > 4) {
341            bitrv2(n, ip + 2, a);
342            cftfsub(n, a, w);
343            rftfsub(n, a, nc, w + nw);
344        } else if (n == 4) {
345            cftfsub(n, a, w);
346        }
347        xi = a[0] - a[1];
348        a[0] += a[1];
349        a[1] = xi;
350    } else {
351        a[1] = 0.5f * (a[0] - a[1]);
352        a[0] -= a[1];
353        if (n > 4) {
354            rftbsub(n, a, nc, w + nw);
355            bitrv2(n, ip + 2, a);
356            cftbsub(n, a, w);
357        } else if (n == 4) {
358            cftfsub(n, a, w);
359        }
360    }
361}
362
363#if 0  // Not used.
364static void ddct(int n, int isgn, float *a, int *ip, float *w)
365{
366    int j, nw, nc;
367    float xr;
368
369    nw = ip[0];
370    if (n > (nw << 2)) {
371        nw = n >> 2;
372        makewt(nw, ip, w);
373    }
374    nc = ip[1];
375    if (n > nc) {
376        nc = n;
377        makect(nc, ip, w + nw);
378    }
379    if (isgn < 0) {
380        xr = a[n - 1];
381        for (j = n - 2; j >= 2; j -= 2) {
382            a[j + 1] = a[j] - a[j - 1];
383            a[j] += a[j - 1];
384        }
385        a[1] = a[0] - xr;
386        a[0] += xr;
387        if (n > 4) {
388            rftbsub(n, a, nc, w + nw);
389            bitrv2(n, ip + 2, a);
390            cftbsub(n, a, w);
391        } else if (n == 4) {
392            cftfsub(n, a, w);
393        }
394    }
395    dctsub(n, a, nc, w + nw);
396    if (isgn >= 0) {
397        if (n > 4) {
398            bitrv2(n, ip + 2, a);
399            cftfsub(n, a, w);
400            rftfsub(n, a, nc, w + nw);
401        } else if (n == 4) {
402            cftfsub(n, a, w);
403        }
404        xr = a[0] - a[1];
405        a[0] += a[1];
406        for (j = 2; j < n; j += 2) {
407            a[j - 1] = a[j] - a[j + 1];
408            a[j] += a[j + 1];
409        }
410        a[n - 1] = xr;
411    }
412}
413
414
415static void ddst(int n, int isgn, float *a, int *ip, float *w)
416{
417    int j, nw, nc;
418    float xr;
419
420    nw = ip[0];
421    if (n > (nw << 2)) {
422        nw = n >> 2;
423        makewt(nw, ip, w);
424    }
425    nc = ip[1];
426    if (n > nc) {
427        nc = n;
428        makect(nc, ip, w + nw);
429    }
430    if (isgn < 0) {
431        xr = a[n - 1];
432        for (j = n - 2; j >= 2; j -= 2) {
433            a[j + 1] = -a[j] - a[j - 1];
434            a[j] -= a[j - 1];
435        }
436        a[1] = a[0] + xr;
437        a[0] -= xr;
438        if (n > 4) {
439            rftbsub(n, a, nc, w + nw);
440            bitrv2(n, ip + 2, a);
441            cftbsub(n, a, w);
442        } else if (n == 4) {
443            cftfsub(n, a, w);
444        }
445    }
446    dstsub(n, a, nc, w + nw);
447    if (isgn >= 0) {
448        if (n > 4) {
449            bitrv2(n, ip + 2, a);
450            cftfsub(n, a, w);
451            rftfsub(n, a, nc, w + nw);
452        } else if (n == 4) {
453            cftfsub(n, a, w);
454        }
455        xr = a[0] - a[1];
456        a[0] += a[1];
457        for (j = 2; j < n; j += 2) {
458            a[j - 1] = -a[j] - a[j + 1];
459            a[j] -= a[j + 1];
460        }
461        a[n - 1] = -xr;
462    }
463}
464
465
466static void dfct(int n, float *a, float *t, int *ip, float *w)
467{
468    int j, k, l, m, mh, nw, nc;
469    float xr, xi, yr, yi;
470
471    nw = ip[0];
472    if (n > (nw << 3)) {
473        nw = n >> 3;
474        makewt(nw, ip, w);
475    }
476    nc = ip[1];
477    if (n > (nc << 1)) {
478        nc = n >> 1;
479        makect(nc, ip, w + nw);
480    }
481    m = n >> 1;
482    yi = a[m];
483    xi = a[0] + a[n];
484    a[0] -= a[n];
485    t[0] = xi - yi;
486    t[m] = xi + yi;
487    if (n > 2) {
488        mh = m >> 1;
489        for (j = 1; j < mh; j++) {
490            k = m - j;
491            xr = a[j] - a[n - j];
492            xi = a[j] + a[n - j];
493            yr = a[k] - a[n - k];
494            yi = a[k] + a[n - k];
495            a[j] = xr;
496            a[k] = yr;
497            t[j] = xi - yi;
498            t[k] = xi + yi;
499        }
500        t[mh] = a[mh] + a[n - mh];
501        a[mh] -= a[n - mh];
502        dctsub(m, a, nc, w + nw);
503        if (m > 4) {
504            bitrv2(m, ip + 2, a);
505            cftfsub(m, a, w);
506            rftfsub(m, a, nc, w + nw);
507        } else if (m == 4) {
508            cftfsub(m, a, w);
509        }
510        a[n - 1] = a[0] - a[1];
511        a[1] = a[0] + a[1];
512        for (j = m - 2; j >= 2; j -= 2) {
513            a[2 * j + 1] = a[j] + a[j + 1];
514            a[2 * j - 1] = a[j] - a[j + 1];
515        }
516        l = 2;
517        m = mh;
518        while (m >= 2) {
519            dctsub(m, t, nc, w + nw);
520            if (m > 4) {
521                bitrv2(m, ip + 2, t);
522                cftfsub(m, t, w);
523                rftfsub(m, t, nc, w + nw);
524            } else if (m == 4) {
525                cftfsub(m, t, w);
526            }
527            a[n - l] = t[0] - t[1];
528            a[l] = t[0] + t[1];
529            k = 0;
530            for (j = 2; j < m; j += 2) {
531                k += l << 2;
532                a[k - l] = t[j] - t[j + 1];
533                a[k + l] = t[j] + t[j + 1];
534            }
535            l <<= 1;
536            mh = m >> 1;
537            for (j = 0; j < mh; j++) {
538                k = m - j;
539                t[j] = t[m + k] - t[m + j];
540                t[k] = t[m + k] + t[m + j];
541            }
542            t[mh] = t[m + mh];
543            m = mh;
544        }
545        a[l] = t[0];
546        a[n] = t[2] - t[1];
547        a[0] = t[2] + t[1];
548    } else {
549        a[1] = a[0];
550        a[2] = t[0];
551        a[0] = t[1];
552    }
553}
554
555static void dfst(int n, float *a, float *t, int *ip, float *w)
556{
557    int j, k, l, m, mh, nw, nc;
558    float xr, xi, yr, yi;
559
560    nw = ip[0];
561    if (n > (nw << 3)) {
562        nw = n >> 3;
563        makewt(nw, ip, w);
564    }
565    nc = ip[1];
566    if (n > (nc << 1)) {
567        nc = n >> 1;
568        makect(nc, ip, w + nw);
569    }
570    if (n > 2) {
571        m = n >> 1;
572        mh = m >> 1;
573        for (j = 1; j < mh; j++) {
574            k = m - j;
575            xr = a[j] + a[n - j];
576            xi = a[j] - a[n - j];
577            yr = a[k] + a[n - k];
578            yi = a[k] - a[n - k];
579            a[j] = xr;
580            a[k] = yr;
581            t[j] = xi + yi;
582            t[k] = xi - yi;
583        }
584        t[0] = a[mh] - a[n - mh];
585        a[mh] += a[n - mh];
586        a[0] = a[m];
587        dstsub(m, a, nc, w + nw);
588        if (m > 4) {
589            bitrv2(m, ip + 2, a);
590            cftfsub(m, a, w);
591            rftfsub(m, a, nc, w + nw);
592        } else if (m == 4) {
593            cftfsub(m, a, w);
594        }
595        a[n - 1] = a[1] - a[0];
596        a[1] = a[0] + a[1];
597        for (j = m - 2; j >= 2; j -= 2) {
598            a[2 * j + 1] = a[j] - a[j + 1];
599            a[2 * j - 1] = -a[j] - a[j + 1];
600        }
601        l = 2;
602        m = mh;
603        while (m >= 2) {
604            dstsub(m, t, nc, w + nw);
605            if (m > 4) {
606                bitrv2(m, ip + 2, t);
607                cftfsub(m, t, w);
608                rftfsub(m, t, nc, w + nw);
609            } else if (m == 4) {
610                cftfsub(m, t, w);
611            }
612            a[n - l] = t[1] - t[0];
613            a[l] = t[0] + t[1];
614            k = 0;
615            for (j = 2; j < m; j += 2) {
616                k += l << 2;
617                a[k - l] = -t[j] - t[j + 1];
618                a[k + l] = t[j] - t[j + 1];
619            }
620            l <<= 1;
621            mh = m >> 1;
622            for (j = 1; j < mh; j++) {
623                k = m - j;
624                t[j] = t[m + k] + t[m + j];
625                t[k] = t[m + k] - t[m + j];
626            }
627            t[0] = t[m + mh];
628            m = mh;
629        }
630        a[l] = t[0];
631    }
632    a[0] = 0;
633}
634#endif  // Not used.
635
636
637/* -------- initializing routines -------- */
638
639
640#include <math.h>
641
642static void makewt(int nw, int *ip, float *w)
643{
644    int j, nwh;
645    float delta, x, y;
646
647    ip[0] = nw;
648    ip[1] = 1;
649    if (nw > 2) {
650        nwh = nw >> 1;
651        delta = (float)atan(1.0f) / nwh;
652        w[0] = 1;
653        w[1] = 0;
654        w[nwh] = (float)cos(delta * nwh);
655        w[nwh + 1] = w[nwh];
656        if (nwh > 2) {
657            for (j = 2; j < nwh; j += 2) {
658                x = (float)cos(delta * j);
659                y = (float)sin(delta * j);
660                w[j] = x;
661                w[j + 1] = y;
662                w[nw - j] = y;
663                w[nw - j + 1] = x;
664            }
665            bitrv2(nw, ip + 2, w);
666        }
667    }
668}
669
670
671static void makect(int nc, int *ip, float *c)
672{
673    int j, nch;
674    float delta;
675
676    ip[1] = nc;
677    if (nc > 1) {
678        nch = nc >> 1;
679        delta = (float)atan(1.0f) / nch;
680        c[0] = (float)cos(delta * nch);
681        c[nch] = 0.5f * c[0];
682        for (j = 1; j < nch; j++) {
683            c[j] = 0.5f * (float)cos(delta * j);
684            c[nc - j] = 0.5f * (float)sin(delta * j);
685        }
686    }
687}
688
689
690/* -------- child routines -------- */
691
692
693static void bitrv2(int n, int *ip, float *a)
694{
695    int j, j1, k, k1, l, m, m2;
696    float xr, xi, yr, yi;
697
698    ip[0] = 0;
699    l = n;
700    m = 1;
701    while ((m << 3) < l) {
702        l >>= 1;
703        for (j = 0; j < m; j++) {
704            ip[m + j] = ip[j] + l;
705        }
706        m <<= 1;
707    }
708    m2 = 2 * m;
709    if ((m << 3) == l) {
710        for (k = 0; k < m; k++) {
711            for (j = 0; j < k; j++) {
712                j1 = 2 * j + ip[k];
713                k1 = 2 * k + ip[j];
714                xr = a[j1];
715                xi = a[j1 + 1];
716                yr = a[k1];
717                yi = a[k1 + 1];
718                a[j1] = yr;
719                a[j1 + 1] = yi;
720                a[k1] = xr;
721                a[k1 + 1] = xi;
722                j1 += m2;
723                k1 += 2 * m2;
724                xr = a[j1];
725                xi = a[j1 + 1];
726                yr = a[k1];
727                yi = a[k1 + 1];
728                a[j1] = yr;
729                a[j1 + 1] = yi;
730                a[k1] = xr;
731                a[k1 + 1] = xi;
732                j1 += m2;
733                k1 -= m2;
734                xr = a[j1];
735                xi = a[j1 + 1];
736                yr = a[k1];
737                yi = a[k1 + 1];
738                a[j1] = yr;
739                a[j1 + 1] = yi;
740                a[k1] = xr;
741                a[k1 + 1] = xi;
742                j1 += m2;
743                k1 += 2 * m2;
744                xr = a[j1];
745                xi = a[j1 + 1];
746                yr = a[k1];
747                yi = a[k1 + 1];
748                a[j1] = yr;
749                a[j1 + 1] = yi;
750                a[k1] = xr;
751                a[k1 + 1] = xi;
752            }
753            j1 = 2 * k + m2 + ip[k];
754            k1 = j1 + m2;
755            xr = a[j1];
756            xi = a[j1 + 1];
757            yr = a[k1];
758            yi = a[k1 + 1];
759            a[j1] = yr;
760            a[j1 + 1] = yi;
761            a[k1] = xr;
762            a[k1 + 1] = xi;
763        }
764    } else {
765        for (k = 1; k < m; k++) {
766            for (j = 0; j < k; j++) {
767                j1 = 2 * j + ip[k];
768                k1 = 2 * k + ip[j];
769                xr = a[j1];
770                xi = a[j1 + 1];
771                yr = a[k1];
772                yi = a[k1 + 1];
773                a[j1] = yr;
774                a[j1 + 1] = yi;
775                a[k1] = xr;
776                a[k1 + 1] = xi;
777                j1 += m2;
778                k1 += m2;
779                xr = a[j1];
780                xi = a[j1 + 1];
781                yr = a[k1];
782                yi = a[k1 + 1];
783                a[j1] = yr;
784                a[j1 + 1] = yi;
785                a[k1] = xr;
786                a[k1 + 1] = xi;
787            }
788        }
789    }
790}
791
792
793static void bitrv2conj(int n, int *ip, float *a)
794{
795    int j, j1, k, k1, l, m, m2;
796    float xr, xi, yr, yi;
797
798    ip[0] = 0;
799    l = n;
800    m = 1;
801    while ((m << 3) < l) {
802        l >>= 1;
803        for (j = 0; j < m; j++) {
804            ip[m + j] = ip[j] + l;
805        }
806        m <<= 1;
807    }
808    m2 = 2 * m;
809    if ((m << 3) == l) {
810        for (k = 0; k < m; k++) {
811            for (j = 0; j < k; j++) {
812                j1 = 2 * j + ip[k];
813                k1 = 2 * k + ip[j];
814                xr = a[j1];
815                xi = -a[j1 + 1];
816                yr = a[k1];
817                yi = -a[k1 + 1];
818                a[j1] = yr;
819                a[j1 + 1] = yi;
820                a[k1] = xr;
821                a[k1 + 1] = xi;
822                j1 += m2;
823                k1 += 2 * m2;
824                xr = a[j1];
825                xi = -a[j1 + 1];
826                yr = a[k1];
827                yi = -a[k1 + 1];
828                a[j1] = yr;
829                a[j1 + 1] = yi;
830                a[k1] = xr;
831                a[k1 + 1] = xi;
832                j1 += m2;
833                k1 -= m2;
834                xr = a[j1];
835                xi = -a[j1 + 1];
836                yr = a[k1];
837                yi = -a[k1 + 1];
838                a[j1] = yr;
839                a[j1 + 1] = yi;
840                a[k1] = xr;
841                a[k1 + 1] = xi;
842                j1 += m2;
843                k1 += 2 * m2;
844                xr = a[j1];
845                xi = -a[j1 + 1];
846                yr = a[k1];
847                yi = -a[k1 + 1];
848                a[j1] = yr;
849                a[j1 + 1] = yi;
850                a[k1] = xr;
851                a[k1 + 1] = xi;
852            }
853            k1 = 2 * k + ip[k];
854            a[k1 + 1] = -a[k1 + 1];
855            j1 = k1 + m2;
856            k1 = j1 + m2;
857            xr = a[j1];
858            xi = -a[j1 + 1];
859            yr = a[k1];
860            yi = -a[k1 + 1];
861            a[j1] = yr;
862            a[j1 + 1] = yi;
863            a[k1] = xr;
864            a[k1 + 1] = xi;
865            k1 += m2;
866            a[k1 + 1] = -a[k1 + 1];
867        }
868    } else {
869        a[1] = -a[1];
870        a[m2 + 1] = -a[m2 + 1];
871        for (k = 1; k < m; k++) {
872            for (j = 0; j < k; j++) {
873                j1 = 2 * j + ip[k];
874                k1 = 2 * k + ip[j];
875                xr = a[j1];
876                xi = -a[j1 + 1];
877                yr = a[k1];
878                yi = -a[k1 + 1];
879                a[j1] = yr;
880                a[j1 + 1] = yi;
881                a[k1] = xr;
882                a[k1 + 1] = xi;
883                j1 += m2;
884                k1 += m2;
885                xr = a[j1];
886                xi = -a[j1 + 1];
887                yr = a[k1];
888                yi = -a[k1 + 1];
889                a[j1] = yr;
890                a[j1 + 1] = yi;
891                a[k1] = xr;
892                a[k1 + 1] = xi;
893            }
894            k1 = 2 * k + ip[k];
895            a[k1 + 1] = -a[k1 + 1];
896            a[k1 + m2 + 1] = -a[k1 + m2 + 1];
897        }
898    }
899}
900
901
902static void cftfsub(int n, float *a, float *w)
903{
904    int j, j1, j2, j3, l;
905    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
906
907    l = 2;
908    if (n > 8) {
909        cft1st(n, a, w);
910        l = 8;
911        while ((l << 2) < n) {
912            cftmdl(n, l, a, w);
913            l <<= 2;
914        }
915    }
916    if ((l << 2) == n) {
917        for (j = 0; j < l; j += 2) {
918            j1 = j + l;
919            j2 = j1 + l;
920            j3 = j2 + l;
921            x0r = a[j] + a[j1];
922            x0i = a[j + 1] + a[j1 + 1];
923            x1r = a[j] - a[j1];
924            x1i = a[j + 1] - a[j1 + 1];
925            x2r = a[j2] + a[j3];
926            x2i = a[j2 + 1] + a[j3 + 1];
927            x3r = a[j2] - a[j3];
928            x3i = a[j2 + 1] - a[j3 + 1];
929            a[j] = x0r + x2r;
930            a[j + 1] = x0i + x2i;
931            a[j2] = x0r - x2r;
932            a[j2 + 1] = x0i - x2i;
933            a[j1] = x1r - x3i;
934            a[j1 + 1] = x1i + x3r;
935            a[j3] = x1r + x3i;
936            a[j3 + 1] = x1i - x3r;
937        }
938    } else {
939        for (j = 0; j < l; j += 2) {
940            j1 = j + l;
941            x0r = a[j] - a[j1];
942            x0i = a[j + 1] - a[j1 + 1];
943            a[j] += a[j1];
944            a[j + 1] += a[j1 + 1];
945            a[j1] = x0r;
946            a[j1 + 1] = x0i;
947        }
948    }
949}
950
951
952static void cftbsub(int n, float *a, float *w)
953{
954    int j, j1, j2, j3, l;
955    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
956
957    l = 2;
958    if (n > 8) {
959        cft1st(n, a, w);
960        l = 8;
961        while ((l << 2) < n) {
962            cftmdl(n, l, a, w);
963            l <<= 2;
964        }
965    }
966    if ((l << 2) == n) {
967        for (j = 0; j < l; j += 2) {
968            j1 = j + l;
969            j2 = j1 + l;
970            j3 = j2 + l;
971            x0r = a[j] + a[j1];
972            x0i = -a[j + 1] - a[j1 + 1];
973            x1r = a[j] - a[j1];
974            x1i = -a[j + 1] + a[j1 + 1];
975            x2r = a[j2] + a[j3];
976            x2i = a[j2 + 1] + a[j3 + 1];
977            x3r = a[j2] - a[j3];
978            x3i = a[j2 + 1] - a[j3 + 1];
979            a[j] = x0r + x2r;
980            a[j + 1] = x0i - x2i;
981            a[j2] = x0r - x2r;
982            a[j2 + 1] = x0i + x2i;
983            a[j1] = x1r - x3i;
984            a[j1 + 1] = x1i - x3r;
985            a[j3] = x1r + x3i;
986            a[j3 + 1] = x1i + x3r;
987        }
988    } else {
989        for (j = 0; j < l; j += 2) {
990            j1 = j + l;
991            x0r = a[j] - a[j1];
992            x0i = -a[j + 1] + a[j1 + 1];
993            a[j] += a[j1];
994            a[j + 1] = -a[j + 1] - a[j1 + 1];
995            a[j1] = x0r;
996            a[j1 + 1] = x0i;
997        }
998    }
999}
1000
1001
1002static void cft1st(int n, float *a, float *w)
1003{
1004    int j, k1, k2;
1005    float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1006    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1007
1008    x0r = a[0] + a[2];
1009    x0i = a[1] + a[3];
1010    x1r = a[0] - a[2];
1011    x1i = a[1] - a[3];
1012    x2r = a[4] + a[6];
1013    x2i = a[5] + a[7];
1014    x3r = a[4] - a[6];
1015    x3i = a[5] - a[7];
1016    a[0] = x0r + x2r;
1017    a[1] = x0i + x2i;
1018    a[4] = x0r - x2r;
1019    a[5] = x0i - x2i;
1020    a[2] = x1r - x3i;
1021    a[3] = x1i + x3r;
1022    a[6] = x1r + x3i;
1023    a[7] = x1i - x3r;
1024    wk1r = w[2];
1025    x0r = a[8] + a[10];
1026    x0i = a[9] + a[11];
1027    x1r = a[8] - a[10];
1028    x1i = a[9] - a[11];
1029    x2r = a[12] + a[14];
1030    x2i = a[13] + a[15];
1031    x3r = a[12] - a[14];
1032    x3i = a[13] - a[15];
1033    a[8] = x0r + x2r;
1034    a[9] = x0i + x2i;
1035    a[12] = x2i - x0i;
1036    a[13] = x0r - x2r;
1037    x0r = x1r - x3i;
1038    x0i = x1i + x3r;
1039    a[10] = wk1r * (x0r - x0i);
1040    a[11] = wk1r * (x0r + x0i);
1041    x0r = x3i + x1r;
1042    x0i = x3r - x1i;
1043    a[14] = wk1r * (x0i - x0r);
1044    a[15] = wk1r * (x0i + x0r);
1045    k1 = 0;
1046    for (j = 16; j < n; j += 16) {
1047        k1 += 2;
1048        k2 = 2 * k1;
1049        wk2r = w[k1];
1050        wk2i = w[k1 + 1];
1051        wk1r = w[k2];
1052        wk1i = w[k2 + 1];
1053        wk3r = wk1r - 2 * wk2i * wk1i;
1054        wk3i = 2 * wk2i * wk1r - wk1i;
1055        x0r = a[j] + a[j + 2];
1056        x0i = a[j + 1] + a[j + 3];
1057        x1r = a[j] - a[j + 2];
1058        x1i = a[j + 1] - a[j + 3];
1059        x2r = a[j + 4] + a[j + 6];
1060        x2i = a[j + 5] + a[j + 7];
1061        x3r = a[j + 4] - a[j + 6];
1062        x3i = a[j + 5] - a[j + 7];
1063        a[j] = x0r + x2r;
1064        a[j + 1] = x0i + x2i;
1065        x0r -= x2r;
1066        x0i -= x2i;
1067        a[j + 4] = wk2r * x0r - wk2i * x0i;
1068        a[j + 5] = wk2r * x0i + wk2i * x0r;
1069        x0r = x1r - x3i;
1070        x0i = x1i + x3r;
1071        a[j + 2] = wk1r * x0r - wk1i * x0i;
1072        a[j + 3] = wk1r * x0i + wk1i * x0r;
1073        x0r = x1r + x3i;
1074        x0i = x1i - x3r;
1075        a[j + 6] = wk3r * x0r - wk3i * x0i;
1076        a[j + 7] = wk3r * x0i + wk3i * x0r;
1077        wk1r = w[k2 + 2];
1078        wk1i = w[k2 + 3];
1079        wk3r = wk1r - 2 * wk2r * wk1i;
1080        wk3i = 2 * wk2r * wk1r - wk1i;
1081        x0r = a[j + 8] + a[j + 10];
1082        x0i = a[j + 9] + a[j + 11];
1083        x1r = a[j + 8] - a[j + 10];
1084        x1i = a[j + 9] - a[j + 11];
1085        x2r = a[j + 12] + a[j + 14];
1086        x2i = a[j + 13] + a[j + 15];
1087        x3r = a[j + 12] - a[j + 14];
1088        x3i = a[j + 13] - a[j + 15];
1089        a[j + 8] = x0r + x2r;
1090        a[j + 9] = x0i + x2i;
1091        x0r -= x2r;
1092        x0i -= x2i;
1093        a[j + 12] = -wk2i * x0r - wk2r * x0i;
1094        a[j + 13] = -wk2i * x0i + wk2r * x0r;
1095        x0r = x1r - x3i;
1096        x0i = x1i + x3r;
1097        a[j + 10] = wk1r * x0r - wk1i * x0i;
1098        a[j + 11] = wk1r * x0i + wk1i * x0r;
1099        x0r = x1r + x3i;
1100        x0i = x1i - x3r;
1101        a[j + 14] = wk3r * x0r - wk3i * x0i;
1102        a[j + 15] = wk3r * x0i + wk3i * x0r;
1103    }
1104}
1105
1106
1107static void cftmdl(int n, int l, float *a, float *w)
1108{
1109    int j, j1, j2, j3, k, k1, k2, m, m2;
1110    float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1111    float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1112
1113    m = l << 2;
1114    for (j = 0; j < l; j += 2) {
1115        j1 = j + l;
1116        j2 = j1 + l;
1117        j3 = j2 + l;
1118        x0r = a[j] + a[j1];
1119        x0i = a[j + 1] + a[j1 + 1];
1120        x1r = a[j] - a[j1];
1121        x1i = a[j + 1] - a[j1 + 1];
1122        x2r = a[j2] + a[j3];
1123        x2i = a[j2 + 1] + a[j3 + 1];
1124        x3r = a[j2] - a[j3];
1125        x3i = a[j2 + 1] - a[j3 + 1];
1126        a[j] = x0r + x2r;
1127        a[j + 1] = x0i + x2i;
1128        a[j2] = x0r - x2r;
1129        a[j2 + 1] = x0i - x2i;
1130        a[j1] = x1r - x3i;
1131        a[j1 + 1] = x1i + x3r;
1132        a[j3] = x1r + x3i;
1133        a[j3 + 1] = x1i - x3r;
1134    }
1135    wk1r = w[2];
1136    for (j = m; j < l + m; j += 2) {
1137        j1 = j + l;
1138        j2 = j1 + l;
1139        j3 = j2 + l;
1140        x0r = a[j] + a[j1];
1141        x0i = a[j + 1] + a[j1 + 1];
1142        x1r = a[j] - a[j1];
1143        x1i = a[j + 1] - a[j1 + 1];
1144        x2r = a[j2] + a[j3];
1145        x2i = a[j2 + 1] + a[j3 + 1];
1146        x3r = a[j2] - a[j3];
1147        x3i = a[j2 + 1] - a[j3 + 1];
1148        a[j] = x0r + x2r;
1149        a[j + 1] = x0i + x2i;
1150        a[j2] = x2i - x0i;
1151        a[j2 + 1] = x0r - x2r;
1152        x0r = x1r - x3i;
1153        x0i = x1i + x3r;
1154        a[j1] = wk1r * (x0r - x0i);
1155        a[j1 + 1] = wk1r * (x0r + x0i);
1156        x0r = x3i + x1r;
1157        x0i = x3r - x1i;
1158        a[j3] = wk1r * (x0i - x0r);
1159        a[j3 + 1] = wk1r * (x0i + x0r);
1160    }
1161    k1 = 0;
1162    m2 = 2 * m;
1163    for (k = m2; k < n; k += m2) {
1164        k1 += 2;
1165        k2 = 2 * k1;
1166        wk2r = w[k1];
1167        wk2i = w[k1 + 1];
1168        wk1r = w[k2];
1169        wk1i = w[k2 + 1];
1170        wk3r = wk1r - 2 * wk2i * wk1i;
1171        wk3i = 2 * wk2i * wk1r - wk1i;
1172        for (j = k; j < l + k; j += 2) {
1173            j1 = j + l;
1174            j2 = j1 + l;
1175            j3 = j2 + l;
1176            x0r = a[j] + a[j1];
1177            x0i = a[j + 1] + a[j1 + 1];
1178            x1r = a[j] - a[j1];
1179            x1i = a[j + 1] - a[j1 + 1];
1180            x2r = a[j2] + a[j3];
1181            x2i = a[j2 + 1] + a[j3 + 1];
1182            x3r = a[j2] - a[j3];
1183            x3i = a[j2 + 1] - a[j3 + 1];
1184            a[j] = x0r + x2r;
1185            a[j + 1] = x0i + x2i;
1186            x0r -= x2r;
1187            x0i -= x2i;
1188            a[j2] = wk2r * x0r - wk2i * x0i;
1189            a[j2 + 1] = wk2r * x0i + wk2i * x0r;
1190            x0r = x1r - x3i;
1191            x0i = x1i + x3r;
1192            a[j1] = wk1r * x0r - wk1i * x0i;
1193            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1194            x0r = x1r + x3i;
1195            x0i = x1i - x3r;
1196            a[j3] = wk3r * x0r - wk3i * x0i;
1197            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1198        }
1199        wk1r = w[k2 + 2];
1200        wk1i = w[k2 + 3];
1201        wk3r = wk1r - 2 * wk2r * wk1i;
1202        wk3i = 2 * wk2r * wk1r - wk1i;
1203        for (j = k + m; j < l + (k + m); j += 2) {
1204            j1 = j + l;
1205            j2 = j1 + l;
1206            j3 = j2 + l;
1207            x0r = a[j] + a[j1];
1208            x0i = a[j + 1] + a[j1 + 1];
1209            x1r = a[j] - a[j1];
1210            x1i = a[j + 1] - a[j1 + 1];
1211            x2r = a[j2] + a[j3];
1212            x2i = a[j2 + 1] + a[j3 + 1];
1213            x3r = a[j2] - a[j3];
1214            x3i = a[j2 + 1] - a[j3 + 1];
1215            a[j] = x0r + x2r;
1216            a[j + 1] = x0i + x2i;
1217            x0r -= x2r;
1218            x0i -= x2i;
1219            a[j2] = -wk2i * x0r - wk2r * x0i;
1220            a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
1221            x0r = x1r - x3i;
1222            x0i = x1i + x3r;
1223            a[j1] = wk1r * x0r - wk1i * x0i;
1224            a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1225            x0r = x1r + x3i;
1226            x0i = x1i - x3r;
1227            a[j3] = wk3r * x0r - wk3i * x0i;
1228            a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1229        }
1230    }
1231}
1232
1233
1234static void rftfsub(int n, float *a, int nc, float *c)
1235{
1236    int j, k, kk, ks, m;
1237    float wkr, wki, xr, xi, yr, yi;
1238
1239    m = n >> 1;
1240    ks = 2 * nc / m;
1241    kk = 0;
1242    for (j = 2; j < m; j += 2) {
1243        k = n - j;
1244        kk += ks;
1245        wkr = 0.5f - c[nc - kk];
1246        wki = c[kk];
1247        xr = a[j] - a[k];
1248        xi = a[j + 1] + a[k + 1];
1249        yr = wkr * xr - wki * xi;
1250        yi = wkr * xi + wki * xr;
1251        a[j] -= yr;
1252        a[j + 1] -= yi;
1253        a[k] += yr;
1254        a[k + 1] -= yi;
1255    }
1256}
1257
1258
1259static void rftbsub(int n, float *a, int nc, float *c)
1260{
1261    int j, k, kk, ks, m;
1262    float wkr, wki, xr, xi, yr, yi;
1263
1264    a[1] = -a[1];
1265    m = n >> 1;
1266    ks = 2 * nc / m;
1267    kk = 0;
1268    for (j = 2; j < m; j += 2) {
1269        k = n - j;
1270        kk += ks;
1271        wkr = 0.5f - c[nc - kk];
1272        wki = c[kk];
1273        xr = a[j] - a[k];
1274        xi = a[j + 1] + a[k + 1];
1275        yr = wkr * xr + wki * xi;
1276        yi = wkr * xi - wki * xr;
1277        a[j] -= yr;
1278        a[j + 1] = yi - a[j + 1];
1279        a[k] += yr;
1280        a[k + 1] = yi - a[k + 1];
1281    }
1282    a[m + 1] = -a[m + 1];
1283}
1284
1285#if 0  // Not used.
1286static void dctsub(int n, float *a, int nc, float *c)
1287{
1288    int j, k, kk, ks, m;
1289    float wkr, wki, xr;
1290
1291    m = n >> 1;
1292    ks = nc / n;
1293    kk = 0;
1294    for (j = 1; j < m; j++) {
1295        k = n - j;
1296        kk += ks;
1297        wkr = c[kk] - c[nc - kk];
1298        wki = c[kk] + c[nc - kk];
1299        xr = wki * a[j] - wkr * a[k];
1300        a[j] = wkr * a[j] + wki * a[k];
1301        a[k] = xr;
1302    }
1303    a[m] *= c[0];
1304}
1305
1306
1307static void dstsub(int n, float *a, int nc, float *c)
1308{
1309    int j, k, kk, ks, m;
1310    float wkr, wki, xr;
1311
1312    m = n >> 1;
1313    ks = nc / n;
1314    kk = 0;
1315    for (j = 1; j < m; j++) {
1316        k = n - j;
1317        kk += ks;
1318        wkr = c[kk] - c[nc - kk];
1319        wki = c[kk] + c[nc - kk];
1320        xr = wki * a[k] - wkr * a[j];
1321        a[k] = wkr * a[k] + wki * a[j];
1322        a[j] = xr;
1323    }
1324    a[m] *= c[0];
1325}
1326#endif  // Not used.
1327