1a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten/*
2a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * Copyright (C) 2010 The Android Open Source Project
3a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten *
4a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * Licensed under the Apache License, Version 2.0 (the "License");
5a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * you may not use this file except in compliance with the License.
6a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * You may obtain a copy of the License at
7a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten *
8a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten *      http://www.apache.org/licenses/LICENSE-2.0
9a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten *
10a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * Unless required by applicable law or agreed to in writing, software
11a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * distributed under the License is distributed on an "AS IS" BASIS,
12a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * See the License for the specific language governing permissions and
14a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * limitations under the License.
15a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten */
16a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
17a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten/* A Fixed point implementation of Fast Fourier Transform (FFT). Complex numbers
18a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * are represented by 32-bit integers, where higher 16 bits are real part and
19a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * lower ones are imaginary part. Few compromises are made between efficiency,
20a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * accuracy, and maintainability. To make it fast, arithmetic shifts are used
21a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * instead of divisions, and bitwise inverses are used instead of negates. To
22a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * keep it small, only radix-2 Cooley-Tukey algorithm is implemented, and only
23a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * half of the twiddle factors are stored. Although there are still ways to make
24a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten * it even faster or smaller, it costs too much on one of the aspects.
25a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten */
26a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
27a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#include <stdio.h>
28a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#include <stdint.h>
29a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#ifdef __arm__
30a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#include <machine/cpu-features.h>
31a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#endif
32a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
33a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#include <audio_utils/fixedfft.h>
34a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
35a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#define LOG_FFT_SIZE 10
36a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#define MAX_FFT_SIZE (1 << LOG_FFT_SIZE)
37a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
38a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kastenstatic const int32_t twiddle[MAX_FFT_SIZE / 4] = {
39a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x00008000, 0xff378001, 0xfe6e8002, 0xfda58006, 0xfcdc800a, 0xfc13800f,
40a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xfb4a8016, 0xfa81801e, 0xf9b88027, 0xf8ef8032, 0xf827803e, 0xf75e804b,
41a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xf6958059, 0xf5cd8068, 0xf5058079, 0xf43c808b, 0xf374809e, 0xf2ac80b2,
42a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xf1e480c8, 0xf11c80de, 0xf05580f6, 0xef8d8110, 0xeec6812a, 0xedff8146,
43a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xed388163, 0xec718181, 0xebab81a0, 0xeae481c1, 0xea1e81e2, 0xe9588205,
44a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xe892822a, 0xe7cd824f, 0xe7078276, 0xe642829d, 0xe57d82c6, 0xe4b982f1,
45a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xe3f4831c, 0xe3308349, 0xe26d8377, 0xe1a983a6, 0xe0e683d6, 0xe0238407,
46a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xdf61843a, 0xde9e846e, 0xdddc84a3, 0xdd1b84d9, 0xdc598511, 0xdb998549,
47a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xdad88583, 0xda1885be, 0xd95885fa, 0xd8988637, 0xd7d98676, 0xd71b86b6,
48a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xd65c86f6, 0xd59e8738, 0xd4e1877b, 0xd42487c0, 0xd3678805, 0xd2ab884c,
49a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xd1ef8894, 0xd13488dd, 0xd0798927, 0xcfbe8972, 0xcf0489be, 0xce4b8a0c,
50a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xcd928a5a, 0xccd98aaa, 0xcc218afb, 0xcb698b4d, 0xcab28ba0, 0xc9fc8bf5,
51a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xc9468c4a, 0xc8908ca1, 0xc7db8cf8, 0xc7278d51, 0xc6738dab, 0xc5c08e06,
52a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xc50d8e62, 0xc45b8ebf, 0xc3a98f1d, 0xc2f88f7d, 0xc2488fdd, 0xc198903e,
53a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xc0e990a1, 0xc03a9105, 0xbf8c9169, 0xbedf91cf, 0xbe329236, 0xbd86929e,
54a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xbcda9307, 0xbc2f9371, 0xbb8593dc, 0xbadc9448, 0xba3394b5, 0xb98b9523,
55a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xb8e39592, 0xb83c9603, 0xb7969674, 0xb6f196e6, 0xb64c9759, 0xb5a897ce,
56a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xb5059843, 0xb46298b9, 0xb3c09930, 0xb31f99a9, 0xb27f9a22, 0xb1df9a9c,
57a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xb1409b17, 0xb0a29b94, 0xb0059c11, 0xaf689c8f, 0xaecc9d0e, 0xae319d8e,
58a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xad979e0f, 0xacfd9e91, 0xac659f14, 0xabcd9f98, 0xab36a01c, 0xaaa0a0a2,
59a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xaa0aa129, 0xa976a1b0, 0xa8e2a238, 0xa84fa2c2, 0xa7bda34c, 0xa72ca3d7,
60a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xa69ca463, 0xa60ca4f0, 0xa57ea57e, 0xa4f0a60c, 0xa463a69c, 0xa3d7a72c,
61a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xa34ca7bd, 0xa2c2a84f, 0xa238a8e2, 0xa1b0a976, 0xa129aa0a, 0xa0a2aaa0,
62a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0xa01cab36, 0x9f98abcd, 0x9f14ac65, 0x9e91acfd, 0x9e0fad97, 0x9d8eae31,
63a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x9d0eaecc, 0x9c8faf68, 0x9c11b005, 0x9b94b0a2, 0x9b17b140, 0x9a9cb1df,
64a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x9a22b27f, 0x99a9b31f, 0x9930b3c0, 0x98b9b462, 0x9843b505, 0x97ceb5a8,
65a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x9759b64c, 0x96e6b6f1, 0x9674b796, 0x9603b83c, 0x9592b8e3, 0x9523b98b,
66a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x94b5ba33, 0x9448badc, 0x93dcbb85, 0x9371bc2f, 0x9307bcda, 0x929ebd86,
67a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x9236be32, 0x91cfbedf, 0x9169bf8c, 0x9105c03a, 0x90a1c0e9, 0x903ec198,
68a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x8fddc248, 0x8f7dc2f8, 0x8f1dc3a9, 0x8ebfc45b, 0x8e62c50d, 0x8e06c5c0,
69a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x8dabc673, 0x8d51c727, 0x8cf8c7db, 0x8ca1c890, 0x8c4ac946, 0x8bf5c9fc,
70a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x8ba0cab2, 0x8b4dcb69, 0x8afbcc21, 0x8aaaccd9, 0x8a5acd92, 0x8a0cce4b,
71a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x89becf04, 0x8972cfbe, 0x8927d079, 0x88ddd134, 0x8894d1ef, 0x884cd2ab,
72a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x8805d367, 0x87c0d424, 0x877bd4e1, 0x8738d59e, 0x86f6d65c, 0x86b6d71b,
73a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x8676d7d9, 0x8637d898, 0x85fad958, 0x85beda18, 0x8583dad8, 0x8549db99,
74a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x8511dc59, 0x84d9dd1b, 0x84a3dddc, 0x846ede9e, 0x843adf61, 0x8407e023,
75a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x83d6e0e6, 0x83a6e1a9, 0x8377e26d, 0x8349e330, 0x831ce3f4, 0x82f1e4b9,
76a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x82c6e57d, 0x829de642, 0x8276e707, 0x824fe7cd, 0x822ae892, 0x8205e958,
77a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x81e2ea1e, 0x81c1eae4, 0x81a0ebab, 0x8181ec71, 0x8163ed38, 0x8146edff,
78a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x812aeec6, 0x8110ef8d, 0x80f6f055, 0x80def11c, 0x80c8f1e4, 0x80b2f2ac,
79a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x809ef374, 0x808bf43c, 0x8079f505, 0x8068f5cd, 0x8059f695, 0x804bf75e,
80a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x803ef827, 0x8032f8ef, 0x8027f9b8, 0x801efa81, 0x8016fb4a, 0x800ffc13,
81a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    0x800afcdc, 0x8006fda5, 0x8002fe6e, 0x8001ff37,
82a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten};
83a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
84a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten/* Returns the multiplication of \conj{a} and {b}. */
85a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kastenstatic inline int32_t mult(int32_t a, int32_t b)
86a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten{
87a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#if __ARM_ARCH__ >= 6
88a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    int32_t t = b;
89a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    __asm__("smuad  %0, %0, %1"          : "+r" (t) : "r" (a));
90a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    __asm__("smusdx %0, %0, %1"          : "+r" (b) : "r" (a));
91a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    __asm__("pkhtb  %0, %0, %1, ASR #16" : "+r" (t) : "r" (b));
92a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    return t;
93a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#else
94a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    return (((a >> 16) * (b >> 16) + (int16_t)a * (int16_t)b) & ~0xFFFF) |
95a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        ((((a >> 16) * (int16_t)b - (int16_t)a * (b >> 16)) >> 16) & 0xFFFF);
96a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#endif
97a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten}
98a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
99a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kastenstatic inline int32_t half(int32_t a)
100a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten{
101a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#if __ARM_ARCH__ >= 6
102a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    __asm__("shadd16 %0, %0, %1" : "+r" (a) : "r" (0));
103a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    return a;
104a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#else
105a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    return ((a >> 1) & ~0x8000) | (a & 0x8000);
106a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten#endif
107a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten}
108a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
109a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kastenvoid fixed_fft(int n, int32_t *v)
110a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten{
111a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    int scale = LOG_FFT_SIZE, i, p, r;
112a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
113a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    for (r = 0, i = 1; i < n; ++i) {
114a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        for (p = n; !(p & r); p >>= 1, r ^= p);
115a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        if (i < r) {
116a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            int32_t t = v[i];
117a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            v[i] = v[r];
118a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            v[r] = t;
119a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        }
120a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    }
121a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
122a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    for (p = 1; p < n; p <<= 1) {
123a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        --scale;
124a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
125a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        for (i = 0; i < n; i += p << 1) {
126a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            int32_t x = half(v[i]);
127a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            int32_t y = half(v[i + p]);
128a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            v[i] = x + y;
129a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            v[i + p] = x - y;
130a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        }
131a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
132a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        for (r = 1; r < p; ++r) {
133a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            int32_t w = MAX_FFT_SIZE / 4 - (r << scale);
134a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            i = w >> 31;
135a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            w = twiddle[(w ^ i) - i] ^ (i << 16);
136a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            for (i = r; i < n; i += p << 1) {
137a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten                int32_t x = half(v[i]);
138a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten                int32_t y = mult(w, v[i + p]);
139a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten                v[i] = x - y;
140a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten                v[i + p] = x + y;
141a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten            }
142a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        }
143a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    }
144a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten}
145a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
146a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kastenvoid fixed_fft_real(int n, int32_t *v)
147a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten{
148a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    int scale = LOG_FFT_SIZE, m = n >> 1, i;
149a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
150a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    fixed_fft(n, v);
151a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    for (i = 1; i <= n; i <<= 1, --scale);
152a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    v[0] = mult(~v[0], 0x80008000);
153a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    v[m] = half(v[m]);
154a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten
155a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    for (i = 1; i < n >> 1; ++i) {
156a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        int32_t x = half(v[i]);
157a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        int32_t z = half(v[n - i]);
158a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        int32_t y = z - (x ^ 0xFFFF);
159a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        x = half(x + (z ^ 0xFFFF));
160a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        y = mult(y, twiddle[i << scale]);
161a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        v[i] = x - y;
162a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten        v[n - i] = (x + y) ^ 0xFFFF;
163a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten    }
164a269f35b6247cb69e8815b84440bf1bfc938b87bGlenn Kasten}
165