FFTFrameFFTW.cpp revision 2daae5fd11344eaa88a0d92b0f6d65f8d2255c00
1/*
2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26// FFTFrame implementation using the FFTW library.
27
28#include "config.h"
29
30#if ENABLE(WEB_AUDIO)
31
32#if !OS(DARWIN) && USE(WEBAUDIO_FFTW)
33
34#include "FFTFrame.h"
35
36#include <wtf/MathExtras.h>
37
38namespace WebCore {
39
40const int kMaxFFTPow2Size = 24;
41
42fftwf_plan* FFTFrame::fftwForwardPlans = 0;
43fftwf_plan* FFTFrame::fftwBackwardPlans = 0;
44
45Mutex* FFTFrame::s_planLock = 0;
46
47namespace {
48
49unsigned unpackedFFTWDataSize(unsigned fftSize)
50{
51    return fftSize / 2 + 1;
52}
53
54} // anonymous namespace
55
56
57// Normal constructor: allocates for a given fftSize.
58FFTFrame::FFTFrame(unsigned fftSize)
59    : m_FFTSize(fftSize)
60    , m_log2FFTSize(static_cast<unsigned>(log2(fftSize)))
61    , m_forwardPlan(0)
62    , m_backwardPlan(0)
63    , m_data(2 * (3 + unpackedFFTWDataSize(fftSize))) // enough space for real and imaginary data plus 16-byte alignment padding
64{
65    // We only allow power of two.
66    ASSERT(1UL << m_log2FFTSize == m_FFTSize);
67
68    // FFTW won't create a plan without being able to look at non-null
69    // pointers for the input and output data; it wants to be able to
70    // see whether these arrays are aligned properly for vector
71    // operations. Ideally we would use fftw_malloc and fftw_free for
72    // the input and output arrays to ensure proper alignment for SIMD
73    // operations, so that we don't have to specify FFTW_UNALIGNED
74    // when creating the plan. However, since we don't have control
75    // over the alignment of the array passed to doFFT / doInverseFFT,
76    // we would need to memcpy it in to or out of the FFTFrame, adding
77    // overhead. For the time being, we just assume unaligned data and
78    // pass a temporary pointer down.
79
80    float temporary;
81    m_forwardPlan = fftwPlanForSize(fftSize, Forward,
82                                    &temporary, realData(), imagData());
83    m_backwardPlan = fftwPlanForSize(fftSize, Backward,
84                                     realData(), imagData(), &temporary);
85}
86
87// Creates a blank/empty frame (interpolate() must later be called).
88FFTFrame::FFTFrame()
89    : m_FFTSize(0)
90    , m_log2FFTSize(0)
91    , m_forwardPlan(0)
92    , m_backwardPlan(0)
93{
94}
95
96// Copy constructor.
97FFTFrame::FFTFrame(const FFTFrame& frame)
98    : m_FFTSize(frame.m_FFTSize)
99    , m_log2FFTSize(frame.m_log2FFTSize)
100    , m_forwardPlan(0)
101    , m_backwardPlan(0)
102    , m_data(2 * (3 + unpackedFFTWDataSize(fftSize()))) // enough space for real and imaginary data plus 16-byte alignment padding
103{
104    // See the normal constructor for an explanation of the temporary pointer.
105    float temporary;
106    m_forwardPlan = fftwPlanForSize(m_FFTSize, Forward,
107                                    &temporary, realData(), imagData());
108    m_backwardPlan = fftwPlanForSize(m_FFTSize, Backward,
109                                     realData(), imagData(), &temporary);
110
111    // Copy/setup frame data.
112    size_t nbytes = sizeof(float) * unpackedFFTWDataSize(fftSize());
113    memcpy(realData(), frame.realData(), nbytes);
114    memcpy(imagData(), frame.imagData(), nbytes);
115}
116
117FFTFrame::~FFTFrame()
118{
119}
120
121void FFTFrame::multiply(const FFTFrame& frame)
122{
123    FFTFrame& frame1 = *this;
124    FFTFrame& frame2 = const_cast<FFTFrame&>(frame);
125
126    float* realP1 = frame1.realData();
127    float* imagP1 = frame1.imagData();
128    const float* realP2 = frame2.realData();
129    const float* imagP2 = frame2.imagData();
130
131    // Scale accounts the peculiar scaling of vecLib on the Mac.
132    // This ensures the right scaling all the way back to inverse FFT.
133    // FIXME: if we change the scaling on the Mac then this scale
134    // factor will need to change too.
135    float scale = 0.5f;
136
137    // Multiply the packed DC/nyquist component
138    realP1[0] *= scale * realP2[0];
139    imagP1[0] *= scale * imagP2[0];
140
141    // Complex multiplication. If this loop turns out to be hot then
142    // we should use SSE or other intrinsics to accelerate it.
143    unsigned halfSize = fftSize() / 2;
144
145    for (unsigned i = 1; i < halfSize; ++i) {
146        float realResult = realP1[i] * realP2[i] - imagP1[i] * imagP2[i];
147        float imagResult = realP1[i] * imagP2[i] + imagP1[i] * realP2[i];
148
149        realP1[i] = scale * realResult;
150        imagP1[i] = scale * imagResult;
151    }
152}
153
154void FFTFrame::doFFT(float* data)
155{
156    fftwf_execute_split_dft_r2c(m_forwardPlan, data, realData(), imagData());
157
158    // Scale the frequency domain data to match vecLib's scale factor
159    // on the Mac. FIXME: if we change the definition of FFTFrame to
160    // eliminate this scale factor then this code will need to change.
161    // Also, if this loop turns out to be hot then we should use SSE
162    // or other intrinsics to accelerate it.
163    float scaleFactor = 2;
164    unsigned length = unpackedFFTWDataSize(fftSize());
165    float* realData = this->realData();
166    float* imagData = this->imagData();
167
168    for (unsigned i = 0; i < length; ++i) {
169        realData[i] = realData[i] * scaleFactor;
170        imagData[i] = imagData[i] * scaleFactor;
171    }
172
173    // Move the Nyquist component to the location expected by the
174    // FFTFrame API.
175    imagData[0] = realData[length - 1];
176}
177
178void FFTFrame::doInverseFFT(float* data)
179{
180    unsigned length = unpackedFFTWDataSize(fftSize());
181    float* realData = this->realData();
182    float* imagData = this->imagData();
183
184    // Move the Nyquist component to the location expected by FFTW.
185    realData[length - 1] = imagData[0];
186    imagData[length - 1] = 0;
187    imagData[0] = 0;
188
189    fftwf_execute_split_dft_c2r(m_backwardPlan, realData, imagData, data);
190
191    // Restore the original scaling of the time domain data.
192    // FIXME: if we change the definition of FFTFrame to eliminate the
193    // scale factor then this code will need to change. Also, if this
194    // loop turns out to be hot then we should use SSE or other
195    // intrinsics to accelerate it.
196    float scaleFactor = 1.0 / (2.0 * fftSize());
197    unsigned n = fftSize();
198    for (unsigned i = 0; i < n; ++i)
199        data[i] *= scaleFactor;
200
201    // Move the Nyquist component back to the location expected by the
202    // FFTFrame API.
203    imagData[0] = realData[length - 1];
204}
205
206void FFTFrame::initialize()
207{
208    if (!fftwForwardPlans) {
209        fftwForwardPlans = new fftwf_plan[kMaxFFTPow2Size];
210        fftwBackwardPlans = new fftwf_plan[kMaxFFTPow2Size];
211        for (int i = 0; i < kMaxFFTPow2Size; ++i) {
212            fftwForwardPlans[i] = 0;
213            fftwBackwardPlans[i] = 0;
214        }
215    }
216
217    if (!s_planLock)
218        s_planLock = new Mutex();
219}
220
221void FFTFrame::cleanup()
222{
223    if (!fftwForwardPlans)
224        return;
225
226    for (int i = 0; i < kMaxFFTPow2Size; ++i) {
227        if (fftwForwardPlans[i])
228            fftwf_destroy_plan(fftwForwardPlans[i]);
229        if (fftwBackwardPlans[i])
230            fftwf_destroy_plan(fftwBackwardPlans[i]);
231    }
232
233    delete[] fftwForwardPlans;
234    delete[] fftwBackwardPlans;
235
236    fftwForwardPlans = 0;
237    fftwBackwardPlans = 0;
238
239    delete s_planLock;
240    s_planLock = 0;
241}
242
243float* FFTFrame::realData() const
244{
245    return const_cast<float*>(m_data.data());
246}
247
248float* FFTFrame::imagData() const
249{
250    // Imaginary data is stored following the real data with enough padding for 16-byte alignment.
251    return const_cast<float*>(realData() + unpackedFFTWDataSize(fftSize()) + 3);
252}
253
254fftwf_plan FFTFrame::fftwPlanForSize(unsigned fftSize, Direction direction,
255                                     float* data1, float* data2, float* data3)
256{
257    // initialize() must be called first.
258    ASSERT(fftwForwardPlans);
259    if (!fftwForwardPlans)
260        return 0;
261
262    ASSERT(s_planLock);
263    if (!s_planLock)
264        return 0;
265    MutexLocker locker(*s_planLock);
266
267    ASSERT(fftSize);
268    int pow2size = static_cast<int>(log2(fftSize));
269    ASSERT(pow2size < kMaxFFTPow2Size);
270    fftwf_plan* plans = (direction == Forward) ? fftwForwardPlans : fftwBackwardPlans;
271    if (!plans[pow2size]) {
272        fftwf_iodim dimension;
273        dimension.n = fftSize;
274        dimension.is = 1;
275        dimension.os = 1;
276
277        // For the time being, we do not take the input data into
278        // account when choosing a plan, so that we can most easily
279        // reuse plans with different input data.
280
281        // FIXME: allocate input and output data inside this class to
282        // be able to take advantage of alignment and SIMD optimizations.
283        unsigned flags = FFTW_ESTIMATE | FFTW_PRESERVE_INPUT | FFTW_UNALIGNED;
284        switch (direction) {
285        case Forward:
286            plans[pow2size] = fftwf_plan_guru_split_dft_r2c(1, &dimension, 0, 0,
287                                                            data1, data2, data3,
288                                                            flags);
289            break;
290        case Backward:
291            plans[pow2size] = fftwf_plan_guru_split_dft_c2r(1, &dimension, 0, 0,
292                                                            data1, data2, data3,
293                                                            flags);
294            break;
295        }
296    }
297    ASSERT(plans[pow2size]);
298    return plans[pow2size];
299}
300
301} // namespace WebCore
302
303#endif // !OS(DARWIN) && USE(WEBAUDIO_FFTW)
304
305#endif // ENABLE(WEB_AUDIO)
306