1/*
2 * Copyright (C) 2010 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
18 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
21 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26// FFTFrame implementation using Intel's Math Kernel Library (MKL),
27// suitable for use on Windows and Linux.
28
29#include "config.h"
30
31#if ENABLE(WEB_AUDIO)
32
33#if !OS(DARWIN) && USE(WEBAUDIO_MKL)
34
35#include "FFTFrame.h"
36
37#include "mkl_vml.h"
38#include <wtf/MathExtras.h>
39
40namespace {
41
42DFTI_DESCRIPTOR_HANDLE createDescriptorHandle(int fftSize)
43{
44    DFTI_DESCRIPTOR_HANDLE handle = 0;
45
46    // Create DFTI descriptor for 1D single precision transform.
47    MKL_LONG status = DftiCreateDescriptor(&handle, DFTI_SINGLE, DFTI_REAL, 1, fftSize);
48    ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
49
50    // Set placement of result to DFTI_NOT_INPLACE.
51    status = DftiSetValue(handle, DFTI_PLACEMENT, DFTI_NOT_INPLACE);
52    ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
53
54    // Set packing format to PERM; this produces the layout which
55    // matches Accelerate.framework's on the Mac, though interleaved.
56    status = DftiSetValue(handle, DFTI_PACKED_FORMAT, DFTI_PERM_FORMAT);
57    ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
58
59    // Set the forward scale factor to 2 to match Accelerate.framework's.
60    // FIXME: FFTFrameMac's scaling factor could be fixed to be 1.0,
61    // in which case this code would need to be changed as well.
62    status = DftiSetValue(handle, DFTI_FORWARD_SCALE, 2.0);
63    ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
64
65    // Set the backward scale factor to 1 / 2n to match Accelerate.framework's.
66    // FIXME: if the above scaling factor is fixed then this needs to be as well.
67    double scale = 1.0 / (2.0 * fftSize);
68    status = DftiSetValue(handle, DFTI_BACKWARD_SCALE, scale);
69    ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
70
71    // Use the default DFTI_CONJUGATE_EVEN_STORAGE = DFTI_COMPLEX_REAL.
72
73    // Commit DFTI descriptor.
74    status = DftiCommitDescriptor(handle);
75    ASSERT(DftiErrorClass(status, DFTI_NO_ERROR));
76
77    return handle;
78}
79
80} // anonymous namespace
81
82namespace WebCore {
83
84const int kMaxFFTPow2Size = 24;
85
86DFTI_DESCRIPTOR_HANDLE* FFTFrame::descriptorHandles = 0;
87
88// Normal constructor: allocates for a given fftSize.
89FFTFrame::FFTFrame(unsigned fftSize)
90    : m_FFTSize(fftSize)
91    , m_log2FFTSize(static_cast<unsigned>(log2(fftSize)))
92    , m_handle(0)
93    , m_complexData(fftSize)
94    , m_realData(fftSize / 2)
95    , m_imagData(fftSize / 2)
96{
97    // We only allow power of two.
98    ASSERT(1UL << m_log2FFTSize == m_FFTSize);
99
100    m_handle = descriptorHandleForSize(fftSize);
101}
102
103// Creates a blank/empty frame (interpolate() must later be called).
104FFTFrame::FFTFrame()
105    : m_FFTSize(0)
106    , m_log2FFTSize(0)
107    , m_handle(0)
108{
109}
110
111// Copy constructor.
112FFTFrame::FFTFrame(const FFTFrame& frame)
113    : m_FFTSize(frame.m_FFTSize)
114    , m_log2FFTSize(frame.m_log2FFTSize)
115    , m_handle(0)
116    , m_complexData(frame.m_FFTSize)
117    , m_realData(frame.m_FFTSize / 2)
118    , m_imagData(frame.m_FFTSize / 2)
119{
120    m_handle = descriptorHandleForSize(m_FFTSize);
121
122    // Copy/setup frame data.
123    unsigned nbytes = sizeof(float) * (m_FFTSize / 2);
124    memcpy(realData(), frame.realData(), nbytes);
125    memcpy(imagData(), frame.imagData(), nbytes);
126}
127
128FFTFrame::~FFTFrame()
129{
130}
131
132void FFTFrame::multiply(const FFTFrame& frame)
133{
134    FFTFrame& frame1 = *this;
135    FFTFrame& frame2 = const_cast<FFTFrame&>(frame);
136
137    float* realP1 = frame1.realData();
138    float* imagP1 = frame1.imagData();
139    const float* realP2 = frame2.realData();
140    const float* imagP2 = frame2.imagData();
141
142    // Scale accounts for vecLib's peculiar scaling.
143    // This ensures the right scaling all the way back to inverse FFT.
144    // FIXME: this scaling factor will be 1.0f if the above 2.0 -> 1.0
145    // scaling factor is fixed.
146    float scale = 0.5f;
147
148    // Multiply packed DC/nyquist component.
149    realP1[0] *= scale * realP2[0];
150    imagP1[0] *= scale * imagP2[0];
151
152    // Multiply the rest, skipping packed DC/Nyquist components.
153    float* interleavedData1 = frame1.getUpToDateComplexData();
154    float* interleavedData2 = frame2.getUpToDateComplexData();
155
156    unsigned halfSize = m_FFTSize / 2;
157
158    // Complex multiply.
159    vcMul(halfSize - 1,
160          reinterpret_cast<MKL_Complex8*>(interleavedData1) + 1,
161          reinterpret_cast<MKL_Complex8*>(interleavedData2) + 1,
162          reinterpret_cast<MKL_Complex8*>(interleavedData1) + 1);
163
164    // De-interleave and scale the rest of the data.
165    // FIXME: find an MKL routine to do at least the scaling more efficiently.
166    for (unsigned i = 1; i < halfSize; ++i) {
167        int baseComplexIndex = 2 * i;
168        realP1[i] = scale * interleavedData1[baseComplexIndex];
169        imagP1[i] = scale * interleavedData1[baseComplexIndex + 1];
170    }
171}
172
173void FFTFrame::doFFT(float* data)
174{
175    // Compute Forward transform.
176    MKL_LONG status = DftiComputeForward(m_handle, data, m_complexData.data());
177    ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
178
179    // De-interleave to separate real and complex arrays. FIXME:
180    // figure out if it's possible to get MKL to use split-complex
181    // form for 1D real-to-complex out-of-place FFTs.
182    int len = m_FFTSize / 2;
183    for (int i = 0; i < len; ++i) {
184        int baseComplexIndex = 2 * i;
185        // m_realData[0] is the DC component and m_imagData[0] the
186        // Nyquist component since the interleaved complex data is
187        // packed.
188        m_realData[i] = m_complexData[baseComplexIndex];
189        m_imagData[i] = m_complexData[baseComplexIndex + 1];
190    }
191}
192
193void FFTFrame::doInverseFFT(float* data)
194{
195    // Prepare interleaved data. FIXME: figure out if it's possible to
196    // get MKL to use split-complex form for 1D backward
197    // (complex-to-real) out-of-place FFTs.
198    float* interleavedData = getUpToDateComplexData();
199
200    // Compute backward transform.
201    MKL_LONG status = DftiComputeBackward(m_handle, interleavedData, data);
202    ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
203}
204
205void FFTFrame::initialize()
206{
207}
208
209void FFTFrame::cleanup()
210{
211    if (!descriptorHandles)
212        return;
213
214    for (int i = 0; i < kMaxFFTPow2Size; ++i) {
215        if (descriptorHandles[i]) {
216            MKL_LONG status = DftiFreeDescriptor(&descriptorHandles[i]);
217            ASSERT_UNUSED(status, DftiErrorClass(status, DFTI_NO_ERROR));
218        }
219    }
220
221    delete[] descriptorHandles;
222    descriptorHandles = 0;
223}
224
225float* FFTFrame::realData() const
226{
227    return const_cast<float*>(m_realData.data());
228}
229
230float* FFTFrame::imagData() const
231{
232    return const_cast<float*>(m_imagData.data());
233}
234
235float* FFTFrame::getUpToDateComplexData()
236{
237    // FIXME: if we can't completely get rid of this method, SSE
238    // optimization could be considered if it shows up hot on profiles.
239    int len = m_FFTSize / 2;
240    for (int i = 0; i < len; ++i) {
241        int baseComplexIndex = 2 * i;
242        m_complexData[baseComplexIndex] = m_realData[i];
243        m_complexData[baseComplexIndex + 1] = m_imagData[i];
244    }
245    return const_cast<float*>(m_complexData.data());
246}
247
248DFTI_DESCRIPTOR_HANDLE FFTFrame::descriptorHandleForSize(unsigned fftSize)
249{
250    if (!descriptorHandles) {
251        descriptorHandles = new DFTI_DESCRIPTOR_HANDLE[kMaxFFTPow2Size];
252        for (int i = 0; i < kMaxFFTPow2Size; ++i)
253            descriptorHandles[i] = 0;
254    }
255
256    ASSERT(fftSize);
257    int pow2size = static_cast<int>(log2(fftSize));
258    ASSERT(pow2size < kMaxFFTPow2Size);
259    if (!descriptorHandles[pow2size])
260        descriptorHandles[pow2size] = createDescriptorHandle(fftSize);
261    return descriptorHandles[pow2size];
262}
263
264} // namespace WebCore
265
266#endif // !OS(DARWIN) && USE(WEBAUDIO_MKL)
267
268#endif // ENABLE(WEB_AUDIO)
269