1/*
2 * Copyright (C) 2012 Intel Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30
31#if ENABLE(WEB_AUDIO)
32
33#include "platform/audio/DirectConvolver.h"
34
35#if OS(MACOSX)
36#include <Accelerate/Accelerate.h>
37#endif
38
39#include "platform/audio/VectorMath.h"
40#include "wtf/CPU.h"
41
42namespace WebCore {
43
44using namespace VectorMath;
45
46DirectConvolver::DirectConvolver(size_t inputBlockSize)
47    : m_inputBlockSize(inputBlockSize)
48#if USE(WEBAUDIO_IPP)
49    , m_overlayBuffer(inputBlockSize)
50#endif // USE(WEBAUDIO_IPP)
51    , m_buffer(inputBlockSize * 2)
52{
53}
54
55void DirectConvolver::process(AudioFloatArray* convolutionKernel, const float* sourceP, float* destP, size_t framesToProcess)
56{
57    ASSERT(framesToProcess == m_inputBlockSize);
58    if (framesToProcess != m_inputBlockSize)
59        return;
60
61    // Only support kernelSize <= m_inputBlockSize
62    size_t kernelSize = convolutionKernel->size();
63    ASSERT(kernelSize <= m_inputBlockSize);
64    if (kernelSize > m_inputBlockSize)
65        return;
66
67    float* kernelP = convolutionKernel->data();
68
69    // Sanity check
70    bool isCopyGood = kernelP && sourceP && destP && m_buffer.data();
71    ASSERT(isCopyGood);
72    if (!isCopyGood)
73        return;
74
75#if USE(WEBAUDIO_IPP)
76    float* outputBuffer = m_buffer.data();
77    float* overlayBuffer = m_overlayBuffer.data();
78    bool isCopyGood2 = overlayBuffer && m_overlayBuffer.size() >= kernelSize && m_buffer.size() == m_inputBlockSize * 2;
79    ASSERT(isCopyGood2);
80    if (!isCopyGood2)
81        return;
82
83    ippsConv_32f(static_cast<const Ipp32f*>(sourceP), framesToProcess, static_cast<Ipp32f*>(kernelP), kernelSize, static_cast<Ipp32f*>(outputBuffer));
84
85    vadd(outputBuffer, 1, overlayBuffer, 1, destP, 1, framesToProcess);
86    memcpy(overlayBuffer, outputBuffer + m_inputBlockSize, sizeof(float) * kernelSize);
87#else
88    float* inputP = m_buffer.data() + m_inputBlockSize;
89
90    // Copy samples to 2nd half of input buffer.
91    memcpy(inputP, sourceP, sizeof(float) * framesToProcess);
92
93#if OS(MACOSX)
94#if CPU(X86)
95    conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1, framesToProcess, kernelSize);
96#else
97    vDSP_conv(inputP - kernelSize + 1, 1, kernelP + kernelSize - 1, -1, destP, 1, framesToProcess, kernelSize);
98#endif // CPU(X86)
99#else
100    // FIXME: The macro can be further optimized to avoid pipeline stalls. One possibility is to maintain 4 separate sums and change the macro to CONVOLVE_FOUR_SAMPLES.
101#define CONVOLVE_ONE_SAMPLE                 \
102    do {                                    \
103        sum += inputP[i - j] * kernelP[j];  \
104        j++;                                \
105    } while (0)
106
107    size_t i = 0;
108    while (i < framesToProcess) {
109        size_t j = 0;
110        float sum = 0;
111
112        // FIXME: SSE optimization may be applied here.
113        if (kernelSize == 32) {
114            CONVOLVE_ONE_SAMPLE; // 1
115            CONVOLVE_ONE_SAMPLE; // 2
116            CONVOLVE_ONE_SAMPLE; // 3
117            CONVOLVE_ONE_SAMPLE; // 4
118            CONVOLVE_ONE_SAMPLE; // 5
119            CONVOLVE_ONE_SAMPLE; // 6
120            CONVOLVE_ONE_SAMPLE; // 7
121            CONVOLVE_ONE_SAMPLE; // 8
122            CONVOLVE_ONE_SAMPLE; // 9
123            CONVOLVE_ONE_SAMPLE; // 10
124
125            CONVOLVE_ONE_SAMPLE; // 11
126            CONVOLVE_ONE_SAMPLE; // 12
127            CONVOLVE_ONE_SAMPLE; // 13
128            CONVOLVE_ONE_SAMPLE; // 14
129            CONVOLVE_ONE_SAMPLE; // 15
130            CONVOLVE_ONE_SAMPLE; // 16
131            CONVOLVE_ONE_SAMPLE; // 17
132            CONVOLVE_ONE_SAMPLE; // 18
133            CONVOLVE_ONE_SAMPLE; // 19
134            CONVOLVE_ONE_SAMPLE; // 20
135
136            CONVOLVE_ONE_SAMPLE; // 21
137            CONVOLVE_ONE_SAMPLE; // 22
138            CONVOLVE_ONE_SAMPLE; // 23
139            CONVOLVE_ONE_SAMPLE; // 24
140            CONVOLVE_ONE_SAMPLE; // 25
141            CONVOLVE_ONE_SAMPLE; // 26
142            CONVOLVE_ONE_SAMPLE; // 27
143            CONVOLVE_ONE_SAMPLE; // 28
144            CONVOLVE_ONE_SAMPLE; // 29
145            CONVOLVE_ONE_SAMPLE; // 30
146
147            CONVOLVE_ONE_SAMPLE; // 31
148            CONVOLVE_ONE_SAMPLE; // 32
149
150        } else if (kernelSize == 64) {
151            CONVOLVE_ONE_SAMPLE; // 1
152            CONVOLVE_ONE_SAMPLE; // 2
153            CONVOLVE_ONE_SAMPLE; // 3
154            CONVOLVE_ONE_SAMPLE; // 4
155            CONVOLVE_ONE_SAMPLE; // 5
156            CONVOLVE_ONE_SAMPLE; // 6
157            CONVOLVE_ONE_SAMPLE; // 7
158            CONVOLVE_ONE_SAMPLE; // 8
159            CONVOLVE_ONE_SAMPLE; // 9
160            CONVOLVE_ONE_SAMPLE; // 10
161
162            CONVOLVE_ONE_SAMPLE; // 11
163            CONVOLVE_ONE_SAMPLE; // 12
164            CONVOLVE_ONE_SAMPLE; // 13
165            CONVOLVE_ONE_SAMPLE; // 14
166            CONVOLVE_ONE_SAMPLE; // 15
167            CONVOLVE_ONE_SAMPLE; // 16
168            CONVOLVE_ONE_SAMPLE; // 17
169            CONVOLVE_ONE_SAMPLE; // 18
170            CONVOLVE_ONE_SAMPLE; // 19
171            CONVOLVE_ONE_SAMPLE; // 20
172
173            CONVOLVE_ONE_SAMPLE; // 21
174            CONVOLVE_ONE_SAMPLE; // 22
175            CONVOLVE_ONE_SAMPLE; // 23
176            CONVOLVE_ONE_SAMPLE; // 24
177            CONVOLVE_ONE_SAMPLE; // 25
178            CONVOLVE_ONE_SAMPLE; // 26
179            CONVOLVE_ONE_SAMPLE; // 27
180            CONVOLVE_ONE_SAMPLE; // 28
181            CONVOLVE_ONE_SAMPLE; // 29
182            CONVOLVE_ONE_SAMPLE; // 30
183
184            CONVOLVE_ONE_SAMPLE; // 31
185            CONVOLVE_ONE_SAMPLE; // 32
186            CONVOLVE_ONE_SAMPLE; // 33
187            CONVOLVE_ONE_SAMPLE; // 34
188            CONVOLVE_ONE_SAMPLE; // 35
189            CONVOLVE_ONE_SAMPLE; // 36
190            CONVOLVE_ONE_SAMPLE; // 37
191            CONVOLVE_ONE_SAMPLE; // 38
192            CONVOLVE_ONE_SAMPLE; // 39
193            CONVOLVE_ONE_SAMPLE; // 40
194
195            CONVOLVE_ONE_SAMPLE; // 41
196            CONVOLVE_ONE_SAMPLE; // 42
197            CONVOLVE_ONE_SAMPLE; // 43
198            CONVOLVE_ONE_SAMPLE; // 44
199            CONVOLVE_ONE_SAMPLE; // 45
200            CONVOLVE_ONE_SAMPLE; // 46
201            CONVOLVE_ONE_SAMPLE; // 47
202            CONVOLVE_ONE_SAMPLE; // 48
203            CONVOLVE_ONE_SAMPLE; // 49
204            CONVOLVE_ONE_SAMPLE; // 50
205
206            CONVOLVE_ONE_SAMPLE; // 51
207            CONVOLVE_ONE_SAMPLE; // 52
208            CONVOLVE_ONE_SAMPLE; // 53
209            CONVOLVE_ONE_SAMPLE; // 54
210            CONVOLVE_ONE_SAMPLE; // 55
211            CONVOLVE_ONE_SAMPLE; // 56
212            CONVOLVE_ONE_SAMPLE; // 57
213            CONVOLVE_ONE_SAMPLE; // 58
214            CONVOLVE_ONE_SAMPLE; // 59
215            CONVOLVE_ONE_SAMPLE; // 60
216
217            CONVOLVE_ONE_SAMPLE; // 61
218            CONVOLVE_ONE_SAMPLE; // 62
219            CONVOLVE_ONE_SAMPLE; // 63
220            CONVOLVE_ONE_SAMPLE; // 64
221
222        } else if (kernelSize == 128) {
223            CONVOLVE_ONE_SAMPLE; // 1
224            CONVOLVE_ONE_SAMPLE; // 2
225            CONVOLVE_ONE_SAMPLE; // 3
226            CONVOLVE_ONE_SAMPLE; // 4
227            CONVOLVE_ONE_SAMPLE; // 5
228            CONVOLVE_ONE_SAMPLE; // 6
229            CONVOLVE_ONE_SAMPLE; // 7
230            CONVOLVE_ONE_SAMPLE; // 8
231            CONVOLVE_ONE_SAMPLE; // 9
232            CONVOLVE_ONE_SAMPLE; // 10
233
234            CONVOLVE_ONE_SAMPLE; // 11
235            CONVOLVE_ONE_SAMPLE; // 12
236            CONVOLVE_ONE_SAMPLE; // 13
237            CONVOLVE_ONE_SAMPLE; // 14
238            CONVOLVE_ONE_SAMPLE; // 15
239            CONVOLVE_ONE_SAMPLE; // 16
240            CONVOLVE_ONE_SAMPLE; // 17
241            CONVOLVE_ONE_SAMPLE; // 18
242            CONVOLVE_ONE_SAMPLE; // 19
243            CONVOLVE_ONE_SAMPLE; // 20
244
245            CONVOLVE_ONE_SAMPLE; // 21
246            CONVOLVE_ONE_SAMPLE; // 22
247            CONVOLVE_ONE_SAMPLE; // 23
248            CONVOLVE_ONE_SAMPLE; // 24
249            CONVOLVE_ONE_SAMPLE; // 25
250            CONVOLVE_ONE_SAMPLE; // 26
251            CONVOLVE_ONE_SAMPLE; // 27
252            CONVOLVE_ONE_SAMPLE; // 28
253            CONVOLVE_ONE_SAMPLE; // 29
254            CONVOLVE_ONE_SAMPLE; // 30
255
256            CONVOLVE_ONE_SAMPLE; // 31
257            CONVOLVE_ONE_SAMPLE; // 32
258            CONVOLVE_ONE_SAMPLE; // 33
259            CONVOLVE_ONE_SAMPLE; // 34
260            CONVOLVE_ONE_SAMPLE; // 35
261            CONVOLVE_ONE_SAMPLE; // 36
262            CONVOLVE_ONE_SAMPLE; // 37
263            CONVOLVE_ONE_SAMPLE; // 38
264            CONVOLVE_ONE_SAMPLE; // 39
265            CONVOLVE_ONE_SAMPLE; // 40
266
267            CONVOLVE_ONE_SAMPLE; // 41
268            CONVOLVE_ONE_SAMPLE; // 42
269            CONVOLVE_ONE_SAMPLE; // 43
270            CONVOLVE_ONE_SAMPLE; // 44
271            CONVOLVE_ONE_SAMPLE; // 45
272            CONVOLVE_ONE_SAMPLE; // 46
273            CONVOLVE_ONE_SAMPLE; // 47
274            CONVOLVE_ONE_SAMPLE; // 48
275            CONVOLVE_ONE_SAMPLE; // 49
276            CONVOLVE_ONE_SAMPLE; // 50
277
278            CONVOLVE_ONE_SAMPLE; // 51
279            CONVOLVE_ONE_SAMPLE; // 52
280            CONVOLVE_ONE_SAMPLE; // 53
281            CONVOLVE_ONE_SAMPLE; // 54
282            CONVOLVE_ONE_SAMPLE; // 55
283            CONVOLVE_ONE_SAMPLE; // 56
284            CONVOLVE_ONE_SAMPLE; // 57
285            CONVOLVE_ONE_SAMPLE; // 58
286            CONVOLVE_ONE_SAMPLE; // 59
287            CONVOLVE_ONE_SAMPLE; // 60
288
289            CONVOLVE_ONE_SAMPLE; // 61
290            CONVOLVE_ONE_SAMPLE; // 62
291            CONVOLVE_ONE_SAMPLE; // 63
292            CONVOLVE_ONE_SAMPLE; // 64
293            CONVOLVE_ONE_SAMPLE; // 65
294            CONVOLVE_ONE_SAMPLE; // 66
295            CONVOLVE_ONE_SAMPLE; // 67
296            CONVOLVE_ONE_SAMPLE; // 68
297            CONVOLVE_ONE_SAMPLE; // 69
298            CONVOLVE_ONE_SAMPLE; // 70
299
300            CONVOLVE_ONE_SAMPLE; // 71
301            CONVOLVE_ONE_SAMPLE; // 72
302            CONVOLVE_ONE_SAMPLE; // 73
303            CONVOLVE_ONE_SAMPLE; // 74
304            CONVOLVE_ONE_SAMPLE; // 75
305            CONVOLVE_ONE_SAMPLE; // 76
306            CONVOLVE_ONE_SAMPLE; // 77
307            CONVOLVE_ONE_SAMPLE; // 78
308            CONVOLVE_ONE_SAMPLE; // 79
309            CONVOLVE_ONE_SAMPLE; // 80
310
311            CONVOLVE_ONE_SAMPLE; // 81
312            CONVOLVE_ONE_SAMPLE; // 82
313            CONVOLVE_ONE_SAMPLE; // 83
314            CONVOLVE_ONE_SAMPLE; // 84
315            CONVOLVE_ONE_SAMPLE; // 85
316            CONVOLVE_ONE_SAMPLE; // 86
317            CONVOLVE_ONE_SAMPLE; // 87
318            CONVOLVE_ONE_SAMPLE; // 88
319            CONVOLVE_ONE_SAMPLE; // 89
320            CONVOLVE_ONE_SAMPLE; // 90
321
322            CONVOLVE_ONE_SAMPLE; // 91
323            CONVOLVE_ONE_SAMPLE; // 92
324            CONVOLVE_ONE_SAMPLE; // 93
325            CONVOLVE_ONE_SAMPLE; // 94
326            CONVOLVE_ONE_SAMPLE; // 95
327            CONVOLVE_ONE_SAMPLE; // 96
328            CONVOLVE_ONE_SAMPLE; // 97
329            CONVOLVE_ONE_SAMPLE; // 98
330            CONVOLVE_ONE_SAMPLE; // 99
331            CONVOLVE_ONE_SAMPLE; // 100
332
333            CONVOLVE_ONE_SAMPLE; // 101
334            CONVOLVE_ONE_SAMPLE; // 102
335            CONVOLVE_ONE_SAMPLE; // 103
336            CONVOLVE_ONE_SAMPLE; // 104
337            CONVOLVE_ONE_SAMPLE; // 105
338            CONVOLVE_ONE_SAMPLE; // 106
339            CONVOLVE_ONE_SAMPLE; // 107
340            CONVOLVE_ONE_SAMPLE; // 108
341            CONVOLVE_ONE_SAMPLE; // 109
342            CONVOLVE_ONE_SAMPLE; // 110
343
344            CONVOLVE_ONE_SAMPLE; // 111
345            CONVOLVE_ONE_SAMPLE; // 112
346            CONVOLVE_ONE_SAMPLE; // 113
347            CONVOLVE_ONE_SAMPLE; // 114
348            CONVOLVE_ONE_SAMPLE; // 115
349            CONVOLVE_ONE_SAMPLE; // 116
350            CONVOLVE_ONE_SAMPLE; // 117
351            CONVOLVE_ONE_SAMPLE; // 118
352            CONVOLVE_ONE_SAMPLE; // 119
353            CONVOLVE_ONE_SAMPLE; // 120
354
355            CONVOLVE_ONE_SAMPLE; // 121
356            CONVOLVE_ONE_SAMPLE; // 122
357            CONVOLVE_ONE_SAMPLE; // 123
358            CONVOLVE_ONE_SAMPLE; // 124
359            CONVOLVE_ONE_SAMPLE; // 125
360            CONVOLVE_ONE_SAMPLE; // 126
361            CONVOLVE_ONE_SAMPLE; // 127
362            CONVOLVE_ONE_SAMPLE; // 128
363        } else {
364            while (j < kernelSize) {
365                // Non-optimized using actual while loop.
366                CONVOLVE_ONE_SAMPLE;
367            }
368        }
369        destP[i++] = sum;
370    }
371#endif // OS(MACOSX)
372
373    // Copy 2nd half of input buffer to 1st half.
374    memcpy(m_buffer.data(), inputP, sizeof(float) * framesToProcess);
375#endif
376}
377
378void DirectConvolver::reset()
379{
380    m_buffer.zero();
381#if USE(WEBAUDIO_IPP)
382    m_overlayBuffer.zero();
383#endif // USE(WEBAUDIO_IPP)
384}
385
386} // namespace WebCore
387
388#endif // ENABLE(WEB_AUDIO)
389