1/*
2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30
31#if ENABLE(WEB_AUDIO)
32
33#include "SincResampler.h"
34
35#include <wtf/MathExtras.h>
36
37using namespace std;
38
39// Input buffer layout, dividing the total buffer into regions (r0 - r5):
40//
41// |----------------|----------------------------------------------------------------|----------------|
42//
43//                                              blockSize + kernelSize / 2
44//                   <-------------------------------------------------------------------------------->
45//                                                  r0
46//
47//   kernelSize / 2   kernelSize / 2                                 kernelSize / 2     kernelSize / 2
48// <---------------> <--------------->                              <---------------> <--------------->
49//         r1                r2                                             r3                r4
50//
51//                                              blockSize
52//                                     <-------------------------------------------------------------->
53//                                                  r5
54
55// The Algorithm:
56//
57// 1) Consume input frames into r0 (r1 is zero-initialized).
58// 2) Position kernel centered at start of r0 (r2) and generate output frames until kernel is centered at start of r4.
59//    or we've finished generating all the output frames.
60// 3) Copy r3 to r1 and r4 to r2.
61// 4) Consume input frames into r5 (zero-pad if we run out of input).
62// 5) Goto (2) until all of input is consumed.
63//
64// note: we're glossing over how the sub-sample handling works with m_virtualSourceIndex, etc.
65
66namespace WebCore {
67
68SincResampler::SincResampler(double scaleFactor, unsigned kernelSize, unsigned numberOfKernelOffsets)
69    : m_scaleFactor(scaleFactor)
70    , m_kernelSize(kernelSize)
71    , m_numberOfKernelOffsets(numberOfKernelOffsets)
72    , m_kernelStorage(m_kernelSize * (m_numberOfKernelOffsets + 1))
73    , m_virtualSourceIndex(0.0)
74    , m_blockSize(512)
75    , m_inputBuffer(m_blockSize + m_kernelSize) // See input buffer layout above.
76    , m_source(0)
77    , m_sourceFramesAvailable(0)
78{
79    initializeKernel();
80}
81
82void SincResampler::initializeKernel()
83{
84    // Blackman window parameters.
85    double alpha = 0.16;
86    double a0 = 0.5 * (1.0 - alpha);
87    double a1 = 0.5;
88    double a2 = 0.5 * alpha;
89
90    // sincScaleFactor is basically the normalized cutoff frequency of the low-pass filter.
91    double sincScaleFactor = m_scaleFactor > 1.0 ? 1.0 / m_scaleFactor : 1.0;
92
93    // The sinc function is an idealized brick-wall filter, but since we're windowing it the
94    // transition from pass to stop does not happen right away. So we should adjust the
95    // lowpass filter cutoff slightly downward to avoid some aliasing at the very high-end.
96    // FIXME: this value is empirical and to be more exact should vary depending on m_kernelSize.
97    sincScaleFactor *= 0.9;
98
99    int n = m_kernelSize;
100    int halfSize = n / 2;
101
102    // Generates a set of windowed sinc() kernels.
103    // We generate a range of sub-sample offsets from 0.0 to 1.0.
104    for (unsigned offsetIndex = 0; offsetIndex <= m_numberOfKernelOffsets; ++offsetIndex) {
105        double subsampleOffset = static_cast<double>(offsetIndex) / m_numberOfKernelOffsets;
106
107        for (int i = 0; i < n; ++i) {
108            // Compute the sinc() with offset.
109            double s = sincScaleFactor * piDouble * (i - halfSize - subsampleOffset);
110            double sinc = !s ? 1.0 : sin(s) / s;
111            sinc *= sincScaleFactor;
112
113            // Compute Blackman window, matching the offset of the sinc().
114            double x = (i - subsampleOffset) / n;
115            double window = a0 - a1 * cos(2.0 * piDouble * x) + a2 * cos(4.0 * piDouble * x);
116
117            // Window the sinc() function and store at the correct offset.
118            m_kernelStorage[i + offsetIndex * m_kernelSize] = sinc * window;
119        }
120    }
121}
122
123void SincResampler::consumeSource(float* buffer, unsigned numberOfSourceFrames)
124{
125    ASSERT(m_source);
126    if (!m_source)
127        return;
128
129    // Clamp to number of frames available and zero-pad.
130    unsigned framesToCopy = min(m_sourceFramesAvailable, numberOfSourceFrames);
131    memcpy(buffer, m_source, sizeof(float) * framesToCopy);
132
133    // Zero-pad if necessary.
134    if (framesToCopy < numberOfSourceFrames)
135        memset(buffer + framesToCopy, 0, sizeof(float) * (numberOfSourceFrames - framesToCopy));
136
137    m_sourceFramesAvailable -= framesToCopy;
138    m_source += numberOfSourceFrames;
139}
140
141void SincResampler::process(float* source, float* destination, unsigned numberOfSourceFrames)
142{
143    ASSERT(m_blockSize > m_kernelSize);
144    ASSERT(m_inputBuffer.size() >= m_blockSize + m_kernelSize);
145    ASSERT(!(m_kernelSize % 2));
146
147    // Setup various region pointers in the buffer (see diagram above).
148    float* r0 = m_inputBuffer.data() + m_kernelSize / 2;
149    float* r1 = m_inputBuffer.data();
150    float* r2 = r0;
151    float* r3 = r0 + m_blockSize - m_kernelSize / 2;
152    float* r4 = r0 + m_blockSize;
153    float* r5 = r0 + m_kernelSize / 2;
154
155    m_source = source;
156    m_sourceFramesAvailable = numberOfSourceFrames;
157
158    unsigned numberOfDestinationFrames = static_cast<unsigned>(numberOfSourceFrames / m_scaleFactor);
159
160    // Step (1)
161    // Prime the input buffer.
162    consumeSource(r0, m_blockSize + m_kernelSize / 2);
163
164    // Step (2)
165    m_virtualSourceIndex = 0;
166
167    while (numberOfDestinationFrames) {
168        while (m_virtualSourceIndex < m_blockSize) {
169            // m_virtualSourceIndex lies in between two kernel offsets so figure out what they are.
170            int sourceIndexI = static_cast<int>(m_virtualSourceIndex);
171            double subsampleRemainder = m_virtualSourceIndex - sourceIndexI;
172
173            double virtualOffsetIndex = subsampleRemainder * m_numberOfKernelOffsets;
174            int offsetIndex = static_cast<int>(virtualOffsetIndex);
175
176            float* k1 = m_kernelStorage.data() + offsetIndex * m_kernelSize;
177            float* k2 = k1 + m_kernelSize;
178
179            // Initialize input pointer based on quantized m_virtualSourceIndex.
180            float* inputP = r1 + sourceIndexI;
181
182            // We'll compute "convolutions" for the two kernels which straddle m_virtualSourceIndex
183            float sum1 = 0;
184            float sum2 = 0;
185
186            // Figure out how much to weight each kernel's "convolution".
187            double kernelInterpolationFactor = virtualOffsetIndex - offsetIndex;
188
189            // Generate a single output sample.
190            int n = m_kernelSize;
191
192            // FIXME: add SIMD optimizations for the following. The scalar code-path can probably also be optimized better.
193
194#define CONVOLVE_ONE_SAMPLE      \
195            input = *inputP++;   \
196            sum1 += input * *k1; \
197            sum2 += input * *k2; \
198            ++k1;                \
199            ++k2;
200
201            {
202                float input;
203
204                // Optimize size 32 and size 64 kernels by unrolling the while loop.
205                // A 20 - 30% speed improvement was measured in some cases by using this approach.
206
207                if (n == 32) {
208                    CONVOLVE_ONE_SAMPLE // 1
209                    CONVOLVE_ONE_SAMPLE // 2
210                    CONVOLVE_ONE_SAMPLE // 3
211                    CONVOLVE_ONE_SAMPLE // 4
212                    CONVOLVE_ONE_SAMPLE // 5
213                    CONVOLVE_ONE_SAMPLE // 6
214                    CONVOLVE_ONE_SAMPLE // 7
215                    CONVOLVE_ONE_SAMPLE // 8
216                    CONVOLVE_ONE_SAMPLE // 9
217                    CONVOLVE_ONE_SAMPLE // 10
218                    CONVOLVE_ONE_SAMPLE // 11
219                    CONVOLVE_ONE_SAMPLE // 12
220                    CONVOLVE_ONE_SAMPLE // 13
221                    CONVOLVE_ONE_SAMPLE // 14
222                    CONVOLVE_ONE_SAMPLE // 15
223                    CONVOLVE_ONE_SAMPLE // 16
224                    CONVOLVE_ONE_SAMPLE // 17
225                    CONVOLVE_ONE_SAMPLE // 18
226                    CONVOLVE_ONE_SAMPLE // 19
227                    CONVOLVE_ONE_SAMPLE // 20
228                    CONVOLVE_ONE_SAMPLE // 21
229                    CONVOLVE_ONE_SAMPLE // 22
230                    CONVOLVE_ONE_SAMPLE // 23
231                    CONVOLVE_ONE_SAMPLE // 24
232                    CONVOLVE_ONE_SAMPLE // 25
233                    CONVOLVE_ONE_SAMPLE // 26
234                    CONVOLVE_ONE_SAMPLE // 27
235                    CONVOLVE_ONE_SAMPLE // 28
236                    CONVOLVE_ONE_SAMPLE // 29
237                    CONVOLVE_ONE_SAMPLE // 30
238                    CONVOLVE_ONE_SAMPLE // 31
239                    CONVOLVE_ONE_SAMPLE // 32
240                } else if (n == 64) {
241                    CONVOLVE_ONE_SAMPLE // 1
242                    CONVOLVE_ONE_SAMPLE // 2
243                    CONVOLVE_ONE_SAMPLE // 3
244                    CONVOLVE_ONE_SAMPLE // 4
245                    CONVOLVE_ONE_SAMPLE // 5
246                    CONVOLVE_ONE_SAMPLE // 6
247                    CONVOLVE_ONE_SAMPLE // 7
248                    CONVOLVE_ONE_SAMPLE // 8
249                    CONVOLVE_ONE_SAMPLE // 9
250                    CONVOLVE_ONE_SAMPLE // 10
251                    CONVOLVE_ONE_SAMPLE // 11
252                    CONVOLVE_ONE_SAMPLE // 12
253                    CONVOLVE_ONE_SAMPLE // 13
254                    CONVOLVE_ONE_SAMPLE // 14
255                    CONVOLVE_ONE_SAMPLE // 15
256                    CONVOLVE_ONE_SAMPLE // 16
257                    CONVOLVE_ONE_SAMPLE // 17
258                    CONVOLVE_ONE_SAMPLE // 18
259                    CONVOLVE_ONE_SAMPLE // 19
260                    CONVOLVE_ONE_SAMPLE // 20
261                    CONVOLVE_ONE_SAMPLE // 21
262                    CONVOLVE_ONE_SAMPLE // 22
263                    CONVOLVE_ONE_SAMPLE // 23
264                    CONVOLVE_ONE_SAMPLE // 24
265                    CONVOLVE_ONE_SAMPLE // 25
266                    CONVOLVE_ONE_SAMPLE // 26
267                    CONVOLVE_ONE_SAMPLE // 27
268                    CONVOLVE_ONE_SAMPLE // 28
269                    CONVOLVE_ONE_SAMPLE // 29
270                    CONVOLVE_ONE_SAMPLE // 30
271                    CONVOLVE_ONE_SAMPLE // 31
272                    CONVOLVE_ONE_SAMPLE // 32
273                    CONVOLVE_ONE_SAMPLE // 33
274                    CONVOLVE_ONE_SAMPLE // 34
275                    CONVOLVE_ONE_SAMPLE // 35
276                    CONVOLVE_ONE_SAMPLE // 36
277                    CONVOLVE_ONE_SAMPLE // 37
278                    CONVOLVE_ONE_SAMPLE // 38
279                    CONVOLVE_ONE_SAMPLE // 39
280                    CONVOLVE_ONE_SAMPLE // 40
281                    CONVOLVE_ONE_SAMPLE // 41
282                    CONVOLVE_ONE_SAMPLE // 42
283                    CONVOLVE_ONE_SAMPLE // 43
284                    CONVOLVE_ONE_SAMPLE // 44
285                    CONVOLVE_ONE_SAMPLE // 45
286                    CONVOLVE_ONE_SAMPLE // 46
287                    CONVOLVE_ONE_SAMPLE // 47
288                    CONVOLVE_ONE_SAMPLE // 48
289                    CONVOLVE_ONE_SAMPLE // 49
290                    CONVOLVE_ONE_SAMPLE // 50
291                    CONVOLVE_ONE_SAMPLE // 51
292                    CONVOLVE_ONE_SAMPLE // 52
293                    CONVOLVE_ONE_SAMPLE // 53
294                    CONVOLVE_ONE_SAMPLE // 54
295                    CONVOLVE_ONE_SAMPLE // 55
296                    CONVOLVE_ONE_SAMPLE // 56
297                    CONVOLVE_ONE_SAMPLE // 57
298                    CONVOLVE_ONE_SAMPLE // 58
299                    CONVOLVE_ONE_SAMPLE // 59
300                    CONVOLVE_ONE_SAMPLE // 60
301                    CONVOLVE_ONE_SAMPLE // 61
302                    CONVOLVE_ONE_SAMPLE // 62
303                    CONVOLVE_ONE_SAMPLE // 63
304                    CONVOLVE_ONE_SAMPLE // 64
305                } else {
306                    while (n--) {
307                        // Non-optimized using actual while loop.
308                        CONVOLVE_ONE_SAMPLE
309                    }
310                }
311            }
312
313            // Linearly interpolate the two "convolutions".
314            double result = (1.0 - kernelInterpolationFactor) * sum1 + kernelInterpolationFactor * sum2;
315
316            *destination++ = result;
317
318            --numberOfDestinationFrames;
319            if (!numberOfDestinationFrames)
320                return;
321
322            // Advance the virtual index.
323            m_virtualSourceIndex += m_scaleFactor;
324        }
325
326        // Wrap back around to the start.
327        m_virtualSourceIndex -= m_blockSize;
328
329        // Step (3) Copy r3 to r1 and r4 to r2.
330        // This wraps the last input frames back to the start of the buffer.
331        memcpy(r1, r3, sizeof(float) * (m_kernelSize / 2));
332        memcpy(r2, r4, sizeof(float) * (m_kernelSize / 2));
333
334        // Step (4)
335        // Refresh the buffer with more input.
336        consumeSource(r5, m_blockSize);
337    }
338}
339
340} // namespace WebCore
341
342#endif // ENABLE(WEB_AUDIO)
343