1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "AudioResamplerSinc"
18//#define LOG_NDEBUG 0
19
20#define __STDC_CONSTANT_MACROS
21#include <malloc.h>
22#include <string.h>
23#include <stdlib.h>
24#include <dlfcn.h>
25
26#include <cutils/compiler.h>
27#include <cutils/properties.h>
28
29#include <utils/Log.h>
30#include <audio_utils/primitives.h>
31
32#include "AudioResamplerSinc.h"
33
// Fallback for clang versions that do not provide __builtin_assume_aligned:
// the expression yields p unchanged and marks the "p is not a multiple of a"
// case as unreachable, which conveys the same alignment promise to the optimizer.
#if defined(__clang__) && !__has_builtin(__builtin_assume_aligned)
#define __builtin_assume_aligned(p, a) \
	(((uintptr_t(p) % (a)) == 0) ? (p) : (__builtin_unreachable(), (p)))
#endif

// The hand-written assembly helpers are only enabled for 32-bit ARM when not
// compiling in Thumb mode; every other target uses the portable C++ fallbacks.
#if defined(__arm__) && !defined(__thumb__)
#define USE_INLINE_ASSEMBLY (true)
#else
#define USE_INLINE_ASSEMBLY (false)
#endif

// The NEON intrinsics path is enabled on AArch64 or whenever __ARM_NEON__ is
// defined, unless USE_NEON was already defined by the build. On every other
// target it is forced off.
#if defined(__aarch64__) || defined(__ARM_NEON__)
#ifndef USE_NEON
#define USE_NEON (true)
#endif
#else
#define USE_NEON (false)
#endif
#if USE_NEON
#include <arm_neon.h>
#endif

// Marks a value as intentionally unused (evaluates it and discards the result).
#define UNUSED(x) ((void)(x))
57
58namespace android {
59// ----------------------------------------------------------------------------
60
61
/*
 * Built-in filter table selected by resample() when the input sample rate is
 * less than or equal to the output sample rate.
 *
 * These coefficients are computed with the "fir" utility found in
 * tools/resampler_tools
 * cmd-line: fir -l 7 -s 48000 -c 20478
 */
const uint32_t AudioResamplerSinc::mFirCoefsUp[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincUp.h"
};

/*
 * Built-in filter table selected by resample() when the input sample rate is
 * greater than the output sample rate.
 *
 * These coefficients are optimized for 48KHz -> 44.1KHz
 * cmd-line: fir -l 7 -s 48000 -c 17189
 */
const uint32_t AudioResamplerSinc::mFirCoefsDown[] __attribute__ ((aligned (32))) = {
#include "AudioResamplerSincDown.h"
};
78
// we use 15 bits to interpolate between these samples
// this cannot change because the multiplies below rely on it.
static const int pLerpBits = 15;

// Passed to pthread_once() by the constructor so that init_routine() runs once
// per process.
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
// Coefficient accessor looked up in libaudio-resampler.so by init_routine().
// It stays NULL when the library or one of its symbols cannot be resolved, in
// which case resample() uses the built-in tables instead.
static readCoefficientsFn readResampleCoefficients = NULL;

// Phase-decoding parameters for the two quality levels; both are filled in by
// init_routine().
/*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::highQualityConstants;
/*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::veryHighQualityConstants;
88
89void AudioResamplerSinc::init_routine()
90{
91    // for high quality resampler, the parameters for coefficients are compile-time constants
92    Constants *c = &highQualityConstants;
93    c->coefsBits = RESAMPLE_FIR_LERP_INT_BITS;
94    c->cShift = kNumPhaseBits - c->coefsBits;
95    c->cMask = ((1<< c->coefsBits)-1) << c->cShift;
96    c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
97    c->pMask = ((1<< pLerpBits)-1) << c->pShift;
98    c->halfNumCoefs = RESAMPLE_FIR_NUM_COEF;
99
100    // for very high quality resampler, the parameters are load-time constants
101    veryHighQualityConstants = highQualityConstants;
102
103    // Open the dll to get the coefficients for VERY_HIGH_QUALITY
104    void *resampleCoeffLib = dlopen("libaudio-resampler.so", RTLD_NOW);
105    ALOGV("Open libaudio-resampler library = %p", resampleCoeffLib);
106    if (resampleCoeffLib == NULL) {
107        ALOGE("Could not open audio-resampler library: %s", dlerror());
108        return;
109    }
110
111    readResampleFirNumCoeffFn readResampleFirNumCoeff;
112    readResampleFirLerpIntBitsFn readResampleFirLerpIntBits;
113
114    readResampleCoefficients = (readCoefficientsFn)
115            dlsym(resampleCoeffLib, "readResamplerCoefficients");
116    readResampleFirNumCoeff = (readResampleFirNumCoeffFn)
117            dlsym(resampleCoeffLib, "readResampleFirNumCoeff");
118    readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn)
119            dlsym(resampleCoeffLib, "readResampleFirLerpIntBits");
120
121    if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) {
122        readResampleCoefficients = NULL;
123        dlclose(resampleCoeffLib);
124        resampleCoeffLib = NULL;
125        ALOGE("Could not find symbol: %s", dlerror());
126        return;
127    }
128
129    c = &veryHighQualityConstants;
130    c->coefsBits = readResampleFirLerpIntBits();
131    c->cShift = kNumPhaseBits - c->coefsBits;
132    c->cMask = ((1<<c->coefsBits)-1) << c->cShift;
133    c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
134    c->pMask = ((1<<pLerpBits)-1) << c->pShift;
135    // number of zero-crossing on each side
136    c->halfNumCoefs = readResampleFirNumCoeff();
137    ALOGV("coefsBits = %d", c->coefsBits);
138    ALOGV("halfNumCoefs = %d", c->halfNumCoefs);
139    // note that we "leak" resampleCoeffLib until the process exits
140}
141
142// ----------------------------------------------------------------------------
143
144#if !USE_NEON
145
/*
 * Multiplies `in` by one signed 16-bit half of the packed value `vRL`.
 *
 * left != 0 selects the low 16 bits of vRL, left == 0 the high 16 bits.
 * The portable path returns (in * half) >> 16 computed in 64 bits.
 *
 * NOTE(review): the ARM path uses smultb/smultt, which multiply only the top
 * halfword of `in`, so its low bits can differ from the portable path -
 * confirm which behavior is the intended reference.
 */
static inline
int32_t mulRL(int left, int32_t in, uint32_t vRL)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // The two source operands are NOT interchangeable (top half of `in`,
    // bottom/top half of `vRL`), so they must not carry the '%' commutative
    // constraint modifier.
    if (left) {
        asm( "smultb %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smultt %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    int16_t v = left ? int16_t(vRL) : int16_t(vRL>>16);
    return int32_t((int64_t(in) * v) >> 16);
#endif
}
168
/*
 * Multiply-accumulate: returns a + ((v * in) >> 16), with the product formed
 * from the 32-bit value `v` and the 16-bit value `in`.
 */
static inline
int32_t mulAdd(int16_t in, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // smlawb treats its operands asymmetrically (32-bit `v` times the bottom
    // halfword of `in`), so `in` must not be declared commutative with `v`
    // via the '%' constraint modifier.
    asm( "smlawb %[out], %[v], %[in], %[a] \n"
         : [out]"=r"(out)
         : [in]"r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    return a + int32_t((int64_t(v) * in) >> 16);
#endif
}
183
/*
 * Multiply-accumulate against one signed 16-bit half of the packed value
 * `inRL`: returns a + ((v * half) >> 16).
 *
 * left != 0 selects the low 16 bits of inRL, left == 0 the high 16 bits.
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // smlawb/smlawt multiply the full 32-bit `v` by one halfword of `inRL`;
    // the operands are not interchangeable, so no '%' commutative modifier.
    if (left) {
        asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    } else {
        asm( "smlawt %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    }
    return out;
#else
    int16_t s = left ? int16_t(inRL) : int16_t(inRL>>16);
    return a + int32_t((int64_t(v) * s) >> 16);
#endif
}
206
207#endif // !USE_NEON
208
209// ----------------------------------------------------------------------------
210
211AudioResamplerSinc::AudioResamplerSinc(
212        int inChannelCount, int32_t sampleRate, src_quality quality)
213    : AudioResampler(inChannelCount, sampleRate, quality),
214    mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0)
215{
216    /*
217     * Layout of the state buffer for 32 tap:
218     *
219     * "present" sample            beginning of 2nd buffer
220     *                 v                v
221     *  0              01               2              23              3
222     *  0              F0               0              F0              F
223     * [pppppppppppppppInnnnnnnnnnnnnnnnpppppppppppppppInnnnnnnnnnnnnnnn]
224     *                 ^               ^ head
225     *
226     * p = past samples, convoluted with the (p)ositive side of sinc()
227     * n = future samples, convoluted with the (n)egative side of sinc()
228     * r = extra space for implementing the ring buffer
229     *
230     */
231
232    mVolumeSIMD[0] = 0;
233    mVolumeSIMD[1] = 0;
234
235    // Load the constants for coefficients
236    int ok = pthread_once(&once_control, init_routine);
237    if (ok != 0) {
238        ALOGE("%s pthread_once failed: %d", __func__, ok);
239    }
240    mConstants = (quality == VERY_HIGH_QUALITY) ?
241            &veryHighQualityConstants : &highQualityConstants;
242}
243
244
// Releases the state buffer allocated by init(); mState is 0 (and free() a
// no-op) when init() was never called.
AudioResamplerSinc::~AudioResamplerSinc() {
    free(mState);
}
248
249void AudioResamplerSinc::init() {
250    const Constants& c(*mConstants);
251    const size_t numCoefs = 2 * c.halfNumCoefs;
252    const size_t stateSize = numCoefs * mChannelCount * 2;
253    mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t));
254    memset(mState, 0, sizeof(int16_t)*stateSize);
255    mImpulse  = mState   + (c.halfNumCoefs-1)*mChannelCount;
256    mRingFull = mImpulse + (numCoefs+1)*mChannelCount;
257}
258
259void AudioResamplerSinc::setVolume(float left, float right) {
260    AudioResampler::setVolume(left, right);
261    // convert to U4_28 (rounding down).
262    // integer volume values are clamped to 0 to UNITY_GAIN.
263    mVolumeSIMD[0] = u4_28_from_float(clampFloatVol(left));
264    mVolumeSIMD[1] = u4_28_from_float(clampFloatVol(right));
265}
266
267size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
268            AudioBufferProvider* provider)
269{
270    // FIXME store current state (up or down sample) and only load the coefs when the state
271    // changes. Or load two pointers one for up and one for down in the init function.
272    // Not critical now since the read functions are fast, but would be important if read was slow.
273    if (mConstants == &veryHighQualityConstants && readResampleCoefficients) {
274        mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate );
275    } else {
276        mFirCoefs = (const int32_t *)
277                ((mInSampleRate <= mSampleRate) ? mFirCoefsUp : mFirCoefsDown);
278    }
279
280    // select the appropriate resampler
281    switch (mChannelCount) {
282    case 1:
283        return resample<1>(out, outFrameCount, provider);
284    case 2:
285        return resample<2>(out, outFrameCount, provider);
286    default:
287        LOG_ALWAYS_FATAL("invalid channel count: %d", mChannelCount);
288        return 0;
289    }
290}
291
292
293template<int CHANNELS>
294size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
295        AudioBufferProvider* provider)
296{
297    const Constants& c(*mConstants);
298    const size_t headOffset = c.halfNumCoefs*CHANNELS;
299    int16_t* impulse = mImpulse;
300    uint32_t vRL = mVolumeRL;
301    size_t inputIndex = mInputIndex;
302    uint32_t phaseFraction = mPhaseFraction;
303    uint32_t phaseIncrement = mPhaseIncrement;
304    size_t outputIndex = 0;
305    size_t outputSampleCount = outFrameCount * 2;
306    size_t inFrameCount = getInFrameCountRequired(outFrameCount);
307
308    while (outputIndex < outputSampleCount) {
309        // buffer is empty, fetch a new one
310        while (mBuffer.frameCount == 0) {
311            mBuffer.frameCount = inFrameCount;
312            provider->getNextBuffer(&mBuffer);
313            if (mBuffer.raw == NULL) {
314                goto resample_exit;
315            }
316            const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits;
317            if (phaseIndex == 1) {
318                // read one frame
319                read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
320            } else if (phaseIndex == 2) {
321                // read 2 frames
322                read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
323                inputIndex++;
324                if (inputIndex >= mBuffer.frameCount) {
325                    inputIndex -= mBuffer.frameCount;
326                    provider->releaseBuffer(&mBuffer);
327                } else {
328                    read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
329                }
330            }
331        }
332        int16_t const * const in = mBuffer.i16;
333        const size_t frameCount = mBuffer.frameCount;
334
335        // Always read-in the first samples from the input buffer
336        int16_t* head = impulse + headOffset;
337        for (size_t i=0 ; i<CHANNELS ; i++) {
338            head[i] = in[inputIndex*CHANNELS + i];
339        }
340
341        // handle boundary case
342        while (CC_LIKELY(outputIndex < outputSampleCount)) {
343            filterCoefficient<CHANNELS>(&out[outputIndex], phaseFraction, impulse, vRL);
344            outputIndex += 2;
345
346            phaseFraction += phaseIncrement;
347            const size_t phaseIndex = phaseFraction >> kNumPhaseBits;
348            for (size_t i=0 ; i<phaseIndex ; i++) {
349                inputIndex++;
350                if (inputIndex >= frameCount) {
351                    goto done;  // need a new buffer
352                }
353                read<CHANNELS>(impulse, phaseFraction, in, inputIndex);
354            }
355        }
356done:
357        // if done with buffer, save samples
358        if (inputIndex >= frameCount) {
359            inputIndex -= frameCount;
360            provider->releaseBuffer(&mBuffer);
361        }
362    }
363
364resample_exit:
365    mImpulse = impulse;
366    mInputIndex = inputIndex;
367    mPhaseFraction = phaseFraction;
368    return outputIndex / CHANNELS;
369}
370
371template<int CHANNELS>
372/***
373* read()
374*
375* This function reads only one frame from input buffer and writes it in
376* state buffer
377*
378**/
379void AudioResamplerSinc::read(
380        int16_t*& impulse, uint32_t& phaseFraction,
381        const int16_t* in, size_t inputIndex)
382{
383    impulse += CHANNELS;
384    phaseFraction -= 1LU<<kNumPhaseBits;
385
386    const Constants& c(*mConstants);
387    if (CC_UNLIKELY(impulse >= mRingFull)) {
388        const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS;
389        memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize);
390        impulse -= stateSize;
391    }
392
393    int16_t* head = impulse + c.halfNumCoefs*CHANNELS;
394    for (size_t i=0 ; i<CHANNELS ; i++) {
395        head[i] = in[inputIndex*CHANNELS + i];
396    }
397}
398
/*
 * Computes one output frame for the given phase and accumulates it into
 * out[0] and out[1].
 *
 * `samples` points at the current ("present") frame inside the state buffer.
 * Frames at and before it are paired with the coefficients selected by
 * `phase`, frames after it with the coefficients selected by (ONE - phase).
 * Each coefficient is linearly interpolated between two adjacent table rows.
 *
 * The scalar build applies the packed volume `vRL` through mulRL(). The NEON
 * build ignores `vRL` and uses mVolumeSIMD instead.
 *
 * The NEON loops consume four taps per iteration and stop only when the
 * unsigned counter reaches exactly 0, so they require halfNumCoefs to be a
 * positive multiple of 4.
 */
template<int CHANNELS>
void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase,
         const int16_t *samples, uint32_t vRL)
{
    // NOTE: be very careful when modifying the code here. register
    // pressure is very high and a small change might cause the compiler
    // to generate far less efficient code.
    // Always sanity check the result with objdump or test-resample.

    // compute the index of the coefficient on the positive side and
    // negative side
    const Constants& c(*mConstants);
    const int32_t ONE = c.cMask | c.pMask;
    // indexX: table row selected by the phase; lerpX: fraction used to
    // interpolate between that row and the next one
    uint32_t indexP = ( phase & c.cMask) >> c.cShift;
    uint32_t lerpP  = ( phase & c.pMask) >> c.pShift;
    uint32_t indexN = ((ONE-phase) & c.cMask) >> c.cShift;
    uint32_t lerpN  = ((ONE-phase) & c.pMask) >> c.pShift;

    // each table row holds halfNumCoefs coefficients
    const size_t offset = c.halfNumCoefs;
    indexP *= offset;
    indexN *= offset;

    int32_t const* coefsP = mFirCoefs + indexP;
    int32_t const* coefsN = mFirCoefs + indexN;
    // sP walks backwards from the current frame, sN forwards from the next one
    int16_t const* sP = samples;
    int16_t const* sN = samples + CHANNELS;

    size_t count = offset;

#if !USE_NEON
    int32_t l = 0;
    int32_t r = 0;
    for (size_t i=0 ; i<count ; i++) {
        interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
        sP -= CHANNELS;
        interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
        sN += CHANNELS;
    }
    // apply the left (low half of vRL) and right (high half) volume
    out[0] += 2 * mulRL(1, l, vRL);
    out[1] += 2 * mulRL(0, r, vRL);
#else
    UNUSED(vRL);
    if (CHANNELS == 1) {
        // second table row, used as the other end point of the interpolation
        int32_t const* coefsP1 = coefsP + offset;
        int32_t const* coefsN1 = coefsN + offset;
        // back up three frames so a 4-frame load ends on the current frame
        sP -= CHANNELS*3;

        int32x4_t sum;
        int32x2_t lerpPN;
        // lane 0 = lerpP, lane 1 = lerpN, both shifted into the upper bits
        lerpPN = vdup_n_s32(0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
        lerpPN = vshl_n_s32(lerpPN, 16);
        sum = vdupq_n_s32(0);

        int16x4_t sampleP, sampleN;
        int32x4_t samplePExt, sampleNExt;
        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;

        // promise the compiler that the coefficient pointers are 16-byte aligned
        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
        // four taps per side per iteration
        for (; count > 0; count -= 4) {
            sampleP = vld1_s16(sP);
            sampleN = vld1_s16(sN);
            coefsPV0 = vld1q_s32(coefsP);
            coefsNV0 = vld1q_s32(coefsN);
            coefsPV1 = vld1q_s32(coefsP1);
            coefsNV1 = vld1q_s32(coefsN1);
            sP -= 4;
            sN += 4;
            coefsP += 4;
            coefsN += 4;
            coefsP1 += 4;
            coefsN1 += 4;

            // the past samples were loaded oldest-first; reverse the four
            // lanes so lane 0 holds the most recent one
            sampleP = vrev64_s16(sampleP);

            // interpolate (step1)
            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
            samplePExt = vshll_n_s16(sampleP, 15);
            // interpolate (step2)
            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
            sampleNExt = vshll_n_s16(sampleN, 15);
            // interpolate (step3)
            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);

            // multiply the samples by the interpolated coefficients and accumulate
            samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0);
            sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0);
            sum = vaddq_s32(sum, samplePExt);
            sum = vaddq_s32(sum, sampleNExt);
        }
        int32x2_t volumesV, outV;
        volumesV = vld1_s32(mVolumeSIMD);
        outV = vld1_s32(out);

        //add all 4 partial sums
        int32x2_t sumLow, sumHigh;
        sumLow = vget_low_s32(sum);
        sumHigh = vget_high_s32(sum);
        sumLow = vpadd_s32(sumLow, sumHigh);
        sumLow = vpadd_s32(sumLow, sumLow);

        // both lanes now hold the same total; scale lane 0 by mVolumeSIMD[0]
        // and lane 1 by mVolumeSIMD[1], then accumulate into out[0..1]
        sumLow = vqrdmulh_s32(sumLow, volumesV);
        outV = vadd_s32(outV, sumLow);
        vst1_s32(out, outV);
    } else if (CHANNELS == 2) {
        // second table row, used as the other end point of the interpolation
        int32_t const* coefsP1 = coefsP + offset;
        int32_t const* coefsN1 = coefsN + offset;
        // back up three frames so a 4-frame load ends on the current frame
        sP -= CHANNELS*3;

        int32x4_t sum0, sum1;
        int32x2_t lerpPN;

        // lane 0 = lerpP, lane 1 = lerpN, both shifted into the upper bits
        lerpPN = vdup_n_s32(0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
        lerpPN = vshl_n_s32(lerpPN, 16);
        sum0 = vdupq_n_s32(0);
        sum1 = vdupq_n_s32(0);

        int16x4x2_t sampleP, sampleN;
        int32x4x2_t samplePExt, sampleNExt;
        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;

        // promise the compiler that the coefficient pointers are 16-byte aligned
        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
        // four taps per side per iteration
        for (; count > 0; count -= 4) {
            // vld2 de-interleaves: val[0] gets the first sample of each frame,
            // val[1] the second
            sampleP = vld2_s16(sP);
            sampleN = vld2_s16(sN);
            coefsPV0 = vld1q_s32(coefsP);
            coefsNV0 = vld1q_s32(coefsN);
            coefsPV1 = vld1q_s32(coefsP1);
            coefsNV1 = vld1q_s32(coefsN1);
            sP -= 8;
            sN += 8;
            coefsP += 4;
            coefsN += 4;
            coefsP1 += 4;
            coefsN1 += 4;

            // the past frames were loaded oldest-first; reverse the four
            // lanes so lane 0 holds the most recent one
            sampleP.val[0] = vrev64_s16(sampleP.val[0]);
            sampleP.val[1] = vrev64_s16(sampleP.val[1]);

            // interpolate (step1)
            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
            samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15);
            samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15);
            // interpolate (step2)
            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
            sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15);
            sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15);
            // interpolate (step3)
            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);

            // the same interpolated coefficients are applied to both channels
            samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0);
            samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0);
            sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0);
            sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0);
            sum0 = vaddq_s32(sum0, samplePExt.val[0]);
            sum1 = vaddq_s32(sum1, samplePExt.val[1]);
            sum0 = vaddq_s32(sum0, sampleNExt.val[0]);
            sum1 = vaddq_s32(sum1, sampleNExt.val[1]);
        }
        int32x2_t volumesV, outV;
        volumesV = vld1_s32(mVolumeSIMD);
        outV = vld1_s32(out);

        //add all 4 partial sums
        int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1;
        sumLow0 = vget_low_s32(sum0);
        sumHigh0 = vget_high_s32(sum0);
        sumLow1 = vget_low_s32(sum1);
        sumHigh1 = vget_high_s32(sum1);
        sumLow0 = vpadd_s32(sumLow0, sumHigh0);
        sumLow0 = vpadd_s32(sumLow0, sumLow0);
        sumLow1 = vpadd_s32(sumLow1, sumHigh1);
        sumLow1 = vpadd_s32(sumLow1, sumLow1);

        // pack the two channel totals into one vector (lane 0 from sum0,
        // lane 1 from sum1), apply the per-channel volume and accumulate
        sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0];
        sumLow0 = vqrdmulh_s32(sumLow0, volumesV);
        outV = vadd_s32(outV, sumLow0);
        vst1_s32(out, outV);
    }
#endif
}
594
595template<int CHANNELS>
596void AudioResamplerSinc::interpolate(
597        int32_t& l, int32_t& r,
598        const int32_t* coefs, size_t offset,
599        int32_t lerp, const int16_t* samples)
600{
601    int32_t c0 = coefs[0];
602    int32_t c1 = coefs[offset];
603    int32_t sinc = mulAdd(lerp, (c1-c0)<<1, c0);
604    if (CHANNELS == 2) {
605        uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
606        l = mulAddRL(1, rl, sinc, l);
607        r = mulAddRL(0, rl, sinc, r);
608    } else {
609        r = l = mulAdd(samples[0], sinc, l);
610    }
611}
612// ----------------------------------------------------------------------------
613} // namespace android
614