AudioResamplerDyn.cpp revision 83be2560d9396b3bd32919123bd67a783e6aaf7c
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "AudioResamplerDyn"
18//#define LOG_NDEBUG 0
19
20#include <malloc.h>
21#include <string.h>
22#include <stdlib.h>
23#include <dlfcn.h>
24#include <math.h>
25
26#include <cutils/compiler.h>
27#include <cutils/properties.h>
28#include <utils/Log.h>
29
30#include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here
31#include "AudioResamplerFirProcess.h"
32#include "AudioResamplerFirProcessNeon.h"
33#include "AudioResamplerFirGen.h" // requires math.h
34#include "AudioResamplerDyn.h"
35
36//#define DEBUG_RESAMPLER
37
38namespace android {
39
// Packs the resampler configuration into a small integer that is usable as a
// compile-time constant (e.g. in case labels):
//   bit 0    : channel count (0 = mono, 1 = stereo)
//   bit 1    : locked phase
//   bit 2    : coefficient type
//   bits 3-4 : stride (8 -> 1, 16 -> 2, anything else -> 0)
#define RESAMPLETYPE(CHANNELS, LOCKED, STRIDE, COEFTYPE) \
    (   (((CHANNELS) - 1) & 1) \
      | ((!!(LOCKED)) << 1) \
      | ((COEFTYPE) << 2) \
      | (((STRIDE) == 8 ? 1 : ((STRIDE) == 16 ? 2 : 0)) << 3))
44
45/*
46 * InBuffer is a type agnostic input buffer.
47 *
48 * Layout of the state buffer for halfNumCoefs=8.
49 *
50 * [rrrrrrppppppppnnnnnnnnrrrrrrrrrrrrrrrrrrr.... rrrrrrr]
51 *  S            I                                R
52 *
53 * S = mState
54 * I = mImpulse
55 * R = mRingFull
56 * p = past samples, convoluted with the (p)ositive side of sinc()
57 * n = future samples, convoluted with the (n)egative side of sinc()
58 * r = extra space for implementing the ring buffer
59 */
60
61template<typename TI>
62AudioResamplerDyn::InBuffer<TI>::InBuffer()
63    : mState(NULL), mImpulse(NULL), mRingFull(NULL), mStateSize(0) {
64}
65
66template<typename TI>
67AudioResamplerDyn::InBuffer<TI>::~InBuffer() {
68    init();
69}
70
71template<typename TI>
72void AudioResamplerDyn::InBuffer<TI>::init() {
73    free(mState);
74    mState = NULL;
75    mImpulse = NULL;
76    mRingFull = NULL;
77    mStateSize = 0;
78}
79
80// resizes the state buffer to accommodate the appropriate filter length
81template<typename TI>
82void AudioResamplerDyn::InBuffer<TI>::resize(int CHANNELS, int halfNumCoefs) {
83    // calculate desired state size
84    int stateSize = halfNumCoefs * CHANNELS * 2
85            * kStateSizeMultipleOfFilterLength;
86
87    // check if buffer needs resizing
88    if (mState
89            && stateSize == mStateSize
90            && mRingFull-mState == mStateSize-halfNumCoefs*CHANNELS) {
91        return;
92    }
93
94    // create new buffer
95    TI* state = (int16_t*)memalign(32, stateSize*sizeof(*state));
96    memset(state, 0, stateSize*sizeof(*state));
97
98    // attempt to preserve state
99    if (mState) {
100        TI* srcLo = mImpulse - halfNumCoefs*CHANNELS;
101        TI* srcHi = mImpulse + halfNumCoefs*CHANNELS;
102        TI* dst = state;
103
104        if (srcLo < mState) {
105            dst += mState-srcLo;
106            srcLo = mState;
107        }
108        if (srcHi > mState + mStateSize) {
109            srcHi = mState + mStateSize;
110        }
111        memcpy(dst, srcLo, (srcHi - srcLo) * sizeof(*srcLo));
112        free(mState);
113    }
114
115    // set class member vars
116    mState = state;
117    mStateSize = stateSize;
118    mImpulse = mState + halfNumCoefs*CHANNELS; // actually one sample greater than needed
119    mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS;
120}
121
122// copy in the input data into the head (impulse+halfNumCoefs) of the buffer.
123template<typename TI>
124template<int CHANNELS>
125void AudioResamplerDyn::InBuffer<TI>::readAgain(TI*& impulse, const int halfNumCoefs,
126        const TI* const in, const size_t inputIndex) {
127    int16_t* head = impulse + halfNumCoefs*CHANNELS;
128    for (size_t i=0 ; i<CHANNELS ; i++) {
129        head[i] = in[inputIndex*CHANNELS + i];
130    }
131}
132
133// advance the impulse pointer, and load in data into the head (impulse+halfNumCoefs)
134template<typename TI>
135template<int CHANNELS>
136void AudioResamplerDyn::InBuffer<TI>::readAdvance(TI*& impulse, const int halfNumCoefs,
137        const TI* const in, const size_t inputIndex) {
138    impulse += CHANNELS;
139
140    if (CC_UNLIKELY(impulse >= mRingFull)) {
141        const size_t shiftDown = mRingFull - mState - halfNumCoefs*CHANNELS;
142        memcpy(mState, mState+shiftDown, halfNumCoefs*CHANNELS*2*sizeof(TI));
143        impulse -= shiftDown;
144    }
145    readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
146}
147
148void AudioResamplerDyn::Constants::set(
149        int L, int halfNumCoefs, int inSampleRate, int outSampleRate)
150{
151    int bits = 0;
152    int lscale = inSampleRate/outSampleRate < 2 ? L - 1 :
153            static_cast<int>(static_cast<uint64_t>(L)*inSampleRate/outSampleRate);
154    for (int i=lscale; i; ++bits, i>>=1)
155        ;
156    mL = L;
157    mShift = kNumPhaseBits - bits;
158    mHalfNumCoefs = halfNumCoefs;
159}
160
161AudioResamplerDyn::AudioResamplerDyn(int bitDepth,
162        int inChannelCount, int32_t sampleRate, src_quality quality)
163    : AudioResampler(bitDepth, inChannelCount, sampleRate, quality),
164    mResampleType(0), mFilterSampleRate(0), mFilterQuality(DEFAULT_QUALITY),
165    mCoefBuffer(NULL)
166{
167    mVolumeSimd[0] = mVolumeSimd[1] = 0;
168    mConstants.set(128, 8, mSampleRate, mSampleRate); // TODO: set better
169}
170
171AudioResamplerDyn::~AudioResamplerDyn() {
172    free(mCoefBuffer);
173}
174
175void AudioResamplerDyn::init() {
176    mFilterSampleRate = 0; // always trigger new filter generation
177    mInBuffer.init();
178}
179
180void AudioResamplerDyn::setVolume(int16_t left, int16_t right) {
181    AudioResampler::setVolume(left, right);
182    mVolumeSimd[0] = static_cast<int32_t>(left)<<16;
183    mVolumeSimd[1] = static_cast<int32_t>(right)<<16;
184}
185
// Returns the larger of a and b; b is returned when they compare equal.
template <typename T>
T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
187
// Returns the difference between a and b, always subtracting the smaller
// from the larger.
template <typename T>
T absdiff(T a, T b) {
    if (a > b) {
        return a - b;
    }
    return b - a;
}
189
190template<typename T>
191void AudioResamplerDyn::createKaiserFir(Constants &c, double stopBandAtten,
192        int inSampleRate, int outSampleRate, double tbwCheat) {
193    T* buf = reinterpret_cast<T*>(memalign(32, (c.mL+1)*c.mHalfNumCoefs*sizeof(T)));
194    static const double atten = 0.9998;   // to avoid ripple overflow
195    double fcr;
196    double tbw = firKaiserTbw(c.mHalfNumCoefs, stopBandAtten);
197
198    if (inSampleRate < outSampleRate) { // upsample
199        fcr = max(0.5*tbwCheat - tbw/2, tbw/2);
200    } else { // downsample
201        fcr = max(0.5*tbwCheat*outSampleRate/inSampleRate - tbw/2, tbw/2);
202    }
203    // create and set filter
204    firKaiserGen(buf, c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten);
205    c.setBuf(buf);
206    if (mCoefBuffer) {
207        free(mCoefBuffer);
208    }
209    mCoefBuffer = buf;
210#ifdef DEBUG_RESAMPLER
211    // print basic filter stats
212    printf("L:%d  hnc:%d  stopBandAtten:%lf  fcr:%lf  atten:%lf  tbw:%lf\n",
213            c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten, tbw);
214    // test the filter and report results
215    double fp = (fcr - tbw/2)/c.mL;
216    double fs = (fcr + tbw/2)/c.mL;
217    double passMin, passMax, passRipple;
218    double stopMax, stopRipple;
219    testFir(buf, c.mL, c.mHalfNumCoefs, fp, fs, /*passSteps*/ 1000, /*stopSteps*/ 100000,
220            passMin, passMax, passRipple, stopMax, stopRipple);
221    printf("passband(%lf, %lf): %.8lf %.8lf %.8lf\n", 0., fp, passMin, passMax, passRipple);
222    printf("stopband(%lf, %lf): %.8lf %.3lf\n", fs, 0.5, stopMax, stopRipple);
223#endif
224}
225
// Greatest common divisor by Euclid's algorithm, written as a loop.
// Returns n when m is 0.
static int gcd(int n, int m) {
    while (m != 0) {
        const int remainder = n % m;
        n = m;
        m = remainder;
    }
    return n;
}
233
234static bool isClose(int32_t newSampleRate, int32_t prevSampleRate,
235        int32_t filterSampleRate, int32_t outSampleRate) {
236
237    // different upsampling ratios do not need a filter change.
238    if (filterSampleRate != 0
239            && filterSampleRate < outSampleRate
240            && newSampleRate < outSampleRate)
241        return true;
242
243    // check design criteria again if downsampling is detected.
244    int pdiff = absdiff(newSampleRate, prevSampleRate);
245    int adiff = absdiff(newSampleRate, filterSampleRate);
246
247    // allow up to 6% relative change increments.
248    // allow up to 12% absolute change increments (from filter design)
249    return pdiff < prevSampleRate>>4 && adiff < filterSampleRate>>3;
250}
251
252void AudioResamplerDyn::setSampleRate(int32_t inSampleRate) {
253    if (mInSampleRate == inSampleRate) {
254        return;
255    }
256    int32_t oldSampleRate = mInSampleRate;
257    int32_t oldHalfNumCoefs = mConstants.mHalfNumCoefs;
258    uint32_t oldPhaseWrapLimit = mConstants.mL << mConstants.mShift;
259    bool useS32 = false;
260
261    mInSampleRate = inSampleRate;
262
263    // TODO: Add precalculated Equiripple filters
264
265    if (mFilterQuality != getQuality() ||
266            !isClose(inSampleRate, oldSampleRate, mFilterSampleRate, mSampleRate)) {
267        mFilterSampleRate = inSampleRate;
268        mFilterQuality = getQuality();
269
270        // Begin Kaiser Filter computation
271        //
272        // The quantization floor for S16 is about 96db - 10*log_10(#length) + 3dB.
273        // Keep the stop band attenuation no greater than 84-85dB for 32 length S16 filters
274        //
275        // For s32 we keep the stop band attenuation at the same as 16b resolution, about
276        // 96-98dB
277        //
278
279        double stopBandAtten;
280        double tbwCheat = 1.; // how much we "cheat" into aliasing
281        int halfLength;
282        if (mFilterQuality == DYN_HIGH_QUALITY) {
283            // 32b coefficients, 64 length
284            useS32 = true;
285            stopBandAtten = 98.;
286            halfLength = 32;
287        } else if (mFilterQuality == DYN_LOW_QUALITY) {
288            // 16b coefficients, 16-32 length
289            useS32 = false;
290            stopBandAtten = 80.;
291            if (mSampleRate >= inSampleRate * 2) {
292                halfLength = 16;
293            } else {
294                halfLength = 8;
295            }
296            if (mSampleRate >= inSampleRate) {
297                tbwCheat = 1.05;
298            } else {
299                tbwCheat = 1.03;
300            }
301        } else { // DYN_MED_QUALITY
302            // 16b coefficients, 32-64 length
303            // note: > 64 length filters with 16b coefs can have quantization noise problems
304            useS32 = false;
305            stopBandAtten = 84.;
306            if (mSampleRate >= inSampleRate * 4) {
307                halfLength = 32;
308            } else if (mSampleRate >= inSampleRate * 2) {
309                halfLength = 24;
310            } else {
311                halfLength = 16;
312            }
313            if (mSampleRate >= inSampleRate) {
314                tbwCheat = 1.03;
315            } else {
316                tbwCheat = 1.01;
317            }
318        }
319
320        // determine the number of polyphases in the filterbank.
321        // for 16b, it is desirable to have 2^(16/2) = 256 phases.
322        // https://ccrma.stanford.edu/~jos/resample/Relation_Interpolation_Error_Quantization.html
323        //
324        // We are a bit more lax on this.
325
326        int phases = mSampleRate / gcd(mSampleRate, inSampleRate);
327
328        // TODO: Once dynamic sample rate change is an option, the code below
329        // should be modified to execute only when dynamic sample rate change is enabled.
330        //
331        // as above, #phases less than 63 is too few phases for accurate linear interpolation.
332        // we increase the phases to compensate, but more phases means more memory per
333        // filter and more time to compute the filter.
334        //
335        // if we know that the filter will be used for dynamic sample rate changes,
336        // that would allow us skip this part for fixed sample rate resamplers.
337        //
338        while (phases<63) {
339            phases *= 2; // this code only needed to support dynamic rate changes
340        }
341
342        if (phases>=256) {  // too many phases, always interpolate
343            phases = 127;
344        }
345
346        // create the filter
347        mConstants.set(phases, halfLength, inSampleRate, mSampleRate);
348        if (useS32) {
349            createKaiserFir<int32_t>(mConstants, stopBandAtten,
350                    inSampleRate, mSampleRate, tbwCheat);
351        } else {
352            createKaiserFir<int16_t>(mConstants, stopBandAtten,
353                    inSampleRate, mSampleRate, tbwCheat);
354        }
355    } // End Kaiser filter
356
357    // update phase and state based on the new filter.
358    const Constants& c(mConstants);
359    mInBuffer.resize(mChannelCount, c.mHalfNumCoefs);
360    const uint32_t phaseWrapLimit = c.mL << c.mShift;
361    // try to preserve as much of the phase fraction as possible for on-the-fly changes
362    mPhaseFraction = static_cast<unsigned long long>(mPhaseFraction)
363            * phaseWrapLimit / oldPhaseWrapLimit;
364    mPhaseFraction %= phaseWrapLimit; // should not do anything, but just in case.
365    mPhaseIncrement = static_cast<uint32_t>(static_cast<double>(phaseWrapLimit)
366            * inSampleRate / mSampleRate);
367
368    // determine which resampler to use
369    // check if locked phase (works only if mPhaseIncrement has no "fractional phase bits")
370    int locked = (mPhaseIncrement << (sizeof(mPhaseIncrement)*8 - c.mShift)) == 0;
371    int stride = (c.mHalfNumCoefs&7)==0 ? 16 : (c.mHalfNumCoefs&3)==0 ? 8 : 2;
372    if (locked) {
373        mPhaseFraction = mPhaseFraction >> c.mShift << c.mShift; // remove fractional phase
374    }
375
376    mResampleType = RESAMPLETYPE(mChannelCount, locked, stride, !!useS32);
377#ifdef DEBUG_RESAMPLER
378    printf("channels:%d  %s  stride:%d  %s  coef:%d  shift:%d\n",
379            mChannelCount, locked ? "locked" : "interpolated",
380            stride, useS32 ? "S32" : "S16", 2*c.mHalfNumCoefs, c.mShift);
381#endif
382}
383
384void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
385            AudioBufferProvider* provider)
386{
387    // TODO:
388    // 24 cases - this perhaps can be reduced later, as testing might take too long
389    switch (mResampleType) {
390
391    // stride 16 (falls back to stride 2 for machines that do not support NEON)
392    case RESAMPLETYPE(1, true, 16, 0):
393        return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
394    case RESAMPLETYPE(2, true, 16, 0):
395        return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
396    case RESAMPLETYPE(1, false, 16, 0):
397        return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
398    case RESAMPLETYPE(2, false, 16, 0):
399        return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
400    case RESAMPLETYPE(1, true, 16, 1):
401        return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
402    case RESAMPLETYPE(2, true, 16, 1):
403        return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
404    case RESAMPLETYPE(1, false, 16, 1):
405        return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
406    case RESAMPLETYPE(2, false, 16, 1):
407        return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
408#if 0
409    // TODO: Remove these?
410    // stride 8
411    case RESAMPLETYPE(1, true, 8, 0):
412        return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
413    case RESAMPLETYPE(2, true, 8, 0):
414        return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
415    case RESAMPLETYPE(1, false, 8, 0):
416        return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
417    case RESAMPLETYPE(2, false, 8, 0):
418        return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
419    case RESAMPLETYPE(1, true, 8, 1):
420        return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
421    case RESAMPLETYPE(2, true, 8, 1):
422        return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
423    case RESAMPLETYPE(1, false, 8, 1):
424        return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
425    case RESAMPLETYPE(2, false, 8, 1):
426        return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
427    // stride 2 (can handle any filter length)
428    case RESAMPLETYPE(1, true, 2, 0):
429        return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
430    case RESAMPLETYPE(2, true, 2, 0):
431        return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
432    case RESAMPLETYPE(1, false, 2, 0):
433        return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
434    case RESAMPLETYPE(2, false, 2, 0):
435        return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
436    case RESAMPLETYPE(1, true, 2, 1):
437        return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
438    case RESAMPLETYPE(2, true, 2, 1):
439        return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
440    case RESAMPLETYPE(1, false, 2, 1):
441        return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
442    case RESAMPLETYPE(2, false, 2, 1):
443        return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
444#endif
445    default:
446        ; // error
447    }
448}
449
450template<int CHANNELS, bool LOCKED, int STRIDE, typename TC>
451void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
452        const TC* const coefs,  AudioBufferProvider* provider)
453{
454    const Constants& c(mConstants);
455    int16_t* impulse = mInBuffer.getImpulse();
456    size_t inputIndex = mInputIndex;
457    uint32_t phaseFraction = mPhaseFraction;
458    const uint32_t phaseIncrement = mPhaseIncrement;
459    size_t outputIndex = 0;
460    size_t outputSampleCount = outFrameCount * 2;   // stereo output
461    size_t inFrameCount = (outFrameCount*mInSampleRate)/mSampleRate;
462    const uint32_t phaseWrapLimit = c.mL << c.mShift;
463
464    // NOTE: be very careful when modifying the code here. register
465    // pressure is very high and a small change might cause the compiler
466    // to generate far less efficient code.
467    // Always sanity check the result with objdump or test-resample.
468
469    // the following logic is a bit convoluted to keep the main processing loop
470    // as tight as possible with register allocation.
471    while (outputIndex < outputSampleCount) {
472        // buffer is empty, fetch a new one
473        while (mBuffer.frameCount == 0) {
474            mBuffer.frameCount = inFrameCount;
475            provider->getNextBuffer(&mBuffer,
476                    calculateOutputPTS(outputIndex / 2));
477            if (mBuffer.raw == NULL) {
478                goto resample_exit;
479            }
480            if (phaseFraction >= phaseWrapLimit) { // read in data
481                mInBuffer.readAdvance<CHANNELS>(
482                        impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
483                phaseFraction -= phaseWrapLimit;
484                while (phaseFraction >= phaseWrapLimit) {
485                    inputIndex++;
486                    if (inputIndex >= mBuffer.frameCount) {
487                        inputIndex -= mBuffer.frameCount;
488                        provider->releaseBuffer(&mBuffer);
489                        break;
490                    }
491                    mInBuffer.readAdvance<CHANNELS>(
492                            impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
493                    phaseFraction -= phaseWrapLimit;
494                }
495            }
496        }
497        const int16_t* const in = mBuffer.i16;
498        const size_t frameCount = mBuffer.frameCount;
499        const int coefShift = c.mShift;
500        const int halfNumCoefs = c.mHalfNumCoefs;
501        const int32_t* const volumeSimd = mVolumeSimd;
502
503        // reread the last input in.
504        mInBuffer.readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
505
506        // main processing loop
507        while (CC_LIKELY(outputIndex < outputSampleCount)) {
508            // caution: fir() is inlined and may be large.
509            // output will be loaded with the appropriate values
510            //
511            // from the input samples in impulse[-halfNumCoefs+1]... impulse[halfNumCoefs]
512            // from the polyphase filter of (phaseFraction / phaseWrapLimit) in coefs.
513            //
514            fir<CHANNELS, LOCKED, STRIDE>(
515                    &out[outputIndex],
516                    phaseFraction, phaseWrapLimit,
517                    coefShift, halfNumCoefs, coefs,
518                    impulse, volumeSimd);
519            outputIndex += 2;
520
521            phaseFraction += phaseIncrement;
522            while (phaseFraction >= phaseWrapLimit) {
523                inputIndex++;
524                if (inputIndex >= frameCount) {
525                    goto done;  // need a new buffer
526                }
527                mInBuffer.readAdvance<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
528                phaseFraction -= phaseWrapLimit;
529            }
530        }
531done:
532        // often arrives here when input buffer runs out
533        if (inputIndex >= frameCount) {
534            inputIndex -= frameCount;
535            provider->releaseBuffer(&mBuffer);
536            // mBuffer.frameCount MUST be zero here.
537        }
538    }
539
540resample_exit:
541    mInBuffer.setImpulse(impulse);
542    mInputIndex = inputIndex;
543    mPhaseFraction = phaseFraction;
544}
545
546// ----------------------------------------------------------------------------
547}; // namespace android
548