AudioResamplerDyn.cpp revision 86eae0e5931103e040ac2cdd023ef5db252e09f6
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "AudioResamplerDyn"
18//#define LOG_NDEBUG 0
19
20#include <malloc.h>
21#include <string.h>
22#include <stdlib.h>
23#include <dlfcn.h>
24#include <math.h>
25
26#include <cutils/compiler.h>
27#include <cutils/properties.h>
28#include <utils/Log.h>
29
30#include "AudioResamplerFirOps.h" // USE_NEON and USE_INLINE_ASSEMBLY defined here
31#include "AudioResamplerFirProcess.h"
32#include "AudioResamplerFirProcessNeon.h"
33#include "AudioResamplerFirGen.h" // requires math.h
34#include "AudioResamplerDyn.h"
35
36//#define DEBUG_RESAMPLER
37
38namespace android {
39
// Packs a resampler configuration into a small integer that is a compile-time
// constant (usable as a `case` label) when all arguments are constants:
//   (CHANNELS - 1) & 1   -> bit 0 (0 for one channel, 1 for two channels)
//   LOCKED != 0          -> bit 1
//   COEFTYPE             -> shifted left by 2
//   stride code          -> shifted left by 3 (8 -> 1, 16 -> 2, any other stride -> 0)
#define RESAMPLETYPE(CHANNELS, LOCKED, STRIDE, COEFTYPE) \
    ( (((CHANNELS) - 1) & 1) \
    | ((!!(LOCKED)) << 1) \
    | ((COEFTYPE) << 2) \
    | (((STRIDE) == 8 ? 1 : (STRIDE) == 16 ? 2 : 0) << 3) )
44
45/*
46 * InBuffer is a type agnostic input buffer.
47 *
48 * Layout of the state buffer for halfNumCoefs=8.
49 *
50 * [rrrrrrppppppppnnnnnnnnrrrrrrrrrrrrrrrrrrr.... rrrrrrr]
51 *  S            I                                R
52 *
53 * S = mState
54 * I = mImpulse
55 * R = mRingFull
 * p = past samples, convolved with the (p)ositive side of sinc()
 * n = future samples, convolved with the (n)egative side of sinc()
58 * r = extra space for implementing the ring buffer
59 */
60
61template<typename TI>
62AudioResamplerDyn::InBuffer<TI>::InBuffer()
63    : mState(NULL), mImpulse(NULL), mRingFull(NULL), mStateSize(0) {
64}
65
66template<typename TI>
67AudioResamplerDyn::InBuffer<TI>::~InBuffer() {
68    init();
69}
70
71template<typename TI>
72void AudioResamplerDyn::InBuffer<TI>::init() {
73    free(mState);
74    mState = NULL;
75    mImpulse = NULL;
76    mRingFull = NULL;
77    mStateSize = 0;
78}
79
80// resizes the state buffer to accommodate the appropriate filter length
81template<typename TI>
82void AudioResamplerDyn::InBuffer<TI>::resize(int CHANNELS, int halfNumCoefs) {
83    // calculate desired state size
84    int stateSize = halfNumCoefs * CHANNELS * 2
85            * kStateSizeMultipleOfFilterLength;
86
87    // check if buffer needs resizing
88    if (mState
89            && stateSize == mStateSize
90            && mRingFull-mState == mStateSize-halfNumCoefs*CHANNELS) {
91        return;
92    }
93
94    // create new buffer
95    TI* state = (int16_t*)memalign(32, stateSize*sizeof(*state));
96    memset(state, 0, stateSize*sizeof(*state));
97
98    // attempt to preserve state
99    if (mState) {
100        TI* srcLo = mImpulse - halfNumCoefs*CHANNELS;
101        TI* srcHi = mImpulse + halfNumCoefs*CHANNELS;
102        TI* dst = state;
103
104        if (srcLo < mState) {
105            dst += mState-srcLo;
106            srcLo = mState;
107        }
108        if (srcHi > mState + mStateSize) {
109            srcHi = mState + mStateSize;
110        }
111        memcpy(dst, srcLo, (srcHi - srcLo) * sizeof(*srcLo));
112        free(mState);
113    }
114
115    // set class member vars
116    mState = state;
117    mStateSize = stateSize;
118    mImpulse = mState + halfNumCoefs*CHANNELS; // actually one sample greater than needed
119    mRingFull = mState + mStateSize - halfNumCoefs*CHANNELS;
120}
121
122// copy in the input data into the head (impulse+halfNumCoefs) of the buffer.
123template<typename TI>
124template<int CHANNELS>
125void AudioResamplerDyn::InBuffer<TI>::readAgain(TI*& impulse, const int halfNumCoefs,
126        const TI* const in, const size_t inputIndex) {
127    int16_t* head = impulse + halfNumCoefs*CHANNELS;
128    for (size_t i=0 ; i<CHANNELS ; i++) {
129        head[i] = in[inputIndex*CHANNELS + i];
130    }
131}
132
133// advance the impulse pointer, and load in data into the head (impulse+halfNumCoefs)
134template<typename TI>
135template<int CHANNELS>
136void AudioResamplerDyn::InBuffer<TI>::readAdvance(TI*& impulse, const int halfNumCoefs,
137        const TI* const in, const size_t inputIndex) {
138    impulse += CHANNELS;
139
140    if (CC_UNLIKELY(impulse >= mRingFull)) {
141        const size_t shiftDown = mRingFull - mState - halfNumCoefs*CHANNELS;
142        memcpy(mState, mState+shiftDown, halfNumCoefs*CHANNELS*2*sizeof(TI));
143        impulse -= shiftDown;
144    }
145    readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
146}
147
148void AudioResamplerDyn::Constants::set(
149        int L, int halfNumCoefs, int inSampleRate, int outSampleRate)
150{
151    int bits = 0;
152    int lscale = inSampleRate/outSampleRate < 2 ? L - 1 :
153            static_cast<int>(static_cast<uint64_t>(L)*inSampleRate/outSampleRate);
154    for (int i=lscale; i; ++bits, i>>=1)
155        ;
156    mL = L;
157    mShift = kNumPhaseBits - bits;
158    mHalfNumCoefs = halfNumCoefs;
159}
160
161AudioResamplerDyn::AudioResamplerDyn(int bitDepth,
162        int inChannelCount, int32_t sampleRate, src_quality quality)
163    : AudioResampler(bitDepth, inChannelCount, sampleRate, quality),
164    mResampleType(0), mFilterSampleRate(0), mCoefBuffer(NULL)
165{
166    mVolumeSimd[0] = mVolumeSimd[1] = 0;
167    mConstants.set(128, 8, mSampleRate, mSampleRate); // TODO: set better
168}
169
170AudioResamplerDyn::~AudioResamplerDyn() {
171    free(mCoefBuffer);
172}
173
174void AudioResamplerDyn::init() {
175    mFilterSampleRate = 0; // always trigger new filter generation
176    mInBuffer.init();
177}
178
179void AudioResamplerDyn::setVolume(int16_t left, int16_t right) {
180    AudioResampler::setVolume(left, right);
181    mVolumeSimd[0] = static_cast<int32_t>(left)<<16;
182    mVolumeSimd[1] = static_cast<int32_t>(right)<<16;
183}
184
// Returns the larger of a and b; returns b when a is not greater than b.
template <typename T>
T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
186
// Returns the non-negative difference between a and b, always subtracting the
// smaller operand from the larger one.
template <typename T>
T absdiff(T a, T b) {
    if (a > b) {
        return a - b;
    }
    return b - a;
}
188
189template<typename T>
190void AudioResamplerDyn::createKaiserFir(Constants &c, double stopBandAtten,
191        int inSampleRate, int outSampleRate, double tbwCheat) {
192    T* buf = reinterpret_cast<T*>(memalign(32, (c.mL+1)*c.mHalfNumCoefs*sizeof(T)));
193    static const double atten = 0.9998;   // to avoid ripple overflow
194    double fcr;
195    double tbw = firKaiserTbw(c.mHalfNumCoefs, stopBandAtten);
196
197    if (inSampleRate < outSampleRate) { // upsample
198        fcr = max(0.5*tbwCheat - tbw/2, tbw/2);
199    } else { // downsample
200        fcr = max(0.5*tbwCheat*outSampleRate/inSampleRate - tbw/2, tbw/2);
201    }
202    // create and set filter
203    firKaiserGen(buf, c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten);
204    c.setBuf(buf);
205    if (mCoefBuffer) {
206        free(mCoefBuffer);
207    }
208    mCoefBuffer = buf;
209#ifdef DEBUG_RESAMPLER
210    // print basic filter stats
211    printf("L:%d  hnc:%d  stopBandAtten:%lf  fcr:%lf  atten:%lf  tbw:%lf\n",
212            c.mL, c.mHalfNumCoefs, stopBandAtten, fcr, atten, tbw);
213    // test the filter and report results
214    double fp = (fcr - tbw/2)/c.mL;
215    double fs = (fcr + tbw/2)/c.mL;
216    double fmin, fmax;
217    testFir(buf, c.mL, c.mHalfNumCoefs, 0., fp, 100, fmin, fmax);
218    double d1 = (fmax - fmin)/2.;
219    double ap = -20.*log10(1. - d1); // passband ripple
220    printf("passband(%lf, %lf): %.8lf %.8lf %.8lf\n", 0., fp, (fmax + fmin)/2., d1, ap);
221    testFir(buf, c.mL, c.mHalfNumCoefs, fs, 0.5, 100, fmin, fmax);
222    double d2 = fmax;
223    double as = -20.*log10(d2); // stopband attenuation
224    printf("stopband(%lf, %lf): %.8lf %.8lf %.3lf\n", fs, 0.5, (fmax + fmin)/2., d2, as);
225#endif
226}
227
// Euclid's algorithm, written as a loop: repeatedly replaces (n, m) with
// (m, n % m) until m is zero, then returns n. gcd(n, 0) is n.
static int gcd(int n, int m) {
    while (m != 0) {
        const int remainder = n % m;
        n = m;
        m = remainder;
    }
    return n;
}
235
236static bool isClose(int32_t newSampleRate, int32_t prevSampleRate, int32_t filterSampleRate) {
237    int pdiff = absdiff(newSampleRate, prevSampleRate);
238    int adiff = absdiff(newSampleRate, filterSampleRate);
239
240    // allow up to 6% relative change increments.
241    // allow up to 12% absolute change increments (from filter design)
242    return pdiff < prevSampleRate>>4 && adiff < filterSampleRate>>3;
243}
244
245void AudioResamplerDyn::setSampleRate(int32_t inSampleRate) {
246    if (mInSampleRate == inSampleRate) {
247        return;
248    }
249    int32_t oldSampleRate = mInSampleRate;
250    int32_t oldHalfNumCoefs = mConstants.mHalfNumCoefs;
251    uint32_t oldPhaseWrapLimit = mConstants.mL << mConstants.mShift;
252    bool useS32 = false;
253
254    mInSampleRate = inSampleRate;
255
256    // TODO: Add precalculated Equiripple filters
257
258    if (!isClose(inSampleRate, oldSampleRate, mFilterSampleRate)) {
259        mFilterSampleRate = inSampleRate;
260
261        // Begin Kaiser Filter computation
262        //
263        // The quantization floor for S16 is about 96db - 10*log_10(#length) + 3dB.
264        // Keep the stop band attenuation no greater than 84-85dB for 32 length S16 filters
265        //
266        // For s32 we keep the stop band attenuation at the same as 16b resolution, about
267        // 96-98dB
268        //
269
270        double stopBandAtten;
271        double tbwCheat = 1.; // how much we "cheat" into aliasing
272        int halfLength;
273        if (getQuality() == DYN_HIGH_QUALITY) {
274            // 32b coefficients, 64 length
275            useS32 = true;
276            stopBandAtten = 98.;
277            halfLength = 32;
278        } else if (getQuality() == DYN_LOW_QUALITY) {
279            // 16b coefficients, 16-32 length
280            useS32 = false;
281            stopBandAtten = 80.;
282            if (mSampleRate >= inSampleRate * 2) {
283                halfLength = 16;
284            } else {
285                halfLength = 8;
286            }
287            if (mSampleRate >= inSampleRate) {
288                tbwCheat = 1.05;
289            } else {
290                tbwCheat = 1.03;
291            }
292        } else { // medium quality
293            // 16b coefficients, 32-64 length
294            useS32 = false;
295            stopBandAtten = 84.;
296            if (mSampleRate >= inSampleRate * 4) {
297                halfLength = 32;
298            } else if (mSampleRate >= inSampleRate * 2) {
299                halfLength = 24;
300            } else {
301                halfLength = 16;
302            }
303            if (mSampleRate >= inSampleRate) {
304                tbwCheat = 1.03;
305            } else {
306                tbwCheat = 1.01;
307            }
308        }
309
310        // determine the number of polyphases in the filterbank.
311        // for 16b, it is desirable to have 2^(16/2) = 256 phases.
312        // https://ccrma.stanford.edu/~jos/resample/Relation_Interpolation_Error_Quantization.html
313        //
314        // We are a bit more lax on this.
315
316        int phases = mSampleRate / gcd(mSampleRate, inSampleRate);
317
318        while (phases<63) { // too few phases, allow room for interpolation
319            phases *= 2; // this code only needed to support dynamic rate changes
320        }
321        if (phases>=256) {  // too many phases, always interpolate
322            phases = 127;
323        }
324
325        // create the filter
326        mConstants.set(phases, halfLength, inSampleRate, mSampleRate);
327        if (useS32) {
328            createKaiserFir<int32_t>(mConstants, stopBandAtten,
329                    inSampleRate, mSampleRate, tbwCheat);
330        } else {
331            createKaiserFir<int16_t>(mConstants, stopBandAtten,
332                    inSampleRate, mSampleRate, tbwCheat);
333        }
334    } // End Kaiser filter
335
336    // update phase and state based on the new filter.
337    const Constants& c(mConstants);
338    mInBuffer.resize(mChannelCount, c.mHalfNumCoefs);
339    const uint32_t phaseWrapLimit = c.mL << c.mShift;
340    // try to preserve as much of the phase fraction as possible for on-the-fly changes
341    mPhaseFraction = static_cast<unsigned long long>(mPhaseFraction)
342            * phaseWrapLimit / oldPhaseWrapLimit;
343    mPhaseFraction %= phaseWrapLimit; // should not do anything, but just in case.
344    mPhaseIncrement = static_cast<uint32_t>(static_cast<double>(phaseWrapLimit)
345            * inSampleRate / mSampleRate);
346
347    // determine which resampler to use
348    // check if locked phase (works only if mPhaseIncrement has no "fractional phase bits")
349    int locked = (mPhaseIncrement << (sizeof(mPhaseIncrement)*8 - c.mShift)) == 0;
350    int stride = (c.mHalfNumCoefs&7)==0 ? 16 : (c.mHalfNumCoefs&3)==0 ? 8 : 2;
351    if (locked) {
352        mPhaseFraction = mPhaseFraction >> c.mShift << c.mShift; // remove fractional phase
353    }
354    if (!USE_NEON) {
355        stride = 2; // C version only
356    }
357    // TODO: Remove this for testing
358    //stride = 2;
359    mResampleType = RESAMPLETYPE(mChannelCount, locked, stride, !!useS32);
360#ifdef DEBUG_RESAMPLER
361    printf("channels:%d  %s  stride:%d  %s  coef:%d  shift:%d\n",
362            mChannelCount, locked ? "locked" : "interpolated",
363            stride, useS32 ? "S32" : "S16", 2*c.mHalfNumCoefs, c.mShift);
364#endif
365}
366
367void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
368            AudioBufferProvider* provider)
369{
370    // TODO:
371    // 24 cases - this perhaps can be reduced later, as testing might take too long
372    switch (mResampleType) {
373
374    // stride 16 (stride 2 for machines that do not support NEON)
375    case RESAMPLETYPE(1, true, 16, 0):
376        return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
377    case RESAMPLETYPE(2, true, 16, 0):
378        return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
379    case RESAMPLETYPE(1, false, 16, 0):
380        return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
381    case RESAMPLETYPE(2, false, 16, 0):
382        return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
383    case RESAMPLETYPE(1, true, 16, 1):
384        return resample<1, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
385    case RESAMPLETYPE(2, true, 16, 1):
386        return resample<2, true, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
387    case RESAMPLETYPE(1, false, 16, 1):
388        return resample<1, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
389    case RESAMPLETYPE(2, false, 16, 1):
390        return resample<2, false, 16>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
391#if 0
392    // TODO: Remove these?
393    // stride 8
394    case RESAMPLETYPE(1, true, 8, 0):
395        return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
396    case RESAMPLETYPE(2, true, 8, 0):
397        return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
398    case RESAMPLETYPE(1, false, 8, 0):
399        return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
400    case RESAMPLETYPE(2, false, 8, 0):
401        return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
402    case RESAMPLETYPE(1, true, 8, 1):
403        return resample<1, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
404    case RESAMPLETYPE(2, true, 8, 1):
405        return resample<2, true, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
406    case RESAMPLETYPE(1, false, 8, 1):
407        return resample<1, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
408    case RESAMPLETYPE(2, false, 8, 1):
409        return resample<2, false, 8>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
410    // stride 2 (can handle any filter length)
411    case RESAMPLETYPE(1, true, 2, 0):
412        return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
413    case RESAMPLETYPE(2, true, 2, 0):
414        return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
415    case RESAMPLETYPE(1, false, 2, 0):
416        return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
417    case RESAMPLETYPE(2, false, 2, 0):
418        return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS16, provider);
419    case RESAMPLETYPE(1, true, 2, 1):
420        return resample<1, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
421    case RESAMPLETYPE(2, true, 2, 1):
422        return resample<2, true, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
423    case RESAMPLETYPE(1, false, 2, 1):
424        return resample<1, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
425    case RESAMPLETYPE(2, false, 2, 1):
426        return resample<2, false, 2>(out, outFrameCount, mConstants.mFirCoefsS32, provider);
427#endif
428    default:
429        ; // error
430    }
431}
432
433template<int CHANNELS, bool LOCKED, int STRIDE, typename TC>
434void AudioResamplerDyn::resample(int32_t* out, size_t outFrameCount,
435        const TC* const coefs,  AudioBufferProvider* provider)
436{
437    const Constants& c(mConstants);
438    int16_t* impulse = mInBuffer.getImpulse();
439    size_t inputIndex = mInputIndex;
440    uint32_t phaseFraction = mPhaseFraction;
441    const uint32_t phaseIncrement = mPhaseIncrement;
442    size_t outputIndex = 0;
443    size_t outputSampleCount = outFrameCount * 2;   // stereo output
444    size_t inFrameCount = (outFrameCount*mInSampleRate)/mSampleRate;
445    const uint32_t phaseWrapLimit = c.mL << c.mShift;
446
447    // NOTE: be very careful when modifying the code here. register
448    // pressure is very high and a small change might cause the compiler
449    // to generate far less efficient code.
450    // Always sanity check the result with objdump or test-resample.
451
452    // the following logic is a bit convoluted to keep the main processing loop
453    // as tight as possible with register allocation.
454    while (outputIndex < outputSampleCount) {
455        // buffer is empty, fetch a new one
456        while (mBuffer.frameCount == 0) {
457            mBuffer.frameCount = inFrameCount;
458            provider->getNextBuffer(&mBuffer,
459                    calculateOutputPTS(outputIndex / 2));
460            if (mBuffer.raw == NULL) {
461                goto resample_exit;
462            }
463            if (phaseFraction >= phaseWrapLimit) { // read in data
464                mInBuffer.readAdvance<CHANNELS>(
465                        impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
466                phaseFraction -= phaseWrapLimit;
467                while (phaseFraction >= phaseWrapLimit) {
468                    inputIndex++;
469                    if (inputIndex >= mBuffer.frameCount) {
470                        inputIndex -= mBuffer.frameCount;
471                        provider->releaseBuffer(&mBuffer);
472                        break;
473                    }
474                    mInBuffer.readAdvance<CHANNELS>(
475                            impulse, c.mHalfNumCoefs, mBuffer.i16, inputIndex);
476                    phaseFraction -= phaseWrapLimit;
477                }
478            }
479        }
480        const int16_t* const in = mBuffer.i16;
481        const size_t frameCount = mBuffer.frameCount;
482        const int coefShift = c.mShift;
483        const int halfNumCoefs = c.mHalfNumCoefs;
484        const int32_t* const volumeSimd = mVolumeSimd;
485
486        // reread the last input in.
487        mInBuffer.readAgain<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
488
489        // main processing loop
490        while (CC_LIKELY(outputIndex < outputSampleCount)) {
491            // caution: fir() is inlined and may be large.
492            // output will be loaded with the appropriate values
493            //
494            // from the input samples in impulse[-halfNumCoefs+1]... impulse[halfNumCoefs]
495            // from the polyphase filter of (phaseFraction / phaseWrapLimit) in coefs.
496            //
497            fir<CHANNELS, LOCKED, STRIDE>(
498                    &out[outputIndex],
499                    phaseFraction, phaseWrapLimit,
500                    coefShift, halfNumCoefs, coefs,
501                    impulse, volumeSimd);
502            outputIndex += 2;
503
504            phaseFraction += phaseIncrement;
505            while (phaseFraction >= phaseWrapLimit) {
506                inputIndex++;
507                if (inputIndex >= frameCount) {
508                    goto done;  // need a new buffer
509                }
510                mInBuffer.readAdvance<CHANNELS>(impulse, halfNumCoefs, in, inputIndex);
511                phaseFraction -= phaseWrapLimit;
512            }
513        }
514done:
515        // often arrives here when input buffer runs out
516        if (inputIndex >= frameCount) {
517            inputIndex -= frameCount;
518            provider->releaseBuffer(&mBuffer);
519            // mBuffer.frameCount MUST be zero here.
520        }
521    }
522
523resample_exit:
524    mInBuffer.setImpulse(impulse);
525    mInputIndex = inputIndex;
526    mPhaseFraction = phaseFraction;
527}
528
529// ----------------------------------------------------------------------------
530}; // namespace android
531