AudioResamplerFirGen.h revision 6582f2b14a21e630654c5522ef9ad64e80d5058d
1/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
18#define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
19
20namespace android {
21
22/*
23 * generates a sine wave at equal steps.
24 *
25 * As most of our functions use sine or cosine at equal steps,
26 * it is very efficient to compute them that way (single multiply and subtract),
27 * rather than invoking the math library sin() or cos() each time.
28 *
29 * SineGen uses Goertzel's Algorithm (as a generator not a filter)
30 * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep)
31 * by stepping through 0, 1, ... n.
32 *
33 * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep)
34 *
35 * or looking at just the imaginary sine term, as the cosine follows identically:
36 *
37 * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep)
38 *
39 * Goertzel's algorithm is more efficient than the angle addition formula,
40 * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to
41 * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and
42 * cosine generation due to the complex * complex multiply (full rotation).
43 *
44 * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
45 *
46 */
47
48class SineGen {
49public:
50    SineGen(double wstart, double wstep, bool cosine = false) {
51        if (cosine) {
52            mCurrent = cos(wstart);
53            mPrevious = cos(wstart - wstep);
54        } else {
55            mCurrent = sin(wstart);
56            mPrevious = sin(wstart - wstep);
57        }
58        mTwoCos = 2.*cos(wstep);
59    }
60    SineGen(double expNow, double expPrev, double twoCosStep) {
61        mCurrent = expNow;
62        mPrevious = expPrev;
63        mTwoCos = twoCosStep;
64    }
65    inline double value() const {
66        return mCurrent;
67    }
68    inline void advance() {
69        double tmp = mCurrent;
70        mCurrent = mCurrent*mTwoCos - mPrevious;
71        mPrevious = tmp;
72    }
73    inline double valueAdvance() {
74        double tmp = mCurrent;
75        mCurrent = mCurrent*mTwoCos - mPrevious;
76        mPrevious = tmp;
77        return tmp;
78    }
79
80private:
81    double mCurrent; // current value of sine/cosine
82    double mPrevious; // previous value of sine/cosine
83    double mTwoCos; // stepping factor
84};
85
86/*
87 * generates a series of sine generators, phase offset by fixed steps.
88 *
89 * This is used to generate polyphase sine generators, one per polyphase
90 * in the filter code below.
91 *
92 * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep;
93 * increments by innerStep.
94 *
95 */
96
97class SineGenGen {
98public:
99    SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false)
100            : mSineInnerCur(outerStart, outerStep, cosine),
101              mSineInnerPrev(outerStart-innerStep, outerStep, cosine)
102    {
103        mTwoCos = 2.*cos(innerStep);
104    }
105    inline SineGen value() {
106        return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos);
107    }
108    inline void advance() {
109        mSineInnerCur.advance();
110        mSineInnerPrev.advance();
111    }
112    inline SineGen valueAdvance() {
113        return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos);
114    }
115
116private:
117    SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep).
118    SineGen mSineInnerPrev; // generate the inner sine previous values
119                            // (behind by innerStep, stepped by outerStep).
120    double mTwoCos; // the inner stepping factor for the returned SineGen.
121};
122
123static inline double sqr(double x) {
124    return x * x;
125}
126
127/*
128 * rounds a double to the nearest integer for FIR coefficients.
129 *
130 * One variant uses noise shaping, which must keep error history
131 * to work (the err parameter, initialized to 0).
132 * The other variant is a non-noise shaped version for
133 * S32 coefficients (noise shaping doesn't gain much).
134 *
135 * Caution: No bounds saturation is applied, but isn't needed in this case.
136 *
137 * @param x is the value to round.
138 *
139 * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom).
140 * Typically this may be the maximum positive integer+1 (using the fact that double precision
141 * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition).
142 *
143 * @param err is the previous error (actual - rounded) for the previous rounding op.
144 * For 16b coefficients this can improve stopband dB performance by up to 2dB.
145 *
146 * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping
147 *
148 */
149
150static inline int64_t toint(double x, int64_t maxval, double& err) {
151    double val = x * maxval;
152    double ival = floor(val + 0.5 + err*0.2);
153    err = val - ival;
154    return static_cast<int64_t>(ival);
155}
156
157static inline int64_t toint(double x, int64_t maxval) {
158    return static_cast<int64_t>(floor(x * maxval + 0.5));
159}
160
161/*
162 * Modified Bessel function of the first kind
163 * http://en.wikipedia.org/wiki/Bessel_function
164 *
165 * The formulas are taken from Abramowitz and Stegun,
166 * _Handbook of Mathematical Functions_ (links below):
167 *
168 * http://people.math.sfu.ca/~cbm/aands/page_375.htm
169 * http://people.math.sfu.ca/~cbm/aands/page_378.htm
170 *
171 * http://dlmf.nist.gov/10.25
172 * http://dlmf.nist.gov/10.40
173 *
174 * Note we assume x is nonnegative (the function is symmetric,
175 * pass in the absolute value as needed).
176 *
177 * Constants are compile time derived with templates I0Term<> and
178 * I0ATerm<> to the precision of the compiler.  The series can be expanded
179 * to any precision needed, but currently set around 24b precision.
180 *
181 * We use a bit of template math here, constexpr would probably be
182 * more appropriate for a C++11 compiler.
183 *
184 * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit.
185 *
186 */
187
188template <int N>
189struct I0Term {
190    static const double value = I0Term<N-1>::value / (4. * N * N);
191};
192
193template <>
194struct I0Term<0> {
195    static const double value = 1.;
196};
197
198template <int N>
199struct I0ATerm {
200    static const double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N);
201};
202
203template <>
204struct I0ATerm<0> { // 1/sqrt(2*PI);
205    static const double value = 0.398942280401432677939946059934381868475858631164934657665925;
206};
207
208#if USE_HORNERS_METHOD
209/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
210 * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method
211 *
212 * This has fewer multiplications than Estrin's method below, but has back to back
213 * floating point dependencies.
214 *
215 * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled.
216 */
217
218inline double Poly2(double A, double B, double x) {
219    return A + x * B;
220}
221
222inline double Poly4(double A, double B, double C, double D, double x) {
223    return A + x * (B + x * (C + x * (D)));
224}
225
226inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
227        double x) {
228    return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G))))));
229}
230
231inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
232        double H, double I, double x) {
233    return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I))))))));
234}
235
236#else
237/* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
238 * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme
239 *
240 * This is typically faster, perhaps gains about 5-10% overall on ARM processors
241 * over Horner's method above.
242 */
243
244inline double Poly2(double A, double B, double x) {
245    return A + B * x;
246}
247
248inline double Poly3(double A, double B, double C, double x, double x2) {
249    return Poly2(A, B, x) + C * x2;
250}
251
252inline double Poly3(double A, double B, double C, double x) {
253    return Poly2(A, B, x) + C * x * x;
254}
255
256inline double Poly4(double A, double B, double C, double D, double x, double x2) {
257    return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2);
258}
259
260inline double Poly4(double A, double B, double C, double D, double x) {
261    return Poly4(A, B, C, D, x, x * x);
262}
263
264inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
265        double x) {
266    double x2 = x * x;
267    return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2);
268}
269
270inline double Poly8(double A, double B, double C, double D, double E, double F, double G,
271        double H, double x, double x2, double x4) {
272    return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4;
273}
274
275inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
276        double H, double I, double x) {
277    double x2 = x * x;
278#if 1
279    // It does not seem faster to explicitly decompose Poly8 into Poly4, but
280    // could depend on compiler floating point scheduling.
281    double x4 = x2 * x2;
282    return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4);
283#else
284    double val = Poly4(A, B, C, D, x, x2);
285    double x4 = x2 * x2;
286    return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4);
287#endif
288}
289#endif
290
291static inline double I0(double x) {
292    if (x < 3.75) {
293        x *= x;
294        return Poly7(I0Term<0>::value, I0Term<1>::value,
295                I0Term<2>::value, I0Term<3>::value,
296                I0Term<4>::value, I0Term<5>::value,
297                I0Term<6>::value, x); // e < 1.6e-7
298    }
299    if (1) {
300        /*
301         * Series expansion coefs are easy to calculate, but are expanded around 0,
302         * so error is unequal over the interval 0 < x < 3.75, the error being
303         * significantly better near 0.
304         *
305         * A better solution is to use precise minimax polynomial fits.
306         *
307         * We use a slightly more complicated solution for 3.75 < x < 15, based on
308         * the tables in Blair and Edwards, "Stable Rational Minimax Approximations
309         * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory,
310         * AECL-4928.
311         *
312         * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf
313         *
314         * See Table 11 for 0 < x < 15; e < 10^(-7.13).
315         *
316         * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b).
317         *
318         * This speeds up overall computation by about 40% over using the else clause below,
319         * which requires sqrt and exp.
320         *
321         */
322
323        x *= x;
324        double num = Poly9(-0.13544938430e9, -0.33153754512e8,
325                -0.19406631946e7, -0.48058318783e5,
326                -0.63269783360e3, -0.49520779070e1,
327                -0.24970910370e-1, -0.74741159550e-4,
328                -0.18257612460e-6, x);
329        double y = x - 225.; // reflection around 15 (squared)
330        double den = Poly4(-0.34598737196e8, 0.23852643181e6,
331                -0.70699387620e3, 0.10000000000e1, y);
332        return num / den;
333
334#if IO_EXTENDED_BETA
335        /* Table 42 for x > 15; e < 10^(-8.11).
336         * This is used for Beta>15, but is disabled here as
337         * we never use Beta that high.
338         *
339         * NOTE: This should be enabled only for x > 15.
340         */
341
342        double y = 1./x;
343        double z = y - (1./15);
344        double num = Poly2(0.415079861746e1, -0.5149092496e1, z);
345        double den = Poly3(0.103150763823e2, -0.14181687413e2,
346                0.1000000000e1, z);
347        return exp(x) * sqrt(y) * num / den;
348#endif
349    } else {
350        /*
351         * NOT USED, but reference for large Beta.
352         *
353         * Abramowitz and Stegun asymptotic formula.
354         * works for x > 3.75.
355         */
356        double y = 1./x;
357        return exp(x) * sqrt(y) *
358                // note: reciprocal squareroot may be easier!
359                // http://en.wikipedia.org/wiki/Fast_inverse_square_root
360                Poly9(I0ATerm<0>::value, I0ATerm<1>::value,
361                        I0ATerm<2>::value, I0ATerm<3>::value,
362                        I0ATerm<4>::value, I0ATerm<5>::value,
363                        I0ATerm<6>::value, I0ATerm<7>::value,
364                        I0ATerm<8>::value, y); // (... e) < 1.9e-7
365    }
366}
367
368/*
369 * calculates the transition bandwidth for a Kaiser filter
370 *
371 * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
372 * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
373 *
374 * @param halfNumCoef is half the number of coefficients per filter phase.
375 *
376 * @param stopBandAtten is the stop band attenuation desired.
377 *
378 * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5)
379 */
380static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) {
381    return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef);
382}
383
384/*
385 * calculates the fir transfer response of the overall polyphase filter at w.
386 *
387 * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the
388 * fact that h[n] is symmetric (cosines only, no complex arithmetic).
389 *
390 * We use Goertzel's algorithm to accelerate the computation to essentially
391 * a single multiply and 2 adds per filter coefficient h[].
392 *
393 * Be careful be careful to consider that h[n] is the overall polyphase filter,
394 * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain",
395 * as you only use one of the polyphases at a time.
396 */
397template <typename T>
398static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) {
399    double accum = static_cast<double>(coef[0])*0.5;  // "center coefficient" from first bank
400    coef += halfNumCoef;    // skip first filterbank (picked up by the last filterbank).
401#if SLOW_FIRTRANSFER
402    /* Original code for reference.  This is equivalent to the code below, but slower. */
403    for (int i=1 ; i<=L ; ++i) {
404        for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
405            accum += cos(ix*w)*static_cast<double>(*coef++);
406        }
407    }
408#else
409    /*
410     * Our overall filter is stored striped by polyphases, not a contiguous h[n].
411     * We could fetch coefficients in a non-contiguous fashion
412     * but that will not scale to vector processing.
413     *
414     * We apply Goertzel's algorithm directly to each polyphase filter bank instead of
415     * using cosine generation/multiplication, thereby saving one multiply per inner loop.
416     *
417     * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
418     * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720.
419     *
420     * We use the basic recursion to incorporate the cosine steps into real sequence x[n]:
421     * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2]
422     *
423     * y[n] = s[n] - e^(iw)s[n-1]
424     *      = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k))
425     *      = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk)
426     *
427     * The summation contains the frequency steps we want multiplied by the source
428     * (similar to a DTFT).
429     *
430     * Using symmetry, and just the real part (be careful, this must happen
431     * after any internal complex multiplications), the polyphase filterbank
432     * transfer function is:
433     *
434     * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0)
435     *                = Re{ e^(iwn + iw_0) y[n]}
436     *                = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1]
437     *
438     * using the fact that s[n] of real x[n] is real.
439     *
440     */
441    double dcos = 2. * cos(L*w);
442    int start = ((halfNumCoef)*L + 1);
443    SineGen cc((start - L) * w, w, true); // cosine
444    SineGen cp(start * w, w, true); // cosine
445    for (int i=1 ; i<=L ; ++i) {
446        double sc = 0;
447        double sp = 0;
448        for (int j=0 ; j<halfNumCoef ; ++j) {
449            double tmp = sc;
450            sc  = static_cast<double>(*coef++) + dcos*sc - sp;
451            sp = tmp;
452        }
453        // If we are awfully clever, we can apply Goertzel's algorithm
454        // again on the sc and sp sequences returned here.
455        accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp;
456    }
457#endif
458    return accum*2.;
459}
460
461/*
462 * evaluates the minimum and maximum |H(f)| bound in a band region.
463 *
464 * This is usually done with equally spaced increments in the target band in question.
465 * The passband is often very small, and sampled that way. The stopband is often much
466 * larger.
467 *
468 * We use the fact that the overall polyphase filter has an additional bank at the end
469 * for interpolation; hence it is overspecified for the H(f) computation.  Thus the
470 * first polyphase is never actually checked, excepting its first term.
471 *
472 * In this code we use the firTransfer() evaluator above, which uses Goertzel's
473 * algorithm to calculate the transfer function at each point.
474 *
475 * TODO: An alternative with equal spacing is the FFT/DFT.  An alternative with unequal
476 * spacing is a chirp transform.
477 *
478 * @param coef is the designed polyphase filter banks
479 *
480 * @param L is the number of phases (for interpolation)
481 *
482 * @param halfNumCoef should be half the number of coefficients for a single
483 * polyphase.
484 *
485 * @param fstart is the normalized frequency start.
486 *
487 * @param fend is the normalized frequency end.
488 *
489 * @param steps is the number of steps to take (sampling) between frequency start and end
490 *
491 * @param firMin returns the minimum transfer |H(f)| found
492 *
493 * @param firMax returns the maximum transfer |H(f)| found
494 *
495 * 0 <= f <= 0.5.
496 * This is used to test passband and stopband performance.
497 */
498template <typename T>
499static void testFir(const T* coef, int L, int halfNumCoef,
500        double fstart, double fend, int steps, double &firMin, double &firMax) {
501    double wstart = fstart*(2.*M_PI);
502    double wend = fend*(2.*M_PI);
503    double wstep = (wend - wstart)/steps;
504    double fmax, fmin;
505    double trf = firTransfer(coef, L, halfNumCoef, wstart);
506    if (trf<0) {
507        trf = -trf;
508    }
509    fmin = fmax = trf;
510    wstart += wstep;
511    for (int i=1; i<steps; ++i) {
512        trf = firTransfer(coef, L, halfNumCoef, wstart);
513        if (trf<0) {
514            trf = -trf;
515        }
516        if (trf>fmax) {
517            fmax = trf;
518        }
519        else if (trf<fmin) {
520            fmin = trf;
521        }
522        wstart += wstep;
523    }
524    // renormalize - this is only needed for integer filter types
525    double norm = 1./((1ULL<<(sizeof(T)*8-1))*L);
526
527    firMin = fmin * norm;
528    firMax = fmax * norm;
529}
530
531/*
532 * evaluates the |H(f)| lowpass band characteristics.
533 *
534 * This function tests the lowpass characteristics for the overall polyphase filter,
535 * and is used to verify the design.  For this case, fp should be set to the
536 * passband normalized frequency from 0 to 0.5 for the overall filter (thus it
537 * is the designed polyphase bank value / L).  Likewise for fs.
538 *
539 * @param coef is the designed polyphase filter banks
540 *
541 * @param L is the number of phases (for interpolation)
542 *
543 * @param halfNumCoef should be half the number of coefficients for a single
544 * polyphase.
545 *
546 * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5.
547 *
548 * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5.
549 *
550 * @param passSteps is the number of passband sampling steps.
551 *
552 * @param stopSteps is the number of stopband sampling steps.
553 *
554 * @param passMin is the minimum value in the passband
555 *
556 * @param passMax is the maximum value in the passband (useful for scaling).  This should
557 * be less than 1., to avoid sine wave test overflow.
558 *
559 * @param passRipple is the passband ripple.  Typically this should be less than 0.1 for
560 * an audio filter.  Generally speaker/headphone device characteristics will dominate
561 * the passband term.
562 *
563 * @param stopMax is the maximum value in the stopband.
564 *
565 * @param stopRipple is the stopband ripple, also known as stopband attenuation.
566 * Typically this should be greater than ~80dB for low quality, and greater than
567 * ~100dB for full 16b quality, otherwise aliasing may become noticeable.
568 *
569 */
570template <typename T>
571static void testFir(const T* coef, int L, int halfNumCoef,
572        double fp, double fs, int passSteps, int stopSteps,
573        double &passMin, double &passMax, double &passRipple,
574        double &stopMax, double &stopRipple) {
575    double fmin, fmax;
576    testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax);
577    double d1 = (fmax - fmin)/2.;
578    passMin = fmin;
579    passMax = fmax;
580    passRipple = -20.*log10(1. - d1); // passband ripple
581    testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax);
582    // fmin is really not important for the stopband.
583    stopMax = fmax;
584    stopRipple = -20.*log10(fmax); // stopband ripple/attenuation
585}
586
587/*
588 * Calculates the overall polyphase filter based on a windowed sinc function.
589 *
590 * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1
591 * taps for the entire kernel.  This is then decomposed into L+1 polyphase filterbanks.
592 * The last filterbank is used for interpolation purposes (and is mostly composed
593 * of the first bank shifted by one sample), and is unnecessary if one does
594 * not do interpolation.
595 *
596 * We use the last filterbank for some transfer function calculation purposes,
597 * so it needs to be generated anyways.
598 *
599 * @param coef is the caller allocated space for coefficients.  This should be
600 * exactly (L+1)*halfNumCoef in size.
601 *
602 * @param L is the number of phases (for interpolation)
603 *
604 * @param halfNumCoef should be half the number of coefficients for a single
605 * polyphase.
606 *
607 * @param stopBandAtten is the stopband value, should be >50dB.
608 *
609 * @param fcr is cutoff frequency/sampling rate (<0.5).  At this point, the energy
610 * should be 6dB less. (fcr is where the amplitude drops by half).  Use the
611 * firKaiserTbw() to calculate the transition bandwidth.  fcr is the midpoint
612 * between the stop band and the pass band (fstop+fpass)/2.
613 *
614 * @param atten is the attenuation (generally slightly less than 1).
615 */
616
617template <typename T>
618static inline void firKaiserGen(T* coef, int L, int halfNumCoef,
619        double stopBandAtten, double fcr, double atten) {
620    //
621    // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
622    // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
623    //
624    // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf
625    //
626    // Kaiser window and beta parameter
627    //
628    //         | 0.1102*(A - 8.7)                         A > 50
629    //  beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21)   21 <= A <= 50
630    //         | 0.                                       A < 21
631    //
632    // with A is the desired stop-band attenuation in dBFS
633    //
634    //    30 dB    2.210
635    //    40 dB    3.384
636    //    50 dB    4.538
637    //    60 dB    5.658
638    //    70 dB    6.764
639    //    80 dB    7.865
640    //    90 dB    8.960
641    //   100 dB   10.056
642
643    const int N = L * halfNumCoef; // non-negative half
644    const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always
645    const double xstep = (2. * M_PI) * fcr / L;
646    const double xfrac = 1. / N;
647    const double yscale = atten * L / (I0(beta) * M_PI);
648
649    // We use sine generators, which computes sines on regular step intervals.
650    // This speeds up overall computation about 40% from computing the sine directly.
651
652    SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase)
653
654    for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation
655
656        // computation for a single polyphase of the overall filter.
657        SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop.
658        double err = 0; // for noise shaping on int16_t coefficients (over each polyphase)
659
660        for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
661            double y;
662            if (CC_LIKELY(ix)) {
663                double x = static_cast<double>(ix);
664
665                // sine generator: sg.valueAdvance() returns sin(ix*xstep);
666                y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
667            } else {
668                y = 2. * atten * fcr; // center of filter, sinc(0) = 1.
669                sg.advance();
670            }
671
672            // (caution!) float version does not need rounding
673            if (is_same<T, int16_t>::value) { // int16_t needs noise shaping
674                *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err));
675            } else {
676                *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1)));
677            }
678        }
679    }
680}
681
682}; // namespace android
683
684#endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/
685