/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

1786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H
1886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H
1986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
2086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungnamespace android {
2186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
// Depends on AudioResamplerFirOps.h, which must be included first:
// mulAdd(), mulAddRL() and mulRL() are used below but are not defined in this file.

/* variant for input type TI = int16_t input samples */

/*
 * Multiply-accumulates one interleaved stereo int16_t frame into l and r.
 *
 * The two adjacent int16_t samples are loaded as one packed 32-bit word, which is
 * handed to mulAddRL() (declared outside this file) twice: with selector 1 to
 * update l and with selector 0 to update r.
 *
 * NOTE(review): the load goes through a uint32_t pointer, so it presumably relies
 * on samples being suitably aligned for a 32-bit access and on the toolchain
 * tolerating the type pun - confirm before porting to a new target.
 */
template<typename TC>
static inline
void mac(int32_t& l, int32_t& r, TC coef, const int16_t* samples)
{
    const uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
    l = mulAddRL(1, rl, coef, l);
    r = mulAddRL(0, rl, coef, r);
}

/*
 * Multiply-accumulates a single (mono) int16_t sample into l:
 * l = mulAdd(samples[0], coef, l), where mulAdd() is declared outside this file.
 */
template<typename TC>
static inline
void mac(int32_t& l, TC coef, const int16_t* samples)
{
    l = mulAdd(samples[0], coef, l);
}

/* variant for input type TI = float input samples */

/*
 * Multiply-accumulates one interleaved stereo float frame:
 * l += samples[0] * coef and r += samples[1] * coef.
 */
template<typename TC>
static inline
void mac(float& l, float& r, TC coef, const float* samples)
{
    l += samples[0] * coef;
    r += samples[1] * coef;
}

/*
 * Multiply-accumulates a single (mono) float sample: l += samples[0] * coef.
 */
template<typename TC>
static inline
void mac(float& l, TC coef, const float* samples)
{
    l += samples[0] * coef;
}

57d549139155b20d7cbf6a4326133e06def465ef54Andy Hung/* variant for output type TO = int32_t output samples */
58d549139155b20d7cbf6a4326133e06def465ef54Andy Hungstatic inline
59d549139155b20d7cbf6a4326133e06def465ef54Andy Hungint32_t volumeAdjust(int32_t value, int32_t volume)
60d549139155b20d7cbf6a4326133e06def465ef54Andy Hung{
61d549139155b20d7cbf6a4326133e06def465ef54Andy Hung    return 2 * mulRL(0, value, volume);  // Note: only use top 16b
62d549139155b20d7cbf6a4326133e06def465ef54Andy Hung}
63d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
/* variant for output type TO = float output samples */

/*
 * Applies volume to a float accumulator value: returns value * volume.
 */
static inline
float volumeAdjust(float value, float volume)
{
    return value * volume;
}

/*
 * Helper template classes for loop unrolling accumulator operations.
 *
 * Unrolling the loops achieves about 2x gain.
 * Using a recursive template rather than an array of TO[] for the accumulator
 * values is an additional 10-20% gain.
 *
 * Accumulator<CHANNELS, TO> holds one value and inherits from
 * Accumulator<CHANNELS-1, TO>, so an instance carries CHANNELS values in total.
 * Every operation handles its own value first and then forwards to the base
 * class; the recursion ends at the empty Accumulator<0, TO> specialization.
 */

template<int CHANNELS, typename TO>
class Accumulator : public Accumulator<CHANNELS-1, TO> // recursive
{
public:
    // Zeroes this level's value and then the values of all base classes.
    inline void clear() {
        value = 0;
        Accumulator<CHANNELS-1, TO>::clear();
    }

    // Multiply-accumulates the sample at data into this level's value, advances
    // data by one sample, and lets the base classes consume the following samples.
    // data is a reference and is left pointing past the last sample consumed.
    template<typename TC, typename TI>
    inline void acc(TC coef, const TI*& data) {
        mac(value, coef, data++);
        Accumulator<CHANNELS-1, TO>::acc(coef, data);
    }

    // Stores (assigns, does not add) the volume-adjusted value at out, advances out
    // by one sample, and lets the base classes write the following samples.
    // The same gain is applied at every level.
    inline void volume(TO*& out, TO gain) {
        *out++ = volumeAdjust(value, gain);
        Accumulator<CHANNELS-1, TO>::volume(out, gain);
    }

    TO value; // one per recursive inherited base class
};

/* Recursion terminator: holds no value, every operation is a no-op. */
template<typename TO>
class Accumulator<0, TO> {
public:
    inline void clear() {
    }
    template<typename TC, typename TI>
    inline void acc(TC /* coef */, const TI*& /* data */) {
    }
    inline void volume(TO*& /* out */, TO /* gain */) {
    }
};

/*
 * Linear interpolation between two filter coefficients.
 *
 * Generic version: returns coef_0 + lerp * (coef_1 - coef_0), evaluated in the
 * arithmetic of TC and TINTERP.
 */
template<typename TC, typename TINTERP>
inline
TC interpolate(TC coef_0, TC coef_1, TINTERP lerp)
{
    return lerp * (coef_1 - coef_0) + coef_0;
}

/*
 * int16_t coefficients with an integer lerp.
 *
 * lerp and the coefficient difference are both truncated to int16_t before the
 * multiply, and the product is shifted right by 15 bits.
 */
template<>
inline
int16_t interpolate<int16_t, uint32_t>(int16_t coef_0, int16_t coef_1, uint32_t lerp)
{   // in some CPU architectures 16b x 16b multiplies are faster.
    return (static_cast<int16_t>(lerp) * static_cast<int16_t>(coef_1 - coef_0) >> 15) + coef_0;
}

/*
 * int32_t coefficients with an integer lerp.
 *
 * The product is formed in 64 bits and shifted right by 31 bits. The coefficient
 * difference is widened to 64 bits BEFORE subtracting, so coefficients that are
 * more than INT32_MAX apart do not overflow a 32-bit signed subtraction.
 */
template<>
inline
int32_t interpolate<int32_t, uint32_t>(int32_t coef_0, int32_t coef_1, uint32_t lerp)
{
    const int64_t delta = static_cast<int64_t>(coef_1) - coef_0;
    return static_cast<int32_t>((lerp * delta >> 31) + coef_0);
}

/* class scope for passing in functions into templates */

/*
 * Interpolating policy: both entry points return interpolate(coef_0, coef_1, lerp).
 * interpolatep() is used for the positive-side coefficients and interpolaten() for
 * the negative-side ones; here the two are identical.
 */
struct InterpCompute {
    template<typename TC, typename TINTERP>
    static inline
    TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) {
        return interpolate(coef_0, coef_1, lerp);
    }

    template<typename TC, typename TINTERP>
    static inline
    TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) {
        return interpolate(coef_0, coef_1, lerp);
    }
};

/*
 * Non-interpolating policy: no arithmetic is performed and lerp is ignored.
 * interpolatep() returns its first coefficient (coef_0) and interpolaten()
 * returns its second coefficient (coef_1).
 */
struct InterpNull {
    template<typename TC, typename TINTERP>
    static inline
    TC interpolatep(TC coef_0, TC /* coef_1 */, TINTERP /* lerp */) {
        return coef_0;
    }

    template<typename TC, typename TINTERP>
    static inline
    TC interpolaten(TC /* coef_0 */, TC coef_1, TINTERP /* lerp */) {
        return coef_1;
    }
};

16268ffa200de7c4662c088851a328923be715c6c24Andy Hung/*
16368ffa200de7c4662c088851a328923be715c6c24Andy Hung * Calculates a single output frame (two samples).
16468ffa200de7c4662c088851a328923be715c6c24Andy Hung *
16568ffa200de7c4662c088851a328923be715c6c24Andy Hung * The Process*() functions compute both the positive half FIR dot product and
16668ffa200de7c4662c088851a328923be715c6c24Andy Hung * the negative half FIR dot product, accumulates, and then applies the volume.
16768ffa200de7c4662c088851a328923be715c6c24Andy Hung *
16868ffa200de7c4662c088851a328923be715c6c24Andy Hung * Use fir() to compute the proper coefficient pointers for a polyphase
16968ffa200de7c4662c088851a328923be715c6c24Andy Hung * filter bank.
17068ffa200de7c4662c088851a328923be715c6c24Andy Hung *
17168ffa200de7c4662c088851a328923be715c6c24Andy Hung * ProcessBase() is the fundamental processing template function.
17268ffa200de7c4662c088851a328923be715c6c24Andy Hung *
17368ffa200de7c4662c088851a328923be715c6c24Andy Hung * ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase.
17468ffa200de7c4662c088851a328923be715c6c24Andy Hung * Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase.
17568ffa200de7c4662c088851a328923be715c6c24Andy Hung */
17668ffa200de7c4662c088851a328923be715c6c24Andy Hung
177b187de1ada34a9023c05d020a4592686ba761278Glenn Kastentemplate <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO,
178b187de1ada34a9023c05d020a4592686ba761278Glenn Kasten        typename TINTERP>
17986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungstatic inline
18068ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid ProcessBase(TO* const out,
181a4daf0b4f934b800a49f199fb8c09409391c8fc0Glenn Kasten        size_t count,
18286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsP,
18386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsN,
184d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sP,
185d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sN,
186d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        TINTERP lerpP,
187d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TO* const volumeLR)
18886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{
18991164e7f94030b3e49a31d88c91d70d4bd358ebeGlenn Kasten    static_assert(CHANNELS > 0, "CHANNELS must be > 0");
190d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
19168ffa200de7c4662c088851a328923be715c6c24Andy Hung    if (CHANNELS > 2) {
19268ffa200de7c4662c088851a328923be715c6c24Andy Hung        // TO accum[CHANNELS];
19368ffa200de7c4662c088851a328923be715c6c24Andy Hung        Accumulator<CHANNELS, TO> accum;
19468ffa200de7c4662c088851a328923be715c6c24Andy Hung
19568ffa200de7c4662c088851a328923be715c6c24Andy Hung        // for (int j = 0; j < CHANNELS; ++j) accum[j] = 0;
19668ffa200de7c4662c088851a328923be715c6c24Andy Hung        accum.clear();
19768ffa200de7c4662c088851a328923be715c6c24Andy Hung        for (size_t i = 0; i < count; ++i) {
19868ffa200de7c4662c088851a328923be715c6c24Andy Hung            TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP);
19968ffa200de7c4662c088851a328923be715c6c24Andy Hung
20068ffa200de7c4662c088851a328923be715c6c24Andy Hung            // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j);
20168ffa200de7c4662c088851a328923be715c6c24Andy Hung            const TI *tmp_data = sP; // tmp_ptr seems to work better
20268ffa200de7c4662c088851a328923be715c6c24Andy Hung            accum.acc(c, tmp_data);
20368ffa200de7c4662c088851a328923be715c6c24Andy Hung
20468ffa200de7c4662c088851a328923be715c6c24Andy Hung            coefsP++;
20568ffa200de7c4662c088851a328923be715c6c24Andy Hung            sP -= CHANNELS;
20668ffa200de7c4662c088851a328923be715c6c24Andy Hung            c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP);
20768ffa200de7c4662c088851a328923be715c6c24Andy Hung
20868ffa200de7c4662c088851a328923be715c6c24Andy Hung            // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j);
20968ffa200de7c4662c088851a328923be715c6c24Andy Hung            tmp_data = sN; // tmp_ptr seems faster than directly using sN
21068ffa200de7c4662c088851a328923be715c6c24Andy Hung            accum.acc(c, tmp_data);
21168ffa200de7c4662c088851a328923be715c6c24Andy Hung
21268ffa200de7c4662c088851a328923be715c6c24Andy Hung            coefsN++;
21368ffa200de7c4662c088851a328923be715c6c24Andy Hung            sN += CHANNELS;
21468ffa200de7c4662c088851a328923be715c6c24Andy Hung        }
21568ffa200de7c4662c088851a328923be715c6c24Andy Hung        // for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]);
21668ffa200de7c4662c088851a328923be715c6c24Andy Hung        TO *tmp_out = out; // may remove if const out definition changes.
21768ffa200de7c4662c088851a328923be715c6c24Andy Hung        accum.volume(tmp_out, volumeLR[0]);
21868ffa200de7c4662c088851a328923be715c6c24Andy Hung    } else if (CHANNELS == 2) {
219d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        TO l = 0;
220d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        TO r = 0;
221d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        for (size_t i = 0; i < count; ++i) {
22268ffa200de7c4662c088851a328923be715c6c24Andy Hung            mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
223d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            coefsP++;
224d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            sP -= CHANNELS;
22568ffa200de7c4662c088851a328923be715c6c24Andy Hung            mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
226d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            coefsN++;
227d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            sN += CHANNELS;
228d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        }
229d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        out[0] += volumeAdjust(l, volumeLR[0]);
230d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        out[1] += volumeAdjust(r, volumeLR[1]);
231d549139155b20d7cbf6a4326133e06def465ef54Andy Hung    } else { /* CHANNELS == 1 */
232d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        TO l = 0;
233d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        for (size_t i = 0; i < count; ++i) {
23468ffa200de7c4662c088851a328923be715c6c24Andy Hung            mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
235d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            coefsP++;
236d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            sP -= CHANNELS;
23768ffa200de7c4662c088851a328923be715c6c24Andy Hung            mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
238d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            coefsN++;
239d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            sN += CHANNELS;
240d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        }
241d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        out[0] += volumeAdjust(l, volumeLR[0]);
242d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        out[1] += volumeAdjust(l, volumeLR[1]);
24386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    }
24486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung}
24586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
2466b667dde03a5707285a2ff76ada525075d4c60efAndy Hung/* Calculates a single output frame from a polyphase resampling filter.
2476b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * See Process() for parameter details.
2486b667dde03a5707285a2ff76ada525075d4c60efAndy Hung */
24968ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>
25068ffa200de7c4662c088851a328923be715c6c24Andy Hungstatic inline
25168ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid ProcessL(TO* const out,
25268ffa200de7c4662c088851a328923be715c6c24Andy Hung        int count,
25368ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsP,
25468ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsN,
25568ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TI* sP,
25668ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TI* sN,
25768ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TO* const volumeLR)
25868ffa200de7c4662c088851a328923be715c6c24Andy Hung{
25968ffa200de7c4662c088851a328923be715c6c24Andy Hung    ProcessBase<CHANNELS, STRIDE, InterpNull>(out, count, coefsP, coefsN, sP, sN, 0, volumeLR);
26068ffa200de7c4662c088851a328923be715c6c24Andy Hung}
26168ffa200de7c4662c088851a328923be715c6c24Andy Hung
2626b667dde03a5707285a2ff76ada525075d4c60efAndy Hung/*
2636b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * Calculates a single output frame from a polyphase resampling filter,
2646b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * with filter phase interpolation.
2656b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2666b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param out should point to the output buffer with space for at least one output frame.
2676b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2686b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param count should be half the size of the total filter length (halfNumCoefs), as we
2696b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * use symmetry in filter coefficients to evaluate two dot products.
2706b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2716b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param coefsP is one phase of the polyphase filter bank of size halfNumCoefs, corresponding
2726b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * to the positive sP.
2736b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2746b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param coefsN is one phase of the polyphase filter bank of size halfNumCoefs, corresponding
2756b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * to the negative sN.
2766b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2776b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param coefsP1 is the next phase of coefsP (used for interpolation).
2786b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2796b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param coefsN1 is the next phase of coefsN (used for interpolation).
2806b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2816b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param sP is the positive half of the coefficients (as viewed by a convolution),
2826b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * starting at the original samples pointer and decrementing (by CHANNELS).
2836b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2846b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param sN is the negative half of the samples (as viewed by a convolution),
2856b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * starting at the original samples pointer + CHANNELS and incrementing (by CHANNELS).
2866b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2876b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param lerpP The fractional siting between the polyphase indices is given by the bits
2886b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * below coefShift. See fir() for details.
2896b667dde03a5707285a2ff76ada525075d4c60efAndy Hung *
2906b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * @param volumeLR is a pointer to an array of two 32 bit volume values, one per stereo channel,
2916b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * expressed as a S32 integer or float.  A negative value inverts the channel 180 degrees.
2926b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * The pointer volumeLR should be aligned to a minimum of 8 bytes.
2936b667dde03a5707285a2ff76ada525075d4c60efAndy Hung * A typical value for volume is 0x1000 to align to a unity gain output of 20.12.
2946b667dde03a5707285a2ff76ada525075d4c60efAndy Hung */
29568ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP>
29668ffa200de7c4662c088851a328923be715c6c24Andy Hungstatic inline
29768ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid Process(TO* const out,
29868ffa200de7c4662c088851a328923be715c6c24Andy Hung        int count,
29968ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsP,
30068ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsN,
30168ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsP1 __unused,
30268ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsN1 __unused,
30368ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TI* sP,
30468ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TI* sN,
30568ffa200de7c4662c088851a328923be715c6c24Andy Hung        TINTERP lerpP,
30668ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TO* const volumeLR)
30768ffa200de7c4662c088851a328923be715c6c24Andy Hung{
308b187de1ada34a9023c05d020a4592686ba761278Glenn Kasten    ProcessBase<CHANNELS, STRIDE, InterpCompute>(out, count, coefsP, coefsN, sP, sN, lerpP,
309b187de1ada34a9023c05d020a4592686ba761278Glenn Kasten            volumeLR);
31068ffa200de7c4662c088851a328923be715c6c24Andy Hung}
31168ffa200de7c4662c088851a328923be715c6c24Andy Hung
/*
 * Calculates a single output frame from input sample pointer.
 *
 * This sets up the params for the accelerated Process() and ProcessL()
 * functions to do the appropriate dot products.
 *
 * @param out should point to the output buffer with space for at least one output frame.
 *
 * @param phase is the fractional distance between input frames for interpolation:
 * phase >= 0  && phase < phaseWrapLimit.  It can be thought of as a rational fraction
 * of phase/phaseWrapLimit.
 *
 * @param phaseWrapLimit is #polyphases<<coefShift, where #polyphases is the number of polyphases
 * in the polyphase filter. Likewise, #polyphases can be obtained as (phaseWrapLimit>>coefShift).
 *
 * @param coefShift gives the bit alignment of the polyphase index in the phase parameter.
 *
 * @param halfNumCoefs is half the number of coefficients per polyphase filter. Since the
 * overall filterbank is odd-length symmetric, only halfNumCoefs need be stored.
 *
 * @param coefs is the polyphase filter bank, starting from polyphase index 0, and ranging to
 * and including the #polyphases.  Each polyphase of the filter has half-length halfNumCoefs
 * (due to symmetry).  The total size of the filter bank in coefficients is
 * (#polyphases+1)*halfNumCoefs.
 *
 * The filter bank coefs should be aligned to a minimum of 16 bytes (preferably to cache line).
 *
 * The coefs should be attenuated (to compensate for passband ripple)
 * if storing back into the native format.
 *
 * @param samples are unaligned input samples.  The position is in the "middle" of the
 * sample array with respect to the FIR filter:
 * the negative half of the filter is dot product from samples+1 to samples+halfNumCoefs;
 * the positive half of the filter is dot product from samples to samples-halfNumCoefs+1.
 *
 * @param volumeLR is a pointer to an array of two 32 bit volume values, one per stereo channel,
 * expressed as a S32 integer or float.  A negative value inverts the channel 180 degrees.
 * The pointer volumeLR should be aligned to a minimum of 8 bytes.
 * A typical value for volume is 0x1000 to align to a unity gain output of 20.12.
 *
 * In between calls to filterCoefficient, the phase is incremented by phaseIncrement, where
 * phaseIncrement is calculated as inputSampling * phaseWrapLimit / outputSampling.
 *
 * The filter polyphase index is given by indexP = phase >> coefShift. Due to
 * odd length symmetric filter, the polyphase index of the negative half depends on
 * whether interpolation is used.
 *
 * The fractional siting between the polyphase indices is given by the bits below coefShift:
 *
 * lerpP = phase << 32 - coefShift >> 1;  // for 32 bit unsigned phase multiply
 * lerpP = phase << 32 - coefShift >> 17; // for 16 bit unsigned phase multiply
 *
 * For integer types, this is expressed as:
 *
 * lerpP = phase << sizeof(phase)*8 - coefShift
 *              >> (sizeof(phase)-sizeof(*coefs))*8 + 1;
 *
 * For floating point, lerpP is the fractional phase scaled to [0.0, 1.0):
 *
 * lerpP = (phase << 32 - coefShift) / (1 << 32); // floating point equivalent
 */

374d549139155b20d7cbf6a4326133e06def465ef54Andy Hungtemplate<int CHANNELS, bool LOCKED, int STRIDE, typename TC, typename TI, typename TO>
37586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungstatic inline
376d549139155b20d7cbf6a4326133e06def465ef54Andy Hungvoid fir(TO* const out,
37786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const uint32_t phase, const uint32_t phaseWrapLimit,
37886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const int coefShift, const int halfNumCoefs, const TC* const coefs,
379d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* const samples, const TO* const volumeLR)
38086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{
38186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    // NOTE: be very careful when modifying the code here. register
38286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    // pressure is very high and a small change might cause the compiler
38386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    // to generate far less efficient code.
38486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    // Always sanity check the result with objdump or test-resample.
38586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
38686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    if (LOCKED) {
38786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // locked polyphase (no interpolation)
38886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // Compute the polyphase filter index on the positive and negative side.
38986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        uint32_t indexP = phase >> coefShift;
39086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        uint32_t indexN = (phaseWrapLimit - phase) >> coefShift;
39186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsP = coefs + indexP*halfNumCoefs;
39286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsN = coefs + indexN*halfNumCoefs;
393d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sP = samples;
394d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sN = samples + CHANNELS;
39586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
39686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // dot product filter.
39786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        ProcessL<CHANNELS, STRIDE>(out,
39886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung                halfNumCoefs, coefsP, coefsN, sP, sN, volumeLR);
39986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    } else {
40086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // interpolated polyphase
40186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // Compute the polyphase filter index on the positive and negative side.
40286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        uint32_t indexP = phase >> coefShift;
40386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        uint32_t indexN = (phaseWrapLimit - phase - 1) >> coefShift; // one's complement.
40486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsP = coefs + indexP*halfNumCoefs;
40586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsN = coefs + indexN*halfNumCoefs;
40686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsP1 = coefsP + halfNumCoefs;
40786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsN1 = coefsN + halfNumCoefs;
408d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sP = samples;
409d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sN = samples + CHANNELS;
41086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
41186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // Interpolation fraction lerpP derived by shifting all the way up and down
41286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // to clear the appropriate bits and align to the appropriate level
41386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // for the integer multiply.  The constants should resolve in compile time.
41486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        //
41586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // The interpolated filter coefficient is derived as follows for the pos/neg half:
41686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        //
41786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // interpolated[P] = index[P]*lerpP + index[P+1]*(1-lerpP)
41886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // interpolated[N] = index[N+1]*lerpP + index[N]*(1-lerpP)
41986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
42086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // on-the-fly interpolated dot product filter
421d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        if (is_same<TC, float>::value || is_same<TC, double>::value) {
422d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            static const TC scale = 1. / (65536. * 65536.); // scale phase bits to [0.0, 1.0)
423d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            TC lerpP = TC(phase << (sizeof(phase)*8 - coefShift)) * scale;
424d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
425d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            Process<CHANNELS, STRIDE>(out,
426d549139155b20d7cbf6a4326133e06def465ef54Andy Hung                    halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR);
427d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        } else {
428d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            uint32_t lerpP = phase << (sizeof(phase)*8 - coefShift)
429d549139155b20d7cbf6a4326133e06def465ef54Andy Hung                    >> ((sizeof(phase)-sizeof(*coefs))*8 + 1);
430d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
431d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            Process<CHANNELS, STRIDE>(out,
432d549139155b20d7cbf6a4326133e06def465ef54Andy Hung                    halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR);
433d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        }
43486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    }
43586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung}
43686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
43763238efb0d674758902918e3cdaac322126484b7Glenn Kasten} // namespace android
43886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
43986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H*/
440