186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung/*
286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * Copyright (C) 2013 The Android Open Source Project
386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * Licensed under the Apache License, Version 2.0 (the "License");
586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * you may not use this file except in compliance with the License.
686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * You may obtain a copy of the License at
786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *      http://www.apache.org/licenses/LICENSE-2.0
986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
1086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * Unless required by applicable law or agreed to in writing, software
1186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * distributed under the License is distributed on an "AS IS" BASIS,
1286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * See the License for the specific language governing permissions and
1486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * limitations under the License.
1586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung */
1686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
1786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H
1886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H
1986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
2086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungnamespace android {
2186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
2286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung// depends on AudioResamplerFirOps.h
2386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
/* variant for input type TI = int16_t input samples */

/*
 * Multiply-accumulates one interleaved two-channel frame of int16_t samples.
 *
 * The two 16 bit samples at samples[0] and samples[1] are loaded as one packed
 * 32 bit word, which is handed to mulAddRL() twice: with selector 1 to update l
 * and with selector 0 to update r.
 *
 * NOTE(review): the packed load reads int16_t storage through a uint32_t pointer.
 * This presumably relies on the target tolerating 32 bit loads from 2-byte aligned
 * addresses and on the build not exploiting strict aliasing - confirm before
 * reusing this on another platform.
 *
 * @param l accumulator replaced by mulAddRL(1, packed, coef, l).
 * @param r accumulator replaced by mulAddRL(0, packed, coef, r).
 * @param coef filter coefficient applied to both samples.
 * @param samples pointer to the frame; two consecutive int16_t values are read.
 */
template<typename TC>
static inline
void mac(int32_t& l, int32_t& r, TC coef, const int16_t* samples)
{
    uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
    l = mulAddRL(1, rl, coef, l);
    r = mulAddRL(0, rl, coef, r);
}
3386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
/*
 * Multiply-accumulates a single int16_t sample (one-channel variant).
 *
 * @param l accumulator replaced by mulAdd(samples[0], coef, l).
 * @param coef filter coefficient applied to the sample.
 * @param samples pointer to the sample; only samples[0] is read.
 */
template<typename TC>
static inline
void mac(int32_t& l, TC coef, const int16_t* samples)
{
    l = mulAdd(samples[0], coef, l);
}
4086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
/* variant for input type TI = float input samples */

/*
 * Multiply-accumulates one interleaved two-channel frame of float samples.
 *
 * @param l accumulator; samples[0] * coef is added to it.
 * @param r accumulator; samples[1] * coef is added to it.
 * @param coef filter coefficient applied to both samples.
 * @param samples pointer to the frame; samples[0] and samples[1] are read.
 */
template<typename TC>
static inline
void mac(float& l, float& r, TC coef,  const float* samples)
{
    l += samples[0] * coef;
    r += samples[1] * coef;
}
49d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
/*
 * Multiply-accumulates a single float sample (one-channel variant).
 *
 * @param l accumulator; samples[0] * coef is added to it.
 * @param coef filter coefficient applied to the sample.
 * @param samples pointer to the sample; only samples[0] is read.
 */
template<typename TC>
static inline
void mac(float& l, TC coef,  const float* samples)
{
    l += samples[0] * coef;
}
56d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
57d549139155b20d7cbf6a4326133e06def465ef54Andy Hung/* variant for output type TO = int32_t output samples */
58d549139155b20d7cbf6a4326133e06def465ef54Andy Hungstatic inline
59d549139155b20d7cbf6a4326133e06def465ef54Andy Hungint32_t volumeAdjust(int32_t value, int32_t volume)
60d549139155b20d7cbf6a4326133e06def465ef54Andy Hung{
61d549139155b20d7cbf6a4326133e06def465ef54Andy Hung    return 2 * mulRL(0, value, volume);  // Note: only use top 16b
62d549139155b20d7cbf6a4326133e06def465ef54Andy Hung}
63d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
/* variant for output type TO = float output samples */

/*
 * Applies a floating point volume to an accumulated float sample.
 *
 * @param value accumulated sample.
 * @param volume linear gain factor.
 * @return value * volume.
 */
static inline
float volumeAdjust(float value, float volume)
{
    return value * volume;
}
7086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
/*
 * Helper template functions for loop unrolling accumulator operations.
 *
 * Unrolling the loops achieves about 2x gain.
 * Using a recursive template rather than an array of TO[] for the accumulator
 * values is an additional 10-20% gain.
 */

/*
 * Accumulator<CHANNELS, TO> holds CHANNELS values of type TO, one per level of the
 * recursive inheritance chain Accumulator<CHANNELS> -> Accumulator<CHANNELS-1> -> ...
 * -> Accumulator<0>.  Each member function handles its own value and then forwards
 * to the base class, so the most derived level handles the first channel.
 */
template<int CHANNELS, typename TO>
class Accumulator : public Accumulator<CHANNELS-1, TO> // recursive
{
public:
    /* Zeroes this level's value and every inherited level's value. */
    inline void clear() {
        value = 0;
        Accumulator<CHANNELS-1, TO>::clear();
    }

    /*
     * Multiply-accumulates one sample per level using the single-sample mac().
     * data is taken by reference and is advanced by one element per level, so on
     * return it has moved forward by CHANNELS samples.
     */
    template<typename TC, typename TI>
    inline void acc(TC coef, const TI*& data) {
        mac(value, coef, data++);
        Accumulator<CHANNELS-1, TO>::acc(coef, data);
    }

    /*
     * Writes volumeAdjust(value, gain) for each level to consecutive output slots.
     * Note that the output is assigned, not accumulated; out is taken by reference
     * and is advanced by one element per level.
     */
    inline void volume(TO*& out, TO gain) {
        *out++ = volumeAdjust(value, gain);
        Accumulator<CHANNELS-1, TO>::volume(out, gain);
    }

    TO value; // one per recursive inherited base class
};

/*
 * Recursion terminator: holds no value and every operation is a no-op.
 * Parameters are left unnamed because they are intentionally unused.
 */
template<typename TO>
class Accumulator<0, TO> {
public:
    inline void clear() {
    }
    template<typename TC, typename TI>
    inline void acc(TC /* coef */, const TI*& /* data */) {
    }
    inline void volume(TO*& /* out */, TO /* gain */) {
    }
};
11186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
/*
 * Linear interpolation between two filter coefficients.
 *
 * Generic form: returns coef_0 + lerp * (coef_1 - coef_0), evaluated in whatever
 * type the expression promotes to and converted back to TC.
 *
 * @param coef_0 coefficient returned when lerp is 0.
 * @param coef_1 coefficient approached as lerp grows.
 * @param lerp interpolation fraction.
 */
template<typename TC, typename TINTERP>
inline
TC interpolate(TC coef_0, TC coef_1, TINTERP lerp)
{
    return lerp * (coef_1 - coef_0) + coef_0;
}

/*
 * 16 bit coefficient specialization.
 *
 * lerp is truncated to int16_t and the product is shifted right by 15, so a lerp
 * value of 1 << 15 would correspond to a fraction of 1.0.  The coefficient
 * difference is also truncated to int16_t before the multiply.
 */
template<>
inline
int16_t interpolate<int16_t, uint32_t>(int16_t coef_0, int16_t coef_1, uint32_t lerp)
{   // in some CPU architectures 16b x 16b multiplies are faster.
    return (static_cast<int16_t>(lerp) * static_cast<int16_t>(coef_1 - coef_0) >> 15) + coef_0;
}

/*
 * 32 bit coefficient specialization.
 *
 * The multiply is carried out in 64 bits and shifted right by 31, so a lerp value
 * of 1 << 31 would correspond to a fraction of 1.0.
 */
template<>
inline
int32_t interpolate<int32_t, uint32_t>(int32_t coef_0, int32_t coef_1, uint32_t lerp)
{
    return (lerp * static_cast<int64_t>(coef_1 - coef_0) >> 31) + coef_0;
}
132d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
/* class scope for passing in functions into templates */

/*
 * Interpolation policy that actually interpolates: both entry points forward
 * their arguments unchanged to interpolate().
 */
struct InterpCompute {
    /* Used for the positive-side coefficients; returns interpolate(coef_0, coef_1, lerp). */
    template<typename TC, typename TINTERP>
    static inline
    TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) {
        return interpolate(coef_0, coef_1, lerp);
    }

    /* Used for the negative-side coefficients; returns interpolate(coef_0, coef_1, lerp). */
    template<typename TC, typename TINTERP>
    static inline
    TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) {
        return interpolate(coef_0, coef_1, lerp);
    }
};
14768ffa200de7c4662c088851a328923be715c6c24Andy Hung
/*
 * Interpolation policy that performs no interpolation: each entry point simply
 * selects one of the two coefficients and ignores the fraction.
 * Ignored parameters are left unnamed.
 */
struct InterpNull {
    /* Positive side: always returns the first coefficient. */
    template<typename TC, typename TINTERP>
    static inline
    TC interpolatep(TC coef_0, TC /* coef_1 */, TINTERP /* lerp */) {
        return coef_0;
    }

    /* Negative side: always returns the second coefficient. */
    template<typename TC, typename TINTERP>
    static inline
    TC interpolaten(TC /* coef_0 */, TC coef_1, TINTERP /* lerp */) {
        return coef_1;
    }
};
16168ffa200de7c4662c088851a328923be715c6c24Andy Hung
16268ffa200de7c4662c088851a328923be715c6c24Andy Hung/*
16368ffa200de7c4662c088851a328923be715c6c24Andy Hung * Calculates a single output frame (two samples).
16468ffa200de7c4662c088851a328923be715c6c24Andy Hung *
16568ffa200de7c4662c088851a328923be715c6c24Andy Hung * The Process*() functions compute both the positive half FIR dot product and
16668ffa200de7c4662c088851a328923be715c6c24Andy Hung * the negative half FIR dot product, accumulates, and then applies the volume.
16768ffa200de7c4662c088851a328923be715c6c24Andy Hung *
16868ffa200de7c4662c088851a328923be715c6c24Andy Hung * Use fir() to compute the proper coefficient pointers for a polyphase
16968ffa200de7c4662c088851a328923be715c6c24Andy Hung * filter bank.
17068ffa200de7c4662c088851a328923be715c6c24Andy Hung *
17168ffa200de7c4662c088851a328923be715c6c24Andy Hung * ProcessBase() is the fundamental processing template function.
17268ffa200de7c4662c088851a328923be715c6c24Andy Hung *
17368ffa200de7c4662c088851a328923be715c6c24Andy Hung * ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase.
17468ffa200de7c4662c088851a328923be715c6c24Andy Hung * Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase.
17568ffa200de7c4662c088851a328923be715c6c24Andy Hung */
17668ffa200de7c4662c088851a328923be715c6c24Andy Hung
17768ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO, typename TINTERP>
17886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungstatic inline
17968ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid ProcessBase(TO* const out,
180a4daf0b4f934b800a49f199fb8c09409391c8fc0Glenn Kasten        size_t count,
18186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsP,
18286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsN,
183d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sP,
184d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sN,
185d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        TINTERP lerpP,
186d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TO* const volumeLR)
18786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{
18868ffa200de7c4662c088851a328923be715c6c24Andy Hung    COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS > 0)
189d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
19068ffa200de7c4662c088851a328923be715c6c24Andy Hung    if (CHANNELS > 2) {
19168ffa200de7c4662c088851a328923be715c6c24Andy Hung        // TO accum[CHANNELS];
19268ffa200de7c4662c088851a328923be715c6c24Andy Hung        Accumulator<CHANNELS, TO> accum;
19368ffa200de7c4662c088851a328923be715c6c24Andy Hung
19468ffa200de7c4662c088851a328923be715c6c24Andy Hung        // for (int j = 0; j < CHANNELS; ++j) accum[j] = 0;
19568ffa200de7c4662c088851a328923be715c6c24Andy Hung        accum.clear();
19668ffa200de7c4662c088851a328923be715c6c24Andy Hung        for (size_t i = 0; i < count; ++i) {
19768ffa200de7c4662c088851a328923be715c6c24Andy Hung            TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP);
19868ffa200de7c4662c088851a328923be715c6c24Andy Hung
19968ffa200de7c4662c088851a328923be715c6c24Andy Hung            // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j);
20068ffa200de7c4662c088851a328923be715c6c24Andy Hung            const TI *tmp_data = sP; // tmp_ptr seems to work better
20168ffa200de7c4662c088851a328923be715c6c24Andy Hung            accum.acc(c, tmp_data);
20268ffa200de7c4662c088851a328923be715c6c24Andy Hung
20368ffa200de7c4662c088851a328923be715c6c24Andy Hung            coefsP++;
20468ffa200de7c4662c088851a328923be715c6c24Andy Hung            sP -= CHANNELS;
20568ffa200de7c4662c088851a328923be715c6c24Andy Hung            c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP);
20668ffa200de7c4662c088851a328923be715c6c24Andy Hung
20768ffa200de7c4662c088851a328923be715c6c24Andy Hung            // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j);
20868ffa200de7c4662c088851a328923be715c6c24Andy Hung            tmp_data = sN; // tmp_ptr seems faster than directly using sN
20968ffa200de7c4662c088851a328923be715c6c24Andy Hung            accum.acc(c, tmp_data);
21068ffa200de7c4662c088851a328923be715c6c24Andy Hung
21168ffa200de7c4662c088851a328923be715c6c24Andy Hung            coefsN++;
21268ffa200de7c4662c088851a328923be715c6c24Andy Hung            sN += CHANNELS;
21368ffa200de7c4662c088851a328923be715c6c24Andy Hung        }
21468ffa200de7c4662c088851a328923be715c6c24Andy Hung        // for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]);
21568ffa200de7c4662c088851a328923be715c6c24Andy Hung        TO *tmp_out = out; // may remove if const out definition changes.
21668ffa200de7c4662c088851a328923be715c6c24Andy Hung        accum.volume(tmp_out, volumeLR[0]);
21768ffa200de7c4662c088851a328923be715c6c24Andy Hung    } else if (CHANNELS == 2) {
218d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        TO l = 0;
219d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        TO r = 0;
220d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        for (size_t i = 0; i < count; ++i) {
22168ffa200de7c4662c088851a328923be715c6c24Andy Hung            mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
222d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            coefsP++;
223d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            sP -= CHANNELS;
22468ffa200de7c4662c088851a328923be715c6c24Andy Hung            mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
225d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            coefsN++;
226d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            sN += CHANNELS;
227d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        }
228d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        out[0] += volumeAdjust(l, volumeLR[0]);
229d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        out[1] += volumeAdjust(r, volumeLR[1]);
230d549139155b20d7cbf6a4326133e06def465ef54Andy Hung    } else { /* CHANNELS == 1 */
231d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        TO l = 0;
232d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        for (size_t i = 0; i < count; ++i) {
23368ffa200de7c4662c088851a328923be715c6c24Andy Hung            mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP);
234d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            coefsP++;
235d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            sP -= CHANNELS;
23668ffa200de7c4662c088851a328923be715c6c24Andy Hung            mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN);
237d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            coefsN++;
238d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            sN += CHANNELS;
239d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        }
240d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        out[0] += volumeAdjust(l, volumeLR[0]);
241d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        out[1] += volumeAdjust(l, volumeLR[1]);
24286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    }
24386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung}
24486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
24568ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO>
24668ffa200de7c4662c088851a328923be715c6c24Andy Hungstatic inline
24768ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid ProcessL(TO* const out,
24868ffa200de7c4662c088851a328923be715c6c24Andy Hung        int count,
24968ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsP,
25068ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsN,
25168ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TI* sP,
25268ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TI* sN,
25368ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TO* const volumeLR)
25468ffa200de7c4662c088851a328923be715c6c24Andy Hung{
25568ffa200de7c4662c088851a328923be715c6c24Andy Hung    ProcessBase<CHANNELS, STRIDE, InterpNull>(out, count, coefsP, coefsN, sP, sN, 0, volumeLR);
25668ffa200de7c4662c088851a328923be715c6c24Andy Hung}
25768ffa200de7c4662c088851a328923be715c6c24Andy Hung
25868ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP>
25968ffa200de7c4662c088851a328923be715c6c24Andy Hungstatic inline
26068ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid Process(TO* const out,
26168ffa200de7c4662c088851a328923be715c6c24Andy Hung        int count,
26268ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsP,
26368ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsN,
26468ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsP1 __unused,
26568ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TC* coefsN1 __unused,
26668ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TI* sP,
26768ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TI* sN,
26868ffa200de7c4662c088851a328923be715c6c24Andy Hung        TINTERP lerpP,
26968ffa200de7c4662c088851a328923be715c6c24Andy Hung        const TO* const volumeLR)
27068ffa200de7c4662c088851a328923be715c6c24Andy Hung{
27168ffa200de7c4662c088851a328923be715c6c24Andy Hung    ProcessBase<CHANNELS, STRIDE, InterpCompute>(out, count, coefsP, coefsN, sP, sN, lerpP, volumeLR);
27268ffa200de7c4662c088851a328923be715c6c24Andy Hung}
27368ffa200de7c4662c088851a328923be715c6c24Andy Hung
27486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung/*
275d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * Calculates a single output frame (two samples) from input sample pointer.
27686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
27786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * This sets up the params for the accelerated Process() and ProcessL()
27886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * functions to do the appropriate dot products.
27986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
280d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * @param out should point to the output buffer with space for at least one output frame.
28186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
282d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * @param phase is the fractional distance between input frames for interpolation:
28386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * phase >= 0  && phase < phaseWrapLimit.  It can be thought of as a rational fraction
28486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * of phase/phaseWrapLimit.
28586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
28686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param phaseWrapLimit is #polyphases<<coefShift, where #polyphases is the number of polyphases
28786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * in the polyphase filter. Likewise, #polyphases can be obtained as (phaseWrapLimit>>coefShift).
28886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
28986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param coefShift gives the bit alignment of the polyphase index in the phase parameter.
29086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
29186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param halfNumCoefs is the half the number of coefficients per polyphase filter. Since the
29286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * overall filterbank is odd-length symmetric, only halfNumCoefs need be stored.
29386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
29486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param coefs is the polyphase filter bank, starting at from polyphase index 0, and ranging to
29586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * and including the #polyphases.  Each polyphase of the filter has half-length halfNumCoefs
29686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * (due to symmetry).  The total size of the filter bank in coefficients is
29786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * (#polyphases+1)*halfNumCoefs.
29886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
29986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The filter bank coefs should be aligned to a minimum of 16 bytes (preferrably to cache line).
30086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
30186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The coefs should be attenuated (to compensate for passband ripple)
30286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * if storing back into the native format.
30386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
30486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param samples are unaligned input samples.  The position is in the "middle" of the
30586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * sample array with respect to the FIR filter:
30686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * the negative half of the filter is dot product from samples+1 to samples+halfNumCoefs;
30786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * the positive half of the filter is dot product from samples to samples-halfNumCoefs+1.
30886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
30986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param volumeLR is a pointer to an array of two 32 bit volume values, one per stereo channel,
31086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * expressed as a S32 integer.  A negative value inverts the channel 180 degrees.
31186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The pointer volumeLR should be aligned to a minimum of 8 bytes.
31286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * A typical value for volume is 0x1000 to align to a unity gain output of 20.12.
31386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
31486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * In between calls to filterCoefficient, the phase is incremented by phaseIncrement, where
31586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * phaseIncrement is calculated as inputSampling * phaseWrapLimit / outputSampling.
31686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
31786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The filter polyphase index is given by indexP = phase >> coefShift. Due to
31886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * odd length symmetric filter, the polyphase index of the negative half depends on
31986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * whether interpolation is used.
32086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
32186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The fractional siting between the polyphase indices is given by the bits below coefShift:
32286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
32386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * lerpP = phase << 32 - coefShift >> 1;  // for 32 bit unsigned phase multiply
32486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * lerpP = phase << 32 - coefShift >> 17; // for 16 bit unsigned phase multiply
32586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
32686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * For integer types, this is expressed as:
32786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
32886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * lerpP = phase << sizeof(phase)*8 - coefShift
32986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *              >> (sizeof(phase)-sizeof(*coefs))*8 + 1;
33086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung *
331d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * For floating point, lerpP is the fractional phase scaled to [0.0, 1.0):
332d549139155b20d7cbf6a4326133e06def465ef54Andy Hung *
333d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * lerpP = (phase << 32 - coefShift) / (1 << 32); // floating point equivalent
33486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung */
33586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
336d549139155b20d7cbf6a4326133e06def465ef54Andy Hungtemplate<int CHANNELS, bool LOCKED, int STRIDE, typename TC, typename TI, typename TO>
33786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungstatic inline
338d549139155b20d7cbf6a4326133e06def465ef54Andy Hungvoid fir(TO* const out,
33986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const uint32_t phase, const uint32_t phaseWrapLimit,
34086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const int coefShift, const int halfNumCoefs, const TC* const coefs,
341d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* const samples, const TO* const volumeLR)
34286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{
34386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    // NOTE: be very careful when modifying the code here. register
34486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    // pressure is very high and a small change might cause the compiler
34586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    // to generate far less efficient code.
34686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    // Always sanity check the result with objdump or test-resample.
34786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
34886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    if (LOCKED) {
34986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // locked polyphase (no interpolation)
35086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // Compute the polyphase filter index on the positive and negative side.
35186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        uint32_t indexP = phase >> coefShift;
35286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        uint32_t indexN = (phaseWrapLimit - phase) >> coefShift;
35386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsP = coefs + indexP*halfNumCoefs;
35486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsN = coefs + indexN*halfNumCoefs;
355d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sP = samples;
356d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sN = samples + CHANNELS;
35786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
35886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // dot product filter.
35986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        ProcessL<CHANNELS, STRIDE>(out,
36086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung                halfNumCoefs, coefsP, coefsN, sP, sN, volumeLR);
36186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    } else {
36286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // interpolated polyphase
36386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // Compute the polyphase filter index on the positive and negative side.
36486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        uint32_t indexP = phase >> coefShift;
36586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        uint32_t indexN = (phaseWrapLimit - phase - 1) >> coefShift; // one's complement.
36686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsP = coefs + indexP*halfNumCoefs;
36786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsN = coefs + indexN*halfNumCoefs;
36886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsP1 = coefsP + halfNumCoefs;
36986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        const TC* coefsN1 = coefsN + halfNumCoefs;
370d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sP = samples;
371d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        const TI* sN = samples + CHANNELS;
37286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
37386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // Interpolation fraction lerpP derived by shifting all the way up and down
37486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // to clear the appropriate bits and align to the appropriate level
37586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // for the integer multiply.  The constants should resolve in compile time.
37686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        //
37786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // The interpolated filter coefficient is derived as follows for the pos/neg half:
37886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        //
37986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // interpolated[P] = index[P]*lerpP + index[P+1]*(1-lerpP)
38086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // interpolated[N] = index[N+1]*lerpP + index[N]*(1-lerpP)
38186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
38286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung        // on-the-fly interpolated dot product filter
383d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        if (is_same<TC, float>::value || is_same<TC, double>::value) {
384d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            static const TC scale = 1. / (65536. * 65536.); // scale phase bits to [0.0, 1.0)
385d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            TC lerpP = TC(phase << (sizeof(phase)*8 - coefShift)) * scale;
386d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
387d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            Process<CHANNELS, STRIDE>(out,
388d549139155b20d7cbf6a4326133e06def465ef54Andy Hung                    halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR);
389d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        } else {
390d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            uint32_t lerpP = phase << (sizeof(phase)*8 - coefShift)
391d549139155b20d7cbf6a4326133e06def465ef54Andy Hung                    >> ((sizeof(phase)-sizeof(*coefs))*8 + 1);
392d549139155b20d7cbf6a4326133e06def465ef54Andy Hung
393d549139155b20d7cbf6a4326133e06def465ef54Andy Hung            Process<CHANNELS, STRIDE>(out,
394d549139155b20d7cbf6a4326133e06def465ef54Andy Hung                    halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR);
395d549139155b20d7cbf6a4326133e06def465ef54Andy Hung        }
39686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung    }
39786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung}
39886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
39986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung}; // namespace android
40086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung
40186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H*/
402