186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung/* 286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * Copyright (C) 2013 The Android Open Source Project 386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * Licensed under the Apache License, Version 2.0 (the "License"); 586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * you may not use this file except in compliance with the License. 686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * You may obtain a copy of the License at 786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * http://www.apache.org/licenses/LICENSE-2.0 986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 1086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * Unless required by applicable law or agreed to in writing, software 1186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * distributed under the License is distributed on an "AS IS" BASIS, 1286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * See the License for the specific language governing permissions and 1486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * limitations under the License. 1586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung */ 1686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 1786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#ifndef ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H 1886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#define ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H 1986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 2086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungnamespace android { 2186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 2286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung// depends on AudioResamplerFirOps.h 2386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 24d549139155b20d7cbf6a4326133e06def465ef54Andy Hung/* variant for input type TI = int16_t input samples */ 25d549139155b20d7cbf6a4326133e06def465ef54Andy Hungtemplate<typename TC> 2686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungstatic inline 27d549139155b20d7cbf6a4326133e06def465ef54Andy Hungvoid mac(int32_t& l, int32_t& r, TC coef, const int16_t* samples) 2886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{ 29d549139155b20d7cbf6a4326133e06def465ef54Andy Hung uint32_t rl = *reinterpret_cast<const uint32_t*>(samples); 30d549139155b20d7cbf6a4326133e06def465ef54Andy Hung l = mulAddRL(1, rl, coef, l); 31d549139155b20d7cbf6a4326133e06def465ef54Andy Hung r = mulAddRL(0, rl, coef, r); 3286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung} 3386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 34d549139155b20d7cbf6a4326133e06def465ef54Andy Hungtemplate<typename TC> 3586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungstatic inline 36d549139155b20d7cbf6a4326133e06def465ef54Andy Hungvoid mac(int32_t& l, TC coef, const int16_t* samples) 3786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{ 38d549139155b20d7cbf6a4326133e06def465ef54Andy Hung l = mulAdd(samples[0], coef, l); 39d549139155b20d7cbf6a4326133e06def465ef54Andy Hung} 4086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 41d549139155b20d7cbf6a4326133e06def465ef54Andy Hung/* variant for input type TI = float input samples */ 42d549139155b20d7cbf6a4326133e06def465ef54Andy Hungtemplate<typename TC> 43d549139155b20d7cbf6a4326133e06def465ef54Andy Hungstatic inline 44d549139155b20d7cbf6a4326133e06def465ef54Andy Hungvoid mac(float& l, float& r, TC coef, const float* samples) 45d549139155b20d7cbf6a4326133e06def465ef54Andy Hung{ 46d549139155b20d7cbf6a4326133e06def465ef54Andy Hung l += *samples++ * coef; 4768ffa200de7c4662c088851a328923be715c6c24Andy Hung r += *samples * coef; 48d549139155b20d7cbf6a4326133e06def465ef54Andy Hung} 49d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 50d549139155b20d7cbf6a4326133e06def465ef54Andy Hungtemplate<typename TC> 51d549139155b20d7cbf6a4326133e06def465ef54Andy Hungstatic inline 52d549139155b20d7cbf6a4326133e06def465ef54Andy Hungvoid mac(float& l, TC coef, const float* samples) 53d549139155b20d7cbf6a4326133e06def465ef54Andy Hung{ 5468ffa200de7c4662c088851a328923be715c6c24Andy Hung l += *samples * coef; 55d549139155b20d7cbf6a4326133e06def465ef54Andy Hung} 56d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 57d549139155b20d7cbf6a4326133e06def465ef54Andy Hung/* variant for output type TO = int32_t output samples */ 58d549139155b20d7cbf6a4326133e06def465ef54Andy Hungstatic inline 59d549139155b20d7cbf6a4326133e06def465ef54Andy Hungint32_t volumeAdjust(int32_t value, int32_t volume) 60d549139155b20d7cbf6a4326133e06def465ef54Andy Hung{ 61d549139155b20d7cbf6a4326133e06def465ef54Andy Hung return 2 * mulRL(0, value, volume); // Note: only use top 16b 62d549139155b20d7cbf6a4326133e06def465ef54Andy Hung} 63d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 64d549139155b20d7cbf6a4326133e06def465ef54Andy Hung/* variant for output type TO = float output samples */ 65d549139155b20d7cbf6a4326133e06def465ef54Andy Hungstatic inline 66d549139155b20d7cbf6a4326133e06def465ef54Andy Hungfloat volumeAdjust(float value, float volume) 67d549139155b20d7cbf6a4326133e06def465ef54Andy Hung{ 68d549139155b20d7cbf6a4326133e06def465ef54Andy Hung return value * volume; 6986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung} 7086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 7186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung/* 7268ffa200de7c4662c088851a328923be715c6c24Andy Hung * Helper template functions for loop unrolling accumulator operations. 7386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 7468ffa200de7c4662c088851a328923be715c6c24Andy Hung * Unrolling the loops achieves about 2x gain. 7568ffa200de7c4662c088851a328923be715c6c24Andy Hung * Using a recursive template rather than an array of TO[] for the accumulator 7668ffa200de7c4662c088851a328923be715c6c24Andy Hung * values is an additional 10-20% gain. 7786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung */ 7886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 7968ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate<int CHANNELS, typename TO> 8068ffa200de7c4662c088851a328923be715c6c24Andy Hungclass Accumulator : public Accumulator<CHANNELS-1, TO> // recursive 8186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{ 8268ffa200de7c4662c088851a328923be715c6c24Andy Hungpublic: 8368ffa200de7c4662c088851a328923be715c6c24Andy Hung inline void clear() { 8468ffa200de7c4662c088851a328923be715c6c24Andy Hung value = 0; 8568ffa200de7c4662c088851a328923be715c6c24Andy Hung Accumulator<CHANNELS-1, TO>::clear(); 86d549139155b20d7cbf6a4326133e06def465ef54Andy Hung } 8768ffa200de7c4662c088851a328923be715c6c24Andy Hung template<typename TC, typename TI> 8868ffa200de7c4662c088851a328923be715c6c24Andy Hung inline void acc(TC coef, const TI*& data) { 8968ffa200de7c4662c088851a328923be715c6c24Andy Hung mac(value, coef, data++); 9068ffa200de7c4662c088851a328923be715c6c24Andy Hung Accumulator<CHANNELS-1, TO>::acc(coef, data); 9168ffa200de7c4662c088851a328923be715c6c24Andy Hung } 9268ffa200de7c4662c088851a328923be715c6c24Andy Hung inline void volume(TO*& out, TO gain) { 9368ffa200de7c4662c088851a328923be715c6c24Andy Hung *out++ = volumeAdjust(value, gain); 9468ffa200de7c4662c088851a328923be715c6c24Andy Hung Accumulator<CHANNELS-1, TO>::volume(out, gain); 9568ffa200de7c4662c088851a328923be715c6c24Andy Hung } 9668ffa200de7c4662c088851a328923be715c6c24Andy Hung 9768ffa200de7c4662c088851a328923be715c6c24Andy Hung TO value; // one per recursive inherited base class 9868ffa200de7c4662c088851a328923be715c6c24Andy Hung}; 9968ffa200de7c4662c088851a328923be715c6c24Andy Hung 10068ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate<typename TO> 10168ffa200de7c4662c088851a328923be715c6c24Andy Hungclass Accumulator<0, TO> { 10268ffa200de7c4662c088851a328923be715c6c24Andy Hungpublic: 10368ffa200de7c4662c088851a328923be715c6c24Andy Hung inline void clear() { 10468ffa200de7c4662c088851a328923be715c6c24Andy Hung } 10568ffa200de7c4662c088851a328923be715c6c24Andy Hung template<typename TC, typename TI> 10668ffa200de7c4662c088851a328923be715c6c24Andy Hung inline void acc(TC coef __unused, const TI*& data __unused) { 10768ffa200de7c4662c088851a328923be715c6c24Andy Hung } 10868ffa200de7c4662c088851a328923be715c6c24Andy Hung inline void volume(TO*& out __unused, TO gain __unused) { 10968ffa200de7c4662c088851a328923be715c6c24Andy Hung } 11068ffa200de7c4662c088851a328923be715c6c24Andy Hung}; 11186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 112d549139155b20d7cbf6a4326133e06def465ef54Andy Hungtemplate<typename TC, typename TINTERP> 11342b011166ece30969667e0ff9dcf4832568c9c1aAndy Hunginline 114d549139155b20d7cbf6a4326133e06def465ef54Andy HungTC interpolate(TC coef_0, TC coef_1, TINTERP lerp) 115d549139155b20d7cbf6a4326133e06def465ef54Andy Hung{ 116d549139155b20d7cbf6a4326133e06def465ef54Andy Hung return lerp * (coef_1 - coef_0) + coef_0; 117d549139155b20d7cbf6a4326133e06def465ef54Andy Hung} 118d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 11942b011166ece30969667e0ff9dcf4832568c9c1aAndy Hungtemplate<> 12042b011166ece30969667e0ff9dcf4832568c9c1aAndy Hunginline 12142b011166ece30969667e0ff9dcf4832568c9c1aAndy Hungint16_t interpolate<int16_t, uint32_t>(int16_t coef_0, int16_t coef_1, uint32_t lerp) 12242b011166ece30969667e0ff9dcf4832568c9c1aAndy Hung{ // in some CPU architectures 16b x 16b multiplies are faster. 12342b011166ece30969667e0ff9dcf4832568c9c1aAndy Hung return (static_cast<int16_t>(lerp) * static_cast<int16_t>(coef_1 - coef_0) >> 15) + coef_0; 124d549139155b20d7cbf6a4326133e06def465ef54Andy Hung} 125d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 12642b011166ece30969667e0ff9dcf4832568c9c1aAndy Hungtemplate<> 12742b011166ece30969667e0ff9dcf4832568c9c1aAndy Hunginline 12842b011166ece30969667e0ff9dcf4832568c9c1aAndy Hungint32_t interpolate<int32_t, uint32_t>(int32_t coef_0, int32_t coef_1, uint32_t lerp) 129d549139155b20d7cbf6a4326133e06def465ef54Andy Hung{ 13042b011166ece30969667e0ff9dcf4832568c9c1aAndy Hung return (lerp * static_cast<int64_t>(coef_1 - coef_0) >> 31) + coef_0; 131d549139155b20d7cbf6a4326133e06def465ef54Andy Hung} 132d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 13368ffa200de7c4662c088851a328923be715c6c24Andy Hung/* class scope for passing in functions into templates */ 13468ffa200de7c4662c088851a328923be715c6c24Andy Hungstruct InterpCompute { 13568ffa200de7c4662c088851a328923be715c6c24Andy Hung template<typename TC, typename TINTERP> 13668ffa200de7c4662c088851a328923be715c6c24Andy Hung static inline 13768ffa200de7c4662c088851a328923be715c6c24Andy Hung TC interpolatep(TC coef_0, TC coef_1, TINTERP lerp) { 13868ffa200de7c4662c088851a328923be715c6c24Andy Hung return interpolate(coef_0, coef_1, lerp); 13968ffa200de7c4662c088851a328923be715c6c24Andy Hung } 14068ffa200de7c4662c088851a328923be715c6c24Andy Hung 14168ffa200de7c4662c088851a328923be715c6c24Andy Hung template<typename TC, typename TINTERP> 14268ffa200de7c4662c088851a328923be715c6c24Andy Hung static inline 14368ffa200de7c4662c088851a328923be715c6c24Andy Hung TC interpolaten(TC coef_0, TC coef_1, TINTERP lerp) { 14468ffa200de7c4662c088851a328923be715c6c24Andy Hung return interpolate(coef_0, coef_1, lerp); 14568ffa200de7c4662c088851a328923be715c6c24Andy Hung } 14668ffa200de7c4662c088851a328923be715c6c24Andy Hung}; 14768ffa200de7c4662c088851a328923be715c6c24Andy Hung 14868ffa200de7c4662c088851a328923be715c6c24Andy Hungstruct InterpNull { 14968ffa200de7c4662c088851a328923be715c6c24Andy Hung template<typename TC, typename TINTERP> 15068ffa200de7c4662c088851a328923be715c6c24Andy Hung static inline 15168ffa200de7c4662c088851a328923be715c6c24Andy Hung TC interpolatep(TC coef_0, TC coef_1 __unused, TINTERP lerp __unused) { 15268ffa200de7c4662c088851a328923be715c6c24Andy Hung return coef_0; 15368ffa200de7c4662c088851a328923be715c6c24Andy Hung } 15468ffa200de7c4662c088851a328923be715c6c24Andy Hung 15568ffa200de7c4662c088851a328923be715c6c24Andy Hung template<typename TC, typename TINTERP> 15668ffa200de7c4662c088851a328923be715c6c24Andy Hung static inline 15768ffa200de7c4662c088851a328923be715c6c24Andy Hung TC interpolaten(TC coef_0 __unused, TC coef_1, TINTERP lerp __unused) { 15868ffa200de7c4662c088851a328923be715c6c24Andy Hung return coef_1; 15968ffa200de7c4662c088851a328923be715c6c24Andy Hung } 16068ffa200de7c4662c088851a328923be715c6c24Andy Hung}; 16168ffa200de7c4662c088851a328923be715c6c24Andy Hung 16268ffa200de7c4662c088851a328923be715c6c24Andy Hung/* 16368ffa200de7c4662c088851a328923be715c6c24Andy Hung * Calculates a single output frame (two samples). 16468ffa200de7c4662c088851a328923be715c6c24Andy Hung * 16568ffa200de7c4662c088851a328923be715c6c24Andy Hung * The Process*() functions compute both the positive half FIR dot product and 16668ffa200de7c4662c088851a328923be715c6c24Andy Hung * the negative half FIR dot product, accumulates, and then applies the volume. 16768ffa200de7c4662c088851a328923be715c6c24Andy Hung * 16868ffa200de7c4662c088851a328923be715c6c24Andy Hung * Use fir() to compute the proper coefficient pointers for a polyphase 16968ffa200de7c4662c088851a328923be715c6c24Andy Hung * filter bank. 17068ffa200de7c4662c088851a328923be715c6c24Andy Hung * 17168ffa200de7c4662c088851a328923be715c6c24Andy Hung * ProcessBase() is the fundamental processing template function. 17268ffa200de7c4662c088851a328923be715c6c24Andy Hung * 17368ffa200de7c4662c088851a328923be715c6c24Andy Hung * ProcessL() calls ProcessBase() with TFUNC = InterpNull, for fixed/locked phase. 17468ffa200de7c4662c088851a328923be715c6c24Andy Hung * Process() calls ProcessBase() with TFUNC = InterpCompute, for interpolated phase. 17568ffa200de7c4662c088851a328923be715c6c24Andy Hung */ 17668ffa200de7c4662c088851a328923be715c6c24Andy Hung 17768ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate <int CHANNELS, int STRIDE, typename TFUNC, typename TC, typename TI, typename TO, typename TINTERP> 17886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungstatic inline 17968ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid ProcessBase(TO* const out, 180a4daf0b4f934b800a49f199fb8c09409391c8fc0Glenn Kasten size_t count, 18186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const TC* coefsP, 18286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const TC* coefsN, 183d549139155b20d7cbf6a4326133e06def465ef54Andy Hung const TI* sP, 184d549139155b20d7cbf6a4326133e06def465ef54Andy Hung const TI* sN, 185d549139155b20d7cbf6a4326133e06def465ef54Andy Hung TINTERP lerpP, 186d549139155b20d7cbf6a4326133e06def465ef54Andy Hung const TO* const volumeLR) 18786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{ 18868ffa200de7c4662c088851a328923be715c6c24Andy Hung COMPILE_TIME_ASSERT_FUNCTION_SCOPE(CHANNELS > 0) 189d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 19068ffa200de7c4662c088851a328923be715c6c24Andy Hung if (CHANNELS > 2) { 19168ffa200de7c4662c088851a328923be715c6c24Andy Hung // TO accum[CHANNELS]; 19268ffa200de7c4662c088851a328923be715c6c24Andy Hung Accumulator<CHANNELS, TO> accum; 19368ffa200de7c4662c088851a328923be715c6c24Andy Hung 19468ffa200de7c4662c088851a328923be715c6c24Andy Hung // for (int j = 0; j < CHANNELS; ++j) accum[j] = 0; 19568ffa200de7c4662c088851a328923be715c6c24Andy Hung accum.clear(); 19668ffa200de7c4662c088851a328923be715c6c24Andy Hung for (size_t i = 0; i < count; ++i) { 19768ffa200de7c4662c088851a328923be715c6c24Andy Hung TC c = TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP); 19868ffa200de7c4662c088851a328923be715c6c24Andy Hung 19968ffa200de7c4662c088851a328923be715c6c24Andy Hung // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sP + j); 20068ffa200de7c4662c088851a328923be715c6c24Andy Hung const TI *tmp_data = sP; // tmp_ptr seems to work better 20168ffa200de7c4662c088851a328923be715c6c24Andy Hung accum.acc(c, tmp_data); 20268ffa200de7c4662c088851a328923be715c6c24Andy Hung 20368ffa200de7c4662c088851a328923be715c6c24Andy Hung coefsP++; 20468ffa200de7c4662c088851a328923be715c6c24Andy Hung sP -= CHANNELS; 20568ffa200de7c4662c088851a328923be715c6c24Andy Hung c = TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP); 20668ffa200de7c4662c088851a328923be715c6c24Andy Hung 20768ffa200de7c4662c088851a328923be715c6c24Andy Hung // for (int j = 0; j < CHANNELS; ++j) mac(accum[j], c, sN + j); 20868ffa200de7c4662c088851a328923be715c6c24Andy Hung tmp_data = sN; // tmp_ptr seems faster than directly using sN 20968ffa200de7c4662c088851a328923be715c6c24Andy Hung accum.acc(c, tmp_data); 21068ffa200de7c4662c088851a328923be715c6c24Andy Hung 21168ffa200de7c4662c088851a328923be715c6c24Andy Hung coefsN++; 21268ffa200de7c4662c088851a328923be715c6c24Andy Hung sN += CHANNELS; 21368ffa200de7c4662c088851a328923be715c6c24Andy Hung } 21468ffa200de7c4662c088851a328923be715c6c24Andy Hung // for (int j = 0; j < CHANNELS; ++j) out[j] += volumeAdjust(accum[j], volumeLR[0]); 21568ffa200de7c4662c088851a328923be715c6c24Andy Hung TO *tmp_out = out; // may remove if const out definition changes. 21668ffa200de7c4662c088851a328923be715c6c24Andy Hung accum.volume(tmp_out, volumeLR[0]); 21768ffa200de7c4662c088851a328923be715c6c24Andy Hung } else if (CHANNELS == 2) { 218d549139155b20d7cbf6a4326133e06def465ef54Andy Hung TO l = 0; 219d549139155b20d7cbf6a4326133e06def465ef54Andy Hung TO r = 0; 220d549139155b20d7cbf6a4326133e06def465ef54Andy Hung for (size_t i = 0; i < count; ++i) { 22168ffa200de7c4662c088851a328923be715c6c24Andy Hung mac(l, r, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP); 222d549139155b20d7cbf6a4326133e06def465ef54Andy Hung coefsP++; 223d549139155b20d7cbf6a4326133e06def465ef54Andy Hung sP -= CHANNELS; 22468ffa200de7c4662c088851a328923be715c6c24Andy Hung mac(l, r, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN); 225d549139155b20d7cbf6a4326133e06def465ef54Andy Hung coefsN++; 226d549139155b20d7cbf6a4326133e06def465ef54Andy Hung sN += CHANNELS; 227d549139155b20d7cbf6a4326133e06def465ef54Andy Hung } 228d549139155b20d7cbf6a4326133e06def465ef54Andy Hung out[0] += volumeAdjust(l, volumeLR[0]); 229d549139155b20d7cbf6a4326133e06def465ef54Andy Hung out[1] += volumeAdjust(r, volumeLR[1]); 230d549139155b20d7cbf6a4326133e06def465ef54Andy Hung } else { /* CHANNELS == 1 */ 231d549139155b20d7cbf6a4326133e06def465ef54Andy Hung TO l = 0; 232d549139155b20d7cbf6a4326133e06def465ef54Andy Hung for (size_t i = 0; i < count; ++i) { 23368ffa200de7c4662c088851a328923be715c6c24Andy Hung mac(l, TFUNC::interpolatep(coefsP[0], coefsP[count], lerpP), sP); 234d549139155b20d7cbf6a4326133e06def465ef54Andy Hung coefsP++; 235d549139155b20d7cbf6a4326133e06def465ef54Andy Hung sP -= CHANNELS; 23668ffa200de7c4662c088851a328923be715c6c24Andy Hung mac(l, TFUNC::interpolaten(coefsN[count], coefsN[0], lerpP), sN); 237d549139155b20d7cbf6a4326133e06def465ef54Andy Hung coefsN++; 238d549139155b20d7cbf6a4326133e06def465ef54Andy Hung sN += CHANNELS; 239d549139155b20d7cbf6a4326133e06def465ef54Andy Hung } 240d549139155b20d7cbf6a4326133e06def465ef54Andy Hung out[0] += volumeAdjust(l, volumeLR[0]); 241d549139155b20d7cbf6a4326133e06def465ef54Andy Hung out[1] += volumeAdjust(l, volumeLR[1]); 24286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung } 24386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung} 24486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 24568ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO> 24668ffa200de7c4662c088851a328923be715c6c24Andy Hungstatic inline 24768ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid ProcessL(TO* const out, 24868ffa200de7c4662c088851a328923be715c6c24Andy Hung int count, 24968ffa200de7c4662c088851a328923be715c6c24Andy Hung const TC* coefsP, 25068ffa200de7c4662c088851a328923be715c6c24Andy Hung const TC* coefsN, 25168ffa200de7c4662c088851a328923be715c6c24Andy Hung const TI* sP, 25268ffa200de7c4662c088851a328923be715c6c24Andy Hung const TI* sN, 25368ffa200de7c4662c088851a328923be715c6c24Andy Hung const TO* const volumeLR) 25468ffa200de7c4662c088851a328923be715c6c24Andy Hung{ 25568ffa200de7c4662c088851a328923be715c6c24Andy Hung ProcessBase<CHANNELS, STRIDE, InterpNull>(out, count, coefsP, coefsN, sP, sN, 0, volumeLR); 25668ffa200de7c4662c088851a328923be715c6c24Andy Hung} 25768ffa200de7c4662c088851a328923be715c6c24Andy Hung 25868ffa200de7c4662c088851a328923be715c6c24Andy Hungtemplate <int CHANNELS, int STRIDE, typename TC, typename TI, typename TO, typename TINTERP> 25968ffa200de7c4662c088851a328923be715c6c24Andy Hungstatic inline 26068ffa200de7c4662c088851a328923be715c6c24Andy Hungvoid Process(TO* const out, 26168ffa200de7c4662c088851a328923be715c6c24Andy Hung int count, 26268ffa200de7c4662c088851a328923be715c6c24Andy Hung const TC* coefsP, 26368ffa200de7c4662c088851a328923be715c6c24Andy Hung const TC* coefsN, 26468ffa200de7c4662c088851a328923be715c6c24Andy Hung const TC* coefsP1 __unused, 26568ffa200de7c4662c088851a328923be715c6c24Andy Hung const TC* coefsN1 __unused, 26668ffa200de7c4662c088851a328923be715c6c24Andy Hung const TI* sP, 26768ffa200de7c4662c088851a328923be715c6c24Andy Hung const TI* sN, 26868ffa200de7c4662c088851a328923be715c6c24Andy Hung TINTERP lerpP, 26968ffa200de7c4662c088851a328923be715c6c24Andy Hung const TO* const volumeLR) 27068ffa200de7c4662c088851a328923be715c6c24Andy Hung{ 27168ffa200de7c4662c088851a328923be715c6c24Andy Hung ProcessBase<CHANNELS, STRIDE, InterpCompute>(out, count, coefsP, coefsN, sP, sN, lerpP, volumeLR); 27268ffa200de7c4662c088851a328923be715c6c24Andy Hung} 27368ffa200de7c4662c088851a328923be715c6c24Andy Hung 27486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung/* 275d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * Calculates a single output frame (two samples) from input sample pointer. 27686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 27786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * This sets up the params for the accelerated Process() and ProcessL() 27886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * functions to do the appropriate dot products. 27986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 280d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * @param out should point to the output buffer with space for at least one output frame. 28186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 282d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * @param phase is the fractional distance between input frames for interpolation: 28386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * phase >= 0 && phase < phaseWrapLimit. It can be thought of as a rational fraction 28486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * of phase/phaseWrapLimit. 28586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 28686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param phaseWrapLimit is #polyphases<<coefShift, where #polyphases is the number of polyphases 28786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * in the polyphase filter. Likewise, #polyphases can be obtained as (phaseWrapLimit>>coefShift). 28886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 28986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param coefShift gives the bit alignment of the polyphase index in the phase parameter. 29086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 29186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param halfNumCoefs is the half the number of coefficients per polyphase filter. Since the 29286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * overall filterbank is odd-length symmetric, only halfNumCoefs need be stored. 29386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 29486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param coefs is the polyphase filter bank, starting at from polyphase index 0, and ranging to 29586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * and including the #polyphases. Each polyphase of the filter has half-length halfNumCoefs 29686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * (due to symmetry). The total size of the filter bank in coefficients is 29786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * (#polyphases+1)*halfNumCoefs. 29886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 29986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The filter bank coefs should be aligned to a minimum of 16 bytes (preferrably to cache line). 30086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 30186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The coefs should be attenuated (to compensate for passband ripple) 30286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * if storing back into the native format. 30386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 30486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param samples are unaligned input samples. The position is in the "middle" of the 30586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * sample array with respect to the FIR filter: 30686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * the negative half of the filter is dot product from samples+1 to samples+halfNumCoefs; 30786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * the positive half of the filter is dot product from samples to samples-halfNumCoefs+1. 30886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 30986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * @param volumeLR is a pointer to an array of two 32 bit volume values, one per stereo channel, 31086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * expressed as a S32 integer. A negative value inverts the channel 180 degrees. 31186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The pointer volumeLR should be aligned to a minimum of 8 bytes. 31286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * A typical value for volume is 0x1000 to align to a unity gain output of 20.12. 31386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 31486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * In between calls to filterCoefficient, the phase is incremented by phaseIncrement, where 31586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * phaseIncrement is calculated as inputSampling * phaseWrapLimit / outputSampling. 31686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 31786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The filter polyphase index is given by indexP = phase >> coefShift. Due to 31886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * odd length symmetric filter, the polyphase index of the negative half depends on 31986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * whether interpolation is used. 32086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 32186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * The fractional siting between the polyphase indices is given by the bits below coefShift: 32286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 32386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * lerpP = phase << 32 - coefShift >> 1; // for 32 bit unsigned phase multiply 32486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * lerpP = phase << 32 - coefShift >> 17; // for 16 bit unsigned phase multiply 32586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 32686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * For integer types, this is expressed as: 32786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 32886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * lerpP = phase << sizeof(phase)*8 - coefShift 32986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * >> (sizeof(phase)-sizeof(*coefs))*8 + 1; 33086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung * 331d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * For floating point, lerpP is the fractional phase scaled to [0.0, 1.0): 332d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * 333d549139155b20d7cbf6a4326133e06def465ef54Andy Hung * lerpP = (phase << 32 - coefShift) / (1 << 32); // floating point equivalent 33486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung */ 33586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 336d549139155b20d7cbf6a4326133e06def465ef54Andy Hungtemplate<int CHANNELS, bool LOCKED, int STRIDE, typename TC, typename TI, typename TO> 33786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hungstatic inline 338d549139155b20d7cbf6a4326133e06def465ef54Andy Hungvoid fir(TO* const out, 33986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const uint32_t phase, const uint32_t phaseWrapLimit, 34086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const int coefShift, const int halfNumCoefs, const TC* const coefs, 341d549139155b20d7cbf6a4326133e06def465ef54Andy Hung const TI* const samples, const TO* const volumeLR) 34286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung{ 34386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // NOTE: be very careful when modifying the code here. register 34486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // pressure is very high and a small change might cause the compiler 34586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // to generate far less efficient code. 34686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // Always sanity check the result with objdump or test-resample. 34786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 34886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung if (LOCKED) { 34986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // locked polyphase (no interpolation) 35086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // Compute the polyphase filter index on the positive and negative side. 35186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung uint32_t indexP = phase >> coefShift; 35286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung uint32_t indexN = (phaseWrapLimit - phase) >> coefShift; 35386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const TC* coefsP = coefs + indexP*halfNumCoefs; 35486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const TC* coefsN = coefs + indexN*halfNumCoefs; 355d549139155b20d7cbf6a4326133e06def465ef54Andy Hung const TI* sP = samples; 356d549139155b20d7cbf6a4326133e06def465ef54Andy Hung const TI* sN = samples + CHANNELS; 35786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 35886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // dot product filter. 35986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung ProcessL<CHANNELS, STRIDE>(out, 36086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung halfNumCoefs, coefsP, coefsN, sP, sN, volumeLR); 36186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung } else { 36286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // interpolated polyphase 36386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // Compute the polyphase filter index on the positive and negative side. 36486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung uint32_t indexP = phase >> coefShift; 36586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung uint32_t indexN = (phaseWrapLimit - phase - 1) >> coefShift; // one's complement. 36686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const TC* coefsP = coefs + indexP*halfNumCoefs; 36786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const TC* coefsN = coefs + indexN*halfNumCoefs; 36886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const TC* coefsP1 = coefsP + halfNumCoefs; 36986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung const TC* coefsN1 = coefsN + halfNumCoefs; 370d549139155b20d7cbf6a4326133e06def465ef54Andy Hung const TI* sP = samples; 371d549139155b20d7cbf6a4326133e06def465ef54Andy Hung const TI* sN = samples + CHANNELS; 37286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 37386eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // Interpolation fraction lerpP derived by shifting all the way up and down 37486eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // to clear the appropriate bits and align to the appropriate level 37586eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // for the integer multiply. The constants should resolve in compile time. 37686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // 37786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // The interpolated filter coefficient is derived as follows for the pos/neg half: 37886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // 37986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // interpolated[P] = index[P]*lerpP + index[P+1]*(1-lerpP) 38086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // interpolated[N] = index[N+1]*lerpP + index[N]*(1-lerpP) 38186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 38286eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung // on-the-fly interpolated dot product filter 383d549139155b20d7cbf6a4326133e06def465ef54Andy Hung if (is_same<TC, float>::value || is_same<TC, double>::value) { 384d549139155b20d7cbf6a4326133e06def465ef54Andy Hung static const TC scale = 1. / (65536. * 65536.); // scale phase bits to [0.0, 1.0) 385d549139155b20d7cbf6a4326133e06def465ef54Andy Hung TC lerpP = TC(phase << (sizeof(phase)*8 - coefShift)) * scale; 386d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 387d549139155b20d7cbf6a4326133e06def465ef54Andy Hung Process<CHANNELS, STRIDE>(out, 388d549139155b20d7cbf6a4326133e06def465ef54Andy Hung halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR); 389d549139155b20d7cbf6a4326133e06def465ef54Andy Hung } else { 390d549139155b20d7cbf6a4326133e06def465ef54Andy Hung uint32_t lerpP = phase << (sizeof(phase)*8 - coefShift) 391d549139155b20d7cbf6a4326133e06def465ef54Andy Hung >> ((sizeof(phase)-sizeof(*coefs))*8 + 1); 392d549139155b20d7cbf6a4326133e06def465ef54Andy Hung 393d549139155b20d7cbf6a4326133e06def465ef54Andy Hung Process<CHANNELS, STRIDE>(out, 394d549139155b20d7cbf6a4326133e06def465ef54Andy Hung halfNumCoefs, coefsP, coefsN, coefsP1, coefsN1, sP, sN, lerpP, volumeLR); 395d549139155b20d7cbf6a4326133e06def465ef54Andy Hung } 39686eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung } 39786eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung} 39886eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 39986eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung}; // namespace android 40086eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung 40186eae0e5931103e040ac2cdd023ef5db252e09f6Andy Hung#endif /*ANDROID_AUDIO_RESAMPLER_FIR_PROCESS_H*/ 402