110602db812fa270fc923f5e48fb47202288828f3David Rowe/* 210602db812fa270fc923f5e48fb47202288828f3David Rowe * SpanDSP - a series of DSP components for telephony 310602db812fa270fc923f5e48fb47202288828f3David Rowe * 410602db812fa270fc923f5e48fb47202288828f3David Rowe * fir.h - General telephony FIR routines 510602db812fa270fc923f5e48fb47202288828f3David Rowe * 610602db812fa270fc923f5e48fb47202288828f3David Rowe * Written by Steve Underwood <steveu@coppice.org> 710602db812fa270fc923f5e48fb47202288828f3David Rowe * 810602db812fa270fc923f5e48fb47202288828f3David Rowe * Copyright (C) 2002 Steve Underwood 910602db812fa270fc923f5e48fb47202288828f3David Rowe * 1010602db812fa270fc923f5e48fb47202288828f3David Rowe * All rights reserved. 1110602db812fa270fc923f5e48fb47202288828f3David Rowe * 1210602db812fa270fc923f5e48fb47202288828f3David Rowe * This program is free software; you can redistribute it and/or modify 1310602db812fa270fc923f5e48fb47202288828f3David Rowe * it under the terms of the GNU General Public License version 2, as 1410602db812fa270fc923f5e48fb47202288828f3David Rowe * published by the Free Software Foundation. 1510602db812fa270fc923f5e48fb47202288828f3David Rowe * 1610602db812fa270fc923f5e48fb47202288828f3David Rowe * This program is distributed in the hope that it will be useful, 1710602db812fa270fc923f5e48fb47202288828f3David Rowe * but WITHOUT ANY WARRANTY; without even the implied warranty of 1810602db812fa270fc923f5e48fb47202288828f3David Rowe * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 1910602db812fa270fc923f5e48fb47202288828f3David Rowe * GNU General Public License for more details. 2010602db812fa270fc923f5e48fb47202288828f3David Rowe * 2110602db812fa270fc923f5e48fb47202288828f3David Rowe * You should have received a copy of the GNU General Public License 2210602db812fa270fc923f5e48fb47202288828f3David Rowe * along with this program; if not, write to the Free Software 2310602db812fa270fc923f5e48fb47202288828f3David Rowe * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 2410602db812fa270fc923f5e48fb47202288828f3David Rowe */ 2510602db812fa270fc923f5e48fb47202288828f3David Rowe 2610602db812fa270fc923f5e48fb47202288828f3David Rowe#if !defined(_FIR_H_) 2710602db812fa270fc923f5e48fb47202288828f3David Rowe#define _FIR_H_ 2810602db812fa270fc923f5e48fb47202288828f3David Rowe 2910602db812fa270fc923f5e48fb47202288828f3David Rowe/* 3010602db812fa270fc923f5e48fb47202288828f3David Rowe Blackfin NOTES & IDEAS: 3110602db812fa270fc923f5e48fb47202288828f3David Rowe 3210602db812fa270fc923f5e48fb47202288828f3David Rowe A simple dot product function is used to implement the filter. This performs 3310602db812fa270fc923f5e48fb47202288828f3David Rowe just one MAC/cycle which is inefficient but was easy to implement as a first 3410602db812fa270fc923f5e48fb47202288828f3David Rowe pass. The current Blackfin code also uses an unrolled form of the filter 3510602db812fa270fc923f5e48fb47202288828f3David Rowe history to avoid 0 length hardware loop issues. This is wasteful of 3610602db812fa270fc923f5e48fb47202288828f3David Rowe memory. 3710602db812fa270fc923f5e48fb47202288828f3David Rowe 3810602db812fa270fc923f5e48fb47202288828f3David Rowe Ideas for improvement: 3910602db812fa270fc923f5e48fb47202288828f3David Rowe 4010602db812fa270fc923f5e48fb47202288828f3David Rowe 1/ Rewrite filter for dual MAC inner loop. The issue here is handling 4110602db812fa270fc923f5e48fb47202288828f3David Rowe history sample offsets that are 16 bit aligned - the dual MAC needs 4210602db812fa270fc923f5e48fb47202288828f3David Rowe 32 bit aligmnent. There are some good examples in libbfdsp. 4310602db812fa270fc923f5e48fb47202288828f3David Rowe 4410602db812fa270fc923f5e48fb47202288828f3David Rowe 2/ Use the hardware circular buffer facility tohalve memory usage. 4510602db812fa270fc923f5e48fb47202288828f3David Rowe 4610602db812fa270fc923f5e48fb47202288828f3David Rowe 3/ Consider using internal memory. 4710602db812fa270fc923f5e48fb47202288828f3David Rowe 4810602db812fa270fc923f5e48fb47202288828f3David Rowe Using less memory might also improve speed as cache misses will be 4910602db812fa270fc923f5e48fb47202288828f3David Rowe reduced. A drop in MIPs and memory approaching 50% should be 5010602db812fa270fc923f5e48fb47202288828f3David Rowe possible. 5110602db812fa270fc923f5e48fb47202288828f3David Rowe 5210602db812fa270fc923f5e48fb47202288828f3David Rowe The foreground and background filters currenlty use a total of 5310602db812fa270fc923f5e48fb47202288828f3David Rowe about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo 5410602db812fa270fc923f5e48fb47202288828f3David Rowe can. 5510602db812fa270fc923f5e48fb47202288828f3David Rowe*/ 5610602db812fa270fc923f5e48fb47202288828f3David Rowe 5756791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman/* 5856791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman * 16 bit integer FIR descriptor. This defines the working state for a single 5956791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman * instance of an FIR filter using 16 bit integer coefficients. 6056791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman */ 61c82895b8976a93d739373cb61daff72e1b490605J.R. Maurostruct fir16_state_t { 6210602db812fa270fc923f5e48fb47202288828f3David Rowe int taps; 6310602db812fa270fc923f5e48fb47202288828f3David Rowe int curr_pos; 6410602db812fa270fc923f5e48fb47202288828f3David Rowe const int16_t *coeffs; 6510602db812fa270fc923f5e48fb47202288828f3David Rowe int16_t *history; 66c82895b8976a93d739373cb61daff72e1b490605J.R. Mauro}; 6710602db812fa270fc923f5e48fb47202288828f3David Rowe 6856791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman/* 6956791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman * 32 bit integer FIR descriptor. This defines the working state for a single 7056791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman * instance of an FIR filter using 32 bit integer coefficients, and filtering 7156791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman * 16 bit integer data. 7256791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman */ 73c82895b8976a93d739373cb61daff72e1b490605J.R. Maurostruct fir32_state_t { 7410602db812fa270fc923f5e48fb47202288828f3David Rowe int taps; 7510602db812fa270fc923f5e48fb47202288828f3David Rowe int curr_pos; 7610602db812fa270fc923f5e48fb47202288828f3David Rowe const int32_t *coeffs; 7710602db812fa270fc923f5e48fb47202288828f3David Rowe int16_t *history; 78c82895b8976a93d739373cb61daff72e1b490605J.R. Mauro}; 7910602db812fa270fc923f5e48fb47202288828f3David Rowe 8056791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman/* 8156791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman * Floating point FIR descriptor. This defines the working state for a single 8256791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman * instance of an FIR filter using floating point coefficients and data. 8356791f0a85382936d3922ecd05b4eedbfb53d2a6Greg Kroah-Hartman */ 84c82895b8976a93d739373cb61daff72e1b490605J.R. Maurostruct fir_float_state_t { 8510602db812fa270fc923f5e48fb47202288828f3David Rowe int taps; 8610602db812fa270fc923f5e48fb47202288828f3David Rowe int curr_pos; 8710602db812fa270fc923f5e48fb47202288828f3David Rowe const float *coeffs; 8810602db812fa270fc923f5e48fb47202288828f3David Rowe float *history; 89c82895b8976a93d739373cb61daff72e1b490605J.R. Mauro}; 9010602db812fa270fc923f5e48fb47202288828f3David Rowe 91dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline const int16_t *fir16_create(struct fir16_state_t *fir, 92dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalov const int16_t *coeffs, int taps) 9310602db812fa270fc923f5e48fb47202288828f3David Rowe{ 9410602db812fa270fc923f5e48fb47202288828f3David Rowe fir->taps = taps; 9510602db812fa270fc923f5e48fb47202288828f3David Rowe fir->curr_pos = taps - 1; 9610602db812fa270fc923f5e48fb47202288828f3David Rowe fir->coeffs = coeffs; 97c8b3953c4c267133ed3276d7b052282dca1e2e91Greg Kroah-Hartman#if defined(__bfin__) 984460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL); 9910602db812fa270fc923f5e48fb47202288828f3David Rowe#else 100db2af149bd0c798ce599365ee4320dd30dda852cPekka Enberg fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); 10110602db812fa270fc923f5e48fb47202288828f3David Rowe#endif 10210602db812fa270fc923f5e48fb47202288828f3David Rowe return fir->history; 10310602db812fa270fc923f5e48fb47202288828f3David Rowe} 10410602db812fa270fc923f5e48fb47202288828f3David Rowe 105dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline void fir16_flush(struct fir16_state_t *fir) 10610602db812fa270fc923f5e48fb47202288828f3David Rowe{ 107c8b3953c4c267133ed3276d7b052282dca1e2e91Greg Kroah-Hartman#if defined(__bfin__) 1084460a860f728983f685cb23140c241c10dca0d32J.R. Mauro memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); 10910602db812fa270fc923f5e48fb47202288828f3David Rowe#else 1104460a860f728983f685cb23140c241c10dca0d32J.R. Mauro memset(fir->history, 0, fir->taps * sizeof(int16_t)); 11110602db812fa270fc923f5e48fb47202288828f3David Rowe#endif 11210602db812fa270fc923f5e48fb47202288828f3David Rowe} 11310602db812fa270fc923f5e48fb47202288828f3David Rowe 114dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline void fir16_free(struct fir16_state_t *fir) 11510602db812fa270fc923f5e48fb47202288828f3David Rowe{ 116db2af149bd0c798ce599365ee4320dd30dda852cPekka Enberg kfree(fir->history); 11710602db812fa270fc923f5e48fb47202288828f3David Rowe} 11810602db812fa270fc923f5e48fb47202288828f3David Rowe 119f55ccbf6bc5e5e857b15f51d481aa7b1cd993ae0Tzafrir Cohen#ifdef __bfin__ 12010602db812fa270fc923f5e48fb47202288828f3David Rowestatic inline int32_t dot_asm(short *x, short *y, int len) 12110602db812fa270fc923f5e48fb47202288828f3David Rowe{ 1224460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int dot; 1234460a860f728983f685cb23140c241c10dca0d32J.R. Mauro 1244460a860f728983f685cb23140c241c10dca0d32J.R. Mauro len--; 1254460a860f728983f685cb23140c241c10dca0d32J.R. Mauro 1264460a860f728983f685cb23140c241c10dca0d32J.R. Mauro __asm__("I0 = %1;\n\t" 1274460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "I1 = %2;\n\t" 1284460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "A0 = 0;\n\t" 1294460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "R0.L = W[I0++] || R1.L = W[I1++];\n\t" 1304460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "LOOP dot%= LC0 = %3;\n\t" 1314460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "LOOP_BEGIN dot%=;\n\t" 1324460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t" 1334460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "LOOP_END dot%=;\n\t" 1344460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "A0 += R0.L*R1.L (IS);\n\t" 1354460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "R0 = A0;\n\t" 1364460a860f728983f685cb23140c241c10dca0d32J.R. Mauro "%0 = R0;\n\t" 137dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalov : "=&d"(dot) 138dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalov : "a"(x), "a"(y), "a"(len) 139dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalov : "I0", "I1", "A1", "A0", "R0", "R1" 1404460a860f728983f685cb23140c241c10dca0d32J.R. Mauro ); 1414460a860f728983f685cb23140c241c10dca0d32J.R. Mauro 1424460a860f728983f685cb23140c241c10dca0d32J.R. Mauro return dot; 14310602db812fa270fc923f5e48fb47202288828f3David Rowe} 14410602db812fa270fc923f5e48fb47202288828f3David Rowe#endif 14510602db812fa270fc923f5e48fb47202288828f3David Rowe 146dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline int16_t fir16(struct fir16_state_t *fir, int16_t sample) 14710602db812fa270fc923f5e48fb47202288828f3David Rowe{ 1484460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int32_t y; 149c8b3953c4c267133ed3276d7b052282dca1e2e91Greg Kroah-Hartman#if defined(__bfin__) 1504460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->history[fir->curr_pos] = sample; 1514460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->history[fir->curr_pos + fir->taps] = sample; 1524460a860f728983f685cb23140c241c10dca0d32J.R. Mauro y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos], 1534460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->taps); 15410602db812fa270fc923f5e48fb47202288828f3David Rowe#else 1554460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int i; 1564460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int offset1; 1574460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int offset2; 1584460a860f728983f685cb23140c241c10dca0d32J.R. Mauro 1594460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->history[fir->curr_pos] = sample; 1604460a860f728983f685cb23140c241c10dca0d32J.R. Mauro 1614460a860f728983f685cb23140c241c10dca0d32J.R. Mauro offset2 = fir->curr_pos; 1624460a860f728983f685cb23140c241c10dca0d32J.R. Mauro offset1 = fir->taps - offset2; 1634460a860f728983f685cb23140c241c10dca0d32J.R. Mauro y = 0; 1644460a860f728983f685cb23140c241c10dca0d32J.R. Mauro for (i = fir->taps - 1; i >= offset1; i--) 1654460a860f728983f685cb23140c241c10dca0d32J.R. Mauro y += fir->coeffs[i] * fir->history[i - offset1]; 1664460a860f728983f685cb23140c241c10dca0d32J.R. Mauro for (; i >= 0; i--) 1674460a860f728983f685cb23140c241c10dca0d32J.R. Mauro y += fir->coeffs[i] * fir->history[i + offset2]; 16810602db812fa270fc923f5e48fb47202288828f3David Rowe#endif 1694460a860f728983f685cb23140c241c10dca0d32J.R. Mauro if (fir->curr_pos <= 0) 1704460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->curr_pos = fir->taps; 1714460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->curr_pos--; 1724460a860f728983f685cb23140c241c10dca0d32J.R. Mauro return (int16_t) (y >> 15); 17310602db812fa270fc923f5e48fb47202288828f3David Rowe} 17410602db812fa270fc923f5e48fb47202288828f3David Rowe 175dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline const int16_t *fir32_create(struct fir32_state_t *fir, 176dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalov const int32_t *coeffs, int taps) 17710602db812fa270fc923f5e48fb47202288828f3David Rowe{ 1784460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->taps = taps; 1794460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->curr_pos = taps - 1; 1804460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->coeffs = coeffs; 1814460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL); 1824460a860f728983f685cb23140c241c10dca0d32J.R. Mauro return fir->history; 18310602db812fa270fc923f5e48fb47202288828f3David Rowe} 18410602db812fa270fc923f5e48fb47202288828f3David Rowe 185dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline void fir32_flush(struct fir32_state_t *fir) 18610602db812fa270fc923f5e48fb47202288828f3David Rowe{ 1874460a860f728983f685cb23140c241c10dca0d32J.R. Mauro memset(fir->history, 0, fir->taps * sizeof(int16_t)); 18810602db812fa270fc923f5e48fb47202288828f3David Rowe} 18910602db812fa270fc923f5e48fb47202288828f3David Rowe 190dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline void fir32_free(struct fir32_state_t *fir) 19110602db812fa270fc923f5e48fb47202288828f3David Rowe{ 1924460a860f728983f685cb23140c241c10dca0d32J.R. Mauro kfree(fir->history); 19310602db812fa270fc923f5e48fb47202288828f3David Rowe} 19410602db812fa270fc923f5e48fb47202288828f3David Rowe 195dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline int16_t fir32(struct fir32_state_t *fir, int16_t sample) 19610602db812fa270fc923f5e48fb47202288828f3David Rowe{ 1974460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int i; 1984460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int32_t y; 1994460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int offset1; 2004460a860f728983f685cb23140c241c10dca0d32J.R. Mauro int offset2; 2014460a860f728983f685cb23140c241c10dca0d32J.R. Mauro 2024460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->history[fir->curr_pos] = sample; 2034460a860f728983f685cb23140c241c10dca0d32J.R. Mauro offset2 = fir->curr_pos; 2044460a860f728983f685cb23140c241c10dca0d32J.R. Mauro offset1 = fir->taps - offset2; 2054460a860f728983f685cb23140c241c10dca0d32J.R. Mauro y = 0; 2064460a860f728983f685cb23140c241c10dca0d32J.R. Mauro for (i = fir->taps - 1; i >= offset1; i--) 2074460a860f728983f685cb23140c241c10dca0d32J.R. Mauro y += fir->coeffs[i] * fir->history[i - offset1]; 2084460a860f728983f685cb23140c241c10dca0d32J.R. Mauro for (; i >= 0; i--) 2094460a860f728983f685cb23140c241c10dca0d32J.R. Mauro y += fir->coeffs[i] * fir->history[i + offset2]; 2104460a860f728983f685cb23140c241c10dca0d32J.R. Mauro if (fir->curr_pos <= 0) 2114460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->curr_pos = fir->taps; 2124460a860f728983f685cb23140c241c10dca0d32J.R. Mauro fir->curr_pos--; 2134460a860f728983f685cb23140c241c10dca0d32J.R. Mauro return (int16_t) (y >> 15); 21410602db812fa270fc923f5e48fb47202288828f3David Rowe} 21510602db812fa270fc923f5e48fb47202288828f3David Rowe 21610602db812fa270fc923f5e48fb47202288828f3David Rowe#endif 217