110602db812fa270fc923f5e48fb47202288828f3David Rowe/*
210602db812fa270fc923f5e48fb47202288828f3David Rowe * SpanDSP - a series of DSP components for telephony
310602db812fa270fc923f5e48fb47202288828f3David Rowe *
410602db812fa270fc923f5e48fb47202288828f3David Rowe * fir.h - General telephony FIR routines
510602db812fa270fc923f5e48fb47202288828f3David Rowe *
610602db812fa270fc923f5e48fb47202288828f3David Rowe * Written by Steve Underwood <steveu@coppice.org>
710602db812fa270fc923f5e48fb47202288828f3David Rowe *
810602db812fa270fc923f5e48fb47202288828f3David Rowe * Copyright (C) 2002 Steve Underwood
910602db812fa270fc923f5e48fb47202288828f3David Rowe *
1010602db812fa270fc923f5e48fb47202288828f3David Rowe * All rights reserved.
1110602db812fa270fc923f5e48fb47202288828f3David Rowe *
1210602db812fa270fc923f5e48fb47202288828f3David Rowe * This program is free software; you can redistribute it and/or modify
1310602db812fa270fc923f5e48fb47202288828f3David Rowe * it under the terms of the GNU General Public License version 2, as
1410602db812fa270fc923f5e48fb47202288828f3David Rowe * published by the Free Software Foundation.
1510602db812fa270fc923f5e48fb47202288828f3David Rowe *
1610602db812fa270fc923f5e48fb47202288828f3David Rowe * This program is distributed in the hope that it will be useful,
1710602db812fa270fc923f5e48fb47202288828f3David Rowe * but WITHOUT ANY WARRANTY; without even the implied warranty of
1810602db812fa270fc923f5e48fb47202288828f3David Rowe * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1910602db812fa270fc923f5e48fb47202288828f3David Rowe * GNU General Public License for more details.
2010602db812fa270fc923f5e48fb47202288828f3David Rowe *
2110602db812fa270fc923f5e48fb47202288828f3David Rowe * You should have received a copy of the GNU General Public License
2210602db812fa270fc923f5e48fb47202288828f3David Rowe * along with this program; if not, write to the Free Software
2310602db812fa270fc923f5e48fb47202288828f3David Rowe * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
2410602db812fa270fc923f5e48fb47202288828f3David Rowe */
2510602db812fa270fc923f5e48fb47202288828f3David Rowe
2610602db812fa270fc923f5e48fb47202288828f3David Rowe#if !defined(_FIR_H_)
2710602db812fa270fc923f5e48fb47202288828f3David Rowe#define _FIR_H_
2810602db812fa270fc923f5e48fb47202288828f3David Rowe
2910602db812fa270fc923f5e48fb47202288828f3David Rowe/*
3010602db812fa270fc923f5e48fb47202288828f3David Rowe   Blackfin NOTES & IDEAS:
3110602db812fa270fc923f5e48fb47202288828f3David Rowe
3210602db812fa270fc923f5e48fb47202288828f3David Rowe   A simple dot product function is used to implement the filter.  This performs
3310602db812fa270fc923f5e48fb47202288828f3David Rowe   just one MAC/cycle which is inefficient but was easy to implement as a first
3410602db812fa270fc923f5e48fb47202288828f3David Rowe   pass.  The current Blackfin code also uses an unrolled form of the filter
3510602db812fa270fc923f5e48fb47202288828f3David Rowe   history to avoid 0 length hardware loop issues.  This is wasteful of
3610602db812fa270fc923f5e48fb47202288828f3David Rowe   memory.
3710602db812fa270fc923f5e48fb47202288828f3David Rowe
3810602db812fa270fc923f5e48fb47202288828f3David Rowe   Ideas for improvement:
3910602db812fa270fc923f5e48fb47202288828f3David Rowe
   1/ Rewrite filter for dual MAC inner loop.  The issue here is handling
   history sample offsets that are 16 bit aligned - the dual MAC needs
   32 bit alignment.  There are some good examples in libbfdsp.
4310602db812fa270fc923f5e48fb47202288828f3David Rowe
   2/ Use the hardware circular buffer facility to halve memory usage.
4510602db812fa270fc923f5e48fb47202288828f3David Rowe
4610602db812fa270fc923f5e48fb47202288828f3David Rowe   3/ Consider using internal memory.
4710602db812fa270fc923f5e48fb47202288828f3David Rowe
4810602db812fa270fc923f5e48fb47202288828f3David Rowe   Using less memory might also improve speed as cache misses will be
4910602db812fa270fc923f5e48fb47202288828f3David Rowe   reduced. A drop in MIPs and memory approaching 50% should be
5010602db812fa270fc923f5e48fb47202288828f3David Rowe   possible.
5110602db812fa270fc923f5e48fb47202288828f3David Rowe
   The foreground and background filters currently use a total of
   about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
   can.
5510602db812fa270fc923f5e48fb47202288828f3David Rowe*/
5610602db812fa270fc923f5e48fb47202288828f3David Rowe
/*
 * 16 bit integer FIR descriptor. This defines the working state for a single
 * instance of an FIR filter using 16 bit integer coefficients.
 */
struct fir16_state_t {
	/* Number of coefficients, as passed to fir16_create(). */
	int taps;
	/* Index in history where fir16() stores the next incoming sample. */
	int curr_pos;
	/* Caller-supplied coefficient array; fir16_free() does not free it. */
	const int16_t *coeffs;
	/* Sample buffer allocated by fir16_create(), freed by fir16_free(). */
	int16_t *history;
};
6710602db812fa270fc923f5e48fb47202288828f3David Rowe
/*
 * 32 bit integer FIR descriptor. This defines the working state for a single
 * instance of an FIR filter using 32 bit integer coefficients, and filtering
 * 16 bit integer data.
 */
struct fir32_state_t {
	/* Number of coefficients, as passed to fir32_create(). */
	int taps;
	/* Index in history where fir32() stores the next incoming sample. */
	int curr_pos;
	/* Caller-supplied coefficient array; fir32_free() does not free it. */
	const int32_t *coeffs;
	/* Sample buffer allocated by fir32_create(), freed by fir32_free(). */
	int16_t *history;
};
7910602db812fa270fc923f5e48fb47202288828f3David Rowe
/*
 * Floating point FIR descriptor. This defines the working state for a single
 * instance of an FIR filter using floating point coefficients and data.
 *
 * NOTE(review): no routine operating on this descriptor is visible in this
 * header; the field roles below are presumed to mirror the integer
 * descriptors - confirm against the users of this type.
 */
struct fir_float_state_t {
	/* Number of coefficients. */
	int taps;
	/* Presumably the history index of the next sample to store. */
	int curr_pos;
	/* Coefficient array (read-only through this descriptor). */
	const float *coeffs;
	/* Sample history buffer. */
	float *history;
};
9010602db812fa270fc923f5e48fb47202288828f3David Rowe
91dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline const int16_t *fir16_create(struct fir16_state_t *fir,
92dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalov					      const int16_t *coeffs, int taps)
9310602db812fa270fc923f5e48fb47202288828f3David Rowe{
9410602db812fa270fc923f5e48fb47202288828f3David Rowe	fir->taps = taps;
9510602db812fa270fc923f5e48fb47202288828f3David Rowe	fir->curr_pos = taps - 1;
9610602db812fa270fc923f5e48fb47202288828f3David Rowe	fir->coeffs = coeffs;
97c8b3953c4c267133ed3276d7b052282dca1e2e91Greg Kroah-Hartman#if defined(__bfin__)
984460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->history = kcalloc(2 * taps, sizeof(int16_t), GFP_KERNEL);
9910602db812fa270fc923f5e48fb47202288828f3David Rowe#else
100db2af149bd0c798ce599365ee4320dd30dda852cPekka Enberg	fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
10110602db812fa270fc923f5e48fb47202288828f3David Rowe#endif
10210602db812fa270fc923f5e48fb47202288828f3David Rowe	return fir->history;
10310602db812fa270fc923f5e48fb47202288828f3David Rowe}
10410602db812fa270fc923f5e48fb47202288828f3David Rowe
105dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline void fir16_flush(struct fir16_state_t *fir)
10610602db812fa270fc923f5e48fb47202288828f3David Rowe{
107c8b3953c4c267133ed3276d7b052282dca1e2e91Greg Kroah-Hartman#if defined(__bfin__)
1084460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t));
10910602db812fa270fc923f5e48fb47202288828f3David Rowe#else
1104460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	memset(fir->history, 0, fir->taps * sizeof(int16_t));
11110602db812fa270fc923f5e48fb47202288828f3David Rowe#endif
11210602db812fa270fc923f5e48fb47202288828f3David Rowe}
11310602db812fa270fc923f5e48fb47202288828f3David Rowe
114dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline void fir16_free(struct fir16_state_t *fir)
11510602db812fa270fc923f5e48fb47202288828f3David Rowe{
116db2af149bd0c798ce599365ee4320dd30dda852cPekka Enberg	kfree(fir->history);
11710602db812fa270fc923f5e48fb47202288828f3David Rowe}
11810602db812fa270fc923f5e48fb47202288828f3David Rowe
#ifdef __bfin__
/*
 * Blackfin inline-assembly dot product.
 *
 * x, y: arrays of at least len 16 bit values; only read.
 * len:  number of element pairs to multiply and accumulate.
 *
 * Returns the sum of x[i] * y[i] for i in [0, len), as left in
 * accumulator A0.
 *
 * The first pair is loaded before the hardware loop and the last product is
 * added after it, so the loop itself is set up for len - 1 iterations.
 * NOTE(review): len == 1 therefore requests a loop count of 0 - confirm how
 * the hardware loop behaves in that case before using single-tap filters.
 */
static inline int32_t dot_asm(const short *x, const short *y, int len)
{
	int dot;

	len--;

	/*
	 * The asm reads the arrays through I0/I1, which the operand list does
	 * not describe; the "memory" clobber makes the compiler complete any
	 * pending stores to them before the asm runs.
	 */
	__asm__("I0 = %1;\n\t"
		"I1 = %2;\n\t"
		"A0 = 0;\n\t"
		"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
		"LOOP dot%= LC0 = %3;\n\t"
		"LOOP_BEGIN dot%=;\n\t"
		"A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
		"LOOP_END dot%=;\n\t"
		"A0 += R0.L*R1.L (IS);\n\t"
		"R0 = A0;\n\t"
		"%0 = R0;\n\t"
		: "=&d"(dot)
		: "a"(x), "a"(y), "a"(len)
		: "I0", "I1", "A1", "A0", "R0", "R1", "memory"
	);

	return dot;
}
#endif
14510602db812fa270fc923f5e48fb47202288828f3David Rowe
146dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline int16_t fir16(struct fir16_state_t *fir, int16_t sample)
14710602db812fa270fc923f5e48fb47202288828f3David Rowe{
1484460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	int32_t y;
149c8b3953c4c267133ed3276d7b052282dca1e2e91Greg Kroah-Hartman#if defined(__bfin__)
1504460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->history[fir->curr_pos] = sample;
1514460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->history[fir->curr_pos + fir->taps] = sample;
1524460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	y = dot_asm((int16_t *) fir->coeffs, &fir->history[fir->curr_pos],
1534460a860f728983f685cb23140c241c10dca0d32J.R. Mauro		    fir->taps);
15410602db812fa270fc923f5e48fb47202288828f3David Rowe#else
1554460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	int i;
1564460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	int offset1;
1574460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	int offset2;
1584460a860f728983f685cb23140c241c10dca0d32J.R. Mauro
1594460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->history[fir->curr_pos] = sample;
1604460a860f728983f685cb23140c241c10dca0d32J.R. Mauro
1614460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	offset2 = fir->curr_pos;
1624460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	offset1 = fir->taps - offset2;
1634460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	y = 0;
1644460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	for (i = fir->taps - 1; i >= offset1; i--)
1654460a860f728983f685cb23140c241c10dca0d32J.R. Mauro		y += fir->coeffs[i] * fir->history[i - offset1];
1664460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	for (; i >= 0; i--)
1674460a860f728983f685cb23140c241c10dca0d32J.R. Mauro		y += fir->coeffs[i] * fir->history[i + offset2];
16810602db812fa270fc923f5e48fb47202288828f3David Rowe#endif
1694460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	if (fir->curr_pos <= 0)
1704460a860f728983f685cb23140c241c10dca0d32J.R. Mauro		fir->curr_pos = fir->taps;
1714460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->curr_pos--;
1724460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	return (int16_t) (y >> 15);
17310602db812fa270fc923f5e48fb47202288828f3David Rowe}
17410602db812fa270fc923f5e48fb47202288828f3David Rowe
175dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline const int16_t *fir32_create(struct fir32_state_t *fir,
176dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalov					      const int32_t *coeffs, int taps)
17710602db812fa270fc923f5e48fb47202288828f3David Rowe{
1784460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->taps = taps;
1794460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->curr_pos = taps - 1;
1804460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->coeffs = coeffs;
1814460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->history = kcalloc(taps, sizeof(int16_t), GFP_KERNEL);
1824460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	return fir->history;
18310602db812fa270fc923f5e48fb47202288828f3David Rowe}
18410602db812fa270fc923f5e48fb47202288828f3David Rowe
185dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline void fir32_flush(struct fir32_state_t *fir)
18610602db812fa270fc923f5e48fb47202288828f3David Rowe{
1874460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	memset(fir->history, 0, fir->taps * sizeof(int16_t));
18810602db812fa270fc923f5e48fb47202288828f3David Rowe}
18910602db812fa270fc923f5e48fb47202288828f3David Rowe
190dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline void fir32_free(struct fir32_state_t *fir)
19110602db812fa270fc923f5e48fb47202288828f3David Rowe{
1924460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	kfree(fir->history);
19310602db812fa270fc923f5e48fb47202288828f3David Rowe}
19410602db812fa270fc923f5e48fb47202288828f3David Rowe
195dc57a3ea80a85a74094b873a693c3e07b2cb5da4Alexander Beregalovstatic inline int16_t fir32(struct fir32_state_t *fir, int16_t sample)
19610602db812fa270fc923f5e48fb47202288828f3David Rowe{
1974460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	int i;
1984460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	int32_t y;
1994460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	int offset1;
2004460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	int offset2;
2014460a860f728983f685cb23140c241c10dca0d32J.R. Mauro
2024460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->history[fir->curr_pos] = sample;
2034460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	offset2 = fir->curr_pos;
2044460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	offset1 = fir->taps - offset2;
2054460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	y = 0;
2064460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	for (i = fir->taps - 1; i >= offset1; i--)
2074460a860f728983f685cb23140c241c10dca0d32J.R. Mauro		y += fir->coeffs[i] * fir->history[i - offset1];
2084460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	for (; i >= 0; i--)
2094460a860f728983f685cb23140c241c10dca0d32J.R. Mauro		y += fir->coeffs[i] * fir->history[i + offset2];
2104460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	if (fir->curr_pos <= 0)
2114460a860f728983f685cb23140c241c10dca0d32J.R. Mauro		fir->curr_pos = fir->taps;
2124460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	fir->curr_pos--;
2134460a860f728983f685cb23140c241c10dca0d32J.R. Mauro	return (int16_t) (y >> 15);
21410602db812fa270fc923f5e48fb47202288828f3David Rowe}
21510602db812fa270fc923f5e48fb47202288828f3David Rowe
21610602db812fa270fc923f5e48fb47202288828f3David Rowe#endif
217