1/* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6#include <string.h>
7#include "crossover2.h"
8#include "biquad.h"
9
10static void lr42_set(struct lr42 *lr42, enum biquad_type type, float freq)
11{
12	struct biquad q;
13	biquad_set(&q, type, freq, 0, 0);
14	memset(lr42, 0, sizeof(*lr42));
15	lr42->b0 = q.b0;
16	lr42->b1 = q.b1;
17	lr42->b2 = q.b2;
18	lr42->a1 = q.a1;
19	lr42->a2 = q.a2;
20}
21
22/* Split input data using two LR4 filters, put the result into the input array
23 * and another array.
24 *
25 * data0 --+-- lp --> data0
26 *         |
27 *         \-- hp --> data1
28 */
29#if defined(__ARM_NEON__)
30#include <arm_neon.h>
/* NEON version of the band split.
 *
 * Every 4-lane vector is laid out as {lp left, hp left, lp right, hp right},
 * so one pass through the loop advances the low-pass and the high-pass
 * filter of both channels by one sample. Each filter is the same biquad
 * (b0, b1, b2, a1, a2) run twice in series: x -> y -> z.
 *
 * The low-pass result overwrites data0L/data0R and the high-pass result is
 * written to data1L/data1R. The x/y/z history is loaded from lp and hp on
 * entry and stored back into them on exit.
 *
 * The loop counter is decremented inside the body and only branched on at
 * the bottom, so the body always runs at least once: count is expected to
 * be >= 1 (crossover2_process() returns early when count is 0).
 */
static void lr42_split(struct lr42 *lp, struct lr42 *hp, int count,
		       float *data0L, float *data0R,
		       float *data1L, float *data1R)
{
	/* Filter history, one vector per delay slot. */
	float32x4_t x1 = {lp->x1L, hp->x1L, lp->x1R, hp->x1R};
	float32x4_t x2 = {lp->x2L, hp->x2L, lp->x2R, hp->x2R};
	float32x4_t y1 = {lp->y1L, hp->y1L, lp->y1R, hp->y1R};
	float32x4_t y2 = {lp->y2L, hp->y2L, lp->y2R, hp->y2R};
	float32x4_t z1 = {lp->z1L, hp->z1L, lp->z1R, hp->z1R};
	float32x4_t z2 = {lp->z2L, hp->z2L, lp->z2R, hp->z2R};
	/* Coefficients; the same value is used for the left and right lanes. */
	float32x4_t b0 = {lp->b0, hp->b0, lp->b0, hp->b0};
	float32x4_t b1 = {lp->b1, hp->b1, lp->b1, hp->b1};
	float32x4_t b2 = {lp->b2, hp->b2, lp->b2, hp->b2};
	float32x4_t a1 = {lp->a1, hp->a1, lp->a1, hp->a1};
	float32x4_t a2 = {lp->a2, hp->a2, lp->a2, hp->a2};

	__asm__ __volatile__(
		/* q0 = x (current input), q1 = y (first biquad output),
		 * q2 = z (second biquad output). d0/d1 are the two halves
		 * of q0 and d4/d5 the two halves of q2.
		 */
		"1:                                     \n"
		/* y = b1 * x1 */
		"vmul.f32 q1, %q[b1], %q[x1]            \n"
		/* Load one left sample into both lanes of d0 and one right
		 * sample into both lanes of d1, giving q0 = {L, L, R, R}.
		 */
		"vld1.32 d0[], [%[data0L]]              \n"
		"vld1.32 d1[], [%[data0R]]              \n"
		/* Decrement the counter; the flags are consumed by the
		 * "bne" at the bottom of the loop.
		 */
		"subs %[count], #1                      \n"
		/* z = b1 * y1 */
		"vmul.f32 q2, %q[b1], %q[y1]            \n"
		/* y += b0 * x + b2 * x2 */
		"vmla.f32 q1, %q[b0], q0                \n"
		"vmla.f32 q1, %q[b2], %q[x2]            \n"
		/* Shift the input history: x2 = x1, x1 = x. */
		"vmov.f32 %q[x2], %q[x1]                \n"
		"vmov.f32 %q[x1], q0                    \n"
		/* y -= a1 * y1 + a2 * y2 */
		"vmls.f32 q1, %q[a1], %q[y1]            \n"
		"vmls.f32 q1, %q[a2], %q[y2]            \n"
		/* z += b0 * y + b2 * y2 */
		"vmla.f32 q2, %q[b0], q1                \n"
		"vmla.f32 q2, %q[b2], %q[y2]            \n"
		/* Shift the first-stage history: y2 = y1, y1 = y. */
		"vmov.f32 %q[y2], %q[y1]                \n"
		"vmov.f32 %q[y1], q1                    \n"
		/* z -= a1 * z1 + a2 * z2 */
		"vmls.f32 q2, %q[a1], %q[z1]            \n"
		"vmls.f32 q2, %q[a2], %q[z2]            \n"
		/* Shift the second-stage history: z2 = z1, z1 = z. */
		"vmov.f32 %q[z2], %q[z1]                \n"
		"vmov.f32 %q[z1], q2                    \n"
		/* Scatter the four lanes of z and post-increment each
		 * pointer: lane 0 (lp left) -> data0L, lane 1 (hp left) ->
		 * data1L, lane 2 (lp right) -> data0R, lane 3 (hp right) ->
		 * data1R.
		 */
		"vst1.f32 d4[0], [%[data0L]]!           \n"
		"vst1.f32 d4[1], [%[data1L]]!           \n"
		"vst1.f32 d5[0], [%[data0R]]!           \n"
		"vst1.f32 d5[1], [%[data1R]]!           \n"
		/* Loop until the counter decremented above reaches zero. */
		"bne 1b                                 \n"
		: /* output: operands 0-4 are the updated pointers and
		   * counter; the history vectors are read-write.
		   */
		  "=r"(data0L),
		  "=r"(data0R),
		  "=r"(data1L),
		  "=r"(data1R),
		  "=r"(count),
		  [x1]"+w"(x1),
		  [x2]"+w"(x2),
		  [y1]"+w"(y1),
		  [y2]"+w"(y2),
		  [z1]"+w"(z1),
		  [z2]"+w"(z2)
		: /* input: the "0".."4" matching constraints tie each
		   * pointer/counter to the output operand of that index.
		   */
		  [data0L]"0"(data0L),
		  [data0R]"1"(data0R),
		  [data1L]"2"(data1L),
		  [data1R]"3"(data1R),
		  [count]"4"(count),
		  [b0]"w"(b0),
		  [b1]"w"(b1),
		  [b2]"w"(b2),
		  [a1]"w"(a1),
		  [a2]"w"(a2)
		: /* clobber: scratch vectors, the sample buffers, flags */
		  "q0", "q1", "q2", "memory", "cc"
		);

	/* Lanes 0 and 2 hold the low-pass history (left, right). */
	lp->x1L = x1[0]; lp->x1R = x1[2];
	lp->x2L = x2[0]; lp->x2R = x2[2];
	lp->y1L = y1[0]; lp->y1R = y1[2];
	lp->y2L = y2[0]; lp->y2R = y2[2];
	lp->z1L = z1[0]; lp->z1R = z1[2];
	lp->z2L = z2[0]; lp->z2R = z2[2];

	/* Lanes 1 and 3 hold the high-pass history (left, right). */
	hp->x1L = x1[1]; hp->x1R = x1[3];
	hp->x2L = x2[1]; hp->x2R = x2[3];
	hp->y1L = y1[1]; hp->y1R = y1[3];
	hp->y2L = y2[1]; hp->y2R = y2[3];
	hp->z1L = z1[1]; hp->z1R = z1[3];
	hp->z2L = z2[1]; hp->z2R = z2[3];
}
115#elif defined(__SSE3__) && defined(__x86_64__)
116#include <emmintrin.h>
/* SSE version of the band split.
 *
 * Every 4-lane vector is laid out as {lp left, hp left, lp right, hp right},
 * so one pass through the loop advances the low-pass and the high-pass
 * filter of both channels by one sample. Each filter is the same biquad
 * (b0, b1, b2, a1, a2) run twice in series: x -> y -> z.
 *
 * The low-pass result overwrites data0L/data0R and the high-pass result is
 * written to data1L/data1R. The x/y/z history is loaded from lp and hp on
 * entry and stored back into them on exit.
 *
 * The counter is decremented and tested at the bottom of the loop, so the
 * body always runs at least once: count is expected to be >= 1
 * (crossover2_process() returns early when count is 0).
 */
static void lr42_split(struct lr42 *lp, struct lr42 *hp, int count,
		       float *data0L, float *data0R,
		       float *data1L, float *data1R)
{
	/* Filter history, one vector per delay slot. */
	__m128 x1 = {lp->x1L, hp->x1L, lp->x1R, hp->x1R};
	__m128 x2 = {lp->x2L, hp->x2L, lp->x2R, hp->x2R};
	__m128 y1 = {lp->y1L, hp->y1L, lp->y1R, hp->y1R};
	__m128 y2 = {lp->y2L, hp->y2L, lp->y2R, hp->y2R};
	__m128 z1 = {lp->z1L, hp->z1L, lp->z1R, hp->z1R};
	__m128 z2 = {lp->z2L, hp->z2L, lp->z2R, hp->z2R};
	/* Coefficients; the same value is used for the left and right lanes. */
	__m128 b0 = {lp->b0, hp->b0, lp->b0, hp->b0};
	__m128 b1 = {lp->b1, hp->b1, lp->b1, hp->b1};
	__m128 b2 = {lp->b2, hp->b2, lp->b2, hp->b2};
	__m128 a1 = {lp->a1, hp->a1, lp->a1, hp->a1};
	__m128 a2 = {lp->a2, hp->a2, lp->a2, hp->a2};

	__asm__ __volatile__(
		/* AT&T syntax: "op src, dst". xmm2 = x (current input),
		 * xmm0 = y (first biquad output), xmm1 = scratch and later
		 * z (second biquad output). The x2, y2 and z2 registers are
		 * reused as scratch once their old value has been consumed
		 * and are reloaded with the shifted history afterwards.
		 */
		"1:                                     \n"
		/* Build xmm2 = {L, L, R, R} from one sample per channel. */
		"movss (%[data0L]), %%xmm2              \n"
		"movss (%[data0R]), %%xmm1              \n"
		"shufps $0, %%xmm1, %%xmm2              \n"
		/* x2 = b2 * x2 */
		"mulps %[b2],%[x2]                      \n"
		"movaps %[b0], %%xmm0                   \n"
		/* z2 = a2 * z2 */
		"mulps %[a2],%[z2]                      \n"
		"movaps %[b1], %%xmm1                   \n"
		/* xmm0 = b0 * x + b1 * x1 */
		"mulps %%xmm2,%%xmm0                    \n"
		"mulps %[x1],%%xmm1                     \n"
		"addps %%xmm1,%%xmm0                    \n"
		/* xmm1 = a1 * y1 */
		"movaps %[a1],%%xmm1                    \n"
		"mulps %[y1],%%xmm1                     \n"
		/* xmm0 += b2 * x2 (premultiplied above) */
		"addps %[x2],%%xmm0                     \n"
		/* x2 is free now: use it to hold b1 * y1 for the 2nd stage. */
		"movaps %[b1],%[x2]                     \n"
		"mulps %[y1],%[x2]                      \n"
		/* xmm0 -= a1 * y1 */
		"subps %%xmm1,%%xmm0                    \n"
		/* xmm1 = a2 * y2, then y2 = b2 * y2 for the 2nd stage. */
		"movaps %[a2],%%xmm1                    \n"
		"mulps %[y2],%%xmm1                     \n"
		"mulps %[b2],%[y2]                      \n"
		/* xmm0 -= a2 * y2; xmm0 is now the new y. */
		"subps %%xmm1,%%xmm0                    \n"
		/* xmm1 = b0 * y + b1 * y1 */
		"movaps %[b0],%%xmm1                    \n"
		"mulps %%xmm0,%%xmm1                    \n"
		"addps %[x2],%%xmm1                     \n"
		/* Shift the input history: x2 = x1, x1 = x. */
		"movaps %[x1],%[x2]                     \n"
		"movaps %%xmm2,%[x1]                    \n"
		/* xmm1 += b2 * y2 (premultiplied above) */
		"addps %[y2],%%xmm1                     \n"
		/* y2 is free now: xmm1 -= a1 * z1 */
		"movaps %[a1],%[y2]                     \n"
		"mulps %[z1],%[y2]                      \n"
		"subps %[y2],%%xmm1                     \n"
		/* Shift the first-stage history: y2 = y1, y1 = y. */
		"movaps %[y1],%[y2]                     \n"
		"movaps %%xmm0,%[y1]                    \n"
		/* xmm1 -= a2 * z2 (premultiplied above); xmm1 is now z. */
		"subps %[z2],%%xmm1                     \n"
		/* Shift the second-stage history: z2 = z1, z1 = z. */
		"movaps %[z1],%[z2]                     \n"
		"movaps %%xmm1,%[z1]                    \n"
		/* Store lane 0, then rotate the vector down by one lane
		 * (shuffle 0x39) to bring the next lane into position:
		 * lp left -> data0L, hp left -> data1L,
		 * lp right -> data0R, hp right -> data1R.
		 */
		"movss %%xmm1, (%[data0L])              \n"
		"shufps $0x39, %%xmm1, %%xmm1           \n"
		"movss %%xmm1, (%[data1L])              \n"
		"shufps $0x39, %%xmm1, %%xmm1           \n"
		"movss %%xmm1, (%[data0R])              \n"
		"shufps $0x39, %%xmm1, %%xmm1           \n"
		"movss %%xmm1, (%[data1R])              \n"
		/* Advance every pointer by one float. */
		"add $4, %[data0L]                      \n"
		"add $4, %[data1L]                      \n"
		"add $4, %[data0R]                      \n"
		"add $4, %[data1R]                      \n"
		/* Loop until the counter reaches zero. */
		"sub $1, %[count]                       \n"
		"jnz 1b                                 \n"
		: /* output: pointers, counter and history are read-write */
		  [data0L]"+r"(data0L),
		  [data0R]"+r"(data0R),
		  [data1L]"+r"(data1L),
		  [data1R]"+r"(data1R),
		  [count]"+r"(count),
		  [x1]"+x"(x1),
		  [x2]"+x"(x2),
		  [y1]"+x"(y1),
		  [y2]"+x"(y2),
		  [z1]"+x"(z1),
		  [z2]"+x"(z2)
		: /* input: coefficients, never modified by the loop */
		  [b0]"x"(b0),
		  [b1]"x"(b1),
		  [b2]"x"(b2),
		  [a1]"x"(a1),
		  [a2]"x"(a2)
		: /* clobber: scratch vectors, the sample buffers, flags */
		  "xmm0", "xmm1", "xmm2", "memory", "cc"
		);

	/* Lanes 0 and 2 hold the low-pass history (left, right). */
	lp->x1L = x1[0]; lp->x1R = x1[2];
	lp->x2L = x2[0]; lp->x2R = x2[2];
	lp->y1L = y1[0]; lp->y1R = y1[2];
	lp->y2L = y2[0]; lp->y2R = y2[2];
	lp->z1L = z1[0]; lp->z1R = z1[2];
	lp->z2L = z2[0]; lp->z2R = z2[2];

	/* Lanes 1 and 3 hold the high-pass history (left, right). */
	hp->x1L = x1[1]; hp->x1R = x1[3];
	hp->x2L = x2[1]; hp->x2R = x2[3];
	hp->y1L = y1[1]; hp->y1R = y1[3];
	hp->y2L = y2[1]; hp->y2R = y2[3];
	hp->z1L = z1[1]; hp->z1R = z1[3];
	hp->z2L = z2[1]; hp->z2R = z2[3];
}
218#else
219static void lr42_split(struct lr42 *lp, struct lr42 *hp, int count,
220		       float *data0L, float *data0R,
221		       float *data1L, float *data1R)
222{
223	float lx1L = lp->x1L, lx1R = lp->x1R;
224	float lx2L = lp->x2L, lx2R = lp->x2R;
225	float ly1L = lp->y1L, ly1R = lp->y1R;
226	float ly2L = lp->y2L, ly2R = lp->y2R;
227	float lz1L = lp->z1L, lz1R = lp->z1R;
228	float lz2L = lp->z2L, lz2R = lp->z2R;
229	float lb0 = lp->b0;
230	float lb1 = lp->b1;
231	float lb2 = lp->b2;
232	float la1 = lp->a1;
233	float la2 = lp->a2;
234
235	float hx1L = hp->x1L, hx1R = hp->x1R;
236	float hx2L = hp->x2L, hx2R = hp->x2R;
237	float hy1L = hp->y1L, hy1R = hp->y1R;
238	float hy2L = hp->y2L, hy2R = hp->y2R;
239	float hz1L = hp->z1L, hz1R = hp->z1R;
240	float hz2L = hp->z2L, hz2R = hp->z2R;
241	float hb0 = hp->b0;
242	float hb1 = hp->b1;
243	float hb2 = hp->b2;
244	float ha1 = hp->a1;
245	float ha2 = hp->a2;
246
247	int i;
248	for (i = 0; i < count; i++) {
249		float xL, yL, zL, xR, yR, zR;
250		xL = data0L[i];
251		xR = data0R[i];
252		yL = lb0*xL + lb1*lx1L + lb2*lx2L - la1*ly1L - la2*ly2L;
253		yR = lb0*xR + lb1*lx1R + lb2*lx2R - la1*ly1R - la2*ly2R;
254		zL = lb0*yL + lb1*ly1L + lb2*ly2L - la1*lz1L - la2*lz2L;
255		zR = lb0*yR + lb1*ly1R + lb2*ly2R - la1*lz1R - la2*lz2R;
256		lx2L = lx1L;
257		lx2R = lx1R;
258		lx1L = xL;
259		lx1R = xR;
260		ly2L = ly1L;
261		ly2R = ly1R;
262		ly1L = yL;
263		ly1R = yR;
264		lz2L = lz1L;
265		lz2R = lz1R;
266		lz1L = zL;
267		lz1R = zR;
268		data0L[i] = zL;
269		data0R[i] = zR;
270
271		yL = hb0*xL + hb1*hx1L + hb2*hx2L - ha1*hy1L - ha2*hy2L;
272		yR = hb0*xR + hb1*hx1R + hb2*hx2R - ha1*hy1R - ha2*hy2R;
273		zL = hb0*yL + hb1*hy1L + hb2*hy2L - ha1*hz1L - ha2*hz2L;
274		zR = hb0*yR + hb1*hy1R + hb2*hy2R - ha1*hz1R - ha2*hz2R;
275		hx2L = hx1L;
276		hx2R = hx1R;
277		hx1L = xL;
278		hx1R = xR;
279		hy2L = hy1L;
280		hy2R = hy1R;
281		hy1L = yL;
282		hy1R = yR;
283		hz2L = hz1L;
284		hz2R = hz1R;
285		hz1L = zL;
286		hz1R = zR;
287		data1L[i] = zL;
288		data1R[i] = zR;
289	}
290
291	lp->x1L = lx1L; lp->x1R = lx1R;
292	lp->x2L = lx2L;	lp->x2R = lx2R;
293	lp->y1L = ly1L;	lp->y1R = ly1R;
294	lp->y2L = ly2L;	lp->y2R = ly2R;
295	lp->z1L = lz1L;	lp->z1R = lz1R;
296	lp->z2L = lz2L;	lp->z2R = lz2R;
297
298	hp->x1L = hx1L; hp->x1R = hx1R;
299	hp->x2L = hx2L;	hp->x2R = hx2R;
300	hp->y1L = hy1L;	hp->y1R = hy1R;
301	hp->y2L = hy2L;	hp->y2R = hy2R;
302	hp->z1L = hz1L;	hp->z1R = hz1R;
303	hp->z2L = hz2L;	hp->z2R = hz2R;
304}
305#endif
306
307/* Split input data using two LR4 filters and sum them back to the original
308 * data array.
309 *
310 * data --+-- lp --+--> data
311 *        |        |
312 *        \-- hp --/
313 */
314#if defined(__ARM_NEON__)
315#include <arm_neon.h>
/* NEON version of the split-and-sum step.
 *
 * Every 4-lane vector is laid out as {lp left, hp left, lp right, hp right},
 * so one pass through the loop advances the low-pass and the high-pass
 * filter of both channels by one sample. Each filter is the same biquad
 * (b0, b1, b2, a1, a2) run twice in series: x -> y -> z.
 *
 * For each channel the low-pass and high-pass outputs are added together
 * and the sum overwrites the input sample in dataL/dataR. The x/y/z history
 * is loaded from lp and hp on entry and stored back into them on exit.
 *
 * The loop counter is decremented inside the body and only branched on at
 * the bottom, so the body always runs at least once: count is expected to
 * be >= 1 (crossover2_process() returns early when count is 0).
 */
static void lr42_merge(struct lr42 *lp, struct lr42 *hp, int count,
		       float *dataL, float *dataR)
{
	/* Filter history, one vector per delay slot. */
	float32x4_t x1 = {lp->x1L, hp->x1L, lp->x1R, hp->x1R};
	float32x4_t x2 = {lp->x2L, hp->x2L, lp->x2R, hp->x2R};
	float32x4_t y1 = {lp->y1L, hp->y1L, lp->y1R, hp->y1R};
	float32x4_t y2 = {lp->y2L, hp->y2L, lp->y2R, hp->y2R};
	float32x4_t z1 = {lp->z1L, hp->z1L, lp->z1R, hp->z1R};
	float32x4_t z2 = {lp->z2L, hp->z2L, lp->z2R, hp->z2R};
	/* Coefficients; the same value is used for the left and right lanes. */
	float32x4_t b0 = {lp->b0, hp->b0, lp->b0, hp->b0};
	float32x4_t b1 = {lp->b1, hp->b1, lp->b1, hp->b1};
	float32x4_t b2 = {lp->b2, hp->b2, lp->b2, hp->b2};
	float32x4_t a1 = {lp->a1, hp->a1, lp->a1, hp->a1};
	float32x4_t a2 = {lp->a2, hp->a2, lp->a2, hp->a2};

	__asm__ __volatile__(
		/* q0 = x (current input), q1 = y (first biquad output),
		 * q2 = z (second biquad output). d0/d1 are the two halves
		 * of q0 and d4/d5 the two halves of q2.
		 */
		"1:                                     \n"
		/* y = b1 * x1 */
		"vmul.f32 q1, %q[b1], %q[x1]            \n"
		/* Load one left sample into both lanes of d0 and one right
		 * sample into both lanes of d1, giving q0 = {L, L, R, R}.
		 */
		"vld1.32 d0[], [%[dataL]]               \n"
		"vld1.32 d1[], [%[dataR]]               \n"
		/* Decrement the counter; the flags are consumed by the
		 * "bne" at the bottom of the loop.
		 */
		"subs %[count], #1                      \n"
		/* z = b1 * y1 */
		"vmul.f32 q2, %q[b1], %q[y1]            \n"
		/* y += b0 * x + b2 * x2 */
		"vmla.f32 q1, %q[b0], q0                \n"
		"vmla.f32 q1, %q[b2], %q[x2]            \n"
		/* Shift the input history: x2 = x1, x1 = x. */
		"vmov.f32 %q[x2], %q[x1]                \n"
		"vmov.f32 %q[x1], q0                    \n"
		/* y -= a1 * y1 + a2 * y2 */
		"vmls.f32 q1, %q[a1], %q[y1]            \n"
		"vmls.f32 q1, %q[a2], %q[y2]            \n"
		/* z += b0 * y + b2 * y2 */
		"vmla.f32 q2, %q[b0], q1                \n"
		"vmla.f32 q2, %q[b2], %q[y2]            \n"
		/* Shift the first-stage history: y2 = y1, y1 = y. */
		"vmov.f32 %q[y2], %q[y1]                \n"
		"vmov.f32 %q[y1], q1                    \n"
		/* z -= a1 * z1 + a2 * z2 */
		"vmls.f32 q2, %q[a1], %q[z1]            \n"
		"vmls.f32 q2, %q[a2], %q[z2]            \n"
		/* Shift the second-stage history: z2 = z1, z1 = z. This
		 * happens before the pairwise add below so the saved
		 * history keeps the separate lp and hp outputs.
		 */
		"vmov.f32 %q[z2], %q[z1]                \n"
		"vmov.f32 %q[z1], q2                    \n"
		/* Pairwise add: d4 = {lp left + hp left,
		 * lp right + hp right}.
		 */
		"vpadd.f32 d4, d4, d5                   \n"
		/* Write the sums over the inputs, post-incrementing. */
		"vst1.f32 d4[0], [%[dataL]]!            \n"
		"vst1.f32 d4[1], [%[dataR]]!            \n"
		/* Loop until the counter decremented above reaches zero. */
		"bne 1b                                 \n"
		: /* output: operands 0-2 are the updated pointers and
		   * counter; the history vectors are read-write.
		   */
		  "=r"(dataL),
		  "=r"(dataR),
		  "=r"(count),
		  [x1]"+w"(x1),
		  [x2]"+w"(x2),
		  [y1]"+w"(y1),
		  [y2]"+w"(y2),
		  [z1]"+w"(z1),
		  [z2]"+w"(z2)
		: /* input: the "0".."2" matching constraints tie each
		   * pointer/counter to the output operand of that index.
		   */
		  [dataL]"0"(dataL),
		  [dataR]"1"(dataR),
		  [count]"2"(count),
		  [b0]"w"(b0),
		  [b1]"w"(b1),
		  [b2]"w"(b2),
		  [a1]"w"(a1),
		  [a2]"w"(a2)
		: /* clobber: scratch vectors, the sample buffers, flags */
		  "q0", "q1", "q2", "memory", "cc"
		);

	/* Lanes 0 and 2 hold the low-pass history (left, right). */
	lp->x1L = x1[0]; lp->x1R = x1[2];
	lp->x2L = x2[0]; lp->x2R = x2[2];
	lp->y1L = y1[0]; lp->y1R = y1[2];
	lp->y2L = y2[0]; lp->y2R = y2[2];
	lp->z1L = z1[0]; lp->z1R = z1[2];
	lp->z2L = z2[0]; lp->z2R = z2[2];

	/* Lanes 1 and 3 hold the high-pass history (left, right). */
	hp->x1L = x1[1]; hp->x1R = x1[3];
	hp->x2L = x2[1]; hp->x2R = x2[3];
	hp->y1L = y1[1]; hp->y1R = y1[3];
	hp->y2L = y2[1]; hp->y2R = y2[3];
	hp->z1L = z1[1]; hp->z1R = z1[3];
	hp->z2L = z2[1]; hp->z2R = z2[3];
}
394#elif defined(__SSE3__) && defined(__x86_64__)
395#include <emmintrin.h>
/* SSE3 version of the split-and-sum step.
 *
 * Every 4-lane vector is laid out as {lp left, hp left, lp right, hp right},
 * so one pass through the loop advances the low-pass and the high-pass
 * filter of both channels by one sample. Each filter is the same biquad
 * (b0, b1, b2, a1, a2) run twice in series: x -> y -> z.
 *
 * For each channel the low-pass and high-pass outputs are added together
 * and the sum overwrites the input sample in dataL/dataR. The x/y/z history
 * is loaded from lp and hp on entry and stored back into them on exit.
 *
 * The counter is decremented and tested at the bottom of the loop, so the
 * body always runs at least once: count is expected to be >= 1
 * (crossover2_process() returns early when count is 0).
 */
static void lr42_merge(struct lr42 *lp, struct lr42 *hp, int count,
		       float *dataL, float *dataR)
{
	/* Filter history, one vector per delay slot. */
	__m128 x1 = {lp->x1L, hp->x1L, lp->x1R, hp->x1R};
	__m128 x2 = {lp->x2L, hp->x2L, lp->x2R, hp->x2R};
	__m128 y1 = {lp->y1L, hp->y1L, lp->y1R, hp->y1R};
	__m128 y2 = {lp->y2L, hp->y2L, lp->y2R, hp->y2R};
	__m128 z1 = {lp->z1L, hp->z1L, lp->z1R, hp->z1R};
	__m128 z2 = {lp->z2L, hp->z2L, lp->z2R, hp->z2R};
	/* Coefficients; the same value is used for the left and right lanes. */
	__m128 b0 = {lp->b0, hp->b0, lp->b0, hp->b0};
	__m128 b1 = {lp->b1, hp->b1, lp->b1, hp->b1};
	__m128 b2 = {lp->b2, hp->b2, lp->b2, hp->b2};
	__m128 a1 = {lp->a1, hp->a1, lp->a1, hp->a1};
	__m128 a2 = {lp->a2, hp->a2, lp->a2, hp->a2};

	__asm__ __volatile__(
		/* AT&T syntax: "op src, dst". xmm2 = x (current input),
		 * xmm0 = y (first biquad output), xmm1 = scratch and later
		 * z (second biquad output). The x2, y2 and z2 registers are
		 * reused as scratch once their old value has been consumed
		 * and are reloaded with the shifted history afterwards.
		 */
		"1:                                     \n"
		/* Build xmm2 = {L, L, R, R} from one sample per channel. */
		"movss (%[dataL]), %%xmm2               \n"
		"movss (%[dataR]), %%xmm1               \n"
		"shufps $0, %%xmm1, %%xmm2              \n"
		/* x2 = b2 * x2 */
		"mulps %[b2],%[x2]                      \n"
		"movaps %[b0], %%xmm0                   \n"
		/* z2 = a2 * z2 */
		"mulps %[a2],%[z2]                      \n"
		"movaps %[b1], %%xmm1                   \n"
		/* xmm0 = b0 * x + b1 * x1 */
		"mulps %%xmm2,%%xmm0                    \n"
		"mulps %[x1],%%xmm1                     \n"
		"addps %%xmm1,%%xmm0                    \n"
		/* xmm1 = a1 * y1 */
		"movaps %[a1],%%xmm1                    \n"
		"mulps %[y1],%%xmm1                     \n"
		/* xmm0 += b2 * x2 (premultiplied above) */
		"addps %[x2],%%xmm0                     \n"
		/* x2 is free now: use it to hold b1 * y1 for the 2nd stage. */
		"movaps %[b1],%[x2]                     \n"
		"mulps %[y1],%[x2]                      \n"
		/* xmm0 -= a1 * y1 */
		"subps %%xmm1,%%xmm0                    \n"
		/* xmm1 = a2 * y2, then y2 = b2 * y2 for the 2nd stage. */
		"movaps %[a2],%%xmm1                    \n"
		"mulps %[y2],%%xmm1                     \n"
		"mulps %[b2],%[y2]                      \n"
		/* xmm0 -= a2 * y2; xmm0 is now the new y. */
		"subps %%xmm1,%%xmm0                    \n"
		/* xmm1 = b0 * y + b1 * y1 */
		"movaps %[b0],%%xmm1                    \n"
		"mulps %%xmm0,%%xmm1                    \n"
		"addps %[x2],%%xmm1                     \n"
		/* Shift the input history: x2 = x1, x1 = x. */
		"movaps %[x1],%[x2]                     \n"
		"movaps %%xmm2,%[x1]                    \n"
		/* xmm1 += b2 * y2 (premultiplied above) */
		"addps %[y2],%%xmm1                     \n"
		/* y2 is free now: xmm1 -= a1 * z1 */
		"movaps %[a1],%[y2]                     \n"
		"mulps %[z1],%[y2]                      \n"
		"subps %[y2],%%xmm1                     \n"
		/* Shift the first-stage history: y2 = y1, y1 = y. */
		"movaps %[y1],%[y2]                     \n"
		"movaps %%xmm0,%[y1]                    \n"
		/* xmm1 -= a2 * z2 (premultiplied above); xmm1 is now z. */
		"subps %[z2],%%xmm1                     \n"
		/* Shift the second-stage history: z2 = z1, z1 = z. This
		 * happens before the horizontal add below so the saved
		 * history keeps the separate lp and hp outputs.
		 */
		"movaps %[z1],%[z2]                     \n"
		"movaps %%xmm1,%[z1]                    \n"
		/* Horizontal add: lane 0 = lp left + hp left,
		 * lane 1 = lp right + hp right.
		 */
		"haddps %%xmm1, %%xmm1                  \n"
		/* Store lane 0, rotate down one lane (shuffle 0x39), store
		 * the former lane 1.
		 */
		"movss %%xmm1, (%[dataL])               \n"
		"shufps $0x39, %%xmm1, %%xmm1           \n"
		"movss %%xmm1, (%[dataR])               \n"
		/* Advance both pointers by one float. */
		"add $4, %[dataL]                       \n"
		"add $4, %[dataR]                       \n"
		/* Loop until the counter reaches zero. */
		"sub $1, %[count]                       \n"
		"jnz 1b                                 \n"
		: /* output: pointers, counter and history are read-write */
		  [dataL]"+r"(dataL),
		  [dataR]"+r"(dataR),
		  [count]"+r"(count),
		  [x1]"+x"(x1),
		  [x2]"+x"(x2),
		  [y1]"+x"(y1),
		  [y2]"+x"(y2),
		  [z1]"+x"(z1),
		  [z2]"+x"(z2)
		: /* input: coefficients, never modified by the loop */
		  [b0]"x"(b0),
		  [b1]"x"(b1),
		  [b2]"x"(b2),
		  [a1]"x"(a1),
		  [a2]"x"(a2)
		: /* clobber: scratch vectors, the sample buffers, flags */
		  "xmm0", "xmm1", "xmm2", "memory", "cc"
		);

	/* Lanes 0 and 2 hold the low-pass history (left, right). */
	lp->x1L = x1[0]; lp->x1R = x1[2];
	lp->x2L = x2[0]; lp->x2R = x2[2];
	lp->y1L = y1[0]; lp->y1R = y1[2];
	lp->y2L = y2[0]; lp->y2R = y2[2];
	lp->z1L = z1[0]; lp->z1R = z1[2];
	lp->z2L = z2[0]; lp->z2R = z2[2];

	/* Lanes 1 and 3 hold the high-pass history (left, right). */
	hp->x1L = x1[1]; hp->x1R = x1[3];
	hp->x2L = x2[1]; hp->x2R = x2[3];
	hp->y1L = y1[1]; hp->y1R = y1[3];
	hp->y2L = y2[1]; hp->y2R = y2[3];
	hp->z1L = z1[1]; hp->z1R = z1[3];
	hp->z2L = z2[1]; hp->z2R = z2[3];
}
489#else
490static void lr42_merge(struct lr42 *lp, struct lr42 *hp, int count,
491		       float *dataL, float *dataR)
492{
493	float lx1L = lp->x1L, lx1R = lp->x1R;
494	float lx2L = lp->x2L, lx2R = lp->x2R;
495	float ly1L = lp->y1L, ly1R = lp->y1R;
496	float ly2L = lp->y2L, ly2R = lp->y2R;
497	float lz1L = lp->z1L, lz1R = lp->z1R;
498	float lz2L = lp->z2L, lz2R = lp->z2R;
499	float lb0 = lp->b0;
500	float lb1 = lp->b1;
501	float lb2 = lp->b2;
502	float la1 = lp->a1;
503	float la2 = lp->a2;
504
505	float hx1L = hp->x1L, hx1R = hp->x1R;
506	float hx2L = hp->x2L, hx2R = hp->x2R;
507	float hy1L = hp->y1L, hy1R = hp->y1R;
508	float hy2L = hp->y2L, hy2R = hp->y2R;
509	float hz1L = hp->z1L, hz1R = hp->z1R;
510	float hz2L = hp->z2L, hz2R = hp->z2R;
511	float hb0 = hp->b0;
512	float hb1 = hp->b1;
513	float hb2 = hp->b2;
514	float ha1 = hp->a1;
515	float ha2 = hp->a2;
516
517	int i;
518	for (i = 0; i < count; i++) {
519		float xL, yL, zL, xR, yR, zR;
520		xL = dataL[i];
521		xR = dataR[i];
522		yL = lb0*xL + lb1*lx1L + lb2*lx2L - la1*ly1L - la2*ly2L;
523		yR = lb0*xR + lb1*lx1R + lb2*lx2R - la1*ly1R - la2*ly2R;
524		zL = lb0*yL + lb1*ly1L + lb2*ly2L - la1*lz1L - la2*lz2L;
525		zR = lb0*yR + lb1*ly1R + lb2*ly2R - la1*lz1R - la2*lz2R;
526		lx2L = lx1L;
527		lx2R = lx1R;
528		lx1L = xL;
529		lx1R = xR;
530		ly2L = ly1L;
531		ly2R = ly1R;
532		ly1L = yL;
533		ly1R = yR;
534		lz2L = lz1L;
535		lz2R = lz1R;
536		lz1L = zL;
537		lz1R = zR;
538
539		yL = hb0*xL + hb1*hx1L + hb2*hx2L - ha1*hy1L - ha2*hy2L;
540		yR = hb0*xR + hb1*hx1R + hb2*hx2R - ha1*hy1R - ha2*hy2R;
541		zL = hb0*yL + hb1*hy1L + hb2*hy2L - ha1*hz1L - ha2*hz2L;
542		zR = hb0*yR + hb1*hy1R + hb2*hy2R - ha1*hz1R - ha2*hz2R;
543		hx2L = hx1L;
544		hx2R = hx1R;
545		hx1L = xL;
546		hx1R = xR;
547		hy2L = hy1L;
548		hy2R = hy1R;
549		hy1L = yL;
550		hy1R = yR;
551		hz2L = hz1L;
552		hz2R = hz1R;
553		hz1L = zL;
554		hz1R = zR;
555		dataL[i] = zL + lz1L;
556		dataR[i] = zR + lz1R;
557	}
558
559	lp->x1L = lx1L; lp->x1R = lx1R;
560	lp->x2L = lx2L;	lp->x2R = lx2R;
561	lp->y1L = ly1L;	lp->y1R = ly1R;
562	lp->y2L = ly2L;	lp->y2R = ly2R;
563	lp->z1L = lz1L;	lp->z1R = lz1R;
564	lp->z2L = lz2L;	lp->z2R = lz2R;
565
566	hp->x1L = hx1L; hp->x1R = hx1R;
567	hp->x2L = hx2L;	hp->x2R = hx2R;
568	hp->y1L = hy1L;	hp->y1R = hy1R;
569	hp->y2L = hy2L;	hp->y2R = hy2R;
570	hp->z1L = hz1L;	hp->z1R = hz1R;
571	hp->z2L = hz2L;	hp->z2R = hz2R;
572}
573#endif
574
575void crossover2_init(struct crossover2 *xo2, float freq1, float freq2)
576{
577	int i;
578	for (i = 0; i < 3; i++) {
579		float f = (i == 0) ? freq1 : freq2;
580		lr42_set(&xo2->lp[i], BQ_LOWPASS, f);
581		lr42_set(&xo2->hp[i], BQ_HIGHPASS, f);
582	}
583}
584
585void crossover2_process(struct crossover2 *xo2, int count,
586			float *data0L, float *data0R,
587			float *data1L, float *data1R,
588			float *data2L, float *data2R)
589{
590	if (!count)
591		return;
592
593	lr42_split(&xo2->lp[0], &xo2->hp[0], count, data0L, data0R,
594		   data1L, data1R);
595	lr42_merge(&xo2->lp[1], &xo2->hp[1], count, data0L, data0R);
596	lr42_split(&xo2->lp[2], &xo2->hp[2], count, data1L, data1R,
597		   data2L, data2R);
598}
599