1/*
2 *
3 *  Bluetooth low-complexity, subband codec (SBC) library
4 *
5 *  Copyright (C) 2004-2009  Marcel Holtmann <marcel@holtmann.org>
6 *  Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
7 *  Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
8 *
9 *
10 *  This library is free software; you can redistribute it and/or
11 *  modify it under the terms of the GNU Lesser General Public
12 *  License as published by the Free Software Foundation; either
13 *  version 2.1 of the License, or (at your option) any later version.
14 *
15 *  This library is distributed in the hope that it will be useful,
16 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 *  Lesser General Public License for more details.
19 *
20 *  You should have received a copy of the GNU Lesser General Public
21 *  License along with this library; if not, write to the Free Software
22 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
23 *
24 */
25
26#include <stdint.h>
27#include <limits.h>
28#include "sbc.h"
29#include "sbc_math.h"
30#include "sbc_tables.h"
31
32#include "sbc_primitives_mmx.h"
33
34/*
35 * MMX optimizations
36 */
37
38#ifdef SBC_BUILD_WITH_MMX_SUPPORT
39
40static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out,
41					const FIXED_T *consts)
42{
43	static const SBC_ALIGNED int32_t round_c[2] = {
44		1 << (SBC_PROTO_FIXED4_SCALE - 1),
45		1 << (SBC_PROTO_FIXED4_SCALE - 1),
46	};
47	asm volatile (
48		"movq        (%0), %%mm0\n"
49		"movq       8(%0), %%mm1\n"
50		"pmaddwd     (%1), %%mm0\n"
51		"pmaddwd    8(%1), %%mm1\n"
52		"paddd       (%2), %%mm0\n"
53		"paddd       (%2), %%mm1\n"
54		"\n"
55		"movq      16(%0), %%mm2\n"
56		"movq      24(%0), %%mm3\n"
57		"pmaddwd   16(%1), %%mm2\n"
58		"pmaddwd   24(%1), %%mm3\n"
59		"paddd      %%mm2, %%mm0\n"
60		"paddd      %%mm3, %%mm1\n"
61		"\n"
62		"movq      32(%0), %%mm2\n"
63		"movq      40(%0), %%mm3\n"
64		"pmaddwd   32(%1), %%mm2\n"
65		"pmaddwd   40(%1), %%mm3\n"
66		"paddd      %%mm2, %%mm0\n"
67		"paddd      %%mm3, %%mm1\n"
68		"\n"
69		"movq      48(%0), %%mm2\n"
70		"movq      56(%0), %%mm3\n"
71		"pmaddwd   48(%1), %%mm2\n"
72		"pmaddwd   56(%1), %%mm3\n"
73		"paddd      %%mm2, %%mm0\n"
74		"paddd      %%mm3, %%mm1\n"
75		"\n"
76		"movq      64(%0), %%mm2\n"
77		"movq      72(%0), %%mm3\n"
78		"pmaddwd   64(%1), %%mm2\n"
79		"pmaddwd   72(%1), %%mm3\n"
80		"paddd      %%mm2, %%mm0\n"
81		"paddd      %%mm3, %%mm1\n"
82		"\n"
83		"psrad         %4, %%mm0\n"
84		"psrad         %4, %%mm1\n"
85		"packssdw   %%mm0, %%mm0\n"
86		"packssdw   %%mm1, %%mm1\n"
87		"\n"
88		"movq       %%mm0, %%mm2\n"
89		"pmaddwd   80(%1), %%mm0\n"
90		"pmaddwd   88(%1), %%mm2\n"
91		"\n"
92		"movq       %%mm1, %%mm3\n"
93		"pmaddwd   96(%1), %%mm1\n"
94		"pmaddwd  104(%1), %%mm3\n"
95		"paddd      %%mm1, %%mm0\n"
96		"paddd      %%mm3, %%mm2\n"
97		"\n"
98		"movq       %%mm0, (%3)\n"
99		"movq       %%mm2, 8(%3)\n"
100		:
101		: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
102			"i" (SBC_PROTO_FIXED4_SCALE)
103		: "memory");
104}
105
106static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out,
107							const FIXED_T *consts)
108{
109	static const SBC_ALIGNED int32_t round_c[2] = {
110		1 << (SBC_PROTO_FIXED8_SCALE - 1),
111		1 << (SBC_PROTO_FIXED8_SCALE - 1),
112	};
113	asm volatile (
114		"movq        (%0), %%mm0\n"
115		"movq       8(%0), %%mm1\n"
116		"movq      16(%0), %%mm2\n"
117		"movq      24(%0), %%mm3\n"
118		"pmaddwd     (%1), %%mm0\n"
119		"pmaddwd    8(%1), %%mm1\n"
120		"pmaddwd   16(%1), %%mm2\n"
121		"pmaddwd   24(%1), %%mm3\n"
122		"paddd       (%2), %%mm0\n"
123		"paddd       (%2), %%mm1\n"
124		"paddd       (%2), %%mm2\n"
125		"paddd       (%2), %%mm3\n"
126		"\n"
127		"movq      32(%0), %%mm4\n"
128		"movq      40(%0), %%mm5\n"
129		"movq      48(%0), %%mm6\n"
130		"movq      56(%0), %%mm7\n"
131		"pmaddwd   32(%1), %%mm4\n"
132		"pmaddwd   40(%1), %%mm5\n"
133		"pmaddwd   48(%1), %%mm6\n"
134		"pmaddwd   56(%1), %%mm7\n"
135		"paddd      %%mm4, %%mm0\n"
136		"paddd      %%mm5, %%mm1\n"
137		"paddd      %%mm6, %%mm2\n"
138		"paddd      %%mm7, %%mm3\n"
139		"\n"
140		"movq      64(%0), %%mm4\n"
141		"movq      72(%0), %%mm5\n"
142		"movq      80(%0), %%mm6\n"
143		"movq      88(%0), %%mm7\n"
144		"pmaddwd   64(%1), %%mm4\n"
145		"pmaddwd   72(%1), %%mm5\n"
146		"pmaddwd   80(%1), %%mm6\n"
147		"pmaddwd   88(%1), %%mm7\n"
148		"paddd      %%mm4, %%mm0\n"
149		"paddd      %%mm5, %%mm1\n"
150		"paddd      %%mm6, %%mm2\n"
151		"paddd      %%mm7, %%mm3\n"
152		"\n"
153		"movq      96(%0), %%mm4\n"
154		"movq     104(%0), %%mm5\n"
155		"movq     112(%0), %%mm6\n"
156		"movq     120(%0), %%mm7\n"
157		"pmaddwd   96(%1), %%mm4\n"
158		"pmaddwd  104(%1), %%mm5\n"
159		"pmaddwd  112(%1), %%mm6\n"
160		"pmaddwd  120(%1), %%mm7\n"
161		"paddd      %%mm4, %%mm0\n"
162		"paddd      %%mm5, %%mm1\n"
163		"paddd      %%mm6, %%mm2\n"
164		"paddd      %%mm7, %%mm3\n"
165		"\n"
166		"movq     128(%0), %%mm4\n"
167		"movq     136(%0), %%mm5\n"
168		"movq     144(%0), %%mm6\n"
169		"movq     152(%0), %%mm7\n"
170		"pmaddwd  128(%1), %%mm4\n"
171		"pmaddwd  136(%1), %%mm5\n"
172		"pmaddwd  144(%1), %%mm6\n"
173		"pmaddwd  152(%1), %%mm7\n"
174		"paddd      %%mm4, %%mm0\n"
175		"paddd      %%mm5, %%mm1\n"
176		"paddd      %%mm6, %%mm2\n"
177		"paddd      %%mm7, %%mm3\n"
178		"\n"
179		"psrad         %4, %%mm0\n"
180		"psrad         %4, %%mm1\n"
181		"psrad         %4, %%mm2\n"
182		"psrad         %4, %%mm3\n"
183		"\n"
184		"packssdw   %%mm0, %%mm0\n"
185		"packssdw   %%mm1, %%mm1\n"
186		"packssdw   %%mm2, %%mm2\n"
187		"packssdw   %%mm3, %%mm3\n"
188		"\n"
189		"movq       %%mm0, %%mm4\n"
190		"movq       %%mm0, %%mm5\n"
191		"pmaddwd  160(%1), %%mm4\n"
192		"pmaddwd  168(%1), %%mm5\n"
193		"\n"
194		"movq       %%mm1, %%mm6\n"
195		"movq       %%mm1, %%mm7\n"
196		"pmaddwd  192(%1), %%mm6\n"
197		"pmaddwd  200(%1), %%mm7\n"
198		"paddd      %%mm6, %%mm4\n"
199		"paddd      %%mm7, %%mm5\n"
200		"\n"
201		"movq       %%mm2, %%mm6\n"
202		"movq       %%mm2, %%mm7\n"
203		"pmaddwd  224(%1), %%mm6\n"
204		"pmaddwd  232(%1), %%mm7\n"
205		"paddd      %%mm6, %%mm4\n"
206		"paddd      %%mm7, %%mm5\n"
207		"\n"
208		"movq       %%mm3, %%mm6\n"
209		"movq       %%mm3, %%mm7\n"
210		"pmaddwd  256(%1), %%mm6\n"
211		"pmaddwd  264(%1), %%mm7\n"
212		"paddd      %%mm6, %%mm4\n"
213		"paddd      %%mm7, %%mm5\n"
214		"\n"
215		"movq       %%mm4, (%3)\n"
216		"movq       %%mm5, 8(%3)\n"
217		"\n"
218		"movq       %%mm0, %%mm5\n"
219		"pmaddwd  176(%1), %%mm0\n"
220		"pmaddwd  184(%1), %%mm5\n"
221		"\n"
222		"movq       %%mm1, %%mm7\n"
223		"pmaddwd  208(%1), %%mm1\n"
224		"pmaddwd  216(%1), %%mm7\n"
225		"paddd      %%mm1, %%mm0\n"
226		"paddd      %%mm7, %%mm5\n"
227		"\n"
228		"movq       %%mm2, %%mm7\n"
229		"pmaddwd  240(%1), %%mm2\n"
230		"pmaddwd  248(%1), %%mm7\n"
231		"paddd      %%mm2, %%mm0\n"
232		"paddd      %%mm7, %%mm5\n"
233		"\n"
234		"movq       %%mm3, %%mm7\n"
235		"pmaddwd  272(%1), %%mm3\n"
236		"pmaddwd  280(%1), %%mm7\n"
237		"paddd      %%mm3, %%mm0\n"
238		"paddd      %%mm7, %%mm5\n"
239		"\n"
240		"movq       %%mm0, 16(%3)\n"
241		"movq       %%mm5, 24(%3)\n"
242		:
243		: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
244			"i" (SBC_PROTO_FIXED8_SCALE)
245		: "memory");
246}
247
248static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out,
249						int out_stride)
250{
251	/* Analyze blocks */
252	sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd);
253	out += out_stride;
254	sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even);
255	out += out_stride;
256	sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd);
257	out += out_stride;
258	sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even);
259
260	asm volatile ("emms\n");
261}
262
263static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out,
264						int out_stride)
265{
266	/* Analyze blocks */
267	sbc_analyze_eight_mmx(x + 24, out, analysis_consts_fixed8_simd_odd);
268	out += out_stride;
269	sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even);
270	out += out_stride;
271	sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd);
272	out += out_stride;
273	sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even);
274
275	asm volatile ("emms\n");
276}
277
278static int check_mmx_support(void)
279{
280#ifdef __amd64__
281	return 1; /* We assume that all 64-bit processors have MMX support */
282#else
283	int cpuid_feature_information;
284	asm volatile (
285		/* According to Intel manual, CPUID instruction is supported
286		 * if the value of ID bit (bit 21) in EFLAGS can be modified */
287		"pushf\n"
288		"movl     (%%esp),   %0\n"
289		"xorl     $0x200000, (%%esp)\n" /* try to modify ID bit */
290		"popf\n"
291		"pushf\n"
292		"xorl     (%%esp),   %0\n"      /* check if ID bit changed */
293		"jz       1f\n"
294		"push     %%eax\n"
295		"push     %%ebx\n"
296		"push     %%ecx\n"
297		"mov      $1,        %%eax\n"
298		"cpuid\n"
299		"pop      %%ecx\n"
300		"pop      %%ebx\n"
301		"pop      %%eax\n"
302		"1:\n"
303		"popf\n"
304		: "=d" (cpuid_feature_information)
305		:
306		: "cc");
307    return cpuid_feature_information & (1 << 23);
308#endif
309}
310
311void sbc_init_primitives_mmx(struct sbc_encoder_state *state)
312{
313	if (check_mmx_support()) {
314		state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
315		state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
316		state->implementation_info = "MMX";
317	}
318}
319
320#endif
321