1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __TMMINTRIN_H
25#define __TMMINTRIN_H
26
27#ifndef __SSSE3__
28#error "SSSE3 instruction set not enabled"
29#else
30
31#include <pmmintrin.h>
32
/* Wraps the PABSB builtin (64-bit form): per the Intel description, yields
 * the absolute value of each of the eight signed 8-bit lanes of __a. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi8(__m64 __a)
{
    __v8qi __lanes = (__v8qi)__a;   /* reinterpret as 8 x 8-bit */

    return (__m64)__builtin_ia32_pabsb(__lanes);
}
38
39static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
40_mm_abs_epi8(__m128i __a)
41{
42    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
43}
44
/* Wraps the PABSW builtin (64-bit form): per the Intel description, yields
 * the absolute value of each of the four signed 16-bit lanes of __a. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi16(__m64 __a)
{
    __v4hi __lanes = (__v4hi)__a;   /* reinterpret as 4 x 16-bit */

    return (__m64)__builtin_ia32_pabsw(__lanes);
}
50
51static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
52_mm_abs_epi16(__m128i __a)
53{
54    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
55}
56
/* Wraps the PABSD builtin (64-bit form): per the Intel description, yields
 * the absolute value of each of the two signed 32-bit lanes of __a. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi32(__m64 __a)
{
    __v2si __lanes = (__v2si)__a;   /* reinterpret as 2 x 32-bit */

    return (__m64)__builtin_ia32_pabsd(__lanes);
}
62
63static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
64_mm_abs_epi32(__m128i __a)
65{
66    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
67}
68
/*
 * _mm_alignr_epi8(a, b, n) -- 128-bit PALIGNR.
 *
 * Per the Intel description: concatenates a (upper half) with b (lower
 * half), shifts the 32-byte value right by n bytes and yields the low
 * 16 bytes as an __m128i.  n is passed straight to the builtin and is
 * expected to be an integer constant (the instruction encodes it as an
 * immediate).
 *
 * The operands are cast in place instead of being copied into local
 * temporaries.  With temporaries named __a/__b, a call such as
 * _mm_alignr_epi8(__a, __b, 4) expanded to `__m128i __a = (__a);`, i.e. a
 * self-initialisation from an indeterminate value.  Dropping the statement
 * expression also lets the macro be used wherever a plain expression is
 * allowed.  Each argument is still evaluated exactly once.
 */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))
73
/*
 * _mm_alignr_pi8(a, b, n) -- 64-bit PALIGNR.
 *
 * Per the Intel description: concatenates a (upper half) with b (lower
 * half), shifts the 16-byte value right by n bytes and yields the low
 * 8 bytes as an __m64.  n is passed straight to the builtin and is
 * expected to be an integer constant (the instruction encodes it as an
 * immediate).
 *
 * The operands are cast in place instead of being copied into local
 * temporaries.  With temporaries named __a/__b, a call such as
 * _mm_alignr_pi8(__a, __b, 4) expanded to `__m64 __a = (__a);`, i.e. a
 * self-initialisation from an indeterminate value.  Dropping the statement
 * expression also lets the macro be used wherever a plain expression is
 * allowed.  Each argument is still evaluated exactly once.
 */
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), \
                                 (__v8qi)(__m64)(b), (n)))
78
79static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
80_mm_hadd_epi16(__m128i __a, __m128i __b)
81{
82    return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
83}
84
85static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
86_mm_hadd_epi32(__m128i __a, __m128i __b)
87{
88    return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
89}
90
/* Wraps the PHADDW builtin (64-bit form): per the Intel description, sums
 * adjacent pairs of 16-bit lanes; the low two result lanes come from __a,
 * the high two from __b. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi16(__m64 __a, __m64 __b)
{
    __v4hi __lhs = (__v4hi)__a;
    __v4hi __rhs = (__v4hi)__b;

    return (__m64)__builtin_ia32_phaddw(__lhs, __rhs);
}
96
/* Wraps the PHADDD builtin (64-bit form): per the Intel description, the low
 * result lane is the sum of the two 32-bit lanes of __a and the high result
 * lane is the sum of the two 32-bit lanes of __b. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi32(__m64 __a, __m64 __b)
{
    __v2si __lhs = (__v2si)__a;
    __v2si __rhs = (__v2si)__b;

    return (__m64)__builtin_ia32_phaddd(__lhs, __rhs);
}
102
103static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
104_mm_hadds_epi16(__m128i __a, __m128i __b)
105{
106    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
107}
108
/* Wraps the PHADDSW builtin (64-bit form): per the Intel description, sums
 * adjacent pairs of signed 16-bit lanes with signed saturation; the low two
 * result lanes come from __a, the high two from __b. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadds_pi16(__m64 __a, __m64 __b)
{
    __v4hi __lhs = (__v4hi)__a;
    __v4hi __rhs = (__v4hi)__b;

    return (__m64)__builtin_ia32_phaddsw(__lhs, __rhs);
}
114
115static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
116_mm_hsub_epi16(__m128i __a, __m128i __b)
117{
118    return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
119}
120
121static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
122_mm_hsub_epi32(__m128i __a, __m128i __b)
123{
124    return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
125}
126
/* Wraps the PHSUBW builtin (64-bit form): per the Intel description, for
 * each adjacent pair of 16-bit lanes computes (even lane - odd lane); the low
 * two result lanes come from __a, the high two from __b. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi16(__m64 __a, __m64 __b)
{
    __v4hi __lhs = (__v4hi)__a;
    __v4hi __rhs = (__v4hi)__b;

    return (__m64)__builtin_ia32_phsubw(__lhs, __rhs);
}
132
/* Wraps the PHSUBD builtin (64-bit form): per the Intel description, the low
 * result lane is (lane 0 - lane 1) of __a and the high result lane is
 * (lane 0 - lane 1) of __b, on 32-bit lanes. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi32(__m64 __a, __m64 __b)
{
    __v2si __lhs = (__v2si)__a;
    __v2si __rhs = (__v2si)__b;

    return (__m64)__builtin_ia32_phsubd(__lhs, __rhs);
}
138
139static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
140_mm_hsubs_epi16(__m128i __a, __m128i __b)
141{
142    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
143}
144
/* Wraps the PHSUBSW builtin (64-bit form): per the Intel description, for
 * each adjacent pair of signed 16-bit lanes computes (even lane - odd lane)
 * with signed saturation; the low two result lanes come from __a, the high
 * two from __b. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_pi16(__m64 __a, __m64 __b)
{
    __v4hi __lhs = (__v4hi)__a;
    __v4hi __rhs = (__v4hi)__b;

    return (__m64)__builtin_ia32_phsubsw(__lhs, __rhs);
}
150
151static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
152_mm_maddubs_epi16(__m128i __a, __m128i __b)
153{
154    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
155}
156
/* Wraps the PMADDUBSW builtin (64-bit form): per the Intel description,
 * multiplies each unsigned 8-bit lane of __a by the corresponding signed
 * 8-bit lane of __b, then adds adjacent 16-bit products with signed
 * saturation, giving four 16-bit results. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_pi16(__m64 __a, __m64 __b)
{
    __v8qi __ubytes = (__v8qi)__a;   /* treated as unsigned by PMADDUBSW */
    __v8qi __sbytes = (__v8qi)__b;   /* treated as signed by PMADDUBSW */

    return (__m64)__builtin_ia32_pmaddubsw(__ubytes, __sbytes);
}
162
163static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
164_mm_mulhrs_epi16(__m128i __a, __m128i __b)
165{
166    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
167}
168
/* Wraps the PMULHRSW builtin (64-bit form): per the Intel description,
 * multiplies corresponding signed 16-bit lanes and returns, per lane, the
 * rounded and scaled high part of the product: ((a * b >> 14) + 1) >> 1. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
{
    __v4hi __lhs = (__v4hi)__a;
    __v4hi __rhs = (__v4hi)__b;

    return (__m64)__builtin_ia32_pmulhrsw(__lhs, __rhs);
}
174
175static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
176_mm_shuffle_epi8(__m128i __a, __m128i __b)
177{
178    return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
179}
180
/* Wraps the PSHUFB builtin (64-bit form): per the Intel description, each
 * result byte is zero when bit 7 of the matching control byte in __b is set,
 * otherwise it is the byte of __a selected by the control byte's low three
 * bits. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_pi8(__m64 __a, __m64 __b)
{
    __v8qi __data = (__v8qi)__a;      /* bytes being permuted */
    __v8qi __control = (__v8qi)__b;   /* per-byte selectors */

    return (__m64)__builtin_ia32_pshufb(__data, __control);
}
186
187static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
188_mm_sign_epi8(__m128i __a, __m128i __b)
189{
190    return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
191}
192
193static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
194_mm_sign_epi16(__m128i __a, __m128i __b)
195{
196    return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
197}
198
199static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
200_mm_sign_epi32(__m128i __a, __m128i __b)
201{
202    return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
203}
204
/* Wraps the PSIGNB builtin (64-bit form): per the Intel description, each
 * 8-bit lane of __a is negated where the matching lane of __b is negative,
 * zeroed where it is zero, and passed through where it is positive. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi8(__m64 __a, __m64 __b)
{
    __v8qi __values = (__v8qi)__a;
    __v8qi __signs = (__v8qi)__b;

    return (__m64)__builtin_ia32_psignb(__values, __signs);
}
210
/* Wraps the PSIGNW builtin (64-bit form): per the Intel description, each
 * 16-bit lane of __a is negated where the matching lane of __b is negative,
 * zeroed where it is zero, and passed through where it is positive. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi16(__m64 __a, __m64 __b)
{
    __v4hi __values = (__v4hi)__a;
    __v4hi __signs = (__v4hi)__b;

    return (__m64)__builtin_ia32_psignw(__values, __signs);
}
216
/* Wraps the PSIGND builtin (64-bit form): per the Intel description, each
 * 32-bit lane of __a is negated where the matching lane of __b is negative,
 * zeroed where it is zero, and passed through where it is positive. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi32(__m64 __a, __m64 __b)
{
    __v2si __values = (__v2si)__a;
    __v2si __signs = (__v2si)__b;

    return (__m64)__builtin_ia32_psignd(__values, __signs);
}
222
223#endif /* __SSSE3__ */
224
225#endif /* __TMMINTRIN_H */
226