1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
2 *
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 *===-----------------------------------------------------------------------===
23 */
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512BWINTRIN_H
29#define __AVX512BWINTRIN_H
30
/* Mask types returned by the compare intrinsics below: __mmask32 is used by
 * the 16-bit element compares and __mmask64 by the 8-bit element compares. */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;

/* 64-byte vector views used when handing __m512i operands to the builtins:
 * __v64qi has char elements, __v32hi has short elements. */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
35
36
37/* Integer compare */
38
39static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
40_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
41  return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
42                                                   (__mmask64)-1);
43}
44
45static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
46_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
47  return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
48                                                   __u);
49}
50
51static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
52_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
53  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
54                                                 (__mmask64)-1);
55}
56
57static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
58_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
59  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
60                                                 __u);
61}
62
63static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
64_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
65  return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
66                                                   (__mmask32)-1);
67}
68
69static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
70_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
71  return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
72                                                   __u);
73}
74
75static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
76_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
77  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
78                                                 (__mmask32)-1);
79}
80
81static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
82_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
83  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
84                                                 __u);
85}
86
87static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
88_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
89  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
90                                                (__mmask64)-1);
91}
92
93static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
94_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
95  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
96                                                __u);
97}
98
99static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
100_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
101  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
102                                                 (__mmask64)-1);
103}
104
105static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
106_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
107  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
108                                                 __u);
109}
110
111static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
112_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
113  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
114                                                (__mmask32)-1);
115}
116
117static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
118_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
119  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
120                                                __u);
121}
122
123static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
124_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
125  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
126                                                 (__mmask32)-1);
127}
128
129static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
130_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
131  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
132                                                 __u);
133}
134
135static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
136_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
137  return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
138                                                   (__mmask64)-1);
139}
140
141static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
142_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
143  return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
144                                                   __u);
145}
146
147static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
148_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
149  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
150                                                 (__mmask64)-1);
151}
152
153static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
154_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
155  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
156                                                 __u);
157}
158
159static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
160_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
161  return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
162                                                   (__mmask32)-1);
163}
164
165static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
166_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
167  return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
168                                                   __u);
169}
170
171static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
172_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
173  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
174                                                 (__mmask32)-1);
175}
176
177static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
178_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
179  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
180                                                 __u);
181}
182
183static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
184_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
185  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
186                                                (__mmask64)-1);
187}
188
189static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
190_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
191  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
192                                                __u);
193}
194
195static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
196_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
197  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
198                                                 (__mmask64)-1);
199}
200
201static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
202_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
203  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
204                                                 __u);
205}
206
207static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
208_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
209  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
210                                                (__mmask32)-1);
211}
212
213static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
214_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
215  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
216                                                __u);
217}
218
219static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
220_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
221  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
222                                                 (__mmask32)-1);
223}
224
225static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
226_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
227  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
228                                                 __u);
229}
230
231static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
232_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
233  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
234                                                (__mmask64)-1);
235}
236
237static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
238_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
239  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
240                                                __u);
241}
242
243static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
244_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
245  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
246                                                 (__mmask64)-1);
247}
248
249static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
250_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
251  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
252                                                 __u);
253}
254
255static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
256_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
257  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
258                                                (__mmask32)-1);
259}
260
261static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
262_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
263  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
264                                                __u);
265}
266
267static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
268_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
269  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
270                                                 (__mmask32)-1);
271}
272
273static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
274_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
275  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
276                                                 __u);
277}
278
279static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
280_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
281  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
282                                                (__mmask64)-1);
283}
284
285static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
286_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
287  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
288                                                __u);
289}
290
291static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
292_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
293  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
294                                                 (__mmask64)-1);
295}
296
297static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
298_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
299  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
300                                                 __u);
301}
302
303static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
304_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
305  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
306                                                (__mmask32)-1);
307}
308
309static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
310_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
311  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
312                                                __u);
313}
314
315static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
316_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
317  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
318                                                 (__mmask32)-1);
319}
320
321static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
322_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
323  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
324                                                 __u);
325}
326
/* Generic signed 8-bit compare of a and b: forwards the predicate p and an
 * all-ones mask to __builtin_ia32_cmpb512_mask. The result is cast to
 * __mmask64, the mask type the fixed-predicate 8-bit wrappers in this header
 * return for the same builtin; the previous __mmask16 cast dropped the upper
 * 48 mask bits. */
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                         (__v64qi)(__m512i)(b), \
                                         (p), (__mmask64)-1); })
331
/* Masked generic signed 8-bit compare of a and b: forwards the predicate p
 * and the caller's mask m to __builtin_ia32_cmpb512_mask. The result is cast
 * to __mmask64, the mask type the fixed-predicate 8-bit wrappers in this
 * header return for the same builtin; the previous __mmask16 cast dropped the
 * upper 48 mask bits. */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                         (__v64qi)(__m512i)(b), \
                                         (p), (__mmask64)(m)); })
336
/* Generic unsigned 8-bit compare of a and b: forwards the predicate p and an
 * all-ones mask to __builtin_ia32_ucmpb512_mask. The result is cast to
 * __mmask64, the mask type the fixed-predicate 8-bit wrappers in this header
 * return for the same builtin; the previous __mmask16 cast dropped the upper
 * 48 mask bits. */
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
                                          (p), (__mmask64)-1); })
341
/* Masked generic unsigned 8-bit compare of a and b: forwards the predicate p
 * and the caller's mask m to __builtin_ia32_ucmpb512_mask. The result is cast
 * to __mmask64, the mask type the fixed-predicate 8-bit wrappers in this
 * header return for the same builtin; the previous __mmask16 cast dropped the
 * upper 48 mask bits. */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
                                          (p), (__mmask64)(m)); })
346
/* Generic signed 16-bit compare of a and b: forwards the predicate p and an
 * all-ones mask to __builtin_ia32_cmpw512_mask. The result is cast to
 * __mmask32, the mask type the fixed-predicate 16-bit wrappers in this header
 * return for the same builtin; the previous __mmask16 cast dropped the upper
 * 16 mask bits. */
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                         (__v32hi)(__m512i)(b), \
                                         (p), (__mmask32)-1); })
351
/* Masked generic signed 16-bit compare of a and b: forwards the predicate p
 * and the caller's mask m to __builtin_ia32_cmpw512_mask. The result is cast
 * to __mmask32, the mask type the fixed-predicate 16-bit wrappers in this
 * header return for the same builtin; the previous __mmask16 cast dropped the
 * upper 16 mask bits. */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                         (__v32hi)(__m512i)(b), \
                                         (p), (__mmask32)(m)); })
356
/* Generic unsigned 16-bit compare of a and b: forwards the predicate p and an
 * all-ones mask to __builtin_ia32_ucmpw512_mask. The result is cast to
 * __mmask32, the mask type the fixed-predicate 16-bit wrappers in this header
 * return for the same builtin; the previous __mmask16 cast dropped the upper
 * 16 mask bits. */
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), \
                                          (p), (__mmask32)-1); })
361
/* Masked generic unsigned 16-bit compare of a and b: forwards the predicate p
 * and the caller's mask m to __builtin_ia32_ucmpw512_mask. The result is cast
 * to __mmask32, the mask type the fixed-predicate 16-bit wrappers in this
 * header return for the same builtin; the previous __mmask16 cast dropped the
 * upper 16 mask bits. */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), \
                                          (p), (__mmask32)(m)); })
366
367#endif
368