avx512bwintrin.h revision b6d6993e6e6d3daf4d9876794254d20a134e37c2
1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
2 *
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 *===-----------------------------------------------------------------------===
23 */
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512BWINTRIN_H
29#define __AVX512BWINTRIN_H
30
/* Mask types returned/accepted by the intrinsics in this header:
 * __mmask32 is used by the 16-bit-lane (__v32hi) operations and
 * __mmask64 by the 8-bit-lane (__v64qi) operations. */
typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;
/* 64-byte vector of char: 64 lanes. */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
/* 64-byte vector of short: 32 lanes where short is 2 bytes wide. */
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
35
36static  __inline __v64qi __attribute__ ((__always_inline__, __nodebug__))
37_mm512_setzero_qi (void) {
38  return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0,
39                       0, 0, 0, 0, 0, 0, 0, 0,
40                       0, 0, 0, 0, 0, 0, 0, 0,
41                       0, 0, 0, 0, 0, 0, 0, 0,
42                       0, 0, 0, 0, 0, 0, 0, 0,
43                       0, 0, 0, 0, 0, 0, 0, 0,
44                       0, 0, 0, 0, 0, 0, 0, 0,
45                       0, 0, 0, 0, 0, 0, 0, 0 };
46}
47
48static  __inline __v32hi __attribute__ ((__always_inline__, __nodebug__))
49_mm512_setzero_hi (void) {
50  return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0,
51                       0, 0, 0, 0, 0, 0, 0, 0,
52                       0, 0, 0, 0, 0, 0, 0, 0,
53                       0, 0, 0, 0, 0, 0, 0, 0 };
54}
55
56/* Integer compare */
57
58static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
59_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) {
60  return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
61                                                   (__mmask64)-1);
62}
63
64static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
65_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
66  return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b,
67                                                   __u);
68}
69
70static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
71_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) {
72  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
73                                                 (__mmask64)-1);
74}
75
76static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
77_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
78  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0,
79                                                 __u);
80}
81
82static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
83_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) {
84  return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
85                                                   (__mmask32)-1);
86}
87
88static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
89_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
90  return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b,
91                                                   __u);
92}
93
94static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
95_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) {
96  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
97                                                 (__mmask32)-1);
98}
99
100static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
101_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
102  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0,
103                                                 __u);
104}
105
106static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
107_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) {
108  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
109                                                (__mmask64)-1);
110}
111
112static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
113_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
114  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
115                                                __u);
116}
117
118static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
119_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) {
120  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
121                                                 (__mmask64)-1);
122}
123
124static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
125_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
126  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5,
127                                                 __u);
128}
129
130static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
131_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) {
132  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
133                                                (__mmask32)-1);
134}
135
136static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
137_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
138  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
139                                                __u);
140}
141
142static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
143_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) {
144  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
145                                                 (__mmask32)-1);
146}
147
148static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
149_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
150  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5,
151                                                 __u);
152}
153
154static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
155_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) {
156  return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
157                                                   (__mmask64)-1);
158}
159
160static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
161_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
162  return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b,
163                                                   __u);
164}
165
166static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
167_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) {
168  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
169                                                 (__mmask64)-1);
170}
171
172static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
173_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
174  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6,
175                                                 __u);
176}
177
178static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
179_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) {
180  return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
181                                                   (__mmask32)-1);
182}
183
184static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
185_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
186  return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b,
187                                                   __u);
188}
189
190static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
191_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) {
192  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
193                                                 (__mmask32)-1);
194}
195
196static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
197_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
198  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6,
199                                                 __u);
200}
201
202static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
203_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) {
204  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
205                                                (__mmask64)-1);
206}
207
208static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
209_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
210  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
211                                                __u);
212}
213
214static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
215_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) {
216  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
217                                                 (__mmask64)-1);
218}
219
220static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
221_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
222  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2,
223                                                 __u);
224}
225
226static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
227_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) {
228  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
229                                                (__mmask32)-1);
230}
231
232static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
233_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
234  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
235                                                __u);
236}
237
238static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
239_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) {
240  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
241                                                 (__mmask32)-1);
242}
243
244static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
245_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
246  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2,
247                                                 __u);
248}
249
250static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
251_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) {
252  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
253                                                (__mmask64)-1);
254}
255
256static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
257_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
258  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
259                                                __u);
260}
261
262static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
263_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) {
264  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
265                                                 (__mmask64)-1);
266}
267
268static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
269_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
270  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1,
271                                                 __u);
272}
273
274static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
275_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) {
276  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
277                                                (__mmask32)-1);
278}
279
280static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
281_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
282  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
283                                                __u);
284}
285
286static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
287_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) {
288  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
289                                                 (__mmask32)-1);
290}
291
292static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
293_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
294  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1,
295                                                 __u);
296}
297
298static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
299_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) {
300  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
301                                                (__mmask64)-1);
302}
303
304static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
305_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
306  return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
307                                                __u);
308}
309
310static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
311_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) {
312  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
313                                                 (__mmask64)-1);
314}
315
316static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__))
317_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) {
318  return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4,
319                                                 __u);
320}
321
322static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
323_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) {
324  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
325                                                (__mmask32)-1);
326}
327
328static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
329_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
330  return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
331                                                __u);
332}
333
334static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
335_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) {
336  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
337                                                 (__mmask32)-1);
338}
339
340static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__))
341_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) {
342  return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4,
343                                                 __u);
344}
345
346static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
347_mm512_add_epi8 (__m512i __A, __m512i __B) {
348  return (__m512i) ((__v64qi) __A + (__v64qi) __B);
349}
350
351static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
352_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
353  return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
354             (__v64qi) __B,
355             (__v64qi) __W,
356             (__mmask64) __U);
357}
358
359static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
360_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
361  return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A,
362             (__v64qi) __B,
363             (__v64qi)
364             _mm512_setzero_qi (),
365             (__mmask64) __U);
366}
367
368static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
369_mm512_sub_epi8 (__m512i __A, __m512i __B) {
370  return (__m512i) ((__v64qi) __A - (__v64qi) __B);
371}
372
373static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
374_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
375  return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
376             (__v64qi) __B,
377             (__v64qi) __W,
378             (__mmask64) __U);
379}
380
381static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
382_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) {
383  return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A,
384             (__v64qi) __B,
385             (__v64qi)
386             _mm512_setzero_qi (),
387             (__mmask64) __U);
388}
389
390static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
391_mm512_add_epi16 (__m512i __A, __m512i __B) {
392  return (__m512i) ((__v32hi) __A + (__v32hi) __B);
393}
394
395static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
396_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
397  return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
398             (__v32hi) __B,
399             (__v32hi) __W,
400             (__mmask32) __U);
401}
402
403static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
404_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
405  return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A,
406             (__v32hi) __B,
407             (__v32hi)
408             _mm512_setzero_hi (),
409             (__mmask32) __U);
410}
411
412static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
413_mm512_sub_epi16 (__m512i __A, __m512i __B) {
414  return (__m512i) ((__v32hi) __A - (__v32hi) __B);
415}
416
417static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
418_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
419  return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
420             (__v32hi) __B,
421             (__v32hi) __W,
422             (__mmask32) __U);
423}
424
425static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
426_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
427  return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A,
428             (__v32hi) __B,
429             (__v32hi)
430             _mm512_setzero_hi (),
431             (__mmask32) __U);
432}
433
434static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
435_mm512_mullo_epi16 (__m512i __A, __m512i __B) {
436  return (__m512i) ((__v32hi) __A * (__v32hi) __B);
437}
438
439static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
440_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
441  return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
442              (__v32hi) __B,
443              (__v32hi) __W,
444              (__mmask32) __U);
445}
446
447static __inline__ __m512i __attribute__((__always_inline__, __nodebug__))
448_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) {
449  return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A,
450              (__v32hi) __B,
451              (__v32hi)
452              _mm512_setzero_hi (),
453              (__mmask32) __U);
454}
455
/* Generic signed-byte compare: a and b are viewed as 64 x char and passed to
 * __builtin_ia32_cmpb512_mask with predicate p and an all-ones mask.
 * The result is cast to __mmask64 (one bit per byte lane); a 16-bit mask
 * cast here would discard the bits for lanes 16..63. */
#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                         (__v64qi)(__m512i)(b), \
                                         (p), (__mmask64)-1); })
460
/* Generic masked signed-byte compare: a and b are viewed as 64 x char and
 * passed to __builtin_ia32_cmpb512_mask with predicate p and mask m.
 * The result is cast to __mmask64 (one bit per byte lane); a 16-bit mask
 * cast here would discard the bits for lanes 16..63. */
#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \
                                         (__v64qi)(__m512i)(b), \
                                         (p), (__mmask64)(m)); })
465
/* Generic unsigned-byte compare: a and b are viewed as 64 x char and passed
 * to __builtin_ia32_ucmpb512_mask with predicate p and an all-ones mask.
 * The result is cast to __mmask64 (one bit per byte lane); a 16-bit mask
 * cast here would discard the bits for lanes 16..63. */
#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
                                          (p), (__mmask64)-1); })
470
/* Generic masked unsigned-byte compare: a and b are viewed as 64 x char and
 * passed to __builtin_ia32_ucmpb512_mask with predicate p and mask m.
 * The result is cast to __mmask64 (one bit per byte lane); a 16-bit mask
 * cast here would discard the bits for lanes 16..63. */
#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \
  (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \
                                          (__v64qi)(__m512i)(b), \
                                          (p), (__mmask64)(m)); })
475
/* Generic signed 16-bit compare: a and b are viewed as 32 x short and passed
 * to __builtin_ia32_cmpw512_mask with predicate p and an all-ones mask.
 * The result is cast to __mmask32 (one bit per 16-bit lane); a 16-bit mask
 * cast here would discard the bits for lanes 16..31. */
#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                         (__v32hi)(__m512i)(b), \
                                         (p), (__mmask32)-1); })
480
/* Generic masked signed 16-bit compare: a and b are viewed as 32 x short and
 * passed to __builtin_ia32_cmpw512_mask with predicate p and mask m.
 * The result is cast to __mmask32 (one bit per 16-bit lane); a 16-bit mask
 * cast here would discard the bits for lanes 16..31. */
#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \
                                         (__v32hi)(__m512i)(b), \
                                         (p), (__mmask32)(m)); })
485
/* Generic unsigned 16-bit compare: a and b are viewed as 32 x short and
 * passed to __builtin_ia32_ucmpw512_mask with predicate p and an all-ones
 * mask.
 * The result is cast to __mmask32 (one bit per 16-bit lane); a 16-bit mask
 * cast here would discard the bits for lanes 16..31. */
#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), \
                                          (p), (__mmask32)-1); })
490
/* Generic masked unsigned 16-bit compare: a and b are viewed as 32 x short
 * and passed to __builtin_ia32_ucmpw512_mask with predicate p and mask m.
 * The result is cast to __mmask32 (one bit per 16-bit lane); a 16-bit mask
 * cast here would discard the bits for lanes 16..31. */
#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \
  (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \
                                          (__v32hi)(__m512i)(b), \
                                          (p), (__mmask32)(m)); })
495
496#endif
497