1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLINTRIN_H
29#define __AVX512VLINTRIN_H
30
31#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
32
33/* Doesn't require avx512vl, used in avx512dqintrin.h */
34static  __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
35_mm_setzero_di(void) {
36  return (__m128i)(__v2di){ 0LL, 0LL};
37}
38
39/* Integer compare */
40
41static __inline__ __mmask8 __DEFAULT_FN_ATTRS
42_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
43  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
44                                                  (__mmask8)-1);
45}
46
47static __inline__ __mmask8 __DEFAULT_FN_ATTRS
48_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
49  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
50                                                  __u);
51}
52
53static __inline__ __mmask8 __DEFAULT_FN_ATTRS
54_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
55  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
56                                                (__mmask8)-1);
57}
58
59static __inline__ __mmask8 __DEFAULT_FN_ATTRS
60_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
61  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
62                                                __u);
63}
64
65static __inline__ __mmask8 __DEFAULT_FN_ATTRS
66_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
67  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
68                                                  (__mmask8)-1);
69}
70
71static __inline__ __mmask8 __DEFAULT_FN_ATTRS
72_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
73  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
74                                                  __u);
75}
76
77static __inline__ __mmask8 __DEFAULT_FN_ATTRS
78_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
79  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
80                                                (__mmask8)-1);
81}
82
83static __inline__ __mmask8 __DEFAULT_FN_ATTRS
84_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
85  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
86                                                __u);
87}
88
89static __inline__ __mmask8 __DEFAULT_FN_ATTRS
90_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
91  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
92                                                  (__mmask8)-1);
93}
94
95static __inline__ __mmask8 __DEFAULT_FN_ATTRS
96_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
97  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
98                                                  __u);
99}
100
101static __inline__ __mmask8 __DEFAULT_FN_ATTRS
102_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
103  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
104                                                (__mmask8)-1);
105}
106
107static __inline__ __mmask8 __DEFAULT_FN_ATTRS
108_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
109  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
110                                                __u);
111}
112
113static __inline__ __mmask8 __DEFAULT_FN_ATTRS
114_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
115  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
116                                                  (__mmask8)-1);
117}
118
119static __inline__ __mmask8 __DEFAULT_FN_ATTRS
120_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
121  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
122                                                  __u);
123}
124
125static __inline__ __mmask8 __DEFAULT_FN_ATTRS
126_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) {
127  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
128                                                (__mmask8)-1);
129}
130
131static __inline__ __mmask8 __DEFAULT_FN_ATTRS
132_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
133  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
134                                                __u);
135}
136
137
138static __inline__ __mmask8 __DEFAULT_FN_ATTRS
139_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) {
140  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
141                                               (__mmask8)-1);
142}
143
144static __inline__ __mmask8 __DEFAULT_FN_ATTRS
145_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
146  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
147                                               __u);
148}
149
150static __inline__ __mmask8 __DEFAULT_FN_ATTRS
151_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) {
152  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
153                                                (__mmask8)-1);
154}
155
156static __inline__ __mmask8 __DEFAULT_FN_ATTRS
157_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
158  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
159                                                __u);
160}
161
162static __inline__ __mmask8 __DEFAULT_FN_ATTRS
163_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) {
164  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
165                                               (__mmask8)-1);
166}
167
168static __inline__ __mmask8 __DEFAULT_FN_ATTRS
169_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
170  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
171                                               __u);
172}
173
174static __inline__ __mmask8 __DEFAULT_FN_ATTRS
175_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) {
176  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
177                                                (__mmask8)-1);
178}
179
180static __inline__ __mmask8 __DEFAULT_FN_ATTRS
181_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
182  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
183                                                __u);
184}
185
186static __inline__ __mmask8 __DEFAULT_FN_ATTRS
187_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) {
188  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
189                                               (__mmask8)-1);
190}
191
192static __inline__ __mmask8 __DEFAULT_FN_ATTRS
193_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
194  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
195                                               __u);
196}
197
198static __inline__ __mmask8 __DEFAULT_FN_ATTRS
199_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) {
200  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
201                                                (__mmask8)-1);
202}
203
204static __inline__ __mmask8 __DEFAULT_FN_ATTRS
205_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
206  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
207                                                __u);
208}
209
210static __inline__ __mmask8 __DEFAULT_FN_ATTRS
211_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) {
212  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
213                                               (__mmask8)-1);
214}
215
216static __inline__ __mmask8 __DEFAULT_FN_ATTRS
217_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
218  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
219                                               __u);
220}
221
222static __inline__ __mmask8 __DEFAULT_FN_ATTRS
223_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) {
224  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
225                                                (__mmask8)-1);
226}
227
228static __inline__ __mmask8 __DEFAULT_FN_ATTRS
229_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
230  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
231                                                __u);
232}
233
234static __inline__ __mmask8 __DEFAULT_FN_ATTRS
235_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
236  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
237                                                  (__mmask8)-1);
238}
239
240static __inline__ __mmask8 __DEFAULT_FN_ATTRS
241_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
242  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
243                                                  __u);
244}
245
246static __inline__ __mmask8 __DEFAULT_FN_ATTRS
247_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) {
248  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
249                                                (__mmask8)-1);
250}
251
252static __inline__ __mmask8 __DEFAULT_FN_ATTRS
253_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
254  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
255                                                __u);
256}
257
258static __inline__ __mmask8 __DEFAULT_FN_ATTRS
259_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
260  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
261                                                  (__mmask8)-1);
262}
263
264static __inline__ __mmask8 __DEFAULT_FN_ATTRS
265_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
266  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
267                                                  __u);
268}
269
270static __inline__ __mmask8 __DEFAULT_FN_ATTRS
271_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) {
272  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
273                                                (__mmask8)-1);
274}
275
276static __inline__ __mmask8 __DEFAULT_FN_ATTRS
277_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
278  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
279                                                __u);
280}
281
282static __inline__ __mmask8 __DEFAULT_FN_ATTRS
283_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
284  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
285                                                  (__mmask8)-1);
286}
287
288static __inline__ __mmask8 __DEFAULT_FN_ATTRS
289_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
290  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
291                                                  __u);
292}
293
294static __inline__ __mmask8 __DEFAULT_FN_ATTRS
295_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) {
296  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
297                                                (__mmask8)-1);
298}
299
300static __inline__ __mmask8 __DEFAULT_FN_ATTRS
301_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
302  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
303                                                __u);
304}
305
306static __inline__ __mmask8 __DEFAULT_FN_ATTRS
307_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
308  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
309                                                  (__mmask8)-1);
310}
311
312static __inline__ __mmask8 __DEFAULT_FN_ATTRS
313_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
314  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
315                                                  __u);
316}
317
318static __inline__ __mmask8 __DEFAULT_FN_ATTRS
319_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) {
320  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
321                                                (__mmask8)-1);
322}
323
324static __inline__ __mmask8 __DEFAULT_FN_ATTRS
325_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
326  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
327                                                __u);
328}
329
330static __inline__ __mmask8 __DEFAULT_FN_ATTRS
331_mm_cmple_epi32_mask(__m128i __a, __m128i __b) {
332  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
333                                               (__mmask8)-1);
334}
335
336static __inline__ __mmask8 __DEFAULT_FN_ATTRS
337_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
338  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
339                                               __u);
340}
341
342static __inline__ __mmask8 __DEFAULT_FN_ATTRS
343_mm_cmple_epu32_mask(__m128i __a, __m128i __b) {
344  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
345                                                (__mmask8)-1);
346}
347
348static __inline__ __mmask8 __DEFAULT_FN_ATTRS
349_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
350  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
351                                                __u);
352}
353
354static __inline__ __mmask8 __DEFAULT_FN_ATTRS
355_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) {
356  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
357                                               (__mmask8)-1);
358}
359
360static __inline__ __mmask8 __DEFAULT_FN_ATTRS
361_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
362  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
363                                               __u);
364}
365
366static __inline__ __mmask8 __DEFAULT_FN_ATTRS
367_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) {
368  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
369                                                (__mmask8)-1);
370}
371
372static __inline__ __mmask8 __DEFAULT_FN_ATTRS
373_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
374  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
375                                                __u);
376}
377
378static __inline__ __mmask8 __DEFAULT_FN_ATTRS
379_mm_cmple_epi64_mask(__m128i __a, __m128i __b) {
380  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
381                                               (__mmask8)-1);
382}
383
384static __inline__ __mmask8 __DEFAULT_FN_ATTRS
385_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
386  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
387                                               __u);
388}
389
390static __inline__ __mmask8 __DEFAULT_FN_ATTRS
391_mm_cmple_epu64_mask(__m128i __a, __m128i __b) {
392  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
393                                                (__mmask8)-1);
394}
395
396static __inline__ __mmask8 __DEFAULT_FN_ATTRS
397_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
398  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
399                                                __u);
400}
401
402static __inline__ __mmask8 __DEFAULT_FN_ATTRS
403_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) {
404  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
405                                               (__mmask8)-1);
406}
407
408static __inline__ __mmask8 __DEFAULT_FN_ATTRS
409_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
410  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
411                                               __u);
412}
413
414static __inline__ __mmask8 __DEFAULT_FN_ATTRS
415_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) {
416  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
417                                                (__mmask8)-1);
418}
419
420static __inline__ __mmask8 __DEFAULT_FN_ATTRS
421_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
422  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
423                                                __u);
424}
425
426static __inline__ __mmask8 __DEFAULT_FN_ATTRS
427_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) {
428  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
429                                               (__mmask8)-1);
430}
431
432static __inline__ __mmask8 __DEFAULT_FN_ATTRS
433_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
434  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
435                                               __u);
436}
437
438static __inline__ __mmask8 __DEFAULT_FN_ATTRS
439_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) {
440  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
441                                                (__mmask8)-1);
442}
443
444static __inline__ __mmask8 __DEFAULT_FN_ATTRS
445_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
446  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
447                                                __u);
448}
449
450static __inline__ __mmask8 __DEFAULT_FN_ATTRS
451_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) {
452  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
453                                               (__mmask8)-1);
454}
455
456static __inline__ __mmask8 __DEFAULT_FN_ATTRS
457_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
458  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
459                                               __u);
460}
461
462static __inline__ __mmask8 __DEFAULT_FN_ATTRS
463_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) {
464  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
465                                                (__mmask8)-1);
466}
467
468static __inline__ __mmask8 __DEFAULT_FN_ATTRS
469_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
470  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
471                                                __u);
472}
473
474static __inline__ __mmask8 __DEFAULT_FN_ATTRS
475_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) {
476  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
477                                               (__mmask8)-1);
478}
479
480static __inline__ __mmask8 __DEFAULT_FN_ATTRS
481_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
482  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
483                                               __u);
484}
485
486static __inline__ __mmask8 __DEFAULT_FN_ATTRS
487_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) {
488  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
489                                                (__mmask8)-1);
490}
491
492static __inline__ __mmask8 __DEFAULT_FN_ATTRS
493_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
494  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
495                                                __u);
496}
497
498static __inline__ __mmask8 __DEFAULT_FN_ATTRS
499_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) {
500  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
501                                               (__mmask8)-1);
502}
503
504static __inline__ __mmask8 __DEFAULT_FN_ATTRS
505_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
506  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
507                                               __u);
508}
509
510static __inline__ __mmask8 __DEFAULT_FN_ATTRS
511_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) {
512  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
513                                                (__mmask8)-1);
514}
515
516static __inline__ __mmask8 __DEFAULT_FN_ATTRS
517_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
518  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
519                                                __u);
520}
521
522static __inline__ __mmask8 __DEFAULT_FN_ATTRS
523_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) {
524  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
525                                               (__mmask8)-1);
526}
527
528static __inline__ __mmask8 __DEFAULT_FN_ATTRS
529_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
530  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
531                                               __u);
532}
533
534static __inline__ __mmask8 __DEFAULT_FN_ATTRS
535_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) {
536  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
537                                                (__mmask8)-1);
538}
539
540static __inline__ __mmask8 __DEFAULT_FN_ATTRS
541_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
542  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
543                                                __u);
544}
545
546static __inline__ __mmask8 __DEFAULT_FN_ATTRS
547_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) {
548  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
549                                               (__mmask8)-1);
550}
551
552static __inline__ __mmask8 __DEFAULT_FN_ATTRS
553_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
554  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
555                                               __u);
556}
557
558static __inline__ __mmask8 __DEFAULT_FN_ATTRS
559_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) {
560  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
561                                                (__mmask8)-1);
562}
563
564static __inline__ __mmask8 __DEFAULT_FN_ATTRS
565_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
566  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
567                                                __u);
568}
569
570static __inline__ __mmask8 __DEFAULT_FN_ATTRS
571_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) {
572  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
573                                               (__mmask8)-1);
574}
575
576static __inline__ __mmask8 __DEFAULT_FN_ATTRS
577_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
578  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
579                                               __u);
580}
581
582static __inline__ __mmask8 __DEFAULT_FN_ATTRS
583_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) {
584  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
585                                                (__mmask8)-1);
586}
587
588static __inline__ __mmask8 __DEFAULT_FN_ATTRS
589_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
590  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
591                                                __u);
592}
593
594static __inline__ __mmask8 __DEFAULT_FN_ATTRS
595_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) {
596  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
597                                               (__mmask8)-1);
598}
599
600static __inline__ __mmask8 __DEFAULT_FN_ATTRS
601_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
602  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
603                                               __u);
604}
605
606static __inline__ __mmask8 __DEFAULT_FN_ATTRS
607_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) {
608  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
609                                                (__mmask8)-1);
610}
611
612static __inline__ __mmask8 __DEFAULT_FN_ATTRS
613_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
614  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
615                                                __u);
616}
617
618static __inline__ __m256i __DEFAULT_FN_ATTRS
619_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
620           __m256i __B)
621{
622  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
623             (__v8si) __B,
624             (__v8si) __W,
625             (__mmask8) __U);
626}
627
628static __inline__ __m256i __DEFAULT_FN_ATTRS
629_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
630{
631  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
632             (__v8si) __B,
633             (__v8si)
634             _mm256_setzero_si256 (),
635             (__mmask8) __U);
636}
637
638static __inline__ __m256i __DEFAULT_FN_ATTRS
639_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
640           __m256i __B)
641{
642  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
643             (__v4di) __B,
644             (__v4di) __W,
645             (__mmask8) __U);
646}
647
648static __inline__ __m256i __DEFAULT_FN_ATTRS
649_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
650{
651  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
652             (__v4di) __B,
653             (__v4di)
654             _mm256_setzero_si256 (),
655             (__mmask8) __U);
656}
657
658static __inline__ __m256i __DEFAULT_FN_ATTRS
659_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
660           __m256i __B)
661{
662  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
663             (__v8si) __B,
664             (__v8si) __W,
665             (__mmask8) __U);
666}
667
668static __inline__ __m256i __DEFAULT_FN_ATTRS
669_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
670{
671  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
672             (__v8si) __B,
673             (__v8si)
674             _mm256_setzero_si256 (),
675             (__mmask8) __U);
676}
677
678static __inline__ __m256i __DEFAULT_FN_ATTRS
679_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
680           __m256i __B)
681{
682  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
683             (__v4di) __B,
684             (__v4di) __W,
685             (__mmask8) __U);
686}
687
688static __inline__ __m256i __DEFAULT_FN_ATTRS
689_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
690{
691  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
692             (__v4di) __B,
693             (__v4di)
694             _mm256_setzero_si256 (),
695             (__mmask8) __U);
696}
697
698static __inline__ __m128i __DEFAULT_FN_ATTRS
699_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
700        __m128i __B)
701{
702  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
703             (__v4si) __B,
704             (__v4si) __W,
705             (__mmask8) __U);
706}
707
708static __inline__ __m128i __DEFAULT_FN_ATTRS
709_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
710{
711  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
712             (__v4si) __B,
713             (__v4si)
714             _mm_setzero_si128 (),
715             (__mmask8) __U);
716}
717
718static __inline__ __m128i __DEFAULT_FN_ATTRS
719_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
720        __m128i __B)
721{
722  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
723             (__v2di) __B,
724             (__v2di) __W,
725             (__mmask8) __U);
726}
727
728static __inline__ __m128i __DEFAULT_FN_ATTRS
729_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
730{
731  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
732             (__v2di) __B,
733             (__v2di)
734             _mm_setzero_si128 (),
735             (__mmask8) __U);
736}
737
738static __inline__ __m128i __DEFAULT_FN_ATTRS
739_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
740        __m128i __B)
741{
742  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
743             (__v4si) __B,
744             (__v4si) __W,
745             (__mmask8) __U);
746}
747
748static __inline__ __m128i __DEFAULT_FN_ATTRS
749_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
750{
751  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
752             (__v4si) __B,
753             (__v4si)
754             _mm_setzero_si128 (),
755             (__mmask8) __U);
756}
757
758static __inline__ __m128i __DEFAULT_FN_ATTRS
759_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
760        __m128i __B)
761{
762  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
763             (__v2di) __B,
764             (__v2di) __W,
765             (__mmask8) __U);
766}
767
768static __inline__ __m128i __DEFAULT_FN_ATTRS
769_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
770{
771  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
772             (__v2di) __B,
773             (__v2di)
774             _mm_setzero_si128 (),
775             (__mmask8) __U);
776}
777
778static __inline__ __m256i __DEFAULT_FN_ATTRS
779_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
780           __m256i __Y)
781{
782  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
783              (__v8si) __Y,
784              (__v4di) __W, __M);
785}
786
787static __inline__ __m256i __DEFAULT_FN_ATTRS
788_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
789{
790  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
791              (__v8si) __Y,
792              (__v4di)
793              _mm256_setzero_si256 (),
794              __M);
795}
796
797static __inline__ __m128i __DEFAULT_FN_ATTRS
798_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
799        __m128i __Y)
800{
801  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
802              (__v4si) __Y,
803              (__v2di) __W, __M);
804}
805
806static __inline__ __m128i __DEFAULT_FN_ATTRS
807_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
808{
809  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
810              (__v4si) __Y,
811              (__v2di)
812              _mm_setzero_si128 (),
813              __M);
814}
815
816static __inline__ __m256i __DEFAULT_FN_ATTRS
817_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
818           __m256i __Y)
819{
820  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
821               (__v8si) __Y,
822               (__v4di) __W, __M);
823}
824
825static __inline__ __m256i __DEFAULT_FN_ATTRS
826_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
827{
828  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
829               (__v8si) __Y,
830               (__v4di)
831               _mm256_setzero_si256 (),
832               __M);
833}
834
835static __inline__ __m128i __DEFAULT_FN_ATTRS
836_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
837        __m128i __Y)
838{
839  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
840               (__v4si) __Y,
841               (__v2di) __W, __M);
842}
843
844static __inline__ __m128i __DEFAULT_FN_ATTRS
845_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
846{
847  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
848               (__v4si) __Y,
849               (__v2di)
850               _mm_setzero_si128 (),
851               __M);
852}
853
854static __inline__ __m256i __DEFAULT_FN_ATTRS
855_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
856{
857  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
858              (__v8si) __B,
859              (__v8si)
860              _mm256_setzero_si256 (),
861              __M);
862}
863
864static __inline__ __m256i __DEFAULT_FN_ATTRS
865_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
866       __m256i __B)
867{
868  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
869              (__v8si) __B,
870              (__v8si) __W, __M);
871}
872
873static __inline__ __m128i __DEFAULT_FN_ATTRS
874_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
875{
876  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
877              (__v4si) __B,
878              (__v4si)
879              _mm_setzero_si128 (),
880              __M);
881}
882
883static __inline__ __m128i __DEFAULT_FN_ATTRS
884_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A,
885          __m128i __B)
886{
887  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
888              (__v4si) __B,
889              (__v4si) __W, __M);
890}
891
892static __inline__ __m256i __DEFAULT_FN_ATTRS
893_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
894{
895  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
896                                             (__v8si)_mm256_and_si256(__A, __B),
897                                             (__v8si)__W);
898}
899
900static __inline__ __m256i __DEFAULT_FN_ATTRS
901_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
902{
903  return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
904}
905
906static __inline__ __m128i __DEFAULT_FN_ATTRS
907_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
908{
909  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
910                                             (__v4si)_mm_and_si128(__A, __B),
911                                             (__v4si)__W);
912}
913
914static __inline__ __m128i __DEFAULT_FN_ATTRS
915_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
916{
917  return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
918}
919
920static __inline__ __m256i __DEFAULT_FN_ATTRS
921_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
922{
923  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
924                                          (__v8si)_mm256_andnot_si256(__A, __B),
925                                          (__v8si)__W);
926}
927
928static __inline__ __m256i __DEFAULT_FN_ATTRS
929_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
930{
931  return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
932                                           __U, __A, __B);
933}
934
935static __inline__ __m128i __DEFAULT_FN_ATTRS
936_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
937{
938  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
939                                             (__v4si)_mm_andnot_si128(__A, __B),
940                                             (__v4si)__W);
941}
942
943static __inline__ __m128i __DEFAULT_FN_ATTRS
944_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
945{
946  return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
947}
948
949static __inline__ __m256i __DEFAULT_FN_ATTRS
950_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
951{
952  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
953                                             (__v8si)_mm256_or_si256(__A, __B),
954                                             (__v8si)__W);
955}
956
957static __inline__ __m256i __DEFAULT_FN_ATTRS
958_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
959{
960  return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
961}
962
963static __inline__ __m128i __DEFAULT_FN_ATTRS
964_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
965{
966  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
967                                             (__v4si)_mm_or_si128(__A, __B),
968                                             (__v4si)__W);
969}
970
971static __inline__ __m128i __DEFAULT_FN_ATTRS
972_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
973{
974  return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
975}
976
977static __inline__ __m256i __DEFAULT_FN_ATTRS
978_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
979{
980  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
981                                             (__v8si)_mm256_xor_si256(__A, __B),
982                                             (__v8si)__W);
983}
984
985static __inline__ __m256i __DEFAULT_FN_ATTRS
986_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
987{
988  return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
989}
990
991static __inline__ __m128i __DEFAULT_FN_ATTRS
992_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
993        __m128i __B)
994{
995  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
996                                             (__v4si)_mm_xor_si128(__A, __B),
997                                             (__v4si)__W);
998}
999
1000static __inline__ __m128i __DEFAULT_FN_ATTRS
1001_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
1002{
1003  return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
1004}
1005
1006static __inline__ __m256i __DEFAULT_FN_ATTRS
1007_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
1008{
1009  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
1010                                             (__v4di)_mm256_and_si256(__A, __B),
1011                                             (__v4di)__W);
1012}
1013
1014static __inline__ __m256i __DEFAULT_FN_ATTRS
1015_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
1016{
1017  return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
1018}
1019
1020static __inline__ __m128i __DEFAULT_FN_ATTRS
1021_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1022{
1023  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1024                                             (__v2di)_mm_and_si128(__A, __B),
1025                                             (__v2di)__W);
1026}
1027
1028static __inline__ __m128i __DEFAULT_FN_ATTRS
1029_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1030{
1031  return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
1032}
1033
1034static __inline__ __m256i __DEFAULT_FN_ATTRS
1035_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
1036{
1037  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
1038                                          (__v4di)_mm256_andnot_si256(__A, __B),
1039                                          (__v4di)__W);
1040}
1041
1042static __inline__ __m256i __DEFAULT_FN_ATTRS
1043_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
1044{
1045  return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
1046                                           __U, __A, __B);
1047}
1048
1049static __inline__ __m128i __DEFAULT_FN_ATTRS
1050_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1051{
1052  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1053                                             (__v2di)_mm_andnot_si128(__A, __B),
1054                                             (__v2di)__W);
1055}
1056
1057static __inline__ __m128i __DEFAULT_FN_ATTRS
1058_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1059{
1060  return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
1061}
1062
1063static __inline__ __m256i __DEFAULT_FN_ATTRS
1064_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
1065{
1066  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
1067                                             (__v4di)_mm256_or_si256(__A, __B),
1068                                             (__v4di)__W);
1069}
1070
1071static __inline__ __m256i __DEFAULT_FN_ATTRS
1072_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
1073{
1074  return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
1075}
1076
1077static __inline__ __m128i __DEFAULT_FN_ATTRS
1078_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1079{
1080  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1081                                             (__v2di)_mm_or_si128(__A, __B),
1082                                             (__v2di)__W);
1083}
1084
1085static __inline__ __m128i __DEFAULT_FN_ATTRS
1086_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1087{
1088  return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
1089}
1090
1091static __inline__ __m256i __DEFAULT_FN_ATTRS
1092_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
1093{
1094  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
1095                                             (__v4di)_mm256_xor_si256(__A, __B),
1096                                             (__v4di)__W);
1097}
1098
1099static __inline__ __m256i __DEFAULT_FN_ATTRS
1100_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
1101{
1102  return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
1103}
1104
1105static __inline__ __m128i __DEFAULT_FN_ATTRS
1106_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
1107        __m128i __B)
1108{
1109  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1110                                             (__v2di)_mm_xor_si128(__A, __B),
1111                                             (__v2di)__W);
1112}
1113
1114static __inline__ __m128i __DEFAULT_FN_ATTRS
1115_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1116{
1117  return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
1118}
1119
/* Expands to __builtin_ia32_cmpd128_mask on (a) and (b) as __v4si with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), \
                                         (int)(p), (__mmask8)-1))
1124
/* Expands to __builtin_ia32_cmpd128_mask on (a) and (b) as __v4si with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), \
                                         (int)(p), (__mmask8)(m)))
1129
/* Expands to __builtin_ia32_ucmpd128_mask on (a) and (b) as __v4si with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), \
                                          (int)(p), (__mmask8)-1))
1134
/* Expands to __builtin_ia32_ucmpd128_mask on (a) and (b) as __v4si with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), \
                                          (int)(p), (__mmask8)(m)))
1139
/* Expands to __builtin_ia32_cmpd256_mask on (a) and (b) as __v8si with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), \
                                         (int)(p), (__mmask8)-1))
1144
/* Expands to __builtin_ia32_cmpd256_mask on (a) and (b) as __v8si with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), \
                                         (int)(p), (__mmask8)(m)))
1149
/* Expands to __builtin_ia32_ucmpd256_mask on (a) and (b) as __v8si with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), \
                                          (int)(p), (__mmask8)-1))
1154
/* Expands to __builtin_ia32_ucmpd256_mask on (a) and (b) as __v8si with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), \
                                          (int)(p), (__mmask8)(m)))
1159
/* Expands to __builtin_ia32_cmpq128_mask on (a) and (b) as __v2di with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), \
                                         (int)(p), (__mmask8)-1))
1164
/* Expands to __builtin_ia32_cmpq128_mask on (a) and (b) as __v2di with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), \
                                         (int)(p), (__mmask8)(m)))
1169
/* Expands to __builtin_ia32_ucmpq128_mask on (a) and (b) as __v2di with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), \
                                          (int)(p), (__mmask8)-1))
1174
/* Expands to __builtin_ia32_ucmpq128_mask on (a) and (b) as __v2di with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), \
                                          (int)(p), (__mmask8)(m)))
1179
/* Expands to __builtin_ia32_cmpq256_mask on (a) and (b) as __v4di with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), \
                                         (int)(p), (__mmask8)-1))
1184
/* Expands to __builtin_ia32_cmpq256_mask on (a) and (b) as __v4di with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), \
                                         (int)(p), (__mmask8)(m)))
1189
/* Expands to __builtin_ia32_ucmpq256_mask on (a) and (b) as __v4di with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), \
                                          (int)(p), (__mmask8)-1))
1194
/* Expands to __builtin_ia32_ucmpq256_mask on (a) and (b) as __v4di with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), \
                                          (int)(p), (__mmask8)(m)))
1199
/* Expands to __builtin_ia32_cmpps256_mask on (a) and (b) as __v8sf with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), \
                                          (int)(p), (__mmask8)-1))
1204
/* Expands to __builtin_ia32_cmpps256_mask on (a) and (b) as __v8sf with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), \
                                          (int)(p), (__mmask8)(m)))
1209
/* Expands to __builtin_ia32_cmppd256_mask on (a) and (b) as __v4df with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), \
                                          (int)(p), (__mmask8)-1))
1214
/* Expands to __builtin_ia32_cmppd256_mask on (a) and (b) as __v4df with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), \
                                          (int)(p), (__mmask8)(m)))
1219
/* Expands to __builtin_ia32_cmpps128_mask on (a) and (b) as __v4sf with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), \
                                          (int)(p), (__mmask8)-1))
1224
/* Expands to __builtin_ia32_cmpps128_mask on (a) and (b) as __v4sf with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), \
                                          (int)(p), (__mmask8)(m)))
1229
/* Expands to __builtin_ia32_cmppd128_mask on (a) and (b) as __v2df with
   predicate (p) and (__mmask8)-1 as the mask; the value is an __mmask8. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), \
                                          (int)(p), (__mmask8)-1))
1234
/* Expands to __builtin_ia32_cmppd128_mask on (a) and (b) as __v2df with
   predicate (p) and (m) as the mask; the value is an __mmask8. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), \
                                          (int)(p), (__mmask8)(m)))
1239
1240static __inline__ __m128d __DEFAULT_FN_ATTRS
1241_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1242{
1243  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
1244                                                    (__v2df) __B,
1245                                                    (__v2df) __C,
1246                                                    (__mmask8) __U);
1247}
1248
1249static __inline__ __m128d __DEFAULT_FN_ATTRS
1250_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1251{
1252  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
1253                                                     (__v2df) __B,
1254                                                     (__v2df) __C,
1255                                                     (__mmask8) __U);
1256}
1257
1258static __inline__ __m128d __DEFAULT_FN_ATTRS
1259_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1260{
1261  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
1262                                                     (__v2df) __B,
1263                                                     (__v2df) __C,
1264                                                     (__mmask8) __U);
1265}
1266
1267static __inline__ __m128d __DEFAULT_FN_ATTRS
1268_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1269{
1270  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
1271                                                    (__v2df) __B,
1272                                                    -(__v2df) __C,
1273                                                    (__mmask8) __U);
1274}
1275
1276static __inline__ __m128d __DEFAULT_FN_ATTRS
1277_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1278{
1279  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
1280                                                     (__v2df) __B,
1281                                                     -(__v2df) __C,
1282                                                     (__mmask8) __U);
1283}
1284
1285static __inline__ __m128d __DEFAULT_FN_ATTRS
1286_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1287{
1288  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
1289                                                     (__v2df) __B,
1290                                                     (__v2df) __C,
1291                                                     (__mmask8) __U);
1292}
1293
1294static __inline__ __m128d __DEFAULT_FN_ATTRS
1295_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1296{
1297  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
1298                                                     (__v2df) __B,
1299                                                     (__v2df) __C,
1300                                                     (__mmask8) __U);
1301}
1302
1303static __inline__ __m128d __DEFAULT_FN_ATTRS
1304_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1305{
1306  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
1307                                                     (__v2df) __B,
1308                                                     -(__v2df) __C,
1309                                                     (__mmask8) __U);
1310}
1311
1312static __inline__ __m256d __DEFAULT_FN_ATTRS
1313_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1314{
1315  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
1316                                                    (__v4df) __B,
1317                                                    (__v4df) __C,
1318                                                    (__mmask8) __U);
1319}
1320
1321static __inline__ __m256d __DEFAULT_FN_ATTRS
1322_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1323{
1324  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
1325                                                     (__v4df) __B,
1326                                                     (__v4df) __C,
1327                                                     (__mmask8) __U);
1328}
1329
1330static __inline__ __m256d __DEFAULT_FN_ATTRS
1331_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1332{
1333  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
1334                                                     (__v4df) __B,
1335                                                     (__v4df) __C,
1336                                                     (__mmask8) __U);
1337}
1338
1339static __inline__ __m256d __DEFAULT_FN_ATTRS
1340_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1341{
1342  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
1343                                                    (__v4df) __B,
1344                                                    -(__v4df) __C,
1345                                                    (__mmask8) __U);
1346}
1347
1348static __inline__ __m256d __DEFAULT_FN_ATTRS
1349_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1350{
1351  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
1352                                                     (__v4df) __B,
1353                                                     -(__v4df) __C,
1354                                                     (__mmask8) __U);
1355}
1356
1357static __inline__ __m256d __DEFAULT_FN_ATTRS
1358_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1359{
1360  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
1361                                                     (__v4df) __B,
1362                                                     (__v4df) __C,
1363                                                     (__mmask8) __U);
1364}
1365
1366static __inline__ __m256d __DEFAULT_FN_ATTRS
1367_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1368{
1369  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
1370                                                     (__v4df) __B,
1371                                                     (__v4df) __C,
1372                                                     (__mmask8) __U);
1373}
1374
1375static __inline__ __m256d __DEFAULT_FN_ATTRS
1376_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1377{
1378  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
1379                                                     (__v4df) __B,
1380                                                     -(__v4df) __C,
1381                                                     (__mmask8) __U);
1382}
1383
1384static __inline__ __m128 __DEFAULT_FN_ATTRS
1385_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1386{
1387  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
1388                                                   (__v4sf) __B,
1389                                                   (__v4sf) __C,
1390                                                   (__mmask8) __U);
1391}
1392
1393static __inline__ __m128 __DEFAULT_FN_ATTRS
1394_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1395{
1396  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
1397                                                    (__v4sf) __B,
1398                                                    (__v4sf) __C,
1399                                                    (__mmask8) __U);
1400}
1401
1402static __inline__ __m128 __DEFAULT_FN_ATTRS
1403_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1404{
1405  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
1406                                                    (__v4sf) __B,
1407                                                    (__v4sf) __C,
1408                                                    (__mmask8) __U);
1409}
1410
1411static __inline__ __m128 __DEFAULT_FN_ATTRS
1412_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1413{
1414  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
1415                                                   (__v4sf) __B,
1416                                                   -(__v4sf) __C,
1417                                                   (__mmask8) __U);
1418}
1419
1420static __inline__ __m128 __DEFAULT_FN_ATTRS
1421_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1422{
1423  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
1424                                                    (__v4sf) __B,
1425                                                    -(__v4sf) __C,
1426                                                    (__mmask8) __U);
1427}
1428
1429static __inline__ __m128 __DEFAULT_FN_ATTRS
1430_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1431{
1432  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
1433                                                    (__v4sf) __B,
1434                                                    (__v4sf) __C,
1435                                                    (__mmask8) __U);
1436}
1437
1438static __inline__ __m128 __DEFAULT_FN_ATTRS
1439_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1440{
1441  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
1442                                                    (__v4sf) __B,
1443                                                    (__v4sf) __C,
1444                                                    (__mmask8) __U);
1445}
1446
1447static __inline__ __m128 __DEFAULT_FN_ATTRS
1448_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1449{
1450  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
1451                                                    (__v4sf) __B,
1452                                                    -(__v4sf) __C,
1453                                                    (__mmask8) __U);
1454}
1455
1456static __inline__ __m256 __DEFAULT_FN_ATTRS
1457_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1458{
1459  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
1460                                                   (__v8sf) __B,
1461                                                   (__v8sf) __C,
1462                                                   (__mmask8) __U);
1463}
1464
1465static __inline__ __m256 __DEFAULT_FN_ATTRS
1466_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1467{
1468  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
1469                                                    (__v8sf) __B,
1470                                                    (__v8sf) __C,
1471                                                    (__mmask8) __U);
1472}
1473
1474static __inline__ __m256 __DEFAULT_FN_ATTRS
1475_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1476{
1477  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
1478                                                    (__v8sf) __B,
1479                                                    (__v8sf) __C,
1480                                                    (__mmask8) __U);
1481}
1482
1483static __inline__ __m256 __DEFAULT_FN_ATTRS
1484_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1485{
1486  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
1487                                                   (__v8sf) __B,
1488                                                   -(__v8sf) __C,
1489                                                   (__mmask8) __U);
1490}
1491
1492static __inline__ __m256 __DEFAULT_FN_ATTRS
1493_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1494{
1495  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
1496                                                    (__v8sf) __B,
1497                                                    -(__v8sf) __C,
1498                                                    (__mmask8) __U);
1499}
1500
1501static __inline__ __m256 __DEFAULT_FN_ATTRS
1502_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1503{
1504  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
1505                                                    (__v8sf) __B,
1506                                                    (__v8sf) __C,
1507                                                    (__mmask8) __U);
1508}
1509
1510static __inline__ __m256 __DEFAULT_FN_ATTRS
1511_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1512{
1513  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
1514                                                    (__v8sf) __B,
1515                                                    (__v8sf) __C,
1516                                                    (__mmask8) __U);
1517}
1518
1519static __inline__ __m256 __DEFAULT_FN_ATTRS
1520_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1521{
1522  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
1523                                                    (__v8sf) __B,
1524                                                    -(__v8sf) __C,
1525                                                    (__mmask8) __U);
1526}
1527
1528static __inline__ __m128d __DEFAULT_FN_ATTRS
1529_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1530{
1531  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
1532                                                       (__v2df) __B,
1533                                                       (__v2df) __C,
1534                                                       (__mmask8) __U);
1535}
1536
1537static __inline__ __m128d __DEFAULT_FN_ATTRS
1538_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1539{
1540  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
1541                                                        (__v2df) __B,
1542                                                        (__v2df) __C,
1543                                                        (__mmask8)
1544                                                        __U);
1545}
1546
1547static __inline__ __m128d __DEFAULT_FN_ATTRS
1548_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1549{
1550  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
1551                                                        (__v2df) __B,
1552                                                        (__v2df) __C,
1553                                                        (__mmask8)
1554                                                        __U);
1555}
1556
1557static __inline__ __m128d __DEFAULT_FN_ATTRS
1558_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1559{
1560  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
1561                                                       (__v2df) __B,
1562                                                       -(__v2df) __C,
1563                                                       (__mmask8) __U);
1564}
1565
1566static __inline__ __m128d __DEFAULT_FN_ATTRS
1567_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1568{
1569  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
1570                                                        (__v2df) __B,
1571                                                        -(__v2df) __C,
1572                                                        (__mmask8)
1573                                                        __U);
1574}
1575
1576static __inline__ __m256d __DEFAULT_FN_ATTRS
1577_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1578{
1579  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
1580                                                       (__v4df) __B,
1581                                                       (__v4df) __C,
1582                                                       (__mmask8) __U);
1583}
1584
1585static __inline__ __m256d __DEFAULT_FN_ATTRS
1586_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1587{
1588  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
1589                                                        (__v4df) __B,
1590                                                        (__v4df) __C,
1591                                                        (__mmask8)
1592                                                        __U);
1593}
1594
1595static __inline__ __m256d __DEFAULT_FN_ATTRS
1596_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1597{
1598  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
1599                                                        (__v4df) __B,
1600                                                        (__v4df) __C,
1601                                                        (__mmask8)
1602                                                        __U);
1603}
1604
1605static __inline__ __m256d __DEFAULT_FN_ATTRS
1606_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1607{
1608  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
1609                                                       (__v4df) __B,
1610                                                       -(__v4df) __C,
1611                                                       (__mmask8) __U);
1612}
1613
1614static __inline__ __m256d __DEFAULT_FN_ATTRS
1615_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1616{
1617  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
1618                                                        (__v4df) __B,
1619                                                        -(__v4df) __C,
1620                                                        (__mmask8)
1621                                                        __U);
1622}
1623
1624static __inline__ __m128 __DEFAULT_FN_ATTRS
1625_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1626{
1627  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
1628                                                      (__v4sf) __B,
1629                                                      (__v4sf) __C,
1630                                                      (__mmask8) __U);
1631}
1632
1633static __inline__ __m128 __DEFAULT_FN_ATTRS
1634_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1635{
1636  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
1637                                                       (__v4sf) __B,
1638                                                       (__v4sf) __C,
1639                                                       (__mmask8) __U);
1640}
1641
1642static __inline__ __m128 __DEFAULT_FN_ATTRS
1643_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1644{
1645  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
1646                                                       (__v4sf) __B,
1647                                                       (__v4sf) __C,
1648                                                       (__mmask8) __U);
1649}
1650
1651static __inline__ __m128 __DEFAULT_FN_ATTRS
1652_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1653{
1654  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
1655                                                      (__v4sf) __B,
1656                                                      -(__v4sf) __C,
1657                                                      (__mmask8) __U);
1658}
1659
1660static __inline__ __m128 __DEFAULT_FN_ATTRS
1661_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1662{
1663  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
1664                                                       (__v4sf) __B,
1665                                                       -(__v4sf) __C,
1666                                                       (__mmask8) __U);
1667}
1668
1669static __inline__ __m256 __DEFAULT_FN_ATTRS
1670_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1671                         __m256 __C)
1672{
1673  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
1674                                                      (__v8sf) __B,
1675                                                      (__v8sf) __C,
1676                                                      (__mmask8) __U);
1677}
1678
1679static __inline__ __m256 __DEFAULT_FN_ATTRS
1680_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1681{
1682  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
1683                                                       (__v8sf) __B,
1684                                                       (__v8sf) __C,
1685                                                       (__mmask8) __U);
1686}
1687
1688static __inline__ __m256 __DEFAULT_FN_ATTRS
1689_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1690{
1691  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
1692                                                       (__v8sf) __B,
1693                                                       (__v8sf) __C,
1694                                                       (__mmask8) __U);
1695}
1696
1697static __inline__ __m256 __DEFAULT_FN_ATTRS
1698_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1699{
1700  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
1701                                                      (__v8sf) __B,
1702                                                      -(__v8sf) __C,
1703                                                      (__mmask8) __U);
1704}
1705
1706static __inline__ __m256 __DEFAULT_FN_ATTRS
1707_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1708{
1709  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
1710                                                       (__v8sf) __B,
1711                                                       -(__v8sf) __C,
1712                                                       (__mmask8) __U);
1713}
1714
1715static __inline__ __m128d __DEFAULT_FN_ATTRS
1716_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1717{
1718  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
1719                                                     (__v2df) __B,
1720                                                     (__v2df) __C,
1721                                                     (__mmask8) __U);
1722}
1723
1724static __inline__ __m256d __DEFAULT_FN_ATTRS
1725_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1726{
1727  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
1728                                                     (__v4df) __B,
1729                                                     (__v4df) __C,
1730                                                     (__mmask8) __U);
1731}
1732
1733static __inline__ __m128 __DEFAULT_FN_ATTRS
1734_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1735{
1736  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
1737                                                    (__v4sf) __B,
1738                                                    (__v4sf) __C,
1739                                                    (__mmask8) __U);
1740}
1741
1742static __inline__ __m256 __DEFAULT_FN_ATTRS
1743_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1744{
1745  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
1746                                                    (__v8sf) __B,
1747                                                    (__v8sf) __C,
1748                                                    (__mmask8) __U);
1749}
1750
1751static __inline__ __m128d __DEFAULT_FN_ATTRS
1752_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1753{
1754  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
1755                                                        (__v2df) __B,
1756                                                        (__v2df) __C,
1757                                                        (__mmask8)
1758                                                        __U);
1759}
1760
1761static __inline__ __m256d __DEFAULT_FN_ATTRS
1762_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1763{
1764  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
1765                                                        (__v4df) __B,
1766                                                        (__v4df) __C,
1767                                                        (__mmask8)
1768                                                        __U);
1769}
1770
1771static __inline__ __m128 __DEFAULT_FN_ATTRS
1772_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1773{
1774  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
1775                                                       (__v4sf) __B,
1776                                                       (__v4sf) __C,
1777                                                       (__mmask8) __U);
1778}
1779
1780static __inline__ __m256 __DEFAULT_FN_ATTRS
1781_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1782{
1783  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
1784                                                       (__v8sf) __B,
1785                                                       (__v8sf) __C,
1786                                                       (__mmask8) __U);
1787}
1788
1789static __inline__ __m128d __DEFAULT_FN_ATTRS
1790_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1791{
1792  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
1793                                                     (__v2df) __B,
1794                                                     (__v2df) __C,
1795                                                     (__mmask8) __U);
1796}
1797
1798static __inline__ __m256d __DEFAULT_FN_ATTRS
1799_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1800{
1801  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
1802                                                     (__v4df) __B,
1803                                                     (__v4df) __C,
1804                                                     (__mmask8) __U);
1805}
1806
1807static __inline__ __m128 __DEFAULT_FN_ATTRS
1808_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1809{
1810  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
1811                                                    (__v4sf) __B,
1812                                                    (__v4sf) __C,
1813                                                    (__mmask8) __U);
1814}
1815
1816static __inline__ __m256 __DEFAULT_FN_ATTRS
1817_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1818{
1819  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
1820                                                    (__v8sf) __B,
1821                                                    (__v8sf) __C,
1822                                                    (__mmask8) __U);
1823}
1824
1825static __inline__ __m128d __DEFAULT_FN_ATTRS
1826_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1827{
1828  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
1829                                                     (__v2df) __B,
1830                                                     (__v2df) __C,
1831                                                     (__mmask8) __U);
1832}
1833
1834static __inline__ __m128d __DEFAULT_FN_ATTRS
1835_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1836{
1837  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
1838                                                      (__v2df) __B,
1839                                                      (__v2df) __C,
1840                                                      (__mmask8) __U);
1841}
1842
1843static __inline__ __m256d __DEFAULT_FN_ATTRS
1844_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1845{
1846  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
1847                                                     (__v4df) __B,
1848                                                     (__v4df) __C,
1849                                                     (__mmask8) __U);
1850}
1851
1852static __inline__ __m256d __DEFAULT_FN_ATTRS
1853_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1854{
1855  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
1856                                                      (__v4df) __B,
1857                                                      (__v4df) __C,
1858                                                      (__mmask8) __U);
1859}
1860
1861static __inline__ __m128 __DEFAULT_FN_ATTRS
1862_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1863{
1864  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
1865                                                    (__v4sf) __B,
1866                                                    (__v4sf) __C,
1867                                                    (__mmask8) __U);
1868}
1869
1870static __inline__ __m128 __DEFAULT_FN_ATTRS
1871_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1872{
1873  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
1874                                                     (__v4sf) __B,
1875                                                     (__v4sf) __C,
1876                                                     (__mmask8) __U);
1877}
1878
1879static __inline__ __m256 __DEFAULT_FN_ATTRS
1880_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1881{
1882  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
1883                                                    (__v8sf) __B,
1884                                                    (__v8sf) __C,
1885                                                    (__mmask8) __U);
1886}
1887
1888static __inline__ __m256 __DEFAULT_FN_ATTRS
1889_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1890{
1891  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
1892                                                     (__v8sf) __B,
1893                                                     (__v8sf) __C,
1894                                                     (__mmask8) __U);
1895}
1896
1897static __inline__ __m128d __DEFAULT_FN_ATTRS
1898_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1899  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
1900             (__v2df) __B,
1901             (__v2df) __W,
1902             (__mmask8) __U);
1903}
1904
1905static __inline__ __m128d __DEFAULT_FN_ATTRS
1906_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B) {
1907  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
1908             (__v2df) __B,
1909             (__v2df)
1910             _mm_setzero_pd (),
1911             (__mmask8) __U);
1912}
1913
1914static __inline__ __m256d __DEFAULT_FN_ATTRS
1915_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1916  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
1917             (__v4df) __B,
1918             (__v4df) __W,
1919             (__mmask8) __U);
1920}
1921
1922static __inline__ __m256d __DEFAULT_FN_ATTRS
1923_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B) {
1924  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
1925             (__v4df) __B,
1926             (__v4df)
1927             _mm256_setzero_pd (),
1928             (__mmask8) __U);
1929}
1930
1931static __inline__ __m128 __DEFAULT_FN_ATTRS
1932_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B) {
1933  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
1934            (__v4sf) __B,
1935            (__v4sf) __W,
1936            (__mmask8) __U);
1937}
1938
1939static __inline__ __m128 __DEFAULT_FN_ATTRS
1940_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B) {
1941  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
1942            (__v4sf) __B,
1943            (__v4sf)
1944            _mm_setzero_ps (),
1945            (__mmask8) __U);
1946}
1947
1948static __inline__ __m256 __DEFAULT_FN_ATTRS
1949_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B) {
1950  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
1951            (__v8sf) __B,
1952            (__v8sf) __W,
1953            (__mmask8) __U);
1954}
1955
1956static __inline__ __m256 __DEFAULT_FN_ATTRS
1957_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B) {
1958  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
1959            (__v8sf) __B,
1960            (__v8sf)
1961            _mm256_setzero_ps (),
1962            (__mmask8) __U);
1963}
1964
1965static __inline__ __m128i __DEFAULT_FN_ATTRS
1966_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1967  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1968                (__v4si) __W,
1969                (__v4si) __A);
1970}
1971
1972static __inline__ __m256i __DEFAULT_FN_ATTRS
1973_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1974  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1975                (__v8si) __W,
1976                (__v8si) __A);
1977}
1978
1979static __inline__ __m128d __DEFAULT_FN_ATTRS
1980_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1981  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1982                 (__v2df) __W,
1983                 (__v2df) __A);
1984}
1985
1986static __inline__ __m256d __DEFAULT_FN_ATTRS
1987_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1988  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1989                 (__v4df) __W,
1990                 (__v4df) __A);
1991}
1992
1993static __inline__ __m128 __DEFAULT_FN_ATTRS
1994_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1995  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1996                (__v4sf) __W,
1997                (__v4sf) __A);
1998}
1999
2000static __inline__ __m256 __DEFAULT_FN_ATTRS
2001_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
2002  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
2003                (__v8sf) __W,
2004                (__v8sf) __A);
2005}
2006
2007static __inline__ __m128i __DEFAULT_FN_ATTRS
2008_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
2009  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
2010                (__v2di) __W,
2011                (__v2di) __A);
2012}
2013
2014static __inline__ __m256i __DEFAULT_FN_ATTRS
2015_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
2016  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
2017                (__v4di) __W,
2018                (__v4di) __A);
2019}
2020
2021static __inline__ __m128d __DEFAULT_FN_ATTRS
2022_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2023  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
2024                  (__v2df) __W,
2025                  (__mmask8) __U);
2026}
2027
2028static __inline__ __m128d __DEFAULT_FN_ATTRS
2029_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
2030  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
2031                  (__v2df)
2032                  _mm_setzero_pd (),
2033                  (__mmask8) __U);
2034}
2035
2036static __inline__ __m256d __DEFAULT_FN_ATTRS
2037_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2038  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
2039                  (__v4df) __W,
2040                  (__mmask8) __U);
2041}
2042
2043static __inline__ __m256d __DEFAULT_FN_ATTRS
2044_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
2045  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
2046                  (__v4df)
2047                  _mm256_setzero_pd (),
2048                  (__mmask8) __U);
2049}
2050
2051static __inline__ __m128i __DEFAULT_FN_ATTRS
2052_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2053  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
2054                  (__v2di) __W,
2055                  (__mmask8) __U);
2056}
2057
2058static __inline__ __m128i __DEFAULT_FN_ATTRS
2059_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
2060  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
2061                  (__v2di)
2062                  _mm_setzero_si128 (),
2063                  (__mmask8) __U);
2064}
2065
2066static __inline__ __m256i __DEFAULT_FN_ATTRS
2067_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2068  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
2069                  (__v4di) __W,
2070                  (__mmask8) __U);
2071}
2072
2073static __inline__ __m256i __DEFAULT_FN_ATTRS
2074_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
2075  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
2076                  (__v4di)
2077                  _mm256_setzero_si256 (),
2078                  (__mmask8) __U);
2079}
2080
2081static __inline__ __m128 __DEFAULT_FN_ATTRS
2082_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2083  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
2084                 (__v4sf) __W,
2085                 (__mmask8) __U);
2086}
2087
2088static __inline__ __m128 __DEFAULT_FN_ATTRS
2089_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
2090  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
2091                 (__v4sf)
2092                 _mm_setzero_ps (),
2093                 (__mmask8) __U);
2094}
2095
2096static __inline__ __m256 __DEFAULT_FN_ATTRS
2097_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2098  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
2099                 (__v8sf) __W,
2100                 (__mmask8) __U);
2101}
2102
2103static __inline__ __m256 __DEFAULT_FN_ATTRS
2104_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
2105  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
2106                 (__v8sf)
2107                 _mm256_setzero_ps (),
2108                 (__mmask8) __U);
2109}
2110
2111static __inline__ __m128i __DEFAULT_FN_ATTRS
2112_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2113  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
2114                  (__v4si) __W,
2115                  (__mmask8) __U);
2116}
2117
2118static __inline__ __m128i __DEFAULT_FN_ATTRS
2119_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
2120  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
2121                  (__v4si)
2122                  _mm_setzero_si128 (),
2123                  (__mmask8) __U);
2124}
2125
2126static __inline__ __m256i __DEFAULT_FN_ATTRS
2127_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2128  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
2129                  (__v8si) __W,
2130                  (__mmask8) __U);
2131}
2132
2133static __inline__ __m256i __DEFAULT_FN_ATTRS
2134_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
2135  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
2136                  (__v8si)
2137                  _mm256_setzero_si256 (),
2138                  (__mmask8) __U);
2139}
2140
2141static __inline__ void __DEFAULT_FN_ATTRS
2142_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
2143  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
2144            (__v2df) __A,
2145            (__mmask8) __U);
2146}
2147
2148static __inline__ void __DEFAULT_FN_ATTRS
2149_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
2150  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
2151            (__v4df) __A,
2152            (__mmask8) __U);
2153}
2154
2155static __inline__ void __DEFAULT_FN_ATTRS
2156_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
2157  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
2158            (__v2di) __A,
2159            (__mmask8) __U);
2160}
2161
2162static __inline__ void __DEFAULT_FN_ATTRS
2163_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
2164  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
2165            (__v4di) __A,
2166            (__mmask8) __U);
2167}
2168
2169static __inline__ void __DEFAULT_FN_ATTRS
2170_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
2171  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
2172            (__v4sf) __A,
2173            (__mmask8) __U);
2174}
2175
2176static __inline__ void __DEFAULT_FN_ATTRS
2177_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
2178  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
2179            (__v8sf) __A,
2180            (__mmask8) __U);
2181}
2182
2183static __inline__ void __DEFAULT_FN_ATTRS
2184_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
2185  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
2186            (__v4si) __A,
2187            (__mmask8) __U);
2188}
2189
2190static __inline__ void __DEFAULT_FN_ATTRS
2191_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
2192  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
2193            (__v8si) __A,
2194            (__mmask8) __U);
2195}
2196
2197static __inline__ __m128d __DEFAULT_FN_ATTRS
2198_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2199  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
2200                (__v2df) __W,
2201                (__mmask8) __U);
2202}
2203
2204static __inline__ __m128d __DEFAULT_FN_ATTRS
2205_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
2206  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
2207                (__v2df)
2208                _mm_setzero_pd (),
2209                (__mmask8) __U);
2210}
2211
2212static __inline__ __m256d __DEFAULT_FN_ATTRS
2213_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2214  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
2215                (__v4df) __W,
2216                (__mmask8) __U);
2217}
2218
2219static __inline__ __m256d __DEFAULT_FN_ATTRS
2220_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
2221  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
2222                (__v4df)
2223                _mm256_setzero_pd (),
2224                (__mmask8) __U);
2225}
2226
2227static __inline__ __m128 __DEFAULT_FN_ATTRS
2228_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2229  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
2230               (__v4sf) __W,
2231               (__mmask8) __U);
2232}
2233
2234static __inline__ __m128 __DEFAULT_FN_ATTRS
2235_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) {
2236  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
2237               (__v4sf)
2238               _mm_setzero_ps (),
2239               (__mmask8) __U);
2240}
2241
2242static __inline__ __m256 __DEFAULT_FN_ATTRS
2243_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2244  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
2245               (__v8sf) __W,
2246               (__mmask8) __U);
2247}
2248
2249static __inline__ __m256 __DEFAULT_FN_ATTRS
2250_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) {
2251  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
2252               (__v8sf)
2253               _mm256_setzero_ps (),
2254               (__mmask8) __U);
2255}
2256
2257static __inline__ __m128i __DEFAULT_FN_ATTRS
2258_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2259  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
2260                (__v4si) __W,
2261                (__mmask8) __U);
2262}
2263
2264static __inline__ __m128i __DEFAULT_FN_ATTRS
2265_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
2266  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
2267                (__v4si)
2268                _mm_setzero_si128 (),
2269                (__mmask8) __U);
2270}
2271
2272static __inline__ __m128i __DEFAULT_FN_ATTRS
2273_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2274  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
2275                (__v4si) __W,
2276                (__mmask8) __U);
2277}
2278
2279static __inline__ __m128i __DEFAULT_FN_ATTRS
2280_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
2281  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
2282                (__v4si)
2283                _mm_setzero_si128 (),
2284                (__mmask8) __U);
2285}
2286
2287static __inline__ __m128 __DEFAULT_FN_ATTRS
2288_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
2289  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2290            (__v4sf) __W,
2291            (__mmask8) __U);
2292}
2293
2294static __inline__ __m128 __DEFAULT_FN_ATTRS
2295_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
2296  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2297            (__v4sf)
2298            _mm_setzero_ps (),
2299            (__mmask8) __U);
2300}
2301
2302static __inline__ __m128 __DEFAULT_FN_ATTRS
2303_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2304  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
2305               (__v4sf) __W,
2306               (__mmask8) __U);
2307}
2308
2309static __inline__ __m128 __DEFAULT_FN_ATTRS
2310_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
2311  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
2312               (__v4sf)
2313               _mm_setzero_ps (),
2314               (__mmask8) __U);
2315}
2316
2317static __inline__ __m128i __DEFAULT_FN_ATTRS
2318_mm_cvtpd_epu32 (__m128d __A) {
2319  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2320                 (__v4si)
2321                 _mm_setzero_si128 (),
2322                 (__mmask8) -1);
2323}
2324
2325static __inline__ __m128i __DEFAULT_FN_ATTRS
2326_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2327  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2328                 (__v4si) __W,
2329                 (__mmask8) __U);
2330}
2331
2332static __inline__ __m128i __DEFAULT_FN_ATTRS
2333_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2334  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2335                 (__v4si)
2336                 _mm_setzero_si128 (),
2337                 (__mmask8) __U);
2338}
2339
2340static __inline__ __m128i __DEFAULT_FN_ATTRS
2341_mm256_cvtpd_epu32 (__m256d __A) {
2342  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2343                 (__v4si)
2344                 _mm_setzero_si128 (),
2345                 (__mmask8) -1);
2346}
2347
2348static __inline__ __m128i __DEFAULT_FN_ATTRS
2349_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2350  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2351                 (__v4si) __W,
2352                 (__mmask8) __U);
2353}
2354
2355static __inline__ __m128i __DEFAULT_FN_ATTRS
2356_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2357  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2358                 (__v4si)
2359                 _mm_setzero_si128 (),
2360                 (__mmask8) __U);
2361}
2362
2363static __inline__ __m128i __DEFAULT_FN_ATTRS
2364_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2365  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
2366                (__v4si) __W,
2367                (__mmask8) __U);
2368}
2369
2370static __inline__ __m128i __DEFAULT_FN_ATTRS
2371_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
2372  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
2373                (__v4si)
2374                _mm_setzero_si128 (),
2375                (__mmask8) __U);
2376}
2377
2378static __inline__ __m256i __DEFAULT_FN_ATTRS
2379_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2380  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
2381                (__v8si) __W,
2382                (__mmask8) __U);
2383}
2384
2385static __inline__ __m256i __DEFAULT_FN_ATTRS
2386_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2387  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
2388                (__v8si)
2389                _mm256_setzero_si256 (),
2390                (__mmask8) __U);
2391}
2392
2393static __inline__ __m128d __DEFAULT_FN_ATTRS
2394_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2395  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
2396                (__v2df) __W,
2397                (__mmask8) __U);
2398}
2399
2400static __inline__ __m128d __DEFAULT_FN_ATTRS
2401_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2402  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
2403                (__v2df)
2404                _mm_setzero_pd (),
2405                (__mmask8) __U);
2406}
2407
2408static __inline__ __m256d __DEFAULT_FN_ATTRS
2409_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2410  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
2411                (__v4df) __W,
2412                (__mmask8) __U);
2413}
2414
2415static __inline__ __m256d __DEFAULT_FN_ATTRS
2416_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2417  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
2418                (__v4df)
2419                _mm256_setzero_pd (),
2420                (__mmask8) __U);
2421}
2422
2423static __inline__ __m128i __DEFAULT_FN_ATTRS
2424_mm_cvtps_epu32 (__m128 __A) {
2425  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2426                 (__v4si)
2427                 _mm_setzero_si128 (),
2428                 (__mmask8) -1);
2429}
2430
2431static __inline__ __m128i __DEFAULT_FN_ATTRS
2432_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2433  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2434                 (__v4si) __W,
2435                 (__mmask8) __U);
2436}
2437
2438static __inline__ __m128i __DEFAULT_FN_ATTRS
2439_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2440  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2441                 (__v4si)
2442                 _mm_setzero_si128 (),
2443                 (__mmask8) __U);
2444}
2445
2446static __inline__ __m256i __DEFAULT_FN_ATTRS
2447_mm256_cvtps_epu32 (__m256 __A) {
2448  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2449                 (__v8si)
2450                 _mm256_setzero_si256 (),
2451                 (__mmask8) -1);
2452}
2453
2454static __inline__ __m256i __DEFAULT_FN_ATTRS
2455_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2456  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2457                 (__v8si) __W,
2458                 (__mmask8) __U);
2459}
2460
2461static __inline__ __m256i __DEFAULT_FN_ATTRS
2462_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2463  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2464                 (__v8si)
2465                 _mm256_setzero_si256 (),
2466                 (__mmask8) __U);
2467}
2468
2469static __inline__ __m128i __DEFAULT_FN_ATTRS
2470_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2471  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2472                 (__v4si) __W,
2473                 (__mmask8) __U);
2474}
2475
2476static __inline__ __m128i __DEFAULT_FN_ATTRS
2477_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2478  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2479                 (__v4si)
2480                 _mm_setzero_si128 (),
2481                 (__mmask8) __U);
2482}
2483
2484static __inline__ __m128i __DEFAULT_FN_ATTRS
2485_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2486  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
2487                 (__v4si) __W,
2488                 (__mmask8) __U);
2489}
2490
2491static __inline__ __m128i __DEFAULT_FN_ATTRS
2492_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2493  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
2494                 (__v4si)
2495                 _mm_setzero_si128 (),
2496                 (__mmask8) __U);
2497}
2498
2499static __inline__ __m128i __DEFAULT_FN_ATTRS
2500_mm_cvttpd_epu32 (__m128d __A) {
2501  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2502                  (__v4si)
2503                  _mm_setzero_si128 (),
2504                  (__mmask8) -1);
2505}
2506
2507static __inline__ __m128i __DEFAULT_FN_ATTRS
2508_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2509  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2510                  (__v4si) __W,
2511                  (__mmask8) __U);
2512}
2513
2514static __inline__ __m128i __DEFAULT_FN_ATTRS
2515_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2516  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2517                  (__v4si)
2518                  _mm_setzero_si128 (),
2519                  (__mmask8) __U);
2520}
2521
2522static __inline__ __m128i __DEFAULT_FN_ATTRS
2523_mm256_cvttpd_epu32 (__m256d __A) {
2524  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2525                  (__v4si)
2526                  _mm_setzero_si128 (),
2527                  (__mmask8) -1);
2528}
2529
2530static __inline__ __m128i __DEFAULT_FN_ATTRS
2531_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2532  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2533                  (__v4si) __W,
2534                  (__mmask8) __U);
2535}
2536
2537static __inline__ __m128i __DEFAULT_FN_ATTRS
2538_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2539  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2540                  (__v4si)
2541                  _mm_setzero_si128 (),
2542                  (__mmask8) __U);
2543}
2544
2545static __inline__ __m128i __DEFAULT_FN_ATTRS
2546_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2547  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
2548                 (__v4si) __W,
2549                 (__mmask8) __U);
2550}
2551
2552static __inline__ __m128i __DEFAULT_FN_ATTRS
2553_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2554  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
2555                 (__v4si)
2556                 _mm_setzero_si128 (),
2557                 (__mmask8) __U);
2558}
2559
2560static __inline__ __m256i __DEFAULT_FN_ATTRS
2561_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2562  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
2563                 (__v8si) __W,
2564                 (__mmask8) __U);
2565}
2566
2567static __inline__ __m256i __DEFAULT_FN_ATTRS
2568_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2569  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
2570                 (__v8si)
2571                 _mm256_setzero_si256 (),
2572                 (__mmask8) __U);
2573}
2574
2575static __inline__ __m128i __DEFAULT_FN_ATTRS
2576_mm_cvttps_epu32 (__m128 __A) {
2577  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2578                  (__v4si)
2579                  _mm_setzero_si128 (),
2580                  (__mmask8) -1);
2581}
2582
2583static __inline__ __m128i __DEFAULT_FN_ATTRS
2584_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2585  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2586                  (__v4si) __W,
2587                  (__mmask8) __U);
2588}
2589
2590static __inline__ __m128i __DEFAULT_FN_ATTRS
2591_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2592  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2593                  (__v4si)
2594                  _mm_setzero_si128 (),
2595                  (__mmask8) __U);
2596}
2597
2598static __inline__ __m256i __DEFAULT_FN_ATTRS
2599_mm256_cvttps_epu32 (__m256 __A) {
2600  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2601                  (__v8si)
2602                  _mm256_setzero_si256 (),
2603                  (__mmask8) -1);
2604}
2605
2606static __inline__ __m256i __DEFAULT_FN_ATTRS
2607_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2608  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2609                  (__v8si) __W,
2610                  (__mmask8) __U);
2611}
2612
2613static __inline__ __m256i __DEFAULT_FN_ATTRS
2614_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2615  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2616                  (__v8si)
2617                  _mm256_setzero_si256 (),
2618                  (__mmask8) __U);
2619}
2620
2621static __inline__ __m128d __DEFAULT_FN_ATTRS
2622_mm_cvtepu32_pd (__m128i __A) {
2623  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
2624                 (__v2df)
2625                 _mm_setzero_pd (),
2626                 (__mmask8) -1);
2627}
2628
2629static __inline__ __m128d __DEFAULT_FN_ATTRS
2630_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2631  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
2632                 (__v2df) __W,
2633                 (__mmask8) __U);
2634}
2635
2636static __inline__ __m128d __DEFAULT_FN_ATTRS
2637_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2638  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
2639                 (__v2df)
2640                 _mm_setzero_pd (),
2641                 (__mmask8) __U);
2642}
2643
2644static __inline__ __m256d __DEFAULT_FN_ATTRS
2645_mm256_cvtepu32_pd (__m128i __A) {
2646  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
2647                 (__v4df)
2648                 _mm256_setzero_pd (),
2649                 (__mmask8) -1);
2650}
2651
2652static __inline__ __m256d __DEFAULT_FN_ATTRS
2653_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2654  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
2655                 (__v4df) __W,
2656                 (__mmask8) __U);
2657}
2658
2659static __inline__ __m256d __DEFAULT_FN_ATTRS
2660_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2661  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
2662                 (__v4df)
2663                 _mm256_setzero_pd (),
2664                 (__mmask8) __U);
2665}
2666
2667static __inline__ __m128 __DEFAULT_FN_ATTRS
2668_mm_cvtepu32_ps (__m128i __A) {
2669  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2670                (__v4sf)
2671                _mm_setzero_ps (),
2672                (__mmask8) -1);
2673}
2674
2675static __inline__ __m128 __DEFAULT_FN_ATTRS
2676_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2677  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2678                (__v4sf) __W,
2679                (__mmask8) __U);
2680}
2681
2682static __inline__ __m128 __DEFAULT_FN_ATTRS
2683_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2684  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2685                (__v4sf)
2686                _mm_setzero_ps (),
2687                (__mmask8) __U);
2688}
2689
2690static __inline__ __m256 __DEFAULT_FN_ATTRS
2691_mm256_cvtepu32_ps (__m256i __A) {
2692  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2693                (__v8sf)
2694                _mm256_setzero_ps (),
2695                (__mmask8) -1);
2696}
2697
2698static __inline__ __m256 __DEFAULT_FN_ATTRS
2699_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2700  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2701                (__v8sf) __W,
2702                (__mmask8) __U);
2703}
2704
2705static __inline__ __m256 __DEFAULT_FN_ATTRS
2706_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2707  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2708                (__v8sf)
2709                _mm256_setzero_ps (),
2710                (__mmask8) __U);
2711}
2712
2713static __inline__ __m128d __DEFAULT_FN_ATTRS
2714_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2715  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
2716                (__v2df) __B,
2717                (__v2df) __W,
2718                (__mmask8) __U);
2719}
2720
2721static __inline__ __m128d __DEFAULT_FN_ATTRS
2722_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B) {
2723  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
2724                (__v2df) __B,
2725                (__v2df)
2726                _mm_setzero_pd (),
2727                (__mmask8) __U);
2728}
2729
2730static __inline__ __m256d __DEFAULT_FN_ATTRS
2731_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
2732        __m256d __B) {
2733  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
2734             (__v4df) __B,
2735             (__v4df) __W,
2736             (__mmask8) __U);
2737}
2738
2739static __inline__ __m256d __DEFAULT_FN_ATTRS
2740_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B) {
2741  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
2742             (__v4df) __B,
2743             (__v4df)
2744             _mm256_setzero_pd (),
2745             (__mmask8) __U);
2746}
2747
2748static __inline__ __m128 __DEFAULT_FN_ATTRS
2749_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2750  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
2751               (__v4sf) __B,
2752               (__v4sf) __W,
2753               (__mmask8) __U);
2754}
2755
2756static __inline__ __m128 __DEFAULT_FN_ATTRS
2757_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B) {
2758  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
2759               (__v4sf) __B,
2760               (__v4sf)
2761               _mm_setzero_ps (),
2762               (__mmask8) __U);
2763}
2764
2765static __inline__ __m256 __DEFAULT_FN_ATTRS
2766_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2767  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
2768            (__v8sf) __B,
2769            (__v8sf) __W,
2770            (__mmask8) __U);
2771}
2772
2773static __inline__ __m256 __DEFAULT_FN_ATTRS
2774_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B) {
2775  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
2776            (__v8sf) __B,
2777            (__v8sf)
2778            _mm256_setzero_ps (),
2779            (__mmask8) __U);
2780}
2781
2782static __inline__ __m128d __DEFAULT_FN_ATTRS
2783_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2784  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2785                (__v2df) __W,
2786                (__mmask8) __U);
2787}
2788
2789static __inline__ __m128d __DEFAULT_FN_ATTRS
2790_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2791  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2792                 (__v2df)
2793                 _mm_setzero_pd (),
2794                 (__mmask8) __U);
2795}
2796
2797static __inline__ __m256d __DEFAULT_FN_ATTRS
2798_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2799  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2800                (__v4df) __W,
2801                (__mmask8) __U);
2802}
2803
2804static __inline__ __m256d __DEFAULT_FN_ATTRS
2805_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2806  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2807                 (__v4df)
2808                 _mm256_setzero_pd (),
2809                 (__mmask8) __U);
2810}
2811
2812static __inline__ __m128i __DEFAULT_FN_ATTRS
2813_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2814  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2815                (__v2di) __W,
2816                (__mmask8) __U);
2817}
2818
2819static __inline__ __m128i __DEFAULT_FN_ATTRS
2820_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2821  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2822                 (__v2di)
2823                 _mm_setzero_si128 (),
2824                 (__mmask8) __U);
2825}
2826
2827static __inline__ __m256i __DEFAULT_FN_ATTRS
2828_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2829  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2830                (__v4di) __W,
2831                (__mmask8) __U);
2832}
2833
2834static __inline__ __m256i __DEFAULT_FN_ATTRS
2835_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2836  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2837                 (__v4di)
2838                 _mm256_setzero_si256 (),
2839                 (__mmask8) __U);
2840}
2841
2842static __inline__ __m128d __DEFAULT_FN_ATTRS
2843_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2844  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2845              (__v2df) __W,
2846              (__mmask8)
2847              __U);
2848}
2849
2850static __inline__ __m128d __DEFAULT_FN_ATTRS
2851_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2852  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2853               (__v2df)
2854               _mm_setzero_pd (),
2855               (__mmask8)
2856               __U);
2857}
2858
2859static __inline__ __m256d __DEFAULT_FN_ATTRS
2860_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2861  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2862              (__v4df) __W,
2863              (__mmask8)
2864              __U);
2865}
2866
2867static __inline__ __m256d __DEFAULT_FN_ATTRS
2868_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2869  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2870               (__v4df)
2871               _mm256_setzero_pd (),
2872               (__mmask8)
2873               __U);
2874}
2875
2876static __inline__ __m128i __DEFAULT_FN_ATTRS
2877_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2878  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2879              (__v2di) __W,
2880              (__mmask8)
2881              __U);
2882}
2883
2884static __inline__ __m128i __DEFAULT_FN_ATTRS
2885_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2886  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2887               (__v2di)
2888               _mm_setzero_si128 (),
2889               (__mmask8)
2890               __U);
2891}
2892
2893static __inline__ __m256i __DEFAULT_FN_ATTRS
2894_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2895             void const *__P) {
2896  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2897              (__v4di) __W,
2898              (__mmask8)
2899              __U);
2900}
2901
2902static __inline__ __m256i __DEFAULT_FN_ATTRS
2903_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2904  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2905               (__v4di)
2906               _mm256_setzero_si256 (),
2907               (__mmask8)
2908               __U);
2909}
2910
2911static __inline__ __m128 __DEFAULT_FN_ATTRS
2912_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2913  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2914                   (__v4sf) __W,
2915                   (__mmask8) __U);
2916}
2917
2918static __inline__ __m128 __DEFAULT_FN_ATTRS
2919_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2920  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2921              (__v4sf)
2922              _mm_setzero_ps (),
2923              (__mmask8)
2924              __U);
2925}
2926
2927static __inline__ __m256 __DEFAULT_FN_ATTRS
2928_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2929  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2930                   (__v8sf) __W,
2931                   (__mmask8) __U);
2932}
2933
2934static __inline__ __m256 __DEFAULT_FN_ATTRS
2935_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2936  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2937              (__v8sf)
2938              _mm256_setzero_ps (),
2939              (__mmask8)
2940              __U);
2941}
2942
2943static __inline__ __m128i __DEFAULT_FN_ATTRS
2944_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2945  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2946              (__v4si) __W,
2947              (__mmask8)
2948              __U);
2949}
2950
2951static __inline__ __m128i __DEFAULT_FN_ATTRS
2952_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2953  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2954               (__v4si)
2955               _mm_setzero_si128 (),
2956               (__mmask8)     __U);
2957}
2958
2959static __inline__ __m256i __DEFAULT_FN_ATTRS
2960_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2961             void const *__P) {
2962  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2963              (__v8si) __W,
2964              (__mmask8)
2965              __U);
2966}
2967
2968static __inline__ __m256i __DEFAULT_FN_ATTRS
2969_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2970  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2971               (__v8si)
2972               _mm256_setzero_si256 (),
2973               (__mmask8)
2974               __U);
2975}
2976
2977static __inline__ __m128 __DEFAULT_FN_ATTRS
2978_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2979  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2980               (__v4sf) __W,
2981               (__mmask8) __U);
2982}
2983
2984static __inline__ __m128 __DEFAULT_FN_ATTRS
2985_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2986  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2987                (__v4sf)
2988                _mm_setzero_ps (),
2989                (__mmask8) __U);
2990}
2991
2992static __inline__ __m256 __DEFAULT_FN_ATTRS
2993_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2994  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2995               (__v8sf) __W,
2996               (__mmask8) __U);
2997}
2998
2999static __inline__ __m256 __DEFAULT_FN_ATTRS
3000_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
3001  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
3002                (__v8sf)
3003                _mm256_setzero_ps (),
3004                (__mmask8) __U);
3005}
3006
3007static __inline__ __m128i __DEFAULT_FN_ATTRS
3008_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
3009  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
3010                (__v4si) __W,
3011                (__mmask8) __U);
3012}
3013
3014static __inline__ __m128i __DEFAULT_FN_ATTRS
3015_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
3016  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
3017                 (__v4si)
3018                 _mm_setzero_si128 (),
3019                 (__mmask8) __U);
3020}
3021
3022static __inline__ __m256i __DEFAULT_FN_ATTRS
3023_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
3024  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
3025                (__v8si) __W,
3026                (__mmask8) __U);
3027}
3028
3029static __inline__ __m256i __DEFAULT_FN_ATTRS
3030_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
3031  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
3032                 (__v8si)
3033                 _mm256_setzero_si256 (),
3034                 (__mmask8) __U);
3035}
3036
3037static __inline__ __m128d __DEFAULT_FN_ATTRS
3038_mm_getexp_pd (__m128d __A) {
3039  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3040                (__v2df)
3041                _mm_setzero_pd (),
3042                (__mmask8) -1);
3043}
3044
3045static __inline__ __m128d __DEFAULT_FN_ATTRS
3046_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
3047  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3048                (__v2df) __W,
3049                (__mmask8) __U);
3050}
3051
3052static __inline__ __m128d __DEFAULT_FN_ATTRS
3053_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
3054  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3055                (__v2df)
3056                _mm_setzero_pd (),
3057                (__mmask8) __U);
3058}
3059
3060static __inline__ __m256d __DEFAULT_FN_ATTRS
3061_mm256_getexp_pd (__m256d __A) {
3062  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3063                (__v4df)
3064                _mm256_setzero_pd (),
3065                (__mmask8) -1);
3066}
3067
3068static __inline__ __m256d __DEFAULT_FN_ATTRS
3069_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
3070  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3071                (__v4df) __W,
3072                (__mmask8) __U);
3073}
3074
3075static __inline__ __m256d __DEFAULT_FN_ATTRS
3076_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
3077  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3078                (__v4df)
3079                _mm256_setzero_pd (),
3080                (__mmask8) __U);
3081}
3082
3083static __inline__ __m128 __DEFAULT_FN_ATTRS
3084_mm_getexp_ps (__m128 __A) {
3085  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3086               (__v4sf)
3087               _mm_setzero_ps (),
3088               (__mmask8) -1);
3089}
3090
3091static __inline__ __m128 __DEFAULT_FN_ATTRS
3092_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
3093  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3094               (__v4sf) __W,
3095               (__mmask8) __U);
3096}
3097
3098static __inline__ __m128 __DEFAULT_FN_ATTRS
3099_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
3100  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3101               (__v4sf)
3102               _mm_setzero_ps (),
3103               (__mmask8) __U);
3104}
3105
3106static __inline__ __m256 __DEFAULT_FN_ATTRS
3107_mm256_getexp_ps (__m256 __A) {
3108  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3109               (__v8sf)
3110               _mm256_setzero_ps (),
3111               (__mmask8) -1);
3112}
3113
3114static __inline__ __m256 __DEFAULT_FN_ATTRS
3115_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
3116  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3117               (__v8sf) __W,
3118               (__mmask8) __U);
3119}
3120
3121static __inline__ __m256 __DEFAULT_FN_ATTRS
3122_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
3123  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3124               (__v8sf)
3125               _mm256_setzero_ps (),
3126               (__mmask8) __U);
3127}
3128
3129static __inline__ __m128d __DEFAULT_FN_ATTRS
3130_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3131  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
3132                (__v2df) __B,
3133                (__v2df) __W,
3134                (__mmask8) __U);
3135}
3136
3137static __inline__ __m128d __DEFAULT_FN_ATTRS
3138_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3139  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
3140                (__v2df) __B,
3141                (__v2df)
3142                _mm_setzero_pd (),
3143                (__mmask8) __U);
3144}
3145
3146static __inline__ __m256d __DEFAULT_FN_ATTRS
3147_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
3148        __m256d __B) {
3149  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
3150             (__v4df) __B,
3151             (__v4df) __W,
3152             (__mmask8) __U);
3153}
3154
3155static __inline__ __m256d __DEFAULT_FN_ATTRS
3156_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3157  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
3158             (__v4df) __B,
3159             (__v4df)
3160             _mm256_setzero_pd (),
3161             (__mmask8) __U);
3162}
3163
3164static __inline__ __m128 __DEFAULT_FN_ATTRS
3165_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3166  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
3167               (__v4sf) __B,
3168               (__v4sf) __W,
3169               (__mmask8) __U);
3170}
3171
3172static __inline__ __m128 __DEFAULT_FN_ATTRS
3173_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3174  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
3175               (__v4sf) __B,
3176               (__v4sf)
3177               _mm_setzero_ps (),
3178               (__mmask8) __U);
3179}
3180
3181static __inline__ __m256 __DEFAULT_FN_ATTRS
3182_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3183  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
3184            (__v8sf) __B,
3185            (__v8sf) __W,
3186            (__mmask8) __U);
3187}
3188
3189static __inline__ __m256 __DEFAULT_FN_ATTRS
3190_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3191  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
3192            (__v8sf) __B,
3193            (__v8sf)
3194            _mm256_setzero_ps (),
3195            (__mmask8) __U);
3196}
3197
3198static __inline__ __m128d __DEFAULT_FN_ATTRS
3199_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3200  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
3201                (__v2df) __B,
3202                (__v2df) __W,
3203                (__mmask8) __U);
3204}
3205
3206static __inline__ __m128d __DEFAULT_FN_ATTRS
3207_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3208  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
3209                (__v2df) __B,
3210                (__v2df)
3211                _mm_setzero_pd (),
3212                (__mmask8) __U);
3213}
3214
3215static __inline__ __m256d __DEFAULT_FN_ATTRS
3216_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
3217        __m256d __B) {
3218  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
3219             (__v4df) __B,
3220             (__v4df) __W,
3221             (__mmask8) __U);
3222}
3223
3224static __inline__ __m256d __DEFAULT_FN_ATTRS
3225_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3226  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
3227             (__v4df) __B,
3228             (__v4df)
3229             _mm256_setzero_pd (),
3230             (__mmask8) __U);
3231}
3232
3233static __inline__ __m128 __DEFAULT_FN_ATTRS
3234_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3235  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
3236               (__v4sf) __B,
3237               (__v4sf) __W,
3238               (__mmask8) __U);
3239}
3240
3241static __inline__ __m128 __DEFAULT_FN_ATTRS
3242_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3243  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
3244               (__v4sf) __B,
3245               (__v4sf)
3246               _mm_setzero_ps (),
3247               (__mmask8) __U);
3248}
3249
3250static __inline__ __m256 __DEFAULT_FN_ATTRS
3251_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3252  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
3253            (__v8sf) __B,
3254            (__v8sf) __W,
3255            (__mmask8) __U);
3256}
3257
3258static __inline__ __m256 __DEFAULT_FN_ATTRS
3259_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3260  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
3261            (__v8sf) __B,
3262            (__v8sf)
3263            _mm256_setzero_ps (),
3264            (__mmask8) __U);
3265}
3266
3267static __inline__ __m128d __DEFAULT_FN_ATTRS
3268_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3269  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
3270                (__v2df) __B,
3271                (__v2df) __W,
3272                (__mmask8) __U);
3273}
3274
3275static __inline__ __m128d __DEFAULT_FN_ATTRS
3276_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3277  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
3278                (__v2df) __B,
3279                (__v2df)
3280                _mm_setzero_pd (),
3281                (__mmask8) __U);
3282}
3283
3284static __inline__ __m256d __DEFAULT_FN_ATTRS
3285_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
3286        __m256d __B) {
3287  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
3288             (__v4df) __B,
3289             (__v4df) __W,
3290             (__mmask8) __U);
3291}
3292
3293static __inline__ __m256d __DEFAULT_FN_ATTRS
3294_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3295  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
3296             (__v4df) __B,
3297             (__v4df)
3298             _mm256_setzero_pd (),
3299             (__mmask8) __U);
3300}
3301
3302static __inline__ __m128 __DEFAULT_FN_ATTRS
3303_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3304  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
3305               (__v4sf) __B,
3306               (__v4sf) __W,
3307               (__mmask8) __U);
3308}
3309
3310static __inline__ __m128 __DEFAULT_FN_ATTRS
3311_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3312  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
3313               (__v4sf) __B,
3314               (__v4sf)
3315               _mm_setzero_ps (),
3316               (__mmask8) __U);
3317}
3318
3319static __inline__ __m256 __DEFAULT_FN_ATTRS
3320_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3321  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
3322            (__v8sf) __B,
3323            (__v8sf) __W,
3324            (__mmask8) __U);
3325}
3326
3327static __inline__ __m256 __DEFAULT_FN_ATTRS
3328_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3329  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
3330            (__v8sf) __B,
3331            (__v8sf)
3332            _mm256_setzero_ps (),
3333            (__mmask8) __U);
3334}
3335
3336static __inline__ __m128i __DEFAULT_FN_ATTRS
3337_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
3338  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
3339             (__v4si) __W,
3340             (__mmask8) __U);
3341}
3342
3343static __inline__ __m128i __DEFAULT_FN_ATTRS
3344_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A) {
3345  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
3346             (__v4si)
3347             _mm_setzero_si128 (),
3348             (__mmask8) __U);
3349}
3350
3351static __inline__ __m256i __DEFAULT_FN_ATTRS
3352_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
3353  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
3354             (__v8si) __W,
3355             (__mmask8) __U);
3356}
3357
3358static __inline__ __m256i __DEFAULT_FN_ATTRS
3359_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A) {
3360  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
3361             (__v8si)
3362             _mm256_setzero_si256 (),
3363             (__mmask8) __U);
3364}
3365
3366static __inline__ __m128i __DEFAULT_FN_ATTRS
3367_mm_abs_epi64 (__m128i __A) {
3368  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
3369             (__v2di)
3370             _mm_setzero_si128 (),
3371             (__mmask8) -1);
3372}
3373
3374static __inline__ __m128i __DEFAULT_FN_ATTRS
3375_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
3376  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
3377             (__v2di) __W,
3378             (__mmask8) __U);
3379}
3380
3381static __inline__ __m128i __DEFAULT_FN_ATTRS
3382_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3383  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
3384             (__v2di)
3385             _mm_setzero_si128 (),
3386             (__mmask8) __U);
3387}
3388
3389static __inline__ __m256i __DEFAULT_FN_ATTRS
3390_mm256_abs_epi64 (__m256i __A) {
3391  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
3392             (__v4di)
3393             _mm256_setzero_si256 (),
3394             (__mmask8) -1);
3395}
3396
3397static __inline__ __m256i __DEFAULT_FN_ATTRS
3398_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3399  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
3400             (__v4di) __W,
3401             (__mmask8) __U);
3402}
3403
3404static __inline__ __m256i __DEFAULT_FN_ATTRS
3405_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
3406  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
3407             (__v4di)
3408             _mm256_setzero_si256 (),
3409             (__mmask8) __U);
3410}
3411
3412static __inline__ __m128i __DEFAULT_FN_ATTRS
3413_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B) {
3414  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
3415              (__v4si) __B,
3416              (__v4si)
3417              _mm_setzero_si128 (),
3418              __M);
3419}
3420
3421static __inline__ __m128i __DEFAULT_FN_ATTRS
3422_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
3423        __m128i __B) {
3424  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
3425              (__v4si) __B,
3426              (__v4si) __W, __M);
3427}
3428
3429static __inline__ __m256i __DEFAULT_FN_ATTRS
3430_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B) {
3431  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
3432              (__v8si) __B,
3433              (__v8si)
3434              _mm256_setzero_si256 (),
3435              __M);
3436}
3437
3438static __inline__ __m256i __DEFAULT_FN_ATTRS
3439_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
3440           __m256i __B) {
3441  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
3442              (__v8si) __B,
3443              (__v8si) __W, __M);
3444}
3445
3446static __inline__ __m128i __DEFAULT_FN_ATTRS
3447_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3448  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
3449              (__v2di) __B,
3450              (__v2di)
3451              _mm_setzero_si128 (),
3452              __M);
3453}
3454
3455static __inline__ __m128i __DEFAULT_FN_ATTRS
3456_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
3457        __m128i __B) {
3458  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
3459              (__v2di) __B,
3460              (__v2di) __W, __M);
3461}
3462
3463static __inline__ __m128i __DEFAULT_FN_ATTRS
3464_mm_max_epi64 (__m128i __A, __m128i __B) {
3465  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
3466              (__v2di) __B,
3467              (__v2di)
3468              _mm_setzero_si128 (),
3469              (__mmask8) -1);
3470}
3471
3472static __inline__ __m256i __DEFAULT_FN_ATTRS
3473_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3474  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
3475              (__v4di) __B,
3476              (__v4di)
3477              _mm256_setzero_si256 (),
3478              __M);
3479}
3480
3481static __inline__ __m256i __DEFAULT_FN_ATTRS
3482_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
3483           __m256i __B) {
3484  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
3485              (__v4di) __B,
3486              (__v4di) __W, __M);
3487}
3488
3489static __inline__ __m256i __DEFAULT_FN_ATTRS
3490_mm256_max_epi64 (__m256i __A, __m256i __B) {
3491  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
3492              (__v4di) __B,
3493              (__v4di)
3494              _mm256_setzero_si256 (),
3495              (__mmask8) -1);
3496}
3497
3498static __inline__ __m128i __DEFAULT_FN_ATTRS
3499_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B) {
3500  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
3501              (__v4si) __B,
3502              (__v4si)
3503              _mm_setzero_si128 (),
3504              __M);
3505}
3506
3507static __inline__ __m128i __DEFAULT_FN_ATTRS
3508_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
3509        __m128i __B) {
3510  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
3511              (__v4si) __B,
3512              (__v4si) __W, __M);
3513}
3514
3515static __inline__ __m256i __DEFAULT_FN_ATTRS
3516_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B) {
3517  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
3518              (__v8si) __B,
3519              (__v8si)
3520              _mm256_setzero_si256 (),
3521              __M);
3522}
3523
3524static __inline__ __m256i __DEFAULT_FN_ATTRS
3525_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
3526           __m256i __B) {
3527  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
3528              (__v8si) __B,
3529              (__v8si) __W, __M);
3530}
3531
3532static __inline__ __m128i __DEFAULT_FN_ATTRS
3533_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3534  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3535              (__v2di) __B,
3536              (__v2di)
3537              _mm_setzero_si128 (),
3538              __M);
3539}
3540
3541static __inline__ __m128i __DEFAULT_FN_ATTRS
3542_mm_max_epu64 (__m128i __A, __m128i __B) {
3543  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3544              (__v2di) __B,
3545              (__v2di)
3546              _mm_setzero_si128 (),
3547              (__mmask8) -1);
3548}
3549
3550static __inline__ __m128i __DEFAULT_FN_ATTRS
3551_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
3552        __m128i __B) {
3553  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3554              (__v2di) __B,
3555              (__v2di) __W, __M);
3556}
3557
3558static __inline__ __m256i __DEFAULT_FN_ATTRS
3559_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3560  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3561              (__v4di) __B,
3562              (__v4di)
3563              _mm256_setzero_si256 (),
3564              __M);
3565}
3566
3567static __inline__ __m256i __DEFAULT_FN_ATTRS
3568_mm256_max_epu64 (__m256i __A, __m256i __B) {
3569  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3570              (__v4di) __B,
3571              (__v4di)
3572              _mm256_setzero_si256 (),
3573              (__mmask8) -1);
3574}
3575
3576static __inline__ __m256i __DEFAULT_FN_ATTRS
3577_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
3578           __m256i __B) {
3579  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3580              (__v4di) __B,
3581              (__v4di) __W, __M);
3582}
3583
3584static __inline__ __m128i __DEFAULT_FN_ATTRS
3585_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B) {
3586  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
3587              (__v4si) __B,
3588              (__v4si)
3589              _mm_setzero_si128 (),
3590              __M);
3591}
3592
3593static __inline__ __m128i __DEFAULT_FN_ATTRS
3594_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
3595        __m128i __B) {
3596  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
3597              (__v4si) __B,
3598              (__v4si) __W, __M);
3599}
3600
3601static __inline__ __m256i __DEFAULT_FN_ATTRS
3602_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B) {
3603  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
3604              (__v8si) __B,
3605              (__v8si)
3606              _mm256_setzero_si256 (),
3607              __M);
3608}
3609
3610static __inline__ __m256i __DEFAULT_FN_ATTRS
3611_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
3612           __m256i __B) {
3613  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
3614              (__v8si) __B,
3615              (__v8si) __W, __M);
3616}
3617
3618static __inline__ __m128i __DEFAULT_FN_ATTRS
3619_mm_min_epi64 (__m128i __A, __m128i __B) {
3620  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3621              (__v2di) __B,
3622              (__v2di)
3623              _mm_setzero_si128 (),
3624              (__mmask8) -1);
3625}
3626
3627static __inline__ __m128i __DEFAULT_FN_ATTRS
3628_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
3629        __m128i __B) {
3630  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3631              (__v2di) __B,
3632              (__v2di) __W, __M);
3633}
3634
3635static __inline__ __m128i __DEFAULT_FN_ATTRS
3636_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3637  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3638              (__v2di) __B,
3639              (__v2di)
3640              _mm_setzero_si128 (),
3641              __M);
3642}
3643
3644static __inline__ __m256i __DEFAULT_FN_ATTRS
3645_mm256_min_epi64 (__m256i __A, __m256i __B) {
3646  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3647              (__v4di) __B,
3648              (__v4di)
3649              _mm256_setzero_si256 (),
3650              (__mmask8) -1);
3651}
3652
3653static __inline__ __m256i __DEFAULT_FN_ATTRS
3654_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
3655           __m256i __B) {
3656  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3657              (__v4di) __B,
3658              (__v4di) __W, __M);
3659}
3660
3661static __inline__ __m256i __DEFAULT_FN_ATTRS
3662_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3663  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3664              (__v4di) __B,
3665              (__v4di)
3666              _mm256_setzero_si256 (),
3667              __M);
3668}
3669
3670static __inline__ __m128i __DEFAULT_FN_ATTRS
3671_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B) {
3672  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
3673              (__v4si) __B,
3674              (__v4si)
3675              _mm_setzero_si128 (),
3676              __M);
3677}
3678
3679static __inline__ __m128i __DEFAULT_FN_ATTRS
3680_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
3681        __m128i __B) {
3682  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
3683              (__v4si) __B,
3684              (__v4si) __W, __M);
3685}
3686
3687static __inline__ __m256i __DEFAULT_FN_ATTRS
3688_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B) {
3689  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
3690              (__v8si) __B,
3691              (__v8si)
3692              _mm256_setzero_si256 (),
3693              __M);
3694}
3695
3696static __inline__ __m256i __DEFAULT_FN_ATTRS
3697_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
3698           __m256i __B) {
3699  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
3700              (__v8si) __B,
3701              (__v8si) __W, __M);
3702}
3703
3704static __inline__ __m128i __DEFAULT_FN_ATTRS
3705_mm_min_epu64 (__m128i __A, __m128i __B) {
3706  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3707              (__v2di) __B,
3708              (__v2di)
3709              _mm_setzero_si128 (),
3710              (__mmask8) -1);
3711}
3712
3713static __inline__ __m128i __DEFAULT_FN_ATTRS
3714_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
3715        __m128i __B) {
3716  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3717              (__v2di) __B,
3718              (__v2di) __W, __M);
3719}
3720
3721static __inline__ __m128i __DEFAULT_FN_ATTRS
3722_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3723  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3724              (__v2di) __B,
3725              (__v2di)
3726              _mm_setzero_si128 (),
3727              __M);
3728}
3729
3730static __inline__ __m256i __DEFAULT_FN_ATTRS
3731_mm256_min_epu64 (__m256i __A, __m256i __B) {
3732  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3733              (__v4di) __B,
3734              (__v4di)
3735              _mm256_setzero_si256 (),
3736              (__mmask8) -1);
3737}
3738
3739static __inline__ __m256i __DEFAULT_FN_ATTRS
3740_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
3741           __m256i __B) {
3742  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3743              (__v4di) __B,
3744              (__v4di) __W, __M);
3745}
3746
3747static __inline__ __m256i __DEFAULT_FN_ATTRS
3748_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3749  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3750              (__v4di) __B,
3751              (__v4di)
3752              _mm256_setzero_si256 (),
3753              __M);
3754}
3755
/* Unmasked form: expands to __builtin_ia32_rndscalepd_128_mask on A and imm
 * with a zero vector and the mask (__mmask8)-1.
 * NOTE(review): presumably a macro so that imm reaches the builtin as a
 * constant expression -- confirm. */
#define _mm_roundscale_pd(A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalepd_128_mask( \
        (__v2df)(__m128d)(A), (int)(imm), \
        (__v2df)_mm_setzero_pd(), (__mmask8)-1); \
  })
3761
3762
/* Mask form: expands to __builtin_ia32_rndscalepd_128_mask on A and imm,
 * with W and the mask U as the trailing operands. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalepd_128_mask( \
        (__v2df)(__m128d)(A), (int)(imm), \
        (__v2df)(__m128d)(W), (__mmask8)(U)); \
  })
3768
3769
/* Maskz form: expands to __builtin_ia32_rndscalepd_128_mask on A and imm,
 * with a zero vector in place of W and U as the mask. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalepd_128_mask( \
        (__v2df)(__m128d)(A), (int)(imm), \
        (__v2df)_mm_setzero_pd(), (__mmask8)(U)); \
  })
3775
3776
/* Unmasked form: expands to __builtin_ia32_rndscalepd_256_mask on A and imm
 * with a zero vector and the mask (__mmask8)-1. */
#define _mm256_roundscale_pd(A, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_rndscalepd_256_mask( \
        (__v4df)(__m256d)(A), (int)(imm), \
        (__v4df)_mm256_setzero_pd(), (__mmask8)-1); \
  })
3782
3783
/* Mask form: expands to __builtin_ia32_rndscalepd_256_mask on A and imm,
 * with W and the mask U as the trailing operands. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_rndscalepd_256_mask( \
        (__v4df)(__m256d)(A), (int)(imm), \
        (__v4df)(__m256d)(W), (__mmask8)(U)); \
  })
3789
3790
/* Maskz form: expands to __builtin_ia32_rndscalepd_256_mask on A and imm,
 * with a zero vector in place of W and U as the mask. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_rndscalepd_256_mask( \
        (__v4df)(__m256d)(A), (int)(imm), \
        (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); \
  })
3796
/* Unmasked form: expands to __builtin_ia32_rndscaleps_128_mask on A and imm
 * with a zero vector and the mask (__mmask8)-1. */
#define _mm_roundscale_ps(A, imm) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaleps_128_mask( \
        (__v4sf)(__m128)(A), (int)(imm), \
        (__v4sf)_mm_setzero_ps(), (__mmask8)-1); \
  })
3801
3802
/* Mask form: expands to __builtin_ia32_rndscaleps_128_mask on A and imm,
 * with W and the mask U as the trailing operands. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaleps_128_mask( \
        (__v4sf)(__m128)(A), (int)(imm), \
        (__v4sf)(__m128)(W), (__mmask8)(U)); \
  })
3807
3808
/* Maskz form: expands to __builtin_ia32_rndscaleps_128_mask on A and imm,
 * with a zero vector in place of W and U as the mask. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaleps_128_mask( \
        (__v4sf)(__m128)(A), (int)(imm), \
        (__v4sf)_mm_setzero_ps(), (__mmask8)(U)); \
  })
3813
/* Unmasked form: expands to __builtin_ia32_rndscaleps_256_mask on A and imm
 * with a zero vector and the mask (__mmask8)-1. */
#define _mm256_roundscale_ps(A, imm) \
  __extension__ ({ \
    (__m256)__builtin_ia32_rndscaleps_256_mask( \
        (__v8sf)(__m256)(A), (int)(imm), \
        (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); \
  })
3818
/* Mask form: expands to __builtin_ia32_rndscaleps_256_mask on A and imm,
 * with W and the mask U as the trailing operands. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  __extension__ ({ \
    (__m256)__builtin_ia32_rndscaleps_256_mask( \
        (__v8sf)(__m256)(A), (int)(imm), \
        (__v8sf)(__m256)(W), (__mmask8)(U)); \
  })
3823
3824
/* Maskz form: expands to __builtin_ia32_rndscaleps_256_mask on A and imm,
 * with a zero vector in place of W and U as the mask. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  __extension__ ({ \
    (__m256)__builtin_ia32_rndscaleps_256_mask( \
        (__v8sf)(__m256)(A), (int)(imm), \
        (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)); \
  })
3829
3830static __inline__ __m128d __DEFAULT_FN_ATTRS
3831_mm_scalef_pd (__m128d __A, __m128d __B) {
3832  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3833                (__v2df) __B,
3834                (__v2df)
3835                _mm_setzero_pd (),
3836                (__mmask8) -1);
3837}
3838
3839static __inline__ __m128d __DEFAULT_FN_ATTRS
3840_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3841        __m128d __B) {
3842  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3843                (__v2df) __B,
3844                (__v2df) __W,
3845                (__mmask8) __U);
3846}
3847
3848static __inline__ __m128d __DEFAULT_FN_ATTRS
3849_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3850  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3851                (__v2df) __B,
3852                (__v2df)
3853                _mm_setzero_pd (),
3854                (__mmask8) __U);
3855}
3856
3857static __inline__ __m256d __DEFAULT_FN_ATTRS
3858_mm256_scalef_pd (__m256d __A, __m256d __B) {
3859  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3860                (__v4df) __B,
3861                (__v4df)
3862                _mm256_setzero_pd (),
3863                (__mmask8) -1);
3864}
3865
3866static __inline__ __m256d __DEFAULT_FN_ATTRS
3867_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3868           __m256d __B) {
3869  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3870                (__v4df) __B,
3871                (__v4df) __W,
3872                (__mmask8) __U);
3873}
3874
3875static __inline__ __m256d __DEFAULT_FN_ATTRS
3876_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3877  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3878                (__v4df) __B,
3879                (__v4df)
3880                _mm256_setzero_pd (),
3881                (__mmask8) __U);
3882}
3883
3884static __inline__ __m128 __DEFAULT_FN_ATTRS
3885_mm_scalef_ps (__m128 __A, __m128 __B) {
3886  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3887               (__v4sf) __B,
3888               (__v4sf)
3889               _mm_setzero_ps (),
3890               (__mmask8) -1);
3891}
3892
3893static __inline__ __m128 __DEFAULT_FN_ATTRS
3894_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3895  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3896               (__v4sf) __B,
3897               (__v4sf) __W,
3898               (__mmask8) __U);
3899}
3900
3901static __inline__ __m128 __DEFAULT_FN_ATTRS
3902_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3903  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3904               (__v4sf) __B,
3905               (__v4sf)
3906               _mm_setzero_ps (),
3907               (__mmask8) __U);
3908}
3909
3910static __inline__ __m256 __DEFAULT_FN_ATTRS
3911_mm256_scalef_ps (__m256 __A, __m256 __B) {
3912  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3913               (__v8sf) __B,
3914               (__v8sf)
3915               _mm256_setzero_ps (),
3916               (__mmask8) -1);
3917}
3918
3919static __inline__ __m256 __DEFAULT_FN_ATTRS
3920_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3921           __m256 __B) {
3922  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3923               (__v8sf) __B,
3924               (__v8sf) __W,
3925               (__mmask8) __U);
3926}
3927
3928static __inline__ __m256 __DEFAULT_FN_ATTRS
3929_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3930  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3931               (__v8sf) __B,
3932               (__v8sf)
3933               _mm256_setzero_ps (),
3934               (__mmask8) __U);
3935}
3936
/* Unmasked form: expands to __builtin_ia32_scatterdiv2df with addr cast to
 * double *, the mask (__mmask8)-1, index as __v2di, v1 as __v2df, and scale
 * cast to int. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), (__mmask8)-1, \
        (__v2di)(__m128i)(index), (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
3941
/* Mask form: expands to __builtin_ia32_scatterdiv2df with addr cast to
 * double *, the caller's mask, index as __v2di, v1 as __v2df, and scale
 * cast to int. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), (__mmask8)(mask), \
        (__v2di)(__m128i)(index), (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
3946
/* Unmasked form: expands to __builtin_ia32_scatterdiv2di with addr cast to
 * long long *, the mask (__mmask8)-1, index and v1 as __v2di, and scale
 * cast to int. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), (__mmask8)-1, \
        (__v2di)(__m128i)(index), (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
3951
/* Mask form: expands to __builtin_ia32_scatterdiv2di with addr cast to
 * long long *, the caller's mask, index and v1 as __v2di, and scale cast
 * to int. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), (__mmask8)(mask), \
        (__v2di)(__m128i)(index), (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
3956
/* Unmasked wrapper: expands to __builtin_ia32_scatterdiv4df with the mask
   operand fixed to (__mmask8)-1. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale)); })
3961
/* Masked wrapper: expands to __builtin_ia32_scatterdiv4df, forwarding mask
   as an __mmask8. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale)); })
3966
/* Unmasked wrapper: expands to __builtin_ia32_scatterdiv4di with the mask
   operand fixed to (__mmask8)-1. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale)); })
3971
/* Masked wrapper: expands to __builtin_ia32_scatterdiv4di, forwarding mask
   as an __mmask8. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale)); })
3976
/* Unmasked wrapper: expands to __builtin_ia32_scatterdiv4sf with the mask
   operand fixed to (__mmask8)-1. */
#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4sf((float *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale)); })
3981
/* Masked wrapper: expands to __builtin_ia32_scatterdiv4sf, forwarding mask
   as an __mmask8. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4sf((float *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale)); })
3986
/* Unmasked wrapper: expands to __builtin_ia32_scatterdiv4si with the mask
   operand fixed to (__mmask8)-1. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4si((int *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale)); })
3991
/* Masked wrapper: expands to __builtin_ia32_scatterdiv4si, forwarding mask
   as an __mmask8. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4si((int *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale)); })
3996
/* Unmasked wrapper: expands to __builtin_ia32_scatterdiv8sf with the mask
   operand fixed to (__mmask8)-1.  index is a 256-bit vector while v1 is a
   128-bit __m128. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8sf((float *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale)); })
4001
/* Masked wrapper: expands to __builtin_ia32_scatterdiv8sf, forwarding mask
   as an __mmask8.  index is a 256-bit vector while v1 is a 128-bit __m128. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8sf((float *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale)); })
4006
/* Unmasked wrapper: expands to __builtin_ia32_scatterdiv8si with the mask
   operand fixed to (__mmask8)-1.  index is a 256-bit vector while v1 is a
   128-bit __m128i. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8si((int *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale)); })
4011
/* Masked wrapper: expands to __builtin_ia32_scatterdiv8si, forwarding mask
   as an __mmask8.  index is a 256-bit vector while v1 is a 128-bit __m128i. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8si((int *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale)); })
4016
/* Unmasked wrapper: expands to __builtin_ia32_scattersiv2df with the mask
   operand fixed to (__mmask8)-1. */
#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale)); })
4021
/* Masked wrapper: expands to __builtin_ia32_scattersiv2df, forwarding mask
   as an __mmask8. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale)); })
4026
/* Unmasked wrapper: expands to __builtin_ia32_scattersiv2di with the mask
   operand fixed to (__mmask8)-1. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale)); })
4031
/* Masked wrapper: expands to __builtin_ia32_scattersiv2di, forwarding mask
   as an __mmask8. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale)); })
4036
/* Unmasked wrapper: expands to __builtin_ia32_scattersiv4df with the mask
   operand fixed to (__mmask8)-1.  index is a 128-bit __m128i while v1 is a
   256-bit vector. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4df((double *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale)); })
4041
/* Masked wrapper: expands to __builtin_ia32_scattersiv4df, forwarding mask
   as an __mmask8.  index is a 128-bit __m128i while v1 is a 256-bit vector. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4df((double *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale)); })
4046
/* Unmasked wrapper: expands to __builtin_ia32_scattersiv4di with the mask
   operand fixed to (__mmask8)-1.  index is a 128-bit __m128i while v1 is a
   256-bit vector. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4di((long long *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale)); })
4051
/* Masked wrapper: expands to __builtin_ia32_scattersiv4di, forwarding mask
   as an __mmask8.  index is a 128-bit __m128i while v1 is a 256-bit vector. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4di((long long *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale)); })
4056
/* Unmasked wrapper: expands to __builtin_ia32_scattersiv4sf with the mask
   operand fixed to (__mmask8)-1. */
#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4sf((float *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale)); })
4061
/* Masked wrapper: expands to __builtin_ia32_scattersiv4sf, forwarding mask
   as an __mmask8. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4sf((float *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale)); })
4066
/* Unmasked wrapper: expands to __builtin_ia32_scattersiv4si with the mask
   operand fixed to (__mmask8)-1. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4si((int *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale)); })
4071
/* Masked wrapper: expands to __builtin_ia32_scattersiv4si, forwarding mask
   as an __mmask8. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4si((int *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale)); })
4076
/* Unmasked wrapper: expands to __builtin_ia32_scattersiv8sf with the mask
   operand fixed to (__mmask8)-1. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8sf((float *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), \
                               (int)(scale)); })
4081
/* Masked wrapper: expands to __builtin_ia32_scattersiv8sf, forwarding mask
   as an __mmask8. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8sf((float *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), \
                               (int)(scale)); })
4086
/* Unmasked wrapper: expands to __builtin_ia32_scattersiv8si with the mask
   operand fixed to (__mmask8)-1. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8si((int *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), \
                               (int)(scale)); })
4091
/* Masked wrapper: expands to __builtin_ia32_scattersiv8si, forwarding mask
   as an __mmask8. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8si((int *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), \
                               (int)(scale)); })
4096
4097static __inline__ __m128d __DEFAULT_FN_ATTRS
4098_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A) {
4099  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
4100              (__v2df) __W,
4101              (__mmask8) __U);
4102}
4103
4104static __inline__ __m128d __DEFAULT_FN_ATTRS
4105_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A) {
4106  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
4107              (__v2df)
4108              _mm_setzero_pd (),
4109              (__mmask8) __U);
4110}
4111
4112static __inline__ __m256d __DEFAULT_FN_ATTRS
4113_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A) {
4114  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
4115              (__v4df) __W,
4116              (__mmask8) __U);
4117}
4118
4119static __inline__ __m256d __DEFAULT_FN_ATTRS
4120_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A) {
4121  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
4122              (__v4df)
4123              _mm256_setzero_pd (),
4124              (__mmask8) __U);
4125}
4126
4127static __inline__ __m128 __DEFAULT_FN_ATTRS
4128_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A) {
4129  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
4130             (__v4sf) __W,
4131             (__mmask8) __U);
4132}
4133
4134static __inline__ __m128 __DEFAULT_FN_ATTRS
4135_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A) {
4136  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
4137             (__v4sf)
4138             _mm_setzero_ps (),
4139             (__mmask8) __U);
4140}
4141
4142static __inline__ __m256 __DEFAULT_FN_ATTRS
4143_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A) {
4144  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
4145             (__v8sf) __W,
4146             (__mmask8) __U);
4147}
4148
4149static __inline__ __m256 __DEFAULT_FN_ATTRS
4150_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A) {
4151  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
4152             (__v8sf)
4153             _mm256_setzero_ps (),
4154             (__mmask8) __U);
4155}
4156
4157static __inline__ __m128d __DEFAULT_FN_ATTRS
4158_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
4159  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
4160             (__v2df) __B,
4161             (__v2df) __W,
4162             (__mmask8) __U);
4163}
4164
4165static __inline__ __m128d __DEFAULT_FN_ATTRS
4166_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B) {
4167  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
4168             (__v2df) __B,
4169             (__v2df)
4170             _mm_setzero_pd (),
4171             (__mmask8) __U);
4172}
4173
4174static __inline__ __m256d __DEFAULT_FN_ATTRS
4175_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
4176        __m256d __B) {
4177  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
4178             (__v4df) __B,
4179             (__v4df) __W,
4180             (__mmask8) __U);
4181}
4182
4183static __inline__ __m256d __DEFAULT_FN_ATTRS
4184_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B) {
4185  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
4186             (__v4df) __B,
4187             (__v4df)
4188             _mm256_setzero_pd (),
4189             (__mmask8) __U);
4190}
4191
4192static __inline__ __m128 __DEFAULT_FN_ATTRS
4193_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B) {
4194  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
4195            (__v4sf) __B,
4196            (__v4sf) __W,
4197            (__mmask8) __U);
4198}
4199
4200static __inline__ __m128 __DEFAULT_FN_ATTRS
4201_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B) {
4202  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
4203            (__v4sf) __B,
4204            (__v4sf)
4205            _mm_setzero_ps (),
4206            (__mmask8) __U);
4207}
4208
4209static __inline__ __m256 __DEFAULT_FN_ATTRS
4210_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B) {
4211  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
4212            (__v8sf) __B,
4213            (__v8sf) __W,
4214            (__mmask8) __U);
4215}
4216
4217static __inline__ __m256 __DEFAULT_FN_ATTRS
4218_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B) {
4219  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
4220            (__v8sf) __B,
4221            (__v8sf)
4222            _mm256_setzero_ps (),
4223            (__mmask8) __U);
4224}
4225
4226static __inline__ __m128i __DEFAULT_FN_ATTRS
4227_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
4228            __m128i __B) {
4229  return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
4230                   (__v4si) __I
4231                   /* idx */ ,
4232                   (__v4si) __B,
4233                   (__mmask8) __U);
4234}
4235
4236static __inline__ __m256i __DEFAULT_FN_ATTRS
4237_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
4238         __mmask8 __U, __m256i __B) {
4239  return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
4240                   (__v8si) __I
4241                   /* idx */ ,
4242                   (__v8si) __B,
4243                   (__mmask8) __U);
4244}
4245
4246static __inline__ __m128d __DEFAULT_FN_ATTRS
4247_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
4248         __m128d __B) {
4249  return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
4250              (__v2di) __I
4251              /* idx */ ,
4252              (__v2df) __B,
4253              (__mmask8)
4254              __U);
4255}
4256
4257static __inline__ __m256d __DEFAULT_FN_ATTRS
4258_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
4259            __m256d __B) {
4260  return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
4261              (__v4di) __I
4262              /* idx */ ,
4263              (__v4df) __B,
4264              (__mmask8)
4265              __U);
4266}
4267
4268static __inline__ __m128 __DEFAULT_FN_ATTRS
4269_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
4270         __m128 __B) {
4271  return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
4272                   (__v4si) __I
4273                   /* idx */ ,
4274                   (__v4sf) __B,
4275                   (__mmask8) __U);
4276}
4277
4278static __inline__ __m256 __DEFAULT_FN_ATTRS
4279_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
4280            __m256 __B) {
4281  return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
4282                   (__v8si) __I
4283                   /* idx */ ,
4284                   (__v8sf) __B,
4285                   (__mmask8) __U);
4286}
4287
4288static __inline__ __m128i __DEFAULT_FN_ATTRS
4289_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
4290            __m128i __B) {
4291  return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
4292                   (__v2di) __I
4293                   /* idx */ ,
4294                   (__v2di) __B,
4295                   (__mmask8) __U);
4296}
4297
4298static __inline__ __m256i __DEFAULT_FN_ATTRS
4299_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
4300         __mmask8 __U, __m256i __B) {
4301  return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
4302                   (__v4di) __I
4303                   /* idx */ ,
4304                   (__v4di) __B,
4305                   (__mmask8) __U);
4306}
4307
4308static __inline__ __m128i __DEFAULT_FN_ATTRS
4309_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) {
4310  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
4311                   /* idx */ ,
4312                   (__v4si) __A,
4313                   (__v4si) __B,
4314                   (__mmask8) -1);
4315}
4316
4317static __inline__ __m128i __DEFAULT_FN_ATTRS
4318_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
4319           __m128i __B) {
4320  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
4321                   /* idx */ ,
4322                   (__v4si) __A,
4323                   (__v4si) __B,
4324                   (__mmask8) __U);
4325}
4326
4327static __inline__ __m128i __DEFAULT_FN_ATTRS
4328_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
4329            __m128i __B) {
4330  return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
4331              /* idx */ ,
4332              (__v4si) __A,
4333              (__v4si) __B,
4334              (__mmask8)
4335              __U);
4336}
4337
4338static __inline__ __m256i __DEFAULT_FN_ATTRS
4339_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) {
4340  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
4341                   /* idx */ ,
4342                   (__v8si) __A,
4343                   (__v8si) __B,
4344                   (__mmask8) -1);
4345}
4346
4347static __inline__ __m256i __DEFAULT_FN_ATTRS
4348_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
4349        __m256i __B) {
4350  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
4351                   /* idx */ ,
4352                   (__v8si) __A,
4353                   (__v8si) __B,
4354                   (__mmask8) __U);
4355}
4356
4357static __inline__ __m256i __DEFAULT_FN_ATTRS
4358_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
4359         __m256i __I, __m256i __B) {
4360  return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
4361              /* idx */ ,
4362              (__v8si) __A,
4363              (__v8si) __B,
4364              (__mmask8)
4365              __U);
4366}
4367
4368static __inline__ __m128d __DEFAULT_FN_ATTRS
4369_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) {
4370  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
4371              /* idx */ ,
4372              (__v2df) __A,
4373              (__v2df) __B,
4374              (__mmask8) -
4375              1);
4376}
4377
4378static __inline__ __m128d __DEFAULT_FN_ATTRS
4379_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
4380        __m128d __B) {
4381  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
4382              /* idx */ ,
4383              (__v2df) __A,
4384              (__v2df) __B,
4385              (__mmask8)
4386              __U);
4387}
4388
4389static __inline__ __m128d __DEFAULT_FN_ATTRS
4390_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
4391         __m128d __B) {
4392  return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
4393               /* idx */ ,
4394               (__v2df) __A,
4395               (__v2df) __B,
4396               (__mmask8)
4397               __U);
4398}
4399
4400static __inline__ __m256d __DEFAULT_FN_ATTRS
4401_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) {
4402  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
4403              /* idx */ ,
4404              (__v4df) __A,
4405              (__v4df) __B,
4406              (__mmask8) -
4407              1);
4408}
4409
4410static __inline__ __m256d __DEFAULT_FN_ATTRS
4411_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
4412           __m256d __B) {
4413  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
4414              /* idx */ ,
4415              (__v4df) __A,
4416              (__v4df) __B,
4417              (__mmask8)
4418              __U);
4419}
4420
4421static __inline__ __m256d __DEFAULT_FN_ATTRS
4422_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
4423            __m256d __B) {
4424  return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
4425               /* idx */ ,
4426               (__v4df) __A,
4427               (__v4df) __B,
4428               (__mmask8)
4429               __U);
4430}
4431
4432static __inline__ __m128 __DEFAULT_FN_ATTRS
4433_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) {
4434  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
4435                   /* idx */ ,
4436                   (__v4sf) __A,
4437                   (__v4sf) __B,
4438                   (__mmask8) -1);
4439}
4440
4441static __inline__ __m128 __DEFAULT_FN_ATTRS
4442_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
4443        __m128 __B) {
4444  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
4445                   /* idx */ ,
4446                   (__v4sf) __A,
4447                   (__v4sf) __B,
4448                   (__mmask8) __U);
4449}
4450
4451static __inline__ __m128 __DEFAULT_FN_ATTRS
4452_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
4453         __m128 __B) {
4454  return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
4455              /* idx */ ,
4456              (__v4sf) __A,
4457              (__v4sf) __B,
4458              (__mmask8)
4459              __U);
4460}
4461
4462static __inline__ __m256 __DEFAULT_FN_ATTRS
4463_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) {
4464  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
4465                   /* idx */ ,
4466                   (__v8sf) __A,
4467                   (__v8sf) __B,
4468                   (__mmask8) -1);
4469}
4470
4471static __inline__ __m256 __DEFAULT_FN_ATTRS
4472_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
4473           __m256 __B) {
4474  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
4475                   /* idx */ ,
4476                   (__v8sf) __A,
4477                   (__v8sf) __B,
4478                   (__mmask8) __U);
4479}
4480
4481static __inline__ __m256 __DEFAULT_FN_ATTRS
4482_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
4483            __m256 __B) {
4484  return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
4485              /* idx */ ,
4486              (__v8sf) __A,
4487              (__v8sf) __B,
4488              (__mmask8)
4489              __U);
4490}
4491
4492static __inline__ __m128i __DEFAULT_FN_ATTRS
4493_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) {
4494  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
4495                   /* idx */ ,
4496                   (__v2di) __A,
4497                   (__v2di) __B,
4498                   (__mmask8) -1);
4499}
4500
4501static __inline__ __m128i __DEFAULT_FN_ATTRS
4502_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
4503           __m128i __B) {
4504  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
4505                   /* idx */ ,
4506                   (__v2di) __A,
4507                   (__v2di) __B,
4508                   (__mmask8) __U);
4509}
4510
4511static __inline__ __m128i __DEFAULT_FN_ATTRS
4512_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
4513            __m128i __B) {
4514  return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
4515              /* idx */ ,
4516              (__v2di) __A,
4517              (__v2di) __B,
4518              (__mmask8)
4519              __U);
4520}
4521
4522
4523static __inline__ __m256i __DEFAULT_FN_ATTRS
4524_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) {
4525  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
4526                   /* idx */ ,
4527                   (__v4di) __A,
4528                   (__v4di) __B,
4529                   (__mmask8) -1);
4530}
4531
4532static __inline__ __m256i __DEFAULT_FN_ATTRS
4533_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
4534        __m256i __B) {
4535  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
4536                   /* idx */ ,
4537                   (__v4di) __A,
4538                   (__v4di) __B,
4539                   (__mmask8) __U);
4540}
4541
4542static __inline__ __m256i __DEFAULT_FN_ATTRS
4543_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
4544         __m256i __I, __m256i __B) {
4545  return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
4546              /* idx */ ,
4547              (__v4di) __A,
4548              (__v4di) __B,
4549              (__mmask8)
4550              __U);
4551}
4552
4553static __inline__ __m128i __DEFAULT_FN_ATTRS
4554_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
4555{
4556  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
4557                (__v4si) __W,
4558                (__mmask8) __U);
4559}
4560
4561static __inline__ __m128i __DEFAULT_FN_ATTRS
4562_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4563{
4564  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
4565                (__v4si)
4566                _mm_setzero_si128 (),
4567                (__mmask8) __U);
4568}
4569
4570static __inline__ __m256i __DEFAULT_FN_ATTRS
4571_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4572{
4573  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
4574                (__v8si) __W,
4575                (__mmask8) __U);
4576}
4577
4578static __inline__ __m256i __DEFAULT_FN_ATTRS
4579_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4580{
4581  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
4582                (__v8si)
4583                _mm256_setzero_si256 (),
4584                (__mmask8) __U);
4585}
4586
4587static __inline__ __m128i __DEFAULT_FN_ATTRS
4588_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
4589{
4590  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
4591                (__v2di) __W,
4592                (__mmask8) __U);
4593}
4594
4595static __inline__ __m128i __DEFAULT_FN_ATTRS
4596_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
4597{
4598  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
4599                (__v2di)
4600                _mm_setzero_si128 (),
4601                (__mmask8) __U);
4602}
4603
4604static __inline__ __m256i __DEFAULT_FN_ATTRS
4605_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
4606{
4607  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
4608                (__v4di) __W,
4609                (__mmask8) __U);
4610}
4611
4612static __inline__ __m256i __DEFAULT_FN_ATTRS
4613_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
4614{
4615  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
4616                (__v4di)
4617                _mm256_setzero_si256 (),
4618                (__mmask8) __U);
4619}
4620
4621static __inline__ __m128i __DEFAULT_FN_ATTRS
4622_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
4623{
4624  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
4625                (__v2di) __W,
4626                (__mmask8) __U);
4627}
4628
4629static __inline__ __m128i __DEFAULT_FN_ATTRS
4630_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
4631{
4632  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
4633                (__v2di)
4634                _mm_setzero_si128 (),
4635                (__mmask8) __U);
4636}
4637
4638static __inline__ __m256i __DEFAULT_FN_ATTRS
4639_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
4640{
4641  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
4642                (__v4di) __W,
4643                (__mmask8) __U);
4644}
4645
4646static __inline__ __m256i __DEFAULT_FN_ATTRS
4647_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
4648{
4649  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
4650                (__v4di)
4651                _mm256_setzero_si256 (),
4652                (__mmask8) __U);
4653}
4654
4655static __inline__ __m128i __DEFAULT_FN_ATTRS
4656_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
4657{
4658  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
4659                (__v4si) __W,
4660                (__mmask8) __U);
4661}
4662
4663static __inline__ __m128i __DEFAULT_FN_ATTRS
4664_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4665{
4666  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
4667                (__v4si)
4668                _mm_setzero_si128 (),
4669                (__mmask8) __U);
4670}
4671
4672static __inline__ __m256i __DEFAULT_FN_ATTRS
4673_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4674{
4675  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
4676                (__v8si) __W,
4677                (__mmask8) __U);
4678}
4679
4680static __inline__ __m256i __DEFAULT_FN_ATTRS
4681_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4682{
4683  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
4684                (__v8si)
4685                _mm256_setzero_si256 (),
4686                (__mmask8) __U);
4687}
4688
4689static __inline__ __m128i __DEFAULT_FN_ATTRS
4690_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
4691{
4692  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
4693                (__v2di) __W,
4694                (__mmask8) __U);
4695}
4696
4697static __inline__ __m128i __DEFAULT_FN_ATTRS
4698_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
4699{
4700  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
4701                (__v2di)
4702                _mm_setzero_si128 (),
4703                (__mmask8) __U);
4704}
4705
4706static __inline__ __m256i __DEFAULT_FN_ATTRS
4707_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
4708{
4709  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
4710                (__v4di) __W,
4711                (__mmask8) __U);
4712}
4713
4714static __inline__ __m256i __DEFAULT_FN_ATTRS
4715_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
4716{
4717  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
4718                (__v4di)
4719                _mm256_setzero_si256 (),
4720                (__mmask8) __U);
4721}
4722
4723
4724static __inline__ __m128i __DEFAULT_FN_ATTRS
4725_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
4726{
4727  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
4728                (__v4si) __W,
4729                (__mmask8) __U);
4730}
4731
4732static __inline__ __m128i __DEFAULT_FN_ATTRS
4733_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
4734{
4735  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
4736                (__v4si)
4737                _mm_setzero_si128 (),
4738                (__mmask8) __U);
4739}
4740
4741static __inline__ __m256i __DEFAULT_FN_ATTRS
4742_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4743{
4744  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
4745                (__v8si) __W,
4746                (__mmask8) __U);
4747}
4748
4749static __inline__ __m256i __DEFAULT_FN_ATTRS
4750_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
4751{
4752  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
4753                (__v8si)
4754                _mm256_setzero_si256 (),
4755                (__mmask8) __U);
4756}
4757
4758static __inline__ __m128i __DEFAULT_FN_ATTRS
4759_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
4760{
4761  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
4762                (__v2di) __W,
4763                (__mmask8) __U);
4764}
4765
4766static __inline__ __m128i __DEFAULT_FN_ATTRS
4767_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4768{
4769  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
4770                (__v2di)
4771                _mm_setzero_si128 (),
4772                (__mmask8) __U);
4773}
4774
4775static __inline__ __m256i __DEFAULT_FN_ATTRS
4776_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
4777{
4778  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
4779                (__v4di) __W,
4780                (__mmask8) __U);
4781}
4782
4783static __inline__ __m256i __DEFAULT_FN_ATTRS
4784_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4785{
4786  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
4787                (__v4di)
4788                _mm256_setzero_si256 (),
4789                (__mmask8) __U);
4790}
4791
4792static __inline__ __m128i __DEFAULT_FN_ATTRS
4793_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
4794{
4795  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
4796                (__v2di) __W,
4797                (__mmask8) __U);
4798}
4799
4800static __inline__ __m128i __DEFAULT_FN_ATTRS
4801_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
4802{
4803  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
4804                (__v2di)
4805                _mm_setzero_si128 (),
4806                (__mmask8) __U);
4807}
4808
4809static __inline__ __m256i __DEFAULT_FN_ATTRS
4810_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
4811{
4812  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
4813                (__v4di) __W,
4814                (__mmask8) __U);
4815}
4816
4817static __inline__ __m256i __DEFAULT_FN_ATTRS
4818_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
4819{
4820  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
4821                (__v4di)
4822                _mm256_setzero_si256 (),
4823                (__mmask8) __U);
4824}
4825
4826static __inline__ __m128i __DEFAULT_FN_ATTRS
4827_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
4828{
4829  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
4830                (__v4si) __W,
4831                (__mmask8) __U);
4832}
4833
4834static __inline__ __m128i __DEFAULT_FN_ATTRS
4835_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
4836{
4837  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
4838                (__v4si)
4839                _mm_setzero_si128 (),
4840                (__mmask8) __U);
4841}
4842
4843static __inline__ __m256i __DEFAULT_FN_ATTRS
4844_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4845{
4846  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
4847                (__v8si) __W,
4848                (__mmask8) __U);
4849}
4850
4851static __inline__ __m256i __DEFAULT_FN_ATTRS
4852_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
4853{
4854  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
4855                (__v8si)
4856                _mm256_setzero_si256 (),
4857                (__mmask8) __U);
4858}
4859
4860static __inline__ __m128i __DEFAULT_FN_ATTRS
4861_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
4862{
4863  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
4864                (__v2di) __W,
4865                (__mmask8) __U);
4866}
4867
4868static __inline__ __m128i __DEFAULT_FN_ATTRS
4869_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
4870{
4871  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
4872                (__v2di)
4873                _mm_setzero_si128 (),
4874                (__mmask8) __U);
4875}
4876
4877static __inline__ __m256i __DEFAULT_FN_ATTRS
4878_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
4879{
4880  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
4881                (__v4di) __W,
4882                (__mmask8) __U);
4883}
4884
4885static __inline__ __m256i __DEFAULT_FN_ATTRS
4886_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
4887{
4888  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
4889                (__v4di)
4890                _mm256_setzero_si256 (),
4891                (__mmask8) __U);
4892}
4893
4894
/* Expands to the prold128 masked builtin with an all-zero vector and an
   all-ones mask; the second argument is cast to int and passed through.
   NOTE(review): presumably a macro because that argument must be an
   immediate - confirm. */
#define _mm_rol_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prold128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)-1); })
4899
/* Expands to the prold128 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm_mask_rol_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prold128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)); })
4903
/* Expands to the prold128 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm_maskz_rol_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prold128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)); })
4908
/* Expands to the prold256 masked builtin with an all-zero vector and an
   all-ones mask; the second argument is cast to int and passed through. */
#define _mm256_rol_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prold256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1); })
4913
/* Expands to the prold256 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm256_mask_rol_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prold256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)); })
4917
/* Expands to the prold256 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm256_maskz_rol_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prold256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
4922
/* Expands to the prolq128 masked builtin with an all-zero vector and an
   all-ones mask; the second argument is cast to int and passed through. */
#define _mm_rol_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prolq128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)-1); })
4927
/* Expands to the prolq128 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm_mask_rol_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prolq128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)); })
4931
/* Expands to the prolq128 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm_maskz_rol_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prolq128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)(U)); })
4936
/* Expands to the prolq256 masked builtin with an all-zero vector and an
   all-ones mask; the second argument is cast to int and passed through. */
#define _mm256_rol_epi64(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prolq256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)-1); })
4941
/* Expands to the prolq256 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm256_mask_rol_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prolq256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)); })
4945
/* Expands to the prolq256 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm256_maskz_rol_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prolq256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
4950
4951static __inline__ __m128i __DEFAULT_FN_ATTRS
4952_mm_rolv_epi32 (__m128i __A, __m128i __B)
4953{
4954  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4955              (__v4si) __B,
4956              (__v4si)
4957              _mm_setzero_si128 (),
4958              (__mmask8) -1);
4959}
4960
4961static __inline__ __m128i __DEFAULT_FN_ATTRS
4962_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4963         __m128i __B)
4964{
4965  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4966              (__v4si) __B,
4967              (__v4si) __W,
4968              (__mmask8) __U);
4969}
4970
4971static __inline__ __m128i __DEFAULT_FN_ATTRS
4972_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4973{
4974  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4975              (__v4si) __B,
4976              (__v4si)
4977              _mm_setzero_si128 (),
4978              (__mmask8) __U);
4979}
4980
4981static __inline__ __m256i __DEFAULT_FN_ATTRS
4982_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4983{
4984  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4985              (__v8si) __B,
4986              (__v8si)
4987              _mm256_setzero_si256 (),
4988              (__mmask8) -1);
4989}
4990
4991static __inline__ __m256i __DEFAULT_FN_ATTRS
4992_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4993      __m256i __B)
4994{
4995  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4996              (__v8si) __B,
4997              (__v8si) __W,
4998              (__mmask8) __U);
4999}
5000
5001static __inline__ __m256i __DEFAULT_FN_ATTRS
5002_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5003{
5004  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
5005              (__v8si) __B,
5006              (__v8si)
5007              _mm256_setzero_si256 (),
5008              (__mmask8) __U);
5009}
5010
5011static __inline__ __m128i __DEFAULT_FN_ATTRS
5012_mm_rolv_epi64 (__m128i __A, __m128i __B)
5013{
5014  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
5015              (__v2di) __B,
5016              (__v2di)
5017              _mm_setzero_di (),
5018              (__mmask8) -1);
5019}
5020
5021static __inline__ __m128i __DEFAULT_FN_ATTRS
5022_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5023         __m128i __B)
5024{
5025  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
5026              (__v2di) __B,
5027              (__v2di) __W,
5028              (__mmask8) __U);
5029}
5030
5031static __inline__ __m128i __DEFAULT_FN_ATTRS
5032_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5033{
5034  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
5035              (__v2di) __B,
5036              (__v2di)
5037              _mm_setzero_di (),
5038              (__mmask8) __U);
5039}
5040
5041static __inline__ __m256i __DEFAULT_FN_ATTRS
5042_mm256_rolv_epi64 (__m256i __A, __m256i __B)
5043{
5044  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
5045              (__v4di) __B,
5046              (__v4di)
5047              _mm256_setzero_si256 (),
5048              (__mmask8) -1);
5049}
5050
5051static __inline__ __m256i __DEFAULT_FN_ATTRS
5052_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5053      __m256i __B)
5054{
5055  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
5056              (__v4di) __B,
5057              (__v4di) __W,
5058              (__mmask8) __U);
5059}
5060
5061static __inline__ __m256i __DEFAULT_FN_ATTRS
5062_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5063{
5064  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
5065              (__v4di) __B,
5066              (__v4di)
5067              _mm256_setzero_si256 (),
5068              (__mmask8) __U);
5069}
5070
/* Expands to the prord128 masked builtin with an all-zero vector and an
   all-ones mask; the second argument is cast to int and passed through.
   NOTE(review): presumably a macro because that argument must be an
   immediate - confirm. */
#define _mm_ror_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)-1); })
5075
/* Expands to the prord128 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm_mask_ror_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)); })
5079
/* Expands to the prord128 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm_maskz_ror_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)); })
5084
/* Expands to the prord256 masked builtin with an all-zero vector and an
   all-ones mask; the second argument is cast to int and passed through. */
#define _mm256_ror_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1); })
5089
/* Expands to the prord256 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm256_mask_ror_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)); })
5093
/* Expands to the prord256 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm256_maskz_ror_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
5098
/* Expands to the prorq128 masked builtin with an all-zero vector and an
   all-ones mask; the second argument is cast to int and passed through. */
#define _mm_ror_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)-1); })
5103
/* Expands to the prorq128 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm_mask_ror_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)); })
5107
/* Expands to the prorq128 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm_maskz_ror_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)(U)); })
5112
/* Expands to the prorq256 masked builtin with an all-zero vector and an
   all-ones mask; the second argument is cast to int and passed through. */
#define _mm256_ror_epi64(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)-1); })
5117
/* Expands to the prorq256 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm256_mask_ror_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)); })
5121
/* Expands to the prorq256 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm256_maskz_ror_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
5126
5127static __inline__ __m128i __DEFAULT_FN_ATTRS
5128_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5129        __m128i __B)
5130{
5131  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
5132             (__v4si) __B,
5133             (__v4si) __W,
5134             (__mmask8) __U);
5135}
5136
5137static __inline__ __m128i __DEFAULT_FN_ATTRS
5138_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5139{
5140  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
5141             (__v4si) __B,
5142             (__v4si)
5143             _mm_setzero_si128 (),
5144             (__mmask8) __U);
5145}
5146
5147static __inline__ __m256i __DEFAULT_FN_ATTRS
5148_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5149           __m128i __B)
5150{
5151  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
5152             (__v4si) __B,
5153             (__v8si) __W,
5154             (__mmask8) __U);
5155}
5156
5157static __inline__ __m256i __DEFAULT_FN_ATTRS
5158_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
5159{
5160  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
5161             (__v4si) __B,
5162             (__v8si)
5163             _mm256_setzero_si256 (),
5164             (__mmask8) __U);
5165}
5166
/* Expands to the pslldi128 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int.
   NOTE(review): presumably a macro because the count must be an
   immediate - confirm. */
#define _mm_mask_slli_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pslldi128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)); })
5171
/* Expands to the pslldi128 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm_maskz_slli_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pslldi128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)); })
5176
/* Expands to the pslldi256 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm256_mask_slli_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pslldi256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)); })
5181
/* Expands to the pslldi256 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm256_maskz_slli_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pslldi256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
5186
5187static __inline__ __m128i __DEFAULT_FN_ATTRS
5188_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5189        __m128i __B)
5190{
5191  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
5192             (__v2di) __B,
5193             (__v2di) __W,
5194             (__mmask8) __U);
5195}
5196
5197static __inline__ __m128i __DEFAULT_FN_ATTRS
5198_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5199{
5200  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
5201             (__v2di) __B,
5202             (__v2di)
5203             _mm_setzero_di (),
5204             (__mmask8) __U);
5205}
5206
5207static __inline__ __m256i __DEFAULT_FN_ATTRS
5208_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5209           __m128i __B)
5210{
5211  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
5212             (__v2di) __B,
5213             (__v4di) __W,
5214             (__mmask8) __U);
5215}
5216
5217static __inline__ __m256i __DEFAULT_FN_ATTRS
5218_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
5219{
5220  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
5221             (__v2di) __B,
5222             (__v4di)
5223             _mm256_setzero_si256 (),
5224             (__mmask8) __U);
5225}
5226
/* Expands to the psllqi128 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm_mask_slli_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psllqi128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)); })
5231
/* Expands to the psllqi128 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm_maskz_slli_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psllqi128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)(U)); })
5236
/* Expands to the psllqi256 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, the count is cast to int. */
#define _mm256_mask_slli_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psllqi256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)); })
5241
/* Expands to the psllqi256 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm256_maskz_slli_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psllqi256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
5246
5247
5248static __inline__ __m128i __DEFAULT_FN_ATTRS
5249_mm_rorv_epi32 (__m128i __A, __m128i __B)
5250{
5251  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
5252              (__v4si) __B,
5253              (__v4si)
5254              _mm_setzero_si128 (),
5255              (__mmask8) -1);
5256}
5257
5258static __inline__ __m128i __DEFAULT_FN_ATTRS
5259_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5260         __m128i __B)
5261{
5262  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
5263              (__v4si) __B,
5264              (__v4si) __W,
5265              (__mmask8) __U);
5266}
5267
5268static __inline__ __m128i __DEFAULT_FN_ATTRS
5269_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5270{
5271  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
5272              (__v4si) __B,
5273              (__v4si)
5274              _mm_setzero_si128 (),
5275              (__mmask8) __U);
5276}
5277
5278static __inline__ __m256i __DEFAULT_FN_ATTRS
5279_mm256_rorv_epi32 (__m256i __A, __m256i __B)
5280{
5281  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
5282              (__v8si) __B,
5283              (__v8si)
5284              _mm256_setzero_si256 (),
5285              (__mmask8) -1);
5286}
5287
5288static __inline__ __m256i __DEFAULT_FN_ATTRS
5289_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5290      __m256i __B)
5291{
5292  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
5293              (__v8si) __B,
5294              (__v8si) __W,
5295              (__mmask8) __U);
5296}
5297
5298static __inline__ __m256i __DEFAULT_FN_ATTRS
5299_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5300{
5301  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
5302              (__v8si) __B,
5303              (__v8si)
5304              _mm256_setzero_si256 (),
5305              (__mmask8) __U);
5306}
5307
5308static __inline__ __m128i __DEFAULT_FN_ATTRS
5309_mm_rorv_epi64 (__m128i __A, __m128i __B)
5310{
5311  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
5312              (__v2di) __B,
5313              (__v2di)
5314              _mm_setzero_di (),
5315              (__mmask8) -1);
5316}
5317
5318static __inline__ __m128i __DEFAULT_FN_ATTRS
5319_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5320         __m128i __B)
5321{
5322  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
5323              (__v2di) __B,
5324              (__v2di) __W,
5325              (__mmask8) __U);
5326}
5327
5328static __inline__ __m128i __DEFAULT_FN_ATTRS
5329_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5330{
5331  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
5332              (__v2di) __B,
5333              (__v2di)
5334              _mm_setzero_di (),
5335              (__mmask8) __U);
5336}
5337
5338static __inline__ __m256i __DEFAULT_FN_ATTRS
5339_mm256_rorv_epi64 (__m256i __A, __m256i __B)
5340{
5341  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
5342              (__v4di) __B,
5343              (__v4di)
5344              _mm256_setzero_si256 (),
5345              (__mmask8) -1);
5346}
5347
5348static __inline__ __m256i __DEFAULT_FN_ATTRS
5349_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5350      __m256i __B)
5351{
5352  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
5353              (__v4di) __B,
5354              (__v4di) __W,
5355              (__mmask8) __U);
5356}
5357
5358static __inline__ __m256i __DEFAULT_FN_ATTRS
5359_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5360{
5361  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
5362              (__v4di) __B,
5363              (__v4di)
5364              _mm256_setzero_si256 (),
5365              (__mmask8) __U);
5366}
5367
5368static __inline__ __m128i __DEFAULT_FN_ATTRS
5369_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
5370         __m128i __Y)
5371{
5372  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
5373             (__v2di) __Y,
5374             (__v2di) __W,
5375             (__mmask8) __U);
5376}
5377
5378static __inline__ __m128i __DEFAULT_FN_ATTRS
5379_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
5380{
5381  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
5382             (__v2di) __Y,
5383             (__v2di)
5384             _mm_setzero_di (),
5385             (__mmask8) __U);
5386}
5387
5388static __inline__ __m256i __DEFAULT_FN_ATTRS
5389_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
5390      __m256i __Y)
5391{
5392  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
5393             (__v4di) __Y,
5394             (__v4di) __W,
5395             (__mmask8) __U);
5396}
5397
5398static __inline__ __m256i __DEFAULT_FN_ATTRS
5399_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5400{
5401  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
5402             (__v4di) __Y,
5403             (__v4di)
5404             _mm256_setzero_si256 (),
5405             (__mmask8) __U);
5406}
5407
5408static __inline__ __m128i __DEFAULT_FN_ATTRS
5409_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
5410         __m128i __Y)
5411{
5412  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
5413             (__v4si) __Y,
5414             (__v4si) __W,
5415             (__mmask8) __U);
5416}
5417
5418static __inline__ __m128i __DEFAULT_FN_ATTRS
5419_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
5420{
5421  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
5422             (__v4si) __Y,
5423             (__v4si)
5424             _mm_setzero_si128 (),
5425             (__mmask8) __U);
5426}
5427
5428static __inline__ __m256i __DEFAULT_FN_ATTRS
5429_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
5430      __m256i __Y)
5431{
5432  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
5433             (__v8si) __Y,
5434             (__v8si) __W,
5435             (__mmask8) __U);
5436}
5437
5438static __inline__ __m256i __DEFAULT_FN_ATTRS
5439_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
5440{
5441  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
5442             (__v8si) __Y,
5443             (__v8si)
5444             _mm256_setzero_si256 (),
5445             (__mmask8) __U);
5446}
5447
5448
5449
5450static __inline__ __m128i __DEFAULT_FN_ATTRS
5451_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
5452         __m128i __Y)
5453{
5454  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
5455             (__v2di) __Y,
5456             (__v2di) __W,
5457             (__mmask8) __U);
5458}
5459
5460static __inline__ __m128i __DEFAULT_FN_ATTRS
5461_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
5462{
5463  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
5464             (__v2di) __Y,
5465             (__v2di)
5466             _mm_setzero_di (),
5467             (__mmask8) __U);
5468}
5469
5470static __inline__ __m256i __DEFAULT_FN_ATTRS
5471_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
5472      __m256i __Y)
5473{
5474  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
5475             (__v4di) __Y,
5476             (__v4di) __W,
5477             (__mmask8) __U);
5478}
5479
5480static __inline__ __m256i __DEFAULT_FN_ATTRS
5481_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5482{
5483  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
5484             (__v4di) __Y,
5485             (__v4di)
5486             _mm256_setzero_si256 (),
5487             (__mmask8) __U);
5488}
5489
5490static __inline__ __m128i __DEFAULT_FN_ATTRS
5491_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
5492         __m128i __Y)
5493{
5494  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
5495             (__v4si) __Y,
5496             (__v4si) __W,
5497             (__mmask8) __U);
5498}
5499
5500static __inline__ __m128i __DEFAULT_FN_ATTRS
5501_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
5502{
5503  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
5504             (__v4si) __Y,
5505             (__v4si)
5506             _mm_setzero_si128 (),
5507             (__mmask8) __U);
5508}
5509
5510static __inline__ __m256i __DEFAULT_FN_ATTRS
5511_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
5512      __m256i __Y)
5513{
5514  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
5515             (__v8si) __Y,
5516             (__v8si) __W,
5517             (__mmask8) __U);
5518}
5519
5520static __inline__ __m256i __DEFAULT_FN_ATTRS
5521_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
5522{
5523  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
5524             (__v8si) __Y,
5525             (__v8si)
5526             _mm256_setzero_si256 (),
5527             (__mmask8) __U);
5528}
5529
5530
5531
5532static __inline__ __m128i __DEFAULT_FN_ATTRS
5533_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5534        __m128i __B)
5535{
5536  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
5537             (__v4si) __B,
5538             (__v4si) __W,
5539             (__mmask8) __U);
5540}
5541
5542static __inline__ __m128i __DEFAULT_FN_ATTRS
5543_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5544{
5545  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
5546             (__v4si) __B,
5547             (__v4si)
5548             _mm_setzero_si128 (),
5549             (__mmask8) __U);
5550}
5551
5552static __inline__ __m256i __DEFAULT_FN_ATTRS
5553_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5554           __m128i __B)
5555{
5556  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
5557             (__v4si) __B,
5558             (__v8si) __W,
5559             (__mmask8) __U);
5560}
5561
5562static __inline__ __m256i __DEFAULT_FN_ATTRS
5563_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
5564{
5565  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
5566             (__v4si) __B,
5567             (__v8si)
5568             _mm256_setzero_si256 (),
5569             (__mmask8) __U);
5570}
5571
/* Expands to the psrldi128 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, imm is cast to int.
   NOTE(review): presumably a macro because imm must be an immediate -
   confirm. */
#define _mm_mask_srli_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psrldi128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)); })
5576
/* Expands to the psrldi128 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm_maskz_srli_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psrldi128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)); })
5581
/* Expands to the psrldi256 masked builtin; W and U are forwarded as its
   trailing vector and mask operands, imm is cast to int. */
#define _mm256_mask_srli_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psrldi256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)); })
5586
/* Expands to the psrldi256 masked builtin with an all-zero vector in the
   slot before the mask U. */
#define _mm256_maskz_srli_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psrldi256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
5591
5592static __inline__ __m128i __DEFAULT_FN_ATTRS
5593_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5594        __m128i __B)
5595{
5596  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
5597             (__v2di) __B,
5598             (__v2di) __W,
5599             (__mmask8) __U);
5600}
5601
5602static __inline__ __m128i __DEFAULT_FN_ATTRS
5603_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5604{
5605  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
5606             (__v2di) __B,
5607             (__v2di)
5608             _mm_setzero_di (),
5609             (__mmask8) __U);
5610}
5611
5612static __inline__ __m256i __DEFAULT_FN_ATTRS
5613_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5614           __m128i __B)
5615{
5616  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
5617             (__v2di) __B,
5618             (__v4di) __W,
5619             (__mmask8) __U);
5620}
5621
5622static __inline__ __m256i __DEFAULT_FN_ATTRS
5623_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
5624{
5625  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
5626             (__v2di) __B,
5627             (__v4di)
5628             _mm256_setzero_si256 (),
5629             (__mmask8) __U);
5630}
5631
/* Masked 64-bit shift-right-by-immediate on a 128-bit vector.  Expands to the
 * psrlqi128 masked builtin: `a` is the input, `count` the int shift amount,
 * `src` the pass-through operand, `k` the 8-bit mask. */
#define _mm_mask_srli_epi64(src, k, a, count) __extension__ ({ \
  (__m128i)__builtin_ia32_psrlqi128_mask( \
      (__v2di)(__m128i)(a), (int)(count), \
      (__v2di)(__m128i)(src), (__mmask8)(k)); })
5636
/* 128-bit immediate right shift of 64-bit lanes; the psrlqi128 masked builtin
 * receives _mm_setzero_si128() as its pass-through operand. */
#define _mm_maskz_srli_epi64(k, a, count) __extension__ ({ \
  (__m128i)__builtin_ia32_psrlqi128_mask( \
      (__v2di)(__m128i)(a), (int)(count), \
      (__v2di)_mm_setzero_si128(), (__mmask8)(k)); })
5641
/* 256-bit masked 64-bit shift-right-by-immediate.  Expands to the psrlqi256
 * masked builtin: `a` is the input, `count` the int shift amount, `src` the
 * pass-through operand, `k` the 8-bit mask. */
#define _mm256_mask_srli_epi64(src, k, a, count) __extension__ ({ \
  (__m256i)__builtin_ia32_psrlqi256_mask( \
      (__v4di)(__m256i)(a), (int)(count), \
      (__v4di)(__m256i)(src), (__mmask8)(k)); })
5646
/* 256-bit immediate right shift of 64-bit lanes; the psrlqi256 masked builtin
 * receives _mm256_setzero_si256() as its pass-through operand. */
#define _mm256_maskz_srli_epi64(k, a, count) __extension__ ({ \
  (__m256i)__builtin_ia32_psrlqi256_mask( \
      (__v4di)(__m256i)(a), (int)(count), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)(k)); })
5651
5652static __inline__ __m128i __DEFAULT_FN_ATTRS
5653_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
5654         __m128i __Y)
5655{
5656  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
5657             (__v4si) __Y,
5658             (__v4si) __W,
5659             (__mmask8) __U);
5660}
5661
5662static __inline__ __m128i __DEFAULT_FN_ATTRS
5663_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
5664{
5665  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
5666             (__v4si) __Y,
5667             (__v4si)
5668             _mm_setzero_si128 (),
5669             (__mmask8) __U);
5670}
5671
5672static __inline__ __m256i __DEFAULT_FN_ATTRS
5673_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
5674      __m256i __Y)
5675{
5676  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
5677             (__v8si) __Y,
5678             (__v8si) __W,
5679             (__mmask8) __U);
5680}
5681
5682static __inline__ __m256i __DEFAULT_FN_ATTRS
5683_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
5684{
5685  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
5686             (__v8si) __Y,
5687             (__v8si)
5688             _mm256_setzero_si256 (),
5689             (__mmask8) __U);
5690}
5691
5692static __inline__ __m128i __DEFAULT_FN_ATTRS
5693_mm_srav_epi64 (__m128i __X, __m128i __Y)
5694{
5695  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
5696              (__v2di) __Y,
5697              (__v2di)
5698              _mm_setzero_di (),
5699              (__mmask8) -1);
5700}
5701
5702static __inline__ __m128i __DEFAULT_FN_ATTRS
5703_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
5704         __m128i __Y)
5705{
5706  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
5707              (__v2di) __Y,
5708              (__v2di) __W,
5709              (__mmask8) __U);
5710}
5711
5712static __inline__ __m128i __DEFAULT_FN_ATTRS
5713_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
5714{
5715  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
5716              (__v2di) __Y,
5717              (__v2di)
5718              _mm_setzero_di (),
5719              (__mmask8) __U);
5720}
5721
5722static __inline__ __m256i __DEFAULT_FN_ATTRS
5723_mm256_srav_epi64 (__m256i __X, __m256i __Y)
5724{
5725  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
5726              (__v4di) __Y,
5727              (__v4di)
5728              _mm256_setzero_si256 (),
5729              (__mmask8) -1);
5730}
5731
5732static __inline__ __m256i __DEFAULT_FN_ATTRS
5733_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
5734      __m256i __Y)
5735{
5736  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
5737              (__v4di) __Y,
5738              (__v4di) __W,
5739              (__mmask8) __U);
5740}
5741
5742static __inline__ __m256i __DEFAULT_FN_ATTRS
5743_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5744{
5745  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
5746              (__v4di) __Y,
5747              (__v4di)
5748              _mm256_setzero_si256 (),
5749              (__mmask8) __U);
5750}
5751
5752static __inline__ __m128i __DEFAULT_FN_ATTRS
5753_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5754{
5755  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5756                 (__v4si) __A,
5757                 (__v4si) __W);
5758}
5759
5760static __inline__ __m128i __DEFAULT_FN_ATTRS
5761_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5762{
5763  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5764                 (__v4si) __A,
5765                 (__v4si) _mm_setzero_si128 ());
5766}
5767
5768
5769static __inline__ __m256i __DEFAULT_FN_ATTRS
5770_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5771{
5772  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5773                 (__v8si) __A,
5774                 (__v8si) __W);
5775}
5776
5777static __inline__ __m256i __DEFAULT_FN_ATTRS
5778_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5779{
5780  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5781                 (__v8si) __A,
5782                 (__v8si) _mm256_setzero_si256 ());
5783}
5784
5785static __inline__ __m128i __DEFAULT_FN_ATTRS
5786_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5787{
5788  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5789              (__v4si) __W,
5790              (__mmask8)
5791              __U);
5792}
5793
5794static __inline__ __m128i __DEFAULT_FN_ATTRS
5795_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5796{
5797  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5798              (__v4si)
5799              _mm_setzero_si128 (),
5800              (__mmask8)
5801              __U);
5802}
5803
5804static __inline__ __m256i __DEFAULT_FN_ATTRS
5805_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5806{
5807  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5808              (__v8si) __W,
5809              (__mmask8)
5810              __U);
5811}
5812
5813static __inline__ __m256i __DEFAULT_FN_ATTRS
5814_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5815{
5816  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5817              (__v8si)
5818              _mm256_setzero_si256 (),
5819              (__mmask8)
5820              __U);
5821}
5822
5823static __inline__ void __DEFAULT_FN_ATTRS
5824_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5825{
5826  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5827          (__v4si) __A,
5828          (__mmask8) __U);
5829}
5830
5831static __inline__ void __DEFAULT_FN_ATTRS
5832_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5833{
5834  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5835          (__v8si) __A,
5836          (__mmask8) __U);
5837}
5838
5839static __inline__ __m128i __DEFAULT_FN_ATTRS
5840_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5841{
5842  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5843                 (__v2di) __A,
5844                 (__v2di) __W);
5845}
5846
5847static __inline__ __m128i __DEFAULT_FN_ATTRS
5848_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5849{
5850  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5851                 (__v2di) __A,
5852                 (__v2di) _mm_setzero_di ());
5853}
5854
5855static __inline__ __m256i __DEFAULT_FN_ATTRS
5856_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5857{
5858  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5859                 (__v4di) __A,
5860                 (__v4di) __W);
5861}
5862
5863static __inline__ __m256i __DEFAULT_FN_ATTRS
5864_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5865{
5866  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5867                 (__v4di) __A,
5868                 (__v4di) _mm256_setzero_si256 ());
5869}
5870
5871static __inline__ __m128i __DEFAULT_FN_ATTRS
5872_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5873{
5874  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5875              (__v2di) __W,
5876              (__mmask8)
5877              __U);
5878}
5879
5880static __inline__ __m128i __DEFAULT_FN_ATTRS
5881_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5882{
5883  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5884              (__v2di)
5885              _mm_setzero_di (),
5886              (__mmask8)
5887              __U);
5888}
5889
5890static __inline__ __m256i __DEFAULT_FN_ATTRS
5891_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5892{
5893  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5894              (__v4di) __W,
5895              (__mmask8)
5896              __U);
5897}
5898
5899static __inline__ __m256i __DEFAULT_FN_ATTRS
5900_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5901{
5902  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5903              (__v4di)
5904              _mm256_setzero_si256 (),
5905              (__mmask8)
5906              __U);
5907}
5908
5909static __inline__ void __DEFAULT_FN_ATTRS
5910_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5911{
5912  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5913          (__v2di) __A,
5914          (__mmask8) __U);
5915}
5916
5917static __inline__ void __DEFAULT_FN_ATTRS
5918_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5919{
5920  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5921          (__v4di) __A,
5922          (__mmask8) __U);
5923}
5924
5925static __inline__ __m128d __DEFAULT_FN_ATTRS
5926_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5927{
5928  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5929                                              (__v2df)_mm_movedup_pd(__A),
5930                                              (__v2df)__W);
5931}
5932
5933static __inline__ __m128d __DEFAULT_FN_ATTRS
5934_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5935{
5936  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5937                                              (__v2df)_mm_movedup_pd(__A),
5938                                              (__v2df)_mm_setzero_pd());
5939}
5940
5941static __inline__ __m256d __DEFAULT_FN_ATTRS
5942_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5943{
5944  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5945                                              (__v4df)_mm256_movedup_pd(__A),
5946                                              (__v4df)__W);
5947}
5948
5949static __inline__ __m256d __DEFAULT_FN_ATTRS
5950_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5951{
5952  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5953                                              (__v4df)_mm256_movedup_pd(__A),
5954                                              (__v4df)_mm256_setzero_pd());
5955}
5956
5957
/* Masked broadcast of the int `value` into a 128-bit vector of 32-bit
 * elements.  Expands to the pbroadcastd128_gpr masked builtin with `src` as
 * the pass-through operand and `k` as the 8-bit mask. */
#define _mm_mask_set1_epi32(src, k, value) __extension__ ({ \
  (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask( \
      (int)(value), (__v4si)(__m128i)(src), (__mmask8)(k)); })
5962
/* Masked broadcast of the int `value` into a 128-bit vector of 32-bit
 * elements, with _mm_setzero_si128() as the pass-through operand of the
 * pbroadcastd128_gpr masked builtin. */
#define _mm_maskz_set1_epi32(k, value) __extension__ ({ \
  (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask( \
      (int)(value), (__v4si)_mm_setzero_si128(), (__mmask8)(k)); })
5967
/* Masked broadcast of the int `value` into a 256-bit vector of 32-bit
 * elements.  Expands to the pbroadcastd256_gpr masked builtin with `src` as
 * the pass-through operand and `k` as the 8-bit mask. */
#define _mm256_mask_set1_epi32(src, k, value) __extension__ ({ \
  (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask( \
      (int)(value), (__v8si)(__m256i)(src), (__mmask8)(k)); })
5972
/* Masked broadcast of the int `value` into a 256-bit vector of 32-bit
 * elements, with _mm256_setzero_si256() as the pass-through operand of the
 * pbroadcastd256_gpr masked builtin. */
#define _mm256_maskz_set1_epi32(k, value) __extension__ ({ \
  (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask( \
      (int)(value), (__v8si)_mm256_setzero_si256(), (__mmask8)(k)); })
5977
5978static __inline__ __m128i __DEFAULT_FN_ATTRS
5979_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5980{
5981  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
5982                 __M);
5983}
5984
5985static __inline__ __m128i __DEFAULT_FN_ATTRS
5986_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5987{
5988  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
5989                 (__v2di)
5990                 _mm_setzero_si128 (),
5991                 __M);
5992}
5993
5994static __inline__ __m256i __DEFAULT_FN_ATTRS
5995_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5996{
5997  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
5998                 __M);
5999}
6000
6001static __inline__ __m256i __DEFAULT_FN_ATTRS
6002_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
6003{
6004  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
6005                 (__v4di)
6006                 _mm256_setzero_si256 (),
6007                 __M);
6008}
6009
/* Unmasked 128-bit double-precision fixupimm.  Expands to the fixupimmpd128
 * masked builtin with operands (a, b, c as a 64-bit integer vector, ctl as
 * int) and an all-ones mask ((__mmask8)-1). */
#define _mm_fixupimm_pd(a, b, c, ctl) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), (int)(ctl), (__mmask8)-1); })
6015
/* Masked 128-bit double-precision fixupimm.  Expands to the fixupimmpd128
 * "_mask" builtin with operands (a, b, c as a 64-bit integer vector, ctl as
 * int) and `k` as the 8-bit mask. */
#define _mm_mask_fixupimm_pd(a, k, b, c, ctl) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), (int)(ctl), (__mmask8)(k)); })
6021
/* Masked 128-bit double-precision fixupimm using the fixupimmpd128 "_maskz"
 * builtin.  Operands are (a, b, c as a 64-bit integer vector, ctl as int)
 * followed by `k` as the 8-bit mask. */
#define _mm_maskz_fixupimm_pd(k, a, b, c, ctl) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), (int)(ctl), (__mmask8)(k)); })
6027
/* Unmasked 256-bit double-precision fixupimm.  Expands to the fixupimmpd256
 * masked builtin with operands (a, b, c as a 64-bit integer vector, ctl as
 * int) and an all-ones mask ((__mmask8)-1). */
#define _mm256_fixupimm_pd(a, b, c, ctl) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), \
      (__v4di)(__m256i)(c), (int)(ctl), (__mmask8)-1); })
6033
/* Masked 256-bit double-precision fixupimm.  Expands to the fixupimmpd256
 * "_mask" builtin with operands (a, b, c as a 64-bit integer vector, ctl as
 * int) and `k` as the 8-bit mask. */
#define _mm256_mask_fixupimm_pd(a, k, b, c, ctl) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), \
      (__v4di)(__m256i)(c), (int)(ctl), (__mmask8)(k)); })
6039
/* Masked 256-bit double-precision fixupimm using the fixupimmpd256 "_maskz"
 * builtin.  Operands are (a, b, c as a 64-bit integer vector, ctl as int)
 * followed by `k` as the 8-bit mask. */
#define _mm256_maskz_fixupimm_pd(k, a, b, c, ctl) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(a), (__v4df)(__m256d)(b), \
      (__v4di)(__m256i)(c), (int)(ctl), (__mmask8)(k)); })
6045
/* Unmasked 128-bit single-precision fixupimm.  Expands to the fixupimmps128
 * masked builtin with operands (a, b, c as a 32-bit integer vector, ctl as
 * int) and an all-ones mask ((__mmask8)-1). */
#define _mm_fixupimm_ps(a, b, c, ctl) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), (int)(ctl), (__mmask8)-1); })
6051
/* Masked 128-bit single-precision fixupimm.  Expands to the fixupimmps128
 * "_mask" builtin with operands (a, b, c as a 32-bit integer vector, ctl as
 * int) and `k` as the 8-bit mask. */
#define _mm_mask_fixupimm_ps(a, k, b, c, ctl) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), (int)(ctl), (__mmask8)(k)); })
6057
/* Masked 128-bit single-precision fixupimm using the fixupimmps128 "_maskz"
 * builtin.  Operands are (a, b, c as a 32-bit integer vector, ctl as int)
 * followed by `k` as the 8-bit mask. */
#define _mm_maskz_fixupimm_ps(k, a, b, c, ctl) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), (int)(ctl), (__mmask8)(k)); })
6063
/* Unmasked 256-bit single-precision fixupimm.  Expands to the fixupimmps256
 * masked builtin with operands (a, b, c as a 32-bit integer vector, ctl as
 * int) and an all-ones mask ((__mmask8)-1). */
#define _mm256_fixupimm_ps(a, b, c, ctl) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), \
      (__v8si)(__m256i)(c), (int)(ctl), (__mmask8)-1); })
6069
/* Masked 256-bit single-precision fixupimm.  Expands to the fixupimmps256
 * "_mask" builtin with operands (a, b, c as a 32-bit integer vector, ctl as
 * int) and `k` as the 8-bit mask. */
#define _mm256_mask_fixupimm_ps(a, k, b, c, ctl) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), \
      (__v8si)(__m256i)(c), (int)(ctl), (__mmask8)(k)); })
6075
/* Masked 256-bit single-precision fixupimm using the fixupimmps256 "_maskz"
 * builtin.  Operands are (a, b, c as a 32-bit integer vector, ctl as int)
 * followed by `k` as the 8-bit mask. */
#define _mm256_maskz_fixupimm_ps(k, a, b, c, ctl) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(a), (__v8sf)(__m256)(b), \
      (__v8si)(__m256i)(c), (int)(ctl), (__mmask8)(k)); })
6081
6082static __inline__ __m128d __DEFAULT_FN_ATTRS
6083_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
6084{
6085  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
6086               (__v2df) __W,
6087               (__mmask8) __U);
6088}
6089
6090static __inline__ __m128d __DEFAULT_FN_ATTRS
6091_mm_maskz_load_pd (__mmask8 __U, void const *__P)
6092{
6093  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
6094               (__v2df)
6095               _mm_setzero_pd (),
6096               (__mmask8) __U);
6097}
6098
6099static __inline__ __m256d __DEFAULT_FN_ATTRS
6100_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
6101{
6102  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
6103               (__v4df) __W,
6104               (__mmask8) __U);
6105}
6106
6107static __inline__ __m256d __DEFAULT_FN_ATTRS
6108_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
6109{
6110  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
6111               (__v4df)
6112               _mm256_setzero_pd (),
6113               (__mmask8) __U);
6114}
6115
6116static __inline__ __m128 __DEFAULT_FN_ATTRS
6117_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
6118{
6119  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
6120              (__v4sf) __W,
6121              (__mmask8) __U);
6122}
6123
6124static __inline__ __m128 __DEFAULT_FN_ATTRS
6125_mm_maskz_load_ps (__mmask8 __U, void const *__P)
6126{
6127  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
6128              (__v4sf)
6129              _mm_setzero_ps (),
6130              (__mmask8) __U);
6131}
6132
6133static __inline__ __m256 __DEFAULT_FN_ATTRS
6134_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
6135{
6136  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
6137              (__v8sf) __W,
6138              (__mmask8) __U);
6139}
6140
6141static __inline__ __m256 __DEFAULT_FN_ATTRS
6142_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
6143{
6144  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
6145              (__v8sf)
6146              _mm256_setzero_ps (),
6147              (__mmask8) __U);
6148}
6149
6150static __inline__ __m128i __DEFAULT_FN_ATTRS
6151_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6152{
6153  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
6154                 (__v2di) __W,
6155                 (__mmask8) __U);
6156}
6157
6158static __inline__ __m128i __DEFAULT_FN_ATTRS
6159_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6160{
6161  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
6162                 (__v2di)
6163                 _mm_setzero_si128 (),
6164                 (__mmask8) __U);
6165}
6166
6167static __inline__ __m256i __DEFAULT_FN_ATTRS
6168_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
6169{
6170  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
6171                 (__v4di) __W,
6172                 (__mmask8) __U);
6173}
6174
6175static __inline__ __m256i __DEFAULT_FN_ATTRS
6176_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
6177{
6178  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
6179                 (__v4di)
6180                 _mm256_setzero_si256 (),
6181                 (__mmask8) __U);
6182}
6183
6184static __inline__ __m128i __DEFAULT_FN_ATTRS
6185_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6186{
6187  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
6188                 (__v4si) __W,
6189                 (__mmask8) __U);
6190}
6191
6192static __inline__ __m128i __DEFAULT_FN_ATTRS
6193_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
6194{
6195  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
6196                 (__v4si)
6197                 _mm_setzero_si128 (),
6198                 (__mmask8) __U);
6199}
6200
6201static __inline__ __m256i __DEFAULT_FN_ATTRS
6202_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
6203{
6204  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
6205                 (__v8si) __W,
6206                 (__mmask8) __U);
6207}
6208
6209static __inline__ __m256i __DEFAULT_FN_ATTRS
6210_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
6211{
6212  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
6213                 (__v8si)
6214                 _mm256_setzero_si256 (),
6215                 (__mmask8) __U);
6216}
6217
6218static __inline__ __m128d __DEFAULT_FN_ATTRS
6219_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6220{
6221  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
6222               (__v2df) __W,
6223               (__mmask8) __U);
6224}
6225
6226static __inline__ __m128d __DEFAULT_FN_ATTRS
6227_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
6228{
6229  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
6230               (__v2df)
6231               _mm_setzero_pd (),
6232               (__mmask8) __U);
6233}
6234
6235static __inline__ __m256d __DEFAULT_FN_ATTRS
6236_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6237{
6238  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
6239               (__v4df) __W,
6240               (__mmask8) __U);
6241}
6242
6243static __inline__ __m256d __DEFAULT_FN_ATTRS
6244_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
6245{
6246  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
6247               (__v4df)
6248               _mm256_setzero_pd (),
6249               (__mmask8) __U);
6250}
6251
6252static __inline__ __m128 __DEFAULT_FN_ATTRS
6253_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6254{
6255  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
6256              (__v4sf) __W,
6257              (__mmask8) __U);
6258}
6259
6260static __inline__ __m128 __DEFAULT_FN_ATTRS
6261_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
6262{
6263  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
6264              (__v4sf)
6265              _mm_setzero_ps (),
6266              (__mmask8) __U);
6267}
6268
6269static __inline__ __m256 __DEFAULT_FN_ATTRS
6270_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6271{
6272  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
6273              (__v8sf) __W,
6274              (__mmask8) __U);
6275}
6276
6277static __inline__ __m256 __DEFAULT_FN_ATTRS
6278_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
6279{
6280  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
6281              (__v8sf)
6282              _mm256_setzero_ps (),
6283              (__mmask8) __U);
6284}
6285
6286static __inline__ void __DEFAULT_FN_ATTRS
6287_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
6288{
6289  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
6290           (__v2df) __A,
6291           (__mmask8) __U);
6292}
6293
6294static __inline__ void __DEFAULT_FN_ATTRS
6295_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
6296{
6297  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
6298           (__v4df) __A,
6299           (__mmask8) __U);
6300}
6301
6302static __inline__ void __DEFAULT_FN_ATTRS
6303_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
6304{
6305  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
6306           (__v4sf) __A,
6307           (__mmask8) __U);
6308}
6309
6310static __inline__ void __DEFAULT_FN_ATTRS
6311_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
6312{
6313  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
6314           (__v8sf) __A,
6315           (__mmask8) __U);
6316}
6317
6318static __inline__ void __DEFAULT_FN_ATTRS
6319_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
6320{
6321  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
6322             (__v2di) __A,
6323             (__mmask8) __U);
6324}
6325
6326static __inline__ void __DEFAULT_FN_ATTRS
6327_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
6328{
6329  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
6330             (__v4di) __A,
6331             (__mmask8) __U);
6332}
6333
6334static __inline__ void __DEFAULT_FN_ATTRS
6335_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
6336{
6337  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
6338             (__v4si) __A,
6339             (__mmask8) __U);
6340}
6341
6342static __inline__ void __DEFAULT_FN_ATTRS
6343_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
6344{
6345  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
6346             (__v8si) __A,
6347             (__mmask8) __U);
6348}
6349
6350static __inline__ void __DEFAULT_FN_ATTRS
6351_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
6352{
6353  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
6354           (__v2df) __A,
6355           (__mmask8) __U);
6356}
6357
6358static __inline__ void __DEFAULT_FN_ATTRS
6359_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
6360{
6361  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
6362           (__v4df) __A,
6363           (__mmask8) __U);
6364}
6365
6366static __inline__ void __DEFAULT_FN_ATTRS
6367_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
6368{
6369  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
6370           (__v4sf) __A,
6371           (__mmask8) __U);
6372}
6373
6374static __inline__ void __DEFAULT_FN_ATTRS
6375_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
6376{
6377  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
6378           (__v8sf) __A,
6379           (__mmask8) __U);
6380}
6381
6382
6383static __inline__ __m128d __DEFAULT_FN_ATTRS
6384_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6385{
6386  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6387                                              (__v2df)_mm_unpackhi_pd(__A, __B),
6388                                              (__v2df)__W);
6389}
6390
6391static __inline__ __m128d __DEFAULT_FN_ATTRS
6392_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
6393{
6394  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6395                                              (__v2df)_mm_unpackhi_pd(__A, __B),
6396                                              (__v2df)_mm_setzero_pd());
6397}
6398
6399static __inline__ __m256d __DEFAULT_FN_ATTRS
6400_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
6401{
6402  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6403                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
6404                                           (__v4df)__W);
6405}
6406
6407static __inline__ __m256d __DEFAULT_FN_ATTRS
6408_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
6409{
6410  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6411                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
6412                                           (__v4df)_mm256_setzero_pd());
6413}
6414
6415static __inline__ __m128 __DEFAULT_FN_ATTRS
6416_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6417{
6418  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6419                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
6420                                             (__v4sf)__W);
6421}
6422
6423static __inline__ __m128 __DEFAULT_FN_ATTRS
6424_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
6425{
6426  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6427                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
6428                                             (__v4sf)_mm_setzero_ps());
6429}
6430
6431static __inline__ __m256 __DEFAULT_FN_ATTRS
6432_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
6433{
6434  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6435                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
6436                                           (__v8sf)__W);
6437}
6438
6439static __inline__ __m256 __DEFAULT_FN_ATTRS
6440_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
6441{
6442  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6443                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
6444                                           (__v8sf)_mm256_setzero_ps());
6445}
6446
6447static __inline__ __m128d __DEFAULT_FN_ATTRS
6448_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6449{
6450  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6451                                              (__v2df)_mm_unpacklo_pd(__A, __B),
6452                                              (__v2df)__W);
6453}
6454
6455static __inline__ __m128d __DEFAULT_FN_ATTRS
6456_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
6457{
6458  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6459                                              (__v2df)_mm_unpacklo_pd(__A, __B),
6460                                              (__v2df)_mm_setzero_pd());
6461}
6462
6463static __inline__ __m256d __DEFAULT_FN_ATTRS
6464_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
6465{
6466  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6467                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
6468                                           (__v4df)__W);
6469}
6470
6471static __inline__ __m256d __DEFAULT_FN_ATTRS
6472_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
6473{
6474  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6475                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
6476                                           (__v4df)_mm256_setzero_pd());
6477}
6478
6479static __inline__ __m128 __DEFAULT_FN_ATTRS
6480_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6481{
6482  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6483                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
6484                                             (__v4sf)__W);
6485}
6486
6487static __inline__ __m128 __DEFAULT_FN_ATTRS
6488_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
6489{
6490  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6491                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
6492                                             (__v4sf)_mm_setzero_ps());
6493}
6494
6495static __inline__ __m256 __DEFAULT_FN_ATTRS
6496_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
6497{
6498  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6499                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
6500                                           (__v8sf)__W);
6501}
6502
6503static __inline__ __m256 __DEFAULT_FN_ATTRS
6504_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
6505{
6506  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6507                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
6508                                           (__v8sf)_mm256_setzero_ps());
6509}
6510
6511static __inline__ __m128d __DEFAULT_FN_ATTRS
6512_mm_rcp14_pd (__m128d __A)
6513{
6514  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
6515                (__v2df)
6516                _mm_setzero_pd (),
6517                (__mmask8) -1);
6518}
6519
6520static __inline__ __m128d __DEFAULT_FN_ATTRS
6521_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6522{
6523  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
6524                (__v2df) __W,
6525                (__mmask8) __U);
6526}
6527
6528static __inline__ __m128d __DEFAULT_FN_ATTRS
6529_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
6530{
6531  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
6532                (__v2df)
6533                _mm_setzero_pd (),
6534                (__mmask8) __U);
6535}
6536
6537static __inline__ __m256d __DEFAULT_FN_ATTRS
6538_mm256_rcp14_pd (__m256d __A)
6539{
6540  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
6541                (__v4df)
6542                _mm256_setzero_pd (),
6543                (__mmask8) -1);
6544}
6545
6546static __inline__ __m256d __DEFAULT_FN_ATTRS
6547_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6548{
6549  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
6550                (__v4df) __W,
6551                (__mmask8) __U);
6552}
6553
6554static __inline__ __m256d __DEFAULT_FN_ATTRS
6555_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
6556{
6557  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
6558                (__v4df)
6559                _mm256_setzero_pd (),
6560                (__mmask8) __U);
6561}
6562
6563static __inline__ __m128 __DEFAULT_FN_ATTRS
6564_mm_rcp14_ps (__m128 __A)
6565{
6566  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6567               (__v4sf)
6568               _mm_setzero_ps (),
6569               (__mmask8) -1);
6570}
6571
6572static __inline__ __m128 __DEFAULT_FN_ATTRS
6573_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6574{
6575  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6576               (__v4sf) __W,
6577               (__mmask8) __U);
6578}
6579
6580static __inline__ __m128 __DEFAULT_FN_ATTRS
6581_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6582{
6583  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6584               (__v4sf)
6585               _mm_setzero_ps (),
6586               (__mmask8) __U);
6587}
6588
6589static __inline__ __m256 __DEFAULT_FN_ATTRS
6590_mm256_rcp14_ps (__m256 __A)
6591{
6592  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6593               (__v8sf)
6594               _mm256_setzero_ps (),
6595               (__mmask8) -1);
6596}
6597
6598static __inline__ __m256 __DEFAULT_FN_ATTRS
6599_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6600{
6601  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6602               (__v8sf) __W,
6603               (__mmask8) __U);
6604}
6605
6606static __inline__ __m256 __DEFAULT_FN_ATTRS
6607_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6608{
6609  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6610               (__v8sf)
6611               _mm256_setzero_ps (),
6612               (__mmask8) __U);
6613}
6614
/* Masked _mm_permute_pd: expands to a selectpd_128 call on mask U, the
   permuted X (control C) and W. */
#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)(__m128d)(W)); })
6619
/* Masked _mm_permute_pd: expands to a selectpd_128 call on mask U, the
   permuted X (control C) and the _mm_setzero_pd() vector. */
#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()); })
6624
/* Masked _mm256_permute_pd: expands to a selectpd_256 call on mask U, the
   permuted X (control C) and W. */
#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)); })
6629
/* Masked _mm256_permute_pd: expands to a selectpd_256 call on mask U, the
   permuted X (control C) and the _mm256_setzero_pd() vector. */
#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()); })
6634
/* Masked _mm_permute_ps: expands to a selectps_128 call on mask U, the
   permuted X (control C) and W. */
#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)(__m128)(W)); })
6639
/* Masked _mm_permute_ps: expands to a selectps_128 call on mask U, the
   permuted X (control C) and the _mm_setzero_ps() vector. */
#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()); })
6644
/* Masked _mm256_permute_ps: expands to a selectps_256 call on mask U, the
   permuted X (control C) and W. */
#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)); })
6649
/* Masked _mm256_permute_ps: expands to a selectps_256 call on mask U, the
   permuted X (control C) and the _mm256_setzero_ps() vector. */
#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()); })
6654
6655static __inline__ __m128d __DEFAULT_FN_ATTRS
6656_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
6657      __m128i __C)
6658{
6659  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
6660                 (__v2di) __C,
6661                 (__v2df) __W,
6662                 (__mmask8) __U);
6663}
6664
6665static __inline__ __m128d __DEFAULT_FN_ATTRS
6666_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
6667{
6668  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
6669                 (__v2di) __C,
6670                 (__v2df)
6671                 _mm_setzero_pd (),
6672                 (__mmask8) __U);
6673}
6674
6675static __inline__ __m256d __DEFAULT_FN_ATTRS
6676_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
6677         __m256i __C)
6678{
6679  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
6680              (__v4di) __C,
6681              (__v4df) __W,
6682              (__mmask8)
6683              __U);
6684}
6685
6686static __inline__ __m256d __DEFAULT_FN_ATTRS
6687_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
6688{
6689  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
6690              (__v4di) __C,
6691              (__v4df)
6692              _mm256_setzero_pd (),
6693              (__mmask8)
6694              __U);
6695}
6696
6697static __inline__ __m128 __DEFAULT_FN_ATTRS
6698_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
6699      __m128i __C)
6700{
6701  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
6702                (__v4si) __C,
6703                (__v4sf) __W,
6704                (__mmask8) __U);
6705}
6706
6707static __inline__ __m128 __DEFAULT_FN_ATTRS
6708_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
6709{
6710  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
6711                (__v4si) __C,
6712                (__v4sf)
6713                _mm_setzero_ps (),
6714                (__mmask8) __U);
6715}
6716
6717static __inline__ __m256 __DEFAULT_FN_ATTRS
6718_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
6719         __m256i __C)
6720{
6721  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
6722                   (__v8si) __C,
6723                   (__v8sf) __W,
6724                   (__mmask8) __U);
6725}
6726
6727static __inline__ __m256 __DEFAULT_FN_ATTRS
6728_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
6729{
6730  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
6731                   (__v8si) __C,
6732                   (__v8sf)
6733                   _mm256_setzero_ps (),
6734                   (__mmask8) __U);
6735}
6736
6737static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6738_mm_test_epi32_mask (__m128i __A, __m128i __B)
6739{
6740  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
6741                 (__v4si) __B,
6742                 (__mmask8) -1);
6743}
6744
6745static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6746_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6747{
6748  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
6749                 (__v4si) __B, __U);
6750}
6751
6752static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6753_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6754{
6755  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
6756                 (__v8si) __B,
6757                 (__mmask8) -1);
6758}
6759
6760static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6761_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6762{
6763  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
6764                 (__v8si) __B, __U);
6765}
6766
6767static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6768_mm_test_epi64_mask (__m128i __A, __m128i __B)
6769{
6770  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
6771                 (__v2di) __B,
6772                 (__mmask8) -1);
6773}
6774
6775static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6776_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6777{
6778  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
6779                 (__v2di) __B, __U);
6780}
6781
6782static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6783_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6784{
6785  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
6786                 (__v4di) __B,
6787                 (__mmask8) -1);
6788}
6789
6790static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6791_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6792{
6793  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
6794                 (__v4di) __B, __U);
6795}
6796
6797static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6798_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6799{
6800  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
6801            (__v4si) __B,
6802            (__mmask8) -1);
6803}
6804
6805static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6806_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6807{
6808  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
6809            (__v4si) __B, __U);
6810}
6811
6812static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6813_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6814{
6815  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
6816            (__v8si) __B,
6817            (__mmask8) -1);
6818}
6819
6820static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6821_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6822{
6823  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
6824            (__v8si) __B, __U);
6825}
6826
6827static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6828_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6829{
6830  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
6831            (__v2di) __B,
6832            (__mmask8) -1);
6833}
6834
6835static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6836_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6837{
6838  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
6839            (__v2di) __B, __U);
6840}
6841
6842static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6843_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6844{
6845  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
6846            (__v4di) __B,
6847            (__mmask8) -1);
6848}
6849
6850static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6851_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6852{
6853  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
6854            (__v4di) __B, __U);
6855}
6856
6857
6858
6859static __inline__ __m128i __DEFAULT_FN_ATTRS
6860_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6861{
6862  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6863                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6864                                           (__v4si)__W);
6865}
6866
6867static __inline__ __m128i __DEFAULT_FN_ATTRS
6868_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6869{
6870  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6871                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6872                                           (__v4si)_mm_setzero_si128());
6873}
6874
6875static __inline__ __m256i __DEFAULT_FN_ATTRS
6876_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6877{
6878  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6879                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6880                                        (__v8si)__W);
6881}
6882
6883static __inline__ __m256i __DEFAULT_FN_ATTRS
6884_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6885{
6886  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6887                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6888                                        (__v8si)_mm256_setzero_si256());
6889}
6890
6891static __inline__ __m128i __DEFAULT_FN_ATTRS
6892_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6893{
6894  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6895                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6896                                           (__v2di)__W);
6897}
6898
6899static __inline__ __m128i __DEFAULT_FN_ATTRS
6900_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6901{
6902  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6903                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6904                                           (__v2di)_mm_setzero_di());
6905}
6906
6907static __inline__ __m256i __DEFAULT_FN_ATTRS
6908_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6909{
6910  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6911                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6912                                        (__v4di)__W);
6913}
6914
6915static __inline__ __m256i __DEFAULT_FN_ATTRS
6916_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6917{
6918  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6919                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6920                                        (__v4di)_mm256_setzero_si256());
6921}
6922
6923static __inline__ __m128i __DEFAULT_FN_ATTRS
6924_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6925{
6926  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6927                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6928                                           (__v4si)__W);
6929}
6930
6931static __inline__ __m128i __DEFAULT_FN_ATTRS
6932_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6933{
6934  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6935                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6936                                           (__v4si)_mm_setzero_si128());
6937}
6938
6939static __inline__ __m256i __DEFAULT_FN_ATTRS
6940_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6941{
6942  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6943                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6944                                        (__v8si)__W);
6945}
6946
6947static __inline__ __m256i __DEFAULT_FN_ATTRS
6948_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6949{
6950  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6951                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6952                                        (__v8si)_mm256_setzero_si256());
6953}
6954
6955static __inline__ __m128i __DEFAULT_FN_ATTRS
6956_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6957{
6958  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6959                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6960                                           (__v2di)__W);
6961}
6962
6963static __inline__ __m128i __DEFAULT_FN_ATTRS
6964_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6965{
6966  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6967                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6968                                           (__v2di)_mm_setzero_di());
6969}
6970
6971static __inline__ __m256i __DEFAULT_FN_ATTRS
6972_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6973{
6974  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6975                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6976                                        (__v4di)__W);
6977}
6978
6979static __inline__ __m256i __DEFAULT_FN_ATTRS
6980_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6981{
6982  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6983                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6984                                        (__v4di)_mm256_setzero_si256());
6985}
6986
6987static __inline__ __m128i __DEFAULT_FN_ATTRS
6988_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
6989        __m128i __B)
6990{
6991  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
6992             (__v4si) __B,
6993             (__v4si) __W,
6994             (__mmask8) __U);
6995}
6996
6997static __inline__ __m128i __DEFAULT_FN_ATTRS
6998_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
6999{
7000  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
7001             (__v4si) __B,
7002             (__v4si)
7003             _mm_setzero_si128 (),
7004             (__mmask8) __U);
7005}
7006
7007static __inline__ __m256i __DEFAULT_FN_ATTRS
7008_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7009           __m128i __B)
7010{
7011  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
7012             (__v4si) __B,
7013             (__v8si) __W,
7014             (__mmask8) __U);
7015}
7016
7017static __inline__ __m256i __DEFAULT_FN_ATTRS
7018_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
7019{
7020  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
7021             (__v4si) __B,
7022             (__v8si)
7023             _mm256_setzero_si256 (),
7024             (__mmask8) __U);
7025}
7026
/* Expands to a psradi128 masked builtin call with data A, count imm (cast
   to int), third operand W and mask U. */
#define _mm_mask_srai_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psradi128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)); })
7031
/* Expands to a psradi128 masked builtin call with data A, count imm (cast
   to int), a _mm_setzero_si128() third operand and mask U. */
#define _mm_maskz_srai_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psradi128_mask( \
      (__v4si)(__m128i)(A), \
      (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)); })
7036
/* Expands to a psradi256 masked builtin call with data A, count imm (cast
   to int), third operand W and mask U. */
#define _mm256_mask_srai_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psradi256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)); })
7041
/* Expands to a psradi256 masked builtin call with data A, count imm (cast
   to int), a _mm256_setzero_si256() third operand and mask U. */
#define _mm256_maskz_srai_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psradi256_mask( \
      (__v8si)(__m256i)(A), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
7046
7047static __inline__ __m128i __DEFAULT_FN_ATTRS
7048_mm_sra_epi64 (__m128i __A, __m128i __B)
7049{
7050  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
7051             (__v2di) __B,
7052             (__v2di)
7053             _mm_setzero_di (),
7054             (__mmask8) -1);
7055}
7056
7057static __inline__ __m128i __DEFAULT_FN_ATTRS
7058_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7059        __m128i __B)
7060{
7061  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
7062             (__v2di) __B,
7063             (__v2di) __W,
7064             (__mmask8) __U);
7065}
7066
7067static __inline__ __m128i __DEFAULT_FN_ATTRS
7068_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7069{
7070  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
7071             (__v2di) __B,
7072             (__v2di)
7073             _mm_setzero_di (),
7074             (__mmask8) __U);
7075}
7076
7077static __inline__ __m256i __DEFAULT_FN_ATTRS
7078_mm256_sra_epi64 (__m256i __A, __m128i __B)
7079{
7080  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
7081             (__v2di) __B,
7082             (__v4di)
7083             _mm256_setzero_si256 (),
7084             (__mmask8) -1);
7085}
7086
7087static __inline__ __m256i __DEFAULT_FN_ATTRS
7088_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7089           __m128i __B)
7090{
7091  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
7092             (__v2di) __B,
7093             (__v4di) __W,
7094             (__mmask8) __U);
7095}
7096
7097static __inline__ __m256i __DEFAULT_FN_ATTRS
7098_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
7099{
7100  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
7101             (__v2di) __B,
7102             (__v4di)
7103             _mm256_setzero_si256 (),
7104             (__mmask8) __U);
7105}
7106
/* Expands to a psraqi128 masked builtin call with data A, count imm (cast
   to int), a _mm_setzero_di() third operand and an all-ones mask. */
#define _mm_srai_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_di(), \
      (__mmask8)-1); })
7111
/* Expands to a psraqi128 masked builtin call with data A, count imm (cast
   to int), third operand W and mask U. */
#define _mm_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)); })
7116
/* Expands to a psraqi128 masked builtin call with data A, count imm (cast
   to int), a _mm_setzero_si128() third operand and mask U. */
#define _mm_maskz_srai_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask( \
      (__v2di)(__m128i)(A), \
      (int)(imm), \
      (__v2di)_mm_setzero_si128(), \
      (__mmask8)(U)); })
7121
/* Expands to a psraqi256 masked builtin call with data A, count imm (cast
   to int), a _mm256_setzero_si256() third operand and an all-ones mask. */
#define _mm256_srai_epi64(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)-1); })
7126
/* Expands to a psraqi256 masked builtin call with data A, count imm (cast
   to int), third operand W and mask U. */
#define _mm256_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)); })
7131
/* Expands to a psraqi256 masked builtin call with data A, count imm (cast
   to int), a _mm256_setzero_si256() third operand and mask U. */
#define _mm256_maskz_srai_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask( \
      (__v4di)(__m256i)(A), \
      (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
7136
/* Expands to a pternlogd128_mask builtin call on A, B, C with imm (cast to
   int) and an all-ones mask. */
#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1); })
7142
/* Expands to a pternlogd128_mask builtin call on A, B, C with imm (cast to
   int) and mask U. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
7148
/* Expands to a pternlogd128_maskz builtin call on A, B, C with imm (cast to
   int) and mask U. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_maskz( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
7154
/* Expands to a pternlogd256_mask builtin call on A, B, C with imm (cast to
   int) and an all-ones mask. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)-1); })
7160
/* Expands to a pternlogd256_mask builtin call on A, B, C with imm (cast to
   int) and mask U. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
7166
/* Expands to a pternlogd256_maskz builtin call on A, B, C with imm (cast to
   int) and mask U. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_maskz( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (__v8si)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
7172
/* Expands to a pternlogq128_mask builtin call on A, B, C with imm (cast to
   int) and an all-ones mask. */
#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1); })
7178
/* Expands to a pternlogq128_mask builtin call on A, B, C with imm (cast to
   int) and mask U. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
7184
/* Expands to a pternlogq128_maskz builtin call on A, B, C with imm (cast to
   int) and mask U. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_maskz( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
7190
/* Expands to a pternlogq256_mask builtin call on A, B, C with imm (cast to
   int) and an all-ones mask. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)-1); })
7196
/* Expands to a pternlogq256_mask builtin call on A, B, C with imm (cast to
   int) and mask U. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
7202
/* Expands to a pternlogq256_maskz builtin call on A, B, C with imm (cast to
   int) and mask U. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_maskz( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (__v4di)(__m256i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
7208
7209
7210
/* Expands to a shuf_f32x4_256 masked builtin call on A and B with imm (cast
   to int), a _mm256_setzero_ps() fourth operand and an all-ones mask. */
#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1); })
7216
/* Expands to a shuf_f32x4_256 masked builtin call on A and B with imm (cast
   to int), W as the fourth operand and mask U. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)); })
7222
/* Expands to a shuf_f32x4_256 masked builtin call on A and B with imm (cast
   to int), a _mm256_setzero_ps() fourth operand and mask U. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)); })
7228
/* Unmasked form: expands to __builtin_ia32_shuf_f64x2_256_mask on A and B
 * with the immediate imm, a zero vector operand and an all-ones mask. */
#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(imm), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
7235
/* Expands to __builtin_ia32_shuf_f64x2_256_mask on A and B with the
 * immediate imm, W as the fourth operand and U as the mask. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(imm), \
      (__v4df)(__m256d)(W), (__mmask8)(U)); })
7242
/* Expands to __builtin_ia32_shuf_f64x2_256_mask on A and B with the
 * immediate imm, a zero vector as the fourth operand and U as the mask. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(imm), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); })
7249
/* Unmasked form: expands to __builtin_ia32_shuf_i32x4_256_mask on A and B
 * with the immediate imm, a zero vector operand and an all-ones mask. */
#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)-1); })
7256
/* Expands to __builtin_ia32_shuf_i32x4_256_mask on A and B with the
 * immediate imm, W as the fourth operand and U as the mask. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm), \
      (__v8si)(__m256i)(W), (__mmask8)(U)); })
7263
/* Expands to __builtin_ia32_shuf_i32x4_256_mask on A and B with the
 * immediate imm, a zero vector as the fourth operand and U as the mask. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)(U)); })
7270
/* Unmasked form: expands to __builtin_ia32_shuf_i64x2_256_mask on A and B
 * with the immediate imm, a zero vector operand and an all-ones mask. */
#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)-1); })
7277
/* Expands to __builtin_ia32_shuf_i64x2_256_mask on A and B with the
 * immediate imm, W as the fourth operand and U as the mask. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm), \
      (__v4di)(__m256i)(W), (__mmask8)(U)); })
7284
/* Expands to __builtin_ia32_shuf_i64x2_256_mask on A and B with the
 * immediate imm, a zero vector as the fourth operand and U as the mask. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)(U)); })
7291
/* Combines _mm_shuffle_pd(A, B, M) with W under mask U through
 * __builtin_ia32_selectpd_128. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)(__m128d)(W)); })
7296
/* Combines _mm_shuffle_pd(A, B, M) with a zero vector under mask U through
 * __builtin_ia32_selectpd_128. */
#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)_mm_setzero_pd()); })
7301
/* Combines _mm256_shuffle_pd(A, B, M) with W under mask U through
 * __builtin_ia32_selectpd_256. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)(__m256d)(W)); })
7306
/* Combines _mm256_shuffle_pd(A, B, M) with a zero vector under mask U
 * through __builtin_ia32_selectpd_256. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)_mm256_setzero_pd()); })
7311
/* Combines _mm_shuffle_ps(A, B, M) with W under mask U through
 * __builtin_ia32_selectps_128. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)(__m128)(W)); })
7316
/* Combines _mm_shuffle_ps(A, B, M) with a zero vector under mask U through
 * __builtin_ia32_selectps_128. */
#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)_mm_setzero_ps()); })
7321
/* Combines _mm256_shuffle_ps(A, B, M) with W under mask U through
 * __builtin_ia32_selectps_256. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)(__m256)(W)); })
7326
/* Combines _mm256_shuffle_ps(A, B, M) with a zero vector under mask U
 * through __builtin_ia32_selectps_256. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)_mm256_setzero_ps()); })
7331
7332static __inline__ __m128d __DEFAULT_FN_ATTRS
7333_mm_rsqrt14_pd (__m128d __A)
7334{
7335  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7336                 (__v2df)
7337                 _mm_setzero_pd (),
7338                 (__mmask8) -1);
7339}
7340
7341static __inline__ __m128d __DEFAULT_FN_ATTRS
7342_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
7343{
7344  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7345                 (__v2df) __W,
7346                 (__mmask8) __U);
7347}
7348
7349static __inline__ __m128d __DEFAULT_FN_ATTRS
7350_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
7351{
7352  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7353                 (__v2df)
7354                 _mm_setzero_pd (),
7355                 (__mmask8) __U);
7356}
7357
7358static __inline__ __m256d __DEFAULT_FN_ATTRS
7359_mm256_rsqrt14_pd (__m256d __A)
7360{
7361  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7362                 (__v4df)
7363                 _mm256_setzero_pd (),
7364                 (__mmask8) -1);
7365}
7366
7367static __inline__ __m256d __DEFAULT_FN_ATTRS
7368_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
7369{
7370  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7371                 (__v4df) __W,
7372                 (__mmask8) __U);
7373}
7374
7375static __inline__ __m256d __DEFAULT_FN_ATTRS
7376_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
7377{
7378  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7379                 (__v4df)
7380                 _mm256_setzero_pd (),
7381                 (__mmask8) __U);
7382}
7383
7384static __inline__ __m128 __DEFAULT_FN_ATTRS
7385_mm_rsqrt14_ps (__m128 __A)
7386{
7387  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7388                (__v4sf)
7389                _mm_setzero_ps (),
7390                (__mmask8) -1);
7391}
7392
7393static __inline__ __m128 __DEFAULT_FN_ATTRS
7394_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
7395{
7396  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7397                (__v4sf) __W,
7398                (__mmask8) __U);
7399}
7400
7401static __inline__ __m128 __DEFAULT_FN_ATTRS
7402_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
7403{
7404  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7405                (__v4sf)
7406                _mm_setzero_ps (),
7407                (__mmask8) __U);
7408}
7409
7410static __inline__ __m256 __DEFAULT_FN_ATTRS
7411_mm256_rsqrt14_ps (__m256 __A)
7412{
7413  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7414                (__v8sf)
7415                _mm256_setzero_ps (),
7416                (__mmask8) -1);
7417}
7418
7419static __inline__ __m256 __DEFAULT_FN_ATTRS
7420_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
7421{
7422  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7423                (__v8sf) __W,
7424                (__mmask8) __U);
7425}
7426
7427static __inline__ __m256 __DEFAULT_FN_ATTRS
7428_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
7429{
7430  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7431                (__v8sf)
7432                _mm256_setzero_ps (),
7433                (__mmask8) __U);
7434}
7435
7436static __inline__ __m256 __DEFAULT_FN_ATTRS
7437_mm256_broadcast_f32x4 (__m128 __A)
7438{
7439  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7440                (__v8sf)_mm256_undefined_pd (),
7441                (__mmask8) -1);
7442}
7443
7444static __inline__ __m256 __DEFAULT_FN_ATTRS
7445_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
7446{
7447  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7448                (__v8sf) __O,
7449                __M);
7450}
7451
7452static __inline__ __m256 __DEFAULT_FN_ATTRS
7453_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
7454{
7455  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7456                (__v8sf) _mm256_setzero_ps (),
7457                __M);
7458}
7459
7460static __inline__ __m256i __DEFAULT_FN_ATTRS
7461_mm256_broadcast_i32x4 (__m128i __A)
7462{
7463  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
7464                 (__v8si)_mm256_undefined_si256 (),
7465                 (__mmask8) -1);
7466}
7467
7468static __inline__ __m256i __DEFAULT_FN_ATTRS
7469_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
7470{
7471  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
7472                 (__v8si)
7473                 __O, __M);
7474}
7475
7476static __inline__ __m256i __DEFAULT_FN_ATTRS
7477_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
7478{
7479  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
7480                 __A,
7481                 (__v8si) _mm256_setzero_si256 (),
7482                 __M);
7483}
7484
7485static __inline__ __m256d __DEFAULT_FN_ATTRS
7486_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
7487{
7488  return (__m256d)__builtin_ia32_selectpd_256(__M,
7489                                              (__v4df) _mm256_broadcastsd_pd(__A),
7490                                              (__v4df) __O);
7491}
7492
7493static __inline__ __m256d __DEFAULT_FN_ATTRS
7494_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7495{
7496  return (__m256d)__builtin_ia32_selectpd_256(__M,
7497                                              (__v4df) _mm256_broadcastsd_pd(__A),
7498                                              (__v4df) _mm256_setzero_pd());
7499}
7500
7501static __inline__ __m128 __DEFAULT_FN_ATTRS
7502_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
7503{
7504  return (__m128)__builtin_ia32_selectps_128(__M,
7505                                             (__v4sf) _mm_broadcastss_ps(__A),
7506                                             (__v4sf) __O);
7507}
7508
7509static __inline__ __m128 __DEFAULT_FN_ATTRS
7510_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
7511{
7512  return (__m128)__builtin_ia32_selectps_128(__M,
7513                                             (__v4sf) _mm_broadcastss_ps(__A),
7514                                             (__v4sf) _mm_setzero_ps());
7515}
7516
7517static __inline__ __m256 __DEFAULT_FN_ATTRS
7518_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
7519{
7520  return (__m256)__builtin_ia32_selectps_256(__M,
7521                                             (__v8sf) _mm256_broadcastss_ps(__A),
7522                                             (__v8sf) __O);
7523}
7524
7525static __inline__ __m256 __DEFAULT_FN_ATTRS
7526_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
7527{
7528  return (__m256)__builtin_ia32_selectps_256(__M,
7529                                             (__v8sf) _mm256_broadcastss_ps(__A),
7530                                             (__v8sf) _mm256_setzero_ps());
7531}
7532
7533static __inline__ __m128i __DEFAULT_FN_ATTRS
7534_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7535{
7536  return (__m128i)__builtin_ia32_selectd_128(__M,
7537                                             (__v4si) _mm_broadcastd_epi32(__A),
7538                                             (__v4si) __O);
7539}
7540
7541static __inline__ __m128i __DEFAULT_FN_ATTRS
7542_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
7543{
7544  return (__m128i)__builtin_ia32_selectd_128(__M,
7545                                             (__v4si) _mm_broadcastd_epi32(__A),
7546                                             (__v4si) _mm_setzero_si128());
7547}
7548
7549static __inline__ __m256i __DEFAULT_FN_ATTRS
7550_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
7551{
7552  return (__m256i)__builtin_ia32_selectd_256(__M,
7553                                             (__v8si) _mm256_broadcastd_epi32(__A),
7554                                             (__v8si) __O);
7555}
7556
7557static __inline__ __m256i __DEFAULT_FN_ATTRS
7558_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
7559{
7560  return (__m256i)__builtin_ia32_selectd_256(__M,
7561                                             (__v8si) _mm256_broadcastd_epi32(__A),
7562                                             (__v8si) _mm256_setzero_si256());
7563}
7564
7565static __inline__ __m128i __DEFAULT_FN_ATTRS
7566_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
7567{
7568  return (__m128i)__builtin_ia32_selectq_128(__M,
7569                                             (__v2di) _mm_broadcastq_epi64(__A),
7570                                             (__v2di) __O);
7571}
7572
7573static __inline__ __m128i __DEFAULT_FN_ATTRS
7574_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
7575{
7576  return (__m128i)__builtin_ia32_selectq_128(__M,
7577                                             (__v2di) _mm_broadcastq_epi64(__A),
7578                                             (__v2di) _mm_setzero_si128());
7579}
7580
7581static __inline__ __m256i __DEFAULT_FN_ATTRS
7582_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
7583{
7584  return (__m256i)__builtin_ia32_selectq_256(__M,
7585                                             (__v4di) _mm256_broadcastq_epi64(__A),
7586                                             (__v4di) __O);
7587}
7588
7589static __inline__ __m256i __DEFAULT_FN_ATTRS
7590_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
7591{
7592  return (__m256i)__builtin_ia32_selectq_256(__M,
7593                                             (__v4di) _mm256_broadcastq_epi64(__A),
7594                                             (__v4di) _mm256_setzero_si256());
7595}
7596
7597static __inline__ __m128i __DEFAULT_FN_ATTRS
7598_mm_cvtsepi32_epi8 (__m128i __A)
7599{
7600  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7601               (__v16qi)_mm_undefined_si128(),
7602               (__mmask8) -1);
7603}
7604
7605static __inline__ __m128i __DEFAULT_FN_ATTRS
7606_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7607{
7608  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7609               (__v16qi) __O, __M);
7610}
7611
7612static __inline__ __m128i __DEFAULT_FN_ATTRS
7613_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
7614{
7615  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7616               (__v16qi) _mm_setzero_si128 (),
7617               __M);
7618}
7619
7620static __inline__ void __DEFAULT_FN_ATTRS
7621_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7622{
7623  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7624}
7625
7626static __inline__ __m128i __DEFAULT_FN_ATTRS
7627_mm256_cvtsepi32_epi8 (__m256i __A)
7628{
7629  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7630               (__v16qi)_mm_undefined_si128(),
7631               (__mmask8) -1);
7632}
7633
7634static __inline__ __m128i __DEFAULT_FN_ATTRS
7635_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7636{
7637  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7638               (__v16qi) __O, __M);
7639}
7640
7641static __inline__ __m128i __DEFAULT_FN_ATTRS
7642_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
7643{
7644  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7645               (__v16qi) _mm_setzero_si128 (),
7646               __M);
7647}
7648
7649static __inline__ void __DEFAULT_FN_ATTRS
7650_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7651{
7652  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7653}
7654
7655static __inline__ __m128i __DEFAULT_FN_ATTRS
7656_mm_cvtsepi32_epi16 (__m128i __A)
7657{
7658  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7659               (__v8hi)_mm_setzero_si128 (),
7660               (__mmask8) -1);
7661}
7662
7663static __inline__ __m128i __DEFAULT_FN_ATTRS
7664_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7665{
7666  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7667               (__v8hi)__O,
7668               __M);
7669}
7670
7671static __inline__ __m128i __DEFAULT_FN_ATTRS
7672_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7673{
7674  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7675               (__v8hi) _mm_setzero_si128 (),
7676               __M);
7677}
7678
7679static __inline__ void __DEFAULT_FN_ATTRS
7680_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7681{
7682  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7683}
7684
7685static __inline__ __m128i __DEFAULT_FN_ATTRS
7686_mm256_cvtsepi32_epi16 (__m256i __A)
7687{
7688  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7689               (__v8hi)_mm_undefined_si128(),
7690               (__mmask8) -1);
7691}
7692
7693static __inline__ __m128i __DEFAULT_FN_ATTRS
7694_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7695{
7696  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7697               (__v8hi) __O, __M);
7698}
7699
7700static __inline__ __m128i __DEFAULT_FN_ATTRS
7701_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7702{
7703  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7704               (__v8hi) _mm_setzero_si128 (),
7705               __M);
7706}
7707
7708static __inline__ void __DEFAULT_FN_ATTRS
7709_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7710{
7711  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7712}
7713
7714static __inline__ __m128i __DEFAULT_FN_ATTRS
7715_mm_cvtsepi64_epi8 (__m128i __A)
7716{
7717  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7718               (__v16qi)_mm_undefined_si128(),
7719               (__mmask8) -1);
7720}
7721
7722static __inline__ __m128i __DEFAULT_FN_ATTRS
7723_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7724{
7725  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7726               (__v16qi) __O, __M);
7727}
7728
7729static __inline__ __m128i __DEFAULT_FN_ATTRS
7730_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7731{
7732  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7733               (__v16qi) _mm_setzero_si128 (),
7734               __M);
7735}
7736
7737static __inline__ void __DEFAULT_FN_ATTRS
7738_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7739{
7740  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7741}
7742
7743static __inline__ __m128i __DEFAULT_FN_ATTRS
7744_mm256_cvtsepi64_epi8 (__m256i __A)
7745{
7746  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7747               (__v16qi)_mm_undefined_si128(),
7748               (__mmask8) -1);
7749}
7750
7751static __inline__ __m128i __DEFAULT_FN_ATTRS
7752_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7753{
7754  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7755               (__v16qi) __O, __M);
7756}
7757
7758static __inline__ __m128i __DEFAULT_FN_ATTRS
7759_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7760{
7761  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7762               (__v16qi) _mm_setzero_si128 (),
7763               __M);
7764}
7765
7766static __inline__ void __DEFAULT_FN_ATTRS
7767_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7768{
7769  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7770}
7771
7772static __inline__ __m128i __DEFAULT_FN_ATTRS
7773_mm_cvtsepi64_epi32 (__m128i __A)
7774{
7775  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7776               (__v4si)_mm_undefined_si128(),
7777               (__mmask8) -1);
7778}
7779
7780static __inline__ __m128i __DEFAULT_FN_ATTRS
7781_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7782{
7783  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7784               (__v4si) __O, __M);
7785}
7786
7787static __inline__ __m128i __DEFAULT_FN_ATTRS
7788_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7789{
7790  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7791               (__v4si) _mm_setzero_si128 (),
7792               __M);
7793}
7794
7795static __inline__ void __DEFAULT_FN_ATTRS
7796_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7797{
7798  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7799}
7800
7801static __inline__ __m128i __DEFAULT_FN_ATTRS
7802_mm256_cvtsepi64_epi32 (__m256i __A)
7803{
7804  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7805               (__v4si)_mm_undefined_si128(),
7806               (__mmask8) -1);
7807}
7808
7809static __inline__ __m128i __DEFAULT_FN_ATTRS
7810_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7811{
7812  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7813               (__v4si)__O,
7814               __M);
7815}
7816
7817static __inline__ __m128i __DEFAULT_FN_ATTRS
7818_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7819{
7820  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7821               (__v4si) _mm_setzero_si128 (),
7822               __M);
7823}
7824
7825static __inline__ void __DEFAULT_FN_ATTRS
7826_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7827{
7828  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7829}
7830
7831static __inline__ __m128i __DEFAULT_FN_ATTRS
7832_mm_cvtsepi64_epi16 (__m128i __A)
7833{
7834  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7835               (__v8hi)_mm_undefined_si128(),
7836               (__mmask8) -1);
7837}
7838
7839static __inline__ __m128i __DEFAULT_FN_ATTRS
7840_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7841{
7842  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7843               (__v8hi) __O, __M);
7844}
7845
7846static __inline__ __m128i __DEFAULT_FN_ATTRS
7847_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7848{
7849  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7850               (__v8hi) _mm_setzero_si128 (),
7851               __M);
7852}
7853
7854static __inline__ void __DEFAULT_FN_ATTRS
7855_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7856{
7857  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7858}
7859
7860static __inline__ __m128i __DEFAULT_FN_ATTRS
7861_mm256_cvtsepi64_epi16 (__m256i __A)
7862{
7863  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7864               (__v8hi)_mm_undefined_si128(),
7865               (__mmask8) -1);
7866}
7867
7868static __inline__ __m128i __DEFAULT_FN_ATTRS
7869_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7870{
7871  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7872               (__v8hi) __O, __M);
7873}
7874
7875static __inline__ __m128i __DEFAULT_FN_ATTRS
7876_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7877{
7878  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7879               (__v8hi) _mm_setzero_si128 (),
7880               __M);
7881}
7882
7883static __inline__ void __DEFAULT_FN_ATTRS
7884_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7885{
7886  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7887}
7888
7889static __inline__ __m128i __DEFAULT_FN_ATTRS
7890_mm_cvtusepi32_epi8 (__m128i __A)
7891{
7892  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7893                (__v16qi)_mm_undefined_si128(),
7894                (__mmask8) -1);
7895}
7896
7897static __inline__ __m128i __DEFAULT_FN_ATTRS
7898_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7899{
7900  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7901                (__v16qi) __O,
7902                __M);
7903}
7904
7905static __inline__ __m128i __DEFAULT_FN_ATTRS
7906_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7907{
7908  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7909                (__v16qi) _mm_setzero_si128 (),
7910                __M);
7911}
7912
7913static __inline__ void __DEFAULT_FN_ATTRS
7914_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7915{
7916  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7917}
7918
7919static __inline__ __m128i __DEFAULT_FN_ATTRS
7920_mm256_cvtusepi32_epi8 (__m256i __A)
7921{
7922  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7923                (__v16qi)_mm_undefined_si128(),
7924                (__mmask8) -1);
7925}
7926
7927static __inline__ __m128i __DEFAULT_FN_ATTRS
7928_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7929{
7930  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7931                (__v16qi) __O,
7932                __M);
7933}
7934
7935static __inline__ __m128i __DEFAULT_FN_ATTRS
7936_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7937{
7938  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7939                (__v16qi) _mm_setzero_si128 (),
7940                __M);
7941}
7942
7943static __inline__ void __DEFAULT_FN_ATTRS
7944_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7945{
7946  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7947}
7948
7949static __inline__ __m128i __DEFAULT_FN_ATTRS
7950_mm_cvtusepi32_epi16 (__m128i __A)
7951{
7952  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7953                (__v8hi)_mm_undefined_si128(),
7954                (__mmask8) -1);
7955}
7956
7957static __inline__ __m128i __DEFAULT_FN_ATTRS
7958_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7959{
7960  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7961                (__v8hi) __O, __M);
7962}
7963
7964static __inline__ __m128i __DEFAULT_FN_ATTRS
7965_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7966{
7967  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7968                (__v8hi) _mm_setzero_si128 (),
7969                __M);
7970}
7971
7972static __inline__ void __DEFAULT_FN_ATTRS
7973_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7974{
7975  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7976}
7977
7978static __inline__ __m128i __DEFAULT_FN_ATTRS
7979_mm256_cvtusepi32_epi16 (__m256i __A)
7980{
7981  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7982                (__v8hi) _mm_undefined_si128(),
7983                (__mmask8) -1);
7984}
7985
7986static __inline__ __m128i __DEFAULT_FN_ATTRS
7987_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7988{
7989  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7990                (__v8hi) __O, __M);
7991}
7992
7993static __inline__ __m128i __DEFAULT_FN_ATTRS
7994_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7995{
7996  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7997                (__v8hi) _mm_setzero_si128 (),
7998                __M);
7999}
8000
8001static __inline__ void __DEFAULT_FN_ATTRS
8002_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
8003{
8004  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
8005}
8006
8007static __inline__ __m128i __DEFAULT_FN_ATTRS
8008_mm_cvtusepi64_epi8 (__m128i __A)
8009{
8010  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
8011                (__v16qi)_mm_undefined_si128(),
8012                (__mmask8) -1);
8013}
8014
8015static __inline__ __m128i __DEFAULT_FN_ATTRS
8016_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
8017{
8018  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
8019                (__v16qi) __O,
8020                __M);
8021}
8022
8023static __inline__ __m128i __DEFAULT_FN_ATTRS
8024_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
8025{
8026  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
8027                (__v16qi) _mm_setzero_si128 (),
8028                __M);
8029}
8030
8031static __inline__ void __DEFAULT_FN_ATTRS
8032_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
8033{
8034  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
8035}
8036
8037static __inline__ __m128i __DEFAULT_FN_ATTRS
8038_mm256_cvtusepi64_epi8 (__m256i __A)
8039{
8040  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
8041                (__v16qi)_mm_undefined_si128(),
8042                (__mmask8) -1);
8043}
8044
8045static __inline__ __m128i __DEFAULT_FN_ATTRS
8046_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
8047{
8048  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
8049                (__v16qi) __O,
8050                __M);
8051}
8052
8053static __inline__ __m128i __DEFAULT_FN_ATTRS
8054_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
8055{
8056  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
8057                (__v16qi) _mm_setzero_si128 (),
8058                __M);
8059}
8060
8061static __inline__ void __DEFAULT_FN_ATTRS
8062_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
8063{
8064  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
8065}
8066
8067static __inline__ __m128i __DEFAULT_FN_ATTRS
8068_mm_cvtusepi64_epi32 (__m128i __A)
8069{
8070  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
8071                (__v4si)_mm_undefined_si128(),
8072                (__mmask8) -1);
8073}
8074
8075static __inline__ __m128i __DEFAULT_FN_ATTRS
8076_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
8077{
8078  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
8079                (__v4si) __O, __M);
8080}
8081
8082static __inline__ __m128i __DEFAULT_FN_ATTRS
8083_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
8084{
8085  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
8086                (__v4si) _mm_setzero_si128 (),
8087                __M);
8088}
8089
8090static __inline__ void __DEFAULT_FN_ATTRS
8091_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
8092{
8093  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
8094}
8095
8096static __inline__ __m128i __DEFAULT_FN_ATTRS
8097_mm256_cvtusepi64_epi32 (__m256i __A)
8098{
8099  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
8100                (__v4si)_mm_undefined_si128(),
8101                (__mmask8) -1);
8102}
8103
8104static __inline__ __m128i __DEFAULT_FN_ATTRS
8105_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
8106{
8107  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
8108                (__v4si) __O, __M);
8109}
8110
8111static __inline__ __m128i __DEFAULT_FN_ATTRS
8112_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
8113{
8114  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
8115                (__v4si) _mm_setzero_si128 (),
8116                __M);
8117}
8118
8119static __inline__ void __DEFAULT_FN_ATTRS
8120_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
8121{
8122  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
8123}
8124
8125static __inline__ __m128i __DEFAULT_FN_ATTRS
8126_mm_cvtusepi64_epi16 (__m128i __A)
8127{
8128  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
8129                (__v8hi)_mm_undefined_si128(),
8130                (__mmask8) -1);
8131}
8132
8133static __inline__ __m128i __DEFAULT_FN_ATTRS
8134_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
8135{
8136  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
8137                (__v8hi) __O, __M);
8138}
8139
8140static __inline__ __m128i __DEFAULT_FN_ATTRS
8141_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
8142{
8143  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
8144                (__v8hi) _mm_setzero_si128 (),
8145                __M);
8146}
8147
8148static __inline__ void __DEFAULT_FN_ATTRS
8149_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
8150{
8151  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
8152}
8153
8154static __inline__ __m128i __DEFAULT_FN_ATTRS
8155_mm256_cvtusepi64_epi16 (__m256i __A)
8156{
8157  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
8158                (__v8hi)_mm_undefined_si128(),
8159                (__mmask8) -1);
8160}
8161
8162static __inline__ __m128i __DEFAULT_FN_ATTRS
8163_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
8164{
8165  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
8166                (__v8hi) __O, __M);
8167}
8168
8169static __inline__ __m128i __DEFAULT_FN_ATTRS
8170_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
8171{
8172  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
8173                (__v8hi) _mm_setzero_si128 (),
8174                __M);
8175}
8176
8177static __inline__ void __DEFAULT_FN_ATTRS
8178_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
8179{
8180  return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
8181}
8182
8183static __inline__ __m128i __DEFAULT_FN_ATTRS
8184_mm_cvtepi32_epi8 (__m128i __A)
8185{
8186  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
8187              (__v16qi)_mm_undefined_si128(),
8188              (__mmask8) -1);
8189}
8190
8191static __inline__ __m128i __DEFAULT_FN_ATTRS
8192_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
8193{
8194  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
8195              (__v16qi) __O, __M);
8196}
8197
8198static __inline__ __m128i __DEFAULT_FN_ATTRS
8199_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
8200{
8201  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
8202              (__v16qi)
8203              _mm_setzero_si128 (),
8204              __M);
8205}
8206
8207static __inline__ void __DEFAULT_FN_ATTRS
8208_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
8209{
8210  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
8211}
8212
8213static __inline__ __m128i __DEFAULT_FN_ATTRS
8214_mm256_cvtepi32_epi8 (__m256i __A)
8215{
8216  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
8217              (__v16qi)_mm_undefined_si128(),
8218              (__mmask8) -1);
8219}
8220
8221static __inline__ __m128i __DEFAULT_FN_ATTRS
8222_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
8223{
8224  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
8225              (__v16qi) __O, __M);
8226}
8227
8228static __inline__ __m128i __DEFAULT_FN_ATTRS
8229_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
8230{
8231  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
8232              (__v16qi) _mm_setzero_si128 (),
8233              __M);
8234}
8235
8236static __inline__ void __DEFAULT_FN_ATTRS
8237_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
8238{
8239  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
8240}
8241
8242static __inline__ __m128i __DEFAULT_FN_ATTRS
8243_mm_cvtepi32_epi16 (__m128i __A)
8244{
8245  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8246              (__v8hi) _mm_setzero_si128 (),
8247              (__mmask8) -1);
8248}
8249
8250static __inline__ __m128i __DEFAULT_FN_ATTRS
8251_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
8252{
8253  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8254              (__v8hi) __O, __M);
8255}
8256
8257static __inline__ __m128i __DEFAULT_FN_ATTRS
8258_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
8259{
8260  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8261              (__v8hi) _mm_setzero_si128 (),
8262              __M);
8263}
8264
8265static __inline__ void __DEFAULT_FN_ATTRS
8266_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
8267{
8268  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
8269}
8270
8271static __inline__ __m128i __DEFAULT_FN_ATTRS
8272_mm256_cvtepi32_epi16 (__m256i __A)
8273{
8274  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8275              (__v8hi)_mm_setzero_si128 (),
8276              (__mmask8) -1);
8277}
8278
8279static __inline__ __m128i __DEFAULT_FN_ATTRS
8280_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
8281{
8282  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8283              (__v8hi) __O, __M);
8284}
8285
8286static __inline__ __m128i __DEFAULT_FN_ATTRS
8287_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
8288{
8289  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8290              (__v8hi) _mm_setzero_si128 (),
8291              __M);
8292}
8293
8294static __inline__ void __DEFAULT_FN_ATTRS
8295_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
8296{
8297  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
8298}
8299
8300static __inline__ __m128i __DEFAULT_FN_ATTRS
8301_mm_cvtepi64_epi8 (__m128i __A)
8302{
8303  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8304              (__v16qi) _mm_undefined_si128(),
8305              (__mmask8) -1);
8306}
8307
8308static __inline__ __m128i __DEFAULT_FN_ATTRS
8309_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
8310{
8311  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8312              (__v16qi) __O, __M);
8313}
8314
8315static __inline__ __m128i __DEFAULT_FN_ATTRS
8316_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
8317{
8318  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8319              (__v16qi) _mm_setzero_si128 (),
8320              __M);
8321}
8322
8323static __inline__ void __DEFAULT_FN_ATTRS
8324_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
8325{
8326  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
8327}
8328
8329static __inline__ __m128i __DEFAULT_FN_ATTRS
8330_mm256_cvtepi64_epi8 (__m256i __A)
8331{
8332  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8333              (__v16qi) _mm_undefined_si128(),
8334              (__mmask8) -1);
8335}
8336
8337static __inline__ __m128i __DEFAULT_FN_ATTRS
8338_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
8339{
8340  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8341              (__v16qi) __O, __M);
8342}
8343
8344static __inline__ __m128i __DEFAULT_FN_ATTRS
8345_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
8346{
8347  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8348              (__v16qi) _mm_setzero_si128 (),
8349              __M);
8350}
8351
8352static __inline__ void __DEFAULT_FN_ATTRS
8353_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
8354{
8355  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
8356}
8357
8358static __inline__ __m128i __DEFAULT_FN_ATTRS
8359_mm_cvtepi64_epi32 (__m128i __A)
8360{
8361  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8362              (__v4si)_mm_undefined_si128(),
8363              (__mmask8) -1);
8364}
8365
8366static __inline__ __m128i __DEFAULT_FN_ATTRS
8367_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
8368{
8369  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8370              (__v4si) __O, __M);
8371}
8372
8373static __inline__ __m128i __DEFAULT_FN_ATTRS
8374_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
8375{
8376  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8377              (__v4si) _mm_setzero_si128 (),
8378              __M);
8379}
8380
8381static __inline__ void __DEFAULT_FN_ATTRS
8382_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
8383{
8384  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
8385}
8386
8387static __inline__ __m128i __DEFAULT_FN_ATTRS
8388_mm256_cvtepi64_epi32 (__m256i __A)
8389{
8390  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8391              (__v4si) _mm_undefined_si128(),
8392              (__mmask8) -1);
8393}
8394
8395static __inline__ __m128i __DEFAULT_FN_ATTRS
8396_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
8397{
8398  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8399              (__v4si) __O, __M);
8400}
8401
8402static __inline__ __m128i __DEFAULT_FN_ATTRS
8403_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
8404{
8405  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8406              (__v4si) _mm_setzero_si128 (),
8407              __M);
8408}
8409
8410static __inline__ void __DEFAULT_FN_ATTRS
8411_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
8412{
8413  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
8414}
8415
8416static __inline__ __m128i __DEFAULT_FN_ATTRS
8417_mm_cvtepi64_epi16 (__m128i __A)
8418{
8419  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8420              (__v8hi) _mm_undefined_si128(),
8421              (__mmask8) -1);
8422}
8423
8424static __inline__ __m128i __DEFAULT_FN_ATTRS
8425_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
8426{
8427  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8428              (__v8hi)__O,
8429              __M);
8430}
8431
8432static __inline__ __m128i __DEFAULT_FN_ATTRS
8433_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
8434{
8435  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8436              (__v8hi) _mm_setzero_si128 (),
8437              __M);
8438}
8439
8440static __inline__ void __DEFAULT_FN_ATTRS
8441_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
8442{
8443  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
8444}
8445
8446static __inline__ __m128i __DEFAULT_FN_ATTRS
8447_mm256_cvtepi64_epi16 (__m256i __A)
8448{
8449  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8450              (__v8hi)_mm_undefined_si128(),
8451              (__mmask8) -1);
8452}
8453
8454static __inline__ __m128i __DEFAULT_FN_ATTRS
8455_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
8456{
8457  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8458              (__v8hi) __O, __M);
8459}
8460
8461static __inline__ __m128i __DEFAULT_FN_ATTRS
8462_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
8463{
8464  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8465              (__v8hi) _mm_setzero_si128 (),
8466              __M);
8467}
8468
8469static __inline__ void __DEFAULT_FN_ATTRS
8470_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
8471{
8472  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
8473}
8474
/* Unmasked form: extractf32x4_256 builtin on A with immediate imm, a zeroed
   __v4sf source operand and an all-ones mask. */
#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1); })
8480
/* Merge-masked form: W is the builtin's source operand and U its mask. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v4sf)(__m128)(W), (__mmask8)(U)); })
8486
/* Zero-masked form: a zeroed __v4sf is the builtin's source operand and U its
   mask. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_extractf32x4_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)); })
8492
/* Unmasked form: extracti32x4_256 builtin on A with immediate imm, a zeroed
   __v4si source operand and an all-ones mask. */
#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), \
      (__v4si)_mm_setzero_si128(), (__mmask8)-1); })
8498
/* Merge-masked form: W is the builtin's source operand and U its mask. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), \
      (__v4si)(__m128i)(W), (__mmask8)(U)); })
8504
/* Zero-masked form: a zeroed __v4si is the builtin's source operand and U its
   mask. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti32x4_256_mask( \
      (__v8si)(__m256i)(A), (int)(imm), \
      (__v4si)_mm_setzero_si128(), (__mmask8)(U)); })
8510
/* Unmasked form: insertf32x4_256 builtin on A and B with immediate imm, a
   zeroed __v8sf source operand and an all-ones mask. */
#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_insertf32x4_256_mask( \
      (__v8sf)(__m256)(A), (__v4sf)(__m128)(B), (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); })
8516
/* Merge-masked form: W is the builtin's source operand and U its mask. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_insertf32x4_256_mask( \
      (__v8sf)(__m256)(A), (__v4sf)(__m128)(B), (int)(imm), \
      (__v8sf)(__m256)(W), (__mmask8)(U)); })
8522
/* Zero-masked form: a zeroed __v8sf is the builtin's source operand and U its
   mask. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_insertf32x4_256_mask( \
      (__v8sf)(__m256)(A), (__v4sf)(__m128)(B), (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)); })
8528
/* Unmasked form: inserti32x4_256 builtin on A and B with immediate imm, a
   zeroed __v8si source operand and an all-ones mask. */
#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti32x4_256_mask( \
      (__v8si)(__m256i)(A), (__v4si)(__m128i)(B), (int)(imm), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)-1); })
8535
/* Merge-masked form: W is the builtin's source operand and U its mask. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti32x4_256_mask( \
      (__v8si)(__m256i)(A), (__v4si)(__m128i)(B), (int)(imm), \
      (__v8si)(__m256i)(W), (__mmask8)(U)); })
8542
/* Zero-masked form: a zeroed __v8si is the builtin's source operand and U its
   mask. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti32x4_256_mask( \
      (__v8si)(__m256i)(A), (__v4si)(__m128i)(B), (int)(imm), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)(U)); })
8549
/* Unmasked form: getmantpd128 builtin on A.  B and C are packed into a single
   immediate as ((C) << 2) | (B); the source operand is zeroed and the mask is
   all ones. */
#define _mm_getmant_pd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1); })
8555
/* Merge-masked form: W is the builtin's source operand and U its mask; B and
   C are packed into a single immediate as ((C) << 2) | (B). */
#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), (__mmask8)(U)); })
8561
/* Zero-masked form: a zeroed __v2df is the builtin's source operand and U its
   mask; B and C are packed into a single immediate as ((C) << 2) | (B). */
#define _mm_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)); })
8567
/* Unmasked form: getmantpd256 builtin on A.  B and C are packed into a single
   immediate as ((C) << 2) | (B); the source operand is zeroed and the mask is
   all ones. */
#define _mm256_getmant_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
8573
/* Merge-masked form: W is the builtin's source operand and U its mask; B and
   C are packed into a single immediate as ((C) << 2) | (B). */
#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), (__mmask8)(U)); })
8579
/* Zero-masked form: a zeroed __v4df is the builtin's source operand and U its
   mask; B and C are packed into a single immediate as ((C) << 2) | (B). */
#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); })
8585
/* Unmasked form: getmantps128 builtin on A.  B and C are packed into a single
   immediate as ((C) << 2) | (B); the source operand is zeroed and the mask is
   all ones. */
#define _mm_getmant_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1); })
8591
/* Merge-masked form: W is the builtin's source operand and U its mask; B and
   C are packed into a single immediate as ((C) << 2) | (B). */
#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), (__mmask8)(U)); })
8597
/* Zero-masked form: a zeroed __v4sf is the builtin's source operand and U its
   mask; B and C are packed into a single immediate as ((C) << 2) | (B). */
#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)); })
8603
/* Unmasked form: getmantps256 builtin on A.  B and C are packed into a single
   immediate as ((C) << 2) | (B); the source operand is zeroed and the mask is
   all ones. */
#define _mm256_getmant_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); })
8609
/* Merge-masked form: W is the builtin's source operand and U its mask; B and
   C are packed into a single immediate as ((C) << 2) | (B). */
#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), (__mmask8)(U)); })
8615
/* Zero-masked form: a zeroed __v8sf is the builtin's source operand and U its
   mask; B and C are packed into a single immediate as ((C) << 2) | (B). */
#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)); })
8621
/* Masked gather via the gather3div2df builtin: v1_old as __v2df, addr as
   double const *, index as __v2di, then mask and scale. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (double const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8627
/* Masked gather via the gather3div2di builtin: v1_old as __v2di, addr as
   long long const *, index as __v2di, then mask and scale. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (long long const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8633
/* Masked gather via the gather3div4df builtin: v1_old as __v4df, addr as
   double const *, index as __v4di, then mask and scale. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (double const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
8639
/* Masked gather via the gather3div4di builtin: v1_old as __v4di, addr as
   long long const *, index as __v4di, then mask and scale. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (long long const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
8645
/* Masked gather via the gather3div4sf builtin: v1_old as __v4sf, addr as
   float const *, index as __v2di, then mask and scale. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8651
/* Masked gather via the gather3div4si builtin: v1_old as __v4si, addr as
   int const *, index as __v2di, then mask and scale. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8657
/* Masked gather via the gather3div8sf builtin: v1_old as __v4sf, addr as
   float const *, index as __v4di, then mask and scale.  The result and
   v1_old are 128-bit (__m128) while the index vector is 256-bit. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
8663
/* Masked gather via the gather3div8si builtin: v1_old as __v4si, addr as
   int const *, index as __v4di, then mask and scale.  The result and v1_old
   are 128-bit (__m128i) while the index vector is 256-bit. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
8669
/* Masked gather via the gather3siv2df builtin: v1_old as __v2df, addr as
   double const *, index as __v4si, then mask and scale. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (double const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8675
/* Masked gather via the gather3siv2di builtin: v1_old as __v2di, addr as
   long long const *, index as __v4si, then mask and scale. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), (long long const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8681
/* Masked gather via the gather3siv4df builtin: v1_old as __v4df, addr as
   double const *, index as a 128-bit __v4si, then mask and scale. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), (double const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8687
/* Masked gather via the gather3siv4di builtin: v1_old as __v4di, addr as
   long long const *, index as a 128-bit __v4si, then mask and scale. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), (long long const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8693
/* Masked gather via the gather3siv4sf builtin: v1_old as __v4sf, addr as
   float const *, index as __v4si, then mask and scale. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (float const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8699
/* Masked gather via the gather3siv4si builtin: v1_old as __v4si, addr as
   int const *, index as __v4si, then mask and scale. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (int const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)); })
8705
/* Masked gather via the gather3siv8sf builtin: v1_old as __v8sf, addr as
   float const *, index as __v8si, then mask and scale. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (float const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
8711
/* Masked gather via the gather3siv8si builtin: v1_old as __v8si, addr as
   int const *, index as __v8si, then mask and scale. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (int const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)); })
8717
/* Shuffles the four doubles of X: result lane i takes the element of X
   selected by bits [2i+1:2i] of C.  Every index is masked to 0..3, so the
   undefined second shuffle operand is never referenced. */
#define _mm256_permutex_pd(X, C) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
      (__v4df)(__m256d)(X), (__v4df)_mm256_undefined_pd(), \
      ((C) >> 0) & 0x3, \
      ((C) >> 2) & 0x3, \
      ((C) >> 4) & 0x3, \
      ((C) >> 6) & 0x3); })
8723
/* Merge-masked form: selectpd_256 builtin with mask U, the result of
   _mm256_permutex_pd(X, C) and W, in that order. */
#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)); })
8728
/* Zero-masked form: selectpd_256 builtin with mask U, the result of
   _mm256_permutex_pd(X, C) and a zeroed vector, in that order. */
#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()); })
8733
/* Shuffles the four 64-bit elements of X: result lane i takes the element of
   X selected by bits [2i+1:2i] of C.  Every index is masked to 0..3, so the
   undefined second shuffle operand is never referenced. */
#define _mm256_permutex_epi64(X, C) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v4di)(__m256i)(X), (__v4di)_mm256_undefined_si256(), \
      ((C) >> 0) & 0x3, \
      ((C) >> 2) & 0x3, \
      ((C) >> 4) & 0x3, \
      ((C) >> 6) & 0x3); })
8739
/* Merge-masked form: selectq_256 builtin with mask U, the result of
   _mm256_permutex_epi64(X, C) and W, in that order. */
#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)); })
8744
/* Zero-masked form: selectq_256 builtin with mask U, the result of
   _mm256_permutex_epi64(X, C) and a zeroed vector, in that order. */
#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()); })
8749
8750static __inline__ __m256d __DEFAULT_FN_ATTRS
8751_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8752{
8753  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8754                 (__v4di) __X,
8755                 (__v4df) _mm256_undefined_si256 (),
8756                 (__mmask8) -1);
8757}
8758
8759static __inline__ __m256d __DEFAULT_FN_ATTRS
8760_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8761          __m256d __Y)
8762{
8763  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8764                 (__v4di) __X,
8765                 (__v4df) __W,
8766                 (__mmask8) __U);
8767}
8768
8769static __inline__ __m256d __DEFAULT_FN_ATTRS
8770_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8771{
8772  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8773                 (__v4di) __X,
8774                 (__v4df) _mm256_setzero_pd (),
8775                 (__mmask8) __U);
8776}
8777
8778static __inline__ __m256i __DEFAULT_FN_ATTRS
8779_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8780{
8781  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8782                 (__v4di) __X,
8783                 (__v4di) _mm256_setzero_si256 (),
8784                 (__mmask8) __M);
8785}
8786
8787static __inline__ __m256i __DEFAULT_FN_ATTRS
8788_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8789{
8790  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8791                 (__v4di) __X,
8792                 (__v4di) _mm256_undefined_si256 (),
8793                 (__mmask8) -1);
8794}
8795
8796static __inline__ __m256i __DEFAULT_FN_ATTRS
8797_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8798             __m256i __Y)
8799{
8800  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8801                 (__v4di) __X,
8802                 (__v4di) __W,
8803                 __M);
8804}
8805
8806static __inline__ __m256 __DEFAULT_FN_ATTRS
8807_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8808          __m256 __Y)
8809{
8810  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8811                (__v8si) __X,
8812                (__v8sf) __W,
8813                (__mmask8) __U);
8814}
8815
8816static __inline__ __m256 __DEFAULT_FN_ATTRS
8817_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8818{
8819  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8820                (__v8si) __X,
8821                (__v8sf) _mm256_setzero_ps (),
8822                (__mmask8) __U);
8823}
8824
8825static __inline__ __m256 __DEFAULT_FN_ATTRS
8826_mm256_permutexvar_ps (__m256i __X, __m256 __Y)
8827{
8828  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8829                (__v8si) __X,
8830                (__v8sf) _mm256_undefined_si256 (),
8831                (__mmask8) -1);
8832}
8833
8834static __inline__ __m256i __DEFAULT_FN_ATTRS
8835_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
8836{
8837  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8838                 (__v8si) __X,
8839                 (__v8si) _mm256_setzero_si256 (),
8840                 __M);
8841}
8842
8843static __inline__ __m256i __DEFAULT_FN_ATTRS
8844_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
8845             __m256i __Y)
8846{
8847  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8848                 (__v8si) __X,
8849                 (__v8si) __W,
8850                 (__mmask8) __M);
8851}
8852
8853static __inline__ __m256i __DEFAULT_FN_ATTRS
8854_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
8855{
8856  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8857                 (__v8si) __X,
8858                 (__v8si) _mm256_undefined_si256(),
8859                 (__mmask8) -1);
8860}
8861
/* Unmasked 128-bit alignr on 32-bit elements: A, B and the immediate go to
   the alignd128_mask builtin with an undefined pass-through operand and an
   all-ones mask.  imm stays inline in the call. */
#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
  const __v4si __aln_a = (__v4si)(__m128i)(A); \
  const __v4si __aln_b = (__v4si)(__m128i)(B); \
  const __v4si __aln_any = (__v4si)_mm_undefined_si128(); \
  (__m128i)__builtin_ia32_alignd128_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_any, (__mmask8)-1); })
8867
/* Masked 128-bit alignr on 32-bit elements: W is the pass-through operand and
   U the mask given to the alignd128_mask builtin.  imm stays inline. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  const __v4si __aln_a = (__v4si)(__m128i)(A); \
  const __v4si __aln_b = (__v4si)(__m128i)(B); \
  const __v4si __aln_old = (__v4si)(__m128i)(W); \
  const __mmask8 __aln_mask = (__mmask8)(U); \
  (__m128i)__builtin_ia32_alignd128_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_old, __aln_mask); })
8873
/* Zero-masked 128-bit alignr on 32-bit elements: _mm_setzero_si128() is the
   pass-through operand and U the mask given to the alignd128_mask builtin. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  const __v4si __aln_a = (__v4si)(__m128i)(A); \
  const __v4si __aln_b = (__v4si)(__m128i)(B); \
  const __v4si __aln_zero = (__v4si)_mm_setzero_si128(); \
  const __mmask8 __aln_mask = (__mmask8)(U); \
  (__m128i)__builtin_ia32_alignd128_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_zero, __aln_mask); })
8879
/* Unmasked 256-bit alignr on 32-bit elements: A, B and the immediate go to
   the alignd256_mask builtin with an undefined pass-through operand and an
   all-ones mask.  imm stays inline in the call. */
#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
  const __v8si __aln_a = (__v8si)(__m256i)(A); \
  const __v8si __aln_b = (__v8si)(__m256i)(B); \
  const __v8si __aln_any = (__v8si)_mm256_undefined_si256(); \
  (__m256i)__builtin_ia32_alignd256_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_any, (__mmask8)-1); })
8885
/* Masked 256-bit alignr on 32-bit elements: W is the pass-through operand and
   U the mask given to the alignd256_mask builtin.  imm stays inline. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  const __v8si __aln_a = (__v8si)(__m256i)(A); \
  const __v8si __aln_b = (__v8si)(__m256i)(B); \
  const __v8si __aln_old = (__v8si)(__m256i)(W); \
  const __mmask8 __aln_mask = (__mmask8)(U); \
  (__m256i)__builtin_ia32_alignd256_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_old, __aln_mask); })
8891
/* Zero-masked 256-bit alignr on 32-bit elements: _mm256_setzero_si256() is
   the pass-through operand and U the mask given to the alignd256_mask
   builtin. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  const __v8si __aln_a = (__v8si)(__m256i)(A); \
  const __v8si __aln_b = (__v8si)(__m256i)(B); \
  const __v8si __aln_zero = (__v8si)_mm256_setzero_si256(); \
  const __mmask8 __aln_mask = (__mmask8)(U); \
  (__m256i)__builtin_ia32_alignd256_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_zero, __aln_mask); })
8897
/* Unmasked 128-bit alignr on 64-bit elements.
 *
 * A, B and the immediate are forwarded to the alignq128_mask builtin with an
 * all-ones mask.  The pass-through operand is only a placeholder (presumably
 * never selected when every mask bit is set), so it is an undefined vector,
 * matching _mm_alignr_epi32 and the 256-bit unmasked forms, instead of an
 * explicitly zeroed one.
 */
#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
                                         (__v2di)(__m128i)(B), (int)(imm), \
                                         (__v2di)_mm_undefined_si128(), \
                                         (__mmask8)-1); })
8903
/* Masked 128-bit alignr on 64-bit elements: W is the pass-through operand and
   U the mask given to the alignq128_mask builtin.  imm stays inline. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  const __v2di __aln_a = (__v2di)(__m128i)(A); \
  const __v2di __aln_b = (__v2di)(__m128i)(B); \
  const __v2di __aln_old = (__v2di)(__m128i)(W); \
  const __mmask8 __aln_mask = (__mmask8)(U); \
  (__m128i)__builtin_ia32_alignq128_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_old, __aln_mask); })
8909
/* Zero-masked 128-bit alignr on 64-bit elements: _mm_setzero_di() is the
   pass-through operand and U the mask given to the alignq128_mask builtin. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  const __v2di __aln_a = (__v2di)(__m128i)(A); \
  const __v2di __aln_b = (__v2di)(__m128i)(B); \
  const __v2di __aln_zero = (__v2di)_mm_setzero_di(); \
  const __mmask8 __aln_mask = (__mmask8)(U); \
  (__m128i)__builtin_ia32_alignq128_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_zero, __aln_mask); })
8915
/* Unmasked 256-bit alignr on 64-bit elements.
 *
 * A, B and the immediate are forwarded to the alignq256_mask builtin with an
 * all-ones mask.  The pass-through operand is only a placeholder (presumably
 * never selected when every mask bit is set); it is taken from the integer
 * undefined-vector helper so its type matches the integer operands instead of
 * reinterpreting a double vector.
 */
#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
                                         (__v4di)(__m256i)(B), (int)(imm), \
                                         (__v4di)_mm256_undefined_si256(), \
                                         (__mmask8)-1); })
8921
/* Masked 256-bit alignr on 64-bit elements: W is the pass-through operand and
   U the mask given to the alignq256_mask builtin.  imm stays inline. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  const __v4di __aln_a = (__v4di)(__m256i)(A); \
  const __v4di __aln_b = (__v4di)(__m256i)(B); \
  const __v4di __aln_old = (__v4di)(__m256i)(W); \
  const __mmask8 __aln_mask = (__mmask8)(U); \
  (__m256i)__builtin_ia32_alignq256_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_old, __aln_mask); })
8927
/* Zero-masked 256-bit alignr on 64-bit elements: _mm256_setzero_si256() is
   the pass-through operand and U the mask given to the alignq256_mask
   builtin. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  const __v4di __aln_a = (__v4di)(__m256i)(A); \
  const __v4di __aln_b = (__v4di)(__m256i)(B); \
  const __v4di __aln_zero = (__v4di)_mm256_setzero_si256(); \
  const __mmask8 __aln_mask = (__mmask8)(U); \
  (__m256i)__builtin_ia32_alignq256_mask(__aln_a, __aln_b, (int)(imm), \
                                         __aln_zero, __aln_mask); })
8933
8934static __inline__ __m128 __DEFAULT_FN_ATTRS
8935_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8936{
8937  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8938                                             (__v4sf)_mm_movehdup_ps(__A),
8939                                             (__v4sf)__W);
8940}
8941
8942static __inline__ __m128 __DEFAULT_FN_ATTRS
8943_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8944{
8945  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8946                                             (__v4sf)_mm_movehdup_ps(__A),
8947                                             (__v4sf)_mm_setzero_ps());
8948}
8949
8950static __inline__ __m256 __DEFAULT_FN_ATTRS
8951_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8952{
8953  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8954                                             (__v8sf)_mm256_movehdup_ps(__A),
8955                                             (__v8sf)__W);
8956}
8957
8958static __inline__ __m256 __DEFAULT_FN_ATTRS
8959_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8960{
8961  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8962                                             (__v8sf)_mm256_movehdup_ps(__A),
8963                                             (__v8sf)_mm256_setzero_ps());
8964}
8965
8966static __inline__ __m128 __DEFAULT_FN_ATTRS
8967_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8968{
8969  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8970                                             (__v4sf)_mm_moveldup_ps(__A),
8971                                             (__v4sf)__W);
8972}
8973
8974static __inline__ __m128 __DEFAULT_FN_ATTRS
8975_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8976{
8977  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8978                                             (__v4sf)_mm_moveldup_ps(__A),
8979                                             (__v4sf)_mm_setzero_ps());
8980}
8981
8982static __inline__ __m256 __DEFAULT_FN_ATTRS
8983_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8984{
8985  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8986                                             (__v8sf)_mm256_moveldup_ps(__A),
8987                                             (__v8sf)__W);
8988}
8989
8990static __inline__ __m256 __DEFAULT_FN_ATTRS
8991_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8992{
8993  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8994                                             (__v8sf)_mm256_moveldup_ps(__A),
8995                                             (__v8sf)_mm256_setzero_ps());
8996}
8997
/* Masked _mm256_shuffle_epi32: mask U, the shuffled (A, I) vector and W are
   passed, in that order, to the selectd_256 builtin. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
  const __mmask8 __sel_mask = (__mmask8)(U); \
  const __v8si __sel_new = (__v8si)_mm256_shuffle_epi32((A), (I)); \
  const __v8si __sel_old = (__v8si)(__m256i)(W); \
  (__m256i)__builtin_ia32_selectd_256(__sel_mask, __sel_new, __sel_old); })
9002
/* Zero-masked _mm256_shuffle_epi32: mask U, the shuffled (A, I) vector and
   _mm256_setzero_si256() are passed to the selectd_256 builtin. */
#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\
  const __mmask8 __sel_mask = (__mmask8)(U); \
  const __v8si __sel_new = (__v8si)_mm256_shuffle_epi32((A), (I)); \
  const __v8si __sel_zero = (__v8si)_mm256_setzero_si256(); \
  (__m256i)__builtin_ia32_selectd_256(__sel_mask, __sel_new, __sel_zero); })
9007
/* Masked _mm_shuffle_epi32: mask U, the shuffled (A, I) vector and W are
   passed, in that order, to the selectd_128 builtin. */
#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\
  const __mmask8 __sel_mask = (__mmask8)(U); \
  const __v4si __sel_new = (__v4si)_mm_shuffle_epi32((A), (I)); \
  const __v4si __sel_old = (__v4si)(__m128i)(W); \
  (__m128i)__builtin_ia32_selectd_128(__sel_mask, __sel_new, __sel_old); })
9012
/* Zero-masked _mm_shuffle_epi32: mask U, the shuffled (A, I) vector and
   _mm_setzero_si128() are passed to the selectd_128 builtin. */
#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\
  const __mmask8 __sel_mask = (__mmask8)(U); \
  const __v4si __sel_new = (__v4si)_mm_shuffle_epi32((A), (I)); \
  const __v4si __sel_zero = (__v4si)_mm_setzero_si128(); \
  (__m128i)__builtin_ia32_selectd_128(__sel_mask, __sel_new, __sel_zero); })
9017
9018static __inline__ __m128d __DEFAULT_FN_ATTRS
9019_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
9020{
9021  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
9022              (__v2df) __A,
9023              (__v2df) __W);
9024}
9025
9026static __inline__ __m128d __DEFAULT_FN_ATTRS
9027_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
9028{
9029  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
9030              (__v2df) __A,
9031              (__v2df) _mm_setzero_pd ());
9032}
9033
9034static __inline__ __m256d __DEFAULT_FN_ATTRS
9035_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
9036{
9037  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
9038              (__v4df) __A,
9039              (__v4df) __W);
9040}
9041
9042static __inline__ __m256d __DEFAULT_FN_ATTRS
9043_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
9044{
9045  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
9046              (__v4df) __A,
9047              (__v4df) _mm256_setzero_pd ());
9048}
9049
9050static __inline__ __m128 __DEFAULT_FN_ATTRS
9051_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
9052{
9053  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
9054             (__v4sf) __A,
9055             (__v4sf) __W);
9056}
9057
9058static __inline__ __m128 __DEFAULT_FN_ATTRS
9059_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
9060{
9061  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
9062             (__v4sf) __A,
9063             (__v4sf) _mm_setzero_ps ());
9064}
9065
9066static __inline__ __m256 __DEFAULT_FN_ATTRS
9067_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
9068{
9069  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
9070             (__v8sf) __A,
9071             (__v8sf) __W);
9072}
9073
9074static __inline__ __m256 __DEFAULT_FN_ATTRS
9075_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
9076{
9077  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
9078             (__v8sf) __A,
9079             (__v8sf) _mm256_setzero_ps ());
9080}
9081
9082static __inline__ __m128 __DEFAULT_FN_ATTRS
9083_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
9084{
9085  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
9086             (__v4sf) __W,
9087             (__mmask8) __U);
9088}
9089
9090static __inline__ __m128 __DEFAULT_FN_ATTRS
9091_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
9092{
9093  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
9094             (__v4sf)
9095             _mm_setzero_ps (),
9096             (__mmask8) __U);
9097}
9098
9099static __inline__ __m256 __DEFAULT_FN_ATTRS
9100_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
9101{
9102  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
9103                (__v8sf) __W,
9104                (__mmask8) __U);
9105}
9106
9107static __inline__ __m256 __DEFAULT_FN_ATTRS
9108_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
9109{
9110  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
9111                (__v8sf)
9112                _mm256_setzero_ps (),
9113                (__mmask8) __U);
9114}
9115
9116static __inline __m128i __DEFAULT_FN_ATTRS
9117_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
9118{
9119  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
9120                                                  (__v8hi) __W,
9121                                                  (__mmask8) __U);
9122}
9123
9124static __inline __m128i __DEFAULT_FN_ATTRS
9125_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
9126{
9127  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
9128                                                  (__v8hi) _mm_setzero_si128 (),
9129                                                  (__mmask8) __U);
9130}
9131
/* Masked vcvtps2ph on four floats with a caller-supplied immediate I; W is
   the pass-through operand and U the mask.  I stays inline in the call. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  const __v4sf __cvt_in = (__v4sf)(__m128)(A); \
  const __v8hi __cvt_old = (__v8hi)(__m128i)(W); \
  const __mmask8 __cvt_mask = (__mmask8)(U); \
  (__m128i)__builtin_ia32_vcvtps2ph_mask(__cvt_in, (int)(I), __cvt_old, \
                                         __cvt_mask); })
9136
/* Zero-masked vcvtps2ph on four floats with a caller-supplied immediate I;
   _mm_setzero_si128() is the pass-through operand and U the mask. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  const __v4sf __cvt_in = (__v4sf)(__m128)(A); \
  const __v8hi __cvt_zero = (__v8hi)_mm_setzero_si128(); \
  const __mmask8 __cvt_mask = (__mmask8)(U); \
  (__m128i)__builtin_ia32_vcvtps2ph_mask(__cvt_in, (int)(I), __cvt_zero, \
                                         __cvt_mask); })
9141
9142static __inline __m128i __DEFAULT_FN_ATTRS
9143_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
9144{
9145  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
9146                                                      (__v8hi) __W,
9147                                                      (__mmask8) __U);
9148}
9149
9150static __inline __m128i __DEFAULT_FN_ATTRS
9151_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
9152{
9153  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
9154                                                      (__v8hi) _mm_setzero_si128(),
9155                                                      (__mmask8) __U);
9156}
/* Masked vcvtps2ph on eight floats with a caller-supplied immediate I; W is
   the pass-through operand and U the mask.  I stays inline in the call. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  const __v8sf __cvt_in = (__v8sf)(__m256)(A); \
  const __v8hi __cvt_old = (__v8hi)(__m128i)(W); \
  const __mmask8 __cvt_mask = (__mmask8)(U); \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask(__cvt_in, (int)(I), __cvt_old, \
                                            __cvt_mask); })
9161
/* Zero-masked vcvtps2ph on eight floats with a caller-supplied immediate I;
   _mm_setzero_si128() is the pass-through operand and U the mask. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  const __v8sf __cvt_in = (__v8sf)(__m256)(A); \
  const __v8hi __cvt_zero = (__v8hi)_mm_setzero_si128(); \
  const __mmask8 __cvt_mask = (__mmask8)(U); \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask(__cvt_in, (int)(I), __cvt_zero, \
                                            __cvt_mask); })
9166
9167
9168#undef __DEFAULT_FN_ATTRS
9169
9170#endif /* __AVX512VLINTRIN_H */
9171