/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLINTRIN_H
29#define __AVX512VLINTRIN_H
30
/* Attribute set attached to the avx512vl-targeted wrappers in this header. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
32
33/* Doesn't require avx512vl, used in avx512dqintrin.h */
34static  __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
35_mm_setzero_di(void) {
36  return (__m128i)(__v2di){ 0LL, 0LL};
37}
38
/* Integer comparisons; each wrapper below returns its result as an __mmask8. */
40
41static __inline__ __mmask8 __DEFAULT_FN_ATTRS
42_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) {
43  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
44                                                  (__mmask8)-1);
45}
46
47static __inline__ __mmask8 __DEFAULT_FN_ATTRS
48_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
49  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
50                                                  __u);
51}
52
53static __inline__ __mmask8 __DEFAULT_FN_ATTRS
54_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) {
55  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
56                                                (__mmask8)-1);
57}
58
59static __inline__ __mmask8 __DEFAULT_FN_ATTRS
60_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
61  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
62                                                __u);
63}
64
65static __inline__ __mmask8 __DEFAULT_FN_ATTRS
66_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) {
67  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
68                                                  (__mmask8)-1);
69}
70
71static __inline__ __mmask8 __DEFAULT_FN_ATTRS
72_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
73  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
74                                                  __u);
75}
76
77static __inline__ __mmask8 __DEFAULT_FN_ATTRS
78_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) {
79  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
80                                                (__mmask8)-1);
81}
82
83static __inline__ __mmask8 __DEFAULT_FN_ATTRS
84_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
85  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
86                                                __u);
87}
88
89static __inline__ __mmask8 __DEFAULT_FN_ATTRS
90_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) {
91  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
92                                                  (__mmask8)-1);
93}
94
95static __inline__ __mmask8 __DEFAULT_FN_ATTRS
96_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
97  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
98                                                  __u);
99}
100
101static __inline__ __mmask8 __DEFAULT_FN_ATTRS
102_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) {
103  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
104                                                (__mmask8)-1);
105}
106
107static __inline__ __mmask8 __DEFAULT_FN_ATTRS
108_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
109  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
110                                                __u);
111}
112
113static __inline__ __mmask8 __DEFAULT_FN_ATTRS
114_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) {
115  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
116                                                  (__mmask8)-1);
117}
118
119static __inline__ __mmask8 __DEFAULT_FN_ATTRS
120_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
121  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
122                                                  __u);
123}
124
125static __inline__ __mmask8 __DEFAULT_FN_ATTRS
126_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) {
127  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
128                                                (__mmask8)-1);
129}
130
131static __inline__ __mmask8 __DEFAULT_FN_ATTRS
132_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
133  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
134                                                __u);
135}
136
137
138static __inline__ __mmask8 __DEFAULT_FN_ATTRS
139_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) {
140  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
141                                               (__mmask8)-1);
142}
143
144static __inline__ __mmask8 __DEFAULT_FN_ATTRS
145_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
146  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
147                                               __u);
148}
149
150static __inline__ __mmask8 __DEFAULT_FN_ATTRS
151_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) {
152  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
153                                                (__mmask8)-1);
154}
155
156static __inline__ __mmask8 __DEFAULT_FN_ATTRS
157_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
158  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
159                                                __u);
160}
161
162static __inline__ __mmask8 __DEFAULT_FN_ATTRS
163_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) {
164  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
165                                               (__mmask8)-1);
166}
167
168static __inline__ __mmask8 __DEFAULT_FN_ATTRS
169_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
170  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
171                                               __u);
172}
173
174static __inline__ __mmask8 __DEFAULT_FN_ATTRS
175_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) {
176  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
177                                                (__mmask8)-1);
178}
179
180static __inline__ __mmask8 __DEFAULT_FN_ATTRS
181_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
182  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
183                                                __u);
184}
185
186static __inline__ __mmask8 __DEFAULT_FN_ATTRS
187_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) {
188  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
189                                               (__mmask8)-1);
190}
191
192static __inline__ __mmask8 __DEFAULT_FN_ATTRS
193_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
194  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
195                                               __u);
196}
197
198static __inline__ __mmask8 __DEFAULT_FN_ATTRS
199_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) {
200  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
201                                                (__mmask8)-1);
202}
203
204static __inline__ __mmask8 __DEFAULT_FN_ATTRS
205_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
206  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
207                                                __u);
208}
209
210static __inline__ __mmask8 __DEFAULT_FN_ATTRS
211_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) {
212  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
213                                               (__mmask8)-1);
214}
215
216static __inline__ __mmask8 __DEFAULT_FN_ATTRS
217_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
218  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
219                                               __u);
220}
221
222static __inline__ __mmask8 __DEFAULT_FN_ATTRS
223_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) {
224  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
225                                                (__mmask8)-1);
226}
227
228static __inline__ __mmask8 __DEFAULT_FN_ATTRS
229_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
230  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
231                                                __u);
232}
233
234static __inline__ __mmask8 __DEFAULT_FN_ATTRS
235_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) {
236  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
237                                                  (__mmask8)-1);
238}
239
240static __inline__ __mmask8 __DEFAULT_FN_ATTRS
241_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
242  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
243                                                  __u);
244}
245
246static __inline__ __mmask8 __DEFAULT_FN_ATTRS
247_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) {
248  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
249                                                (__mmask8)-1);
250}
251
252static __inline__ __mmask8 __DEFAULT_FN_ATTRS
253_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
254  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
255                                                __u);
256}
257
258static __inline__ __mmask8 __DEFAULT_FN_ATTRS
259_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) {
260  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
261                                                  (__mmask8)-1);
262}
263
264static __inline__ __mmask8 __DEFAULT_FN_ATTRS
265_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
266  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
267                                                  __u);
268}
269
270static __inline__ __mmask8 __DEFAULT_FN_ATTRS
271_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) {
272  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
273                                                (__mmask8)-1);
274}
275
276static __inline__ __mmask8 __DEFAULT_FN_ATTRS
277_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
278  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
279                                                __u);
280}
281
282static __inline__ __mmask8 __DEFAULT_FN_ATTRS
283_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) {
284  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
285                                                  (__mmask8)-1);
286}
287
288static __inline__ __mmask8 __DEFAULT_FN_ATTRS
289_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
290  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
291                                                  __u);
292}
293
294static __inline__ __mmask8 __DEFAULT_FN_ATTRS
295_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) {
296  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
297                                                (__mmask8)-1);
298}
299
300static __inline__ __mmask8 __DEFAULT_FN_ATTRS
301_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
302  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
303                                                __u);
304}
305
306static __inline__ __mmask8 __DEFAULT_FN_ATTRS
307_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) {
308  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
309                                                  (__mmask8)-1);
310}
311
312static __inline__ __mmask8 __DEFAULT_FN_ATTRS
313_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
314  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
315                                                  __u);
316}
317
318static __inline__ __mmask8 __DEFAULT_FN_ATTRS
319_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) {
320  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
321                                                (__mmask8)-1);
322}
323
324static __inline__ __mmask8 __DEFAULT_FN_ATTRS
325_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
326  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
327                                                __u);
328}
329
330static __inline__ __mmask8 __DEFAULT_FN_ATTRS
331_mm_cmple_epi32_mask(__m128i __a, __m128i __b) {
332  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
333                                               (__mmask8)-1);
334}
335
336static __inline__ __mmask8 __DEFAULT_FN_ATTRS
337_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
338  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
339                                               __u);
340}
341
342static __inline__ __mmask8 __DEFAULT_FN_ATTRS
343_mm_cmple_epu32_mask(__m128i __a, __m128i __b) {
344  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
345                                                (__mmask8)-1);
346}
347
348static __inline__ __mmask8 __DEFAULT_FN_ATTRS
349_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
350  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
351                                                __u);
352}
353
354static __inline__ __mmask8 __DEFAULT_FN_ATTRS
355_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) {
356  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
357                                               (__mmask8)-1);
358}
359
360static __inline__ __mmask8 __DEFAULT_FN_ATTRS
361_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
362  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
363                                               __u);
364}
365
366static __inline__ __mmask8 __DEFAULT_FN_ATTRS
367_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) {
368  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
369                                                (__mmask8)-1);
370}
371
372static __inline__ __mmask8 __DEFAULT_FN_ATTRS
373_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
374  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
375                                                __u);
376}
377
378static __inline__ __mmask8 __DEFAULT_FN_ATTRS
379_mm_cmple_epi64_mask(__m128i __a, __m128i __b) {
380  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
381                                               (__mmask8)-1);
382}
383
384static __inline__ __mmask8 __DEFAULT_FN_ATTRS
385_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
386  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
387                                               __u);
388}
389
390static __inline__ __mmask8 __DEFAULT_FN_ATTRS
391_mm_cmple_epu64_mask(__m128i __a, __m128i __b) {
392  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
393                                                (__mmask8)-1);
394}
395
396static __inline__ __mmask8 __DEFAULT_FN_ATTRS
397_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
398  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
399                                                __u);
400}
401
402static __inline__ __mmask8 __DEFAULT_FN_ATTRS
403_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) {
404  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
405                                               (__mmask8)-1);
406}
407
408static __inline__ __mmask8 __DEFAULT_FN_ATTRS
409_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
410  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
411                                               __u);
412}
413
414static __inline__ __mmask8 __DEFAULT_FN_ATTRS
415_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) {
416  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
417                                                (__mmask8)-1);
418}
419
420static __inline__ __mmask8 __DEFAULT_FN_ATTRS
421_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
422  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
423                                                __u);
424}
425
426static __inline__ __mmask8 __DEFAULT_FN_ATTRS
427_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) {
428  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
429                                               (__mmask8)-1);
430}
431
432static __inline__ __mmask8 __DEFAULT_FN_ATTRS
433_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
434  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
435                                               __u);
436}
437
438static __inline__ __mmask8 __DEFAULT_FN_ATTRS
439_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) {
440  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
441                                                (__mmask8)-1);
442}
443
444static __inline__ __mmask8 __DEFAULT_FN_ATTRS
445_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
446  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
447                                                __u);
448}
449
450static __inline__ __mmask8 __DEFAULT_FN_ATTRS
451_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) {
452  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
453                                               (__mmask8)-1);
454}
455
456static __inline__ __mmask8 __DEFAULT_FN_ATTRS
457_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
458  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
459                                               __u);
460}
461
462static __inline__ __mmask8 __DEFAULT_FN_ATTRS
463_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) {
464  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
465                                                (__mmask8)-1);
466}
467
468static __inline__ __mmask8 __DEFAULT_FN_ATTRS
469_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
470  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
471                                                __u);
472}
473
474static __inline__ __mmask8 __DEFAULT_FN_ATTRS
475_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) {
476  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
477                                               (__mmask8)-1);
478}
479
480static __inline__ __mmask8 __DEFAULT_FN_ATTRS
481_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
482  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
483                                               __u);
484}
485
486static __inline__ __mmask8 __DEFAULT_FN_ATTRS
487_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) {
488  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
489                                                (__mmask8)-1);
490}
491
492static __inline__ __mmask8 __DEFAULT_FN_ATTRS
493_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
494  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
495                                                __u);
496}
497
498static __inline__ __mmask8 __DEFAULT_FN_ATTRS
499_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) {
500  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
501                                               (__mmask8)-1);
502}
503
504static __inline__ __mmask8 __DEFAULT_FN_ATTRS
505_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
506  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
507                                               __u);
508}
509
510static __inline__ __mmask8 __DEFAULT_FN_ATTRS
511_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) {
512  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
513                                                (__mmask8)-1);
514}
515
516static __inline__ __mmask8 __DEFAULT_FN_ATTRS
517_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
518  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
519                                                __u);
520}
521
522static __inline__ __mmask8 __DEFAULT_FN_ATTRS
523_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) {
524  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
525                                               (__mmask8)-1);
526}
527
528static __inline__ __mmask8 __DEFAULT_FN_ATTRS
529_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
530  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
531                                               __u);
532}
533
534static __inline__ __mmask8 __DEFAULT_FN_ATTRS
535_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) {
536  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
537                                                (__mmask8)-1);
538}
539
540static __inline__ __mmask8 __DEFAULT_FN_ATTRS
541_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) {
542  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
543                                                __u);
544}
545
546static __inline__ __mmask8 __DEFAULT_FN_ATTRS
547_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) {
548  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
549                                               (__mmask8)-1);
550}
551
552static __inline__ __mmask8 __DEFAULT_FN_ATTRS
553_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
554  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
555                                               __u);
556}
557
558static __inline__ __mmask8 __DEFAULT_FN_ATTRS
559_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) {
560  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
561                                                (__mmask8)-1);
562}
563
564static __inline__ __mmask8 __DEFAULT_FN_ATTRS
565_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) {
566  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
567                                                __u);
568}
569
570static __inline__ __mmask8 __DEFAULT_FN_ATTRS
571_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) {
572  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
573                                               (__mmask8)-1);
574}
575
576static __inline__ __mmask8 __DEFAULT_FN_ATTRS
577_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
578  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
579                                               __u);
580}
581
582static __inline__ __mmask8 __DEFAULT_FN_ATTRS
583_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) {
584  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
585                                                (__mmask8)-1);
586}
587
588static __inline__ __mmask8 __DEFAULT_FN_ATTRS
589_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) {
590  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
591                                                __u);
592}
593
594static __inline__ __mmask8 __DEFAULT_FN_ATTRS
595_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) {
596  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
597                                               (__mmask8)-1);
598}
599
600static __inline__ __mmask8 __DEFAULT_FN_ATTRS
601_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
602  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
603                                               __u);
604}
605
606static __inline__ __mmask8 __DEFAULT_FN_ATTRS
607_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) {
608  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
609                                                (__mmask8)-1);
610}
611
612static __inline__ __mmask8 __DEFAULT_FN_ATTRS
613_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) {
614  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
615                                                __u);
616}
617
618static __inline__ __m256i __DEFAULT_FN_ATTRS
619_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
620{
621  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
622                                             (__v8si)_mm256_add_epi32(__A, __B),
623                                             (__v8si)__W);
624}
625
626static __inline__ __m256i __DEFAULT_FN_ATTRS
627_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
628{
629  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
630                                             (__v8si)_mm256_add_epi32(__A, __B),
631                                             (__v8si)_mm256_setzero_si256());
632}
633
634static __inline__ __m256i __DEFAULT_FN_ATTRS
635_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
636{
637  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
638                                             (__v4di)_mm256_add_epi64(__A, __B),
639                                             (__v4di)__W);
640}
641
642static __inline__ __m256i __DEFAULT_FN_ATTRS
643_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
644{
645  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
646                                             (__v4di)_mm256_add_epi64(__A, __B),
647                                             (__v4di)_mm256_setzero_si256());
648}
649
650static __inline__ __m256i __DEFAULT_FN_ATTRS
651_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
652{
653  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
654                                             (__v8si)_mm256_sub_epi32(__A, __B),
655                                             (__v8si)__W);
656}
657
658static __inline__ __m256i __DEFAULT_FN_ATTRS
659_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
660{
661  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
662                                             (__v8si)_mm256_sub_epi32(__A, __B),
663                                             (__v8si)_mm256_setzero_si256());
664}
665
666static __inline__ __m256i __DEFAULT_FN_ATTRS
667_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
668{
669  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
670                                             (__v4di)_mm256_sub_epi64(__A, __B),
671                                             (__v4di)__W);
672}
673
674static __inline__ __m256i __DEFAULT_FN_ATTRS
675_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
676{
677  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
678                                             (__v4di)_mm256_sub_epi64(__A, __B),
679                                             (__v4di)_mm256_setzero_si256());
680}
681
682static __inline__ __m128i __DEFAULT_FN_ATTRS
683_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
684{
685  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
686                                             (__v4si)_mm_add_epi32(__A, __B),
687                                             (__v4si)__W);
688}
689
690static __inline__ __m128i __DEFAULT_FN_ATTRS
691_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
692{
693  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
694                                             (__v4si)_mm_add_epi32(__A, __B),
695                                             (__v4si)_mm_setzero_si128());
696}
697
698static __inline__ __m128i __DEFAULT_FN_ATTRS
699_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
700{
701  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
702                                             (__v2di)_mm_add_epi64(__A, __B),
703                                             (__v2di)__W);
704}
705
706static __inline__ __m128i __DEFAULT_FN_ATTRS
707_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
708{
709  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
710                                             (__v2di)_mm_add_epi64(__A, __B),
711                                             (__v2di)_mm_setzero_si128());
712}
713
714static __inline__ __m128i __DEFAULT_FN_ATTRS
715_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
716{
717  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
718                                             (__v4si)_mm_sub_epi32(__A, __B),
719                                             (__v4si)__W);
720}
721
722static __inline__ __m128i __DEFAULT_FN_ATTRS
723_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
724{
725  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
726                                             (__v4si)_mm_sub_epi32(__A, __B),
727                                             (__v4si)_mm_setzero_si128());
728}
729
730static __inline__ __m128i __DEFAULT_FN_ATTRS
731_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
732{
733  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
734                                             (__v2di)_mm_sub_epi64(__A, __B),
735                                             (__v2di)__W);
736}
737
738static __inline__ __m128i __DEFAULT_FN_ATTRS
739_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
740{
741  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
742                                             (__v2di)_mm_sub_epi64(__A, __B),
743                                             (__v2di)_mm_setzero_si128());
744}
745
746static __inline__ __m256i __DEFAULT_FN_ATTRS
747_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
748{
749  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
750                                             (__v4di)_mm256_mul_epi32(__X, __Y),
751                                             (__v4di)__W);
752}
753
754static __inline__ __m256i __DEFAULT_FN_ATTRS
755_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
756{
757  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
758                                             (__v4di)_mm256_mul_epi32(__X, __Y),
759                                             (__v4di)_mm256_setzero_si256());
760}
761
762static __inline__ __m128i __DEFAULT_FN_ATTRS
763_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
764{
765  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
766                                             (__v2di)_mm_mul_epi32(__X, __Y),
767                                             (__v2di)__W);
768}
769
770static __inline__ __m128i __DEFAULT_FN_ATTRS
771_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
772{
773  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
774                                             (__v2di)_mm_mul_epi32(__X, __Y),
775                                             (__v2di)_mm_setzero_si128());
776}
777
778static __inline__ __m256i __DEFAULT_FN_ATTRS
779_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
780{
781  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
782                                             (__v4di)_mm256_mul_epu32(__X, __Y),
783                                             (__v4di)__W);
784}
785
786static __inline__ __m256i __DEFAULT_FN_ATTRS
787_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
788{
789  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
790                                             (__v4di)_mm256_mul_epu32(__X, __Y),
791                                             (__v4di)_mm256_setzero_si256());
792}
793
794static __inline__ __m128i __DEFAULT_FN_ATTRS
795_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
796{
797  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
798                                             (__v2di)_mm_mul_epu32(__X, __Y),
799                                             (__v2di)__W);
800}
801
802static __inline__ __m128i __DEFAULT_FN_ATTRS
803_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
804{
805  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
806                                             (__v2di)_mm_mul_epu32(__X, __Y),
807                                             (__v2di)_mm_setzero_si128());
808}
809
810static __inline__ __m256i __DEFAULT_FN_ATTRS
811_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
812{
813  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
814                                             (__v8si)_mm256_mullo_epi32(__A, __B),
815                                             (__v8si)_mm256_setzero_si256());
816}
817
818static __inline__ __m256i __DEFAULT_FN_ATTRS
819_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
820{
821  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
822                                             (__v8si)_mm256_mullo_epi32(__A, __B),
823                                             (__v8si)__W);
824}
825
826static __inline__ __m128i __DEFAULT_FN_ATTRS
827_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
828{
829  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
830                                             (__v4si)_mm_mullo_epi32(__A, __B),
831                                             (__v4si)_mm_setzero_si128());
832}
833
834static __inline__ __m128i __DEFAULT_FN_ATTRS
835_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
836{
837  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
838                                             (__v4si)_mm_mullo_epi32(__A, __B),
839                                             (__v4si)__W);
840}
841
842static __inline__ __m256i __DEFAULT_FN_ATTRS
843_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
844{
845  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
846                                             (__v8si)_mm256_and_si256(__A, __B),
847                                             (__v8si)__W);
848}
849
850static __inline__ __m256i __DEFAULT_FN_ATTRS
851_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
852{
853  return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
854}
855
856static __inline__ __m128i __DEFAULT_FN_ATTRS
857_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
858{
859  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
860                                             (__v4si)_mm_and_si128(__A, __B),
861                                             (__v4si)__W);
862}
863
864static __inline__ __m128i __DEFAULT_FN_ATTRS
865_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
866{
867  return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
868}
869
870static __inline__ __m256i __DEFAULT_FN_ATTRS
871_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
872{
873  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
874                                          (__v8si)_mm256_andnot_si256(__A, __B),
875                                          (__v8si)__W);
876}
877
878static __inline__ __m256i __DEFAULT_FN_ATTRS
879_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
880{
881  return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
882                                           __U, __A, __B);
883}
884
885static __inline__ __m128i __DEFAULT_FN_ATTRS
886_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
887{
888  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
889                                             (__v4si)_mm_andnot_si128(__A, __B),
890                                             (__v4si)__W);
891}
892
893static __inline__ __m128i __DEFAULT_FN_ATTRS
894_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
895{
896  return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
897}
898
899static __inline__ __m256i __DEFAULT_FN_ATTRS
900_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
901{
902  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
903                                             (__v8si)_mm256_or_si256(__A, __B),
904                                             (__v8si)__W);
905}
906
907static __inline__ __m256i __DEFAULT_FN_ATTRS
908_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
909{
910  return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
911}
912
913static __inline__ __m128i __DEFAULT_FN_ATTRS
914_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
915{
916  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
917                                             (__v4si)_mm_or_si128(__A, __B),
918                                             (__v4si)__W);
919}
920
921static __inline__ __m128i __DEFAULT_FN_ATTRS
922_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
923{
924  return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
925}
926
927static __inline__ __m256i __DEFAULT_FN_ATTRS
928_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
929{
930  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
931                                             (__v8si)_mm256_xor_si256(__A, __B),
932                                             (__v8si)__W);
933}
934
935static __inline__ __m256i __DEFAULT_FN_ATTRS
936_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
937{
938  return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
939}
940
941static __inline__ __m128i __DEFAULT_FN_ATTRS
942_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
943        __m128i __B)
944{
945  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
946                                             (__v4si)_mm_xor_si128(__A, __B),
947                                             (__v4si)__W);
948}
949
950static __inline__ __m128i __DEFAULT_FN_ATTRS
951_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
952{
953  return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
954}
955
956static __inline__ __m256i __DEFAULT_FN_ATTRS
957_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
958{
959  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
960                                             (__v4di)_mm256_and_si256(__A, __B),
961                                             (__v4di)__W);
962}
963
964static __inline__ __m256i __DEFAULT_FN_ATTRS
965_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
966{
967  return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
968}
969
970static __inline__ __m128i __DEFAULT_FN_ATTRS
971_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
972{
973  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
974                                             (__v2di)_mm_and_si128(__A, __B),
975                                             (__v2di)__W);
976}
977
978static __inline__ __m128i __DEFAULT_FN_ATTRS
979_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
980{
981  return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
982}
983
984static __inline__ __m256i __DEFAULT_FN_ATTRS
985_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
986{
987  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
988                                          (__v4di)_mm256_andnot_si256(__A, __B),
989                                          (__v4di)__W);
990}
991
992static __inline__ __m256i __DEFAULT_FN_ATTRS
993_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
994{
995  return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
996                                           __U, __A, __B);
997}
998
999static __inline__ __m128i __DEFAULT_FN_ATTRS
1000_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1001{
1002  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1003                                             (__v2di)_mm_andnot_si128(__A, __B),
1004                                             (__v2di)__W);
1005}
1006
1007static __inline__ __m128i __DEFAULT_FN_ATTRS
1008_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1009{
1010  return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
1011}
1012
1013static __inline__ __m256i __DEFAULT_FN_ATTRS
1014_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
1015{
1016  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
1017                                             (__v4di)_mm256_or_si256(__A, __B),
1018                                             (__v4di)__W);
1019}
1020
1021static __inline__ __m256i __DEFAULT_FN_ATTRS
1022_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
1023{
1024  return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
1025}
1026
1027static __inline__ __m128i __DEFAULT_FN_ATTRS
1028_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
1029{
1030  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1031                                             (__v2di)_mm_or_si128(__A, __B),
1032                                             (__v2di)__W);
1033}
1034
1035static __inline__ __m128i __DEFAULT_FN_ATTRS
1036_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1037{
1038  return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
1039}
1040
1041static __inline__ __m256i __DEFAULT_FN_ATTRS
1042_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
1043{
1044  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
1045                                             (__v4di)_mm256_xor_si256(__A, __B),
1046                                             (__v4di)__W);
1047}
1048
1049static __inline__ __m256i __DEFAULT_FN_ATTRS
1050_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
1051{
1052  return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
1053}
1054
1055static __inline__ __m128i __DEFAULT_FN_ATTRS
1056_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
1057        __m128i __B)
1058{
1059  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
1060                                             (__v2di)_mm_xor_si128(__A, __B),
1061                                             (__v2di)__W);
1062}
1063
1064static __inline__ __m128i __DEFAULT_FN_ATTRS
1065_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
1066{
1067  return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
1068}
1069
/* Unmasked form: same cmpd128 builtin call as _mm_mask_cmp_epi32_mask, with
 * the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm_cmp_epi32_mask(a, b, p) \
  _mm_mask_cmp_epi32_mask((__mmask8)-1, (a), (b), (p))
1074
/* Passes a and b (viewed as __v4si), predicate p and mask m to
 * __builtin_ia32_cmpd128_mask and yields the result as an __mmask8. */
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), \
                                         (int)(p), (__mmask8)(m)))
1079
/* Unmasked form: same ucmpd128 builtin call as _mm_mask_cmp_epu32_mask, with
 * the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm_cmp_epu32_mask(a, b, p) \
  _mm_mask_cmp_epu32_mask((__mmask8)-1, (a), (b), (p))
1084
/* Passes a and b (viewed as __v4si), predicate p and mask m to
 * __builtin_ia32_ucmpd128_mask and yields the result as an __mmask8. */
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), \
                                          (int)(p), (__mmask8)(m)))
1089
/* Unmasked form: same cmpd256 builtin call as _mm256_mask_cmp_epi32_mask,
 * with the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  _mm256_mask_cmp_epi32_mask((__mmask8)-1, (a), (b), (p))
1094
/* Passes a and b (viewed as __v8si), predicate p and mask m to
 * __builtin_ia32_cmpd256_mask and yields the result as an __mmask8. */
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), \
                                         (int)(p), (__mmask8)(m)))
1099
/* Unmasked form: same ucmpd256 builtin call as _mm256_mask_cmp_epu32_mask,
 * with the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm256_cmp_epu32_mask(a, b, p) \
  _mm256_mask_cmp_epu32_mask((__mmask8)-1, (a), (b), (p))
1104
/* Passes a and b (viewed as __v8si), predicate p and mask m to
 * __builtin_ia32_ucmpd256_mask and yields the result as an __mmask8. */
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), \
                                          (int)(p), (__mmask8)(m)))
1109
/* Unmasked form: same cmpq128 builtin call as _mm_mask_cmp_epi64_mask, with
 * the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm_cmp_epi64_mask(a, b, p) \
  _mm_mask_cmp_epi64_mask((__mmask8)-1, (a), (b), (p))
1114
/* Passes a and b (viewed as __v2di), predicate p and mask m to
 * __builtin_ia32_cmpq128_mask and yields the result as an __mmask8. */
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), \
                                         (int)(p), (__mmask8)(m)))
1119
/* Unmasked form: same ucmpq128 builtin call as _mm_mask_cmp_epu64_mask, with
 * the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm_cmp_epu64_mask(a, b, p) \
  _mm_mask_cmp_epu64_mask((__mmask8)-1, (a), (b), (p))
1124
/* Passes a and b (viewed as __v2di), predicate p and mask m to
 * __builtin_ia32_ucmpq128_mask and yields the result as an __mmask8. */
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), \
                                          (int)(p), (__mmask8)(m)))
1129
/* Unmasked form: same cmpq256 builtin call as _mm256_mask_cmp_epi64_mask,
 * with the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  _mm256_mask_cmp_epi64_mask((__mmask8)-1, (a), (b), (p))
1134
/* Passes a and b (viewed as __v4di), predicate p and mask m to
 * __builtin_ia32_cmpq256_mask and yields the result as an __mmask8. */
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), \
                                         (int)(p), (__mmask8)(m)))
1139
/* Unmasked form: same ucmpq256 builtin call as _mm256_mask_cmp_epu64_mask,
 * with the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm256_cmp_epu64_mask(a, b, p) \
  _mm256_mask_cmp_epu64_mask((__mmask8)-1, (a), (b), (p))
1144
/* Passes a and b (viewed as __v4di), predicate p and mask m to
 * __builtin_ia32_ucmpq256_mask and yields the result as an __mmask8. */
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), \
                                          (int)(p), (__mmask8)(m)))
1149
/* Unmasked form: same cmpps256 builtin call as _mm256_mask_cmp_ps_mask, with
 * the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm256_cmp_ps_mask(a, b, p) \
  _mm256_mask_cmp_ps_mask((__mmask8)-1, (a), (b), (p))
1154
/* Passes a and b (viewed as __v8sf), predicate p and mask m to
 * __builtin_ia32_cmpps256_mask and yields the result as an __mmask8. */
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), \
                                          (int)(p), (__mmask8)(m)))
1159
/* Unmasked form: same cmppd256 builtin call as _mm256_mask_cmp_pd_mask, with
 * the mask operand fixed to (__mmask8)-1. `p` is forwarded as the int
 * predicate operand. */
#define _mm256_cmp_pd_mask(a, b, p) \
  _mm256_mask_cmp_pd_mask((__mmask8)-1, (a), (b), (p))
1164
/* Passes a and b (viewed as __v4df), predicate p and mask m to
 * __builtin_ia32_cmppd256_mask and yields the result as an __mmask8. */
#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), \
                                          (int)(p), (__mmask8)(m)))
1169
/* Unmasked form: same cmpps128 builtin call as _mm_mask_cmp_ps_mask, with the
 * mask operand fixed to (__mmask8)-1. `p` is forwarded as the int predicate
 * operand. */
#define _mm_cmp_ps_mask(a, b, p) \
  _mm_mask_cmp_ps_mask((__mmask8)-1, (a), (b), (p))
1174
/* Passes a and b (viewed as __v4sf), predicate p and mask m to
 * __builtin_ia32_cmpps128_mask and yields the result as an __mmask8. */
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), \
                                          (int)(p), (__mmask8)(m)))
1179
/* Unmasked form: same cmppd128 builtin call as _mm_mask_cmp_pd_mask, with the
 * mask operand fixed to (__mmask8)-1. `p` is forwarded as the int predicate
 * operand. */
#define _mm_cmp_pd_mask(a, b, p) \
  _mm_mask_cmp_pd_mask((__mmask8)-1, (a), (b), (p))
1184
/* Passes a and b (viewed as __v2df), predicate p and mask m to
 * __builtin_ia32_cmppd128_mask and yields the result as an __mmask8. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), \
                                          (int)(p), (__mmask8)(m)))
1189
1190static __inline__ __m128d __DEFAULT_FN_ATTRS
1191_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1192{
1193  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
1194                                                    (__v2df) __B,
1195                                                    (__v2df) __C,
1196                                                    (__mmask8) __U);
1197}
1198
1199static __inline__ __m128d __DEFAULT_FN_ATTRS
1200_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1201{
1202  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
1203                                                     (__v2df) __B,
1204                                                     (__v2df) __C,
1205                                                     (__mmask8) __U);
1206}
1207
1208static __inline__ __m128d __DEFAULT_FN_ATTRS
1209_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1210{
1211  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
1212                                                     (__v2df) __B,
1213                                                     (__v2df) __C,
1214                                                     (__mmask8) __U);
1215}
1216
1217static __inline__ __m128d __DEFAULT_FN_ATTRS
1218_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1219{
1220  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
1221                                                    (__v2df) __B,
1222                                                    -(__v2df) __C,
1223                                                    (__mmask8) __U);
1224}
1225
1226static __inline__ __m128d __DEFAULT_FN_ATTRS
1227_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1228{
1229  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
1230                                                     (__v2df) __B,
1231                                                     -(__v2df) __C,
1232                                                     (__mmask8) __U);
1233}
1234
1235static __inline__ __m128d __DEFAULT_FN_ATTRS
1236_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1237{
1238  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
1239                                                     (__v2df) __B,
1240                                                     (__v2df) __C,
1241                                                     (__mmask8) __U);
1242}
1243
1244static __inline__ __m128d __DEFAULT_FN_ATTRS
1245_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1246{
1247  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
1248                                                     (__v2df) __B,
1249                                                     (__v2df) __C,
1250                                                     (__mmask8) __U);
1251}
1252
1253static __inline__ __m128d __DEFAULT_FN_ATTRS
1254_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1255{
1256  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
1257                                                     (__v2df) __B,
1258                                                     -(__v2df) __C,
1259                                                     (__mmask8) __U);
1260}
1261
1262static __inline__ __m256d __DEFAULT_FN_ATTRS
1263_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1264{
1265  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
1266                                                    (__v4df) __B,
1267                                                    (__v4df) __C,
1268                                                    (__mmask8) __U);
1269}
1270
1271static __inline__ __m256d __DEFAULT_FN_ATTRS
1272_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1273{
1274  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
1275                                                     (__v4df) __B,
1276                                                     (__v4df) __C,
1277                                                     (__mmask8) __U);
1278}
1279
1280static __inline__ __m256d __DEFAULT_FN_ATTRS
1281_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1282{
1283  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
1284                                                     (__v4df) __B,
1285                                                     (__v4df) __C,
1286                                                     (__mmask8) __U);
1287}
1288
1289static __inline__ __m256d __DEFAULT_FN_ATTRS
1290_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1291{
1292  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
1293                                                    (__v4df) __B,
1294                                                    -(__v4df) __C,
1295                                                    (__mmask8) __U);
1296}
1297
1298static __inline__ __m256d __DEFAULT_FN_ATTRS
1299_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1300{
1301  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
1302                                                     (__v4df) __B,
1303                                                     -(__v4df) __C,
1304                                                     (__mmask8) __U);
1305}
1306
1307static __inline__ __m256d __DEFAULT_FN_ATTRS
1308_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1309{
1310  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
1311                                                     (__v4df) __B,
1312                                                     (__v4df) __C,
1313                                                     (__mmask8) __U);
1314}
1315
1316static __inline__ __m256d __DEFAULT_FN_ATTRS
1317_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1318{
1319  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
1320                                                     (__v4df) __B,
1321                                                     (__v4df) __C,
1322                                                     (__mmask8) __U);
1323}
1324
1325static __inline__ __m256d __DEFAULT_FN_ATTRS
1326_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1327{
1328  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
1329                                                     (__v4df) __B,
1330                                                     -(__v4df) __C,
1331                                                     (__mmask8) __U);
1332}
1333
1334static __inline__ __m128 __DEFAULT_FN_ATTRS
1335_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1336{
1337  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
1338                                                   (__v4sf) __B,
1339                                                   (__v4sf) __C,
1340                                                   (__mmask8) __U);
1341}
1342
1343static __inline__ __m128 __DEFAULT_FN_ATTRS
1344_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1345{
1346  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
1347                                                    (__v4sf) __B,
1348                                                    (__v4sf) __C,
1349                                                    (__mmask8) __U);
1350}
1351
1352static __inline__ __m128 __DEFAULT_FN_ATTRS
1353_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1354{
1355  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
1356                                                    (__v4sf) __B,
1357                                                    (__v4sf) __C,
1358                                                    (__mmask8) __U);
1359}
1360
1361static __inline__ __m128 __DEFAULT_FN_ATTRS
1362_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1363{
1364  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
1365                                                   (__v4sf) __B,
1366                                                   -(__v4sf) __C,
1367                                                   (__mmask8) __U);
1368}
1369
1370static __inline__ __m128 __DEFAULT_FN_ATTRS
1371_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1372{
1373  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
1374                                                    (__v4sf) __B,
1375                                                    -(__v4sf) __C,
1376                                                    (__mmask8) __U);
1377}
1378
1379static __inline__ __m128 __DEFAULT_FN_ATTRS
1380_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1381{
1382  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
1383                                                    (__v4sf) __B,
1384                                                    (__v4sf) __C,
1385                                                    (__mmask8) __U);
1386}
1387
1388static __inline__ __m128 __DEFAULT_FN_ATTRS
1389_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1390{
1391  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
1392                                                    (__v4sf) __B,
1393                                                    (__v4sf) __C,
1394                                                    (__mmask8) __U);
1395}
1396
1397static __inline__ __m128 __DEFAULT_FN_ATTRS
1398_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1399{
1400  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
1401                                                    (__v4sf) __B,
1402                                                    -(__v4sf) __C,
1403                                                    (__mmask8) __U);
1404}
1405
1406static __inline__ __m256 __DEFAULT_FN_ATTRS
1407_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1408{
1409  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
1410                                                   (__v8sf) __B,
1411                                                   (__v8sf) __C,
1412                                                   (__mmask8) __U);
1413}
1414
1415static __inline__ __m256 __DEFAULT_FN_ATTRS
1416_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1417{
1418  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
1419                                                    (__v8sf) __B,
1420                                                    (__v8sf) __C,
1421                                                    (__mmask8) __U);
1422}
1423
1424static __inline__ __m256 __DEFAULT_FN_ATTRS
1425_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1426{
1427  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
1428                                                    (__v8sf) __B,
1429                                                    (__v8sf) __C,
1430                                                    (__mmask8) __U);
1431}
1432
1433static __inline__ __m256 __DEFAULT_FN_ATTRS
1434_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1435{
1436  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
1437                                                   (__v8sf) __B,
1438                                                   -(__v8sf) __C,
1439                                                   (__mmask8) __U);
1440}
1441
1442static __inline__ __m256 __DEFAULT_FN_ATTRS
1443_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1444{
1445  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
1446                                                    (__v8sf) __B,
1447                                                    -(__v8sf) __C,
1448                                                    (__mmask8) __U);
1449}
1450
1451static __inline__ __m256 __DEFAULT_FN_ATTRS
1452_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1453{
1454  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
1455                                                    (__v8sf) __B,
1456                                                    (__v8sf) __C,
1457                                                    (__mmask8) __U);
1458}
1459
1460static __inline__ __m256 __DEFAULT_FN_ATTRS
1461_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1462{
1463  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
1464                                                    (__v8sf) __B,
1465                                                    (__v8sf) __C,
1466                                                    (__mmask8) __U);
1467}
1468
1469static __inline__ __m256 __DEFAULT_FN_ATTRS
1470_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1471{
1472  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
1473                                                    (__v8sf) __B,
1474                                                    -(__v8sf) __C,
1475                                                    (__mmask8) __U);
1476}
1477
1478static __inline__ __m128d __DEFAULT_FN_ATTRS
1479_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1480{
1481  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
1482                                                       (__v2df) __B,
1483                                                       (__v2df) __C,
1484                                                       (__mmask8) __U);
1485}
1486
1487static __inline__ __m128d __DEFAULT_FN_ATTRS
1488_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1489{
1490  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
1491                                                        (__v2df) __B,
1492                                                        (__v2df) __C,
1493                                                        (__mmask8)
1494                                                        __U);
1495}
1496
1497static __inline__ __m128d __DEFAULT_FN_ATTRS
1498_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1499{
1500  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
1501                                                        (__v2df) __B,
1502                                                        (__v2df) __C,
1503                                                        (__mmask8)
1504                                                        __U);
1505}
1506
1507static __inline__ __m128d __DEFAULT_FN_ATTRS
1508_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1509{
1510  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
1511                                                       (__v2df) __B,
1512                                                       -(__v2df) __C,
1513                                                       (__mmask8) __U);
1514}
1515
1516static __inline__ __m128d __DEFAULT_FN_ATTRS
1517_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1518{
1519  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
1520                                                        (__v2df) __B,
1521                                                        -(__v2df) __C,
1522                                                        (__mmask8)
1523                                                        __U);
1524}
1525
1526static __inline__ __m256d __DEFAULT_FN_ATTRS
1527_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1528{
1529  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
1530                                                       (__v4df) __B,
1531                                                       (__v4df) __C,
1532                                                       (__mmask8) __U);
1533}
1534
1535static __inline__ __m256d __DEFAULT_FN_ATTRS
1536_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1537{
1538  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
1539                                                        (__v4df) __B,
1540                                                        (__v4df) __C,
1541                                                        (__mmask8)
1542                                                        __U);
1543}
1544
1545static __inline__ __m256d __DEFAULT_FN_ATTRS
1546_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1547{
1548  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
1549                                                        (__v4df) __B,
1550                                                        (__v4df) __C,
1551                                                        (__mmask8)
1552                                                        __U);
1553}
1554
1555static __inline__ __m256d __DEFAULT_FN_ATTRS
1556_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1557{
1558  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
1559                                                       (__v4df) __B,
1560                                                       -(__v4df) __C,
1561                                                       (__mmask8) __U);
1562}
1563
1564static __inline__ __m256d __DEFAULT_FN_ATTRS
1565_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1566{
1567  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
1568                                                        (__v4df) __B,
1569                                                        -(__v4df) __C,
1570                                                        (__mmask8)
1571                                                        __U);
1572}
1573
1574static __inline__ __m128 __DEFAULT_FN_ATTRS
1575_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1576{
1577  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
1578                                                      (__v4sf) __B,
1579                                                      (__v4sf) __C,
1580                                                      (__mmask8) __U);
1581}
1582
1583static __inline__ __m128 __DEFAULT_FN_ATTRS
1584_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1585{
1586  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
1587                                                       (__v4sf) __B,
1588                                                       (__v4sf) __C,
1589                                                       (__mmask8) __U);
1590}
1591
1592static __inline__ __m128 __DEFAULT_FN_ATTRS
1593_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1594{
1595  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
1596                                                       (__v4sf) __B,
1597                                                       (__v4sf) __C,
1598                                                       (__mmask8) __U);
1599}
1600
1601static __inline__ __m128 __DEFAULT_FN_ATTRS
1602_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1603{
1604  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
1605                                                      (__v4sf) __B,
1606                                                      -(__v4sf) __C,
1607                                                      (__mmask8) __U);
1608}
1609
1610static __inline__ __m128 __DEFAULT_FN_ATTRS
1611_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1612{
1613  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
1614                                                       (__v4sf) __B,
1615                                                       -(__v4sf) __C,
1616                                                       (__mmask8) __U);
1617}
1618
1619static __inline__ __m256 __DEFAULT_FN_ATTRS
1620_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1621                         __m256 __C)
1622{
1623  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
1624                                                      (__v8sf) __B,
1625                                                      (__v8sf) __C,
1626                                                      (__mmask8) __U);
1627}
1628
1629static __inline__ __m256 __DEFAULT_FN_ATTRS
1630_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1631{
1632  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
1633                                                       (__v8sf) __B,
1634                                                       (__v8sf) __C,
1635                                                       (__mmask8) __U);
1636}
1637
1638static __inline__ __m256 __DEFAULT_FN_ATTRS
1639_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1640{
1641  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
1642                                                       (__v8sf) __B,
1643                                                       (__v8sf) __C,
1644                                                       (__mmask8) __U);
1645}
1646
1647static __inline__ __m256 __DEFAULT_FN_ATTRS
1648_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1649{
1650  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
1651                                                      (__v8sf) __B,
1652                                                      -(__v8sf) __C,
1653                                                      (__mmask8) __U);
1654}
1655
1656static __inline__ __m256 __DEFAULT_FN_ATTRS
1657_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1658{
1659  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
1660                                                       (__v8sf) __B,
1661                                                       -(__v8sf) __C,
1662                                                       (__mmask8) __U);
1663}
1664
1665static __inline__ __m128d __DEFAULT_FN_ATTRS
1666_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1667{
1668  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
1669                                                     (__v2df) __B,
1670                                                     (__v2df) __C,
1671                                                     (__mmask8) __U);
1672}
1673
1674static __inline__ __m256d __DEFAULT_FN_ATTRS
1675_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1676{
1677  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
1678                                                     (__v4df) __B,
1679                                                     (__v4df) __C,
1680                                                     (__mmask8) __U);
1681}
1682
1683static __inline__ __m128 __DEFAULT_FN_ATTRS
1684_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1685{
1686  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
1687                                                    (__v4sf) __B,
1688                                                    (__v4sf) __C,
1689                                                    (__mmask8) __U);
1690}
1691
1692static __inline__ __m256 __DEFAULT_FN_ATTRS
1693_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1694{
1695  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
1696                                                    (__v8sf) __B,
1697                                                    (__v8sf) __C,
1698                                                    (__mmask8) __U);
1699}
1700
1701static __inline__ __m128d __DEFAULT_FN_ATTRS
1702_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1703{
1704  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
1705                                                        (__v2df) __B,
1706                                                        (__v2df) __C,
1707                                                        (__mmask8)
1708                                                        __U);
1709}
1710
1711static __inline__ __m256d __DEFAULT_FN_ATTRS
1712_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1713{
1714  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
1715                                                        (__v4df) __B,
1716                                                        (__v4df) __C,
1717                                                        (__mmask8)
1718                                                        __U);
1719}
1720
1721static __inline__ __m128 __DEFAULT_FN_ATTRS
1722_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1723{
1724  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
1725                                                       (__v4sf) __B,
1726                                                       (__v4sf) __C,
1727                                                       (__mmask8) __U);
1728}
1729
1730static __inline__ __m256 __DEFAULT_FN_ATTRS
1731_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1732{
1733  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
1734                                                       (__v8sf) __B,
1735                                                       (__v8sf) __C,
1736                                                       (__mmask8) __U);
1737}
1738
1739static __inline__ __m128d __DEFAULT_FN_ATTRS
1740_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1741{
1742  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
1743                                                     (__v2df) __B,
1744                                                     (__v2df) __C,
1745                                                     (__mmask8) __U);
1746}
1747
1748static __inline__ __m256d __DEFAULT_FN_ATTRS
1749_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1750{
1751  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
1752                                                     (__v4df) __B,
1753                                                     (__v4df) __C,
1754                                                     (__mmask8) __U);
1755}
1756
1757static __inline__ __m128 __DEFAULT_FN_ATTRS
1758_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1759{
1760  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
1761                                                    (__v4sf) __B,
1762                                                    (__v4sf) __C,
1763                                                    (__mmask8) __U);
1764}
1765
1766static __inline__ __m256 __DEFAULT_FN_ATTRS
1767_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1768{
1769  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
1770                                                    (__v8sf) __B,
1771                                                    (__v8sf) __C,
1772                                                    (__mmask8) __U);
1773}
1774
1775static __inline__ __m128d __DEFAULT_FN_ATTRS
1776_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1777{
1778  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
1779                                                     (__v2df) __B,
1780                                                     (__v2df) __C,
1781                                                     (__mmask8) __U);
1782}
1783
1784static __inline__ __m128d __DEFAULT_FN_ATTRS
1785_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1786{
1787  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
1788                                                      (__v2df) __B,
1789                                                      (__v2df) __C,
1790                                                      (__mmask8) __U);
1791}
1792
1793static __inline__ __m256d __DEFAULT_FN_ATTRS
1794_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1795{
1796  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
1797                                                     (__v4df) __B,
1798                                                     (__v4df) __C,
1799                                                     (__mmask8) __U);
1800}
1801
1802static __inline__ __m256d __DEFAULT_FN_ATTRS
1803_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1804{
1805  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
1806                                                      (__v4df) __B,
1807                                                      (__v4df) __C,
1808                                                      (__mmask8) __U);
1809}
1810
1811static __inline__ __m128 __DEFAULT_FN_ATTRS
1812_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1813{
1814  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
1815                                                    (__v4sf) __B,
1816                                                    (__v4sf) __C,
1817                                                    (__mmask8) __U);
1818}
1819
1820static __inline__ __m128 __DEFAULT_FN_ATTRS
1821_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1822{
1823  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
1824                                                     (__v4sf) __B,
1825                                                     (__v4sf) __C,
1826                                                     (__mmask8) __U);
1827}
1828
1829static __inline__ __m256 __DEFAULT_FN_ATTRS
1830_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1831{
1832  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
1833                                                    (__v8sf) __B,
1834                                                    (__v8sf) __C,
1835                                                    (__mmask8) __U);
1836}
1837
1838static __inline__ __m256 __DEFAULT_FN_ATTRS
1839_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1840{
1841  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
1842                                                     (__v8sf) __B,
1843                                                     (__v8sf) __C,
1844                                                     (__mmask8) __U);
1845}
1846
1847static __inline__ __m128d __DEFAULT_FN_ATTRS
1848_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1849  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1850                                              (__v2df)_mm_add_pd(__A, __B),
1851                                              (__v2df)__W);
1852}
1853
1854static __inline__ __m128d __DEFAULT_FN_ATTRS
1855_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1856  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1857                                              (__v2df)_mm_add_pd(__A, __B),
1858                                              (__v2df)_mm_setzero_pd());
1859}
1860
1861static __inline__ __m256d __DEFAULT_FN_ATTRS
1862_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1863  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1864                                              (__v4df)_mm256_add_pd(__A, __B),
1865                                              (__v4df)__W);
1866}
1867
1868static __inline__ __m256d __DEFAULT_FN_ATTRS
1869_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1870  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1871                                              (__v4df)_mm256_add_pd(__A, __B),
1872                                              (__v4df)_mm256_setzero_pd());
1873}
1874
1875static __inline__ __m128 __DEFAULT_FN_ATTRS
1876_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1877  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1878                                             (__v4sf)_mm_add_ps(__A, __B),
1879                                             (__v4sf)__W);
1880}
1881
1882static __inline__ __m128 __DEFAULT_FN_ATTRS
1883_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1884  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1885                                             (__v4sf)_mm_add_ps(__A, __B),
1886                                             (__v4sf)_mm_setzero_ps());
1887}
1888
1889static __inline__ __m256 __DEFAULT_FN_ATTRS
1890_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1891  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1892                                             (__v8sf)_mm256_add_ps(__A, __B),
1893                                             (__v8sf)__W);
1894}
1895
1896static __inline__ __m256 __DEFAULT_FN_ATTRS
1897_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1898  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1899                                             (__v8sf)_mm256_add_ps(__A, __B),
1900                                             (__v8sf)_mm256_setzero_ps());
1901}
1902
1903static __inline__ __m128i __DEFAULT_FN_ATTRS
1904_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1905  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1906                (__v4si) __W,
1907                (__v4si) __A);
1908}
1909
1910static __inline__ __m256i __DEFAULT_FN_ATTRS
1911_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1912  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1913                (__v8si) __W,
1914                (__v8si) __A);
1915}
1916
1917static __inline__ __m128d __DEFAULT_FN_ATTRS
1918_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1919  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1920                 (__v2df) __W,
1921                 (__v2df) __A);
1922}
1923
1924static __inline__ __m256d __DEFAULT_FN_ATTRS
1925_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1926  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1927                 (__v4df) __W,
1928                 (__v4df) __A);
1929}
1930
1931static __inline__ __m128 __DEFAULT_FN_ATTRS
1932_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1933  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1934                (__v4sf) __W,
1935                (__v4sf) __A);
1936}
1937
1938static __inline__ __m256 __DEFAULT_FN_ATTRS
1939_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1940  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1941                (__v8sf) __W,
1942                (__v8sf) __A);
1943}
1944
1945static __inline__ __m128i __DEFAULT_FN_ATTRS
1946_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1947  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1948                (__v2di) __W,
1949                (__v2di) __A);
1950}
1951
1952static __inline__ __m256i __DEFAULT_FN_ATTRS
1953_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1954  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1955                (__v4di) __W,
1956                (__v4di) __A);
1957}
1958
1959static __inline__ __m128d __DEFAULT_FN_ATTRS
1960_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1961  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1962                  (__v2df) __W,
1963                  (__mmask8) __U);
1964}
1965
1966static __inline__ __m128d __DEFAULT_FN_ATTRS
1967_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
1968  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1969                  (__v2df)
1970                  _mm_setzero_pd (),
1971                  (__mmask8) __U);
1972}
1973
1974static __inline__ __m256d __DEFAULT_FN_ATTRS
1975_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1976  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1977                  (__v4df) __W,
1978                  (__mmask8) __U);
1979}
1980
1981static __inline__ __m256d __DEFAULT_FN_ATTRS
1982_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
1983  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1984                  (__v4df)
1985                  _mm256_setzero_pd (),
1986                  (__mmask8) __U);
1987}
1988
1989static __inline__ __m128i __DEFAULT_FN_ATTRS
1990_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1991  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1992                  (__v2di) __W,
1993                  (__mmask8) __U);
1994}
1995
1996static __inline__ __m128i __DEFAULT_FN_ATTRS
1997_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
1998  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1999                  (__v2di)
2000                  _mm_setzero_si128 (),
2001                  (__mmask8) __U);
2002}
2003
2004static __inline__ __m256i __DEFAULT_FN_ATTRS
2005_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2006  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
2007                  (__v4di) __W,
2008                  (__mmask8) __U);
2009}
2010
2011static __inline__ __m256i __DEFAULT_FN_ATTRS
2012_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
2013  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
2014                  (__v4di)
2015                  _mm256_setzero_si256 (),
2016                  (__mmask8) __U);
2017}
2018
2019static __inline__ __m128 __DEFAULT_FN_ATTRS
2020_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2021  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
2022                 (__v4sf) __W,
2023                 (__mmask8) __U);
2024}
2025
2026static __inline__ __m128 __DEFAULT_FN_ATTRS
2027_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
2028  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
2029                 (__v4sf)
2030                 _mm_setzero_ps (),
2031                 (__mmask8) __U);
2032}
2033
2034static __inline__ __m256 __DEFAULT_FN_ATTRS
2035_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2036  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
2037                 (__v8sf) __W,
2038                 (__mmask8) __U);
2039}
2040
2041static __inline__ __m256 __DEFAULT_FN_ATTRS
2042_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
2043  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
2044                 (__v8sf)
2045                 _mm256_setzero_ps (),
2046                 (__mmask8) __U);
2047}
2048
2049static __inline__ __m128i __DEFAULT_FN_ATTRS
2050_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2051  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
2052                  (__v4si) __W,
2053                  (__mmask8) __U);
2054}
2055
2056static __inline__ __m128i __DEFAULT_FN_ATTRS
2057_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
2058  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
2059                  (__v4si)
2060                  _mm_setzero_si128 (),
2061                  (__mmask8) __U);
2062}
2063
2064static __inline__ __m256i __DEFAULT_FN_ATTRS
2065_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2066  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
2067                  (__v8si) __W,
2068                  (__mmask8) __U);
2069}
2070
2071static __inline__ __m256i __DEFAULT_FN_ATTRS
2072_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
2073  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
2074                  (__v8si)
2075                  _mm256_setzero_si256 (),
2076                  (__mmask8) __U);
2077}
2078
2079static __inline__ void __DEFAULT_FN_ATTRS
2080_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
2081  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
2082            (__v2df) __A,
2083            (__mmask8) __U);
2084}
2085
2086static __inline__ void __DEFAULT_FN_ATTRS
2087_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
2088  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
2089            (__v4df) __A,
2090            (__mmask8) __U);
2091}
2092
2093static __inline__ void __DEFAULT_FN_ATTRS
2094_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
2095  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
2096            (__v2di) __A,
2097            (__mmask8) __U);
2098}
2099
2100static __inline__ void __DEFAULT_FN_ATTRS
2101_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
2102  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
2103            (__v4di) __A,
2104            (__mmask8) __U);
2105}
2106
2107static __inline__ void __DEFAULT_FN_ATTRS
2108_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
2109  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
2110            (__v4sf) __A,
2111            (__mmask8) __U);
2112}
2113
2114static __inline__ void __DEFAULT_FN_ATTRS
2115_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
2116  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
2117            (__v8sf) __A,
2118            (__mmask8) __U);
2119}
2120
2121static __inline__ void __DEFAULT_FN_ATTRS
2122_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
2123  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
2124            (__v4si) __A,
2125            (__mmask8) __U);
2126}
2127
2128static __inline__ void __DEFAULT_FN_ATTRS
2129_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
2130  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
2131            (__v8si) __A,
2132            (__mmask8) __U);
2133}
2134
2135static __inline__ __m128d __DEFAULT_FN_ATTRS
2136_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2137  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
2138                (__v2df) __W,
2139                (__mmask8) __U);
2140}
2141
2142static __inline__ __m128d __DEFAULT_FN_ATTRS
2143_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
2144  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
2145                (__v2df)
2146                _mm_setzero_pd (),
2147                (__mmask8) __U);
2148}
2149
2150static __inline__ __m256d __DEFAULT_FN_ATTRS
2151_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2152  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
2153                (__v4df) __W,
2154                (__mmask8) __U);
2155}
2156
2157static __inline__ __m256d __DEFAULT_FN_ATTRS
2158_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
2159  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
2160                (__v4df)
2161                _mm256_setzero_pd (),
2162                (__mmask8) __U);
2163}
2164
2165static __inline__ __m128 __DEFAULT_FN_ATTRS
2166_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2167  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
2168               (__v4sf) __W,
2169               (__mmask8) __U);
2170}
2171
2172static __inline__ __m128 __DEFAULT_FN_ATTRS
2173_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) {
2174  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
2175               (__v4sf)
2176               _mm_setzero_ps (),
2177               (__mmask8) __U);
2178}
2179
2180static __inline__ __m256 __DEFAULT_FN_ATTRS
2181_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2182  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
2183               (__v8sf) __W,
2184               (__mmask8) __U);
2185}
2186
2187static __inline__ __m256 __DEFAULT_FN_ATTRS
2188_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) {
2189  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
2190               (__v8sf)
2191               _mm256_setzero_ps (),
2192               (__mmask8) __U);
2193}
2194
2195static __inline__ __m128i __DEFAULT_FN_ATTRS
2196_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2197  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
2198                (__v4si) __W,
2199                (__mmask8) __U);
2200}
2201
2202static __inline__ __m128i __DEFAULT_FN_ATTRS
2203_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
2204  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
2205                (__v4si)
2206                _mm_setzero_si128 (),
2207                (__mmask8) __U);
2208}
2209
2210static __inline__ __m128i __DEFAULT_FN_ATTRS
2211_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2212  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
2213                (__v4si) __W,
2214                (__mmask8) __U);
2215}
2216
2217static __inline__ __m128i __DEFAULT_FN_ATTRS
2218_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
2219  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
2220                (__v4si)
2221                _mm_setzero_si128 (),
2222                (__mmask8) __U);
2223}
2224
2225static __inline__ __m128 __DEFAULT_FN_ATTRS
2226_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
2227  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2228            (__v4sf) __W,
2229            (__mmask8) __U);
2230}
2231
2232static __inline__ __m128 __DEFAULT_FN_ATTRS
2233_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
2234  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2235            (__v4sf)
2236            _mm_setzero_ps (),
2237            (__mmask8) __U);
2238}
2239
2240static __inline__ __m128 __DEFAULT_FN_ATTRS
2241_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2242  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
2243               (__v4sf) __W,
2244               (__mmask8) __U);
2245}
2246
2247static __inline__ __m128 __DEFAULT_FN_ATTRS
2248_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
2249  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
2250               (__v4sf)
2251               _mm_setzero_ps (),
2252               (__mmask8) __U);
2253}
2254
2255static __inline__ __m128i __DEFAULT_FN_ATTRS
2256_mm_cvtpd_epu32 (__m128d __A) {
2257  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2258                 (__v4si)
2259                 _mm_setzero_si128 (),
2260                 (__mmask8) -1);
2261}
2262
2263static __inline__ __m128i __DEFAULT_FN_ATTRS
2264_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2265  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2266                 (__v4si) __W,
2267                 (__mmask8) __U);
2268}
2269
2270static __inline__ __m128i __DEFAULT_FN_ATTRS
2271_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
2272  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2273                 (__v4si)
2274                 _mm_setzero_si128 (),
2275                 (__mmask8) __U);
2276}
2277
2278static __inline__ __m128i __DEFAULT_FN_ATTRS
2279_mm256_cvtpd_epu32 (__m256d __A) {
2280  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2281                 (__v4si)
2282                 _mm_setzero_si128 (),
2283                 (__mmask8) -1);
2284}
2285
2286static __inline__ __m128i __DEFAULT_FN_ATTRS
2287_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2288  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2289                 (__v4si) __W,
2290                 (__mmask8) __U);
2291}
2292
2293static __inline__ __m128i __DEFAULT_FN_ATTRS
2294_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
2295  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2296                 (__v4si)
2297                 _mm_setzero_si128 (),
2298                 (__mmask8) __U);
2299}
2300
2301static __inline__ __m128i __DEFAULT_FN_ATTRS
2302_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2303  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
2304                (__v4si) __W,
2305                (__mmask8) __U);
2306}
2307
2308static __inline__ __m128i __DEFAULT_FN_ATTRS
2309_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
2310  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
2311                (__v4si)
2312                _mm_setzero_si128 (),
2313                (__mmask8) __U);
2314}
2315
2316static __inline__ __m256i __DEFAULT_FN_ATTRS
2317_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2318  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
2319                (__v8si) __W,
2320                (__mmask8) __U);
2321}
2322
2323static __inline__ __m256i __DEFAULT_FN_ATTRS
2324_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
2325  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
2326                (__v8si)
2327                _mm256_setzero_si256 (),
2328                (__mmask8) __U);
2329}
2330
2331static __inline__ __m128d __DEFAULT_FN_ATTRS
2332_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2333  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
2334                (__v2df) __W,
2335                (__mmask8) __U);
2336}
2337
2338static __inline__ __m128d __DEFAULT_FN_ATTRS
2339_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2340  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
2341                (__v2df)
2342                _mm_setzero_pd (),
2343                (__mmask8) __U);
2344}
2345
2346static __inline__ __m256d __DEFAULT_FN_ATTRS
2347_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2348  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
2349                (__v4df) __W,
2350                (__mmask8) __U);
2351}
2352
2353static __inline__ __m256d __DEFAULT_FN_ATTRS
2354_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2355  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
2356                (__v4df)
2357                _mm256_setzero_pd (),
2358                (__mmask8) __U);
2359}
2360
2361static __inline__ __m128i __DEFAULT_FN_ATTRS
2362_mm_cvtps_epu32 (__m128 __A) {
2363  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2364                 (__v4si)
2365                 _mm_setzero_si128 (),
2366                 (__mmask8) -1);
2367}
2368
2369static __inline__ __m128i __DEFAULT_FN_ATTRS
2370_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2371  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2372                 (__v4si) __W,
2373                 (__mmask8) __U);
2374}
2375
2376static __inline__ __m128i __DEFAULT_FN_ATTRS
2377_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
2378  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2379                 (__v4si)
2380                 _mm_setzero_si128 (),
2381                 (__mmask8) __U);
2382}
2383
2384static __inline__ __m256i __DEFAULT_FN_ATTRS
2385_mm256_cvtps_epu32 (__m256 __A) {
2386  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2387                 (__v8si)
2388                 _mm256_setzero_si256 (),
2389                 (__mmask8) -1);
2390}
2391
2392static __inline__ __m256i __DEFAULT_FN_ATTRS
2393_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2394  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2395                 (__v8si) __W,
2396                 (__mmask8) __U);
2397}
2398
2399static __inline__ __m256i __DEFAULT_FN_ATTRS
2400_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
2401  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2402                 (__v8si)
2403                 _mm256_setzero_si256 (),
2404                 (__mmask8) __U);
2405}
2406
2407static __inline__ __m128i __DEFAULT_FN_ATTRS
2408_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2409  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2410                 (__v4si) __W,
2411                 (__mmask8) __U);
2412}
2413
2414static __inline__ __m128i __DEFAULT_FN_ATTRS
2415_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
2416  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2417                 (__v4si)
2418                 _mm_setzero_si128 (),
2419                 (__mmask8) __U);
2420}
2421
2422static __inline__ __m128i __DEFAULT_FN_ATTRS
2423_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2424  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
2425                 (__v4si) __W,
2426                 (__mmask8) __U);
2427}
2428
2429static __inline__ __m128i __DEFAULT_FN_ATTRS
2430_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
2431  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
2432                 (__v4si)
2433                 _mm_setzero_si128 (),
2434                 (__mmask8) __U);
2435}
2436
2437static __inline__ __m128i __DEFAULT_FN_ATTRS
2438_mm_cvttpd_epu32 (__m128d __A) {
2439  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2440                  (__v4si)
2441                  _mm_setzero_si128 (),
2442                  (__mmask8) -1);
2443}
2444
2445static __inline__ __m128i __DEFAULT_FN_ATTRS
2446_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2447  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2448                  (__v4si) __W,
2449                  (__mmask8) __U);
2450}
2451
2452static __inline__ __m128i __DEFAULT_FN_ATTRS
2453_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
2454  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2455                  (__v4si)
2456                  _mm_setzero_si128 (),
2457                  (__mmask8) __U);
2458}
2459
2460static __inline__ __m128i __DEFAULT_FN_ATTRS
2461_mm256_cvttpd_epu32 (__m256d __A) {
2462  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2463                  (__v4si)
2464                  _mm_setzero_si128 (),
2465                  (__mmask8) -1);
2466}
2467
2468static __inline__ __m128i __DEFAULT_FN_ATTRS
2469_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2470  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2471                  (__v4si) __W,
2472                  (__mmask8) __U);
2473}
2474
2475static __inline__ __m128i __DEFAULT_FN_ATTRS
2476_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
2477  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2478                  (__v4si)
2479                  _mm_setzero_si128 (),
2480                  (__mmask8) __U);
2481}
2482
2483static __inline__ __m128i __DEFAULT_FN_ATTRS
2484_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2485  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
2486                 (__v4si) __W,
2487                 (__mmask8) __U);
2488}
2489
2490static __inline__ __m128i __DEFAULT_FN_ATTRS
2491_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
2492  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
2493                 (__v4si)
2494                 _mm_setzero_si128 (),
2495                 (__mmask8) __U);
2496}
2497
2498static __inline__ __m256i __DEFAULT_FN_ATTRS
2499_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2500  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
2501                 (__v8si) __W,
2502                 (__mmask8) __U);
2503}
2504
2505static __inline__ __m256i __DEFAULT_FN_ATTRS
2506_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
2507  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
2508                 (__v8si)
2509                 _mm256_setzero_si256 (),
2510                 (__mmask8) __U);
2511}
2512
2513static __inline__ __m128i __DEFAULT_FN_ATTRS
2514_mm_cvttps_epu32 (__m128 __A) {
2515  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2516                  (__v4si)
2517                  _mm_setzero_si128 (),
2518                  (__mmask8) -1);
2519}
2520
2521static __inline__ __m128i __DEFAULT_FN_ATTRS
2522_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2523  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2524                  (__v4si) __W,
2525                  (__mmask8) __U);
2526}
2527
2528static __inline__ __m128i __DEFAULT_FN_ATTRS
2529_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
2530  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2531                  (__v4si)
2532                  _mm_setzero_si128 (),
2533                  (__mmask8) __U);
2534}
2535
2536static __inline__ __m256i __DEFAULT_FN_ATTRS
2537_mm256_cvttps_epu32 (__m256 __A) {
2538  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2539                  (__v8si)
2540                  _mm256_setzero_si256 (),
2541                  (__mmask8) -1);
2542}
2543
2544static __inline__ __m256i __DEFAULT_FN_ATTRS
2545_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2546  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2547                  (__v8si) __W,
2548                  (__mmask8) __U);
2549}
2550
2551static __inline__ __m256i __DEFAULT_FN_ATTRS
2552_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
2553  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2554                  (__v8si)
2555                  _mm256_setzero_si256 (),
2556                  (__mmask8) __U);
2557}
2558
2559static __inline__ __m128d __DEFAULT_FN_ATTRS
2560_mm_cvtepu32_pd (__m128i __A) {
2561  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
2562                 (__v2df)
2563                 _mm_setzero_pd (),
2564                 (__mmask8) -1);
2565}
2566
2567static __inline__ __m128d __DEFAULT_FN_ATTRS
2568_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2569  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
2570                 (__v2df) __W,
2571                 (__mmask8) __U);
2572}
2573
2574static __inline__ __m128d __DEFAULT_FN_ATTRS
2575_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2576  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
2577                 (__v2df)
2578                 _mm_setzero_pd (),
2579                 (__mmask8) __U);
2580}
2581
2582static __inline__ __m256d __DEFAULT_FN_ATTRS
2583_mm256_cvtepu32_pd (__m128i __A) {
2584  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
2585                 (__v4df)
2586                 _mm256_setzero_pd (),
2587                 (__mmask8) -1);
2588}
2589
2590static __inline__ __m256d __DEFAULT_FN_ATTRS
2591_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2592  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
2593                 (__v4df) __W,
2594                 (__mmask8) __U);
2595}
2596
2597static __inline__ __m256d __DEFAULT_FN_ATTRS
2598_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
2599  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
2600                 (__v4df)
2601                 _mm256_setzero_pd (),
2602                 (__mmask8) __U);
2603}
2604
2605static __inline__ __m128 __DEFAULT_FN_ATTRS
2606_mm_cvtepu32_ps (__m128i __A) {
2607  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2608                (__v4sf)
2609                _mm_setzero_ps (),
2610                (__mmask8) -1);
2611}
2612
2613static __inline__ __m128 __DEFAULT_FN_ATTRS
2614_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2615  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2616                (__v4sf) __W,
2617                (__mmask8) __U);
2618}
2619
2620static __inline__ __m128 __DEFAULT_FN_ATTRS
2621_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
2622  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
2623                (__v4sf)
2624                _mm_setzero_ps (),
2625                (__mmask8) __U);
2626}
2627
2628static __inline__ __m256 __DEFAULT_FN_ATTRS
2629_mm256_cvtepu32_ps (__m256i __A) {
2630  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2631                (__v8sf)
2632                _mm256_setzero_ps (),
2633                (__mmask8) -1);
2634}
2635
2636static __inline__ __m256 __DEFAULT_FN_ATTRS
2637_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2638  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2639                (__v8sf) __W,
2640                (__mmask8) __U);
2641}
2642
2643static __inline__ __m256 __DEFAULT_FN_ATTRS
2644_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
2645  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
2646                (__v8sf)
2647                _mm256_setzero_ps (),
2648                (__mmask8) __U);
2649}
2650
2651static __inline__ __m128d __DEFAULT_FN_ATTRS
2652_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2653  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2654                                              (__v2df)_mm_div_pd(__A, __B),
2655                                              (__v2df)__W);
2656}
2657
2658static __inline__ __m128d __DEFAULT_FN_ATTRS
2659_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2660  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2661                                              (__v2df)_mm_div_pd(__A, __B),
2662                                              (__v2df)_mm_setzero_pd());
2663}
2664
2665static __inline__ __m256d __DEFAULT_FN_ATTRS
2666_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2667  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2668                                              (__v4df)_mm256_div_pd(__A, __B),
2669                                              (__v4df)__W);
2670}
2671
2672static __inline__ __m256d __DEFAULT_FN_ATTRS
2673_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2674  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2675                                              (__v4df)_mm256_div_pd(__A, __B),
2676                                              (__v4df)_mm256_setzero_pd());
2677}
2678
2679static __inline__ __m128 __DEFAULT_FN_ATTRS
2680_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2681  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2682                                             (__v4sf)_mm_div_ps(__A, __B),
2683                                             (__v4sf)__W);
2684}
2685
2686static __inline__ __m128 __DEFAULT_FN_ATTRS
2687_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2688  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2689                                             (__v4sf)_mm_div_ps(__A, __B),
2690                                             (__v4sf)_mm_setzero_ps());
2691}
2692
2693static __inline__ __m256 __DEFAULT_FN_ATTRS
2694_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2695  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2696                                             (__v8sf)_mm256_div_ps(__A, __B),
2697                                             (__v8sf)__W);
2698}
2699
2700static __inline__ __m256 __DEFAULT_FN_ATTRS
2701_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2702  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2703                                             (__v8sf)_mm256_div_ps(__A, __B),
2704                                             (__v8sf)_mm256_setzero_ps());
2705}
2706
2707static __inline__ __m128d __DEFAULT_FN_ATTRS
2708_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2709  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2710                (__v2df) __W,
2711                (__mmask8) __U);
2712}
2713
2714static __inline__ __m128d __DEFAULT_FN_ATTRS
2715_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2716  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2717                 (__v2df)
2718                 _mm_setzero_pd (),
2719                 (__mmask8) __U);
2720}
2721
2722static __inline__ __m256d __DEFAULT_FN_ATTRS
2723_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2724  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2725                (__v4df) __W,
2726                (__mmask8) __U);
2727}
2728
2729static __inline__ __m256d __DEFAULT_FN_ATTRS
2730_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
2731  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2732                 (__v4df)
2733                 _mm256_setzero_pd (),
2734                 (__mmask8) __U);
2735}
2736
2737static __inline__ __m128i __DEFAULT_FN_ATTRS
2738_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2739  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2740                (__v2di) __W,
2741                (__mmask8) __U);
2742}
2743
2744static __inline__ __m128i __DEFAULT_FN_ATTRS
2745_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
2746  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2747                 (__v2di)
2748                 _mm_setzero_si128 (),
2749                 (__mmask8) __U);
2750}
2751
2752static __inline__ __m256i __DEFAULT_FN_ATTRS
2753_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2754  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2755                (__v4di) __W,
2756                (__mmask8) __U);
2757}
2758
2759static __inline__ __m256i __DEFAULT_FN_ATTRS
2760_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
2761  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2762                 (__v4di)
2763                 _mm256_setzero_si256 (),
2764                 (__mmask8) __U);
2765}
2766
2767static __inline__ __m128d __DEFAULT_FN_ATTRS
2768_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2769  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2770              (__v2df) __W,
2771              (__mmask8)
2772              __U);
2773}
2774
2775static __inline__ __m128d __DEFAULT_FN_ATTRS
2776_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2777  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
2778               (__v2df)
2779               _mm_setzero_pd (),
2780               (__mmask8)
2781               __U);
2782}
2783
2784static __inline__ __m256d __DEFAULT_FN_ATTRS
2785_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2786  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2787              (__v4df) __W,
2788              (__mmask8)
2789              __U);
2790}
2791
2792static __inline__ __m256d __DEFAULT_FN_ATTRS
2793_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2794  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
2795               (__v4df)
2796               _mm256_setzero_pd (),
2797               (__mmask8)
2798               __U);
2799}
2800
2801static __inline__ __m128i __DEFAULT_FN_ATTRS
2802_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2803  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2804              (__v2di) __W,
2805              (__mmask8)
2806              __U);
2807}
2808
2809static __inline__ __m128i __DEFAULT_FN_ATTRS
2810_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2811  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
2812               (__v2di)
2813               _mm_setzero_si128 (),
2814               (__mmask8)
2815               __U);
2816}
2817
2818static __inline__ __m256i __DEFAULT_FN_ATTRS
2819_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
2820             void const *__P) {
2821  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2822              (__v4di) __W,
2823              (__mmask8)
2824              __U);
2825}
2826
2827static __inline__ __m256i __DEFAULT_FN_ATTRS
2828_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
2829  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
2830               (__v4di)
2831               _mm256_setzero_si256 (),
2832               (__mmask8)
2833               __U);
2834}
2835
2836static __inline__ __m128 __DEFAULT_FN_ATTRS
2837_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2838  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2839                   (__v4sf) __W,
2840                   (__mmask8) __U);
2841}
2842
2843static __inline__ __m128 __DEFAULT_FN_ATTRS
2844_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2845  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
2846              (__v4sf)
2847              _mm_setzero_ps (),
2848              (__mmask8)
2849              __U);
2850}
2851
2852static __inline__ __m256 __DEFAULT_FN_ATTRS
2853_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2854  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2855                   (__v8sf) __W,
2856                   (__mmask8) __U);
2857}
2858
2859static __inline__ __m256 __DEFAULT_FN_ATTRS
2860_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2861  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
2862              (__v8sf)
2863              _mm256_setzero_ps (),
2864              (__mmask8)
2865              __U);
2866}
2867
2868static __inline__ __m128i __DEFAULT_FN_ATTRS
2869_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2870  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2871              (__v4si) __W,
2872              (__mmask8)
2873              __U);
2874}
2875
2876static __inline__ __m128i __DEFAULT_FN_ATTRS
2877_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2878  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
2879               (__v4si)
2880               _mm_setzero_si128 (),
2881               (__mmask8)     __U);
2882}
2883
2884static __inline__ __m256i __DEFAULT_FN_ATTRS
2885_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
2886             void const *__P) {
2887  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2888              (__v8si) __W,
2889              (__mmask8)
2890              __U);
2891}
2892
2893static __inline__ __m256i __DEFAULT_FN_ATTRS
2894_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
2895  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
2896               (__v8si)
2897               _mm256_setzero_si256 (),
2898               (__mmask8)
2899               __U);
2900}
2901
2902static __inline__ __m128 __DEFAULT_FN_ATTRS
2903_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2904  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2905               (__v4sf) __W,
2906               (__mmask8) __U);
2907}
2908
2909static __inline__ __m128 __DEFAULT_FN_ATTRS
2910_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
2911  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2912                (__v4sf)
2913                _mm_setzero_ps (),
2914                (__mmask8) __U);
2915}
2916
2917static __inline__ __m256 __DEFAULT_FN_ATTRS
2918_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2919  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2920               (__v8sf) __W,
2921               (__mmask8) __U);
2922}
2923
2924static __inline__ __m256 __DEFAULT_FN_ATTRS
2925_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
2926  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2927                (__v8sf)
2928                _mm256_setzero_ps (),
2929                (__mmask8) __U);
2930}
2931
2932static __inline__ __m128i __DEFAULT_FN_ATTRS
2933_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2934  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2935                (__v4si) __W,
2936                (__mmask8) __U);
2937}
2938
2939static __inline__ __m128i __DEFAULT_FN_ATTRS
2940_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
2941  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2942                 (__v4si)
2943                 _mm_setzero_si128 (),
2944                 (__mmask8) __U);
2945}
2946
2947static __inline__ __m256i __DEFAULT_FN_ATTRS
2948_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2949  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2950                (__v8si) __W,
2951                (__mmask8) __U);
2952}
2953
2954static __inline__ __m256i __DEFAULT_FN_ATTRS
2955_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
2956  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2957                 (__v8si)
2958                 _mm256_setzero_si256 (),
2959                 (__mmask8) __U);
2960}
2961
2962static __inline__ __m128d __DEFAULT_FN_ATTRS
2963_mm_getexp_pd (__m128d __A) {
2964  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2965                (__v2df)
2966                _mm_setzero_pd (),
2967                (__mmask8) -1);
2968}
2969
2970static __inline__ __m128d __DEFAULT_FN_ATTRS
2971_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2972  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2973                (__v2df) __W,
2974                (__mmask8) __U);
2975}
2976
2977static __inline__ __m128d __DEFAULT_FN_ATTRS
2978_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2979  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2980                (__v2df)
2981                _mm_setzero_pd (),
2982                (__mmask8) __U);
2983}
2984
2985static __inline__ __m256d __DEFAULT_FN_ATTRS
2986_mm256_getexp_pd (__m256d __A) {
2987  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2988                (__v4df)
2989                _mm256_setzero_pd (),
2990                (__mmask8) -1);
2991}
2992
2993static __inline__ __m256d __DEFAULT_FN_ATTRS
2994_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2995  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2996                (__v4df) __W,
2997                (__mmask8) __U);
2998}
2999
3000static __inline__ __m256d __DEFAULT_FN_ATTRS
3001_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
3002  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3003                (__v4df)
3004                _mm256_setzero_pd (),
3005                (__mmask8) __U);
3006}
3007
3008static __inline__ __m128 __DEFAULT_FN_ATTRS
3009_mm_getexp_ps (__m128 __A) {
3010  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3011               (__v4sf)
3012               _mm_setzero_ps (),
3013               (__mmask8) -1);
3014}
3015
3016static __inline__ __m128 __DEFAULT_FN_ATTRS
3017_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
3018  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3019               (__v4sf) __W,
3020               (__mmask8) __U);
3021}
3022
3023static __inline__ __m128 __DEFAULT_FN_ATTRS
3024_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
3025  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3026               (__v4sf)
3027               _mm_setzero_ps (),
3028               (__mmask8) __U);
3029}
3030
3031static __inline__ __m256 __DEFAULT_FN_ATTRS
3032_mm256_getexp_ps (__m256 __A) {
3033  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3034               (__v8sf)
3035               _mm256_setzero_ps (),
3036               (__mmask8) -1);
3037}
3038
3039static __inline__ __m256 __DEFAULT_FN_ATTRS
3040_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
3041  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3042               (__v8sf) __W,
3043               (__mmask8) __U);
3044}
3045
3046static __inline__ __m256 __DEFAULT_FN_ATTRS
3047_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
3048  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3049               (__v8sf)
3050               _mm256_setzero_ps (),
3051               (__mmask8) __U);
3052}
3053
3054static __inline__ __m128d __DEFAULT_FN_ATTRS
3055_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3056  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3057                                              (__v2df)_mm_max_pd(__A, __B),
3058                                              (__v2df)__W);
3059}
3060
3061static __inline__ __m128d __DEFAULT_FN_ATTRS
3062_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3063  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3064                                              (__v2df)_mm_max_pd(__A, __B),
3065                                              (__v2df)_mm_setzero_pd());
3066}
3067
3068static __inline__ __m256d __DEFAULT_FN_ATTRS
3069_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3070  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3071                                              (__v4df)_mm256_max_pd(__A, __B),
3072                                              (__v4df)__W);
3073}
3074
3075static __inline__ __m256d __DEFAULT_FN_ATTRS
3076_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3077  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3078                                              (__v4df)_mm256_max_pd(__A, __B),
3079                                              (__v4df)_mm256_setzero_pd());
3080}
3081
3082static __inline__ __m128 __DEFAULT_FN_ATTRS
3083_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3084  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3085                                             (__v4sf)_mm_max_ps(__A, __B),
3086                                             (__v4sf)__W);
3087}
3088
3089static __inline__ __m128 __DEFAULT_FN_ATTRS
3090_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3091  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3092                                             (__v4sf)_mm_max_ps(__A, __B),
3093                                             (__v4sf)_mm_setzero_ps());
3094}
3095
3096static __inline__ __m256 __DEFAULT_FN_ATTRS
3097_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3098  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3099                                             (__v8sf)_mm256_max_ps(__A, __B),
3100                                             (__v8sf)__W);
3101}
3102
3103static __inline__ __m256 __DEFAULT_FN_ATTRS
3104_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3105  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3106                                             (__v8sf)_mm256_max_ps(__A, __B),
3107                                             (__v8sf)_mm256_setzero_ps());
3108}
3109
3110static __inline__ __m128d __DEFAULT_FN_ATTRS
3111_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3112  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3113                                              (__v2df)_mm_min_pd(__A, __B),
3114                                              (__v2df)__W);
3115}
3116
3117static __inline__ __m128d __DEFAULT_FN_ATTRS
3118_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3119  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3120                                              (__v2df)_mm_min_pd(__A, __B),
3121                                              (__v2df)_mm_setzero_pd());
3122}
3123
3124static __inline__ __m256d __DEFAULT_FN_ATTRS
3125_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3126  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3127                                              (__v4df)_mm256_min_pd(__A, __B),
3128                                              (__v4df)__W);
3129}
3130
3131static __inline__ __m256d __DEFAULT_FN_ATTRS
3132_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3133  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3134                                              (__v4df)_mm256_min_pd(__A, __B),
3135                                              (__v4df)_mm256_setzero_pd());
3136}
3137
3138static __inline__ __m128 __DEFAULT_FN_ATTRS
3139_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3140  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3141                                             (__v4sf)_mm_min_ps(__A, __B),
3142                                             (__v4sf)__W);
3143}
3144
3145static __inline__ __m128 __DEFAULT_FN_ATTRS
3146_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3147  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3148                                             (__v4sf)_mm_min_ps(__A, __B),
3149                                             (__v4sf)_mm_setzero_ps());
3150}
3151
3152static __inline__ __m256 __DEFAULT_FN_ATTRS
3153_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3154  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3155                                             (__v8sf)_mm256_min_ps(__A, __B),
3156                                             (__v8sf)__W);
3157}
3158
3159static __inline__ __m256 __DEFAULT_FN_ATTRS
3160_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3161  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3162                                             (__v8sf)_mm256_min_ps(__A, __B),
3163                                             (__v8sf)_mm256_setzero_ps());
3164}
3165
3166static __inline__ __m128d __DEFAULT_FN_ATTRS
3167_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3168  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3169                                              (__v2df)_mm_mul_pd(__A, __B),
3170                                              (__v2df)__W);
3171}
3172
3173static __inline__ __m128d __DEFAULT_FN_ATTRS
3174_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3175  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3176                                              (__v2df)_mm_mul_pd(__A, __B),
3177                                              (__v2df)_mm_setzero_pd());
3178}
3179
3180static __inline__ __m256d __DEFAULT_FN_ATTRS
3181_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3182  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3183                                              (__v4df)_mm256_mul_pd(__A, __B),
3184                                              (__v4df)__W);
3185}
3186
3187static __inline__ __m256d __DEFAULT_FN_ATTRS
3188_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3189  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3190                                              (__v4df)_mm256_mul_pd(__A, __B),
3191                                              (__v4df)_mm256_setzero_pd());
3192}
3193
3194static __inline__ __m128 __DEFAULT_FN_ATTRS
3195_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3196  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3197                                             (__v4sf)_mm_mul_ps(__A, __B),
3198                                             (__v4sf)__W);
3199}
3200
3201static __inline__ __m128 __DEFAULT_FN_ATTRS
3202_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3203  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3204                                             (__v4sf)_mm_mul_ps(__A, __B),
3205                                             (__v4sf)_mm_setzero_ps());
3206}
3207
3208static __inline__ __m256 __DEFAULT_FN_ATTRS
3209_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3210  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3211                                             (__v8sf)_mm256_mul_ps(__A, __B),
3212                                             (__v8sf)__W);
3213}
3214
3215static __inline__ __m256 __DEFAULT_FN_ATTRS
3216_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3217  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3218                                             (__v8sf)_mm256_mul_ps(__A, __B),
3219                                             (__v8sf)_mm256_setzero_ps());
3220}
3221
3222static __inline__ __m128i __DEFAULT_FN_ATTRS
3223_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
3224  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3225                                             (__v4si)_mm_abs_epi32(__A),
3226                                             (__v4si)__W);
3227}
3228
3229static __inline__ __m128i __DEFAULT_FN_ATTRS
3230_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
3231  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3232                                             (__v4si)_mm_abs_epi32(__A),
3233                                             (__v4si)_mm_setzero_si128());
3234}
3235
3236static __inline__ __m256i __DEFAULT_FN_ATTRS
3237_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
3238  return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
3239                                             (__v8si)_mm256_abs_epi32(__A),
3240                                             (__v8si)__W);
3241}
3242
3243static __inline__ __m256i __DEFAULT_FN_ATTRS
3244_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
3245  return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
3246                                             (__v8si)_mm256_abs_epi32(__A),
3247                                             (__v8si)_mm256_setzero_si256());
3248}
3249
3250static __inline__ __m128i __DEFAULT_FN_ATTRS
3251_mm_abs_epi64 (__m128i __A) {
3252  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
3253             (__v2di)
3254             _mm_setzero_si128 (),
3255             (__mmask8) -1);
3256}
3257
3258static __inline__ __m128i __DEFAULT_FN_ATTRS
3259_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
3260  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
3261             (__v2di) __W,
3262             (__mmask8) __U);
3263}
3264
3265static __inline__ __m128i __DEFAULT_FN_ATTRS
3266_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3267  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
3268             (__v2di)
3269             _mm_setzero_si128 (),
3270             (__mmask8) __U);
3271}
3272
3273static __inline__ __m256i __DEFAULT_FN_ATTRS
3274_mm256_abs_epi64 (__m256i __A) {
3275  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
3276             (__v4di)
3277             _mm256_setzero_si256 (),
3278             (__mmask8) -1);
3279}
3280
3281static __inline__ __m256i __DEFAULT_FN_ATTRS
3282_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3283  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
3284             (__v4di) __W,
3285             (__mmask8) __U);
3286}
3287
3288static __inline__ __m256i __DEFAULT_FN_ATTRS
3289_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
3290  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
3291             (__v4di)
3292             _mm256_setzero_si256 (),
3293             (__mmask8) __U);
3294}
3295
3296static __inline__ __m128i __DEFAULT_FN_ATTRS
3297_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3298  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3299                                             (__v4si)_mm_max_epi32(__A, __B),
3300                                             (__v4si)_mm_setzero_si128());
3301}
3302
3303static __inline__ __m128i __DEFAULT_FN_ATTRS
3304_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3305  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3306                                             (__v4si)_mm_max_epi32(__A, __B),
3307                                             (__v4si)__W);
3308}
3309
3310static __inline__ __m256i __DEFAULT_FN_ATTRS
3311_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3312  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3313                                             (__v8si)_mm256_max_epi32(__A, __B),
3314                                             (__v8si)_mm256_setzero_si256());
3315}
3316
3317static __inline__ __m256i __DEFAULT_FN_ATTRS
3318_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3319  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3320                                             (__v8si)_mm256_max_epi32(__A, __B),
3321                                             (__v8si)__W);
3322}
3323
3324static __inline__ __m128i __DEFAULT_FN_ATTRS
3325_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3326  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
3327              (__v2di) __B,
3328              (__v2di)
3329              _mm_setzero_si128 (),
3330              __M);
3331}
3332
3333static __inline__ __m128i __DEFAULT_FN_ATTRS
3334_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
3335        __m128i __B) {
3336  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
3337              (__v2di) __B,
3338              (__v2di) __W, __M);
3339}
3340
3341static __inline__ __m128i __DEFAULT_FN_ATTRS
3342_mm_max_epi64 (__m128i __A, __m128i __B) {
3343  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
3344              (__v2di) __B,
3345              (__v2di)
3346              _mm_setzero_si128 (),
3347              (__mmask8) -1);
3348}
3349
3350static __inline__ __m256i __DEFAULT_FN_ATTRS
3351_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3352  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
3353              (__v4di) __B,
3354              (__v4di)
3355              _mm256_setzero_si256 (),
3356              __M);
3357}
3358
3359static __inline__ __m256i __DEFAULT_FN_ATTRS
3360_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
3361           __m256i __B) {
3362  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
3363              (__v4di) __B,
3364              (__v4di) __W, __M);
3365}
3366
3367static __inline__ __m256i __DEFAULT_FN_ATTRS
3368_mm256_max_epi64 (__m256i __A, __m256i __B) {
3369  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
3370              (__v4di) __B,
3371              (__v4di)
3372              _mm256_setzero_si256 (),
3373              (__mmask8) -1);
3374}
3375
3376static __inline__ __m128i __DEFAULT_FN_ATTRS
3377_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3378  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3379                                             (__v4si)_mm_max_epu32(__A, __B),
3380                                             (__v4si)_mm_setzero_si128());
3381}
3382
3383static __inline__ __m128i __DEFAULT_FN_ATTRS
3384_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3385  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3386                                             (__v4si)_mm_max_epu32(__A, __B),
3387                                             (__v4si)__W);
3388}
3389
3390static __inline__ __m256i __DEFAULT_FN_ATTRS
3391_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3392  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3393                                             (__v8si)_mm256_max_epu32(__A, __B),
3394                                             (__v8si)_mm256_setzero_si256());
3395}
3396
3397static __inline__ __m256i __DEFAULT_FN_ATTRS
3398_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3399  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3400                                             (__v8si)_mm256_max_epu32(__A, __B),
3401                                             (__v8si)__W);
3402}
3403
3404static __inline__ __m128i __DEFAULT_FN_ATTRS
3405_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3406  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3407              (__v2di) __B,
3408              (__v2di)
3409              _mm_setzero_si128 (),
3410              __M);
3411}
3412
3413static __inline__ __m128i __DEFAULT_FN_ATTRS
3414_mm_max_epu64 (__m128i __A, __m128i __B) {
3415  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3416              (__v2di) __B,
3417              (__v2di)
3418              _mm_setzero_si128 (),
3419              (__mmask8) -1);
3420}
3421
3422static __inline__ __m128i __DEFAULT_FN_ATTRS
3423_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
3424        __m128i __B) {
3425  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
3426              (__v2di) __B,
3427              (__v2di) __W, __M);
3428}
3429
3430static __inline__ __m256i __DEFAULT_FN_ATTRS
3431_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3432  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3433              (__v4di) __B,
3434              (__v4di)
3435              _mm256_setzero_si256 (),
3436              __M);
3437}
3438
3439static __inline__ __m256i __DEFAULT_FN_ATTRS
3440_mm256_max_epu64 (__m256i __A, __m256i __B) {
3441  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3442              (__v4di) __B,
3443              (__v4di)
3444              _mm256_setzero_si256 (),
3445              (__mmask8) -1);
3446}
3447
3448static __inline__ __m256i __DEFAULT_FN_ATTRS
3449_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
3450           __m256i __B) {
3451  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
3452              (__v4di) __B,
3453              (__v4di) __W, __M);
3454}
3455
3456static __inline__ __m128i __DEFAULT_FN_ATTRS
3457_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3458  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3459                                             (__v4si)_mm_min_epi32(__A, __B),
3460                                             (__v4si)_mm_setzero_si128());
3461}
3462
3463static __inline__ __m128i __DEFAULT_FN_ATTRS
3464_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3465  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3466                                             (__v4si)_mm_min_epi32(__A, __B),
3467                                             (__v4si)__W);
3468}
3469
3470static __inline__ __m256i __DEFAULT_FN_ATTRS
3471_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3472  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3473                                             (__v8si)_mm256_min_epi32(__A, __B),
3474                                             (__v8si)_mm256_setzero_si256());
3475}
3476
3477static __inline__ __m256i __DEFAULT_FN_ATTRS
3478_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3479  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3480                                             (__v8si)_mm256_min_epi32(__A, __B),
3481                                             (__v8si)__W);
3482}
3483
3484static __inline__ __m128i __DEFAULT_FN_ATTRS
3485_mm_min_epi64 (__m128i __A, __m128i __B) {
3486  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3487              (__v2di) __B,
3488              (__v2di)
3489              _mm_setzero_si128 (),
3490              (__mmask8) -1);
3491}
3492
3493static __inline__ __m128i __DEFAULT_FN_ATTRS
3494_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
3495        __m128i __B) {
3496  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3497              (__v2di) __B,
3498              (__v2di) __W, __M);
3499}
3500
3501static __inline__ __m128i __DEFAULT_FN_ATTRS
3502_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3503  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
3504              (__v2di) __B,
3505              (__v2di)
3506              _mm_setzero_si128 (),
3507              __M);
3508}
3509
3510static __inline__ __m256i __DEFAULT_FN_ATTRS
3511_mm256_min_epi64 (__m256i __A, __m256i __B) {
3512  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3513              (__v4di) __B,
3514              (__v4di)
3515              _mm256_setzero_si256 (),
3516              (__mmask8) -1);
3517}
3518
3519static __inline__ __m256i __DEFAULT_FN_ATTRS
3520_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
3521           __m256i __B) {
3522  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3523              (__v4di) __B,
3524              (__v4di) __W, __M);
3525}
3526
3527static __inline__ __m256i __DEFAULT_FN_ATTRS
3528_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3529  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
3530              (__v4di) __B,
3531              (__v4di)
3532              _mm256_setzero_si256 (),
3533              __M);
3534}
3535
3536static __inline__ __m128i __DEFAULT_FN_ATTRS
3537_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3538  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3539                                             (__v4si)_mm_min_epu32(__A, __B),
3540                                             (__v4si)_mm_setzero_si128());
3541}
3542
3543static __inline__ __m128i __DEFAULT_FN_ATTRS
3544_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3545  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3546                                             (__v4si)_mm_min_epu32(__A, __B),
3547                                             (__v4si)__W);
3548}
3549
3550static __inline__ __m256i __DEFAULT_FN_ATTRS
3551_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3552  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3553                                             (__v8si)_mm256_min_epu32(__A, __B),
3554                                             (__v8si)_mm256_setzero_si256());
3555}
3556
3557static __inline__ __m256i __DEFAULT_FN_ATTRS
3558_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3559  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3560                                             (__v8si)_mm256_min_epu32(__A, __B),
3561                                             (__v8si)__W);
3562}
3563
3564static __inline__ __m128i __DEFAULT_FN_ATTRS
3565_mm_min_epu64 (__m128i __A, __m128i __B) {
3566  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3567              (__v2di) __B,
3568              (__v2di)
3569              _mm_setzero_si128 (),
3570              (__mmask8) -1);
3571}
3572
3573static __inline__ __m128i __DEFAULT_FN_ATTRS
3574_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
3575        __m128i __B) {
3576  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3577              (__v2di) __B,
3578              (__v2di) __W, __M);
3579}
3580
3581static __inline__ __m128i __DEFAULT_FN_ATTRS
3582_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3583  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
3584              (__v2di) __B,
3585              (__v2di)
3586              _mm_setzero_si128 (),
3587              __M);
3588}
3589
3590static __inline__ __m256i __DEFAULT_FN_ATTRS
3591_mm256_min_epu64 (__m256i __A, __m256i __B) {
3592  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3593              (__v4di) __B,
3594              (__v4di)
3595              _mm256_setzero_si256 (),
3596              (__mmask8) -1);
3597}
3598
3599static __inline__ __m256i __DEFAULT_FN_ATTRS
3600_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
3601           __m256i __B) {
3602  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3603              (__v4di) __B,
3604              (__v4di) __W, __M);
3605}
3606
3607static __inline__ __m256i __DEFAULT_FN_ATTRS
3608_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3609  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
3610              (__v4di) __B,
3611              (__v4di)
3612              _mm256_setzero_si256 (),
3613              __M);
3614}
3615
/* Unmasked 128-bit pd roundscale: expands to the rndscalepd_128 builtin with
   A, the control value imm, a zero vector and a (__mmask8)-1 mask.
   NOTE(review): presumably a macro because imm must be a compile-time
   constant -- confirm against the builtin's definition. */
#define _mm_roundscale_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1); })
3621
3622
/* Mask form of the 128-bit pd roundscale: expands to the rndscalepd_128
   builtin with A, the control value imm, W as the vector operand and U as the
   mask. */
#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), \
      (__v2df)(__m128d)(W), (__mmask8)(U)); })
3628
3629
/* Zeroing form of the 128-bit pd roundscale: expands to the rndscalepd_128
   builtin with A, the control value imm, a zero vector and U as the mask. */
#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask( \
      (__v2df)(__m128d)(A), (int)(imm), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)); })
3635
3636
/* Unmasked 256-bit pd roundscale: expands to the rndscalepd_256 builtin with
   A, the control value imm, a zero vector and a (__mmask8)-1 mask. */
#define _mm256_roundscale_pd(A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
3642
3643
/* Mask form of the 256-bit pd roundscale: expands to the rndscalepd_256
   builtin with A, the control value imm, W as the vector operand and U as the
   mask. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v4df)(__m256d)(W), (__mmask8)(U)); })
3649
3650
/* Zeroing form of the 256-bit pd roundscale: expands to the rndscalepd_256
   builtin with A, the control value imm, a zero vector and U as the mask. */
#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); })
3656
/* Unmasked 128-bit ps roundscale: expands to the rndscaleps_128 builtin with
   A, the control value imm, a zero vector and a (__mmask8)-1 mask. */
#define _mm_roundscale_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1); })
3661
3662
/* Mask form of the 128-bit ps roundscale: expands to the rndscaleps_128
   builtin with A, the control value imm, W as the vector operand and U as the
   mask. */
#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), \
      (__v4sf)(__m128)(W), (__mmask8)(U)); })
3667
3668
/* Zeroing form of the 128-bit ps roundscale: expands to the rndscaleps_128
   builtin with A, the control value imm, a zero vector and U as the mask. */
#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask( \
      (__v4sf)(__m128)(A), (int)(imm), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)); })
3673
/* Unmasked 256-bit ps roundscale: expands to the rndscaleps_256 builtin with
   A, the control value imm, a zero vector and a (__mmask8)-1 mask. */
#define _mm256_roundscale_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); })
3678
/* Mask form of the 256-bit ps roundscale: expands to the rndscaleps_256
   builtin with A, the control value imm, W as the vector operand and U as the
   mask. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v8sf)(__m256)(W), (__mmask8)(U)); })
3683
3684
/* Zeroing form of the 256-bit ps roundscale: expands to the rndscaleps_256
   builtin with A, the control value imm, a zero vector and U as the mask. */
#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask( \
      (__v8sf)(__m256)(A), (int)(imm), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)); })
3689
3690static __inline__ __m128d __DEFAULT_FN_ATTRS
3691_mm_scalef_pd (__m128d __A, __m128d __B) {
3692  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3693                (__v2df) __B,
3694                (__v2df)
3695                _mm_setzero_pd (),
3696                (__mmask8) -1);
3697}
3698
3699static __inline__ __m128d __DEFAULT_FN_ATTRS
3700_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3701        __m128d __B) {
3702  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3703                (__v2df) __B,
3704                (__v2df) __W,
3705                (__mmask8) __U);
3706}
3707
3708static __inline__ __m128d __DEFAULT_FN_ATTRS
3709_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3710  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3711                (__v2df) __B,
3712                (__v2df)
3713                _mm_setzero_pd (),
3714                (__mmask8) __U);
3715}
3716
3717static __inline__ __m256d __DEFAULT_FN_ATTRS
3718_mm256_scalef_pd (__m256d __A, __m256d __B) {
3719  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3720                (__v4df) __B,
3721                (__v4df)
3722                _mm256_setzero_pd (),
3723                (__mmask8) -1);
3724}
3725
3726static __inline__ __m256d __DEFAULT_FN_ATTRS
3727_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3728           __m256d __B) {
3729  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3730                (__v4df) __B,
3731                (__v4df) __W,
3732                (__mmask8) __U);
3733}
3734
3735static __inline__ __m256d __DEFAULT_FN_ATTRS
3736_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3737  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3738                (__v4df) __B,
3739                (__v4df)
3740                _mm256_setzero_pd (),
3741                (__mmask8) __U);
3742}
3743
3744static __inline__ __m128 __DEFAULT_FN_ATTRS
3745_mm_scalef_ps (__m128 __A, __m128 __B) {
3746  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3747               (__v4sf) __B,
3748               (__v4sf)
3749               _mm_setzero_ps (),
3750               (__mmask8) -1);
3751}
3752
3753static __inline__ __m128 __DEFAULT_FN_ATTRS
3754_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3755  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3756               (__v4sf) __B,
3757               (__v4sf) __W,
3758               (__mmask8) __U);
3759}
3760
3761static __inline__ __m128 __DEFAULT_FN_ATTRS
3762_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3763  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3764               (__v4sf) __B,
3765               (__v4sf)
3766               _mm_setzero_ps (),
3767               (__mmask8) __U);
3768}
3769
3770static __inline__ __m256 __DEFAULT_FN_ATTRS
3771_mm256_scalef_ps (__m256 __A, __m256 __B) {
3772  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3773               (__v8sf) __B,
3774               (__v8sf)
3775               _mm256_setzero_ps (),
3776               (__mmask8) -1);
3777}
3778
3779static __inline__ __m256 __DEFAULT_FN_ATTRS
3780_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3781           __m256 __B) {
3782  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3783               (__v8sf) __B,
3784               (__v8sf) __W,
3785               (__mmask8) __U);
3786}
3787
3788static __inline__ __m256 __DEFAULT_FN_ATTRS
3789_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3790  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3791               (__v8sf) __B,
3792               (__v8sf)
3793               _mm256_setzero_ps (),
3794               (__mmask8) __U);
3795}
3796
/* Forwards to __builtin_ia32_scatterdiv2df with an all-ones mask operand. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
3801
/* Forwards to __builtin_ia32_scatterdiv2df with mask as the mask operand. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
3806
/* Forwards to __builtin_ia32_scatterdiv2di with an all-ones mask operand. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
3811
/* Forwards to __builtin_ia32_scatterdiv2di with mask as the mask operand. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv2di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
3816
/* Forwards to __builtin_ia32_scatterdiv4df with an all-ones mask operand. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
3821
/* Forwards to __builtin_ia32_scatterdiv4df with mask as the mask operand. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
3826
/* Forwards to __builtin_ia32_scatterdiv4di with an all-ones mask operand. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
3831
/* Forwards to __builtin_ia32_scatterdiv4di with mask as the mask operand. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
3836
/* Forwards to __builtin_ia32_scatterdiv4sf with an all-ones mask operand. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4sf( \
        (float *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v4sf)(__m128)(v1), \
        (int)(scale)); \
  })
3841
/* Forwards to __builtin_ia32_scatterdiv4sf with mask as the mask operand. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4sf( \
        (float *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v4sf)(__m128)(v1), \
        (int)(scale)); \
  })
3846
/* Forwards to __builtin_ia32_scatterdiv4si with an all-ones mask operand. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v2di)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3851
/* Forwards to __builtin_ia32_scatterdiv4si with mask as the mask operand. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv4si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v2di)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3856
/* Forwards to __builtin_ia32_scatterdiv8sf with an all-ones mask operand;
   index is cast to __v4di and v1 to __v4sf. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv8sf( \
        (float *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4sf)(__m128)(v1), \
        (int)(scale)); \
  })
3861
/* Forwards to __builtin_ia32_scatterdiv8sf with mask as the mask operand;
   index is cast to __v4di and v1 to __v4sf. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv8sf( \
        (float *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4sf)(__m128)(v1), \
        (int)(scale)); \
  })
3866
/* Forwards to __builtin_ia32_scatterdiv8si with an all-ones mask operand;
   index is cast to __v4di and v1 to __v4si. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv8si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v4di)(__m256i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3871
/* Forwards to __builtin_ia32_scatterdiv8si with mask as the mask operand;
   index is cast to __v4di and v1 to __v4si. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scatterdiv8si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v4di)(__m256i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3876
/* Forwards to __builtin_ia32_scattersiv2df with an all-ones mask operand. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
3881
/* Forwards to __builtin_ia32_scattersiv2df with mask as the mask operand. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v2df)(__m128d)(v1), \
        (int)(scale)); \
  })
3886
/* Forwards to __builtin_ia32_scattersiv2di with an all-ones mask operand. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
3891
/* Forwards to __builtin_ia32_scattersiv2di with mask as the mask operand. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv2di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v2di)(__m128i)(v1), \
        (int)(scale)); \
  })
3896
/* Forwards to __builtin_ia32_scattersiv4df with an all-ones mask operand;
   index is cast to __v4si and v1 to __v4df. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4df( \
        (double *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
3901
/* Forwards to __builtin_ia32_scattersiv4df with mask as the mask operand;
   index is cast to __v4si and v1 to __v4df. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4df( \
        (double *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4df)(__m256d)(v1), \
        (int)(scale)); \
  })
3906
/* Forwards to __builtin_ia32_scattersiv4di with an all-ones mask operand;
   index is cast to __v4si and v1 to __v4di. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4di( \
        (long long *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
3911
/* Forwards to __builtin_ia32_scattersiv4di with mask as the mask operand;
   index is cast to __v4si and v1 to __v4di. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4di( \
        (long long *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4di)(__m256i)(v1), \
        (int)(scale)); \
  })
3916
/* Forwards to __builtin_ia32_scattersiv4sf with an all-ones mask operand. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4sf( \
        (float *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4sf)(__m128)(v1), \
        (int)(scale)); \
  })
3921
/* Forwards to __builtin_ia32_scattersiv4sf with mask as the mask operand. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4sf( \
        (float *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4sf)(__m128)(v1), \
        (int)(scale)); \
  })
3926
/* Forwards to __builtin_ia32_scattersiv4si with an all-ones mask operand. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v4si)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3931
/* Forwards to __builtin_ia32_scattersiv4si with mask as the mask operand. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv4si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v4si)(__m128i)(index), \
        (__v4si)(__m128i)(v1), \
        (int)(scale)); \
  })
3936
/* Forwards to __builtin_ia32_scattersiv8sf with an all-ones mask operand. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv8sf( \
        (float *)(addr), \
        (__mmask8)-1, \
        (__v8si)(__m256i)(index), \
        (__v8sf)(__m256)(v1), \
        (int)(scale)); \
  })
3941
/* Forwards to __builtin_ia32_scattersiv8sf with mask as the mask operand. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv8sf( \
        (float *)(addr), \
        (__mmask8)(mask), \
        (__v8si)(__m256i)(index), \
        (__v8sf)(__m256)(v1), \
        (int)(scale)); \
  })
3946
/* Forwards to __builtin_ia32_scattersiv8si with an all-ones mask operand. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv8si( \
        (int *)(addr), \
        (__mmask8)-1, \
        (__v8si)(__m256i)(index), \
        (__v8si)(__m256i)(v1), \
        (int)(scale)); \
  })
3951
/* Forwards to __builtin_ia32_scattersiv8si with mask as the mask operand. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __extension__ ({ \
    __builtin_ia32_scattersiv8si( \
        (int *)(addr), \
        (__mmask8)(mask), \
        (__v8si)(__m256i)(index), \
        (__v8si)(__m256i)(v1), \
        (int)(scale)); \
  })
3956
3957static __inline__ __m128d __DEFAULT_FN_ATTRS
3958_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3959  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3960                                              (__v2df)_mm_sqrt_pd(__A),
3961                                              (__v2df)__W);
3962}
3963
3964static __inline__ __m128d __DEFAULT_FN_ATTRS
3965_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3966  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3967                                              (__v2df)_mm_sqrt_pd(__A),
3968                                              (__v2df)_mm_setzero_pd());
3969}
3970
3971static __inline__ __m256d __DEFAULT_FN_ATTRS
3972_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3973  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3974                                              (__v4df)_mm256_sqrt_pd(__A),
3975                                              (__v4df)__W);
3976}
3977
3978static __inline__ __m256d __DEFAULT_FN_ATTRS
3979_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3980  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3981                                              (__v4df)_mm256_sqrt_pd(__A),
3982                                              (__v4df)_mm256_setzero_pd());
3983}
3984
3985static __inline__ __m128 __DEFAULT_FN_ATTRS
3986_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3987  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3988                                             (__v4sf)_mm_sqrt_ps(__A),
3989                                             (__v4sf)__W);
3990}
3991
3992static __inline__ __m128 __DEFAULT_FN_ATTRS
3993_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3994  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3995                                             (__v4sf)_mm_sqrt_ps(__A),
3996                                             (__v4sf)_mm_setzero_pd());
3997}
3998
3999static __inline__ __m256 __DEFAULT_FN_ATTRS
4000_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
4001  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
4002                                             (__v8sf)_mm256_sqrt_ps(__A),
4003                                             (__v8sf)__W);
4004}
4005
4006static __inline__ __m256 __DEFAULT_FN_ATTRS
4007_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
4008  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
4009                                             (__v8sf)_mm256_sqrt_ps(__A),
4010                                             (__v8sf)_mm256_setzero_ps());
4011}
4012
4013static __inline__ __m128d __DEFAULT_FN_ATTRS
4014_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
4015  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
4016                                              (__v2df)_mm_sub_pd(__A, __B),
4017                                              (__v2df)__W);
4018}
4019
4020static __inline__ __m128d __DEFAULT_FN_ATTRS
4021_mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
4022  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
4023                                              (__v2df)_mm_sub_pd(__A, __B),
4024                                              (__v2df)_mm_setzero_pd());
4025}
4026
4027static __inline__ __m256d __DEFAULT_FN_ATTRS
4028_mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
4029  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
4030                                              (__v4df)_mm256_sub_pd(__A, __B),
4031                                              (__v4df)__W);
4032}
4033
4034static __inline__ __m256d __DEFAULT_FN_ATTRS
4035_mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
4036  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
4037                                              (__v4df)_mm256_sub_pd(__A, __B),
4038                                              (__v4df)_mm256_setzero_pd());
4039}
4040
4041static __inline__ __m128 __DEFAULT_FN_ATTRS
4042_mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
4043  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
4044                                             (__v4sf)_mm_sub_ps(__A, __B),
4045                                             (__v4sf)__W);
4046}
4047
4048static __inline__ __m128 __DEFAULT_FN_ATTRS
4049_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
4050  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
4051                                             (__v4sf)_mm_sub_ps(__A, __B),
4052                                             (__v4sf)_mm_setzero_ps());
4053}
4054
4055static __inline__ __m256 __DEFAULT_FN_ATTRS
4056_mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
4057  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
4058                                             (__v8sf)_mm256_sub_ps(__A, __B),
4059                                             (__v8sf)__W);
4060}
4061
4062static __inline__ __m256 __DEFAULT_FN_ATTRS
4063_mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
4064  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
4065                                             (__v8sf)_mm256_sub_ps(__A, __B),
4066                                             (__v8sf)_mm256_setzero_ps());
4067}
4068
4069static __inline__ __m128i __DEFAULT_FN_ATTRS
4070_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
4071            __m128i __B) {
4072  return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
4073                   (__v4si) __I
4074                   /* idx */ ,
4075                   (__v4si) __B,
4076                   (__mmask8) __U);
4077}
4078
4079static __inline__ __m256i __DEFAULT_FN_ATTRS
4080_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
4081         __mmask8 __U, __m256i __B) {
4082  return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
4083                   (__v8si) __I
4084                   /* idx */ ,
4085                   (__v8si) __B,
4086                   (__mmask8) __U);
4087}
4088
4089static __inline__ __m128d __DEFAULT_FN_ATTRS
4090_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
4091         __m128d __B) {
4092  return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
4093              (__v2di) __I
4094              /* idx */ ,
4095              (__v2df) __B,
4096              (__mmask8)
4097              __U);
4098}
4099
4100static __inline__ __m256d __DEFAULT_FN_ATTRS
4101_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
4102            __m256d __B) {
4103  return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
4104              (__v4di) __I
4105              /* idx */ ,
4106              (__v4df) __B,
4107              (__mmask8)
4108              __U);
4109}
4110
4111static __inline__ __m128 __DEFAULT_FN_ATTRS
4112_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
4113         __m128 __B) {
4114  return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
4115                   (__v4si) __I
4116                   /* idx */ ,
4117                   (__v4sf) __B,
4118                   (__mmask8) __U);
4119}
4120
4121static __inline__ __m256 __DEFAULT_FN_ATTRS
4122_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
4123            __m256 __B) {
4124  return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
4125                   (__v8si) __I
4126                   /* idx */ ,
4127                   (__v8sf) __B,
4128                   (__mmask8) __U);
4129}
4130
4131static __inline__ __m128i __DEFAULT_FN_ATTRS
4132_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
4133            __m128i __B) {
4134  return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
4135                   (__v2di) __I
4136                   /* idx */ ,
4137                   (__v2di) __B,
4138                   (__mmask8) __U);
4139}
4140
4141static __inline__ __m256i __DEFAULT_FN_ATTRS
4142_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
4143         __mmask8 __U, __m256i __B) {
4144  return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
4145                   (__v4di) __I
4146                   /* idx */ ,
4147                   (__v4di) __B,
4148                   (__mmask8) __U);
4149}
4150
4151static __inline__ __m128i __DEFAULT_FN_ATTRS
4152_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) {
4153  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
4154                   /* idx */ ,
4155                   (__v4si) __A,
4156                   (__v4si) __B,
4157                   (__mmask8) -1);
4158}
4159
4160static __inline__ __m128i __DEFAULT_FN_ATTRS
4161_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
4162           __m128i __B) {
4163  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
4164                   /* idx */ ,
4165                   (__v4si) __A,
4166                   (__v4si) __B,
4167                   (__mmask8) __U);
4168}
4169
4170static __inline__ __m128i __DEFAULT_FN_ATTRS
4171_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
4172            __m128i __B) {
4173  return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
4174              /* idx */ ,
4175              (__v4si) __A,
4176              (__v4si) __B,
4177              (__mmask8)
4178              __U);
4179}
4180
4181static __inline__ __m256i __DEFAULT_FN_ATTRS
4182_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) {
4183  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
4184                   /* idx */ ,
4185                   (__v8si) __A,
4186                   (__v8si) __B,
4187                   (__mmask8) -1);
4188}
4189
4190static __inline__ __m256i __DEFAULT_FN_ATTRS
4191_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
4192        __m256i __B) {
4193  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
4194                   /* idx */ ,
4195                   (__v8si) __A,
4196                   (__v8si) __B,
4197                   (__mmask8) __U);
4198}
4199
4200static __inline__ __m256i __DEFAULT_FN_ATTRS
4201_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
4202         __m256i __I, __m256i __B) {
4203  return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
4204              /* idx */ ,
4205              (__v8si) __A,
4206              (__v8si) __B,
4207              (__mmask8)
4208              __U);
4209}
4210
4211static __inline__ __m128d __DEFAULT_FN_ATTRS
4212_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) {
4213  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
4214              /* idx */ ,
4215              (__v2df) __A,
4216              (__v2df) __B,
4217              (__mmask8) -
4218              1);
4219}
4220
4221static __inline__ __m128d __DEFAULT_FN_ATTRS
4222_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
4223        __m128d __B) {
4224  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
4225              /* idx */ ,
4226              (__v2df) __A,
4227              (__v2df) __B,
4228              (__mmask8)
4229              __U);
4230}
4231
4232static __inline__ __m128d __DEFAULT_FN_ATTRS
4233_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
4234         __m128d __B) {
4235  return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
4236               /* idx */ ,
4237               (__v2df) __A,
4238               (__v2df) __B,
4239               (__mmask8)
4240               __U);
4241}
4242
4243static __inline__ __m256d __DEFAULT_FN_ATTRS
4244_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) {
4245  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
4246              /* idx */ ,
4247              (__v4df) __A,
4248              (__v4df) __B,
4249              (__mmask8) -
4250              1);
4251}
4252
4253static __inline__ __m256d __DEFAULT_FN_ATTRS
4254_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
4255           __m256d __B) {
4256  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
4257              /* idx */ ,
4258              (__v4df) __A,
4259              (__v4df) __B,
4260              (__mmask8)
4261              __U);
4262}
4263
4264static __inline__ __m256d __DEFAULT_FN_ATTRS
4265_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
4266            __m256d __B) {
4267  return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
4268               /* idx */ ,
4269               (__v4df) __A,
4270               (__v4df) __B,
4271               (__mmask8)
4272               __U);
4273}
4274
4275static __inline__ __m128 __DEFAULT_FN_ATTRS
4276_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) {
4277  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
4278                   /* idx */ ,
4279                   (__v4sf) __A,
4280                   (__v4sf) __B,
4281                   (__mmask8) -1);
4282}
4283
4284static __inline__ __m128 __DEFAULT_FN_ATTRS
4285_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
4286        __m128 __B) {
4287  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
4288                   /* idx */ ,
4289                   (__v4sf) __A,
4290                   (__v4sf) __B,
4291                   (__mmask8) __U);
4292}
4293
4294static __inline__ __m128 __DEFAULT_FN_ATTRS
4295_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
4296         __m128 __B) {
4297  return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
4298              /* idx */ ,
4299              (__v4sf) __A,
4300              (__v4sf) __B,
4301              (__mmask8)
4302              __U);
4303}
4304
4305static __inline__ __m256 __DEFAULT_FN_ATTRS
4306_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) {
4307  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
4308                   /* idx */ ,
4309                   (__v8sf) __A,
4310                   (__v8sf) __B,
4311                   (__mmask8) -1);
4312}
4313
4314static __inline__ __m256 __DEFAULT_FN_ATTRS
4315_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
4316           __m256 __B) {
4317  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
4318                   /* idx */ ,
4319                   (__v8sf) __A,
4320                   (__v8sf) __B,
4321                   (__mmask8) __U);
4322}
4323
4324static __inline__ __m256 __DEFAULT_FN_ATTRS
4325_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
4326            __m256 __B) {
4327  return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
4328              /* idx */ ,
4329              (__v8sf) __A,
4330              (__v8sf) __B,
4331              (__mmask8)
4332              __U);
4333}
4334
4335static __inline__ __m128i __DEFAULT_FN_ATTRS
4336_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) {
4337  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
4338                   /* idx */ ,
4339                   (__v2di) __A,
4340                   (__v2di) __B,
4341                   (__mmask8) -1);
4342}
4343
4344static __inline__ __m128i __DEFAULT_FN_ATTRS
4345_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
4346           __m128i __B) {
4347  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
4348                   /* idx */ ,
4349                   (__v2di) __A,
4350                   (__v2di) __B,
4351                   (__mmask8) __U);
4352}
4353
4354static __inline__ __m128i __DEFAULT_FN_ATTRS
4355_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
4356            __m128i __B) {
4357  return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
4358              /* idx */ ,
4359              (__v2di) __A,
4360              (__v2di) __B,
4361              (__mmask8)
4362              __U);
4363}
4364
4365
4366static __inline__ __m256i __DEFAULT_FN_ATTRS
4367_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) {
4368  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
4369                   /* idx */ ,
4370                   (__v4di) __A,
4371                   (__v4di) __B,
4372                   (__mmask8) -1);
4373}
4374
4375static __inline__ __m256i __DEFAULT_FN_ATTRS
4376_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
4377        __m256i __B) {
4378  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
4379                   /* idx */ ,
4380                   (__v4di) __A,
4381                   (__v4di) __B,
4382                   (__mmask8) __U);
4383}
4384
4385static __inline__ __m256i __DEFAULT_FN_ATTRS
4386_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
4387         __m256i __I, __m256i __B) {
4388  return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
4389              /* idx */ ,
4390              (__v4di) __A,
4391              (__v4di) __B,
4392              (__mmask8)
4393              __U);
4394}
4395
4396static __inline__ __m128i __DEFAULT_FN_ATTRS
4397_mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4398{
4399  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4400                                             (__v4si)_mm_cvtepi8_epi32(__A),
4401                                             (__v4si)__W);
4402}
4403
4404static __inline__ __m128i __DEFAULT_FN_ATTRS
4405_mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
4406{
4407  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4408                                             (__v4si)_mm_cvtepi8_epi32(__A),
4409                                             (__v4si)_mm_setzero_si128());
4410}
4411
4412static __inline__ __m256i __DEFAULT_FN_ATTRS
4413_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
4414{
4415  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4416                                             (__v8si)_mm256_cvtepi8_epi32(__A),
4417                                             (__v8si)__W);
4418}
4419
4420static __inline__ __m256i __DEFAULT_FN_ATTRS
4421_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
4422{
4423  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4424                                             (__v8si)_mm256_cvtepi8_epi32(__A),
4425                                             (__v8si)_mm256_setzero_si256());
4426}
4427
4428static __inline__ __m128i __DEFAULT_FN_ATTRS
4429_mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4430{
4431  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4432                                             (__v2di)_mm_cvtepi8_epi64(__A),
4433                                             (__v2di)__W);
4434}
4435
4436static __inline__ __m128i __DEFAULT_FN_ATTRS
4437_mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4438{
4439  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4440                                             (__v2di)_mm_cvtepi8_epi64(__A),
4441                                             (__v2di)_mm_setzero_si128());
4442}
4443
4444static __inline__ __m256i __DEFAULT_FN_ATTRS
4445_mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4446{
4447  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4448                                             (__v4di)_mm256_cvtepi8_epi64(__A),
4449                                             (__v4di)__W);
4450}
4451
4452static __inline__ __m256i __DEFAULT_FN_ATTRS
4453_mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4454{
4455  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4456                                             (__v4di)_mm256_cvtepi8_epi64(__A),
4457                                             (__v4di)_mm256_setzero_si256());
4458}
4459
4460static __inline__ __m128i __DEFAULT_FN_ATTRS
4461_mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4462{
4463  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4464                                             (__v2di)_mm_cvtepi32_epi64(__X),
4465                                             (__v2di)__W);
4466}
4467
4468static __inline__ __m128i __DEFAULT_FN_ATTRS
4469_mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4470{
4471  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4472                                             (__v2di)_mm_cvtepi32_epi64(__X),
4473                                             (__v2di)_mm_setzero_si128());
4474}
4475
4476static __inline__ __m256i __DEFAULT_FN_ATTRS
4477_mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4478{
4479  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4480                                             (__v4di)_mm256_cvtepi32_epi64(__X),
4481                                             (__v4di)__W);
4482}
4483
4484static __inline__ __m256i __DEFAULT_FN_ATTRS
4485_mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
4486{
4487  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4488                                             (__v4di)_mm256_cvtepi32_epi64(__X),
4489                                             (__v4di)_mm256_setzero_si256());
4490}
4491
4492static __inline__ __m128i __DEFAULT_FN_ATTRS
4493_mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4494{
4495  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4496                                             (__v4si)_mm_cvtepi16_epi32(__A),
4497                                             (__v4si)__W);
4498}
4499
4500static __inline__ __m128i __DEFAULT_FN_ATTRS
4501_mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
4502{
4503  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4504                                             (__v4si)_mm_cvtepi16_epi32(__A),
4505                                             (__v4si)_mm_setzero_si128());
4506}
4507
4508static __inline__ __m256i __DEFAULT_FN_ATTRS
4509_mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4510{
4511  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4512                                             (__v8si)_mm256_cvtepi16_epi32(__A),
4513                                             (__v8si)__W);
4514}
4515
4516static __inline__ __m256i __DEFAULT_FN_ATTRS
4517_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
4518{
4519  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4520                                             (__v8si)_mm256_cvtepi16_epi32(__A),
4521                                             (__v8si)_mm256_setzero_si256());
4522}
4523
4524static __inline__ __m128i __DEFAULT_FN_ATTRS
4525_mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4526{
4527  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4528                                             (__v2di)_mm_cvtepi16_epi64(__A),
4529                                             (__v2di)__W);
4530}
4531
4532static __inline__ __m128i __DEFAULT_FN_ATTRS
4533_mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4534{
4535  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4536                                             (__v2di)_mm_cvtepi16_epi64(__A),
4537                                             (__v2di)_mm_setzero_si128());
4538}
4539
4540static __inline__ __m256i __DEFAULT_FN_ATTRS
4541_mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4542{
4543  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4544                                             (__v4di)_mm256_cvtepi16_epi64(__A),
4545                                             (__v4di)__W);
4546}
4547
4548static __inline__ __m256i __DEFAULT_FN_ATTRS
4549_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
4550{
4551  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4552                                             (__v4di)_mm256_cvtepi16_epi64(__A),
4553                                             (__v4di)_mm256_setzero_si256());
4554}
4555
4556
4557static __inline__ __m128i __DEFAULT_FN_ATTRS
4558_mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4559{
4560  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4561                                             (__v4si)_mm_cvtepu8_epi32(__A),
4562                                             (__v4si)__W);
4563}
4564
4565static __inline__ __m128i __DEFAULT_FN_ATTRS
4566_mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4567{
4568  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4569                                             (__v4si)_mm_cvtepu8_epi32(__A),
4570                                             (__v4si)_mm_setzero_si128());
4571}
4572
4573static __inline__ __m256i __DEFAULT_FN_ATTRS
4574_mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4575{
4576  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4577                                             (__v8si)_mm256_cvtepu8_epi32(__A),
4578                                             (__v8si)__W);
4579}
4580
4581static __inline__ __m256i __DEFAULT_FN_ATTRS
4582_mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
4583{
4584  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4585                                             (__v8si)_mm256_cvtepu8_epi32(__A),
4586                                             (__v8si)_mm256_setzero_si256());
4587}
4588
4589static __inline__ __m128i __DEFAULT_FN_ATTRS
4590_mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4591{
4592  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4593                                             (__v2di)_mm_cvtepu8_epi64(__A),
4594                                             (__v2di)__W);
4595}
4596
4597static __inline__ __m128i __DEFAULT_FN_ATTRS
4598_mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
4599{
4600  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4601                                             (__v2di)_mm_cvtepu8_epi64(__A),
4602                                             (__v2di)_mm_setzero_si128());
4603}
4604
4605static __inline__ __m256i __DEFAULT_FN_ATTRS
4606_mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4607{
4608  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4609                                             (__v4di)_mm256_cvtepu8_epi64(__A),
4610                                             (__v4di)__W);
4611}
4612
4613static __inline__ __m256i __DEFAULT_FN_ATTRS
4614_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
4615{
4616  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4617                                             (__v4di)_mm256_cvtepu8_epi64(__A),
4618                                             (__v4di)_mm256_setzero_si256());
4619}
4620
4621static __inline__ __m128i __DEFAULT_FN_ATTRS
4622_mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4623{
4624  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4625                                             (__v2di)_mm_cvtepu32_epi64(__X),
4626                                             (__v2di)__W);
4627}
4628
4629static __inline__ __m128i __DEFAULT_FN_ATTRS
4630_mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4631{
4632  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4633                                             (__v2di)_mm_cvtepu32_epi64(__X),
4634                                             (__v2di)_mm_setzero_si128());
4635}
4636
4637static __inline__ __m256i __DEFAULT_FN_ATTRS
4638_mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4639{
4640  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4641                                             (__v4di)_mm256_cvtepu32_epi64(__X),
4642                                             (__v4di)__W);
4643}
4644
4645static __inline__ __m256i __DEFAULT_FN_ATTRS
4646_mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
4647{
4648  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4649                                             (__v4di)_mm256_cvtepu32_epi64(__X),
4650                                             (__v4di)_mm256_setzero_si256());
4651}
4652
4653static __inline__ __m128i __DEFAULT_FN_ATTRS
4654_mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4655{
4656  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4657                                             (__v4si)_mm_cvtepu16_epi32(__A),
4658                                             (__v4si)__W);
4659}
4660
4661static __inline__ __m128i __DEFAULT_FN_ATTRS
4662_mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4663{
4664  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4665                                             (__v4si)_mm_cvtepu16_epi32(__A),
4666                                             (__v4si)_mm_setzero_si128());
4667}
4668
4669static __inline__ __m256i __DEFAULT_FN_ATTRS
4670_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4671{
4672  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4673                                             (__v8si)_mm256_cvtepu16_epi32(__A),
4674                                             (__v8si)__W);
4675}
4676
4677static __inline__ __m256i __DEFAULT_FN_ATTRS
4678_mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
4679{
4680  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4681                                             (__v8si)_mm256_cvtepu16_epi32(__A),
4682                                             (__v8si)_mm256_setzero_si256());
4683}
4684
4685static __inline__ __m128i __DEFAULT_FN_ATTRS
4686_mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4687{
4688  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4689                                             (__v2di)_mm_cvtepu16_epi64(__A),
4690                                             (__v2di)__W);
4691}
4692
4693static __inline__ __m128i __DEFAULT_FN_ATTRS
4694_mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4695{
4696  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4697                                             (__v2di)_mm_cvtepu16_epi64(__A),
4698                                             (__v2di)_mm_setzero_si128());
4699}
4700
4701static __inline__ __m256i __DEFAULT_FN_ATTRS
4702_mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4703{
4704  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4705                                             (__v4di)_mm256_cvtepu16_epi64(__A),
4706                                             (__v4di)__W);
4707}
4708
4709static __inline__ __m256i __DEFAULT_FN_ATTRS
4710_mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
4711{
4712  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4713                                             (__v4di)_mm256_cvtepu16_epi64(__A),
4714                                             (__v4di)_mm256_setzero_si256());
4715}
4716
4717
/* Unmasked form: expands to __builtin_ia32_prold128_mask on (A) with count
 * (N), an all-zero pass-through vector and an all-ones mask. */
#define _mm_rol_epi32(A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_prold128_mask( \
      (__v4si)(__m128i)(A), (int)(N), \
      (__v4si)_mm_setzero_si128(), (__mmask8)-1); })
4722
/* Merge-masked form: expands to __builtin_ia32_prold128_mask on (A) with
 * count (N), pass-through vector (W) and mask (U). */
#define _mm_mask_rol_epi32(W, U, A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_prold128_mask( \
      (__v4si)(__m128i)(A), (int)(N), \
      (__v4si)(__m128i)(W), (__mmask8)(U)); })
4726
/* Zero-masked form: expands to __builtin_ia32_prold128_mask on (A) with
 * count (N), an all-zero pass-through vector and mask (U). */
#define _mm_maskz_rol_epi32(U, A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_prold128_mask( \
      (__v4si)(__m128i)(A), (int)(N), \
      (__v4si)_mm_setzero_si128(), (__mmask8)(U)); })
4731
/* Unmasked form: expands to __builtin_ia32_prold256_mask on (A) with count
 * (N), an all-zero pass-through vector and an all-ones mask. */
#define _mm256_rol_epi32(A, N) __extension__ ({ \
  (__m256i)__builtin_ia32_prold256_mask( \
      (__v8si)(__m256i)(A), (int)(N), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)-1); })
4736
/* Merge-masked form: expands to __builtin_ia32_prold256_mask on (A) with
 * count (N), pass-through vector (W) and mask (U). */
#define _mm256_mask_rol_epi32(W, U, A, N) __extension__ ({ \
  (__m256i)__builtin_ia32_prold256_mask( \
      (__v8si)(__m256i)(A), (int)(N), \
      (__v8si)(__m256i)(W), (__mmask8)(U)); })
4740
/* Zero-masked form: expands to __builtin_ia32_prold256_mask on (A) with
 * count (N), an all-zero pass-through vector and mask (U). */
#define _mm256_maskz_rol_epi32(U, A, N) __extension__ ({ \
  (__m256i)__builtin_ia32_prold256_mask( \
      (__v8si)(__m256i)(A), (int)(N), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)(U)); })
4745
/* Unmasked form: expands to __builtin_ia32_prolq128_mask on (A) with count
 * (N), an all-zero pass-through vector and an all-ones mask. */
#define _mm_rol_epi64(A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_prolq128_mask( \
      (__v2di)(__m128i)(A), (int)(N), \
      (__v2di)_mm_setzero_di(), (__mmask8)-1); })
4750
/* Merge-masked form: expands to __builtin_ia32_prolq128_mask on (A) with
 * count (N), pass-through vector (W) and mask (U). */
#define _mm_mask_rol_epi64(W, U, A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_prolq128_mask( \
      (__v2di)(__m128i)(A), (int)(N), \
      (__v2di)(__m128i)(W), (__mmask8)(U)); })
4754
/* Zero-masked form: expands to __builtin_ia32_prolq128_mask on (A) with
 * count (N), an all-zero pass-through vector and mask (U). */
#define _mm_maskz_rol_epi64(U, A, N) __extension__ ({ \
  (__m128i)__builtin_ia32_prolq128_mask( \
      (__v2di)(__m128i)(A), (int)(N), \
      (__v2di)_mm_setzero_di(), (__mmask8)(U)); })
4759
/* Unmasked form: expands to __builtin_ia32_prolq256_mask on (A) with count
 * (N), an all-zero pass-through vector and an all-ones mask. */
#define _mm256_rol_epi64(A, N) __extension__ ({ \
  (__m256i)__builtin_ia32_prolq256_mask( \
      (__v4di)(__m256i)(A), (int)(N), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)-1); })
4764
/* Merge-masked form: expands to __builtin_ia32_prolq256_mask on (A) with
 * count (N), pass-through vector (W) and mask (U). */
#define _mm256_mask_rol_epi64(W, U, A, N) __extension__ ({ \
  (__m256i)__builtin_ia32_prolq256_mask( \
      (__v4di)(__m256i)(A), (int)(N), \
      (__v4di)(__m256i)(W), (__mmask8)(U)); })
4768
/* Zero-masked form: expands to __builtin_ia32_prolq256_mask on (A) with
 * count (N), an all-zero pass-through vector and mask (U). */
#define _mm256_maskz_rol_epi64(U, A, N) __extension__ ({ \
  (__m256i)__builtin_ia32_prolq256_mask( \
      (__v4di)(__m256i)(A), (int)(N), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)(U)); })
4773
4774static __inline__ __m128i __DEFAULT_FN_ATTRS
4775_mm_rolv_epi32 (__m128i __A, __m128i __B)
4776{
4777  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4778              (__v4si) __B,
4779              (__v4si)
4780              _mm_setzero_si128 (),
4781              (__mmask8) -1);
4782}
4783
4784static __inline__ __m128i __DEFAULT_FN_ATTRS
4785_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4786         __m128i __B)
4787{
4788  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4789              (__v4si) __B,
4790              (__v4si) __W,
4791              (__mmask8) __U);
4792}
4793
4794static __inline__ __m128i __DEFAULT_FN_ATTRS
4795_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4796{
4797  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
4798              (__v4si) __B,
4799              (__v4si)
4800              _mm_setzero_si128 (),
4801              (__mmask8) __U);
4802}
4803
4804static __inline__ __m256i __DEFAULT_FN_ATTRS
4805_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4806{
4807  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4808              (__v8si) __B,
4809              (__v8si)
4810              _mm256_setzero_si256 (),
4811              (__mmask8) -1);
4812}
4813
4814static __inline__ __m256i __DEFAULT_FN_ATTRS
4815_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4816      __m256i __B)
4817{
4818  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4819              (__v8si) __B,
4820              (__v8si) __W,
4821              (__mmask8) __U);
4822}
4823
4824static __inline__ __m256i __DEFAULT_FN_ATTRS
4825_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4826{
4827  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
4828              (__v8si) __B,
4829              (__v8si)
4830              _mm256_setzero_si256 (),
4831              (__mmask8) __U);
4832}
4833
4834static __inline__ __m128i __DEFAULT_FN_ATTRS
4835_mm_rolv_epi64 (__m128i __A, __m128i __B)
4836{
4837  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
4838              (__v2di) __B,
4839              (__v2di)
4840              _mm_setzero_di (),
4841              (__mmask8) -1);
4842}
4843
4844static __inline__ __m128i __DEFAULT_FN_ATTRS
4845_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
4846         __m128i __B)
4847{
4848  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
4849              (__v2di) __B,
4850              (__v2di) __W,
4851              (__mmask8) __U);
4852}
4853
4854static __inline__ __m128i __DEFAULT_FN_ATTRS
4855_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4856{
4857  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
4858              (__v2di) __B,
4859              (__v2di)
4860              _mm_setzero_di (),
4861              (__mmask8) __U);
4862}
4863
4864static __inline__ __m256i __DEFAULT_FN_ATTRS
4865_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4866{
4867  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
4868              (__v4di) __B,
4869              (__v4di)
4870              _mm256_setzero_si256 (),
4871              (__mmask8) -1);
4872}
4873
4874static __inline__ __m256i __DEFAULT_FN_ATTRS
4875_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
4876      __m256i __B)
4877{
4878  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
4879              (__v4di) __B,
4880              (__v4di) __W,
4881              (__mmask8) __U);
4882}
4883
4884static __inline__ __m256i __DEFAULT_FN_ATTRS
4885_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4886{
4887  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
4888              (__v4di) __B,
4889              (__v4di)
4890              _mm256_setzero_si256 (),
4891              (__mmask8) __U);
4892}
4893
/* Unmasked form: expands to __builtin_ia32_prord128_mask on (a) with count
 * (n), an all-zero pass-through vector and an all-ones mask. */
#define _mm_ror_epi32(a, n) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask( \
      (__v4si)(__m128i)(a), (int)(n), \
      (__v4si)_mm_setzero_si128(), (__mmask8)-1); })
4898
/* Merge-masked form: expands to __builtin_ia32_prord128_mask on (a) with
 * count (n), pass-through vector (w) and mask (u). */
#define _mm_mask_ror_epi32(w, u, a, n) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask( \
      (__v4si)(__m128i)(a), (int)(n), \
      (__v4si)(__m128i)(w), (__mmask8)(u)); })
4902
/* Zero-masked form: expands to __builtin_ia32_prord128_mask on (a) with
 * count (n), an all-zero pass-through vector and mask (u). */
#define _mm_maskz_ror_epi32(u, a, n) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask( \
      (__v4si)(__m128i)(a), (int)(n), \
      (__v4si)_mm_setzero_si128(), (__mmask8)(u)); })
4907
/* Unmasked form: expands to __builtin_ia32_prord256_mask on (a) with count
 * (n), an all-zero pass-through vector and an all-ones mask. */
#define _mm256_ror_epi32(a, n) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask( \
      (__v8si)(__m256i)(a), (int)(n), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)-1); })
4912
/* Merge-masked form: expands to __builtin_ia32_prord256_mask on (a) with
 * count (n), pass-through vector (w) and mask (u). */
#define _mm256_mask_ror_epi32(w, u, a, n) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask( \
      (__v8si)(__m256i)(a), (int)(n), \
      (__v8si)(__m256i)(w), (__mmask8)(u)); })
4916
/* Zero-masked form: expands to __builtin_ia32_prord256_mask on (a) with
 * count (n), an all-zero pass-through vector and mask (u). */
#define _mm256_maskz_ror_epi32(u, a, n) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask( \
      (__v8si)(__m256i)(a), (int)(n), \
      (__v8si)_mm256_setzero_si256(), (__mmask8)(u)); })
4921
/* Unmasked form: expands to __builtin_ia32_prorq128_mask on (a) with count
 * (n), an all-zero pass-through vector and an all-ones mask. */
#define _mm_ror_epi64(a, n) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask( \
      (__v2di)(__m128i)(a), (int)(n), \
      (__v2di)_mm_setzero_di(), (__mmask8)-1); })
4926
/* Merge-masked form: expands to __builtin_ia32_prorq128_mask on (a) with
 * count (n), pass-through vector (w) and mask (u). */
#define _mm_mask_ror_epi64(w, u, a, n) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask( \
      (__v2di)(__m128i)(a), (int)(n), \
      (__v2di)(__m128i)(w), (__mmask8)(u)); })
4930
/* Zero-masked form: expands to __builtin_ia32_prorq128_mask on (a) with
 * count (n), an all-zero pass-through vector and mask (u). */
#define _mm_maskz_ror_epi64(u, a, n) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask( \
      (__v2di)(__m128i)(a), (int)(n), \
      (__v2di)_mm_setzero_di(), (__mmask8)(u)); })
4935
/* Unmasked form: expands to __builtin_ia32_prorq256_mask on (a) with count
 * (n), an all-zero pass-through vector and an all-ones mask. */
#define _mm256_ror_epi64(a, n) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask( \
      (__v4di)(__m256i)(a), (int)(n), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)-1); })
4940
/* Merge-masked form: expands to __builtin_ia32_prorq256_mask on (a) with
 * count (n), pass-through vector (w) and mask (u). */
#define _mm256_mask_ror_epi64(w, u, a, n) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask( \
      (__v4di)(__m256i)(a), (int)(n), \
      (__v4di)(__m256i)(w), (__mmask8)(u)); })
4944
/* Zero-masked form: expands to __builtin_ia32_prorq256_mask on (a) with
 * count (n), an all-zero pass-through vector and mask (u). */
#define _mm256_maskz_ror_epi64(u, a, n) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask( \
      (__v4di)(__m256i)(a), (int)(n), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)(u)); })
4949
4950static __inline__ __m128i __DEFAULT_FN_ATTRS
4951_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4952{
4953  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4954                                             (__v4si)_mm_sll_epi32(__A, __B),
4955                                             (__v4si)__W);
4956}
4957
4958static __inline__ __m128i __DEFAULT_FN_ATTRS
4959_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4960{
4961  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4962                                             (__v4si)_mm_sll_epi32(__A, __B),
4963                                             (__v4si)_mm_setzero_si128());
4964}
4965
4966static __inline__ __m256i __DEFAULT_FN_ATTRS
4967_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4968{
4969  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4970                                             (__v8si)_mm256_sll_epi32(__A, __B),
4971                                             (__v8si)__W);
4972}
4973
4974static __inline__ __m256i __DEFAULT_FN_ATTRS
4975_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4976{
4977  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4978                                             (__v8si)_mm256_sll_epi32(__A, __B),
4979                                             (__v8si)_mm256_setzero_si256());
4980}
4981
4982static __inline__ __m128i __DEFAULT_FN_ATTRS
4983_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
4984{
4985  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4986                                             (__v4si)_mm_slli_epi32(__A, __B),
4987                                             (__v4si)__W);
4988}
4989
4990static __inline__ __m128i __DEFAULT_FN_ATTRS
4991_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
4992{
4993  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4994                                             (__v4si)_mm_slli_epi32(__A, __B),
4995                                             (__v4si)_mm_setzero_si128());
4996}
4997
4998static __inline__ __m256i __DEFAULT_FN_ATTRS
4999_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
5000{
5001  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5002                                             (__v8si)_mm256_slli_epi32(__A, __B),
5003                                             (__v8si)__W);
5004}
5005
5006static __inline__ __m256i __DEFAULT_FN_ATTRS
5007_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
5008{
5009  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5010                                             (__v8si)_mm256_slli_epi32(__A, __B),
5011                                             (__v8si)_mm256_setzero_si256());
5012}
5013
5014static __inline__ __m128i __DEFAULT_FN_ATTRS
5015_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
5016{
5017  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5018                                             (__v2di)_mm_sll_epi64(__A, __B),
5019                                             (__v2di)__W);
5020}
5021
5022static __inline__ __m128i __DEFAULT_FN_ATTRS
5023_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
5024{
5025  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5026                                             (__v2di)_mm_sll_epi64(__A, __B),
5027                                             (__v2di)_mm_setzero_di());
5028}
5029
5030static __inline__ __m256i __DEFAULT_FN_ATTRS
5031_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
5032{
5033  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5034                                             (__v4di)_mm256_sll_epi64(__A, __B),
5035                                             (__v4di)__W);
5036}
5037
5038static __inline__ __m256i __DEFAULT_FN_ATTRS
5039_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
5040{
5041  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5042                                             (__v4di)_mm256_sll_epi64(__A, __B),
5043                                             (__v4di)_mm256_setzero_si256());
5044}
5045
5046static __inline__ __m128i __DEFAULT_FN_ATTRS
5047_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
5048{
5049  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5050                                             (__v2di)_mm_slli_epi64(__A, __B),
5051                                             (__v2di)__W);
5052}
5053
5054static __inline__ __m128i __DEFAULT_FN_ATTRS
5055_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
5056{
5057  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5058                                             (__v2di)_mm_slli_epi64(__A, __B),
5059                                             (__v2di)_mm_setzero_di());
5060}
5061
5062static __inline__ __m256i __DEFAULT_FN_ATTRS
5063_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
5064{
5065  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5066                                             (__v4di)_mm256_slli_epi64(__A, __B),
5067                                             (__v4di)__W);
5068}
5069
5070static __inline__ __m256i __DEFAULT_FN_ATTRS
5071_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
5072{
5073  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5074                                             (__v4di)_mm256_slli_epi64(__A, __B),
5075                                             (__v4di)_mm256_setzero_si256());
5076}
5077
5078static __inline__ __m128i __DEFAULT_FN_ATTRS
5079_mm_rorv_epi32 (__m128i __A, __m128i __B)
5080{
5081  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
5082              (__v4si) __B,
5083              (__v4si)
5084              _mm_setzero_si128 (),
5085              (__mmask8) -1);
5086}
5087
5088static __inline__ __m128i __DEFAULT_FN_ATTRS
5089_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5090         __m128i __B)
5091{
5092  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
5093              (__v4si) __B,
5094              (__v4si) __W,
5095              (__mmask8) __U);
5096}
5097
5098static __inline__ __m128i __DEFAULT_FN_ATTRS
5099_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5100{
5101  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
5102              (__v4si) __B,
5103              (__v4si)
5104              _mm_setzero_si128 (),
5105              (__mmask8) __U);
5106}
5107
5108static __inline__ __m256i __DEFAULT_FN_ATTRS
5109_mm256_rorv_epi32 (__m256i __A, __m256i __B)
5110{
5111  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
5112              (__v8si) __B,
5113              (__v8si)
5114              _mm256_setzero_si256 (),
5115              (__mmask8) -1);
5116}
5117
5118static __inline__ __m256i __DEFAULT_FN_ATTRS
5119_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5120      __m256i __B)
5121{
5122  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
5123              (__v8si) __B,
5124              (__v8si) __W,
5125              (__mmask8) __U);
5126}
5127
5128static __inline__ __m256i __DEFAULT_FN_ATTRS
5129_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5130{
5131  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
5132              (__v8si) __B,
5133              (__v8si)
5134              _mm256_setzero_si256 (),
5135              (__mmask8) __U);
5136}
5137
5138static __inline__ __m128i __DEFAULT_FN_ATTRS
5139_mm_rorv_epi64 (__m128i __A, __m128i __B)
5140{
5141  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
5142              (__v2di) __B,
5143              (__v2di)
5144              _mm_setzero_di (),
5145              (__mmask8) -1);
5146}
5147
5148static __inline__ __m128i __DEFAULT_FN_ATTRS
5149_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5150         __m128i __B)
5151{
5152  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
5153              (__v2di) __B,
5154              (__v2di) __W,
5155              (__mmask8) __U);
5156}
5157
5158static __inline__ __m128i __DEFAULT_FN_ATTRS
5159_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5160{
5161  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
5162              (__v2di) __B,
5163              (__v2di)
5164              _mm_setzero_di (),
5165              (__mmask8) __U);
5166}
5167
5168static __inline__ __m256i __DEFAULT_FN_ATTRS
5169_mm256_rorv_epi64 (__m256i __A, __m256i __B)
5170{
5171  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
5172              (__v4di) __B,
5173              (__v4di)
5174              _mm256_setzero_si256 (),
5175              (__mmask8) -1);
5176}
5177
5178static __inline__ __m256i __DEFAULT_FN_ATTRS
5179_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5180      __m256i __B)
5181{
5182  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
5183              (__v4di) __B,
5184              (__v4di) __W,
5185              (__mmask8) __U);
5186}
5187
5188static __inline__ __m256i __DEFAULT_FN_ATTRS
5189_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5190{
5191  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
5192              (__v4di) __B,
5193              (__v4di)
5194              _mm256_setzero_si256 (),
5195              (__mmask8) __U);
5196}
5197
5198static __inline__ __m128i __DEFAULT_FN_ATTRS
5199_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5200{
5201  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5202                                             (__v2di)_mm_sllv_epi64(__X, __Y),
5203                                             (__v2di)__W);
5204}
5205
5206static __inline__ __m128i __DEFAULT_FN_ATTRS
5207_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5208{
5209  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5210                                             (__v2di)_mm_sllv_epi64(__X, __Y),
5211                                             (__v2di)_mm_setzero_di());
5212}
5213
5214static __inline__ __m256i __DEFAULT_FN_ATTRS
5215_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5216{
5217  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5218                                            (__v4di)_mm256_sllv_epi64(__X, __Y),
5219                                            (__v4di)__W);
5220}
5221
5222static __inline__ __m256i __DEFAULT_FN_ATTRS
5223_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
5224{
5225  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5226                                            (__v4di)_mm256_sllv_epi64(__X, __Y),
5227                                            (__v4di)_mm256_setzero_si256());
5228}
5229
5230static __inline__ __m128i __DEFAULT_FN_ATTRS
5231_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5232{
5233  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5234                                             (__v4si)_mm_sllv_epi32(__X, __Y),
5235                                             (__v4si)__W);
5236}
5237
5238static __inline__ __m128i __DEFAULT_FN_ATTRS
5239_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
5240{
5241  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5242                                             (__v4si)_mm_sllv_epi32(__X, __Y),
5243                                             (__v4si)_mm_setzero_si128());
5244}
5245
5246static __inline__ __m256i __DEFAULT_FN_ATTRS
5247_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5248{
5249  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5250                                            (__v8si)_mm256_sllv_epi32(__X, __Y),
5251                                            (__v8si)__W);
5252}
5253
5254static __inline__ __m256i __DEFAULT_FN_ATTRS
5255_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5256{
5257  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5258                                            (__v8si)_mm256_sllv_epi32(__X, __Y),
5259                                            (__v8si)_mm256_setzero_si256());
5260}
5261
5262static __inline__ __m128i __DEFAULT_FN_ATTRS
5263_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5264{
5265  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5266                                             (__v2di)_mm_srlv_epi64(__X, __Y),
5267                                             (__v2di)__W);
5268}
5269
5270static __inline__ __m128i __DEFAULT_FN_ATTRS
5271_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5272{
5273  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5274                                             (__v2di)_mm_srlv_epi64(__X, __Y),
5275                                             (__v2di)_mm_setzero_di());
5276}
5277
5278static __inline__ __m256i __DEFAULT_FN_ATTRS
5279_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5280{
5281  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5282                                            (__v4di)_mm256_srlv_epi64(__X, __Y),
5283                                            (__v4di)__W);
5284}
5285
5286static __inline__ __m256i __DEFAULT_FN_ATTRS
5287_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
5288{
5289  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5290                                            (__v4di)_mm256_srlv_epi64(__X, __Y),
5291                                            (__v4di)_mm256_setzero_si256());
5292}
5293
5294static __inline__ __m128i __DEFAULT_FN_ATTRS
5295_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5296{
5297  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5298                                            (__v4si)_mm_srlv_epi32(__X, __Y),
5299                                            (__v4si)__W);
5300}
5301
5302static __inline__ __m128i __DEFAULT_FN_ATTRS
5303_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
5304{
5305  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5306                                            (__v4si)_mm_srlv_epi32(__X, __Y),
5307                                            (__v4si)_mm_setzero_si128());
5308}
5309
5310static __inline__ __m256i __DEFAULT_FN_ATTRS
5311_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5312{
5313  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5314                                            (__v8si)_mm256_srlv_epi32(__X, __Y),
5315                                            (__v8si)__W);
5316}
5317
5318static __inline__ __m256i __DEFAULT_FN_ATTRS
5319_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5320{
5321  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5322                                            (__v8si)_mm256_srlv_epi32(__X, __Y),
5323                                            (__v8si)_mm256_setzero_si256());
5324}
5325
5326static __inline__ __m128i __DEFAULT_FN_ATTRS
5327_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
5328{
5329  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5330                                             (__v4si)_mm_srl_epi32(__A, __B),
5331                                             (__v4si)__W);
5332}
5333
5334static __inline__ __m128i __DEFAULT_FN_ATTRS
5335_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
5336{
5337  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5338                                             (__v4si)_mm_srl_epi32(__A, __B),
5339                                             (__v4si)_mm_setzero_si128());
5340}
5341
5342static __inline__ __m256i __DEFAULT_FN_ATTRS
5343_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
5344{
5345  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5346                                             (__v8si)_mm256_srl_epi32(__A, __B),
5347                                             (__v8si)__W);
5348}
5349
5350static __inline__ __m256i __DEFAULT_FN_ATTRS
5351_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
5352{
5353  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5354                                             (__v8si)_mm256_srl_epi32(__A, __B),
5355                                             (__v8si)_mm256_setzero_si256());
5356}
5357
5358static __inline__ __m128i __DEFAULT_FN_ATTRS
5359_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
5360{
5361  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5362                                             (__v4si)_mm_srli_epi32(__A, __B),
5363                                             (__v4si)__W);
5364}
5365
5366static __inline__ __m128i __DEFAULT_FN_ATTRS
5367_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
5368{
5369  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5370                                             (__v4si)_mm_srli_epi32(__A, __B),
5371                                             (__v4si)_mm_setzero_si128());
5372}
5373
5374static __inline__ __m256i __DEFAULT_FN_ATTRS
5375_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
5376{
5377  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5378                                             (__v8si)_mm256_srli_epi32(__A, __B),
5379                                             (__v8si)__W);
5380}
5381
5382static __inline__ __m256i __DEFAULT_FN_ATTRS
5383_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
5384{
5385  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5386                                             (__v8si)_mm256_srli_epi32(__A, __B),
5387                                             (__v8si)_mm256_setzero_si256());
5388}
5389
5390static __inline__ __m128i __DEFAULT_FN_ATTRS
5391_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
5392{
5393  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5394                                             (__v2di)_mm_srl_epi64(__A, __B),
5395                                             (__v2di)__W);
5396}
5397
5398static __inline__ __m128i __DEFAULT_FN_ATTRS
5399_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
5400{
5401  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5402                                             (__v2di)_mm_srl_epi64(__A, __B),
5403                                             (__v2di)_mm_setzero_di());
5404}
5405
5406static __inline__ __m256i __DEFAULT_FN_ATTRS
5407_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
5408{
5409  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5410                                             (__v4di)_mm256_srl_epi64(__A, __B),
5411                                             (__v4di)__W);
5412}
5413
5414static __inline__ __m256i __DEFAULT_FN_ATTRS
5415_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
5416{
5417  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5418                                             (__v4di)_mm256_srl_epi64(__A, __B),
5419                                             (__v4di)_mm256_setzero_si256());
5420}
5421
5422static __inline__ __m128i __DEFAULT_FN_ATTRS
5423_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
5424{
5425  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5426                                             (__v2di)_mm_srli_epi64(__A, __B),
5427                                             (__v2di)__W);
5428}
5429
5430static __inline__ __m128i __DEFAULT_FN_ATTRS
5431_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
5432{
5433  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5434                                             (__v2di)_mm_srli_epi64(__A, __B),
5435                                             (__v2di)_mm_setzero_di());
5436}
5437
5438static __inline__ __m256i __DEFAULT_FN_ATTRS
5439_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
5440{
5441  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5442                                             (__v4di)_mm256_srli_epi64(__A, __B),
5443                                             (__v4di)__W);
5444}
5445
5446static __inline__ __m256i __DEFAULT_FN_ATTRS
5447_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
5448{
5449  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5450                                             (__v4di)_mm256_srli_epi64(__A, __B),
5451                                             (__v4di)_mm256_setzero_si256());
5452}
5453
5454static __inline__ __m128i __DEFAULT_FN_ATTRS
5455_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5456{
5457  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5458                                            (__v4si)_mm_srav_epi32(__X, __Y),
5459                                            (__v4si)__W);
5460}
5461
5462static __inline__ __m128i __DEFAULT_FN_ATTRS
5463_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
5464{
5465  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
5466                                            (__v4si)_mm_srav_epi32(__X, __Y),
5467                                            (__v4si)_mm_setzero_si128());
5468}
5469
5470static __inline__ __m256i __DEFAULT_FN_ATTRS
5471_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5472{
5473  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5474                                            (__v8si)_mm256_srav_epi32(__X, __Y),
5475                                            (__v8si)__W);
5476}
5477
5478static __inline__ __m256i __DEFAULT_FN_ATTRS
5479_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
5480{
5481  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
5482                                            (__v8si)_mm256_srav_epi32(__X, __Y),
5483                                            (__v8si)_mm256_setzero_si256());
5484}
5485
5486static __inline__ __m128i __DEFAULT_FN_ATTRS
5487_mm_srav_epi64 (__m128i __X, __m128i __Y)
5488{
5489  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
5490              (__v2di) __Y,
5491              (__v2di)
5492              _mm_setzero_di (),
5493              (__mmask8) -1);
5494}
5495
5496static __inline__ __m128i __DEFAULT_FN_ATTRS
5497_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
5498         __m128i __Y)
5499{
5500  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
5501              (__v2di) __Y,
5502              (__v2di) __W,
5503              (__mmask8) __U);
5504}
5505
5506static __inline__ __m128i __DEFAULT_FN_ATTRS
5507_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
5508{
5509  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
5510              (__v2di) __Y,
5511              (__v2di)
5512              _mm_setzero_di (),
5513              (__mmask8) __U);
5514}
5515
5516static __inline__ __m256i __DEFAULT_FN_ATTRS
5517_mm256_srav_epi64 (__m256i __X, __m256i __Y)
5518{
5519  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
5520              (__v4di) __Y,
5521              (__v4di)
5522              _mm256_setzero_si256 (),
5523              (__mmask8) -1);
5524}
5525
5526static __inline__ __m256i __DEFAULT_FN_ATTRS
5527_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
5528      __m256i __Y)
5529{
5530  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
5531              (__v4di) __Y,
5532              (__v4di) __W,
5533              (__mmask8) __U);
5534}
5535
5536static __inline__ __m256i __DEFAULT_FN_ATTRS
5537_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5538{
5539  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
5540              (__v4di) __Y,
5541              (__v4di)
5542              _mm256_setzero_si256 (),
5543              (__mmask8) __U);
5544}
5545
5546static __inline__ __m128i __DEFAULT_FN_ATTRS
5547_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5548{
5549  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5550                 (__v4si) __A,
5551                 (__v4si) __W);
5552}
5553
5554static __inline__ __m128i __DEFAULT_FN_ATTRS
5555_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
5556{
5557  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5558                 (__v4si) __A,
5559                 (__v4si) _mm_setzero_si128 ());
5560}
5561
5562
5563static __inline__ __m256i __DEFAULT_FN_ATTRS
5564_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5565{
5566  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5567                 (__v8si) __A,
5568                 (__v8si) __W);
5569}
5570
5571static __inline__ __m256i __DEFAULT_FN_ATTRS
5572_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
5573{
5574  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5575                 (__v8si) __A,
5576                 (__v8si) _mm256_setzero_si256 ());
5577}
5578
5579static __inline__ __m128i __DEFAULT_FN_ATTRS
5580_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5581{
5582  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5583              (__v4si) __W,
5584              (__mmask8)
5585              __U);
5586}
5587
5588static __inline__ __m128i __DEFAULT_FN_ATTRS
5589_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5590{
5591  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
5592              (__v4si)
5593              _mm_setzero_si128 (),
5594              (__mmask8)
5595              __U);
5596}
5597
5598static __inline__ __m256i __DEFAULT_FN_ATTRS
5599_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5600{
5601  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5602              (__v8si) __W,
5603              (__mmask8)
5604              __U);
5605}
5606
5607static __inline__ __m256i __DEFAULT_FN_ATTRS
5608_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5609{
5610  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
5611              (__v8si)
5612              _mm256_setzero_si256 (),
5613              (__mmask8)
5614              __U);
5615}
5616
5617static __inline__ void __DEFAULT_FN_ATTRS
5618_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5619{
5620  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5621          (__v4si) __A,
5622          (__mmask8) __U);
5623}
5624
5625static __inline__ void __DEFAULT_FN_ATTRS
5626_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5627{
5628  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5629          (__v8si) __A,
5630          (__mmask8) __U);
5631}
5632
5633static __inline__ __m128i __DEFAULT_FN_ATTRS
5634_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5635{
5636  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5637                 (__v2di) __A,
5638                 (__v2di) __W);
5639}
5640
5641static __inline__ __m128i __DEFAULT_FN_ATTRS
5642_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
5643{
5644  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5645                 (__v2di) __A,
5646                 (__v2di) _mm_setzero_di ());
5647}
5648
5649static __inline__ __m256i __DEFAULT_FN_ATTRS
5650_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5651{
5652  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5653                 (__v4di) __A,
5654                 (__v4di) __W);
5655}
5656
5657static __inline__ __m256i __DEFAULT_FN_ATTRS
5658_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
5659{
5660  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5661                 (__v4di) __A,
5662                 (__v4di) _mm256_setzero_si256 ());
5663}
5664
5665static __inline__ __m128i __DEFAULT_FN_ATTRS
5666_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5667{
5668  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5669              (__v2di) __W,
5670              (__mmask8)
5671              __U);
5672}
5673
5674static __inline__ __m128i __DEFAULT_FN_ATTRS
5675_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5676{
5677  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
5678              (__v2di)
5679              _mm_setzero_di (),
5680              (__mmask8)
5681              __U);
5682}
5683
5684static __inline__ __m256i __DEFAULT_FN_ATTRS
5685_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5686{
5687  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5688              (__v4di) __W,
5689              (__mmask8)
5690              __U);
5691}
5692
5693static __inline__ __m256i __DEFAULT_FN_ATTRS
5694_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5695{
5696  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
5697              (__v4di)
5698              _mm256_setzero_si256 (),
5699              (__mmask8)
5700              __U);
5701}
5702
5703static __inline__ void __DEFAULT_FN_ATTRS
5704_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5705{
5706  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5707          (__v2di) __A,
5708          (__mmask8) __U);
5709}
5710
5711static __inline__ void __DEFAULT_FN_ATTRS
5712_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5713{
5714  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5715          (__v4di) __A,
5716          (__mmask8) __U);
5717}
5718
5719static __inline__ __m128d __DEFAULT_FN_ATTRS
5720_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5721{
5722  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5723                                              (__v2df)_mm_movedup_pd(__A),
5724                                              (__v2df)__W);
5725}
5726
5727static __inline__ __m128d __DEFAULT_FN_ATTRS
5728_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5729{
5730  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5731                                              (__v2df)_mm_movedup_pd(__A),
5732                                              (__v2df)_mm_setzero_pd());
5733}
5734
5735static __inline__ __m256d __DEFAULT_FN_ATTRS
5736_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5737{
5738  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5739                                              (__v4df)_mm256_movedup_pd(__A),
5740                                              (__v4df)__W);
5741}
5742
5743static __inline__ __m256d __DEFAULT_FN_ATTRS
5744_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5745{
5746  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5747                                              (__v4df)_mm256_movedup_pd(__A),
5748                                              (__v4df)_mm256_setzero_pd());
5749}
5750
5751
/* Forwards (int)(A) to the pbroadcastd128 GPR builtin with O as the
   pass-through operand and M as the mask. */
#define _mm_mask_set1_epi32(O, M, A) \
  ((__m128i)__builtin_ia32_pbroadcastd128_gpr_mask( \
      (int)(A), (__v4si)(__m128i)(O), (__mmask8)(M)))
5756
/* Forwards (int)(A) to the pbroadcastd128 GPR builtin with a zero vector as
   the pass-through operand and M as the mask. */
#define _mm_maskz_set1_epi32(M, A) \
  ((__m128i)__builtin_ia32_pbroadcastd128_gpr_mask( \
      (int)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(M)))
5761
/* Forwards (int)(A) to the pbroadcastd256 GPR builtin with O as the
   pass-through operand and M as the mask. */
#define _mm256_mask_set1_epi32(O, M, A) \
  ((__m256i)__builtin_ia32_pbroadcastd256_gpr_mask( \
      (int)(A), (__v8si)(__m256i)(O), (__mmask8)(M)))
5766
/* Forwards (int)(A) to the pbroadcastd256 GPR builtin with a zero vector as
   the pass-through operand and M as the mask. */
#define _mm256_maskz_set1_epi32(M, A) \
  ((__m256i)__builtin_ia32_pbroadcastd256_gpr_mask( \
      (int)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(M)))
5771
5772#ifdef __x86_64__
5773static __inline__ __m128i __DEFAULT_FN_ATTRS
5774_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5775{
5776  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
5777                 __M);
5778}
5779
5780static __inline__ __m128i __DEFAULT_FN_ATTRS
5781_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5782{
5783  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A,
5784                 (__v2di)
5785                 _mm_setzero_si128 (),
5786                 __M);
5787}
5788
5789static __inline__ __m256i __DEFAULT_FN_ATTRS
5790_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5791{
5792  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
5793                 __M);
5794}
5795
5796static __inline__ __m256i __DEFAULT_FN_ATTRS
5797_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
5798{
5799  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
5800                 (__v4di)
5801                 _mm256_setzero_si256 (),
5802                 __M);
5803}
5804#endif
5805
/* Unmasked fixupimm on 128-bit double vectors: A, B, C and imm are forwarded
   to the _mask builtin with an all-ones mask. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5811
/* Masked fixupimm on 128-bit double vectors: A, B, C and imm are forwarded
   to the _mask builtin with U as the mask. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5817
/* Zero-masked fixupimm on 128-bit double vectors: A, B, C and imm are
   forwarded to the _maskz builtin with U as the mask. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5823
/* Unmasked fixupimm on 256-bit double vectors: A, B, C and imm are forwarded
   to the _mask builtin with an all-ones mask. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5829
/* Masked fixupimm on 256-bit double vectors: A, B, C and imm are forwarded
   to the _mask builtin with U as the mask. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5835
/* Zero-masked fixupimm on 256-bit double vectors: A, B, C and imm are
   forwarded to the _maskz builtin with U as the mask. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5841
/* Unmasked fixupimm on 128-bit float vectors: A, B, C and imm are forwarded
   to the _mask builtin with an all-ones mask. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1))
5847
/* Masked fixupimm on 128-bit float vectors: A, B, C and imm are forwarded
   to the _mask builtin with U as the mask. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5853
/* Zero-masked fixupimm on 128-bit float vectors: A, B, C and imm are
   forwarded to the _maskz builtin with U as the mask. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5859
/* Unmasked fixupimm on 256-bit float vectors: A, B, C and imm are forwarded
   to the _mask builtin with an all-ones mask. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)-1))
5865
/* Masked fixupimm on 256-bit float vectors: A, B, C and imm are forwarded
   to the _mask builtin with U as the mask. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5871
/* Zero-masked fixupimm on 256-bit float vectors: A, B, C and imm are
   forwarded to the _maskz builtin with U as the mask. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5877
5878static __inline__ __m128d __DEFAULT_FN_ATTRS
5879_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5880{
5881  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5882               (__v2df) __W,
5883               (__mmask8) __U);
5884}
5885
5886static __inline__ __m128d __DEFAULT_FN_ATTRS
5887_mm_maskz_load_pd (__mmask8 __U, void const *__P)
5888{
5889  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
5890               (__v2df)
5891               _mm_setzero_pd (),
5892               (__mmask8) __U);
5893}
5894
5895static __inline__ __m256d __DEFAULT_FN_ATTRS
5896_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5897{
5898  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5899               (__v4df) __W,
5900               (__mmask8) __U);
5901}
5902
5903static __inline__ __m256d __DEFAULT_FN_ATTRS
5904_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5905{
5906  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
5907               (__v4df)
5908               _mm256_setzero_pd (),
5909               (__mmask8) __U);
5910}
5911
5912static __inline__ __m128 __DEFAULT_FN_ATTRS
5913_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5914{
5915  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5916              (__v4sf) __W,
5917              (__mmask8) __U);
5918}
5919
5920static __inline__ __m128 __DEFAULT_FN_ATTRS
5921_mm_maskz_load_ps (__mmask8 __U, void const *__P)
5922{
5923  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
5924              (__v4sf)
5925              _mm_setzero_ps (),
5926              (__mmask8) __U);
5927}
5928
5929static __inline__ __m256 __DEFAULT_FN_ATTRS
5930_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5931{
5932  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5933              (__v8sf) __W,
5934              (__mmask8) __U);
5935}
5936
5937static __inline__ __m256 __DEFAULT_FN_ATTRS
5938_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5939{
5940  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
5941              (__v8sf)
5942              _mm256_setzero_ps (),
5943              (__mmask8) __U);
5944}
5945
5946static __inline__ __m128i __DEFAULT_FN_ATTRS
5947_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5948{
5949  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5950                 (__v2di) __W,
5951                 (__mmask8) __U);
5952}
5953
5954static __inline__ __m128i __DEFAULT_FN_ATTRS
5955_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5956{
5957  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
5958                 (__v2di)
5959                 _mm_setzero_si128 (),
5960                 (__mmask8) __U);
5961}
5962
5963static __inline__ __m256i __DEFAULT_FN_ATTRS
5964_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5965{
5966  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5967                 (__v4di) __W,
5968                 (__mmask8) __U);
5969}
5970
5971static __inline__ __m256i __DEFAULT_FN_ATTRS
5972_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5973{
5974  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
5975                 (__v4di)
5976                 _mm256_setzero_si256 (),
5977                 (__mmask8) __U);
5978}
5979
5980static __inline__ __m128i __DEFAULT_FN_ATTRS
5981_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5982{
5983  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5984                 (__v4si) __W,
5985                 (__mmask8) __U);
5986}
5987
5988static __inline__ __m128i __DEFAULT_FN_ATTRS
5989_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5990{
5991  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
5992                 (__v4si)
5993                 _mm_setzero_si128 (),
5994                 (__mmask8) __U);
5995}
5996
5997static __inline__ __m256i __DEFAULT_FN_ATTRS
5998_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5999{
6000  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
6001                 (__v8si) __W,
6002                 (__mmask8) __U);
6003}
6004
6005static __inline__ __m256i __DEFAULT_FN_ATTRS
6006_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
6007{
6008  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
6009                 (__v8si)
6010                 _mm256_setzero_si256 (),
6011                 (__mmask8) __U);
6012}
6013
6014static __inline__ __m128d __DEFAULT_FN_ATTRS
6015_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6016{
6017  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
6018               (__v2df) __W,
6019               (__mmask8) __U);
6020}
6021
6022static __inline__ __m128d __DEFAULT_FN_ATTRS
6023_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
6024{
6025  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
6026               (__v2df)
6027               _mm_setzero_pd (),
6028               (__mmask8) __U);
6029}
6030
6031static __inline__ __m256d __DEFAULT_FN_ATTRS
6032_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6033{
6034  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
6035               (__v4df) __W,
6036               (__mmask8) __U);
6037}
6038
6039static __inline__ __m256d __DEFAULT_FN_ATTRS
6040_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
6041{
6042  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
6043               (__v4df)
6044               _mm256_setzero_pd (),
6045               (__mmask8) __U);
6046}
6047
6048static __inline__ __m128 __DEFAULT_FN_ATTRS
6049_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6050{
6051  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
6052              (__v4sf) __W,
6053              (__mmask8) __U);
6054}
6055
6056static __inline__ __m128 __DEFAULT_FN_ATTRS
6057_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
6058{
6059  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
6060              (__v4sf)
6061              _mm_setzero_ps (),
6062              (__mmask8) __U);
6063}
6064
6065static __inline__ __m256 __DEFAULT_FN_ATTRS
6066_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6067{
6068  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
6069              (__v8sf) __W,
6070              (__mmask8) __U);
6071}
6072
6073static __inline__ __m256 __DEFAULT_FN_ATTRS
6074_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
6075{
6076  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
6077              (__v8sf)
6078              _mm256_setzero_ps (),
6079              (__mmask8) __U);
6080}
6081
6082static __inline__ void __DEFAULT_FN_ATTRS
6083_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
6084{
6085  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
6086           (__v2df) __A,
6087           (__mmask8) __U);
6088}
6089
6090static __inline__ void __DEFAULT_FN_ATTRS
6091_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
6092{
6093  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
6094           (__v4df) __A,
6095           (__mmask8) __U);
6096}
6097
6098static __inline__ void __DEFAULT_FN_ATTRS
6099_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
6100{
6101  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
6102           (__v4sf) __A,
6103           (__mmask8) __U);
6104}
6105
6106static __inline__ void __DEFAULT_FN_ATTRS
6107_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
6108{
6109  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
6110           (__v8sf) __A,
6111           (__mmask8) __U);
6112}
6113
6114static __inline__ void __DEFAULT_FN_ATTRS
6115_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
6116{
6117  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
6118             (__v2di) __A,
6119             (__mmask8) __U);
6120}
6121
6122static __inline__ void __DEFAULT_FN_ATTRS
6123_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
6124{
6125  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
6126             (__v4di) __A,
6127             (__mmask8) __U);
6128}
6129
6130static __inline__ void __DEFAULT_FN_ATTRS
6131_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
6132{
6133  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
6134             (__v4si) __A,
6135             (__mmask8) __U);
6136}
6137
6138static __inline__ void __DEFAULT_FN_ATTRS
6139_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
6140{
6141  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
6142             (__v8si) __A,
6143             (__mmask8) __U);
6144}
6145
6146static __inline__ void __DEFAULT_FN_ATTRS
6147_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
6148{
6149  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
6150           (__v2df) __A,
6151           (__mmask8) __U);
6152}
6153
6154static __inline__ void __DEFAULT_FN_ATTRS
6155_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
6156{
6157  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
6158           (__v4df) __A,
6159           (__mmask8) __U);
6160}
6161
6162static __inline__ void __DEFAULT_FN_ATTRS
6163_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
6164{
6165  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
6166           (__v4sf) __A,
6167           (__mmask8) __U);
6168}
6169
6170static __inline__ void __DEFAULT_FN_ATTRS
6171_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
6172{
6173  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
6174           (__v8sf) __A,
6175           (__mmask8) __U);
6176}
6177
6178
6179static __inline__ __m128d __DEFAULT_FN_ATTRS
6180_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6181{
6182  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6183                                              (__v2df)_mm_unpackhi_pd(__A, __B),
6184                                              (__v2df)__W);
6185}
6186
6187static __inline__ __m128d __DEFAULT_FN_ATTRS
6188_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
6189{
6190  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6191                                              (__v2df)_mm_unpackhi_pd(__A, __B),
6192                                              (__v2df)_mm_setzero_pd());
6193}
6194
6195static __inline__ __m256d __DEFAULT_FN_ATTRS
6196_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
6197{
6198  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6199                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
6200                                           (__v4df)__W);
6201}
6202
6203static __inline__ __m256d __DEFAULT_FN_ATTRS
6204_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
6205{
6206  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6207                                           (__v4df)_mm256_unpackhi_pd(__A, __B),
6208                                           (__v4df)_mm256_setzero_pd());
6209}
6210
6211static __inline__ __m128 __DEFAULT_FN_ATTRS
6212_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6213{
6214  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6215                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
6216                                             (__v4sf)__W);
6217}
6218
6219static __inline__ __m128 __DEFAULT_FN_ATTRS
6220_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
6221{
6222  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6223                                             (__v4sf)_mm_unpackhi_ps(__A, __B),
6224                                             (__v4sf)_mm_setzero_ps());
6225}
6226
6227static __inline__ __m256 __DEFAULT_FN_ATTRS
6228_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
6229{
6230  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6231                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
6232                                           (__v8sf)__W);
6233}
6234
6235static __inline__ __m256 __DEFAULT_FN_ATTRS
6236_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
6237{
6238  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6239                                           (__v8sf)_mm256_unpackhi_ps(__A, __B),
6240                                           (__v8sf)_mm256_setzero_ps());
6241}
6242
6243static __inline__ __m128d __DEFAULT_FN_ATTRS
6244_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6245{
6246  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6247                                              (__v2df)_mm_unpacklo_pd(__A, __B),
6248                                              (__v2df)__W);
6249}
6250
6251static __inline__ __m128d __DEFAULT_FN_ATTRS
6252_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
6253{
6254  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6255                                              (__v2df)_mm_unpacklo_pd(__A, __B),
6256                                              (__v2df)_mm_setzero_pd());
6257}
6258
6259static __inline__ __m256d __DEFAULT_FN_ATTRS
6260_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
6261{
6262  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6263                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
6264                                           (__v4df)__W);
6265}
6266
6267static __inline__ __m256d __DEFAULT_FN_ATTRS
6268_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
6269{
6270  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6271                                           (__v4df)_mm256_unpacklo_pd(__A, __B),
6272                                           (__v4df)_mm256_setzero_pd());
6273}
6274
6275static __inline__ __m128 __DEFAULT_FN_ATTRS
6276_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6277{
6278  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6279                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
6280                                             (__v4sf)__W);
6281}
6282
6283static __inline__ __m128 __DEFAULT_FN_ATTRS
6284_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
6285{
6286  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6287                                             (__v4sf)_mm_unpacklo_ps(__A, __B),
6288                                             (__v4sf)_mm_setzero_ps());
6289}
6290
6291static __inline__ __m256 __DEFAULT_FN_ATTRS
6292_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
6293{
6294  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6295                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
6296                                           (__v8sf)__W);
6297}
6298
6299static __inline__ __m256 __DEFAULT_FN_ATTRS
6300_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
6301{
6302  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6303                                           (__v8sf)_mm256_unpacklo_ps(__A, __B),
6304                                           (__v8sf)_mm256_setzero_ps());
6305}
6306
6307static __inline__ __m128d __DEFAULT_FN_ATTRS
6308_mm_rcp14_pd (__m128d __A)
6309{
6310  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
6311                (__v2df)
6312                _mm_setzero_pd (),
6313                (__mmask8) -1);
6314}
6315
6316static __inline__ __m128d __DEFAULT_FN_ATTRS
6317_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6318{
6319  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
6320                (__v2df) __W,
6321                (__mmask8) __U);
6322}
6323
6324static __inline__ __m128d __DEFAULT_FN_ATTRS
6325_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
6326{
6327  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
6328                (__v2df)
6329                _mm_setzero_pd (),
6330                (__mmask8) __U);
6331}
6332
6333static __inline__ __m256d __DEFAULT_FN_ATTRS
6334_mm256_rcp14_pd (__m256d __A)
6335{
6336  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
6337                (__v4df)
6338                _mm256_setzero_pd (),
6339                (__mmask8) -1);
6340}
6341
6342static __inline__ __m256d __DEFAULT_FN_ATTRS
6343_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6344{
6345  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
6346                (__v4df) __W,
6347                (__mmask8) __U);
6348}
6349
6350static __inline__ __m256d __DEFAULT_FN_ATTRS
6351_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
6352{
6353  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
6354                (__v4df)
6355                _mm256_setzero_pd (),
6356                (__mmask8) __U);
6357}
6358
6359static __inline__ __m128 __DEFAULT_FN_ATTRS
6360_mm_rcp14_ps (__m128 __A)
6361{
6362  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6363               (__v4sf)
6364               _mm_setzero_ps (),
6365               (__mmask8) -1);
6366}
6367
6368static __inline__ __m128 __DEFAULT_FN_ATTRS
6369_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6370{
6371  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6372               (__v4sf) __W,
6373               (__mmask8) __U);
6374}
6375
6376static __inline__ __m128 __DEFAULT_FN_ATTRS
6377_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
6378{
6379  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6380               (__v4sf)
6381               _mm_setzero_ps (),
6382               (__mmask8) __U);
6383}
6384
6385static __inline__ __m256 __DEFAULT_FN_ATTRS
6386_mm256_rcp14_ps (__m256 __A)
6387{
6388  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6389               (__v8sf)
6390               _mm256_setzero_ps (),
6391               (__mmask8) -1);
6392}
6393
6394static __inline__ __m256 __DEFAULT_FN_ATTRS
6395_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6396{
6397  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6398               (__v8sf) __W,
6399               (__mmask8) __U);
6400}
6401
6402static __inline__ __m256 __DEFAULT_FN_ATTRS
6403_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
6404{
6405  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6406               (__v8sf)
6407               _mm256_setzero_ps (),
6408               (__mmask8) __U);
6409}
6410
/* Expands to a statement expression that evaluates _mm_permute_pd(X, C)
   and passes the result, together with W (cast to __m128d) and the mask U,
   to the 128-bit pd select builtin.
   NOTE(review): C is forwarded untouched to _mm_permute_pd; presumably it
   must be a compile-time constant there -- confirm against avxintrin.h. */
#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_permute_pd((X), (C)), \
                                       (__v2df)(__m128d)(W)); })
6415
/* Expands to a statement expression that evaluates _mm_permute_pd(X, C)
   and passes the result, together with an all-zero vector and the mask U,
   to the 128-bit pd select builtin.
   NOTE(review): C is forwarded untouched to _mm_permute_pd; presumably it
   must be a compile-time constant there -- confirm against avxintrin.h. */
#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_permute_pd((X), (C)), \
                                       (__v2df)_mm_setzero_pd()); })
6420
/* Expands to a statement expression that evaluates _mm256_permute_pd(X, C)
   and passes the result, together with W (cast to __m256d) and the mask U,
   to the 256-bit pd select builtin.
   NOTE(review): C is forwarded untouched to _mm256_permute_pd; presumably
   it must be a compile-time constant there -- confirm against avxintrin.h. */
#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permute_pd((X), (C)), \
                                       (__v4df)(__m256d)(W)); })
6425
/* Expands to a statement expression that evaluates _mm256_permute_pd(X, C)
   and passes the result, together with an all-zero vector and the mask U,
   to the 256-bit pd select builtin.
   NOTE(review): C is forwarded untouched to _mm256_permute_pd; presumably
   it must be a compile-time constant there -- confirm against avxintrin.h. */
#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permute_pd((X), (C)), \
                                       (__v4df)_mm256_setzero_pd()); })
6430
/* Expands to a statement expression that evaluates _mm_permute_ps(X, C)
   and passes the result, together with W (cast to __m128) and the mask U,
   to the 128-bit ps select builtin.
   NOTE(review): C is forwarded untouched to _mm_permute_ps; presumably it
   must be a compile-time constant there -- confirm against avxintrin.h. */
#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_permute_ps((X), (C)), \
                                      (__v4sf)(__m128)(W)); })
6435
/* Expands to a statement expression that evaluates _mm_permute_ps(X, C)
   and passes the result, together with an all-zero vector and the mask U,
   to the 128-bit ps select builtin.
   NOTE(review): C is forwarded untouched to _mm_permute_ps; presumably it
   must be a compile-time constant there -- confirm against avxintrin.h. */
#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_permute_ps((X), (C)), \
                                      (__v4sf)_mm_setzero_ps()); })
6440
/* Expands to a statement expression that evaluates _mm256_permute_ps(X, C)
   and passes the result, together with W (cast to __m256) and the mask U,
   to the 256-bit ps select builtin.
   NOTE(review): C is forwarded untouched to _mm256_permute_ps; presumably
   it must be a compile-time constant there -- confirm against avxintrin.h. */
#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_permute_ps((X), (C)), \
                                      (__v8sf)(__m256)(W)); })
6445
/* Expands to a statement expression that evaluates _mm256_permute_ps(X, C)
   and passes the result, together with an all-zero vector and the mask U,
   to the 256-bit ps select builtin.
   NOTE(review): C is forwarded untouched to _mm256_permute_ps; presumably
   it must be a compile-time constant there -- confirm against avxintrin.h. */
#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_permute_ps((X), (C)), \
                                      (__v8sf)_mm256_setzero_ps()); })
6450
6451static __inline__ __m128d __DEFAULT_FN_ATTRS
6452_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
6453      __m128i __C)
6454{
6455  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
6456                 (__v2di) __C,
6457                 (__v2df) __W,
6458                 (__mmask8) __U);
6459}
6460
6461static __inline__ __m128d __DEFAULT_FN_ATTRS
6462_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
6463{
6464  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
6465                 (__v2di) __C,
6466                 (__v2df)
6467                 _mm_setzero_pd (),
6468                 (__mmask8) __U);
6469}
6470
6471static __inline__ __m256d __DEFAULT_FN_ATTRS
6472_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
6473         __m256i __C)
6474{
6475  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
6476              (__v4di) __C,
6477              (__v4df) __W,
6478              (__mmask8)
6479              __U);
6480}
6481
6482static __inline__ __m256d __DEFAULT_FN_ATTRS
6483_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
6484{
6485  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
6486              (__v4di) __C,
6487              (__v4df)
6488              _mm256_setzero_pd (),
6489              (__mmask8)
6490              __U);
6491}
6492
6493static __inline__ __m128 __DEFAULT_FN_ATTRS
6494_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
6495      __m128i __C)
6496{
6497  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
6498                (__v4si) __C,
6499                (__v4sf) __W,
6500                (__mmask8) __U);
6501}
6502
6503static __inline__ __m128 __DEFAULT_FN_ATTRS
6504_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
6505{
6506  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
6507                (__v4si) __C,
6508                (__v4sf)
6509                _mm_setzero_ps (),
6510                (__mmask8) __U);
6511}
6512
6513static __inline__ __m256 __DEFAULT_FN_ATTRS
6514_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
6515         __m256i __C)
6516{
6517  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
6518                   (__v8si) __C,
6519                   (__v8sf) __W,
6520                   (__mmask8) __U);
6521}
6522
6523static __inline__ __m256 __DEFAULT_FN_ATTRS
6524_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
6525{
6526  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
6527                   (__v8si) __C,
6528                   (__v8sf)
6529                   _mm256_setzero_ps (),
6530                   (__mmask8) __U);
6531}
6532
6533static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6534_mm_test_epi32_mask (__m128i __A, __m128i __B)
6535{
6536  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
6537                 (__v4si) __B,
6538                 (__mmask8) -1);
6539}
6540
6541static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6542_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6543{
6544  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
6545                 (__v4si) __B, __U);
6546}
6547
6548static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6549_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6550{
6551  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
6552                 (__v8si) __B,
6553                 (__mmask8) -1);
6554}
6555
6556static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6557_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6558{
6559  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
6560                 (__v8si) __B, __U);
6561}
6562
6563static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6564_mm_test_epi64_mask (__m128i __A, __m128i __B)
6565{
6566  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
6567                 (__v2di) __B,
6568                 (__mmask8) -1);
6569}
6570
6571static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6572_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6573{
6574  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
6575                 (__v2di) __B, __U);
6576}
6577
6578static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6579_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6580{
6581  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
6582                 (__v4di) __B,
6583                 (__mmask8) -1);
6584}
6585
6586static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6587_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6588{
6589  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
6590                 (__v4di) __B, __U);
6591}
6592
6593static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6594_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6595{
6596  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
6597            (__v4si) __B,
6598            (__mmask8) -1);
6599}
6600
6601static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6602_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6603{
6604  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
6605            (__v4si) __B, __U);
6606}
6607
6608static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6609_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6610{
6611  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
6612            (__v8si) __B,
6613            (__mmask8) -1);
6614}
6615
6616static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6617_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6618{
6619  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
6620            (__v8si) __B, __U);
6621}
6622
6623static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6624_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6625{
6626  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
6627            (__v2di) __B,
6628            (__mmask8) -1);
6629}
6630
6631static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6632_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6633{
6634  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
6635            (__v2di) __B, __U);
6636}
6637
6638static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6639_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6640{
6641  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
6642            (__v4di) __B,
6643            (__mmask8) -1);
6644}
6645
6646static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6647_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6648{
6649  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
6650            (__v4di) __B, __U);
6651}
6652
6653
6654
6655static __inline__ __m128i __DEFAULT_FN_ATTRS
6656_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6657{
6658  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6659                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6660                                           (__v4si)__W);
6661}
6662
6663static __inline__ __m128i __DEFAULT_FN_ATTRS
6664_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6665{
6666  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6667                                           (__v4si)_mm_unpackhi_epi32(__A, __B),
6668                                           (__v4si)_mm_setzero_si128());
6669}
6670
6671static __inline__ __m256i __DEFAULT_FN_ATTRS
6672_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6673{
6674  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6675                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6676                                        (__v8si)__W);
6677}
6678
6679static __inline__ __m256i __DEFAULT_FN_ATTRS
6680_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6681{
6682  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6683                                        (__v8si)_mm256_unpackhi_epi32(__A, __B),
6684                                        (__v8si)_mm256_setzero_si256());
6685}
6686
6687static __inline__ __m128i __DEFAULT_FN_ATTRS
6688_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6689{
6690  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6691                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6692                                           (__v2di)__W);
6693}
6694
6695static __inline__ __m128i __DEFAULT_FN_ATTRS
6696_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6697{
6698  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6699                                           (__v2di)_mm_unpackhi_epi64(__A, __B),
6700                                           (__v2di)_mm_setzero_di());
6701}
6702
6703static __inline__ __m256i __DEFAULT_FN_ATTRS
6704_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6705{
6706  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6707                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6708                                        (__v4di)__W);
6709}
6710
6711static __inline__ __m256i __DEFAULT_FN_ATTRS
6712_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6713{
6714  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6715                                        (__v4di)_mm256_unpackhi_epi64(__A, __B),
6716                                        (__v4di)_mm256_setzero_si256());
6717}
6718
6719static __inline__ __m128i __DEFAULT_FN_ATTRS
6720_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6721{
6722  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6723                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6724                                           (__v4si)__W);
6725}
6726
6727static __inline__ __m128i __DEFAULT_FN_ATTRS
6728_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6729{
6730  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6731                                           (__v4si)_mm_unpacklo_epi32(__A, __B),
6732                                           (__v4si)_mm_setzero_si128());
6733}
6734
6735static __inline__ __m256i __DEFAULT_FN_ATTRS
6736_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6737{
6738  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6739                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6740                                        (__v8si)__W);
6741}
6742
6743static __inline__ __m256i __DEFAULT_FN_ATTRS
6744_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6745{
6746  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6747                                        (__v8si)_mm256_unpacklo_epi32(__A, __B),
6748                                        (__v8si)_mm256_setzero_si256());
6749}
6750
6751static __inline__ __m128i __DEFAULT_FN_ATTRS
6752_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6753{
6754  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6755                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6756                                           (__v2di)__W);
6757}
6758
6759static __inline__ __m128i __DEFAULT_FN_ATTRS
6760_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6761{
6762  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6763                                           (__v2di)_mm_unpacklo_epi64(__A, __B),
6764                                           (__v2di)_mm_setzero_di());
6765}
6766
6767static __inline__ __m256i __DEFAULT_FN_ATTRS
6768_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6769{
6770  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6771                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6772                                        (__v4di)__W);
6773}
6774
6775static __inline__ __m256i __DEFAULT_FN_ATTRS
6776_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6777{
6778  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6779                                        (__v4di)_mm256_unpacklo_epi64(__A, __B),
6780                                        (__v4di)_mm256_setzero_si256());
6781}
6782
6783static __inline__ __m128i __DEFAULT_FN_ATTRS
6784_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6785{
6786  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6787                                             (__v4si)_mm_sra_epi32(__A, __B),
6788                                             (__v4si)__W);
6789}
6790
6791static __inline__ __m128i __DEFAULT_FN_ATTRS
6792_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6793{
6794  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6795                                             (__v4si)_mm_sra_epi32(__A, __B),
6796                                             (__v4si)_mm_setzero_si128());
6797}
6798
6799static __inline__ __m256i __DEFAULT_FN_ATTRS
6800_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6801{
6802  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6803                                             (__v8si)_mm256_sra_epi32(__A, __B),
6804                                             (__v8si)__W);
6805}
6806
6807static __inline__ __m256i __DEFAULT_FN_ATTRS
6808_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6809{
6810  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6811                                             (__v8si)_mm256_sra_epi32(__A, __B),
6812                                             (__v8si)_mm256_setzero_si256());
6813}
6814
6815static __inline__ __m128i __DEFAULT_FN_ATTRS
6816_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
6817{
6818  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6819                                             (__v4si)_mm_srai_epi32(__A, __B),
6820                                             (__v4si)__W);
6821}
6822
6823static __inline__ __m128i __DEFAULT_FN_ATTRS
6824_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
6825{
6826  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6827                                             (__v4si)_mm_srai_epi32(__A, __B),
6828                                             (__v4si)_mm_setzero_si128());
6829}
6830
6831static __inline__ __m256i __DEFAULT_FN_ATTRS
6832_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
6833{
6834  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6835                                             (__v8si)_mm256_srai_epi32(__A, __B),
6836                                             (__v8si)__W);
6837}
6838
6839static __inline__ __m256i __DEFAULT_FN_ATTRS
6840_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
6841{
6842  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6843                                             (__v8si)_mm256_srai_epi32(__A, __B),
6844                                             (__v8si)_mm256_setzero_si256());
6845}
6846
6847static __inline__ __m128i __DEFAULT_FN_ATTRS
6848_mm_sra_epi64 (__m128i __A, __m128i __B)
6849{
6850  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
6851             (__v2di) __B,
6852             (__v2di)
6853             _mm_setzero_di (),
6854             (__mmask8) -1);
6855}
6856
6857static __inline__ __m128i __DEFAULT_FN_ATTRS
6858_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
6859        __m128i __B)
6860{
6861  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
6862             (__v2di) __B,
6863             (__v2di) __W,
6864             (__mmask8) __U);
6865}
6866
6867static __inline__ __m128i __DEFAULT_FN_ATTRS
6868_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
6869{
6870  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
6871             (__v2di) __B,
6872             (__v2di)
6873             _mm_setzero_di (),
6874             (__mmask8) __U);
6875}
6876
6877static __inline__ __m256i __DEFAULT_FN_ATTRS
6878_mm256_sra_epi64 (__m256i __A, __m128i __B)
6879{
6880  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
6881             (__v2di) __B,
6882             (__v4di)
6883             _mm256_setzero_si256 (),
6884             (__mmask8) -1);
6885}
6886
6887static __inline__ __m256i __DEFAULT_FN_ATTRS
6888_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
6889           __m128i __B)
6890{
6891  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
6892             (__v2di) __B,
6893             (__v4di) __W,
6894             (__mmask8) __U);
6895}
6896
6897static __inline__ __m256i __DEFAULT_FN_ATTRS
6898_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
6899{
6900  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
6901             (__v2di) __B,
6902             (__v4di)
6903             _mm256_setzero_si256 (),
6904             (__mmask8) __U);
6905}
6906
/* _mm_srai_epi64(A, imm): expands to the psraqi128 mask builtin applied to A
 * and (int)imm, with _mm_setzero_di() as the third operand and (__mmask8)-1
 * as the mask. Kept as a macro; presumably imm must be a compile-time
 * constant for the builtin -- TODO confirm. */
#define _mm_srai_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
                                         (__v2di)_mm_setzero_di(), \
                                         (__mmask8)-1); })
6911
/* _mm_mask_srai_epi64(W, U, A, imm): expands to the psraqi128 mask builtin
 * applied to A and (int)imm, with W as the third operand and U as the mask. */
#define _mm_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
                                         (__v2di)(__m128i)(W), \
                                         (__mmask8)(U)); })
6916
/* _mm_maskz_srai_epi64(U, A, imm): expands to the psraqi128 mask builtin
 * applied to A and (int)imm, with a zero vector as the third operand and U as
 * the mask.
 * NOTE(review): the zero vector comes from _mm_setzero_si128() here, whereas
 * _mm_srai_epi64 uses _mm_setzero_di(); presumably equivalent -- confirm. */
#define _mm_maskz_srai_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \
                                         (__v2di)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })
6921
/* _mm256_srai_epi64(A, imm): expands to the psraqi256 mask builtin applied to
 * A and (int)imm, with _mm256_setzero_si256() as the third operand and
 * (__mmask8)-1 as the mask. */
#define _mm256_srai_epi64(A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
                                         (__v4di)_mm256_setzero_si256(), \
                                         (__mmask8)-1); })
6926
/* _mm256_mask_srai_epi64(W, U, A, imm): expands to the psraqi256 mask builtin
 * applied to A and (int)imm, with W as the third operand and U as the mask. */
#define _mm256_mask_srai_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
                                         (__v4di)(__m256i)(W), \
                                         (__mmask8)(U)); })
6931
/* _mm256_maskz_srai_epi64(U, A, imm): expands to the psraqi256 mask builtin
 * applied to A and (int)imm, with _mm256_setzero_si256() as the third operand
 * and U as the mask. */
#define _mm256_maskz_srai_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \
                                         (__v4di)_mm256_setzero_si256(), \
                                         (__mmask8)(U)); })
6936
/* _mm_ternarylogic_epi32(A, B, C, imm): expands to the pternlogd128 mask
 * builtin with the three vectors viewed as __v4si, the immediate (int)imm,
 * and (__mmask8)-1 as the mask. */
#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })
6942
/* _mm_mask_ternarylogic_epi32(A, U, B, C, imm): same pternlogd128 mask builtin
 * as the unmasked form, but with U as the mask. Note the parameter order: the
 * first vector A precedes the mask U. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })
6948
/* _mm_maskz_ternarylogic_epi32(U, A, B, C, imm): expands to the pternlogd128
 * _maskz builtin (a distinct builtin from the _mask one) with vectors A, B, C
 * viewed as __v4si, the immediate (int)imm, and U as the mask. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
6954
/* _mm256_ternarylogic_epi32(A, B, C, imm): expands to the pternlogd256 mask
 * builtin with the three vectors viewed as __v8si, the immediate (int)imm,
 * and (__mmask8)-1 as the mask. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })
6960
/* _mm256_mask_ternarylogic_epi32(A, U, B, C, imm): same pternlogd256 mask
 * builtin as the unmasked form, but with U as the mask. Note the parameter
 * order: the first vector A precedes the mask U. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })
6966
/* _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm): expands to the
 * pternlogd256 _maskz builtin with vectors A, B, C viewed as __v8si, the
 * immediate (int)imm, and U as the mask. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
6972
/* _mm_ternarylogic_epi64(A, B, C, imm): expands to the pternlogq128 mask
 * builtin with the three vectors viewed as __v2di, the immediate (int)imm,
 * and (__mmask8)-1 as the mask. */
#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })
6978
/* _mm_mask_ternarylogic_epi64(A, U, B, C, imm): same pternlogq128 mask builtin
 * as the unmasked form, but with U as the mask. Note the parameter order: the
 * first vector A precedes the mask U. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })
6984
/* _mm_maskz_ternarylogic_epi64(U, A, B, C, imm): expands to the pternlogq128
 * _maskz builtin with vectors A, B, C viewed as __v2di, the immediate
 * (int)imm, and U as the mask. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
6990
/* _mm256_ternarylogic_epi64(A, B, C, imm): expands to the pternlogq256 mask
 * builtin with the three vectors viewed as __v4di, the immediate (int)imm,
 * and (__mmask8)-1 as the mask. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (__v4di)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })
6996
/* _mm256_mask_ternarylogic_epi64(A, U, B, C, imm): same pternlogq256 mask
 * builtin as the unmasked form, but with U as the mask. Note the parameter
 * order: the first vector A precedes the mask U. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (__v4di)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })
7002
/* _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm): expands to the
 * pternlogq256 _maskz builtin with vectors A, B, C viewed as __v4di, the
 * immediate (int)imm, and U as the mask. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
7008
7009
7010
/* _mm256_shuffle_f32x4(A, B, imm): expands to the shuf_f32x4_256 mask builtin
 * with A and B viewed as __v8sf, the immediate (int)imm,
 * _mm256_setzero_ps() as the fourth operand and (__mmask8)-1 as the mask. */
#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)-1); })
7016
/* _mm256_mask_shuffle_f32x4(W, U, A, B, imm): expands to the shuf_f32x4_256
 * mask builtin with A and B viewed as __v8sf, the immediate (int)imm, W as
 * the fourth operand and U as the mask. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)(__m256)(W), \
                                             (__mmask8)(U)); })
7022
/* _mm256_maskz_shuffle_f32x4(U, A, B, imm): expands to the shuf_f32x4_256
 * mask builtin with A and B viewed as __v8sf, the immediate (int)imm,
 * _mm256_setzero_ps() as the fourth operand and U as the mask. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)(U)); })
7028
/* _mm256_shuffle_f64x2(A, B, imm): expands to the shuf_f64x2_256 mask builtin
 * with A and B viewed as __v4df, the immediate (int)imm,
 * _mm256_setzero_pd() as the fourth operand and (__mmask8)-1 as the mask. */
#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)-1); })
7035
/* _mm256_mask_shuffle_f64x2(W, U, A, B, imm): expands to the shuf_f64x2_256
 * mask builtin with A and B viewed as __v4df, the immediate (int)imm, W as
 * the fourth operand and U as the mask. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)(__m256d)(W), \
                                              (__mmask8)(U)); })
7042
/* _mm256_maskz_shuffle_f64x2(U, A, B, imm): expands to the shuf_f64x2_256
 * mask builtin with A and B viewed as __v4df, the immediate (int)imm,
 * _mm256_setzero_pd() as the fourth operand and U as the mask. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)(U)); })
7049
/* _mm256_shuffle_i32x4(A, B, imm): expands to the shuf_i32x4_256 mask builtin
 * with A and B viewed as __v8si, the immediate (int)imm,
 * _mm256_setzero_si256() as the fourth operand and (__mmask8)-1 as the mask. */
#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)-1); })
7056
/* _mm256_mask_shuffle_i32x4(W, U, A, B, imm): expands to the shuf_i32x4_256
 * mask builtin with A and B viewed as __v8si, the immediate (int)imm, W as
 * the fourth operand and U as the mask. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U)); })
7063
/* _mm256_maskz_shuffle_i32x4(U, A, B, imm): expands to the shuf_i32x4_256
 * mask builtin with A and B viewed as __v8si, the immediate (int)imm,
 * _mm256_setzero_si256() as the fourth operand and U as the mask. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U)); })
7070
/* _mm256_shuffle_i64x2(A, B, imm): expands to the shuf_i64x2_256 mask builtin
 * with A and B viewed as __v4di, the immediate (int)imm,
 * _mm256_setzero_si256() as the fourth operand and (__mmask8)-1 as the mask. */
#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)_mm256_setzero_si256(), \
                                              (__mmask8)-1); })
7077
/* _mm256_mask_shuffle_i64x2(W, U, A, B, imm): expands to the shuf_i64x2_256
 * mask builtin with A and B viewed as __v4di, the immediate (int)imm, W as
 * the fourth operand and U as the mask. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)(__m256i)(W), \
                                              (__mmask8)(U)); })
7084
/* _mm256_maskz_shuffle_i64x2(U, A, B, imm): expands to the shuf_i64x2_256
 * mask builtin with A and B viewed as __v4di, the immediate (int)imm,
 * _mm256_setzero_si256() as the fourth operand and U as the mask. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)_mm256_setzero_si256(), \
                                              (__mmask8)(U)); })
7091
/* _mm_mask_shuffle_pd(W, U, A, B, M): expands to the selectpd_128 builtin
 * applied to the mask U, the result of _mm_shuffle_pd(A, B, M), and W. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                       (__v2df)(__m128d)(W)); })
7096
/* _mm_maskz_shuffle_pd(U, A, B, M): expands to the selectpd_128 builtin
 * applied to the mask U, the result of _mm_shuffle_pd(A, B, M), and
 * _mm_setzero_pd(). */
#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                       (__v2df)_mm_setzero_pd()); })
7101
/* _mm256_mask_shuffle_pd(W, U, A, B, M): expands to the selectpd_256 builtin
 * applied to the mask U, the result of _mm256_shuffle_pd(A, B, M), and W. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                       (__v4df)(__m256d)(W)); })
7106
/* _mm256_maskz_shuffle_pd(U, A, B, M): expands to the selectpd_256 builtin
 * applied to the mask U, the result of _mm256_shuffle_pd(A, B, M), and
 * _mm256_setzero_pd(). */
#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                       (__v4df)_mm256_setzero_pd()); })
7111
/* _mm_mask_shuffle_ps(W, U, A, B, M): expands to the selectps_128 builtin
 * applied to the mask U, the result of _mm_shuffle_ps(A, B, M), and W. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                      (__v4sf)(__m128)(W)); })
7116
/* _mm_maskz_shuffle_ps(U, A, B, M): expands to the selectps_128 builtin
 * applied to the mask U, the result of _mm_shuffle_ps(A, B, M), and
 * _mm_setzero_ps(). */
#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                      (__v4sf)_mm_setzero_ps()); })
7121
/* _mm256_mask_shuffle_ps(W, U, A, B, M): expands to the selectps_256 builtin
 * applied to the mask U, the result of _mm256_shuffle_ps(A, B, M), and W. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                      (__v8sf)(__m256)(W)); })
7126
/* _mm256_maskz_shuffle_ps(U, A, B, M): expands to the selectps_256 builtin
 * applied to the mask U, the result of _mm256_shuffle_ps(A, B, M), and
 * _mm256_setzero_ps(). */
#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                      (__v8sf)_mm256_setzero_ps()); })
7131
7132static __inline__ __m128d __DEFAULT_FN_ATTRS
7133_mm_rsqrt14_pd (__m128d __A)
7134{
7135  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7136                 (__v2df)
7137                 _mm_setzero_pd (),
7138                 (__mmask8) -1);
7139}
7140
7141static __inline__ __m128d __DEFAULT_FN_ATTRS
7142_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
7143{
7144  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7145                 (__v2df) __W,
7146                 (__mmask8) __U);
7147}
7148
7149static __inline__ __m128d __DEFAULT_FN_ATTRS
7150_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
7151{
7152  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
7153                 (__v2df)
7154                 _mm_setzero_pd (),
7155                 (__mmask8) __U);
7156}
7157
7158static __inline__ __m256d __DEFAULT_FN_ATTRS
7159_mm256_rsqrt14_pd (__m256d __A)
7160{
7161  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7162                 (__v4df)
7163                 _mm256_setzero_pd (),
7164                 (__mmask8) -1);
7165}
7166
7167static __inline__ __m256d __DEFAULT_FN_ATTRS
7168_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
7169{
7170  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7171                 (__v4df) __W,
7172                 (__mmask8) __U);
7173}
7174
7175static __inline__ __m256d __DEFAULT_FN_ATTRS
7176_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
7177{
7178  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
7179                 (__v4df)
7180                 _mm256_setzero_pd (),
7181                 (__mmask8) __U);
7182}
7183
7184static __inline__ __m128 __DEFAULT_FN_ATTRS
7185_mm_rsqrt14_ps (__m128 __A)
7186{
7187  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7188                (__v4sf)
7189                _mm_setzero_ps (),
7190                (__mmask8) -1);
7191}
7192
7193static __inline__ __m128 __DEFAULT_FN_ATTRS
7194_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
7195{
7196  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7197                (__v4sf) __W,
7198                (__mmask8) __U);
7199}
7200
7201static __inline__ __m128 __DEFAULT_FN_ATTRS
7202_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
7203{
7204  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
7205                (__v4sf)
7206                _mm_setzero_ps (),
7207                (__mmask8) __U);
7208}
7209
7210static __inline__ __m256 __DEFAULT_FN_ATTRS
7211_mm256_rsqrt14_ps (__m256 __A)
7212{
7213  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7214                (__v8sf)
7215                _mm256_setzero_ps (),
7216                (__mmask8) -1);
7217}
7218
7219static __inline__ __m256 __DEFAULT_FN_ATTRS
7220_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
7221{
7222  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7223                (__v8sf) __W,
7224                (__mmask8) __U);
7225}
7226
7227static __inline__ __m256 __DEFAULT_FN_ATTRS
7228_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
7229{
7230  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
7231                (__v8sf)
7232                _mm256_setzero_ps (),
7233                (__mmask8) __U);
7234}
7235
7236static __inline__ __m256 __DEFAULT_FN_ATTRS
7237_mm256_broadcast_f32x4 (__m128 __A)
7238{
7239  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7240                (__v8sf)_mm256_undefined_pd (),
7241                (__mmask8) -1);
7242}
7243
7244static __inline__ __m256 __DEFAULT_FN_ATTRS
7245_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
7246{
7247  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7248                (__v8sf) __O,
7249                __M);
7250}
7251
7252static __inline__ __m256 __DEFAULT_FN_ATTRS
7253_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
7254{
7255  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
7256                (__v8sf) _mm256_setzero_ps (),
7257                __M);
7258}
7259
7260static __inline__ __m256i __DEFAULT_FN_ATTRS
7261_mm256_broadcast_i32x4 (__m128i __A)
7262{
7263  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
7264                 (__v8si)_mm256_undefined_si256 (),
7265                 (__mmask8) -1);
7266}
7267
7268static __inline__ __m256i __DEFAULT_FN_ATTRS
7269_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
7270{
7271  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
7272                 (__v8si)
7273                 __O, __M);
7274}
7275
7276static __inline__ __m256i __DEFAULT_FN_ATTRS
7277_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
7278{
7279  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
7280                 __A,
7281                 (__v8si) _mm256_setzero_si256 (),
7282                 __M);
7283}
7284
7285static __inline__ __m256d __DEFAULT_FN_ATTRS
7286_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
7287{
7288  return (__m256d)__builtin_ia32_selectpd_256(__M,
7289                                              (__v4df) _mm256_broadcastsd_pd(__A),
7290                                              (__v4df) __O);
7291}
7292
7293static __inline__ __m256d __DEFAULT_FN_ATTRS
7294_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7295{
7296  return (__m256d)__builtin_ia32_selectpd_256(__M,
7297                                              (__v4df) _mm256_broadcastsd_pd(__A),
7298                                              (__v4df) _mm256_setzero_pd());
7299}
7300
7301static __inline__ __m128 __DEFAULT_FN_ATTRS
7302_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
7303{
7304  return (__m128)__builtin_ia32_selectps_128(__M,
7305                                             (__v4sf) _mm_broadcastss_ps(__A),
7306                                             (__v4sf) __O);
7307}
7308
7309static __inline__ __m128 __DEFAULT_FN_ATTRS
7310_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
7311{
7312  return (__m128)__builtin_ia32_selectps_128(__M,
7313                                             (__v4sf) _mm_broadcastss_ps(__A),
7314                                             (__v4sf) _mm_setzero_ps());
7315}
7316
7317static __inline__ __m256 __DEFAULT_FN_ATTRS
7318_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
7319{
7320  return (__m256)__builtin_ia32_selectps_256(__M,
7321                                             (__v8sf) _mm256_broadcastss_ps(__A),
7322                                             (__v8sf) __O);
7323}
7324
7325static __inline__ __m256 __DEFAULT_FN_ATTRS
7326_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
7327{
7328  return (__m256)__builtin_ia32_selectps_256(__M,
7329                                             (__v8sf) _mm256_broadcastss_ps(__A),
7330                                             (__v8sf) _mm256_setzero_ps());
7331}
7332
7333static __inline__ __m128i __DEFAULT_FN_ATTRS
7334_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7335{
7336  return (__m128i)__builtin_ia32_selectd_128(__M,
7337                                             (__v4si) _mm_broadcastd_epi32(__A),
7338                                             (__v4si) __O);
7339}
7340
7341static __inline__ __m128i __DEFAULT_FN_ATTRS
7342_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
7343{
7344  return (__m128i)__builtin_ia32_selectd_128(__M,
7345                                             (__v4si) _mm_broadcastd_epi32(__A),
7346                                             (__v4si) _mm_setzero_si128());
7347}
7348
7349static __inline__ __m256i __DEFAULT_FN_ATTRS
7350_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
7351{
7352  return (__m256i)__builtin_ia32_selectd_256(__M,
7353                                             (__v8si) _mm256_broadcastd_epi32(__A),
7354                                             (__v8si) __O);
7355}
7356
7357static __inline__ __m256i __DEFAULT_FN_ATTRS
7358_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
7359{
7360  return (__m256i)__builtin_ia32_selectd_256(__M,
7361                                             (__v8si) _mm256_broadcastd_epi32(__A),
7362                                             (__v8si) _mm256_setzero_si256());
7363}
7364
7365static __inline__ __m128i __DEFAULT_FN_ATTRS
7366_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
7367{
7368  return (__m128i)__builtin_ia32_selectq_128(__M,
7369                                             (__v2di) _mm_broadcastq_epi64(__A),
7370                                             (__v2di) __O);
7371}
7372
7373static __inline__ __m128i __DEFAULT_FN_ATTRS
7374_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
7375{
7376  return (__m128i)__builtin_ia32_selectq_128(__M,
7377                                             (__v2di) _mm_broadcastq_epi64(__A),
7378                                             (__v2di) _mm_setzero_si128());
7379}
7380
7381static __inline__ __m256i __DEFAULT_FN_ATTRS
7382_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
7383{
7384  return (__m256i)__builtin_ia32_selectq_256(__M,
7385                                             (__v4di) _mm256_broadcastq_epi64(__A),
7386                                             (__v4di) __O);
7387}
7388
7389static __inline__ __m256i __DEFAULT_FN_ATTRS
7390_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
7391{
7392  return (__m256i)__builtin_ia32_selectq_256(__M,
7393                                             (__v4di) _mm256_broadcastq_epi64(__A),
7394                                             (__v4di) _mm256_setzero_si256());
7395}
7396
7397static __inline__ __m128i __DEFAULT_FN_ATTRS
7398_mm_cvtsepi32_epi8 (__m128i __A)
7399{
7400  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7401               (__v16qi)_mm_undefined_si128(),
7402               (__mmask8) -1);
7403}
7404
7405static __inline__ __m128i __DEFAULT_FN_ATTRS
7406_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7407{
7408  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7409               (__v16qi) __O, __M);
7410}
7411
7412static __inline__ __m128i __DEFAULT_FN_ATTRS
7413_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
7414{
7415  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
7416               (__v16qi) _mm_setzero_si128 (),
7417               __M);
7418}
7419
7420static __inline__ void __DEFAULT_FN_ATTRS
7421_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7422{
7423  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7424}
7425
7426static __inline__ __m128i __DEFAULT_FN_ATTRS
7427_mm256_cvtsepi32_epi8 (__m256i __A)
7428{
7429  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7430               (__v16qi)_mm_undefined_si128(),
7431               (__mmask8) -1);
7432}
7433
7434static __inline__ __m128i __DEFAULT_FN_ATTRS
7435_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7436{
7437  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7438               (__v16qi) __O, __M);
7439}
7440
7441static __inline__ __m128i __DEFAULT_FN_ATTRS
7442_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
7443{
7444  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7445               (__v16qi) _mm_setzero_si128 (),
7446               __M);
7447}
7448
7449static __inline__ void __DEFAULT_FN_ATTRS
7450_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7451{
7452  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7453}
7454
7455static __inline__ __m128i __DEFAULT_FN_ATTRS
7456_mm_cvtsepi32_epi16 (__m128i __A)
7457{
7458  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7459               (__v8hi)_mm_setzero_si128 (),
7460               (__mmask8) -1);
7461}
7462
7463static __inline__ __m128i __DEFAULT_FN_ATTRS
7464_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7465{
7466  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7467               (__v8hi)__O,
7468               __M);
7469}
7470
7471static __inline__ __m128i __DEFAULT_FN_ATTRS
7472_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
7473{
7474  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7475               (__v8hi) _mm_setzero_si128 (),
7476               __M);
7477}
7478
7479static __inline__ void __DEFAULT_FN_ATTRS
7480_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7481{
7482  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7483}
7484
7485static __inline__ __m128i __DEFAULT_FN_ATTRS
7486_mm256_cvtsepi32_epi16 (__m256i __A)
7487{
7488  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7489               (__v8hi)_mm_undefined_si128(),
7490               (__mmask8) -1);
7491}
7492
7493static __inline__ __m128i __DEFAULT_FN_ATTRS
7494_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7495{
7496  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7497               (__v8hi) __O, __M);
7498}
7499
7500static __inline__ __m128i __DEFAULT_FN_ATTRS
7501_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
7502{
7503  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7504               (__v8hi) _mm_setzero_si128 (),
7505               __M);
7506}
7507
7508static __inline__ void __DEFAULT_FN_ATTRS
7509_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7510{
7511  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7512}
7513
7514static __inline__ __m128i __DEFAULT_FN_ATTRS
7515_mm_cvtsepi64_epi8 (__m128i __A)
7516{
7517  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7518               (__v16qi)_mm_undefined_si128(),
7519               (__mmask8) -1);
7520}
7521
7522static __inline__ __m128i __DEFAULT_FN_ATTRS
7523_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7524{
7525  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7526               (__v16qi) __O, __M);
7527}
7528
7529static __inline__ __m128i __DEFAULT_FN_ATTRS
7530_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
7531{
7532  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7533               (__v16qi) _mm_setzero_si128 (),
7534               __M);
7535}
7536
7537static __inline__ void __DEFAULT_FN_ATTRS
7538_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7539{
7540  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7541}
7542
7543static __inline__ __m128i __DEFAULT_FN_ATTRS
7544_mm256_cvtsepi64_epi8 (__m256i __A)
7545{
7546  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7547               (__v16qi)_mm_undefined_si128(),
7548               (__mmask8) -1);
7549}
7550
7551static __inline__ __m128i __DEFAULT_FN_ATTRS
7552_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7553{
7554  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7555               (__v16qi) __O, __M);
7556}
7557
7558static __inline__ __m128i __DEFAULT_FN_ATTRS
7559_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
7560{
7561  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7562               (__v16qi) _mm_setzero_si128 (),
7563               __M);
7564}
7565
7566static __inline__ void __DEFAULT_FN_ATTRS
7567_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7568{
7569  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7570}
7571
7572static __inline__ __m128i __DEFAULT_FN_ATTRS
7573_mm_cvtsepi64_epi32 (__m128i __A)
7574{
7575  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7576               (__v4si)_mm_undefined_si128(),
7577               (__mmask8) -1);
7578}
7579
7580static __inline__ __m128i __DEFAULT_FN_ATTRS
7581_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7582{
7583  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7584               (__v4si) __O, __M);
7585}
7586
7587static __inline__ __m128i __DEFAULT_FN_ATTRS
7588_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
7589{
7590  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7591               (__v4si) _mm_setzero_si128 (),
7592               __M);
7593}
7594
7595static __inline__ void __DEFAULT_FN_ATTRS
7596_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7597{
7598  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7599}
7600
7601static __inline__ __m128i __DEFAULT_FN_ATTRS
7602_mm256_cvtsepi64_epi32 (__m256i __A)
7603{
7604  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7605               (__v4si)_mm_undefined_si128(),
7606               (__mmask8) -1);
7607}
7608
7609static __inline__ __m128i __DEFAULT_FN_ATTRS
7610_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7611{
7612  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7613               (__v4si)__O,
7614               __M);
7615}
7616
7617static __inline__ __m128i __DEFAULT_FN_ATTRS
7618_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
7619{
7620  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7621               (__v4si) _mm_setzero_si128 (),
7622               __M);
7623}
7624
7625static __inline__ void __DEFAULT_FN_ATTRS
7626_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7627{
7628  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7629}
7630
7631static __inline__ __m128i __DEFAULT_FN_ATTRS
7632_mm_cvtsepi64_epi16 (__m128i __A)
7633{
7634  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7635               (__v8hi)_mm_undefined_si128(),
7636               (__mmask8) -1);
7637}
7638
7639static __inline__ __m128i __DEFAULT_FN_ATTRS
7640_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7641{
7642  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7643               (__v8hi) __O, __M);
7644}
7645
7646static __inline__ __m128i __DEFAULT_FN_ATTRS
7647_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
7648{
7649  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7650               (__v8hi) _mm_setzero_si128 (),
7651               __M);
7652}
7653
7654static __inline__ void __DEFAULT_FN_ATTRS
7655_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7656{
7657  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7658}
7659
7660static __inline__ __m128i __DEFAULT_FN_ATTRS
7661_mm256_cvtsepi64_epi16 (__m256i __A)
7662{
7663  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7664               (__v8hi)_mm_undefined_si128(),
7665               (__mmask8) -1);
7666}
7667
7668static __inline__ __m128i __DEFAULT_FN_ATTRS
7669_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7670{
7671  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7672               (__v8hi) __O, __M);
7673}
7674
7675static __inline__ __m128i __DEFAULT_FN_ATTRS
7676_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
7677{
7678  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7679               (__v8hi) _mm_setzero_si128 (),
7680               __M);
7681}
7682
7683static __inline__ void __DEFAULT_FN_ATTRS
7684_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7685{
7686  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7687}
7688
7689static __inline__ __m128i __DEFAULT_FN_ATTRS
7690_mm_cvtusepi32_epi8 (__m128i __A)
7691{
7692  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7693                (__v16qi)_mm_undefined_si128(),
7694                (__mmask8) -1);
7695}
7696
7697static __inline__ __m128i __DEFAULT_FN_ATTRS
7698_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7699{
7700  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7701                (__v16qi) __O,
7702                __M);
7703}
7704
7705static __inline__ __m128i __DEFAULT_FN_ATTRS
7706_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
7707{
7708  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7709                (__v16qi) _mm_setzero_si128 (),
7710                __M);
7711}
7712
7713static __inline__ void __DEFAULT_FN_ATTRS
7714_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7715{
7716  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7717}
7718
7719static __inline__ __m128i __DEFAULT_FN_ATTRS
7720_mm256_cvtusepi32_epi8 (__m256i __A)
7721{
7722  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7723                (__v16qi)_mm_undefined_si128(),
7724                (__mmask8) -1);
7725}
7726
7727static __inline__ __m128i __DEFAULT_FN_ATTRS
7728_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7729{
7730  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7731                (__v16qi) __O,
7732                __M);
7733}
7734
7735static __inline__ __m128i __DEFAULT_FN_ATTRS
7736_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
7737{
7738  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7739                (__v16qi) _mm_setzero_si128 (),
7740                __M);
7741}
7742
7743static __inline__ void __DEFAULT_FN_ATTRS
7744_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7745{
7746  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7747}
7748
7749static __inline__ __m128i __DEFAULT_FN_ATTRS
7750_mm_cvtusepi32_epi16 (__m128i __A)
7751{
7752  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7753                (__v8hi)_mm_undefined_si128(),
7754                (__mmask8) -1);
7755}
7756
7757static __inline__ __m128i __DEFAULT_FN_ATTRS
7758_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7759{
7760  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7761                (__v8hi) __O, __M);
7762}
7763
7764static __inline__ __m128i __DEFAULT_FN_ATTRS
7765_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
7766{
7767  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7768                (__v8hi) _mm_setzero_si128 (),
7769                __M);
7770}
7771
7772static __inline__ void __DEFAULT_FN_ATTRS
7773_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7774{
7775  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7776}
7777
7778static __inline__ __m128i __DEFAULT_FN_ATTRS
7779_mm256_cvtusepi32_epi16 (__m256i __A)
7780{
7781  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7782                (__v8hi) _mm_undefined_si128(),
7783                (__mmask8) -1);
7784}
7785
7786static __inline__ __m128i __DEFAULT_FN_ATTRS
7787_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7788{
7789  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7790                (__v8hi) __O, __M);
7791}
7792
7793static __inline__ __m128i __DEFAULT_FN_ATTRS
7794_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
7795{
7796  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7797                (__v8hi) _mm_setzero_si128 (),
7798                __M);
7799}
7800
7801static __inline__ void __DEFAULT_FN_ATTRS
7802_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7803{
7804  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7805}
7806
7807static __inline__ __m128i __DEFAULT_FN_ATTRS
7808_mm_cvtusepi64_epi8 (__m128i __A)
7809{
7810  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7811                (__v16qi)_mm_undefined_si128(),
7812                (__mmask8) -1);
7813}
7814
7815static __inline__ __m128i __DEFAULT_FN_ATTRS
7816_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7817{
7818  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7819                (__v16qi) __O,
7820                __M);
7821}
7822
7823static __inline__ __m128i __DEFAULT_FN_ATTRS
7824_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
7825{
7826  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7827                (__v16qi) _mm_setzero_si128 (),
7828                __M);
7829}
7830
7831static __inline__ void __DEFAULT_FN_ATTRS
7832_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7833{
7834  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7835}
7836
7837static __inline__ __m128i __DEFAULT_FN_ATTRS
7838_mm256_cvtusepi64_epi8 (__m256i __A)
7839{
7840  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7841                (__v16qi)_mm_undefined_si128(),
7842                (__mmask8) -1);
7843}
7844
7845static __inline__ __m128i __DEFAULT_FN_ATTRS
7846_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7847{
7848  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7849                (__v16qi) __O,
7850                __M);
7851}
7852
7853static __inline__ __m128i __DEFAULT_FN_ATTRS
7854_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
7855{
7856  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7857                (__v16qi) _mm_setzero_si128 (),
7858                __M);
7859}
7860
7861static __inline__ void __DEFAULT_FN_ATTRS
7862_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7863{
7864  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7865}
7866
7867static __inline__ __m128i __DEFAULT_FN_ATTRS
7868_mm_cvtusepi64_epi32 (__m128i __A)
7869{
7870  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7871                (__v4si)_mm_undefined_si128(),
7872                (__mmask8) -1);
7873}
7874
7875static __inline__ __m128i __DEFAULT_FN_ATTRS
7876_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7877{
7878  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7879                (__v4si) __O, __M);
7880}
7881
7882static __inline__ __m128i __DEFAULT_FN_ATTRS
7883_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
7884{
7885  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7886                (__v4si) _mm_setzero_si128 (),
7887                __M);
7888}
7889
7890static __inline__ void __DEFAULT_FN_ATTRS
7891_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7892{
7893  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7894}
7895
7896static __inline__ __m128i __DEFAULT_FN_ATTRS
7897_mm256_cvtusepi64_epi32 (__m256i __A)
7898{
7899  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7900                (__v4si)_mm_undefined_si128(),
7901                (__mmask8) -1);
7902}
7903
7904static __inline__ __m128i __DEFAULT_FN_ATTRS
7905_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7906{
7907  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7908                (__v4si) __O, __M);
7909}
7910
7911static __inline__ __m128i __DEFAULT_FN_ATTRS
7912_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
7913{
7914  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7915                (__v4si) _mm_setzero_si128 (),
7916                __M);
7917}
7918
7919static __inline__ void __DEFAULT_FN_ATTRS
7920_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7921{
7922  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7923}
7924
7925static __inline__ __m128i __DEFAULT_FN_ATTRS
7926_mm_cvtusepi64_epi16 (__m128i __A)
7927{
7928  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7929                (__v8hi)_mm_undefined_si128(),
7930                (__mmask8) -1);
7931}
7932
7933static __inline__ __m128i __DEFAULT_FN_ATTRS
7934_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7935{
7936  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7937                (__v8hi) __O, __M);
7938}
7939
7940static __inline__ __m128i __DEFAULT_FN_ATTRS
7941_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
7942{
7943  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7944                (__v8hi) _mm_setzero_si128 (),
7945                __M);
7946}
7947
7948static __inline__ void __DEFAULT_FN_ATTRS
7949_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7950{
7951  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7952}
7953
7954static __inline__ __m128i __DEFAULT_FN_ATTRS
7955_mm256_cvtusepi64_epi16 (__m256i __A)
7956{
7957  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7958                (__v8hi)_mm_undefined_si128(),
7959                (__mmask8) -1);
7960}
7961
7962static __inline__ __m128i __DEFAULT_FN_ATTRS
7963_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7964{
7965  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7966                (__v8hi) __O, __M);
7967}
7968
7969static __inline__ __m128i __DEFAULT_FN_ATTRS
7970_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
7971{
7972  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7973                (__v8hi) _mm_setzero_si128 (),
7974                __M);
7975}
7976
7977static __inline__ void __DEFAULT_FN_ATTRS
7978_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7979{
7980  return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7981}
7982
7983static __inline__ __m128i __DEFAULT_FN_ATTRS
7984_mm_cvtepi32_epi8 (__m128i __A)
7985{
7986  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7987              (__v16qi)_mm_undefined_si128(),
7988              (__mmask8) -1);
7989}
7990
7991static __inline__ __m128i __DEFAULT_FN_ATTRS
7992_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7993{
7994  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7995              (__v16qi) __O, __M);
7996}
7997
7998static __inline__ __m128i __DEFAULT_FN_ATTRS
7999_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
8000{
8001  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
8002              (__v16qi)
8003              _mm_setzero_si128 (),
8004              __M);
8005}
8006
8007static __inline__ void __DEFAULT_FN_ATTRS
8008_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
8009{
8010  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
8011}
8012
8013static __inline__ __m128i __DEFAULT_FN_ATTRS
8014_mm256_cvtepi32_epi8 (__m256i __A)
8015{
8016  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
8017              (__v16qi)_mm_undefined_si128(),
8018              (__mmask8) -1);
8019}
8020
8021static __inline__ __m128i __DEFAULT_FN_ATTRS
8022_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
8023{
8024  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
8025              (__v16qi) __O, __M);
8026}
8027
8028static __inline__ __m128i __DEFAULT_FN_ATTRS
8029_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
8030{
8031  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
8032              (__v16qi) _mm_setzero_si128 (),
8033              __M);
8034}
8035
8036static __inline__ void __DEFAULT_FN_ATTRS
8037_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
8038{
8039  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
8040}
8041
8042static __inline__ __m128i __DEFAULT_FN_ATTRS
8043_mm_cvtepi32_epi16 (__m128i __A)
8044{
8045  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8046              (__v8hi) _mm_setzero_si128 (),
8047              (__mmask8) -1);
8048}
8049
8050static __inline__ __m128i __DEFAULT_FN_ATTRS
8051_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
8052{
8053  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8054              (__v8hi) __O, __M);
8055}
8056
8057static __inline__ __m128i __DEFAULT_FN_ATTRS
8058_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
8059{
8060  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
8061              (__v8hi) _mm_setzero_si128 (),
8062              __M);
8063}
8064
8065static __inline__ void __DEFAULT_FN_ATTRS
8066_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
8067{
8068  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
8069}
8070
8071static __inline__ __m128i __DEFAULT_FN_ATTRS
8072_mm256_cvtepi32_epi16 (__m256i __A)
8073{
8074  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8075              (__v8hi)_mm_setzero_si128 (),
8076              (__mmask8) -1);
8077}
8078
8079static __inline__ __m128i __DEFAULT_FN_ATTRS
8080_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
8081{
8082  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8083              (__v8hi) __O, __M);
8084}
8085
8086static __inline__ __m128i __DEFAULT_FN_ATTRS
8087_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
8088{
8089  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
8090              (__v8hi) _mm_setzero_si128 (),
8091              __M);
8092}
8093
8094static __inline__ void __DEFAULT_FN_ATTRS
8095_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
8096{
8097  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
8098}
8099
8100static __inline__ __m128i __DEFAULT_FN_ATTRS
8101_mm_cvtepi64_epi8 (__m128i __A)
8102{
8103  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8104              (__v16qi) _mm_undefined_si128(),
8105              (__mmask8) -1);
8106}
8107
8108static __inline__ __m128i __DEFAULT_FN_ATTRS
8109_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
8110{
8111  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8112              (__v16qi) __O, __M);
8113}
8114
8115static __inline__ __m128i __DEFAULT_FN_ATTRS
8116_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
8117{
8118  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
8119              (__v16qi) _mm_setzero_si128 (),
8120              __M);
8121}
8122
8123static __inline__ void __DEFAULT_FN_ATTRS
8124_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
8125{
8126  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
8127}
8128
8129static __inline__ __m128i __DEFAULT_FN_ATTRS
8130_mm256_cvtepi64_epi8 (__m256i __A)
8131{
8132  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8133              (__v16qi) _mm_undefined_si128(),
8134              (__mmask8) -1);
8135}
8136
8137static __inline__ __m128i __DEFAULT_FN_ATTRS
8138_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
8139{
8140  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8141              (__v16qi) __O, __M);
8142}
8143
8144static __inline__ __m128i __DEFAULT_FN_ATTRS
8145_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
8146{
8147  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
8148              (__v16qi) _mm_setzero_si128 (),
8149              __M);
8150}
8151
8152static __inline__ void __DEFAULT_FN_ATTRS
8153_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
8154{
8155  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
8156}
8157
8158static __inline__ __m128i __DEFAULT_FN_ATTRS
8159_mm_cvtepi64_epi32 (__m128i __A)
8160{
8161  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8162              (__v4si)_mm_undefined_si128(),
8163              (__mmask8) -1);
8164}
8165
8166static __inline__ __m128i __DEFAULT_FN_ATTRS
8167_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
8168{
8169  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8170              (__v4si) __O, __M);
8171}
8172
8173static __inline__ __m128i __DEFAULT_FN_ATTRS
8174_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
8175{
8176  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
8177              (__v4si) _mm_setzero_si128 (),
8178              __M);
8179}
8180
8181static __inline__ void __DEFAULT_FN_ATTRS
8182_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
8183{
8184  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
8185}
8186
8187static __inline__ __m128i __DEFAULT_FN_ATTRS
8188_mm256_cvtepi64_epi32 (__m256i __A)
8189{
8190  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8191              (__v4si) _mm_undefined_si128(),
8192              (__mmask8) -1);
8193}
8194
8195static __inline__ __m128i __DEFAULT_FN_ATTRS
8196_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
8197{
8198  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8199              (__v4si) __O, __M);
8200}
8201
8202static __inline__ __m128i __DEFAULT_FN_ATTRS
8203_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
8204{
8205  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
8206              (__v4si) _mm_setzero_si128 (),
8207              __M);
8208}
8209
8210static __inline__ void __DEFAULT_FN_ATTRS
8211_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
8212{
8213  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
8214}
8215
8216static __inline__ __m128i __DEFAULT_FN_ATTRS
8217_mm_cvtepi64_epi16 (__m128i __A)
8218{
8219  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8220              (__v8hi) _mm_undefined_si128(),
8221              (__mmask8) -1);
8222}
8223
8224static __inline__ __m128i __DEFAULT_FN_ATTRS
8225_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
8226{
8227  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8228              (__v8hi)__O,
8229              __M);
8230}
8231
8232static __inline__ __m128i __DEFAULT_FN_ATTRS
8233_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
8234{
8235  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
8236              (__v8hi) _mm_setzero_si128 (),
8237              __M);
8238}
8239
8240static __inline__ void __DEFAULT_FN_ATTRS
8241_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
8242{
8243  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
8244}
8245
8246static __inline__ __m128i __DEFAULT_FN_ATTRS
8247_mm256_cvtepi64_epi16 (__m256i __A)
8248{
8249  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8250              (__v8hi)_mm_undefined_si128(),
8251              (__mmask8) -1);
8252}
8253
8254static __inline__ __m128i __DEFAULT_FN_ATTRS
8255_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
8256{
8257  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8258              (__v8hi) __O, __M);
8259}
8260
8261static __inline__ __m128i __DEFAULT_FN_ATTRS
8262_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
8263{
8264  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
8265              (__v8hi) _mm_setzero_si128 (),
8266              __M);
8267}
8268
8269static __inline__ void __DEFAULT_FN_ATTRS
8270_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
8271{
8272  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
8273}
8274
/* Expands to a __builtin_shufflevector over (A) viewed as __v8sf, with an
   undefined vector as the second shuffle operand.  Bit 0 of (imm) picks
   elements 4..7 when set and elements 0..3 when clear; the result is cast to
   __m128. */
#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_shufflevector( \
      (__v8sf)(__m256)(A), \
      (__v8sf)_mm256_undefined_ps(), \
      ((imm) & 1) ? 4 : 0, \
      ((imm) & 1) ? 5 : 1, \
      ((imm) & 1) ? 6 : 2, \
      ((imm) & 1) ? 7 : 3); })
8282
/* Masked extract: hands the mask (U), the result of
   _mm256_extractf32x4_ps((A), (imm)) and (W) to __builtin_ia32_selectps_128. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
      (__v4sf)(W)); })
8287
/* Zero-masked extract: hands the mask (U), the result of
   _mm256_extractf32x4_ps((A), (imm)) and _mm_setzero_ps() to
   __builtin_ia32_selectps_128. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
      (__v4sf)_mm_setzero_ps()); })
8292
/* Expands to a __builtin_shufflevector over (A) viewed as __v8si, with an
   undefined vector as the second shuffle operand.  Bit 0 of (imm) picks
   elements 4..7 when set and elements 0..3 when clear; the result is cast to
   __m128i.  (A) is an integer vector, so it is cast through __m256i (not the
   float type __m256) before being reinterpreted as __v8si. */
#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8si)(__m256i)(A),             \
                                   (__v8si)_mm256_undefined_si256(), \
                                   ((imm) & 1) ? 4 : 0,              \
                                   ((imm) & 1) ? 5 : 1,              \
                                   ((imm) & 1) ? 6 : 2,              \
                                   ((imm) & 1) ? 7 : 3); })
8300
/* Masked extract: hands the mask (U), the result of
   _mm256_extracti32x4_epi32((A), (imm)) and (W) to
   __builtin_ia32_selectd_128. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
      (__v4si)(W)); })
8305
/* Zero-masked extract: hands the mask (U), the result of
   _mm256_extracti32x4_epi32((A), (imm)) and _mm_setzero_si128() to
   __builtin_ia32_selectd_128. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
      (__v4si)_mm_setzero_si128()); })
8310
/* Expands to a __builtin_shufflevector of (A) (as __v8sf) and (B) widened by
   _mm256_castps128_ps256.  Shuffle indices 8..11 name the first four
   elements of the widened (B): with bit 0 of (imm) set they fill result
   elements 4..7 and elements 0..3 come from (A); with it clear they fill
   result elements 0..3 and elements 4..7 come from (A). */
#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_shufflevector( \
      (__v8sf)(A), \
      (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
      ((imm) & 0x1) ?  0 :  8, \
      ((imm) & 0x1) ?  1 :  9, \
      ((imm) & 0x1) ?  2 : 10, \
      ((imm) & 0x1) ?  3 : 11, \
      ((imm) & 0x1) ?  8 :  4, \
      ((imm) & 0x1) ?  9 :  5, \
      ((imm) & 0x1) ? 10 :  6, \
      ((imm) & 0x1) ? 11 :  7); })
8322
/* Masked insert: hands the mask (U), the result of
   _mm256_insertf32x4((A), (B), (imm)) and (W) to
   __builtin_ia32_selectps_256. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)(W)); })
8327
/* Zero-masked insert: hands the mask (U), the result of
   _mm256_insertf32x4((A), (B), (imm)) and _mm256_setzero_ps() to
   __builtin_ia32_selectps_256. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()); })
8332
/* Expands to a __builtin_shufflevector of (A) (as __v8si) and (B) widened by
   _mm256_castsi128_si256.  Shuffle indices 8..11 name the first four
   elements of the widened (B): with bit 0 of (imm) set they fill result
   elements 4..7 and elements 0..3 come from (A); with it clear they fill
   result elements 0..3 and elements 4..7 come from (A). */
#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v8si)(A), \
      (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
      ((imm) & 0x1) ?  0 :  8, \
      ((imm) & 0x1) ?  1 :  9, \
      ((imm) & 0x1) ?  2 : 10, \
      ((imm) & 0x1) ?  3 : 11, \
      ((imm) & 0x1) ?  8 :  4, \
      ((imm) & 0x1) ?  9 :  5, \
      ((imm) & 0x1) ? 10 :  6, \
      ((imm) & 0x1) ? 11 :  7); })
8344
/* Masked insert: hands the mask (U), the result of
   _mm256_inserti32x4((A), (B), (imm)) and (W) to
   __builtin_ia32_selectd_256. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)(W)); })
8349
/* Zero-masked insert: hands the mask (U), the result of
   _mm256_inserti32x4((A), (B), (imm)) and _mm256_setzero_si256() to
   __builtin_ia32_selectd_256. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()); })
8354
/* Unmasked form: calls __builtin_ia32_getmantpd128_mask on (A) with the
   immediate ((C) << 2) | (B), a zero vector as the third operand and an
   all-ones mask. */
#define _mm_getmant_pd(A, B, C) __extension__({\
  (__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1); })
8360
/* Masked form: calls __builtin_ia32_getmantpd128_mask on (A) with the
   immediate ((C) << 2) | (B), (W) as the third operand and (U) as the mask. */
#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\
  (__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)); })
8366
/* Zero-masked form: calls __builtin_ia32_getmantpd128_mask on (A) with the
   immediate ((C) << 2) | (B), a zero vector as the third operand and (U) as
   the mask. */
#define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\
  (__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)); })
8372
/* Unmasked form: calls __builtin_ia32_getmantpd256_mask on (A) with the
   immediate ((C) << 2) | (B), a zero vector as the third operand and an
   all-ones mask. */
#define _mm256_getmant_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1); })
8378
/* Masked form: calls __builtin_ia32_getmantpd256_mask on (A) with the
   immediate ((C) << 2) | (B), (W) as the third operand and (U) as the mask. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)); })
8384
/* Zero-masked form: calls __builtin_ia32_getmantpd256_mask on (A) with the
   immediate ((C) << 2) | (B), a zero vector as the third operand and (U) as
   the mask. */
#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)); })
8390
/* Unmasked form: calls __builtin_ia32_getmantps128_mask on (A) with the
   immediate ((C) << 2) | (B), a zero vector as the third operand and an
   all-ones mask. */
#define _mm_getmant_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)((B) | ((C) << 2)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1); })
8396
/* Masked form: calls __builtin_ia32_getmantps128_mask on (A) with the
   immediate ((C) << 2) | (B), (W) as the third operand and (U) as the mask. */
#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)((B) | ((C) << 2)), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)); })
8402
/* Zero-masked form: calls __builtin_ia32_getmantps128_mask on (A) with the
   immediate ((C) << 2) | (B), a zero vector as the third operand and (U) as
   the mask. */
#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)((B) | ((C) << 2)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)); })
8408
/* Unmasked form: calls __builtin_ia32_getmantps256_mask on (A) with the
   immediate ((C) << 2) | (B), a zero vector as the third operand and an
   all-ones mask. */
#define _mm256_getmant_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1); })
8414
/* Masked form: calls __builtin_ia32_getmantps256_mask on (A) with the
   immediate ((C) << 2) | (B), (W) as the third operand and (U) as the mask. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)); })
8420
/* Zero-masked form: calls __builtin_ia32_getmantps256_mask on (A) with the
   immediate ((C) << 2) | (B), a zero vector as the third operand and (U) as
   the mask. */
#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)); })
8426
/* Masked gather wrapper around __builtin_ia32_gather3div2df.  v1_old is passed
 * as a __v2df, addr as double const *, index as a __v2di (64-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), \
      (double const *)(addr), \
      (__v2di)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8432
/* Masked gather wrapper around __builtin_ia32_gather3div2di.  v1_old is passed
 * as a __v2di, addr as long long const *, index as a __v2di (64-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), \
      (long long const *)(addr), \
      (__v2di)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8438
/* Masked gather wrapper around __builtin_ia32_gather3div4df.  v1_old is passed
 * as a __v4df, addr as double const *, index as a __v4di (64-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), \
      (double const *)(addr), \
      (__v4di)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8444
/* Masked gather wrapper around __builtin_ia32_gather3div4di.  v1_old is passed
 * as a __v4di, addr as long long const *, index as a __v4di (64-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), \
      (long long const *)(addr), \
      (__v4di)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8450
/* Masked gather wrapper around __builtin_ia32_gather3div4sf.  v1_old is passed
 * as a __v4sf, addr as float const *, index as a __v2di (two 64-bit indices),
 * followed by the 8-bit mask and the integer scale.  The result is a __m128. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), \
      (float const *)(addr), \
      (__v2di)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8456
/* Masked gather wrapper around __builtin_ia32_gather3div4si.  v1_old is passed
 * as a __v4si, addr as int const *, index as a __v2di (two 64-bit indices),
 * followed by the 8-bit mask and the integer scale.  The result is a __m128i. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), \
      (int const *)(addr), \
      (__v2di)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8462
/* Masked gather wrapper around __builtin_ia32_gather3div8sf.  The index vector
 * is 256 bits wide (__v4di, four 64-bit indices) while v1_old and the result
 * are 128-bit float vectors (__v4sf / __m128); addr is viewed as
 * float const *. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), \
      (float const *)(addr), \
      (__v4di)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8468
/* Masked gather wrapper around __builtin_ia32_gather3div8si.  The index vector
 * is 256 bits wide (__v4di, four 64-bit indices) while v1_old and the result
 * are 128-bit integer vectors (__v4si / __m128i); addr is viewed as
 * int const *. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), \
      (int const *)(addr), \
      (__v4di)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8474
/* Masked gather wrapper around __builtin_ia32_gather3siv2df.  v1_old is passed
 * as a __v2df, addr as double const *, index as a __v4si (32-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), \
      (double const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8480
/* Masked gather wrapper around __builtin_ia32_gather3siv2di.  v1_old is passed
 * as a __v2di, addr as long long const *, index as a __v4si (32-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), \
      (long long const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8486
/* Masked gather wrapper around __builtin_ia32_gather3siv4df.  v1_old and the
 * result are 256-bit double vectors (__v4df / __m256d); the index operand is a
 * 128-bit __v4si (four 32-bit indices) and addr is viewed as double const *. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), \
      (double const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8492
/* Masked gather wrapper around __builtin_ia32_gather3siv4di.  v1_old and the
 * result are 256-bit integer vectors (__v4di / __m256i); the index operand is
 * a 128-bit __v4si (four 32-bit indices) and addr is viewed as
 * long long const *. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), \
      (long long const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8498
/* Masked gather wrapper around __builtin_ia32_gather3siv4sf.  v1_old is passed
 * as a __v4sf, addr as float const *, index as a __v4si (32-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), \
      (float const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8504
/* Masked gather wrapper around __builtin_ia32_gather3siv4si.  v1_old is passed
 * as a __v4si, addr as int const *, index as a __v4si (32-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), \
      (int const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8510
/* Masked gather wrapper around __builtin_ia32_gather3siv8sf.  v1_old is passed
 * as a __v8sf, addr as float const *, index as a __v8si (32-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), \
      (float const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8516
/* Masked gather wrapper around __builtin_ia32_gather3siv8si.  v1_old is passed
 * as a __v8si, addr as int const *, index as a __v8si (32-bit indices),
 * followed by the 8-bit mask and the integer scale. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), \
      (int const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8522
/* Rearranges the four doubles of X with __builtin_shufflevector: result
 * element i is chosen by the two-bit field of C at bit position 2*i.  An
 * undefined vector is given as the second shuffle source; every index is
 * masked to the range 0..3, so only elements of X are ever selected. */
#define _mm256_permutex_pd(X, C) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
      (__v4df)(__m256d)(X), \
      (__v4df)_mm256_undefined_pd(), \
      ((C) >> 0) & 0x3, \
      ((C) >> 2) & 0x3, \
      ((C) >> 4) & 0x3, \
      ((C) >> 6) & 0x3); })
8528
/* Masked form of _mm256_permutex_pd: hands U, the permuted vector
 * _mm256_permutex_pd(X, C) and W, in that order, to
 * __builtin_ia32_selectpd_256. */
#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)); })
8533
/* Zero-masked form of _mm256_permutex_pd: hands U, the permuted vector
 * _mm256_permutex_pd(X, C) and an all-zero vector, in that order, to
 * __builtin_ia32_selectpd_256. */
#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()); })
8538
/* Rearranges the four 64-bit integers of X with __builtin_shufflevector:
 * result element i is chosen by the two-bit field of C at bit position 2*i.
 * An undefined vector is given as the second shuffle source; every index is
 * masked to the range 0..3, so only elements of X are ever selected. */
#define _mm256_permutex_epi64(X, C) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v4di)(__m256i)(X), \
      (__v4di)_mm256_undefined_si256(), \
      ((C) >> 0) & 0x3, \
      ((C) >> 2) & 0x3, \
      ((C) >> 4) & 0x3, \
      ((C) >> 6) & 0x3); })
8544
/* Masked form of _mm256_permutex_epi64: hands U, the permuted vector
 * _mm256_permutex_epi64(X, C) and W, in that order, to
 * __builtin_ia32_selectq_256. */
#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)); })
8549
/* Zero-masked form of _mm256_permutex_epi64: hands U, the permuted vector
 * _mm256_permutex_epi64(X, C) and an all-zero vector, in that order, to
 * __builtin_ia32_selectq_256. */
#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()); })
8554
8555static __inline__ __m256d __DEFAULT_FN_ATTRS
8556_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8557{
8558  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8559                 (__v4di) __X,
8560                 (__v4df) _mm256_undefined_si256 (),
8561                 (__mmask8) -1);
8562}
8563
8564static __inline__ __m256d __DEFAULT_FN_ATTRS
8565_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8566          __m256d __Y)
8567{
8568  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8569                 (__v4di) __X,
8570                 (__v4df) __W,
8571                 (__mmask8) __U);
8572}
8573
8574static __inline__ __m256d __DEFAULT_FN_ATTRS
8575_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8576{
8577  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8578                 (__v4di) __X,
8579                 (__v4df) _mm256_setzero_pd (),
8580                 (__mmask8) __U);
8581}
8582
8583static __inline__ __m256i __DEFAULT_FN_ATTRS
8584_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8585{
8586  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8587                 (__v4di) __X,
8588                 (__v4di) _mm256_setzero_si256 (),
8589                 (__mmask8) __M);
8590}
8591
8592static __inline__ __m256i __DEFAULT_FN_ATTRS
8593_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8594{
8595  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8596                 (__v4di) __X,
8597                 (__v4di) _mm256_undefined_si256 (),
8598                 (__mmask8) -1);
8599}
8600
8601static __inline__ __m256i __DEFAULT_FN_ATTRS
8602_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8603             __m256i __Y)
8604{
8605  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
8606                 (__v4di) __X,
8607                 (__v4di) __W,
8608                 __M);
8609}
8610
8611static __inline__ __m256 __DEFAULT_FN_ATTRS
8612_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8613          __m256 __Y)
8614{
8615  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8616                (__v8si) __X,
8617                (__v8sf) __W,
8618                (__mmask8) __U);
8619}
8620
8621static __inline__ __m256 __DEFAULT_FN_ATTRS
8622_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8623{
8624  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8625                (__v8si) __X,
8626                (__v8sf) _mm256_setzero_ps (),
8627                (__mmask8) __U);
8628}
8629
8630static __inline__ __m256 __DEFAULT_FN_ATTRS
8631_mm256_permutexvar_ps (__m256i __X, __m256 __Y)
8632{
8633  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8634                (__v8si) __X,
8635                (__v8sf) _mm256_undefined_si256 (),
8636                (__mmask8) -1);
8637}
8638
8639static __inline__ __m256i __DEFAULT_FN_ATTRS
8640_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
8641{
8642  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8643                 (__v8si) __X,
8644                 (__v8si) _mm256_setzero_si256 (),
8645                 __M);
8646}
8647
8648static __inline__ __m256i __DEFAULT_FN_ATTRS
8649_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
8650             __m256i __Y)
8651{
8652  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8653                 (__v8si) __X,
8654                 (__v8si) __W,
8655                 (__mmask8) __M);
8656}
8657
8658static __inline__ __m256i __DEFAULT_FN_ATTRS
8659_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
8660{
8661  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
8662                 (__v8si) __X,
8663                 (__v8si) _mm256_undefined_si256(),
8664                 (__mmask8) -1);
8665}
8666
/* Unmasked 128-bit alignr on 32-bit elements: passes A, B and the integer imm
 * to __builtin_ia32_alignd128_mask with an undefined vector as the fourth
 * operand and an all-ones mask. */
#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (int)(imm), \
      (__v4si)_mm_undefined_si128(), \
      (__mmask8)-1); })
8672
/* Masked 128-bit alignr on 32-bit elements: passes A, B and the integer imm
 * to __builtin_ia32_alignd128_mask with W as the fourth operand and U as the
 * mask operand. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (int)(imm), \
      (__v4si)(__m128i)(W), \
      (__mmask8)(U)); })
8678
/* Zero-masked 128-bit alignr on 32-bit elements: passes A, B and the integer
 * imm to __builtin_ia32_alignd128_mask with an all-zero vector as the fourth
 * operand and U as the mask operand. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignd128_mask( \
      (__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), \
      (int)(imm), \
      (__v4si)_mm_setzero_si128(), \
      (__mmask8)(U)); })
8684
/* Unmasked 256-bit alignr on 32-bit elements: passes A, B and the integer imm
 * to __builtin_ia32_alignd256_mask with an undefined vector as the fourth
 * operand and an all-ones mask. */
#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v8si)_mm256_undefined_si256(), \
      (__mmask8)-1); })
8690
/* Masked 256-bit alignr on 32-bit elements: passes A, B and the integer imm
 * to __builtin_ia32_alignd256_mask with W as the fourth operand and U as the
 * mask operand. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U)); })
8696
/* Zero-masked 256-bit alignr on 32-bit elements: passes A, B and the integer
 * imm to __builtin_ia32_alignd256_mask with an all-zero vector as the fourth
 * operand and U as the mask operand. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignd256_mask( \
      (__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), \
      (int)(imm), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
8702
/* Unmasked 128-bit alignr on 64-bit elements: passes A, B and the integer imm
 * to __builtin_ia32_alignq128_mask with an all-ones mask.
 *
 * The fourth operand is only a placeholder in the unmasked form (the masked
 * variant passes W there), so an undefined vector is supplied, matching
 * _mm_alignr_epi32 and the 256-bit alignr macros, instead of materialising a
 * zero vector through the avx512f-only helper _mm_setzero_di(). */
#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
                                         (__v2di)(__m128i)(B), (int)(imm), \
                                         (__v2di)_mm_undefined_si128(), \
                                         (__mmask8)-1); })
8708
/* Masked 128-bit alignr on 64-bit elements: passes A, B and the integer imm
 * to __builtin_ia32_alignq128_mask with W as the fourth operand and U as the
 * mask operand. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignq128_mask( \
      (__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), \
      (int)(imm), \
      (__v2di)(__m128i)(W), \
      (__mmask8)(U)); })
8714
/* Zero-masked 128-bit alignr on 64-bit elements: passes A, B and the integer
 * imm to __builtin_ia32_alignq128_mask with an all-zero vector as the fourth
 * operand and U as the mask operand.
 *
 * The zero vector comes from the standard _mm_setzero_si128(), as in
 * _mm_maskz_alignr_epi32, rather than from the header-local _mm_setzero_di()
 * helper; both produce an all-zero 128-bit integer vector. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \
                                         (__v2di)(__m128i)(B), (int)(imm), \
                                         (__v2di)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })
8720
/* Unmasked 256-bit alignr on 64-bit elements: passes A, B and the integer imm
 * to __builtin_ia32_alignq256_mask with an all-ones mask.
 *
 * The fourth operand is only a placeholder in the unmasked form (the masked
 * variant passes W there).  It is taken from _mm256_undefined_si256(), the
 * integer undefined-value helper, as in _mm256_alignr_epi32, rather than from
 * the floating-point helper via a cross-type vector cast. */
#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \
                                         (__v4di)(__m256i)(B), (int)(imm), \
                                         (__v4di)_mm256_undefined_si256(), \
                                         (__mmask8)-1); })
8726
/* Masked 256-bit alignr on 64-bit elements: passes A, B and the integer imm
 * to __builtin_ia32_alignq256_mask with W as the fourth operand and U as the
 * mask operand. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (int)(imm), \
      (__v4di)(__m256i)(W), \
      (__mmask8)(U)); })
8732
/* Zero-masked 256-bit alignr on 64-bit elements: passes A, B and the integer
 * imm to __builtin_ia32_alignq256_mask with an all-zero vector as the fourth
 * operand and U as the mask operand. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_alignq256_mask( \
      (__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), \
      (int)(imm), \
      (__v4di)_mm256_setzero_si256(), \
      (__mmask8)(U)); })
8738
8739static __inline__ __m128 __DEFAULT_FN_ATTRS
8740_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8741{
8742  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8743                                             (__v4sf)_mm_movehdup_ps(__A),
8744                                             (__v4sf)__W);
8745}
8746
8747static __inline__ __m128 __DEFAULT_FN_ATTRS
8748_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
8749{
8750  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8751                                             (__v4sf)_mm_movehdup_ps(__A),
8752                                             (__v4sf)_mm_setzero_ps());
8753}
8754
8755static __inline__ __m256 __DEFAULT_FN_ATTRS
8756_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8757{
8758  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8759                                             (__v8sf)_mm256_movehdup_ps(__A),
8760                                             (__v8sf)__W);
8761}
8762
8763static __inline__ __m256 __DEFAULT_FN_ATTRS
8764_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
8765{
8766  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8767                                             (__v8sf)_mm256_movehdup_ps(__A),
8768                                             (__v8sf)_mm256_setzero_ps());
8769}
8770
8771static __inline__ __m128 __DEFAULT_FN_ATTRS
8772_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8773{
8774  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8775                                             (__v4sf)_mm_moveldup_ps(__A),
8776                                             (__v4sf)__W);
8777}
8778
8779static __inline__ __m128 __DEFAULT_FN_ATTRS
8780_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
8781{
8782  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8783                                             (__v4sf)_mm_moveldup_ps(__A),
8784                                             (__v4sf)_mm_setzero_ps());
8785}
8786
8787static __inline__ __m256 __DEFAULT_FN_ATTRS
8788_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8789{
8790  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8791                                             (__v8sf)_mm256_moveldup_ps(__A),
8792                                             (__v8sf)__W);
8793}
8794
8795static __inline__ __m256 __DEFAULT_FN_ATTRS
8796_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
8797{
8798  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8799                                             (__v8sf)_mm256_moveldup_ps(__A),
8800                                             (__v8sf)_mm256_setzero_ps());
8801}
8802
/* Masked 256-bit shuffle_epi32: hands U, the result of
 * _mm256_shuffle_epi32(A, I) and W, in that order, to
 * __builtin_ia32_selectd_256. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)(__m256i)(W)); })
8807
/* Zero-masked 256-bit shuffle_epi32: hands U, the result of
 * _mm256_shuffle_epi32(A, I) and an all-zero vector, in that order, to
 * __builtin_ia32_selectd_256. */
#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)_mm256_setzero_si256()); })
8812
/* Masked 128-bit shuffle_epi32: hands U, the result of
 * _mm_shuffle_epi32(A, I) and W, in that order, to
 * __builtin_ia32_selectd_128. */
#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)(__m128i)(W)); })
8817
/* Zero-masked 128-bit shuffle_epi32: hands U, the result of
 * _mm_shuffle_epi32(A, I) and an all-zero vector, in that order, to
 * __builtin_ia32_selectd_128. */
#define _mm_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)_mm_setzero_si128()); })
8822
8823static __inline__ __m128d __DEFAULT_FN_ATTRS
8824_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8825{
8826  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8827              (__v2df) __A,
8828              (__v2df) __W);
8829}
8830
8831static __inline__ __m128d __DEFAULT_FN_ATTRS
8832_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8833{
8834  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8835              (__v2df) __A,
8836              (__v2df) _mm_setzero_pd ());
8837}
8838
8839static __inline__ __m256d __DEFAULT_FN_ATTRS
8840_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8841{
8842  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8843              (__v4df) __A,
8844              (__v4df) __W);
8845}
8846
8847static __inline__ __m256d __DEFAULT_FN_ATTRS
8848_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
8849{
8850  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8851              (__v4df) __A,
8852              (__v4df) _mm256_setzero_pd ());
8853}
8854
8855static __inline__ __m128 __DEFAULT_FN_ATTRS
8856_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8857{
8858  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8859             (__v4sf) __A,
8860             (__v4sf) __W);
8861}
8862
8863static __inline__ __m128 __DEFAULT_FN_ATTRS
8864_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
8865{
8866  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8867             (__v4sf) __A,
8868             (__v4sf) _mm_setzero_ps ());
8869}
8870
8871static __inline__ __m256 __DEFAULT_FN_ATTRS
8872_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8873{
8874  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8875             (__v8sf) __A,
8876             (__v8sf) __W);
8877}
8878
8879static __inline__ __m256 __DEFAULT_FN_ATTRS
8880_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
8881{
8882  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8883             (__v8sf) __A,
8884             (__v8sf) _mm256_setzero_ps ());
8885}
8886
8887static __inline__ __m128 __DEFAULT_FN_ATTRS
8888_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8889{
8890  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8891             (__v4sf) __W,
8892             (__mmask8) __U);
8893}
8894
8895static __inline__ __m128 __DEFAULT_FN_ATTRS
8896_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8897{
8898  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8899             (__v4sf)
8900             _mm_setzero_ps (),
8901             (__mmask8) __U);
8902}
8903
8904static __inline__ __m256 __DEFAULT_FN_ATTRS
8905_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8906{
8907  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8908                (__v8sf) __W,
8909                (__mmask8) __U);
8910}
8911
8912static __inline__ __m256 __DEFAULT_FN_ATTRS
8913_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8914{
8915  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8916                (__v8sf)
8917                _mm256_setzero_ps (),
8918                (__mmask8) __U);
8919}
8920
8921static __inline __m128i __DEFAULT_FN_ATTRS
8922_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
8923{
8924  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8925                                                  (__v8hi) __W,
8926                                                  (__mmask8) __U);
8927}
8928
8929static __inline __m128i __DEFAULT_FN_ATTRS
8930_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
8931{
8932  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
8933                                                  (__v8hi) _mm_setzero_si128 (),
8934                                                  (__mmask8) __U);
8935}
8936
/* Masked single-to-half conversion with a caller-supplied immediate: passes
 * A, (int)I, W (viewed as __v8hi) and U, in that order, to
 * __builtin_ia32_vcvtps2ph_mask. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), \
      (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)); })
8941
/* Zero-masked single-to-half conversion with a caller-supplied immediate:
 * passes A, (int)I, an all-zero vector (viewed as __v8hi) and U, in that
 * order, to __builtin_ia32_vcvtps2ph_mask. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask( \
      (__v4sf)(__m128)(A), \
      (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)); })
8946
8947static __inline __m128i __DEFAULT_FN_ATTRS
8948_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
8949{
8950  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8951                                                      (__v8hi) __W,
8952                                                      (__mmask8) __U);
8953}
8954
8955static __inline __m128i __DEFAULT_FN_ATTRS
8956_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
8957{
8958  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
8959                                                      (__v8hi) _mm_setzero_si128(),
8960                                                      (__mmask8) __U);
8961}
/* Masked single-to-half conversion of a 256-bit source with a caller-supplied
 * immediate: passes A (as __v8sf), (int)I, W (viewed as __v8hi) and U, in that
 * order, to __builtin_ia32_vcvtps2ph256_mask. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)); })
8966
/* Zero-masked single-to-half conversion of a 256-bit source with a
 * caller-supplied immediate: passes A (as __v8sf), (int)I, an all-zero vector
 * (viewed as __v8hi) and U, in that order, to
 * __builtin_ia32_vcvtps2ph256_mask. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)); })
8971
8972
8973#undef __DEFAULT_FN_ATTRS
8974
8975#endif /* __AVX512VLINTRIN_H */
8976