1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLDQINTRIN_H
29#define __AVX512VLDQINTRIN_H
30
/* Attribute set applied to every function defined in this header:
 * always_inline, nodebug, and a target of "avx512vl,avx512dq". */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512dq")))
33
34static __inline__ __m256i __DEFAULT_FN_ATTRS
35_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
36  return (__m256i) ((__v4du) __A * (__v4du) __B);
37}
38
39static __inline__ __m256i __DEFAULT_FN_ATTRS
40_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
41  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
42                                             (__v4di)_mm256_mullo_epi64(__A, __B),
43                                             (__v4di)__W);
44}
45
46static __inline__ __m256i __DEFAULT_FN_ATTRS
47_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
48  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
49                                             (__v4di)_mm256_mullo_epi64(__A, __B),
50                                             (__v4di)_mm256_setzero_si256());
51}
52
53static __inline__ __m128i __DEFAULT_FN_ATTRS
54_mm_mullo_epi64 (__m128i __A, __m128i __B) {
55  return (__m128i) ((__v2du) __A * (__v2du) __B);
56}
57
58static __inline__ __m128i __DEFAULT_FN_ATTRS
59_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
60  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
61                                             (__v2di)_mm_mullo_epi64(__A, __B),
62                                             (__v2di)__W);
63}
64
65static __inline__ __m128i __DEFAULT_FN_ATTRS
66_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
67  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
68                                             (__v2di)_mm_mullo_epi64(__A, __B),
69                                             (__v2di)_mm_setzero_si128());
70}
71
72static __inline__ __m256d __DEFAULT_FN_ATTRS
73_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
74  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
75                                              (__v4df)_mm256_andnot_pd(__A, __B),
76                                              (__v4df)__W);
77}
78
79static __inline__ __m256d __DEFAULT_FN_ATTRS
80_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
81  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
82                                              (__v4df)_mm256_andnot_pd(__A, __B),
83                                              (__v4df)_mm256_setzero_pd());
84}
85
86static __inline__ __m128d __DEFAULT_FN_ATTRS
87_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
88  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
89                                              (__v2df)_mm_andnot_pd(__A, __B),
90                                              (__v2df)__W);
91}
92
93static __inline__ __m128d __DEFAULT_FN_ATTRS
94_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
95  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
96                                              (__v2df)_mm_andnot_pd(__A, __B),
97                                              (__v2df)_mm_setzero_pd());
98}
99
100static __inline__ __m256 __DEFAULT_FN_ATTRS
101_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
102  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
103                                             (__v8sf)_mm256_andnot_ps(__A, __B),
104                                             (__v8sf)__W);
105}
106
107static __inline__ __m256 __DEFAULT_FN_ATTRS
108_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
109  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
110                                             (__v8sf)_mm256_andnot_ps(__A, __B),
111                                             (__v8sf)_mm256_setzero_ps());
112}
113
114static __inline__ __m128 __DEFAULT_FN_ATTRS
115_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
116  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
117                                             (__v4sf)_mm_andnot_ps(__A, __B),
118                                             (__v4sf)__W);
119}
120
121static __inline__ __m128 __DEFAULT_FN_ATTRS
122_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
123  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
124                                             (__v4sf)_mm_andnot_ps(__A, __B),
125                                             (__v4sf)_mm_setzero_ps());
126}
127
128static __inline__ __m256d __DEFAULT_FN_ATTRS
129_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
130  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
131                                              (__v4df)_mm256_and_pd(__A, __B),
132                                              (__v4df)__W);
133}
134
135static __inline__ __m256d __DEFAULT_FN_ATTRS
136_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
137  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
138                                              (__v4df)_mm256_and_pd(__A, __B),
139                                              (__v4df)_mm256_setzero_pd());
140}
141
142static __inline__ __m128d __DEFAULT_FN_ATTRS
143_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
144  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
145                                              (__v2df)_mm_and_pd(__A, __B),
146                                              (__v2df)__W);
147}
148
149static __inline__ __m128d __DEFAULT_FN_ATTRS
150_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
151  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
152                                              (__v2df)_mm_and_pd(__A, __B),
153                                              (__v2df)_mm_setzero_pd());
154}
155
156static __inline__ __m256 __DEFAULT_FN_ATTRS
157_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
158  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
159                                             (__v8sf)_mm256_and_ps(__A, __B),
160                                             (__v8sf)__W);
161}
162
163static __inline__ __m256 __DEFAULT_FN_ATTRS
164_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
165  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
166                                             (__v8sf)_mm256_and_ps(__A, __B),
167                                             (__v8sf)_mm256_setzero_ps());
168}
169
170static __inline__ __m128 __DEFAULT_FN_ATTRS
171_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
172  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
173                                             (__v4sf)_mm_and_ps(__A, __B),
174                                             (__v4sf)__W);
175}
176
177static __inline__ __m128 __DEFAULT_FN_ATTRS
178_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
179  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
180                                             (__v4sf)_mm_and_ps(__A, __B),
181                                             (__v4sf)_mm_setzero_ps());
182}
183
184static __inline__ __m256d __DEFAULT_FN_ATTRS
185_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
186  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
187                                              (__v4df)_mm256_xor_pd(__A, __B),
188                                              (__v4df)__W);
189}
190
191static __inline__ __m256d __DEFAULT_FN_ATTRS
192_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
193  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
194                                              (__v4df)_mm256_xor_pd(__A, __B),
195                                              (__v4df)_mm256_setzero_pd());
196}
197
198static __inline__ __m128d __DEFAULT_FN_ATTRS
199_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
200  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
201                                              (__v2df)_mm_xor_pd(__A, __B),
202                                              (__v2df)__W);
203}
204
205static __inline__ __m128d __DEFAULT_FN_ATTRS
206_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
207  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
208                                              (__v2df)_mm_xor_pd(__A, __B),
209                                              (__v2df)_mm_setzero_pd());
210}
211
212static __inline__ __m256 __DEFAULT_FN_ATTRS
213_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
214  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
215                                             (__v8sf)_mm256_xor_ps(__A, __B),
216                                             (__v8sf)__W);
217}
218
219static __inline__ __m256 __DEFAULT_FN_ATTRS
220_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
221  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
222                                             (__v8sf)_mm256_xor_ps(__A, __B),
223                                             (__v8sf)_mm256_setzero_ps());
224}
225
226static __inline__ __m128 __DEFAULT_FN_ATTRS
227_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
228  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
229                                             (__v4sf)_mm_xor_ps(__A, __B),
230                                             (__v4sf)__W);
231}
232
233static __inline__ __m128 __DEFAULT_FN_ATTRS
234_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
235  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
236                                             (__v4sf)_mm_xor_ps(__A, __B),
237                                             (__v4sf)_mm_setzero_ps());
238}
239
240static __inline__ __m256d __DEFAULT_FN_ATTRS
241_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
242  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
243                                              (__v4df)_mm256_or_pd(__A, __B),
244                                              (__v4df)__W);
245}
246
247static __inline__ __m256d __DEFAULT_FN_ATTRS
248_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
249  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
250                                              (__v4df)_mm256_or_pd(__A, __B),
251                                              (__v4df)_mm256_setzero_pd());
252}
253
254static __inline__ __m128d __DEFAULT_FN_ATTRS
255_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
256  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
257                                              (__v2df)_mm_or_pd(__A, __B),
258                                              (__v2df)__W);
259}
260
261static __inline__ __m128d __DEFAULT_FN_ATTRS
262_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
263  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
264                                              (__v2df)_mm_or_pd(__A, __B),
265                                              (__v2df)_mm_setzero_pd());
266}
267
268static __inline__ __m256 __DEFAULT_FN_ATTRS
269_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
270  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
271                                             (__v8sf)_mm256_or_ps(__A, __B),
272                                             (__v8sf)__W);
273}
274
275static __inline__ __m256 __DEFAULT_FN_ATTRS
276_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
277  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
278                                             (__v8sf)_mm256_or_ps(__A, __B),
279                                             (__v8sf)_mm256_setzero_ps());
280}
281
282static __inline__ __m128 __DEFAULT_FN_ATTRS
283_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
284  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
285                                             (__v4sf)_mm_or_ps(__A, __B),
286                                             (__v4sf)__W);
287}
288
289static __inline__ __m128 __DEFAULT_FN_ATTRS
290_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
291  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
292                                             (__v4sf)_mm_or_ps(__A, __B),
293                                             (__v4sf)_mm_setzero_ps());
294}
295
296static __inline__ __m128i __DEFAULT_FN_ATTRS
297_mm_cvtpd_epi64 (__m128d __A) {
298  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
299                (__v2di) _mm_setzero_si128(),
300                (__mmask8) -1);
301}
302
303static __inline__ __m128i __DEFAULT_FN_ATTRS
304_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
305  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
306                (__v2di) __W,
307                (__mmask8) __U);
308}
309
310static __inline__ __m128i __DEFAULT_FN_ATTRS
311_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
312  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
313                (__v2di) _mm_setzero_si128(),
314                (__mmask8) __U);
315}
316
317static __inline__ __m256i __DEFAULT_FN_ATTRS
318_mm256_cvtpd_epi64 (__m256d __A) {
319  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
320                (__v4di) _mm256_setzero_si256(),
321                (__mmask8) -1);
322}
323
324static __inline__ __m256i __DEFAULT_FN_ATTRS
325_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
326  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
327                (__v4di) __W,
328                (__mmask8) __U);
329}
330
331static __inline__ __m256i __DEFAULT_FN_ATTRS
332_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
333  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
334                (__v4di) _mm256_setzero_si256(),
335                (__mmask8) __U);
336}
337
338static __inline__ __m128i __DEFAULT_FN_ATTRS
339_mm_cvtpd_epu64 (__m128d __A) {
340  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
341                (__v2di) _mm_setzero_si128(),
342                (__mmask8) -1);
343}
344
345static __inline__ __m128i __DEFAULT_FN_ATTRS
346_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
347  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
348                (__v2di) __W,
349                (__mmask8) __U);
350}
351
352static __inline__ __m128i __DEFAULT_FN_ATTRS
353_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
354  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
355                (__v2di) _mm_setzero_si128(),
356                (__mmask8) __U);
357}
358
359static __inline__ __m256i __DEFAULT_FN_ATTRS
360_mm256_cvtpd_epu64 (__m256d __A) {
361  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
362                (__v4di) _mm256_setzero_si256(),
363                (__mmask8) -1);
364}
365
366static __inline__ __m256i __DEFAULT_FN_ATTRS
367_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
368  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
369                (__v4di) __W,
370                (__mmask8) __U);
371}
372
373static __inline__ __m256i __DEFAULT_FN_ATTRS
374_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
375  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
376                (__v4di) _mm256_setzero_si256(),
377                (__mmask8) __U);
378}
379
380static __inline__ __m128i __DEFAULT_FN_ATTRS
381_mm_cvtps_epi64 (__m128 __A) {
382  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
383                (__v2di) _mm_setzero_si128(),
384                (__mmask8) -1);
385}
386
387static __inline__ __m128i __DEFAULT_FN_ATTRS
388_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
389  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
390                (__v2di) __W,
391                (__mmask8) __U);
392}
393
394static __inline__ __m128i __DEFAULT_FN_ATTRS
395_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
396  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
397                (__v2di) _mm_setzero_si128(),
398                (__mmask8) __U);
399}
400
401static __inline__ __m256i __DEFAULT_FN_ATTRS
402_mm256_cvtps_epi64 (__m128 __A) {
403  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
404                (__v4di) _mm256_setzero_si256(),
405                (__mmask8) -1);
406}
407
408static __inline__ __m256i __DEFAULT_FN_ATTRS
409_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
410  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
411                (__v4di) __W,
412                (__mmask8) __U);
413}
414
415static __inline__ __m256i __DEFAULT_FN_ATTRS
416_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
417  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
418                (__v4di) _mm256_setzero_si256(),
419                (__mmask8) __U);
420}
421
422static __inline__ __m128i __DEFAULT_FN_ATTRS
423_mm_cvtps_epu64 (__m128 __A) {
424  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
425                (__v2di) _mm_setzero_si128(),
426                (__mmask8) -1);
427}
428
429static __inline__ __m128i __DEFAULT_FN_ATTRS
430_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
431  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
432                (__v2di) __W,
433                (__mmask8) __U);
434}
435
436static __inline__ __m128i __DEFAULT_FN_ATTRS
437_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
438  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
439                (__v2di) _mm_setzero_si128(),
440                (__mmask8) __U);
441}
442
443static __inline__ __m256i __DEFAULT_FN_ATTRS
444_mm256_cvtps_epu64 (__m128 __A) {
445  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
446                (__v4di) _mm256_setzero_si256(),
447                (__mmask8) -1);
448}
449
450static __inline__ __m256i __DEFAULT_FN_ATTRS
451_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
452  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
453                (__v4di) __W,
454                (__mmask8) __U);
455}
456
457static __inline__ __m256i __DEFAULT_FN_ATTRS
458_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
459  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
460                (__v4di) _mm256_setzero_si256(),
461                (__mmask8) __U);
462}
463
464static __inline__ __m128d __DEFAULT_FN_ATTRS
465_mm_cvtepi64_pd (__m128i __A) {
466  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
467                (__v2df) _mm_setzero_pd(),
468                (__mmask8) -1);
469}
470
471static __inline__ __m128d __DEFAULT_FN_ATTRS
472_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
473  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
474                (__v2df) __W,
475                (__mmask8) __U);
476}
477
478static __inline__ __m128d __DEFAULT_FN_ATTRS
479_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
480  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
481                (__v2df) _mm_setzero_pd(),
482                (__mmask8) __U);
483}
484
485static __inline__ __m256d __DEFAULT_FN_ATTRS
486_mm256_cvtepi64_pd (__m256i __A) {
487  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
488                (__v4df) _mm256_setzero_pd(),
489                (__mmask8) -1);
490}
491
492static __inline__ __m256d __DEFAULT_FN_ATTRS
493_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
494  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
495                (__v4df) __W,
496                (__mmask8) __U);
497}
498
499static __inline__ __m256d __DEFAULT_FN_ATTRS
500_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
501  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
502                (__v4df) _mm256_setzero_pd(),
503                (__mmask8) __U);
504}
505
506static __inline__ __m128 __DEFAULT_FN_ATTRS
507_mm_cvtepi64_ps (__m128i __A) {
508  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
509                (__v4sf) _mm_setzero_ps(),
510                (__mmask8) -1);
511}
512
513static __inline__ __m128 __DEFAULT_FN_ATTRS
514_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
515  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
516                (__v4sf) __W,
517                (__mmask8) __U);
518}
519
520static __inline__ __m128 __DEFAULT_FN_ATTRS
521_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
522  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
523                (__v4sf) _mm_setzero_ps(),
524                (__mmask8) __U);
525}
526
527static __inline__ __m128 __DEFAULT_FN_ATTRS
528_mm256_cvtepi64_ps (__m256i __A) {
529  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
530                (__v4sf) _mm_setzero_ps(),
531                (__mmask8) -1);
532}
533
534static __inline__ __m128 __DEFAULT_FN_ATTRS
535_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
536  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
537                (__v4sf) __W,
538                (__mmask8) __U);
539}
540
541static __inline__ __m128 __DEFAULT_FN_ATTRS
542_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
543  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
544                (__v4sf) _mm_setzero_ps(),
545                (__mmask8) __U);
546}
547
548static __inline__ __m128i __DEFAULT_FN_ATTRS
549_mm_cvttpd_epi64 (__m128d __A) {
550  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
551                (__v2di) _mm_setzero_si128(),
552                (__mmask8) -1);
553}
554
555static __inline__ __m128i __DEFAULT_FN_ATTRS
556_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
557  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
558                (__v2di) __W,
559                (__mmask8) __U);
560}
561
562static __inline__ __m128i __DEFAULT_FN_ATTRS
563_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
564  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
565                (__v2di) _mm_setzero_si128(),
566                (__mmask8) __U);
567}
568
569static __inline__ __m256i __DEFAULT_FN_ATTRS
570_mm256_cvttpd_epi64 (__m256d __A) {
571  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
572                (__v4di) _mm256_setzero_si256(),
573                (__mmask8) -1);
574}
575
576static __inline__ __m256i __DEFAULT_FN_ATTRS
577_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
578  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
579                (__v4di) __W,
580                (__mmask8) __U);
581}
582
583static __inline__ __m256i __DEFAULT_FN_ATTRS
584_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
585  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
586                (__v4di) _mm256_setzero_si256(),
587                (__mmask8) __U);
588}
589
590static __inline__ __m128i __DEFAULT_FN_ATTRS
591_mm_cvttpd_epu64 (__m128d __A) {
592  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
593                (__v2di) _mm_setzero_si128(),
594                (__mmask8) -1);
595}
596
597static __inline__ __m128i __DEFAULT_FN_ATTRS
598_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
599  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
600                (__v2di) __W,
601                (__mmask8) __U);
602}
603
604static __inline__ __m128i __DEFAULT_FN_ATTRS
605_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
606  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
607                (__v2di) _mm_setzero_si128(),
608                (__mmask8) __U);
609}
610
611static __inline__ __m256i __DEFAULT_FN_ATTRS
612_mm256_cvttpd_epu64 (__m256d __A) {
613  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
614                (__v4di) _mm256_setzero_si256(),
615                (__mmask8) -1);
616}
617
618static __inline__ __m256i __DEFAULT_FN_ATTRS
619_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
620  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
621                (__v4di) __W,
622                (__mmask8) __U);
623}
624
625static __inline__ __m256i __DEFAULT_FN_ATTRS
626_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
627  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
628                (__v4di) _mm256_setzero_si256(),
629                (__mmask8) __U);
630}
631
632static __inline__ __m128i __DEFAULT_FN_ATTRS
633_mm_cvttps_epi64 (__m128 __A) {
634  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
635                (__v2di) _mm_setzero_si128(),
636                (__mmask8) -1);
637}
638
639static __inline__ __m128i __DEFAULT_FN_ATTRS
640_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
641  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
642                (__v2di) __W,
643                (__mmask8) __U);
644}
645
646static __inline__ __m128i __DEFAULT_FN_ATTRS
647_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
648  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
649                (__v2di) _mm_setzero_si128(),
650                (__mmask8) __U);
651}
652
653static __inline__ __m256i __DEFAULT_FN_ATTRS
654_mm256_cvttps_epi64 (__m128 __A) {
655  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
656                (__v4di) _mm256_setzero_si256(),
657                (__mmask8) -1);
658}
659
660static __inline__ __m256i __DEFAULT_FN_ATTRS
661_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
662  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
663                (__v4di) __W,
664                (__mmask8) __U);
665}
666
667static __inline__ __m256i __DEFAULT_FN_ATTRS
668_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
669  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
670                (__v4di) _mm256_setzero_si256(),
671                (__mmask8) __U);
672}
673
674static __inline__ __m128i __DEFAULT_FN_ATTRS
675_mm_cvttps_epu64 (__m128 __A) {
676  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
677                (__v2di) _mm_setzero_si128(),
678                (__mmask8) -1);
679}
680
681static __inline__ __m128i __DEFAULT_FN_ATTRS
682_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
683  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
684                (__v2di) __W,
685                (__mmask8) __U);
686}
687
688static __inline__ __m128i __DEFAULT_FN_ATTRS
689_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
690  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
691                (__v2di) _mm_setzero_si128(),
692                (__mmask8) __U);
693}
694
695static __inline__ __m256i __DEFAULT_FN_ATTRS
696_mm256_cvttps_epu64 (__m128 __A) {
697  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
698                (__v4di) _mm256_setzero_si256(),
699                (__mmask8) -1);
700}
701
702static __inline__ __m256i __DEFAULT_FN_ATTRS
703_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
704  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
705                (__v4di) __W,
706                (__mmask8) __U);
707}
708
709static __inline__ __m256i __DEFAULT_FN_ATTRS
710_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
711  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
712                (__v4di) _mm256_setzero_si256(),
713                (__mmask8) __U);
714}
715
716static __inline__ __m128d __DEFAULT_FN_ATTRS
717_mm_cvtepu64_pd (__m128i __A) {
718  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
719                (__v2df) _mm_setzero_pd(),
720                (__mmask8) -1);
721}
722
723static __inline__ __m128d __DEFAULT_FN_ATTRS
724_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
725  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
726                (__v2df) __W,
727                (__mmask8) __U);
728}
729
730static __inline__ __m128d __DEFAULT_FN_ATTRS
731_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
732  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
733                (__v2df) _mm_setzero_pd(),
734                (__mmask8) __U);
735}
736
737static __inline__ __m256d __DEFAULT_FN_ATTRS
738_mm256_cvtepu64_pd (__m256i __A) {
739  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
740                (__v4df) _mm256_setzero_pd(),
741                (__mmask8) -1);
742}
743
744static __inline__ __m256d __DEFAULT_FN_ATTRS
745_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
746  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
747                (__v4df) __W,
748                (__mmask8) __U);
749}
750
751static __inline__ __m256d __DEFAULT_FN_ATTRS
752_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
753  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
754                (__v4df) _mm256_setzero_pd(),
755                (__mmask8) __U);
756}
757
758static __inline__ __m128 __DEFAULT_FN_ATTRS
759_mm_cvtepu64_ps (__m128i __A) {
760  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
761                (__v4sf) _mm_setzero_ps(),
762                (__mmask8) -1);
763}
764
765static __inline__ __m128 __DEFAULT_FN_ATTRS
766_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
767  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
768                (__v4sf) __W,
769                (__mmask8) __U);
770}
771
772static __inline__ __m128 __DEFAULT_FN_ATTRS
773_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
774  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
775                (__v4sf) _mm_setzero_ps(),
776                (__mmask8) __U);
777}
778
779static __inline__ __m128 __DEFAULT_FN_ATTRS
780_mm256_cvtepu64_ps (__m256i __A) {
781  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
782                (__v4sf) _mm_setzero_ps(),
783                (__mmask8) -1);
784}
785
786static __inline__ __m128 __DEFAULT_FN_ATTRS
787_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
788  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
789                (__v4sf) __W,
790                (__mmask8) __U);
791}
792
793static __inline__ __m128 __DEFAULT_FN_ATTRS
794_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
795  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
796                (__v4sf) _mm_setzero_ps(),
797                (__mmask8) __U);
798}
799
/* Unmasked 128-bit double-precision range operation: forwards A, B and the
 * control value C (as int) to the rangepd128 builtin with a zero
 * pass-through vector and an all-ones mask (-1). */
#define _mm_range_pd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(C), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1); })
805
/* Masked 128-bit double-precision range operation: forwards A, B and the
 * control value C (as int) to the rangepd128 builtin, with W as the
 * pass-through vector and U as the lane mask. */
#define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(C), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)); })
811
/* Zero-masked 128-bit double-precision range operation: forwards A, B and
 * the control value C (as int) to the rangepd128 builtin, with an all-zero
 * pass-through vector and U as the lane mask. */
#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(C), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)); })
817
/* Unmasked 256-bit double-precision range operation: forwards A, B and the
 * control value C (as int) to the rangepd256 builtin with a zero
 * pass-through vector and an all-ones mask (-1). */
#define _mm256_range_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (int)(C), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1); })
823
/* Masked 256-bit double-precision range operation: forwards A, B and the
 * control value C (as int) to the rangepd256 builtin, with W as the
 * pass-through vector and U as the lane mask. */
#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (int)(C), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)); })
829
/* Zero-masked 256-bit double-precision range operation: forwards A, B and
 * the control value C (as int) to the rangepd256 builtin, with an all-zero
 * pass-through vector and U as the lane mask. */
#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), \
      (__v4df)(__m256d)(B), \
      (int)(C), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)); })
835
/* Unmasked 128-bit single-precision range operation: forwards A, B and the
 * control value C (as int) to the rangeps128 builtin with a zero
 * pass-through vector and an all-ones mask (-1). */
#define _mm_range_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(C), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1); })
841
/* Masked 128-bit single-precision range operation: forwards A, B and the
 * control value C (as int) to the rangeps128 builtin, with W as the
 * pass-through vector and U as the lane mask. */
#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(C), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)); })
846
/* Zero-masked 128-bit single-precision range operation: forwards A, B and
 * the control value C (as int) to the rangeps128 builtin, with an all-zero
 * pass-through vector and U as the lane mask. */
#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(C), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)); })
852
/* Unmasked 256-bit single-precision range operation: forwards A, B and the
 * control value C (as int) to the rangeps256 builtin with a zero
 * pass-through vector and an all-ones mask (-1). */
#define _mm256_range_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(C), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1); })
858
/* Masked 256-bit single-precision range operation: forwards A, B and the
 * control value C (as int) to the rangeps256 builtin, with W as the
 * pass-through vector and U as the lane mask. */
#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(C), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)); })
863
/* Zero-masked 256-bit single-precision range operation: forwards A, B and
 * the control value C (as int) to the rangeps256 builtin, with an all-zero
 * pass-through vector and U as the lane mask. */
#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), \
      (__v8sf)(__m256)(B), \
      (int)(C), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)); })
869
/* Unmasked 128-bit double-precision reduce operation: forwards A and the
 * control value B (as int) to the reducepd128 builtin with a zero
 * pass-through vector and an all-ones mask (-1). */
#define _mm_reduce_pd(A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1); })
874
/* Masked 128-bit double-precision reduce operation: forwards A and the
 * control value B (as int) to the reducepd128 builtin, with W as the
 * pass-through vector and U as the lane mask. */
#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)); })
879
/* Zero-masked 128-bit double-precision reduce operation: forwards A and the
 * control value B (as int) to the reducepd128 builtin, with an all-zero
 * pass-through vector and U as the lane mask. */
#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)); })
884
/* Unmasked 256-bit double-precision reduce operation: forwards A and the
 * control value B (as int) to the reducepd256 builtin with a zero
 * pass-through vector and an all-ones mask (-1). */
#define _mm256_reduce_pd(A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1); })
889
/* Masked 256-bit double-precision reduce operation: forwards A and the
 * control value B (as int) to the reducepd256 builtin, with W as the
 * pass-through vector and U as the lane mask. */
#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)); })
894
/* Zero-masked 256-bit double-precision reduce operation: forwards A and the
 * control value B (as int) to the reducepd256 builtin, with an all-zero
 * pass-through vector and U as the lane mask. */
#define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)); })
899
/* Unmasked 128-bit single-precision reduce operation: forwards A and the
 * control value B (as int) to the reduceps128 builtin with a zero
 * pass-through vector and an all-ones mask (-1). */
#define _mm_reduce_ps(A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1); })
904
/* Masked 128-bit single-precision reduce operation: forwards A and the
 * control value B (as int) to the reduceps128 builtin, with W as the
 * pass-through vector and U as the lane mask. */
#define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)); })
909
/* Zero-masked 128-bit single-precision reduce operation: forwards A and the
 * control value B (as int) to the reduceps128 builtin, with an all-zero
 * pass-through vector and U as the lane mask. */
#define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)); })
914
/* Unmasked 256-bit single-precision reduce operation: forwards A and the
 * control value B (as int) to the reduceps256 builtin with a zero
 * pass-through vector and an all-ones mask (-1). */
#define _mm256_reduce_ps(A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1); })
919
/* Masked 256-bit single-precision reduce operation: forwards A and the
 * control value B (as int) to the reduceps256 builtin, with W as the
 * pass-through vector and U as the lane mask. */
#define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)); })
924
/* Zero-masked 256-bit single-precision reduce operation: forwards A and the
 * control value B (as int) to the reduceps256 builtin, with an all-zero
 * pass-through vector and U as the lane mask. */
#define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)); })
929
930static __inline__ __mmask8 __DEFAULT_FN_ATTRS
931_mm_movepi32_mask (__m128i __A)
932{
933  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
934}
935
936static __inline__ __mmask8 __DEFAULT_FN_ATTRS
937_mm256_movepi32_mask (__m256i __A)
938{
939  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
940}
941
942static __inline__ __m128i __DEFAULT_FN_ATTRS
943_mm_movm_epi32 (__mmask8 __A)
944{
945  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
946}
947
948static __inline__ __m256i __DEFAULT_FN_ATTRS
949_mm256_movm_epi32 (__mmask8 __A)
950{
951  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
952}
953
954static __inline__ __m128i __DEFAULT_FN_ATTRS
955_mm_movm_epi64 (__mmask8 __A)
956{
957  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
958}
959
960static __inline__ __m256i __DEFAULT_FN_ATTRS
961_mm256_movm_epi64 (__mmask8 __A)
962{
963  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
964}
965
966static __inline__ __mmask8 __DEFAULT_FN_ATTRS
967_mm_movepi64_mask (__m128i __A)
968{
969  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
970}
971
972static __inline__ __mmask8 __DEFAULT_FN_ATTRS
973_mm256_movepi64_mask (__m256i __A)
974{
975  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
976}
977
978static __inline__ __m256 __DEFAULT_FN_ATTRS
979_mm256_broadcast_f32x2 (__m128 __A)
980{
981  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
982                (__v8sf)_mm256_undefined_ps(),
983                (__mmask8) -1);
984}
985
986static __inline__ __m256 __DEFAULT_FN_ATTRS
987_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
988{
989  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
990                (__v8sf) __O,
991                __M);
992}
993
994static __inline__ __m256 __DEFAULT_FN_ATTRS
995_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
996{
997  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
998                (__v8sf) _mm256_setzero_ps (),
999                __M);
1000}
1001
1002static __inline__ __m256d __DEFAULT_FN_ATTRS
1003_mm256_broadcast_f64x2(__m128d __A)
1004{
1005  return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1006                                          0, 1, 0, 1);
1007}
1008
1009static __inline__ __m256d __DEFAULT_FN_ATTRS
1010_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
1011{
1012  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1013                                            (__v4df)_mm256_broadcast_f64x2(__A),
1014                                            (__v4df)__O);
1015}
1016
1017static __inline__ __m256d __DEFAULT_FN_ATTRS
1018_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1019{
1020  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
1021                                            (__v4df)_mm256_broadcast_f64x2(__A),
1022                                            (__v4df)_mm256_setzero_pd());
1023}
1024
1025static __inline__ __m128i __DEFAULT_FN_ATTRS
1026_mm_broadcast_i32x2 (__m128i __A)
1027{
1028  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1029                 (__v4si)_mm_undefined_si128(),
1030                 (__mmask8) -1);
1031}
1032
1033static __inline__ __m128i __DEFAULT_FN_ATTRS
1034_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1035{
1036  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1037                 (__v4si) __O,
1038                 __M);
1039}
1040
1041static __inline__ __m128i __DEFAULT_FN_ATTRS
1042_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1043{
1044  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1045                 (__v4si) _mm_setzero_si128 (),
1046                 __M);
1047}
1048
1049static __inline__ __m256i __DEFAULT_FN_ATTRS
1050_mm256_broadcast_i32x2 (__m128i __A)
1051{
1052  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1053                 (__v8si)_mm256_undefined_si256(),
1054                 (__mmask8) -1);
1055}
1056
1057static __inline__ __m256i __DEFAULT_FN_ATTRS
1058_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1059{
1060  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1061                 (__v8si) __O,
1062                 __M);
1063}
1064
1065static __inline__ __m256i __DEFAULT_FN_ATTRS
1066_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1067{
1068  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1069                 (__v8si) _mm256_setzero_si256 (),
1070                 __M);
1071}
1072
1073static __inline__ __m256i __DEFAULT_FN_ATTRS
1074_mm256_broadcast_i64x2(__m128i __A)
1075{
1076  return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1077                                          0, 1, 0, 1);
1078}
1079
1080static __inline__ __m256i __DEFAULT_FN_ATTRS
1081_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
1082{
1083  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1084                                            (__v4di)_mm256_broadcast_i64x2(__A),
1085                                            (__v4di)__O);
1086}
1087
1088static __inline__ __m256i __DEFAULT_FN_ATTRS
1089_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1090{
1091  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
1092                                            (__v4di)_mm256_broadcast_i64x2(__A),
1093                                            (__v4di)_mm256_setzero_si256());
1094}
1095
/* Extracts one 128-bit half (two doubles) of A.  Only bit 0 of imm is used:
 * when set, elements 2 and 3 are selected, otherwise elements 0 and 1.  The
 * second shuffle operand is an undefined vector and is never indexed. */
#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_shufflevector( \
      (__v4df)(__m256d)(A), \
      (__v4df)_mm256_undefined_pd(), \
      ((imm) & 1) ? 2 : 0, \
      ((imm) & 1) ? 3 : 1); })
1101
/* Masked form of _mm256_extractf64x2_pd: the extracted half is combined
 * with W by the selectpd_128 builtin under mask U. */
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
      (__v2df)(W)); })
1106
/* Zero-masked form of _mm256_extractf64x2_pd: the extracted half is
 * combined with an all-zero vector by the selectpd_128 builtin under
 * mask U. */
#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
      (__v2df)_mm_setzero_pd()); })
1111
/* Extracts one 128-bit half (two 64-bit integers) of A.  Only bit 0 of imm
 * is used: when set, elements 2 and 3 are selected, otherwise elements 0
 * and 1.  The second shuffle operand is an undefined vector and is never
 * indexed. */
#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v4di)(__m256i)(A), \
      (__v4di)_mm256_undefined_si256(), \
      ((imm) & 1) ? 2 : 0, \
      ((imm) & 1) ? 3 : 1); })
1117
/* Masked form of _mm256_extracti64x2_epi64: the extracted half is combined
 * with W by the selectq_128 builtin under mask U. */
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
      (__v2di)(W)); })
1122
/* Zero-masked form of _mm256_extracti64x2_epi64: the extracted half is
 * combined with an all-zero vector by the selectq_128 builtin under mask U.
 * The zero vector comes from _mm_setzero_si128(), the same helper the other
 * zero-masked 128-bit integer intrinsics in this header use, rather than
 * the non-standard _mm_setzero_di(). */
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
      (__v2di)_mm_setzero_si128()); })
1127
/* Replaces one 128-bit half of A with the two doubles of B.  B is widened
 * with _mm256_castpd128_pd256, so its elements are shuffle indices 4 and 5.
 * Only bit 0 of imm is used: when set the result is {A0, A1, B0, B1},
 * otherwise {B0, B1, A2, A3}. */
#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
      (__v4df)(A), \
      (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \
      ((imm) & 0x1) ? 0 : 4, \
      ((imm) & 0x1) ? 1 : 5, \
      ((imm) & 0x1) ? 4 : 2, \
      ((imm) & 0x1) ? 5 : 3); })
1135
/* Masked form of _mm256_insertf64x2: the inserted result is combined with
 * W by the selectpd_256 builtin under mask U. */
#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
      (__v4df)(W)); })
1140
/* Zero-masked form of _mm256_insertf64x2: the inserted result is combined
 * with an all-zero vector by the selectpd_256 builtin under mask U. */
#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()); })
1145
/* Replaces one 128-bit half of A with the two 64-bit integers of B.  B is
 * widened with _mm256_castsi128_si256, so its elements are shuffle indices
 * 4 and 5.  Only bit 0 of imm is used: when set the result is
 * {A0, A1, B0, B1}, otherwise {B0, B1, A2, A3}. */
#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v4di)(A), \
      (__v4di)_mm256_castsi128_si256((__m128i)(B)), \
      ((imm) & 0x1) ? 0 : 4, \
      ((imm) & 0x1) ? 1 : 5, \
      ((imm) & 0x1) ? 4 : 2, \
      ((imm) & 0x1) ? 5 : 3); })
1153
/* Masked form of _mm256_inserti64x2: the inserted result is combined with
 * W by the selectq_256 builtin under mask U. */
#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
      (__v4di)(W)); })
1158
/* Zero-masked form of _mm256_inserti64x2: the inserted result is combined
 * with an all-zero vector by the selectq_256 builtin under mask U. */
#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()); })
1163
/* Masked floating-point classification of the two doubles in A: forwards A,
 * the category selector imm (as int) and the input mask U to the
 * fpclasspd128 builtin and yields an __mmask8. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1167
/* Unmasked floating-point classification of the two doubles in A: forwards
 * A and the category selector imm (as int) to the fpclasspd128 builtin with
 * an all-ones input mask (-1) and yields an __mmask8. */
#define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1171
/* Masked floating-point classification of the four doubles in A: forwards
 * A, the category selector imm (as int) and the input mask U to the
 * fpclasspd256 builtin and yields an __mmask8. */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1175
/* Unmasked floating-point classification of the four doubles in A: forwards
 * A and the category selector imm (as int) to the fpclasspd256 builtin with
 * an all-ones input mask (-1) and yields an __mmask8. */
#define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1179
/* Masked floating-point classification of the four floats in A: forwards A,
 * the category selector imm (as int) and the input mask U to the
 * fpclassps128 builtin and yields an __mmask8. */
#define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1183
/* Unmasked floating-point classification of the four floats in A: forwards
 * A and the category selector imm (as int) to the fpclassps128 builtin with
 * an all-ones input mask (-1) and yields an __mmask8. */
#define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1187
/* Masked floating-point classification of the eight floats in A: forwards
 * A, the category selector imm (as int) and the input mask U to the
 * fpclassps256 builtin and yields an __mmask8. */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1191
/* Unmasked floating-point classification of the eight floats in A: forwards
 * A and the category selector imm (as int) to the fpclassps256 builtin with
 * an all-ones input mask (-1) and yields an __mmask8. */
#define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1195
1196#undef __DEFAULT_FN_ATTRS
1197
1198#endif
1199