1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLDQINTRIN_H
29#define __AVX512VLDQINTRIN_H
30
/* Default attribute set for the functions in this file: always_inline,
   nodebug, and a target of "avx512vl,avx512dq". */
#define __DEFAULT_FN_ATTRS                                  \
  __attribute__((__always_inline__,                         \
                 __nodebug__,                               \
                 __target__("avx512vl,avx512dq")))
33
34static __inline__ __m256i __DEFAULT_FN_ATTRS
35_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
36  return (__m256i) ((__v4du) __A * (__v4du) __B);
37}
38
39static __inline__ __m256i __DEFAULT_FN_ATTRS
40_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
41  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
42              (__v4di) __B,
43              (__v4di) __W,
44              (__mmask8) __U);
45}
46
47static __inline__ __m256i __DEFAULT_FN_ATTRS
48_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
49  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
50              (__v4di) __B,
51              (__v4di)
52              _mm256_setzero_si256 (),
53              (__mmask8) __U);
54}
55
56static __inline__ __m128i __DEFAULT_FN_ATTRS
57_mm_mullo_epi64 (__m128i __A, __m128i __B) {
58  return (__m128i) ((__v2du) __A * (__v2du) __B);
59}
60
61static __inline__ __m128i __DEFAULT_FN_ATTRS
62_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
63  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
64              (__v2di) __B,
65              (__v2di) __W,
66              (__mmask8) __U);
67}
68
69static __inline__ __m128i __DEFAULT_FN_ATTRS
70_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
71  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
72              (__v2di) __B,
73              (__v2di)
74              _mm_setzero_si128 (),
75              (__mmask8) __U);
76}
77
78static __inline__ __m256d __DEFAULT_FN_ATTRS
79_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
80  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
81              (__v4df) __B,
82              (__v4df) __W,
83              (__mmask8) __U);
84}
85
86static __inline__ __m256d __DEFAULT_FN_ATTRS
87_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
88  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
89              (__v4df) __B,
90              (__v4df)
91              _mm256_setzero_pd (),
92              (__mmask8) __U);
93}
94
95static __inline__ __m128d __DEFAULT_FN_ATTRS
96_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
97  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
98              (__v2df) __B,
99              (__v2df) __W,
100              (__mmask8) __U);
101}
102
103static __inline__ __m128d __DEFAULT_FN_ATTRS
104_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
105  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
106              (__v2df) __B,
107              (__v2df)
108              _mm_setzero_pd (),
109              (__mmask8) __U);
110}
111
112static __inline__ __m256 __DEFAULT_FN_ATTRS
113_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
114  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
115             (__v8sf) __B,
116             (__v8sf) __W,
117             (__mmask8) __U);
118}
119
120static __inline__ __m256 __DEFAULT_FN_ATTRS
121_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
122  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
123             (__v8sf) __B,
124             (__v8sf)
125             _mm256_setzero_ps (),
126             (__mmask8) __U);
127}
128
129static __inline__ __m128 __DEFAULT_FN_ATTRS
130_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
131  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
132             (__v4sf) __B,
133             (__v4sf) __W,
134             (__mmask8) __U);
135}
136
137static __inline__ __m128 __DEFAULT_FN_ATTRS
138_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
139  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
140             (__v4sf) __B,
141             (__v4sf)
142             _mm_setzero_ps (),
143             (__mmask8) __U);
144}
145
146static __inline__ __m256d __DEFAULT_FN_ATTRS
147_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
148  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
149             (__v4df) __B,
150             (__v4df) __W,
151             (__mmask8) __U);
152}
153
154static __inline__ __m256d __DEFAULT_FN_ATTRS
155_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
156  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
157             (__v4df) __B,
158             (__v4df)
159             _mm256_setzero_pd (),
160             (__mmask8) __U);
161}
162
163static __inline__ __m128d __DEFAULT_FN_ATTRS
164_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
165  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
166             (__v2df) __B,
167             (__v2df) __W,
168             (__mmask8) __U);
169}
170
171static __inline__ __m128d __DEFAULT_FN_ATTRS
172_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
173  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
174             (__v2df) __B,
175             (__v2df)
176             _mm_setzero_pd (),
177             (__mmask8) __U);
178}
179
180static __inline__ __m256 __DEFAULT_FN_ATTRS
181_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
182  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
183            (__v8sf) __B,
184            (__v8sf) __W,
185            (__mmask8) __U);
186}
187
188static __inline__ __m256 __DEFAULT_FN_ATTRS
189_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
190  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
191            (__v8sf) __B,
192            (__v8sf)
193            _mm256_setzero_ps (),
194            (__mmask8) __U);
195}
196
197static __inline__ __m128 __DEFAULT_FN_ATTRS
198_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
199  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
200            (__v4sf) __B,
201            (__v4sf) __W,
202            (__mmask8) __U);
203}
204
205static __inline__ __m128 __DEFAULT_FN_ATTRS
206_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
207  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
208            (__v4sf) __B,
209            (__v4sf)
210            _mm_setzero_ps (),
211            (__mmask8) __U);
212}
213
214static __inline__ __m256d __DEFAULT_FN_ATTRS
215_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
216        __m256d __B) {
217  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
218             (__v4df) __B,
219             (__v4df) __W,
220             (__mmask8) __U);
221}
222
223static __inline__ __m256d __DEFAULT_FN_ATTRS
224_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
225  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
226             (__v4df) __B,
227             (__v4df)
228             _mm256_setzero_pd (),
229             (__mmask8) __U);
230}
231
232static __inline__ __m128d __DEFAULT_FN_ATTRS
233_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
234  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
235             (__v2df) __B,
236             (__v2df) __W,
237             (__mmask8) __U);
238}
239
240static __inline__ __m128d __DEFAULT_FN_ATTRS
241_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
242  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
243             (__v2df) __B,
244             (__v2df)
245             _mm_setzero_pd (),
246             (__mmask8) __U);
247}
248
249static __inline__ __m256 __DEFAULT_FN_ATTRS
250_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
251  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
252            (__v8sf) __B,
253            (__v8sf) __W,
254            (__mmask8) __U);
255}
256
257static __inline__ __m256 __DEFAULT_FN_ATTRS
258_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
259  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
260            (__v8sf) __B,
261            (__v8sf)
262            _mm256_setzero_ps (),
263            (__mmask8) __U);
264}
265
266static __inline__ __m128 __DEFAULT_FN_ATTRS
267_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
268  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
269            (__v4sf) __B,
270            (__v4sf) __W,
271            (__mmask8) __U);
272}
273
274static __inline__ __m128 __DEFAULT_FN_ATTRS
275_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
276  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
277            (__v4sf) __B,
278            (__v4sf)
279            _mm_setzero_ps (),
280            (__mmask8) __U);
281}
282
283static __inline__ __m256d __DEFAULT_FN_ATTRS
284_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
285  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
286            (__v4df) __B,
287            (__v4df) __W,
288            (__mmask8) __U);
289}
290
291static __inline__ __m256d __DEFAULT_FN_ATTRS
292_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
293  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
294            (__v4df) __B,
295            (__v4df)
296            _mm256_setzero_pd (),
297            (__mmask8) __U);
298}
299
300static __inline__ __m128d __DEFAULT_FN_ATTRS
301_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
302  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
303            (__v2df) __B,
304            (__v2df) __W,
305            (__mmask8) __U);
306}
307
308static __inline__ __m128d __DEFAULT_FN_ATTRS
309_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
310  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
311            (__v2df) __B,
312            (__v2df)
313            _mm_setzero_pd (),
314            (__mmask8) __U);
315}
316
317static __inline__ __m256 __DEFAULT_FN_ATTRS
318_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
319  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
320                 (__v8sf) __B,
321                 (__v8sf) __W,
322                 (__mmask8) __U);
323}
324
325static __inline__ __m256 __DEFAULT_FN_ATTRS
326_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
327  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
328                 (__v8sf) __B,
329                 (__v8sf)
330                 _mm256_setzero_ps (),
331                 (__mmask8) __U);
332}
333
334static __inline__ __m128 __DEFAULT_FN_ATTRS
335_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
336  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
337                 (__v4sf) __B,
338                 (__v4sf) __W,
339                 (__mmask8) __U);
340}
341
342static __inline__ __m128 __DEFAULT_FN_ATTRS
343_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
344  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
345                 (__v4sf) __B,
346                 (__v4sf)
347                 _mm_setzero_ps (),
348                 (__mmask8) __U);
349}
350
351static __inline__ __m128i __DEFAULT_FN_ATTRS
352_mm_cvtpd_epi64 (__m128d __A) {
353  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
354                (__v2di) _mm_setzero_si128(),
355                (__mmask8) -1);
356}
357
358static __inline__ __m128i __DEFAULT_FN_ATTRS
359_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
360  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
361                (__v2di) __W,
362                (__mmask8) __U);
363}
364
365static __inline__ __m128i __DEFAULT_FN_ATTRS
366_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
367  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
368                (__v2di) _mm_setzero_si128(),
369                (__mmask8) __U);
370}
371
372static __inline__ __m256i __DEFAULT_FN_ATTRS
373_mm256_cvtpd_epi64 (__m256d __A) {
374  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
375                (__v4di) _mm256_setzero_si256(),
376                (__mmask8) -1);
377}
378
379static __inline__ __m256i __DEFAULT_FN_ATTRS
380_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
381  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
382                (__v4di) __W,
383                (__mmask8) __U);
384}
385
386static __inline__ __m256i __DEFAULT_FN_ATTRS
387_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
388  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
389                (__v4di) _mm256_setzero_si256(),
390                (__mmask8) __U);
391}
392
393static __inline__ __m128i __DEFAULT_FN_ATTRS
394_mm_cvtpd_epu64 (__m128d __A) {
395  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
396                (__v2di) _mm_setzero_si128(),
397                (__mmask8) -1);
398}
399
400static __inline__ __m128i __DEFAULT_FN_ATTRS
401_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
402  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
403                (__v2di) __W,
404                (__mmask8) __U);
405}
406
407static __inline__ __m128i __DEFAULT_FN_ATTRS
408_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
409  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
410                (__v2di) _mm_setzero_si128(),
411                (__mmask8) __U);
412}
413
414static __inline__ __m256i __DEFAULT_FN_ATTRS
415_mm256_cvtpd_epu64 (__m256d __A) {
416  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
417                (__v4di) _mm256_setzero_si256(),
418                (__mmask8) -1);
419}
420
421static __inline__ __m256i __DEFAULT_FN_ATTRS
422_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
423  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
424                (__v4di) __W,
425                (__mmask8) __U);
426}
427
428static __inline__ __m256i __DEFAULT_FN_ATTRS
429_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
430  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
431                (__v4di) _mm256_setzero_si256(),
432                (__mmask8) __U);
433}
434
435static __inline__ __m128i __DEFAULT_FN_ATTRS
436_mm_cvtps_epi64 (__m128 __A) {
437  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
438                (__v2di) _mm_setzero_si128(),
439                (__mmask8) -1);
440}
441
442static __inline__ __m128i __DEFAULT_FN_ATTRS
443_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
444  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
445                (__v2di) __W,
446                (__mmask8) __U);
447}
448
449static __inline__ __m128i __DEFAULT_FN_ATTRS
450_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
451  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
452                (__v2di) _mm_setzero_si128(),
453                (__mmask8) __U);
454}
455
456static __inline__ __m256i __DEFAULT_FN_ATTRS
457_mm256_cvtps_epi64 (__m128 __A) {
458  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
459                (__v4di) _mm256_setzero_si256(),
460                (__mmask8) -1);
461}
462
463static __inline__ __m256i __DEFAULT_FN_ATTRS
464_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
465  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
466                (__v4di) __W,
467                (__mmask8) __U);
468}
469
470static __inline__ __m256i __DEFAULT_FN_ATTRS
471_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
472  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
473                (__v4di) _mm256_setzero_si256(),
474                (__mmask8) __U);
475}
476
477static __inline__ __m128i __DEFAULT_FN_ATTRS
478_mm_cvtps_epu64 (__m128 __A) {
479  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
480                (__v2di) _mm_setzero_si128(),
481                (__mmask8) -1);
482}
483
484static __inline__ __m128i __DEFAULT_FN_ATTRS
485_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
486  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
487                (__v2di) __W,
488                (__mmask8) __U);
489}
490
491static __inline__ __m128i __DEFAULT_FN_ATTRS
492_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
493  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
494                (__v2di) _mm_setzero_si128(),
495                (__mmask8) __U);
496}
497
498static __inline__ __m256i __DEFAULT_FN_ATTRS
499_mm256_cvtps_epu64 (__m128 __A) {
500  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
501                (__v4di) _mm256_setzero_si256(),
502                (__mmask8) -1);
503}
504
505static __inline__ __m256i __DEFAULT_FN_ATTRS
506_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
507  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
508                (__v4di) __W,
509                (__mmask8) __U);
510}
511
512static __inline__ __m256i __DEFAULT_FN_ATTRS
513_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
514  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
515                (__v4di) _mm256_setzero_si256(),
516                (__mmask8) __U);
517}
518
519static __inline__ __m128d __DEFAULT_FN_ATTRS
520_mm_cvtepi64_pd (__m128i __A) {
521  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
522                (__v2df) _mm_setzero_pd(),
523                (__mmask8) -1);
524}
525
526static __inline__ __m128d __DEFAULT_FN_ATTRS
527_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
528  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
529                (__v2df) __W,
530                (__mmask8) __U);
531}
532
533static __inline__ __m128d __DEFAULT_FN_ATTRS
534_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
535  return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A,
536                (__v2df) _mm_setzero_pd(),
537                (__mmask8) __U);
538}
539
540static __inline__ __m256d __DEFAULT_FN_ATTRS
541_mm256_cvtepi64_pd (__m256i __A) {
542  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
543                (__v4df) _mm256_setzero_pd(),
544                (__mmask8) -1);
545}
546
547static __inline__ __m256d __DEFAULT_FN_ATTRS
548_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
549  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
550                (__v4df) __W,
551                (__mmask8) __U);
552}
553
554static __inline__ __m256d __DEFAULT_FN_ATTRS
555_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
556  return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A,
557                (__v4df) _mm256_setzero_pd(),
558                (__mmask8) __U);
559}
560
561static __inline__ __m128 __DEFAULT_FN_ATTRS
562_mm_cvtepi64_ps (__m128i __A) {
563  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
564                (__v4sf) _mm_setzero_ps(),
565                (__mmask8) -1);
566}
567
568static __inline__ __m128 __DEFAULT_FN_ATTRS
569_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
570  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
571                (__v4sf) __W,
572                (__mmask8) __U);
573}
574
575static __inline__ __m128 __DEFAULT_FN_ATTRS
576_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
577  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
578                (__v4sf) _mm_setzero_ps(),
579                (__mmask8) __U);
580}
581
582static __inline__ __m128 __DEFAULT_FN_ATTRS
583_mm256_cvtepi64_ps (__m256i __A) {
584  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
585                (__v4sf) _mm_setzero_ps(),
586                (__mmask8) -1);
587}
588
589static __inline__ __m128 __DEFAULT_FN_ATTRS
590_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
591  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
592                (__v4sf) __W,
593                (__mmask8) __U);
594}
595
596static __inline__ __m128 __DEFAULT_FN_ATTRS
597_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
598  return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
599                (__v4sf) _mm_setzero_ps(),
600                (__mmask8) __U);
601}
602
603static __inline__ __m128i __DEFAULT_FN_ATTRS
604_mm_cvttpd_epi64 (__m128d __A) {
605  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
606                (__v2di) _mm_setzero_si128(),
607                (__mmask8) -1);
608}
609
610static __inline__ __m128i __DEFAULT_FN_ATTRS
611_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
612  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
613                (__v2di) __W,
614                (__mmask8) __U);
615}
616
617static __inline__ __m128i __DEFAULT_FN_ATTRS
618_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
619  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
620                (__v2di) _mm_setzero_si128(),
621                (__mmask8) __U);
622}
623
624static __inline__ __m256i __DEFAULT_FN_ATTRS
625_mm256_cvttpd_epi64 (__m256d __A) {
626  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
627                (__v4di) _mm256_setzero_si256(),
628                (__mmask8) -1);
629}
630
631static __inline__ __m256i __DEFAULT_FN_ATTRS
632_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
633  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
634                (__v4di) __W,
635                (__mmask8) __U);
636}
637
638static __inline__ __m256i __DEFAULT_FN_ATTRS
639_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
640  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
641                (__v4di) _mm256_setzero_si256(),
642                (__mmask8) __U);
643}
644
645static __inline__ __m128i __DEFAULT_FN_ATTRS
646_mm_cvttpd_epu64 (__m128d __A) {
647  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
648                (__v2di) _mm_setzero_si128(),
649                (__mmask8) -1);
650}
651
652static __inline__ __m128i __DEFAULT_FN_ATTRS
653_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
654  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
655                (__v2di) __W,
656                (__mmask8) __U);
657}
658
659static __inline__ __m128i __DEFAULT_FN_ATTRS
660_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
661  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
662                (__v2di) _mm_setzero_si128(),
663                (__mmask8) __U);
664}
665
666static __inline__ __m256i __DEFAULT_FN_ATTRS
667_mm256_cvttpd_epu64 (__m256d __A) {
668  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
669                (__v4di) _mm256_setzero_si256(),
670                (__mmask8) -1);
671}
672
673static __inline__ __m256i __DEFAULT_FN_ATTRS
674_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
675  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
676                (__v4di) __W,
677                (__mmask8) __U);
678}
679
680static __inline__ __m256i __DEFAULT_FN_ATTRS
681_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
682  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
683                (__v4di) _mm256_setzero_si256(),
684                (__mmask8) __U);
685}
686
687static __inline__ __m128i __DEFAULT_FN_ATTRS
688_mm_cvttps_epi64 (__m128 __A) {
689  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
690                (__v2di) _mm_setzero_si128(),
691                (__mmask8) -1);
692}
693
694static __inline__ __m128i __DEFAULT_FN_ATTRS
695_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
696  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
697                (__v2di) __W,
698                (__mmask8) __U);
699}
700
701static __inline__ __m128i __DEFAULT_FN_ATTRS
702_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
703  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
704                (__v2di) _mm_setzero_si128(),
705                (__mmask8) __U);
706}
707
708static __inline__ __m256i __DEFAULT_FN_ATTRS
709_mm256_cvttps_epi64 (__m128 __A) {
710  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
711                (__v4di) _mm256_setzero_si256(),
712                (__mmask8) -1);
713}
714
715static __inline__ __m256i __DEFAULT_FN_ATTRS
716_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
717  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
718                (__v4di) __W,
719                (__mmask8) __U);
720}
721
722static __inline__ __m256i __DEFAULT_FN_ATTRS
723_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
724  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
725                (__v4di) _mm256_setzero_si256(),
726                (__mmask8) __U);
727}
728
729static __inline__ __m128i __DEFAULT_FN_ATTRS
730_mm_cvttps_epu64 (__m128 __A) {
731  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
732                (__v2di) _mm_setzero_si128(),
733                (__mmask8) -1);
734}
735
736static __inline__ __m128i __DEFAULT_FN_ATTRS
737_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
738  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
739                (__v2di) __W,
740                (__mmask8) __U);
741}
742
743static __inline__ __m128i __DEFAULT_FN_ATTRS
744_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
745  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
746                (__v2di) _mm_setzero_si128(),
747                (__mmask8) __U);
748}
749
750static __inline__ __m256i __DEFAULT_FN_ATTRS
751_mm256_cvttps_epu64 (__m128 __A) {
752  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
753                (__v4di) _mm256_setzero_si256(),
754                (__mmask8) -1);
755}
756
757static __inline__ __m256i __DEFAULT_FN_ATTRS
758_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
759  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
760                (__v4di) __W,
761                (__mmask8) __U);
762}
763
764static __inline__ __m256i __DEFAULT_FN_ATTRS
765_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
766  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
767                (__v4di) _mm256_setzero_si256(),
768                (__mmask8) __U);
769}
770
771static __inline__ __m128d __DEFAULT_FN_ATTRS
772_mm_cvtepu64_pd (__m128i __A) {
773  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
774                (__v2df) _mm_setzero_pd(),
775                (__mmask8) -1);
776}
777
778static __inline__ __m128d __DEFAULT_FN_ATTRS
779_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
780  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
781                (__v2df) __W,
782                (__mmask8) __U);
783}
784
785static __inline__ __m128d __DEFAULT_FN_ATTRS
786_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
787  return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A,
788                (__v2df) _mm_setzero_pd(),
789                (__mmask8) __U);
790}
791
792static __inline__ __m256d __DEFAULT_FN_ATTRS
793_mm256_cvtepu64_pd (__m256i __A) {
794  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
795                (__v4df) _mm256_setzero_pd(),
796                (__mmask8) -1);
797}
798
799static __inline__ __m256d __DEFAULT_FN_ATTRS
800_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
801  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
802                (__v4df) __W,
803                (__mmask8) __U);
804}
805
806static __inline__ __m256d __DEFAULT_FN_ATTRS
807_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
808  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A,
809                (__v4df) _mm256_setzero_pd(),
810                (__mmask8) __U);
811}
812
813static __inline__ __m128 __DEFAULT_FN_ATTRS
814_mm_cvtepu64_ps (__m128i __A) {
815  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
816                (__v4sf) _mm_setzero_ps(),
817                (__mmask8) -1);
818}
819
820static __inline__ __m128 __DEFAULT_FN_ATTRS
821_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
822  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
823                (__v4sf) __W,
824                (__mmask8) __U);
825}
826
827static __inline__ __m128 __DEFAULT_FN_ATTRS
828_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
829  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
830                (__v4sf) _mm_setzero_ps(),
831                (__mmask8) __U);
832}
833
834static __inline__ __m128 __DEFAULT_FN_ATTRS
835_mm256_cvtepu64_ps (__m256i __A) {
836  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
837                (__v4sf) _mm_setzero_ps(),
838                (__mmask8) -1);
839}
840
841static __inline__ __m128 __DEFAULT_FN_ATTRS
842_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
843  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
844                (__v4sf) __W,
845                (__mmask8) __U);
846}
847
848static __inline__ __m128 __DEFAULT_FN_ATTRS
849_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
850  return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
851                (__v4sf) _mm_setzero_ps(),
852                (__mmask8) __U);
853}
854
/* _mm_range_pd(A, B, C): calls __builtin_ia32_rangepd128_mask with A and B
   (each converted to __m128d, then viewed as __v2df), C cast to int, an
   all-zero vector as the fourth operand and the mask (__mmask8)-1.
   Written as one fully parenthesized expression rather than a GNU statement
   expression, so it can also be used where statement expressions are not
   accepted (outside a function body). */
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1))
860
/* _mm_mask_range_pd(W, U, A, B, C): calls __builtin_ia32_rangepd128_mask
   with A and B (each converted to __m128d, then viewed as __v2df), C cast to
   int, W as the fourth operand and U cast to __mmask8 as the mask.
   Written as one fully parenthesized expression rather than a GNU statement
   expression, so it can also be used where statement expressions are not
   accepted (outside a function body). */
#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)))
866
/* _mm_maskz_range_pd(U, A, B, C): __builtin_ia32_rangepd128_mask with a
 * zeroed fourth operand and U as the mask. */
#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)); })
872
/* _mm256_range_pd(A, B, C): expands to __builtin_ia32_rangepd256_mask on A
 * and B with C cast to int, a zeroed fourth operand and an all-ones mask. */
#define _mm256_range_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
878
/* _mm256_mask_range_pd(W, U, A, B, C): __builtin_ia32_rangepd256_mask with W
 * as the fourth operand and U as the mask. */
#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)(__m256d)(W), (__mmask8)(U)); })
884
/* _mm256_maskz_range_pd(U, A, B, C): __builtin_ia32_rangepd256_mask with a
 * zeroed fourth operand and U as the mask. */
#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); })
890
/* _mm_range_ps(A, B, C): expands to __builtin_ia32_rangeps128_mask on A and B
 * with C cast to int, a zeroed fourth operand and an all-ones mask. */
#define _mm_range_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1); })
896
/* _mm_mask_range_ps(W, U, A, B, C): __builtin_ia32_rangeps128_mask with W as
 * the fourth operand and U as the mask. */
#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)); })
901
/* _mm_maskz_range_ps(U, A, B, C): __builtin_ia32_rangeps128_mask with a
 * zeroed fourth operand and U as the mask. */
#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U)); })
907
/* _mm256_range_ps(A, B, C): expands to __builtin_ia32_rangeps256_mask on A
 * and B with C cast to int, a zeroed fourth operand and an all-ones mask. */
#define _mm256_range_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); })
913
/* _mm256_mask_range_ps(W, U, A, B, C): __builtin_ia32_rangeps256_mask with W
 * as the fourth operand and U as the mask. */
#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)); })
918
/* _mm256_maskz_range_ps(U, A, B, C): __builtin_ia32_rangeps256_mask with a
 * zeroed fourth operand and U as the mask. */
#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask( \
      (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
      (__v8sf)_mm256_setzero_ps(), (__mmask8)(U)); })
924
/* _mm_reduce_pd(A, B): expands to __builtin_ia32_reducepd128_mask on A with B
 * cast to int, a zeroed third operand and an all-ones mask. */
#define _mm_reduce_pd(A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1); })
929
/* _mm_mask_reduce_pd(W, U, A, B): __builtin_ia32_reducepd128_mask with W as
 * the third operand and U as the mask. */
#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U)); })
934
/* _mm_maskz_reduce_pd(U, A, B): __builtin_ia32_reducepd128_mask with a zeroed
 * third operand and U as the mask. */
#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U)); })
939
/* _mm256_reduce_pd(A, B): expands to __builtin_ia32_reducepd256_mask on A
 * with B cast to int, a zeroed third operand and an all-ones mask. */
#define _mm256_reduce_pd(A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)-1); })
944
/* _mm256_mask_reduce_pd(W, U, A, B): __builtin_ia32_reducepd256_mask with W
 * as the third operand and U as the mask. */
#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)(__m256d)(W), \
      (__mmask8)(U)); })
949
/* _mm256_maskz_reduce_pd(U, A, B): __builtin_ia32_reducepd256_mask with a
 * zeroed third operand and U as the mask. */
#define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(B), \
      (__v4df)_mm256_setzero_pd(), \
      (__mmask8)(U)); })
954
/* _mm_reduce_ps(A, B): expands to __builtin_ia32_reduceps128_mask on A with B
 * cast to int, a zeroed third operand and an all-ones mask. */
#define _mm_reduce_ps(A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1); })
959
/* _mm_mask_reduce_ps(W, U, A, B): __builtin_ia32_reduceps128_mask with W as
 * the third operand and U as the mask. */
#define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)); })
964
/* _mm_maskz_reduce_ps(U, A, B): __builtin_ia32_reduceps128_mask with a zeroed
 * third operand and U as the mask. */
#define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)); })
969
/* _mm256_reduce_ps(A, B): expands to __builtin_ia32_reduceps256_mask on A
 * with B cast to int, a zeroed third operand and an all-ones mask. */
#define _mm256_reduce_ps(A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1); })
974
/* _mm256_mask_reduce_ps(W, U, A, B): __builtin_ia32_reduceps256_mask with W
 * as the third operand and U as the mask. */
#define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)); })
979
/* _mm256_maskz_reduce_ps(U, A, B): __builtin_ia32_reduceps256_mask with a
 * zeroed third operand and U as the mask. */
#define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(B), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)); })
984
985static __inline__ __mmask8 __DEFAULT_FN_ATTRS
986_mm_movepi32_mask (__m128i __A)
987{
988  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
989}
990
991static __inline__ __mmask8 __DEFAULT_FN_ATTRS
992_mm256_movepi32_mask (__m256i __A)
993{
994  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
995}
996
997static __inline__ __m128i __DEFAULT_FN_ATTRS
998_mm_movm_epi32 (__mmask8 __A)
999{
1000  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
1001}
1002
1003static __inline__ __m256i __DEFAULT_FN_ATTRS
1004_mm256_movm_epi32 (__mmask8 __A)
1005{
1006  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
1007}
1008
1009static __inline__ __m128i __DEFAULT_FN_ATTRS
1010_mm_movm_epi64 (__mmask8 __A)
1011{
1012  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
1013}
1014
1015static __inline__ __m256i __DEFAULT_FN_ATTRS
1016_mm256_movm_epi64 (__mmask8 __A)
1017{
1018  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
1019}
1020
1021static __inline__ __mmask8 __DEFAULT_FN_ATTRS
1022_mm_movepi64_mask (__m128i __A)
1023{
1024  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
1025}
1026
1027static __inline__ __mmask8 __DEFAULT_FN_ATTRS
1028_mm256_movepi64_mask (__m256i __A)
1029{
1030  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
1031}
1032
1033static __inline__ __m256 __DEFAULT_FN_ATTRS
1034_mm256_broadcast_f32x2 (__m128 __A)
1035{
1036  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1037                (__v8sf)_mm256_undefined_ps(),
1038                (__mmask8) -1);
1039}
1040
1041static __inline__ __m256 __DEFAULT_FN_ATTRS
1042_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
1043{
1044  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1045                (__v8sf) __O,
1046                __M);
1047}
1048
1049static __inline__ __m256 __DEFAULT_FN_ATTRS
1050_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
1051{
1052  return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A,
1053                (__v8sf) _mm256_setzero_ps (),
1054                __M);
1055}
1056
1057static __inline__ __m256d __DEFAULT_FN_ATTRS
1058_mm256_broadcast_f64x2 (__m128d __A)
1059{
1060  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1061                 (__v4df)_mm256_undefined_pd(),
1062                 (__mmask8) -1);
1063}
1064
1065static __inline__ __m256d __DEFAULT_FN_ATTRS
1066_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
1067{
1068  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1069                 (__v4df) __O,
1070                 __M);
1071}
1072
1073static __inline__ __m256d __DEFAULT_FN_ATTRS
1074_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
1075{
1076  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
1077                 (__v4df) _mm256_setzero_ps (),
1078                 __M);
1079}
1080
1081static __inline__ __m128i __DEFAULT_FN_ATTRS
1082_mm_broadcast_i32x2 (__m128i __A)
1083{
1084  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1085                 (__v4si)_mm_undefined_si128(),
1086                 (__mmask8) -1);
1087}
1088
1089static __inline__ __m128i __DEFAULT_FN_ATTRS
1090_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
1091{
1092  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1093                 (__v4si) __O,
1094                 __M);
1095}
1096
1097static __inline__ __m128i __DEFAULT_FN_ATTRS
1098_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1099{
1100  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
1101                 (__v4si) _mm_setzero_si128 (),
1102                 __M);
1103}
1104
1105static __inline__ __m256i __DEFAULT_FN_ATTRS
1106_mm256_broadcast_i32x2 (__m128i __A)
1107{
1108  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1109                 (__v8si)_mm256_undefined_si256(),
1110                 (__mmask8) -1);
1111}
1112
1113static __inline__ __m256i __DEFAULT_FN_ATTRS
1114_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
1115{
1116  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1117                 (__v8si) __O,
1118                 __M);
1119}
1120
1121static __inline__ __m256i __DEFAULT_FN_ATTRS
1122_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
1123{
1124  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
1125                 (__v8si) _mm256_setzero_si256 (),
1126                 __M);
1127}
1128
1129static __inline__ __m256i __DEFAULT_FN_ATTRS
1130_mm256_broadcast_i64x2 (__m128i __A)
1131{
1132  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1133                 (__v4di)_mm256_undefined_si256(),
1134                 (__mmask8) -1);
1135}
1136
1137static __inline__ __m256i __DEFAULT_FN_ATTRS
1138_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A)
1139{
1140  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1141                 (__v4di) __O,
1142                 __M);
1143}
1144
1145static __inline__ __m256i __DEFAULT_FN_ATTRS
1146_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
1147{
1148  return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A,
1149                 (__v4di) _mm256_setzero_si256 (),
1150                 __M);
1151}
1152
/* _mm256_extractf64x2_pd(A, imm): expands to
 * __builtin_ia32_extractf64x2_256_mask on A with imm cast to int, a zeroed
 * third operand and an all-ones mask. */
#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1); })
1158
/* _mm256_mask_extractf64x2_pd(W, U, A, imm):
 * __builtin_ia32_extractf64x2_256_mask with W as the third operand and U as
 * the mask. */
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)(__m128d)(W), (__mmask8)(U)); })
1164
/* _mm256_maskz_extractf64x2_pd(U, A, imm):
 * __builtin_ia32_extractf64x2_256_mask with a zeroed third operand and U as
 * the mask. */
#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_extractf64x2_256_mask( \
      (__v4df)(__m256d)(A), (int)(imm), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)); })
1170
/* _mm256_extracti64x2_epi64(A, imm): expands to
 * __builtin_ia32_extracti64x2_256_mask on A with imm cast to int, a zeroed
 * third operand and an all-ones mask.
 *
 * The zero vector comes from the standard SSE2 _mm_setzero_si128(), the same
 * helper the other 128-bit integer intrinsics in this file use. */
#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)-1); })
1176
/* _mm256_mask_extracti64x2_epi64(W, U, A, imm):
 * __builtin_ia32_extracti64x2_256_mask with W as the third operand and U as
 * the mask. */
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_256_mask( \
      (__v4di)(__m256i)(A), (int)(imm), \
      (__v2di)(__m128i)(W), (__mmask8)(U)); })
1182
/* _mm256_maskz_extracti64x2_epi64(U, A, imm):
 * __builtin_ia32_extracti64x2_256_mask with a zeroed third operand and U as
 * the mask.
 *
 * The zero vector comes from the standard SSE2 _mm_setzero_si128(), the same
 * helper the other 128-bit integer intrinsics in this file use. */
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U)); })
1188
/* _mm256_insertf64x2(A, B, imm): expands to
 * __builtin_ia32_insertf64x2_256_mask on A and B with imm cast to int, a
 * zeroed fourth operand and an all-ones mask. */
#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_insertf64x2_256_mask( \
      (__v4df)(__m256d)(A), (__v2df)(__m128d)(B), (int)(imm), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1); })
1195
/* _mm256_mask_insertf64x2(W, U, A, B, imm):
 * __builtin_ia32_insertf64x2_256_mask with W as the fourth operand and U as
 * the mask. */
#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_insertf64x2_256_mask( \
      (__v4df)(__m256d)(A), (__v2df)(__m128d)(B), (int)(imm), \
      (__v4df)(__m256d)(W), (__mmask8)(U)); })
1202
/* _mm256_maskz_insertf64x2(U, A, B, imm):
 * __builtin_ia32_insertf64x2_256_mask with a zeroed fourth operand and U as
 * the mask. */
#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_insertf64x2_256_mask( \
      (__v4df)(__m256d)(A), (__v2df)(__m128d)(B), (int)(imm), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)); })
1209
/* _mm256_inserti64x2(A, B, imm): expands to
 * __builtin_ia32_inserti64x2_256_mask on A and B with imm cast to int, a
 * zeroed fourth operand and an all-ones mask. */
#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti64x2_256_mask( \
      (__v4di)(__m256i)(A), (__v2di)(__m128i)(B), (int)(imm), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)-1); })
1216
/* _mm256_mask_inserti64x2(W, U, A, B, imm):
 * __builtin_ia32_inserti64x2_256_mask with W as the fourth operand and U as
 * the mask. */
#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti64x2_256_mask( \
      (__v4di)(__m256i)(A), (__v2di)(__m128i)(B), (int)(imm), \
      (__v4di)(__m256i)(W), (__mmask8)(U)); })
1223
/* _mm256_maskz_inserti64x2(U, A, B, imm):
 * __builtin_ia32_inserti64x2_256_mask with a zeroed fourth operand and U as
 * the mask. */
#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_inserti64x2_256_mask( \
      (__v4di)(__m256i)(A), (__v2di)(__m128i)(B), (int)(imm), \
      (__v4di)_mm256_setzero_si256(), (__mmask8)(U)); })
1230
/* _mm_mask_fpclass_pd_mask(U, A, imm): yields the __mmask8 returned by
 * __builtin_ia32_fpclasspd128_mask for A, with imm cast to int and U passed
 * as the mask operand. */
#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1234
/* _mm_fpclass_pd_mask(A, imm): yields the __mmask8 returned by
 * __builtin_ia32_fpclasspd128_mask for A, with imm cast to int and an
 * all-ones mask operand. */
#define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd128_mask( \
      (__v2df)(__m128d)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1238
/* _mm256_mask_fpclass_pd_mask(U, A, imm): yields the __mmask8 returned by
 * __builtin_ia32_fpclasspd256_mask for A, with imm cast to int and U passed
 * as the mask operand. */
#define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1242
/* _mm256_fpclass_pd_mask(A, imm): yields the __mmask8 returned by
 * __builtin_ia32_fpclasspd256_mask for A, with imm cast to int and an
 * all-ones mask operand. */
#define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd256_mask( \
      (__v4df)(__m256d)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1246
/* _mm_mask_fpclass_ps_mask(U, A, imm): yields the __mmask8 returned by
 * __builtin_ia32_fpclassps128_mask for A, with imm cast to int and U passed
 * as the mask operand. */
#define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1250
/* _mm_fpclass_ps_mask(A, imm): yields the __mmask8 returned by
 * __builtin_ia32_fpclassps128_mask for A, with imm cast to int and an
 * all-ones mask operand. */
#define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps128_mask( \
      (__v4sf)(__m128)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1254
/* _mm256_mask_fpclass_ps_mask(U, A, imm): yields the __mmask8 returned by
 * __builtin_ia32_fpclassps256_mask for A, with imm cast to int and U passed
 * as the mask operand. */
#define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(imm), \
      (__mmask8)(U)); })
1258
/* _mm256_fpclass_ps_mask(A, imm): yields the __mmask8 returned by
 * __builtin_ia32_fpclassps256_mask for A, with imm cast to int and an
 * all-ones mask operand. */
#define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps256_mask( \
      (__v8sf)(__m256)(A), \
      (int)(imm), \
      (__mmask8)-1); })
1262
1263#undef __DEFAULT_FN_ATTRS
1264
1265#endif
1266