/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Internal 64-byte vector types, one per lane type (signed integer and
 * floating-point lanes). */
typedef char      __v64qi __attribute__((__vector_size__(64)));
typedef short     __v32hi __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef double    __v8df  __attribute__((__vector_size__(64)));

/* Unsigned-lane counterparts of the integer vector types. */
typedef unsigned char      __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short     __v32hu __attribute__((__vector_size__(64)));
typedef unsigned int       __v16su __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du  __attribute__((__vector_size__(64)));

/* User-facing 64-byte vector types: float, double and long long lanes. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Integer types used to carry lane masks. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
49
/* Rounding-mode selectors passed as the rounding argument of the *_round_*
 * intrinsics in this file. */
#define _MM_FROUND_TO_NEAREST_INT 0x00 /* nearest integer */
#define _MM_FROUND_TO_NEG_INF     0x01 /* toward negative infinity */
#define _MM_FROUND_TO_POS_INF     0x02 /* toward positive infinity */
#define _MM_FROUND_TO_ZERO        0x03 /* toward zero */
#define _MM_FROUND_CUR_DIRECTION  0x04 /* current direction */
56
/* Predicate codes for integer comparisons.  Value 3 is a placeholder, and
 * the GE / GT spellings are macro aliases of NLT / NLE. */
typedef enum {
    _MM_CMPINT_EQ     = 0,  /* Equal */
    _MM_CMPINT_LT     = 1,  /* Less than */
    _MM_CMPINT_LE     = 2,  /* Less than or Equal */
    _MM_CMPINT_UNUSED = 3,
    _MM_CMPINT_NE     = 4,  /* Not Equal */
    _MM_CMPINT_NLT    = 5,  /* Not Less than */
    _MM_CMPINT_NLE    = 6   /* Not Less than or Equal */
} _MM_CMPINT_ENUM;

#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
69
/* Four-letter permutation selectors.  Each letter A..D stands for the 2-bit
 * value 0..3; reading the name left to right, the letters fill bits 7:6,
 * 5:4, 3:2 and 1:0 of the constant.  Rows below are grouped by the first
 * two letters (the high nibble). */
typedef enum
{
  /* AA.. : 0x00 - 0x0F */
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  /* AB.. : 0x10 - 0x1F */
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  /* AC.. : 0x20 - 0x2F */
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  /* AD.. : 0x30 - 0x3F */
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  /* BA.. : 0x40 - 0x4F */
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  /* BB.. : 0x50 - 0x5F */
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  /* BC.. : 0x60 - 0x6F */
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  /* BD.. : 0x70 - 0x7F */
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  /* CA.. : 0x80 - 0x8F */
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  /* CB.. : 0x90 - 0x9F */
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  /* CC.. : 0xA0 - 0xAF */
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  /* CD.. : 0xB0 - 0xBF */
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  /* DA.. : 0xC0 - 0xCF */
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  /* DB.. : 0xD0 - 0xDF */
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  /* DC.. : 0xE0 - 0xEF */
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  /* DD.. : 0xF0 - 0xFF */
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159
/* Mantissa normalization interval selectors. */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0,  /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2    = 1,  /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1    = 2,  /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
167
/* Mantissa sign-handling selectors. */
typedef enum
{
  _MM_MANT_SIGN_src  = 0,  /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero = 1,  /* sign = 0             */
  _MM_MANT_SIGN_nan  = 2   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
174
/* Attribute set applied to every intrinsic function defined in this file:
 * always inline, no debug info, and compiled for the "avx512f" target. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
177
178/* Create vectors with repeated elements */
179
180static  __inline __m512i __DEFAULT_FN_ATTRS
181_mm512_setzero_si512(void)
182{
183  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
184}
185
186#define _mm512_setzero_epi32 _mm512_setzero_si512
187
188static __inline__ __m512d __DEFAULT_FN_ATTRS
189_mm512_undefined_pd(void)
190{
191  return (__m512d)__builtin_ia32_undef512();
192}
193
194static __inline__ __m512 __DEFAULT_FN_ATTRS
195_mm512_undefined(void)
196{
197  return (__m512)__builtin_ia32_undef512();
198}
199
200static __inline__ __m512 __DEFAULT_FN_ATTRS
201_mm512_undefined_ps(void)
202{
203  return (__m512)__builtin_ia32_undef512();
204}
205
206static __inline__ __m512i __DEFAULT_FN_ATTRS
207_mm512_undefined_epi32(void)
208{
209  return (__m512i)__builtin_ia32_undef512();
210}
211
212static __inline__ __m512i __DEFAULT_FN_ATTRS
213_mm512_broadcastd_epi32 (__m128i __A)
214{
215  return (__m512i)__builtin_shufflevector((__v4si) __A,
216                                          (__v4si)_mm_undefined_si128(),
217                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
218}
219
220static __inline__ __m512i __DEFAULT_FN_ATTRS
221_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
222{
223  return (__m512i)__builtin_ia32_selectd_512(__M,
224                                             (__v16si) _mm512_broadcastd_epi32(__A),
225                                             (__v16si) __O);
226}
227
228static __inline__ __m512i __DEFAULT_FN_ATTRS
229_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
230{
231  return (__m512i)__builtin_ia32_selectd_512(__M,
232                                             (__v16si) _mm512_broadcastd_epi32(__A),
233                                             (__v16si) _mm512_setzero_si512());
234}
235
236static __inline__ __m512i __DEFAULT_FN_ATTRS
237_mm512_broadcastq_epi64 (__m128i __A)
238{
239  return (__m512i)__builtin_shufflevector((__v2di) __A,
240                                          (__v2di) _mm_undefined_si128(),
241                                          0, 0, 0, 0, 0, 0, 0, 0);
242}
243
244static __inline__ __m512i __DEFAULT_FN_ATTRS
245_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246{
247  return (__m512i)__builtin_ia32_selectq_512(__M,
248                                             (__v8di) _mm512_broadcastq_epi64(__A),
249                                             (__v8di) __O);
250
251}
252
253static __inline__ __m512i __DEFAULT_FN_ATTRS
254_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255{
256  return (__m512i)__builtin_ia32_selectq_512(__M,
257                                             (__v8di) _mm512_broadcastq_epi64(__A),
258                                             (__v8di) _mm512_setzero_si512());
259}
260
261static __inline __m512i __DEFAULT_FN_ATTRS
262_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
263{
264  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
265                 (__v16si)
266                 _mm512_setzero_si512 (),
267                 __M);
268}
269
270static __inline __m512i __DEFAULT_FN_ATTRS
271_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
272{
273#ifdef __x86_64__
274  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
275                 (__v8di)
276                 _mm512_setzero_si512 (),
277                 __M);
278#else
279  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
280                 (__v8di)
281                 _mm512_setzero_si512 (),
282                 __M);
283#endif
284}
285
286static __inline __m512 __DEFAULT_FN_ATTRS
287_mm512_setzero_ps(void)
288{
289  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
290                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
291}
292
293#define _mm512_setzero _mm512_setzero_ps
294
295static  __inline __m512d __DEFAULT_FN_ATTRS
296_mm512_setzero_pd(void)
297{
298  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
299}
300
301static __inline __m512 __DEFAULT_FN_ATTRS
302_mm512_set1_ps(float __w)
303{
304  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
305                   __w, __w, __w, __w, __w, __w, __w, __w  };
306}
307
308static __inline __m512d __DEFAULT_FN_ATTRS
309_mm512_set1_pd(double __w)
310{
311  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
312}
313
314static __inline __m512i __DEFAULT_FN_ATTRS
315_mm512_set1_epi8(char __w)
316{
317  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
318                             __w, __w, __w, __w, __w, __w, __w, __w,
319                             __w, __w, __w, __w, __w, __w, __w, __w,
320                             __w, __w, __w, __w, __w, __w, __w, __w,
321                             __w, __w, __w, __w, __w, __w, __w, __w,
322                             __w, __w, __w, __w, __w, __w, __w, __w,
323                             __w, __w, __w, __w, __w, __w, __w, __w,
324                             __w, __w, __w, __w, __w, __w, __w, __w  };
325}
326
327static __inline __m512i __DEFAULT_FN_ATTRS
328_mm512_set1_epi16(short __w)
329{
330  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
331                             __w, __w, __w, __w, __w, __w, __w, __w,
332                             __w, __w, __w, __w, __w, __w, __w, __w,
333                             __w, __w, __w, __w, __w, __w, __w, __w };
334}
335
336static __inline __m512i __DEFAULT_FN_ATTRS
337_mm512_set1_epi32(int __s)
338{
339  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
340                             __s, __s, __s, __s, __s, __s, __s, __s };
341}
342
343static __inline __m512i __DEFAULT_FN_ATTRS
344_mm512_set1_epi64(long long __d)
345{
346  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
347}
348
349static __inline__ __m512 __DEFAULT_FN_ATTRS
350_mm512_broadcastss_ps(__m128 __A)
351{
352  return (__m512)__builtin_shufflevector((__v4sf) __A,
353                                         (__v4sf)_mm_undefined_ps(),
354                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
355}
356
357static __inline __m512i __DEFAULT_FN_ATTRS
358_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
359{
360  return  (__m512i)(__v16si)
361   { __D, __C, __B, __A, __D, __C, __B, __A,
362     __D, __C, __B, __A, __D, __C, __B, __A };
363}
364
365static __inline __m512i __DEFAULT_FN_ATTRS
366_mm512_set4_epi64 (long long __A, long long __B, long long __C,
367       long long __D)
368{
369  return  (__m512i) (__v8di)
370   { __D, __C, __B, __A, __D, __C, __B, __A };
371}
372
373static __inline __m512d __DEFAULT_FN_ATTRS
374_mm512_set4_pd (double __A, double __B, double __C, double __D)
375{
376  return  (__m512d)
377   { __D, __C, __B, __A, __D, __C, __B, __A };
378}
379
380static __inline __m512 __DEFAULT_FN_ATTRS
381_mm512_set4_ps (float __A, float __B, float __C, float __D)
382{
383  return  (__m512)
384   { __D, __C, __B, __A, __D, __C, __B, __A,
385     __D, __C, __B, __A, __D, __C, __B, __A };
386}
387
/* "Reversed" set4 variants: each forwards to the matching _mm512_set4_*
 * with its four arguments passed in the opposite order. */
#define _mm512_setr4_epi32(e0, e1, e2, e3) \
  _mm512_set4_epi32((e3), (e2), (e1), (e0))

#define _mm512_setr4_epi64(e0, e1, e2, e3) \
  _mm512_set4_epi64((e3), (e2), (e1), (e0))

#define _mm512_setr4_pd(e0, e1, e2, e3) \
  _mm512_set4_pd((e3), (e2), (e1), (e0))

#define _mm512_setr4_ps(e0, e1, e2, e3) \
  _mm512_set4_ps((e3), (e2), (e1), (e0))
399
400static __inline__ __m512d __DEFAULT_FN_ATTRS
401_mm512_broadcastsd_pd(__m128d __A)
402{
403  return (__m512d)__builtin_shufflevector((__v2df) __A,
404                                          (__v2df) _mm_undefined_pd(),
405                                          0, 0, 0, 0, 0, 0, 0, 0);
406}
407
408/* Cast between vector types */
409
410static __inline __m512d __DEFAULT_FN_ATTRS
411_mm512_castpd256_pd512(__m256d __a)
412{
413  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
414}
415
416static __inline __m512 __DEFAULT_FN_ATTRS
417_mm512_castps256_ps512(__m256 __a)
418{
419  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
420                                          -1, -1, -1, -1, -1, -1, -1, -1);
421}
422
423static __inline __m128d __DEFAULT_FN_ATTRS
424_mm512_castpd512_pd128(__m512d __a)
425{
426  return __builtin_shufflevector(__a, __a, 0, 1);
427}
428
429static __inline __m256d __DEFAULT_FN_ATTRS
430_mm512_castpd512_pd256 (__m512d __A)
431{
432  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
433}
434
435static __inline __m128 __DEFAULT_FN_ATTRS
436_mm512_castps512_ps128(__m512 __a)
437{
438  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
439}
440
441static __inline __m256 __DEFAULT_FN_ATTRS
442_mm512_castps512_ps256 (__m512 __A)
443{
444  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
445}
446
447static __inline __m512 __DEFAULT_FN_ATTRS
448_mm512_castpd_ps (__m512d __A)
449{
450  return (__m512) (__A);
451}
452
453static __inline __m512i __DEFAULT_FN_ATTRS
454_mm512_castpd_si512 (__m512d __A)
455{
456  return (__m512i) (__A);
457}
458
459static __inline__ __m512d __DEFAULT_FN_ATTRS
460_mm512_castpd128_pd512 (__m128d __A)
461{
462  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
463}
464
465static __inline __m512d __DEFAULT_FN_ATTRS
466_mm512_castps_pd (__m512 __A)
467{
468  return (__m512d) (__A);
469}
470
471static __inline __m512i __DEFAULT_FN_ATTRS
472_mm512_castps_si512 (__m512 __A)
473{
474  return (__m512i) (__A);
475}
476
477static __inline__ __m512 __DEFAULT_FN_ATTRS
478_mm512_castps128_ps512 (__m128 __A)
479{
480    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
481}
482
483static __inline__ __m512i __DEFAULT_FN_ATTRS
484_mm512_castsi128_si512 (__m128i __A)
485{
486   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
487}
488
489static __inline__ __m512i __DEFAULT_FN_ATTRS
490_mm512_castsi256_si512 (__m256i __A)
491{
492   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
493}
494
495static __inline __m512 __DEFAULT_FN_ATTRS
496_mm512_castsi512_ps (__m512i __A)
497{
498  return (__m512) (__A);
499}
500
501static __inline __m512d __DEFAULT_FN_ATTRS
502_mm512_castsi512_pd (__m512i __A)
503{
504  return (__m512d) (__A);
505}
506
507static __inline __m128i __DEFAULT_FN_ATTRS
508_mm512_castsi512_si128 (__m512i __A)
509{
510  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
511}
512
513static __inline __m256i __DEFAULT_FN_ATTRS
514_mm512_castsi512_si256 (__m512i __A)
515{
516  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
517}
518
519/* Bitwise operators */
520static __inline__ __m512i __DEFAULT_FN_ATTRS
521_mm512_and_epi32(__m512i __a, __m512i __b)
522{
523  return (__m512i)((__v16su)__a & (__v16su)__b);
524}
525
526static __inline__ __m512i __DEFAULT_FN_ATTRS
527_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
528{
529  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
530                (__v16si) _mm512_and_epi32(__a, __b),
531                (__v16si) __src);
532}
533
534static __inline__ __m512i __DEFAULT_FN_ATTRS
535_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
536{
537  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
538                                         __k, __a, __b);
539}
540
541static __inline__ __m512i __DEFAULT_FN_ATTRS
542_mm512_and_epi64(__m512i __a, __m512i __b)
543{
544  return (__m512i)((__v8du)__a & (__v8du)__b);
545}
546
547static __inline__ __m512i __DEFAULT_FN_ATTRS
548_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
549{
550    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
551                (__v8di) _mm512_and_epi64(__a, __b),
552                (__v8di) __src);
553}
554
555static __inline__ __m512i __DEFAULT_FN_ATTRS
556_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
557{
558  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
559                                         __k, __a, __b);
560}
561
562static __inline__ __m512i __DEFAULT_FN_ATTRS
563_mm512_andnot_si512 (__m512i __A, __m512i __B)
564{
565  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
566}
567
568static __inline__ __m512i __DEFAULT_FN_ATTRS
569_mm512_andnot_epi32 (__m512i __A, __m512i __B)
570{
571  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
572}
573
574static __inline__ __m512i __DEFAULT_FN_ATTRS
575_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
576{
577  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
578                                         (__v16si)_mm512_andnot_epi32(__A, __B),
579                                         (__v16si)__W);
580}
581
582static __inline__ __m512i __DEFAULT_FN_ATTRS
583_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
584{
585  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
586                                           __U, __A, __B);
587}
588
589static __inline__ __m512i __DEFAULT_FN_ATTRS
590_mm512_andnot_epi64(__m512i __A, __m512i __B)
591{
592  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
593}
594
595static __inline__ __m512i __DEFAULT_FN_ATTRS
596_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
597{
598  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
599                                          (__v8di)_mm512_andnot_epi64(__A, __B),
600                                          (__v8di)__W);
601}
602
603static __inline__ __m512i __DEFAULT_FN_ATTRS
604_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
605{
606  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
607                                           __U, __A, __B);
608}
609
610static __inline__ __m512i __DEFAULT_FN_ATTRS
611_mm512_or_epi32(__m512i __a, __m512i __b)
612{
613  return (__m512i)((__v16su)__a | (__v16su)__b);
614}
615
616static __inline__ __m512i __DEFAULT_FN_ATTRS
617_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
618{
619  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
620                                             (__v16si)_mm512_or_epi32(__a, __b),
621                                             (__v16si)__src);
622}
623
624static __inline__ __m512i __DEFAULT_FN_ATTRS
625_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
626{
627  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
628}
629
630static __inline__ __m512i __DEFAULT_FN_ATTRS
631_mm512_or_epi64(__m512i __a, __m512i __b)
632{
633  return (__m512i)((__v8du)__a | (__v8du)__b);
634}
635
636static __inline__ __m512i __DEFAULT_FN_ATTRS
637_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
638{
639  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
640                                             (__v8di)_mm512_or_epi64(__a, __b),
641                                             (__v8di)__src);
642}
643
644static __inline__ __m512i __DEFAULT_FN_ATTRS
645_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
646{
647  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
648}
649
650static __inline__ __m512i __DEFAULT_FN_ATTRS
651_mm512_xor_epi32(__m512i __a, __m512i __b)
652{
653  return (__m512i)((__v16su)__a ^ (__v16su)__b);
654}
655
656static __inline__ __m512i __DEFAULT_FN_ATTRS
657_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
658{
659  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
660                                            (__v16si)_mm512_xor_epi32(__a, __b),
661                                            (__v16si)__src);
662}
663
664static __inline__ __m512i __DEFAULT_FN_ATTRS
665_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
666{
667  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
668}
669
670static __inline__ __m512i __DEFAULT_FN_ATTRS
671_mm512_xor_epi64(__m512i __a, __m512i __b)
672{
673  return (__m512i)((__v8du)__a ^ (__v8du)__b);
674}
675
676static __inline__ __m512i __DEFAULT_FN_ATTRS
677_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
678{
679  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
680                                             (__v8di)_mm512_xor_epi64(__a, __b),
681                                             (__v8di)__src);
682}
683
684static __inline__ __m512i __DEFAULT_FN_ATTRS
685_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
686{
687  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
688}
689
690static __inline__ __m512i __DEFAULT_FN_ATTRS
691_mm512_and_si512(__m512i __a, __m512i __b)
692{
693  return (__m512i)((__v8du)__a & (__v8du)__b);
694}
695
696static __inline__ __m512i __DEFAULT_FN_ATTRS
697_mm512_or_si512(__m512i __a, __m512i __b)
698{
699  return (__m512i)((__v8du)__a | (__v8du)__b);
700}
701
702static __inline__ __m512i __DEFAULT_FN_ATTRS
703_mm512_xor_si512(__m512i __a, __m512i __b)
704{
705  return (__m512i)((__v8du)__a ^ (__v8du)__b);
706}
707
708/* Arithmetic */
709
710static __inline __m512d __DEFAULT_FN_ATTRS
711_mm512_add_pd(__m512d __a, __m512d __b)
712{
713  return (__m512d)((__v8df)__a + (__v8df)__b);
714}
715
716static __inline __m512 __DEFAULT_FN_ATTRS
717_mm512_add_ps(__m512 __a, __m512 __b)
718{
719  return (__m512)((__v16sf)__a + (__v16sf)__b);
720}
721
722static __inline __m512d __DEFAULT_FN_ATTRS
723_mm512_mul_pd(__m512d __a, __m512d __b)
724{
725  return (__m512d)((__v8df)__a * (__v8df)__b);
726}
727
728static __inline __m512 __DEFAULT_FN_ATTRS
729_mm512_mul_ps(__m512 __a, __m512 __b)
730{
731  return (__m512)((__v16sf)__a * (__v16sf)__b);
732}
733
734static __inline __m512d __DEFAULT_FN_ATTRS
735_mm512_sub_pd(__m512d __a, __m512d __b)
736{
737  return (__m512d)((__v8df)__a - (__v8df)__b);
738}
739
740static __inline __m512 __DEFAULT_FN_ATTRS
741_mm512_sub_ps(__m512 __a, __m512 __b)
742{
743  return (__m512)((__v16sf)__a - (__v16sf)__b);
744}
745
746static __inline__ __m512i __DEFAULT_FN_ATTRS
747_mm512_add_epi64 (__m512i __A, __m512i __B)
748{
749  return (__m512i) ((__v8du) __A + (__v8du) __B);
750}
751
752static __inline__ __m512i __DEFAULT_FN_ATTRS
753_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
754{
755  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
756                                             (__v8di)_mm512_add_epi64(__A, __B),
757                                             (__v8di)__W);
758}
759
760static __inline__ __m512i __DEFAULT_FN_ATTRS
761_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
762{
763  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
764                                             (__v8di)_mm512_add_epi64(__A, __B),
765                                             (__v8di)_mm512_setzero_si512());
766}
767
768static __inline__ __m512i __DEFAULT_FN_ATTRS
769_mm512_sub_epi64 (__m512i __A, __m512i __B)
770{
771  return (__m512i) ((__v8du) __A - (__v8du) __B);
772}
773
774static __inline__ __m512i __DEFAULT_FN_ATTRS
775_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
776{
777  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
778                                             (__v8di)_mm512_sub_epi64(__A, __B),
779                                             (__v8di)__W);
780}
781
782static __inline__ __m512i __DEFAULT_FN_ATTRS
783_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
784{
785  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
786                                             (__v8di)_mm512_sub_epi64(__A, __B),
787                                             (__v8di)_mm512_setzero_si512());
788}
789
790static __inline__ __m512i __DEFAULT_FN_ATTRS
791_mm512_add_epi32 (__m512i __A, __m512i __B)
792{
793  return (__m512i) ((__v16su) __A + (__v16su) __B);
794}
795
796static __inline__ __m512i __DEFAULT_FN_ATTRS
797_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
798{
799  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
800                                             (__v16si)_mm512_add_epi32(__A, __B),
801                                             (__v16si)__W);
802}
803
804static __inline__ __m512i __DEFAULT_FN_ATTRS
805_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
806{
807  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
808                                             (__v16si)_mm512_add_epi32(__A, __B),
809                                             (__v16si)_mm512_setzero_si512());
810}
811
812static __inline__ __m512i __DEFAULT_FN_ATTRS
813_mm512_sub_epi32 (__m512i __A, __m512i __B)
814{
815  return (__m512i) ((__v16su) __A - (__v16su) __B);
816}
817
818static __inline__ __m512i __DEFAULT_FN_ATTRS
819_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
820{
821  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
822                                             (__v16si)_mm512_sub_epi32(__A, __B),
823                                             (__v16si)__W);
824}
825
826static __inline__ __m512i __DEFAULT_FN_ATTRS
827_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
828{
829  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
830                                             (__v16si)_mm512_sub_epi32(__A, __B),
831                                             (__v16si)_mm512_setzero_si512());
832}
833
/* Masked packed-double maximum with an explicit rounding argument R.
 * Expands to __builtin_ia32_maxpd512_mask(A, B, W, U, R), with W passed as
 * the passthrough operand and every argument cast to the type used in the
 * call. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
839
/* Zero-passthrough form of the masked packed-double maximum with rounding
 * argument R: expands to __builtin_ia32_maxpd512_mask with
 * _mm512_setzero_pd() in the passthrough position. */
#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
845
/* Unmasked packed-double max with an explicit control value R.
 * Expands to __builtin_ia32_maxpd512_mask with every mask bit set
 * ((__mmask8)-1) and _mm512_undefined_pd() as the third operand. */
#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
851
852static  __inline__ __m512d __DEFAULT_FN_ATTRS
853_mm512_max_pd(__m512d __A, __m512d __B)
854{
855  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
856             (__v8df) __B,
857             (__v8df)
858             _mm512_setzero_pd (),
859             (__mmask8) -1,
860             _MM_FROUND_CUR_DIRECTION);
861}
862
863static __inline__ __m512d __DEFAULT_FN_ATTRS
864_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
865{
866  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
867                  (__v8df) __B,
868                  (__v8df) __W,
869                  (__mmask8) __U,
870                  _MM_FROUND_CUR_DIRECTION);
871}
872
873static __inline__ __m512d __DEFAULT_FN_ATTRS
874_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
875{
876  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
877                  (__v8df) __B,
878                  (__v8df)
879                  _mm512_setzero_pd (),
880                  (__mmask8) __U,
881                  _MM_FROUND_CUR_DIRECTION);
882}
883
/* Masked packed-float max with an explicit control value R.
 * Expands to __builtin_ia32_maxps512_mask(A, B, W, U, R): W is the third
 * (pass-through) operand, U is narrowed to __mmask16 and R is cast to int. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
889
/* Zero-masked packed-float max with an explicit control value R.
 * Expands to __builtin_ia32_maxps512_mask with _mm512_setzero_ps() as the
 * third (pass-through) operand, U as the mask and R cast to int. */
#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
895
/* Unmasked packed-float max with an explicit control value R.
 * Expands to __builtin_ia32_maxps512_mask with every mask bit set
 * ((__mmask16)-1) and _mm512_undefined_ps() as the third operand. */
#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
901
902static  __inline__ __m512 __DEFAULT_FN_ATTRS
903_mm512_max_ps(__m512 __A, __m512 __B)
904{
905  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
906            (__v16sf) __B,
907            (__v16sf)
908            _mm512_setzero_ps (),
909            (__mmask16) -1,
910            _MM_FROUND_CUR_DIRECTION);
911}
912
913static __inline__ __m512 __DEFAULT_FN_ATTRS
914_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
915{
916  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
917                 (__v16sf) __B,
918                 (__v16sf) __W,
919                 (__mmask16) __U,
920                 _MM_FROUND_CUR_DIRECTION);
921}
922
923static __inline__ __m512 __DEFAULT_FN_ATTRS
924_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
925{
926  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
927                 (__v16sf) __B,
928                 (__v16sf)
929                 _mm512_setzero_ps (),
930                 (__mmask16) __U,
931                 _MM_FROUND_CUR_DIRECTION);
932}
933
934static __inline__ __m128 __DEFAULT_FN_ATTRS
935_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
936  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
937                (__v4sf) __B,
938                (__v4sf) __W,
939                (__mmask8) __U,
940                _MM_FROUND_CUR_DIRECTION);
941}
942
943static __inline__ __m128 __DEFAULT_FN_ATTRS
944_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
945  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
946                (__v4sf) __B,
947                (__v4sf)  _mm_setzero_ps (),
948                (__mmask8) __U,
949                _MM_FROUND_CUR_DIRECTION);
950}
951
/* Unmasked scalar-float max with an explicit control value R.
 * Expands to __builtin_ia32_maxss_round_mask with every mask bit set
 * ((__mmask8)-1) and _mm_setzero_ps() as the third operand. */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
957
/* Masked scalar-float max with an explicit control value R.
 * Expands to __builtin_ia32_maxss_round_mask(A, B, W, U, R): W is the third
 * (pass-through) operand, U is narrowed to __mmask8 and R is cast to int. */
#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
963
/* Zero-masked scalar-float max with an explicit control value R.
 * Expands to __builtin_ia32_maxss_round_mask with _mm_setzero_ps() as the
 * third operand, U as the mask and R cast to int. */
#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
969
970static __inline__ __m128d __DEFAULT_FN_ATTRS
971_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
972  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
973                (__v2df) __B,
974                (__v2df) __W,
975                (__mmask8) __U,
976                _MM_FROUND_CUR_DIRECTION);
977}
978
979static __inline__ __m128d __DEFAULT_FN_ATTRS
980_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
981  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
982                (__v2df) __B,
983                (__v2df)  _mm_setzero_pd (),
984                (__mmask8) __U,
985                _MM_FROUND_CUR_DIRECTION);
986}
987
/* Unmasked scalar-double max with an explicit control value R.
 * Expands to __builtin_ia32_maxsd_round_mask with every mask bit set
 * ((__mmask8)-1) and _mm_setzero_pd() as the third operand. */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
993
/* Masked scalar-double max with an explicit control value R.
 * Expands to __builtin_ia32_maxsd_round_mask(A, B, W, U, R): W is the third
 * (pass-through) operand, U is narrowed to __mmask8 and R is cast to int. */
#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
999
/* Zero-masked scalar-double max with an explicit control value R.
 * Expands to __builtin_ia32_maxsd_round_mask with _mm_setzero_pd() as the
 * third operand, U as the mask and R cast to int. */
#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
1005
1006static __inline __m512i
1007__DEFAULT_FN_ATTRS
1008_mm512_max_epi32(__m512i __A, __m512i __B)
1009{
1010  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1011              (__v16si) __B,
1012              (__v16si)
1013              _mm512_setzero_si512 (),
1014              (__mmask16) -1);
1015}
1016
1017static __inline__ __m512i __DEFAULT_FN_ATTRS
1018_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1019{
1020  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1021                   (__v16si) __B,
1022                   (__v16si) __W, __M);
1023}
1024
1025static __inline__ __m512i __DEFAULT_FN_ATTRS
1026_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1027{
1028  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1029                   (__v16si) __B,
1030                   (__v16si)
1031                   _mm512_setzero_si512 (),
1032                   __M);
1033}
1034
1035static __inline __m512i __DEFAULT_FN_ATTRS
1036_mm512_max_epu32(__m512i __A, __m512i __B)
1037{
1038  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1039              (__v16si) __B,
1040              (__v16si)
1041              _mm512_setzero_si512 (),
1042              (__mmask16) -1);
1043}
1044
1045static __inline__ __m512i __DEFAULT_FN_ATTRS
1046_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1047{
1048  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1049                   (__v16si) __B,
1050                   (__v16si) __W, __M);
1051}
1052
1053static __inline__ __m512i __DEFAULT_FN_ATTRS
1054_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1055{
1056  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1057                   (__v16si) __B,
1058                   (__v16si)
1059                   _mm512_setzero_si512 (),
1060                   __M);
1061}
1062
1063static __inline __m512i __DEFAULT_FN_ATTRS
1064_mm512_max_epi64(__m512i __A, __m512i __B)
1065{
1066  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1067              (__v8di) __B,
1068              (__v8di)
1069              _mm512_setzero_si512 (),
1070              (__mmask8) -1);
1071}
1072
1073static __inline__ __m512i __DEFAULT_FN_ATTRS
1074_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1075{
1076  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1077                   (__v8di) __B,
1078                   (__v8di) __W, __M);
1079}
1080
1081static __inline__ __m512i __DEFAULT_FN_ATTRS
1082_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1083{
1084  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1085                   (__v8di) __B,
1086                   (__v8di)
1087                   _mm512_setzero_si512 (),
1088                   __M);
1089}
1090
1091static __inline __m512i __DEFAULT_FN_ATTRS
1092_mm512_max_epu64(__m512i __A, __m512i __B)
1093{
1094  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1095              (__v8di) __B,
1096              (__v8di)
1097              _mm512_setzero_si512 (),
1098              (__mmask8) -1);
1099}
1100
1101static __inline__ __m512i __DEFAULT_FN_ATTRS
1102_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1103{
1104  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1105                   (__v8di) __B,
1106                   (__v8di) __W, __M);
1107}
1108
1109static __inline__ __m512i __DEFAULT_FN_ATTRS
1110_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1111{
1112  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1113                   (__v8di) __B,
1114                   (__v8di)
1115                   _mm512_setzero_si512 (),
1116                   __M);
1117}
1118
/* Masked packed-double min with an explicit control value R.
 * Expands to __builtin_ia32_minpd512_mask(A, B, W, U, R): W is the third
 * (pass-through) operand, U is narrowed to __mmask8 and R is cast to int. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1124
/* Zero-masked packed-double min with an explicit control value R.
 * Expands to __builtin_ia32_minpd512_mask with _mm512_setzero_pd() as the
 * third (pass-through) operand, U as the mask and R cast to int. */
#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
1130
/* Unmasked packed-double min with an explicit control value R.
 * Expands to __builtin_ia32_minpd512_mask with every mask bit set
 * ((__mmask8)-1) and _mm512_undefined_pd() as the third operand. */
#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
1136
1137static  __inline__ __m512d __DEFAULT_FN_ATTRS
1138_mm512_min_pd(__m512d __A, __m512d __B)
1139{
1140  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1141             (__v8df) __B,
1142             (__v8df)
1143             _mm512_setzero_pd (),
1144             (__mmask8) -1,
1145             _MM_FROUND_CUR_DIRECTION);
1146}
1147
1148static __inline__ __m512d __DEFAULT_FN_ATTRS
1149_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1150{
1151  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1152                  (__v8df) __B,
1153                  (__v8df) __W,
1154                  (__mmask8) __U,
1155                  _MM_FROUND_CUR_DIRECTION);
1156}
1157
/* Masked packed-float min with an explicit control value R.
 * Expands to __builtin_ia32_minps512_mask(A, B, W, U, R): W is the third
 * (pass-through) operand, U is narrowed to __mmask16 and R is cast to int. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
1163
/* Zero-masked packed-float min with an explicit control value R.
 * Expands to __builtin_ia32_minps512_mask with _mm512_setzero_ps() as the
 * third (pass-through) operand, U as the mask and R cast to int. */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
1169
/* Unmasked packed-float min with an explicit control value R.
 * Expands to __builtin_ia32_minps512_mask with every mask bit set
 * ((__mmask16)-1) and _mm512_undefined_ps() as the third operand. */
#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
1175
1176static __inline__ __m512d __DEFAULT_FN_ATTRS
1177_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1178{
1179  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1180                  (__v8df) __B,
1181                  (__v8df)
1182                  _mm512_setzero_pd (),
1183                  (__mmask8) __U,
1184                  _MM_FROUND_CUR_DIRECTION);
1185}
1186
1187static  __inline__ __m512 __DEFAULT_FN_ATTRS
1188_mm512_min_ps(__m512 __A, __m512 __B)
1189{
1190  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1191            (__v16sf) __B,
1192            (__v16sf)
1193            _mm512_setzero_ps (),
1194            (__mmask16) -1,
1195            _MM_FROUND_CUR_DIRECTION);
1196}
1197
1198static __inline__ __m512 __DEFAULT_FN_ATTRS
1199_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1200{
1201  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1202                 (__v16sf) __B,
1203                 (__v16sf) __W,
1204                 (__mmask16) __U,
1205                 _MM_FROUND_CUR_DIRECTION);
1206}
1207
1208static __inline__ __m512 __DEFAULT_FN_ATTRS
1209_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1210{
1211  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1212                 (__v16sf) __B,
1213                 (__v16sf)
1214                 _mm512_setzero_ps (),
1215                 (__mmask16) __U,
1216                 _MM_FROUND_CUR_DIRECTION);
1217}
1218
1219static __inline__ __m128 __DEFAULT_FN_ATTRS
1220_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1221  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1222                (__v4sf) __B,
1223                (__v4sf) __W,
1224                (__mmask8) __U,
1225                _MM_FROUND_CUR_DIRECTION);
1226}
1227
1228static __inline__ __m128 __DEFAULT_FN_ATTRS
1229_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1230  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1231                (__v4sf) __B,
1232                (__v4sf)  _mm_setzero_ps (),
1233                (__mmask8) __U,
1234                _MM_FROUND_CUR_DIRECTION);
1235}
1236
/* Unmasked scalar-float min with an explicit control value R.
 * Expands to __builtin_ia32_minss_round_mask with every mask bit set
 * ((__mmask8)-1) and _mm_setzero_ps() as the third operand. */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
1242
/* Masked scalar-float min with an explicit control value R.
 * Expands to __builtin_ia32_minss_round_mask(A, B, W, U, R): W is the third
 * (pass-through) operand, U is narrowed to __mmask8 and R is cast to int. */
#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1248
/* Zero-masked scalar-float min with an explicit control value R.
 * Expands to __builtin_ia32_minss_round_mask with _mm_setzero_ps() as the
 * third operand, U as the mask and R cast to int. */
#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
1254
1255static __inline__ __m128d __DEFAULT_FN_ATTRS
1256_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1257  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1258                (__v2df) __B,
1259                (__v2df) __W,
1260                (__mmask8) __U,
1261                _MM_FROUND_CUR_DIRECTION);
1262}
1263
1264static __inline__ __m128d __DEFAULT_FN_ATTRS
1265_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1266  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1267                (__v2df) __B,
1268                (__v2df)  _mm_setzero_pd (),
1269                (__mmask8) __U,
1270                _MM_FROUND_CUR_DIRECTION);
1271}
1272
/* Unmasked scalar-double min with an explicit control value R.
 * Expands to __builtin_ia32_minsd_round_mask with every mask bit set
 * ((__mmask8)-1) and _mm_setzero_pd() as the third operand. */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
1278
/* Masked scalar-double min with an explicit control value R.
 * Expands to __builtin_ia32_minsd_round_mask(A, B, W, U, R): W is the third
 * (pass-through) operand, U is narrowed to __mmask8 and R is cast to int. */
#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1284
/* Zero-masked scalar-double min with an explicit control value R.
 * Expands to __builtin_ia32_minsd_round_mask with _mm_setzero_pd() as the
 * third operand, U as the mask and R cast to int. */
#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
1290
1291static __inline __m512i
1292__DEFAULT_FN_ATTRS
1293_mm512_min_epi32(__m512i __A, __m512i __B)
1294{
1295  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1296              (__v16si) __B,
1297              (__v16si)
1298              _mm512_setzero_si512 (),
1299              (__mmask16) -1);
1300}
1301
1302static __inline__ __m512i __DEFAULT_FN_ATTRS
1303_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1304{
1305  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1306                   (__v16si) __B,
1307                   (__v16si) __W, __M);
1308}
1309
1310static __inline__ __m512i __DEFAULT_FN_ATTRS
1311_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1312{
1313  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1314                   (__v16si) __B,
1315                   (__v16si)
1316                   _mm512_setzero_si512 (),
1317                   __M);
1318}
1319
1320static __inline __m512i __DEFAULT_FN_ATTRS
1321_mm512_min_epu32(__m512i __A, __m512i __B)
1322{
1323  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1324              (__v16si) __B,
1325              (__v16si)
1326              _mm512_setzero_si512 (),
1327              (__mmask16) -1);
1328}
1329
1330static __inline__ __m512i __DEFAULT_FN_ATTRS
1331_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1332{
1333  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1334                   (__v16si) __B,
1335                   (__v16si) __W, __M);
1336}
1337
1338static __inline__ __m512i __DEFAULT_FN_ATTRS
1339_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1340{
1341  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1342                   (__v16si) __B,
1343                   (__v16si)
1344                   _mm512_setzero_si512 (),
1345                   __M);
1346}
1347
1348static __inline __m512i __DEFAULT_FN_ATTRS
1349_mm512_min_epi64(__m512i __A, __m512i __B)
1350{
1351  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1352              (__v8di) __B,
1353              (__v8di)
1354              _mm512_setzero_si512 (),
1355              (__mmask8) -1);
1356}
1357
1358static __inline__ __m512i __DEFAULT_FN_ATTRS
1359_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1360{
1361  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1362                   (__v8di) __B,
1363                   (__v8di) __W, __M);
1364}
1365
1366static __inline__ __m512i __DEFAULT_FN_ATTRS
1367_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1368{
1369  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1370                   (__v8di) __B,
1371                   (__v8di)
1372                   _mm512_setzero_si512 (),
1373                   __M);
1374}
1375
1376static __inline __m512i __DEFAULT_FN_ATTRS
1377_mm512_min_epu64(__m512i __A, __m512i __B)
1378{
1379  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1380              (__v8di) __B,
1381              (__v8di)
1382              _mm512_setzero_si512 (),
1383              (__mmask8) -1);
1384}
1385
1386static __inline__ __m512i __DEFAULT_FN_ATTRS
1387_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1388{
1389  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1390                   (__v8di) __B,
1391                   (__v8di) __W, __M);
1392}
1393
1394static __inline__ __m512i __DEFAULT_FN_ATTRS
1395_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1396{
1397  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1398                   (__v8di) __B,
1399                   (__v8di)
1400                   _mm512_setzero_si512 (),
1401                   __M);
1402}
1403
1404static __inline __m512i __DEFAULT_FN_ATTRS
1405_mm512_mul_epi32(__m512i __X, __m512i __Y)
1406{
1407  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
1408              (__v16si) __Y,
1409              (__v8di)
1410              _mm512_setzero_si512 (),
1411              (__mmask8) -1);
1412}
1413
1414static __inline __m512i __DEFAULT_FN_ATTRS
1415_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1416{
1417  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
1418              (__v16si) __Y,
1419              (__v8di) __W, __M);
1420}
1421
1422static __inline __m512i __DEFAULT_FN_ATTRS
1423_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
1424{
1425  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
1426              (__v16si) __Y,
1427              (__v8di)
1428              _mm512_setzero_si512 (),
1429              __M);
1430}
1431
1432static __inline __m512i __DEFAULT_FN_ATTRS
1433_mm512_mul_epu32(__m512i __X, __m512i __Y)
1434{
1435  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1436               (__v16si) __Y,
1437               (__v8di)
1438               _mm512_setzero_si512 (),
1439               (__mmask8) -1);
1440}
1441
1442static __inline __m512i __DEFAULT_FN_ATTRS
1443_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1444{
1445  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1446               (__v16si) __Y,
1447               (__v8di) __W, __M);
1448}
1449
1450static __inline __m512i __DEFAULT_FN_ATTRS
1451_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
1452{
1453  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
1454               (__v16si) __Y,
1455               (__v8di)
1456               _mm512_setzero_si512 (),
1457               __M);
1458}
1459
1460static __inline __m512i __DEFAULT_FN_ATTRS
1461_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1462{
1463  return (__m512i) ((__v16su) __A * (__v16su) __B);
1464}
1465
1466static __inline __m512i __DEFAULT_FN_ATTRS
1467_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1468{
1469  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1470                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1471                                             (__v16si)_mm512_setzero_si512());
1472}
1473
1474static __inline __m512i __DEFAULT_FN_ATTRS
1475_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1476{
1477  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1478                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1479                                             (__v16si)__W);
1480}
1481
/* Masked packed-double square root with an explicit control value R.
 * Expands to __builtin_ia32_sqrtpd512_mask(A, W, U, R): W is the
 * pass-through operand, U is narrowed to __mmask8 and R is cast to int. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1486
/* Zero-masked packed-double square root with an explicit control value R.
 * Expands to __builtin_ia32_sqrtpd512_mask with _mm512_setzero_pd() as the
 * pass-through operand, U as the mask and R cast to int. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
1491
/* Unmasked packed-double square root with an explicit control value R.
 * Expands to __builtin_ia32_sqrtpd512_mask with every mask bit set
 * ((__mmask8)-1) and _mm512_undefined_pd() as the second operand. */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
1496
1497static  __inline__ __m512d __DEFAULT_FN_ATTRS
1498_mm512_sqrt_pd(__m512d __a)
1499{
1500  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1501                                                (__v8df) _mm512_setzero_pd (),
1502                                                (__mmask8) -1,
1503                                                _MM_FROUND_CUR_DIRECTION);
1504}
1505
1506static __inline__ __m512d __DEFAULT_FN_ATTRS
1507_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1508{
1509  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1510                   (__v8df) __W,
1511                   (__mmask8) __U,
1512                   _MM_FROUND_CUR_DIRECTION);
1513}
1514
1515static __inline__ __m512d __DEFAULT_FN_ATTRS
1516_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1517{
1518  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1519                   (__v8df)
1520                   _mm512_setzero_pd (),
1521                   (__mmask8) __U,
1522                   _MM_FROUND_CUR_DIRECTION);
1523}
1524
/* Masked packed-float square root with an explicit control value R.
 * Expands to __builtin_ia32_sqrtps512_mask(A, W, U, R): W is the
 * pass-through operand, U is narrowed to __mmask16 and R is cast to int. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
1529
/* Zero-masked packed-float square root with an explicit control value R.
 * Expands to __builtin_ia32_sqrtps512_mask with _mm512_setzero_ps() as the
 * pass-through operand, U as the mask and R cast to int. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
1534
/* Unmasked packed-float square root with an explicit control value R.
 * Expands to __builtin_ia32_sqrtps512_mask with every mask bit set
 * ((__mmask16)-1) and _mm512_undefined_ps() as the second operand. */
#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
1539
1540static  __inline__ __m512 __DEFAULT_FN_ATTRS
1541_mm512_sqrt_ps(__m512 __a)
1542{
1543  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1544                                               (__v16sf) _mm512_setzero_ps (),
1545                                               (__mmask16) -1,
1546                                               _MM_FROUND_CUR_DIRECTION);
1547}
1548
1549static  __inline__ __m512 __DEFAULT_FN_ATTRS
1550_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1551{
1552  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1553                                               (__v16sf) __W,
1554                                               (__mmask16) __U,
1555                                               _MM_FROUND_CUR_DIRECTION);
1556}
1557
1558static  __inline__ __m512 __DEFAULT_FN_ATTRS
1559_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1560{
1561  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1562                                               (__v16sf) _mm512_setzero_ps (),
1563                                               (__mmask16) __U,
1564                                               _MM_FROUND_CUR_DIRECTION);
1565}
1566
1567static  __inline__ __m512d __DEFAULT_FN_ATTRS
1568_mm512_rsqrt14_pd(__m512d __A)
1569{
1570  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571                 (__v8df)
1572                 _mm512_setzero_pd (),
1573                 (__mmask8) -1);}
1574
1575static __inline__ __m512d __DEFAULT_FN_ATTRS
1576_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1577{
1578  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579                  (__v8df) __W,
1580                  (__mmask8) __U);
1581}
1582
1583static __inline__ __m512d __DEFAULT_FN_ATTRS
1584_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1585{
1586  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1587                  (__v8df)
1588                  _mm512_setzero_pd (),
1589                  (__mmask8) __U);
1590}
1591
1592static  __inline__ __m512 __DEFAULT_FN_ATTRS
1593_mm512_rsqrt14_ps(__m512 __A)
1594{
1595  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1596                (__v16sf)
1597                _mm512_setzero_ps (),
1598                (__mmask16) -1);
1599}
1600
1601static __inline__ __m512 __DEFAULT_FN_ATTRS
1602_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1603{
1604  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605                 (__v16sf) __W,
1606                 (__mmask16) __U);
1607}
1608
1609static __inline__ __m512 __DEFAULT_FN_ATTRS
1610_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1611{
1612  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1613                 (__v16sf)
1614                 _mm512_setzero_ps (),
1615                 (__mmask16) __U);
1616}
1617
1618static  __inline__ __m128 __DEFAULT_FN_ATTRS
1619_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1620{
1621  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1622             (__v4sf) __B,
1623             (__v4sf)
1624             _mm_setzero_ps (),
1625             (__mmask8) -1);
1626}
1627
1628static __inline__ __m128 __DEFAULT_FN_ATTRS
1629_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1630{
1631 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1632          (__v4sf) __B,
1633          (__v4sf) __W,
1634          (__mmask8) __U);
1635}
1636
1637static __inline__ __m128 __DEFAULT_FN_ATTRS
1638_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1639{
1640 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1641          (__v4sf) __B,
1642          (__v4sf) _mm_setzero_ps (),
1643          (__mmask8) __U);
1644}
1645
1646static  __inline__ __m128d __DEFAULT_FN_ATTRS
1647_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1648{
1649  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1650              (__v2df) __B,
1651              (__v2df)
1652              _mm_setzero_pd (),
1653              (__mmask8) -1);
1654}
1655
1656static __inline__ __m128d __DEFAULT_FN_ATTRS
1657_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1658{
1659 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1660          (__v2df) __B,
1661          (__v2df) __W,
1662          (__mmask8) __U);
1663}
1664
1665static __inline__ __m128d __DEFAULT_FN_ATTRS
1666_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1667{
1668 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1669          (__v2df) __B,
1670          (__v2df) _mm_setzero_pd (),
1671          (__mmask8) __U);
1672}
1673
1674static  __inline__ __m512d __DEFAULT_FN_ATTRS
1675_mm512_rcp14_pd(__m512d __A)
1676{
1677  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1678               (__v8df)
1679               _mm512_setzero_pd (),
1680               (__mmask8) -1);
1681}
1682
1683static __inline__ __m512d __DEFAULT_FN_ATTRS
1684_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1685{
1686  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687                (__v8df) __W,
1688                (__mmask8) __U);
1689}
1690
1691static __inline__ __m512d __DEFAULT_FN_ATTRS
1692_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1693{
1694  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1695                (__v8df)
1696                _mm512_setzero_pd (),
1697                (__mmask8) __U);
1698}
1699
1700static  __inline__ __m512 __DEFAULT_FN_ATTRS
1701_mm512_rcp14_ps(__m512 __A)
1702{
1703  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1704              (__v16sf)
1705              _mm512_setzero_ps (),
1706              (__mmask16) -1);
1707}
1708
1709static __inline__ __m512 __DEFAULT_FN_ATTRS
1710_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1711{
1712  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713                   (__v16sf) __W,
1714                   (__mmask16) __U);
1715}
1716
1717static __inline__ __m512 __DEFAULT_FN_ATTRS
1718_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1719{
1720  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1721                   (__v16sf)
1722                   _mm512_setzero_ps (),
1723                   (__mmask16) __U);
1724}
1725
1726static  __inline__ __m128 __DEFAULT_FN_ATTRS
1727_mm_rcp14_ss(__m128 __A, __m128 __B)
1728{
1729  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1730                 (__v4sf) __B,
1731                 (__v4sf)
1732                 _mm_setzero_ps (),
1733                 (__mmask8) -1);
1734}
1735
1736static __inline__ __m128 __DEFAULT_FN_ATTRS
1737_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1738{
1739 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1740          (__v4sf) __B,
1741          (__v4sf) __W,
1742          (__mmask8) __U);
1743}
1744
1745static __inline__ __m128 __DEFAULT_FN_ATTRS
1746_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1747{
1748 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1749          (__v4sf) __B,
1750          (__v4sf) _mm_setzero_ps (),
1751          (__mmask8) __U);
1752}
1753
1754static  __inline__ __m128d __DEFAULT_FN_ATTRS
1755_mm_rcp14_sd(__m128d __A, __m128d __B)
1756{
1757  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1758            (__v2df) __B,
1759            (__v2df)
1760            _mm_setzero_pd (),
1761            (__mmask8) -1);
1762}
1763
1764static __inline__ __m128d __DEFAULT_FN_ATTRS
1765_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1766{
1767 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1768          (__v2df) __B,
1769          (__v2df) __W,
1770          (__mmask8) __U);
1771}
1772
1773static __inline__ __m128d __DEFAULT_FN_ATTRS
1774_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1775{
1776 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1777          (__v2df) __B,
1778          (__v2df) _mm_setzero_pd (),
1779          (__mmask8) __U);
1780}
1781
1782static __inline __m512 __DEFAULT_FN_ATTRS
1783_mm512_floor_ps(__m512 __A)
1784{
1785  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1786                                                  _MM_FROUND_FLOOR,
1787                                                  (__v16sf) __A, -1,
1788                                                  _MM_FROUND_CUR_DIRECTION);
1789}
1790
1791static __inline__ __m512 __DEFAULT_FN_ATTRS
1792_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1793{
1794  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1795                   _MM_FROUND_FLOOR,
1796                   (__v16sf) __W, __U,
1797                   _MM_FROUND_CUR_DIRECTION);
1798}
1799
1800static __inline __m512d __DEFAULT_FN_ATTRS
1801_mm512_floor_pd(__m512d __A)
1802{
1803  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1804                                                   _MM_FROUND_FLOOR,
1805                                                   (__v8df) __A, -1,
1806                                                   _MM_FROUND_CUR_DIRECTION);
1807}
1808
1809static __inline__ __m512d __DEFAULT_FN_ATTRS
1810_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1811{
1812  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1813                _MM_FROUND_FLOOR,
1814                (__v8df) __W, __U,
1815                _MM_FROUND_CUR_DIRECTION);
1816}
1817
1818static __inline__ __m512 __DEFAULT_FN_ATTRS
1819_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1820{
1821  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1822                   _MM_FROUND_CEIL,
1823                   (__v16sf) __W, __U,
1824                   _MM_FROUND_CUR_DIRECTION);
1825}
1826
1827static __inline __m512 __DEFAULT_FN_ATTRS
1828_mm512_ceil_ps(__m512 __A)
1829{
1830  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1831                                                  _MM_FROUND_CEIL,
1832                                                  (__v16sf) __A, -1,
1833                                                  _MM_FROUND_CUR_DIRECTION);
1834}
1835
1836static __inline __m512d __DEFAULT_FN_ATTRS
1837_mm512_ceil_pd(__m512d __A)
1838{
1839  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1840                                                   _MM_FROUND_CEIL,
1841                                                   (__v8df) __A, -1,
1842                                                   _MM_FROUND_CUR_DIRECTION);
1843}
1844
1845static __inline__ __m512d __DEFAULT_FN_ATTRS
1846_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1847{
1848  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1849                _MM_FROUND_CEIL,
1850                (__v8df) __W, __U,
1851                _MM_FROUND_CUR_DIRECTION);
1852}
1853
1854static __inline __m512i __DEFAULT_FN_ATTRS
1855_mm512_abs_epi64(__m512i __A)
1856{
1857  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1858             (__v8di)
1859             _mm512_setzero_si512 (),
1860             (__mmask8) -1);
1861}
1862
1863static __inline__ __m512i __DEFAULT_FN_ATTRS
1864_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1865{
1866  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1867                  (__v8di) __W,
1868                  (__mmask8) __U);
1869}
1870
1871static __inline__ __m512i __DEFAULT_FN_ATTRS
1872_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1873{
1874  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1875                  (__v8di)
1876                  _mm512_setzero_si512 (),
1877                  (__mmask8) __U);
1878}
1879
1880static __inline __m512i __DEFAULT_FN_ATTRS
1881_mm512_abs_epi32(__m512i __A)
1882{
1883  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1884             (__v16si)
1885             _mm512_setzero_si512 (),
1886             (__mmask16) -1);
1887}
1888
1889static __inline__ __m512i __DEFAULT_FN_ATTRS
1890_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1891{
1892  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1893                  (__v16si) __W,
1894                  (__mmask16) __U);
1895}
1896
1897static __inline__ __m512i __DEFAULT_FN_ATTRS
1898_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1899{
1900  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1901                  (__v16si)
1902                  _mm512_setzero_si512 (),
1903                  (__mmask16) __U);
1904}
1905
1906static __inline__ __m128 __DEFAULT_FN_ATTRS
1907_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1908  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1909                (__v4sf) __B,
1910                (__v4sf) __W,
1911                (__mmask8) __U,
1912                _MM_FROUND_CUR_DIRECTION);
1913}
1914
1915static __inline__ __m128 __DEFAULT_FN_ATTRS
1916_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1917  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
1918                (__v4sf) __B,
1919                (__v4sf)  _mm_setzero_ps (),
1920                (__mmask8) __U,
1921                _MM_FROUND_CUR_DIRECTION);
1922}
1923
/* Scalar-float add with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
1929
/* Scalar-float add with caller-supplied rounding R; W is passed as the third
   builtin argument and U as the mask. */
#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
1935
/* Scalar-float add with caller-supplied rounding R; a zero vector is passed
   as the third builtin argument and U as the mask. */
#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
1941
1942static __inline__ __m128d __DEFAULT_FN_ATTRS
1943_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1944  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1945                (__v2df) __B,
1946                (__v2df) __W,
1947                (__mmask8) __U,
1948                _MM_FROUND_CUR_DIRECTION);
1949}
1950
1951static __inline__ __m128d __DEFAULT_FN_ATTRS
1952_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1953  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
1954                (__v2df) __B,
1955                (__v2df)  _mm_setzero_pd (),
1956                (__mmask8) __U,
1957                _MM_FROUND_CUR_DIRECTION);
1958}
/* Scalar-double add with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm_add_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
1964
/* Scalar-double add with caller-supplied rounding R; W is passed as the third
   builtin argument and U as the mask. */
#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
1970
/* Scalar-double add with caller-supplied rounding R; a zero vector is passed
   as the third builtin argument and U as the mask. */
#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
1976
1977static __inline__ __m512d __DEFAULT_FN_ATTRS
1978_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1979  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1980                                              (__v8df)_mm512_add_pd(__A, __B),
1981                                              (__v8df)__W);
1982}
1983
1984static __inline__ __m512d __DEFAULT_FN_ATTRS
1985_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1986  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1987                                              (__v8df)_mm512_add_pd(__A, __B),
1988                                              (__v8df)_mm512_setzero_pd());
1989}
1990
1991static __inline__ __m512 __DEFAULT_FN_ATTRS
1992_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1993  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1994                                             (__v16sf)_mm512_add_ps(__A, __B),
1995                                             (__v16sf)__W);
1996}
1997
1998static __inline__ __m512 __DEFAULT_FN_ATTRS
1999_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2000  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2001                                             (__v16sf)_mm512_add_ps(__A, __B),
2002                                             (__v16sf)_mm512_setzero_ps());
2003}
2004
/* Packed-double add with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })
2010
/* Packed-double add with caller-supplied rounding R; W is passed as the third
   builtin argument and U as the mask. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })
2016
/* Packed-double add with caller-supplied rounding R; a zero vector is passed
   as the third builtin argument and U as the mask. */
#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
2022
/* Packed-float add with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
2028
/* Packed-float add with caller-supplied rounding R; W is passed as the third
   builtin argument and U as the mask. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), (__mmask16)(U), (int)(R)); })
2034
/* Packed-float add with caller-supplied rounding R; a zero vector is passed
   as the third builtin argument and U as the mask. */
#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)); })
2040
2041static __inline__ __m128 __DEFAULT_FN_ATTRS
2042_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2043  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2044                (__v4sf) __B,
2045                (__v4sf) __W,
2046                (__mmask8) __U,
2047                _MM_FROUND_CUR_DIRECTION);
2048}
2049
2050static __inline__ __m128 __DEFAULT_FN_ATTRS
2051_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2052  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2053                (__v4sf) __B,
2054                (__v4sf)  _mm_setzero_ps (),
2055                (__mmask8) __U,
2056                _MM_FROUND_CUR_DIRECTION);
2057}
/* Scalar-float subtract with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
2063
/* Scalar-float subtract with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
2069
/* Scalar-float subtract with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask. */
#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
2075
2076static __inline__ __m128d __DEFAULT_FN_ATTRS
2077_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2078  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2079                (__v2df) __B,
2080                (__v2df) __W,
2081                (__mmask8) __U,
2082                _MM_FROUND_CUR_DIRECTION);
2083}
2084
2085static __inline__ __m128d __DEFAULT_FN_ATTRS
2086_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2087  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2088                (__v2df) __B,
2089                (__v2df)  _mm_setzero_pd (),
2090                (__mmask8) __U,
2091                _MM_FROUND_CUR_DIRECTION);
2092}
2093
/* Scalar-double subtract with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
2099
/* Scalar-double subtract with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
2105
/* Scalar-double subtract with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask. */
#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
2111
2112static __inline__ __m512d __DEFAULT_FN_ATTRS
2113_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2114  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2115                                              (__v8df)_mm512_sub_pd(__A, __B),
2116                                              (__v8df)__W);
2117}
2118
2119static __inline__ __m512d __DEFAULT_FN_ATTRS
2120_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2121  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2122                                              (__v8df)_mm512_sub_pd(__A, __B),
2123                                              (__v8df)_mm512_setzero_pd());
2124}
2125
2126static __inline__ __m512 __DEFAULT_FN_ATTRS
2127_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2128  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2129                                             (__v16sf)_mm512_sub_ps(__A, __B),
2130                                             (__v16sf)__W);
2131}
2132
2133static __inline__ __m512 __DEFAULT_FN_ATTRS
2134_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2135  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2136                                             (__v16sf)_mm512_sub_ps(__A, __B),
2137                                             (__v16sf)_mm512_setzero_ps());
2138}
2139
/* Packed-double subtract with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })
2145
/* Packed-double subtract with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })
2151
/* Packed-double subtract with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
2157
/* Packed-float subtract with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
2163
/* Packed-float subtract with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask.
   The expansion must be a pure expression: no ';' follows the closing "})",
   so the macro can be used as a function argument, initializer or
   sub-expression, matching the other *_round_* macros in this header. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2169
/* Packed-float subtract with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask.
   The expansion must be a pure expression: no ';' follows the closing "})",
   so the macro can be used as a function argument, initializer or
   sub-expression, matching the other *_round_* macros in this header. */
#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2175
2176static __inline__ __m128 __DEFAULT_FN_ATTRS
2177_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2178  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2179                (__v4sf) __B,
2180                (__v4sf) __W,
2181                (__mmask8) __U,
2182                _MM_FROUND_CUR_DIRECTION);
2183}
2184
2185static __inline__ __m128 __DEFAULT_FN_ATTRS
2186_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2187  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2188                (__v4sf) __B,
2189                (__v4sf)  _mm_setzero_ps (),
2190                (__mmask8) __U,
2191                _MM_FROUND_CUR_DIRECTION);
2192}
/* Scalar-float multiply with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
2198
/* Scalar-float multiply with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
2204
/* Scalar-float multiply with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask. */
#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
2210
2211static __inline__ __m128d __DEFAULT_FN_ATTRS
2212_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2213  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2214                (__v2df) __B,
2215                (__v2df) __W,
2216                (__mmask8) __U,
2217                _MM_FROUND_CUR_DIRECTION);
2218}
2219
2220static __inline__ __m128d __DEFAULT_FN_ATTRS
2221_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2222  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2223                (__v2df) __B,
2224                (__v2df)  _mm_setzero_pd (),
2225                (__mmask8) __U,
2226                _MM_FROUND_CUR_DIRECTION);
2227}
2228
/* Scalar-double multiply with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
2234
/* Scalar-double multiply with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
2240
/* Scalar-double multiply with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask. */
#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
2246
2247static __inline__ __m512d __DEFAULT_FN_ATTRS
2248_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2249  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2250                                              (__v8df)_mm512_mul_pd(__A, __B),
2251                                              (__v8df)__W);
2252}
2253
2254static __inline__ __m512d __DEFAULT_FN_ATTRS
2255_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2256  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2257                                              (__v8df)_mm512_mul_pd(__A, __B),
2258                                              (__v8df)_mm512_setzero_pd());
2259}
2260
2261static __inline__ __m512 __DEFAULT_FN_ATTRS
2262_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2263  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2264                                             (__v16sf)_mm512_mul_ps(__A, __B),
2265                                             (__v16sf)__W);
2266}
2267
2268static __inline__ __m512 __DEFAULT_FN_ATTRS
2269_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2270  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2271                                             (__v16sf)_mm512_mul_ps(__A, __B),
2272                                             (__v16sf)_mm512_setzero_ps());
2273}
2274
/* Packed-double multiply with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })
2280
/* Packed-double multiply with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })
2286
/* Packed-double multiply with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
2292
/* Packed-float multiply with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
2298
/* Packed-float multiply with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask.
   The expansion must be a pure expression: no ';' follows the closing "})",
   so the macro can be used as a function argument, initializer or
   sub-expression, matching the other *_round_* macros in this header. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2304
/* Packed-float multiply with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask.
   The expansion must be a pure expression: no ';' follows the closing "})",
   so the macro can be used as a function argument, initializer or
   sub-expression, matching the other *_round_* macros in this header. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2310
2311static __inline__ __m128 __DEFAULT_FN_ATTRS
2312_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2313  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2314                (__v4sf) __B,
2315                (__v4sf) __W,
2316                (__mmask8) __U,
2317                _MM_FROUND_CUR_DIRECTION);
2318}
2319
2320static __inline__ __m128 __DEFAULT_FN_ATTRS
2321_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2322  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2323                (__v4sf) __B,
2324                (__v4sf)  _mm_setzero_ps (),
2325                (__mmask8) __U,
2326                _MM_FROUND_CUR_DIRECTION);
2327}
2328
/* Scalar-float divide with caller-supplied rounding R; unmasked form (zero
   vector as third argument, mask of all ones). */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
2334
/* Scalar-float divide with caller-supplied rounding R; W is passed as the
   third builtin argument and U as the mask. */
#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
2340
/* Scalar-float divide with caller-supplied rounding R; a zero vector is
   passed as the third builtin argument and U as the mask. */
#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
2346
2347static __inline__ __m128d __DEFAULT_FN_ATTRS
2348_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2349  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2350                (__v2df) __B,
2351                (__v2df) __W,
2352                (__mmask8) __U,
2353                _MM_FROUND_CUR_DIRECTION);
2354}
2355
2356static __inline__ __m128d __DEFAULT_FN_ATTRS
2357_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2358  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2359                (__v2df) __B,
2360                (__v2df)  _mm_setzero_pd (),
2361                (__mmask8) __U,
2362                _MM_FROUND_CUR_DIRECTION);
2363}
2364
/* Expands to __builtin_ia32_divsd_round_mask on A and B with a zero vector
 * as the third operand, an all-ones 8-bit mask and R as the rounding
 * argument. */
#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2370
/* Expands to __builtin_ia32_divsd_round_mask on A and B with W as the third
 * operand, U as the 8-bit mask and R as the rounding argument. */
#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2376
/* Expands to __builtin_ia32_divsd_round_mask on A and B with a zero vector
 * as the third operand, U as the 8-bit mask and R as the rounding
 * argument. */
#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2382
2383static __inline __m512d __DEFAULT_FN_ATTRS
2384_mm512_div_pd(__m512d __a, __m512d __b)
2385{
2386  return (__m512d)((__v8df)__a/(__v8df)__b);
2387}
2388
2389static __inline__ __m512d __DEFAULT_FN_ATTRS
2390_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2391  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2392                                              (__v8df)_mm512_div_pd(__A, __B),
2393                                              (__v8df)__W);
2394}
2395
2396static __inline__ __m512d __DEFAULT_FN_ATTRS
2397_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2398  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2399                                              (__v8df)_mm512_div_pd(__A, __B),
2400                                              (__v8df)_mm512_setzero_pd());
2401}
2402
2403static __inline __m512 __DEFAULT_FN_ATTRS
2404_mm512_div_ps(__m512 __a, __m512 __b)
2405{
2406  return (__m512)((__v16sf)__a/(__v16sf)__b);
2407}
2408
2409static __inline__ __m512 __DEFAULT_FN_ATTRS
2410_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2411  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2412                                             (__v16sf)_mm512_div_ps(__A, __B),
2413                                             (__v16sf)__W);
2414}
2415
2416static __inline__ __m512 __DEFAULT_FN_ATTRS
2417_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2418  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2419                                             (__v16sf)_mm512_div_ps(__A, __B),
2420                                             (__v16sf)_mm512_setzero_ps());
2421}
2422
/* Expands to __builtin_ia32_divpd512_mask on A and B with a zero vector as
 * the third operand, an all-ones 8-bit mask and R as the rounding
 * argument. */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2428
/* Expands to __builtin_ia32_divpd512_mask on A and B with W as the third
 * operand, U as the 8-bit mask and R as the rounding argument. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2434
/* Expands to __builtin_ia32_divpd512_mask on A and B with a zero vector as
 * the third operand, U as the 8-bit mask and R as the rounding argument. */
#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2440
/* Expands to __builtin_ia32_divps512_mask on A and B with a zero vector as
 * the third operand, an all-ones 16-bit mask and R as the rounding
 * argument. */
#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
2446
/* Expands to __builtin_ia32_divps512_mask on A and B with W as the third
 * operand, U as the 16-bit mask and R as the rounding argument.
 * The expansion carries no trailing semicolon, so the macro is usable
 * anywhere an expression is allowed (e.g. as a function argument). */
#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2452
/* Expands to __builtin_ia32_divps512_mask on A and B with a zero vector as
 * the third operand, U as the 16-bit mask and R as the rounding argument.
 * The expansion carries no trailing semicolon, so the macro is usable
 * anywhere an expression is allowed (e.g. as a function argument). */
#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2458
/* Expands to __builtin_ia32_rndscaleps_mask on A with immediate B, an
 * all-ones 16-bit mask and _MM_FROUND_CUR_DIRECTION as the rounding argument.
 * The third operand is _mm512_undefined_ps(), as in
 * _mm512_roundscale_round_ps; this keeps A expanded exactly once, so an
 * argument with side effects is not evaluated twice. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })
2463
/* Expands to __builtin_ia32_rndscaleps_mask on A with immediate imm, W as
 * the third operand, U as the 16-bit mask and _MM_FROUND_CUR_DIRECTION as
 * the rounding argument.  Argument order is (W, U, A, imm). */
#define _mm512_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
2468
/* Expands to __builtin_ia32_rndscaleps_mask on A with immediate imm, a zero
 * vector as the third operand, U as the 16-bit mask and
 * _MM_FROUND_CUR_DIRECTION as the rounding argument.  Argument order is
 * (U, A, imm). */
#define _mm512_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
2474
/* Expands to __builtin_ia32_rndscaleps_mask on A with immediate imm, W as
 * the third operand, U as the 16-bit mask and R as the rounding argument.
 * Argument order is (W, U, A, imm, R). */
#define _mm512_mask_roundscale_round_ps(W, U, A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
2479
/* Expands to __builtin_ia32_rndscaleps_mask on A with immediate imm, a zero
 * vector as the third operand, U as the 16-bit mask and R as the rounding
 * argument.  Argument order is (U, A, imm, R). */
#define _mm512_maskz_roundscale_round_ps(U, A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
2484
/* Expands to __builtin_ia32_rndscaleps_mask on A with immediate imm,
 * _mm512_undefined_ps() as the third operand, an all-ones 16-bit mask and R
 * as the rounding argument. */
#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), \
      (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
2489
/* Expands to __builtin_ia32_rndscalepd_mask on A with immediate B, an
 * all-ones 8-bit mask and _MM_FROUND_CUR_DIRECTION as the rounding argument.
 * The third operand is _mm512_undefined_pd(), as in
 * _mm512_roundscale_round_pd; this keeps A expanded exactly once, so an
 * argument with side effects is not evaluated twice. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
2494
/* Expands to __builtin_ia32_rndscalepd_mask on A with immediate imm, W as
 * the third operand, U as the 8-bit mask and _MM_FROUND_CUR_DIRECTION as the
 * rounding argument.  Argument order is (W, U, A, imm). */
#define _mm512_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
2499
/* Expands to __builtin_ia32_rndscalepd_mask on A with immediate imm, a zero
 * vector as the third operand, U as the 8-bit mask and
 * _MM_FROUND_CUR_DIRECTION as the rounding argument.  Argument order is
 * (U, A, imm). */
#define _mm512_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
2505
/* Expands to __builtin_ia32_rndscalepd_mask on A with immediate imm, W as
 * the third operand, U as the 8-bit mask and R as the rounding argument.
 * Argument order is (W, U, A, imm, R). */
#define _mm512_mask_roundscale_round_pd(W, U, A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2510
/* Expands to __builtin_ia32_rndscalepd_mask on A with immediate imm, a zero
 * vector as the third operand, U as the 8-bit mask and R as the rounding
 * argument.  Argument order is (U, A, imm, R). */
#define _mm512_maskz_roundscale_round_pd(U, A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2515
/* Expands to __builtin_ia32_rndscalepd_mask on A with immediate imm,
 * _mm512_undefined_pd() as the third operand, an all-ones 8-bit mask and R
 * as the rounding argument. */
#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), \
      (int)(imm), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2520
/* Expands to __builtin_ia32_vfmaddpd512_mask(A, B, C) with an all-ones 8-bit
 * mask and R as the rounding argument. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)); })
2526
2527
/* Expands to __builtin_ia32_vfmaddpd512_mask(A, B, C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2533
2534
/* Expands to __builtin_ia32_vfmaddpd512_mask3(A, B, C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2540
2541
/* Expands to __builtin_ia32_vfmaddpd512_maskz(A, B, C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2547
2548
/* Expands to __builtin_ia32_vfmaddpd512_mask(A, B, -C) with an all-ones
 * 8-bit mask and R as the rounding argument. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)); })
2554
2555
/* Expands to __builtin_ia32_vfmaddpd512_mask(A, B, -C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2561
2562
/* Expands to __builtin_ia32_vfmaddpd512_maskz(A, B, -C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2568
2569
/* Expands to __builtin_ia32_vfmaddpd512_mask(-A, B, C) with an all-ones
 * 8-bit mask and R as the rounding argument. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)); })
2575
2576
/* Expands to __builtin_ia32_vfmaddpd512_mask3(-A, B, C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2582
2583
/* Expands to __builtin_ia32_vfmaddpd512_maskz(-A, B, C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2589
2590
/* Expands to __builtin_ia32_vfmaddpd512_mask(-A, B, -C) with an all-ones
 * 8-bit mask and R as the rounding argument. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)); })
2596
2597
/* Expands to __builtin_ia32_vfmaddpd512_maskz(-A, B, -C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2603
2604
2605static __inline__ __m512d __DEFAULT_FN_ATTRS
2606_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2607{
2608  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2609                                                    (__v8df) __B,
2610                                                    (__v8df) __C,
2611                                                    (__mmask8) -1,
2612                                                    _MM_FROUND_CUR_DIRECTION);
2613}
2614
2615static __inline__ __m512d __DEFAULT_FN_ATTRS
2616_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2617{
2618  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2619                                                    (__v8df) __B,
2620                                                    (__v8df) __C,
2621                                                    (__mmask8) __U,
2622                                                    _MM_FROUND_CUR_DIRECTION);
2623}
2624
2625static __inline__ __m512d __DEFAULT_FN_ATTRS
2626_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2627{
2628  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2629                                                     (__v8df) __B,
2630                                                     (__v8df) __C,
2631                                                     (__mmask8) __U,
2632                                                     _MM_FROUND_CUR_DIRECTION);
2633}
2634
2635static __inline__ __m512d __DEFAULT_FN_ATTRS
2636_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2637{
2638  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2639                                                     (__v8df) __B,
2640                                                     (__v8df) __C,
2641                                                     (__mmask8) __U,
2642                                                     _MM_FROUND_CUR_DIRECTION);
2643}
2644
2645static __inline__ __m512d __DEFAULT_FN_ATTRS
2646_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2647{
2648  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2649                                                    (__v8df) __B,
2650                                                    -(__v8df) __C,
2651                                                    (__mmask8) -1,
2652                                                    _MM_FROUND_CUR_DIRECTION);
2653}
2654
2655static __inline__ __m512d __DEFAULT_FN_ATTRS
2656_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2657{
2658  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2659                                                    (__v8df) __B,
2660                                                    -(__v8df) __C,
2661                                                    (__mmask8) __U,
2662                                                    _MM_FROUND_CUR_DIRECTION);
2663}
2664
2665static __inline__ __m512d __DEFAULT_FN_ATTRS
2666_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2667{
2668  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2669                                                     (__v8df) __B,
2670                                                     -(__v8df) __C,
2671                                                     (__mmask8) __U,
2672                                                     _MM_FROUND_CUR_DIRECTION);
2673}
2674
2675static __inline__ __m512d __DEFAULT_FN_ATTRS
2676_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2677{
2678  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2679                                                    (__v8df) __B,
2680                                                    (__v8df) __C,
2681                                                    (__mmask8) -1,
2682                                                    _MM_FROUND_CUR_DIRECTION);
2683}
2684
2685static __inline__ __m512d __DEFAULT_FN_ATTRS
2686_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2687{
2688  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2689                                                     (__v8df) __B,
2690                                                     (__v8df) __C,
2691                                                     (__mmask8) __U,
2692                                                     _MM_FROUND_CUR_DIRECTION);
2693}
2694
2695static __inline__ __m512d __DEFAULT_FN_ATTRS
2696_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2697{
2698  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2699                                                     (__v8df) __B,
2700                                                     (__v8df) __C,
2701                                                     (__mmask8) __U,
2702                                                     _MM_FROUND_CUR_DIRECTION);
2703}
2704
2705static __inline__ __m512d __DEFAULT_FN_ATTRS
2706_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2707{
2708  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2709                                                    (__v8df) __B,
2710                                                    -(__v8df) __C,
2711                                                    (__mmask8) -1,
2712                                                    _MM_FROUND_CUR_DIRECTION);
2713}
2714
2715static __inline__ __m512d __DEFAULT_FN_ATTRS
2716_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2717{
2718  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2719                                                     (__v8df) __B,
2720                                                     -(__v8df) __C,
2721                                                     (__mmask8) __U,
2722                                                     _MM_FROUND_CUR_DIRECTION);
2723}
2724
/* Expands to __builtin_ia32_vfmaddps512_mask(A, B, C) with an all-ones
 * 16-bit mask and R as the rounding argument. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)-1, \
      (int)(R)); })
2730
2731
/* Expands to __builtin_ia32_vfmaddps512_mask(A, B, C) with U as the 16-bit
 * mask and R as the rounding argument. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)); })
2737
2738
/* Expands to __builtin_ia32_vfmaddps512_mask3(A, B, C) with U as the 16-bit
 * mask and R as the rounding argument. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)); })
2744
2745
/* Expands to __builtin_ia32_vfmaddps512_maskz(A, B, C) with U as the 16-bit
 * mask and R as the rounding argument. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)); })
2751
2752
/* Expands to __builtin_ia32_vfmaddps512_mask(A, B, -C) with an all-ones
 * 16-bit mask and R as the rounding argument. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)-1, \
      (int)(R)); })
2758
2759
/* Expands to __builtin_ia32_vfmaddps512_mask(A, B, -C) with U as the 16-bit
 * mask and R as the rounding argument. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)); })
2765
2766
/* Expands to __builtin_ia32_vfmaddps512_maskz(A, B, -C) with U as the 16-bit
 * mask and R as the rounding argument. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)); })
2772
2773
/* Expands to __builtin_ia32_vfmaddps512_mask(-A, B, C) with an all-ones
 * 16-bit mask and R as the rounding argument. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)-1, \
      (int)(R)); })
2779
2780
/* Expands to __builtin_ia32_vfmaddps512_mask3(-A, B, C) with U as the 16-bit
 * mask and R as the rounding argument. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)); })
2786
2787
/* Expands to __builtin_ia32_vfmaddps512_maskz(-A, B, C) with U as the 16-bit
 * mask and R as the rounding argument. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)); })
2793
2794
/* Expands to __builtin_ia32_vfmaddps512_mask(-A, B, -C) with an all-ones
 * 16-bit mask and R as the rounding argument. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)-1, \
      (int)(R)); })
2800
2801
/* Expands to __builtin_ia32_vfmaddps512_maskz(-A, B, -C) with U as the
 * 16-bit mask and R as the rounding argument. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), \
      (__mmask16)(U), \
      (int)(R)); })
2807
2808
2809static __inline__ __m512 __DEFAULT_FN_ATTRS
2810_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2811{
2812  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2813                                                   (__v16sf) __B,
2814                                                   (__v16sf) __C,
2815                                                   (__mmask16) -1,
2816                                                   _MM_FROUND_CUR_DIRECTION);
2817}
2818
2819static __inline__ __m512 __DEFAULT_FN_ATTRS
2820_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2821{
2822  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2823                                                   (__v16sf) __B,
2824                                                   (__v16sf) __C,
2825                                                   (__mmask16) __U,
2826                                                   _MM_FROUND_CUR_DIRECTION);
2827}
2828
2829static __inline__ __m512 __DEFAULT_FN_ATTRS
2830_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2831{
2832  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2833                                                    (__v16sf) __B,
2834                                                    (__v16sf) __C,
2835                                                    (__mmask16) __U,
2836                                                    _MM_FROUND_CUR_DIRECTION);
2837}
2838
2839static __inline__ __m512 __DEFAULT_FN_ATTRS
2840_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2841{
2842  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2843                                                    (__v16sf) __B,
2844                                                    (__v16sf) __C,
2845                                                    (__mmask16) __U,
2846                                                    _MM_FROUND_CUR_DIRECTION);
2847}
2848
2849static __inline__ __m512 __DEFAULT_FN_ATTRS
2850_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2851{
2852  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2853                                                   (__v16sf) __B,
2854                                                   -(__v16sf) __C,
2855                                                   (__mmask16) -1,
2856                                                   _MM_FROUND_CUR_DIRECTION);
2857}
2858
2859static __inline__ __m512 __DEFAULT_FN_ATTRS
2860_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2861{
2862  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2863                                                   (__v16sf) __B,
2864                                                   -(__v16sf) __C,
2865                                                   (__mmask16) __U,
2866                                                   _MM_FROUND_CUR_DIRECTION);
2867}
2868
2869static __inline__ __m512 __DEFAULT_FN_ATTRS
2870_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2871{
2872  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2873                                                    (__v16sf) __B,
2874                                                    -(__v16sf) __C,
2875                                                    (__mmask16) __U,
2876                                                    _MM_FROUND_CUR_DIRECTION);
2877}
2878
2879static __inline__ __m512 __DEFAULT_FN_ATTRS
2880_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2881{
2882  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2883                                                   (__v16sf) __B,
2884                                                   (__v16sf) __C,
2885                                                   (__mmask16) -1,
2886                                                   _MM_FROUND_CUR_DIRECTION);
2887}
2888
2889static __inline__ __m512 __DEFAULT_FN_ATTRS
2890_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2891{
2892  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2893                                                    (__v16sf) __B,
2894                                                    (__v16sf) __C,
2895                                                    (__mmask16) __U,
2896                                                    _MM_FROUND_CUR_DIRECTION);
2897}
2898
2899static __inline__ __m512 __DEFAULT_FN_ATTRS
2900_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2901{
2902  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2903                                                    (__v16sf) __B,
2904                                                    (__v16sf) __C,
2905                                                    (__mmask16) __U,
2906                                                    _MM_FROUND_CUR_DIRECTION);
2907}
2908
2909static __inline__ __m512 __DEFAULT_FN_ATTRS
2910_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2911{
2912  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2913                                                   (__v16sf) __B,
2914                                                   -(__v16sf) __C,
2915                                                   (__mmask16) -1,
2916                                                   _MM_FROUND_CUR_DIRECTION);
2917}
2918
2919static __inline__ __m512 __DEFAULT_FN_ATTRS
2920_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2921{
2922  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2923                                                    (__v16sf) __B,
2924                                                    -(__v16sf) __C,
2925                                                    (__mmask16) __U,
2926                                                    _MM_FROUND_CUR_DIRECTION);
2927}
2928
/* Expands to __builtin_ia32_vfmaddsubpd512_mask(A, B, C) with an all-ones
 * 8-bit mask and R as the rounding argument. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)-1, \
      (int)(R)); })
2934
2935
/* Expands to __builtin_ia32_vfmaddsubpd512_mask(A, B, C) with U as the 8-bit
 * mask and R as the rounding argument. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2941
2942
/* Expands to __builtin_ia32_vfmaddsubpd512_mask3(A, B, C) with U as the
 * 8-bit mask and R as the rounding argument. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2948
2949
/* Expands to __builtin_ia32_vfmaddsubpd512_maskz(A, B, C) with U as the
 * 8-bit mask and R as the rounding argument. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
2955
2956
2957#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
2958  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2959                                              (__v8df)(__m512d)(B), \
2960                                              -(__v8df)(__m512d)(C), \
2961                                              (__mmask8)-1, (int)(R)); })
2962
2963
2964#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
2965  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2966                                              (__v8df)(__m512d)(B), \
2967                                              -(__v8df)(__m512d)(C), \
2968                                              (__mmask8)(U), (int)(R)); })
2969
2970
2971#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
2972  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2973                                               (__v8df)(__m512d)(B), \
2974                                               -(__v8df)(__m512d)(C), \
2975                                               (__mmask8)(U), (int)(R)); })
2976
2977
2978static __inline__ __m512d __DEFAULT_FN_ATTRS
2979_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2980{
2981  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2982                                                       (__v8df) __B,
2983                                                       (__v8df) __C,
2984                                                       (__mmask8) -1,
2985                                                       _MM_FROUND_CUR_DIRECTION);
2986}
2987
2988static __inline__ __m512d __DEFAULT_FN_ATTRS
2989_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2990{
2991  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2992                                                       (__v8df) __B,
2993                                                       (__v8df) __C,
2994                                                       (__mmask8) __U,
2995                                                       _MM_FROUND_CUR_DIRECTION);
2996}
2997
2998static __inline__ __m512d __DEFAULT_FN_ATTRS
2999_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3000{
3001  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3002                                                        (__v8df) __B,
3003                                                        (__v8df) __C,
3004                                                        (__mmask8) __U,
3005                                                        _MM_FROUND_CUR_DIRECTION);
3006}
3007
3008static __inline__ __m512d __DEFAULT_FN_ATTRS
3009_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3010{
3011  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3012                                                        (__v8df) __B,
3013                                                        (__v8df) __C,
3014                                                        (__mmask8) __U,
3015                                                        _MM_FROUND_CUR_DIRECTION);
3016}
3017
3018static __inline__ __m512d __DEFAULT_FN_ATTRS
3019_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3020{
3021  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3022                                                       (__v8df) __B,
3023                                                       -(__v8df) __C,
3024                                                       (__mmask8) -1,
3025                                                       _MM_FROUND_CUR_DIRECTION);
3026}
3027
3028static __inline__ __m512d __DEFAULT_FN_ATTRS
3029_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3030{
3031  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3032                                                       (__v8df) __B,
3033                                                       -(__v8df) __C,
3034                                                       (__mmask8) __U,
3035                                                       _MM_FROUND_CUR_DIRECTION);
3036}
3037
3038static __inline__ __m512d __DEFAULT_FN_ATTRS
3039_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3040{
3041  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3042                                                        (__v8df) __B,
3043                                                        -(__v8df) __C,
3044                                                        (__mmask8) __U,
3045                                                        _MM_FROUND_CUR_DIRECTION);
3046}
3047
/* Explicit-rounding fmaddsub / fmsubadd macros for 16 x float vectors.
 * Each one casts its vector arguments through __m512 to __v16sf and passes
 * (int)(R) as the builtin's final argument.  The unmasked forms supply an
 * all-ones mask; the fmsubadd forms reuse the fmaddsub builtins with C
 * negated. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)-1, (int)(R)); \
  })

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask3( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
        (__mmask16)-1, (int)(R)); \
  })

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
3095
3096
3097static __inline__ __m512 __DEFAULT_FN_ATTRS
3098_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3099{
3100  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3101                                                      (__v16sf) __B,
3102                                                      (__v16sf) __C,
3103                                                      (__mmask16) -1,
3104                                                      _MM_FROUND_CUR_DIRECTION);
3105}
3106
3107static __inline__ __m512 __DEFAULT_FN_ATTRS
3108_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3109{
3110  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3111                                                      (__v16sf) __B,
3112                                                      (__v16sf) __C,
3113                                                      (__mmask16) __U,
3114                                                      _MM_FROUND_CUR_DIRECTION);
3115}
3116
3117static __inline__ __m512 __DEFAULT_FN_ATTRS
3118_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3119{
3120  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3121                                                       (__v16sf) __B,
3122                                                       (__v16sf) __C,
3123                                                       (__mmask16) __U,
3124                                                       _MM_FROUND_CUR_DIRECTION);
3125}
3126
3127static __inline__ __m512 __DEFAULT_FN_ATTRS
3128_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3129{
3130  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3131                                                       (__v16sf) __B,
3132                                                       (__v16sf) __C,
3133                                                       (__mmask16) __U,
3134                                                       _MM_FROUND_CUR_DIRECTION);
3135}
3136
3137static __inline__ __m512 __DEFAULT_FN_ATTRS
3138_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3139{
3140  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3141                                                      (__v16sf) __B,
3142                                                      -(__v16sf) __C,
3143                                                      (__mmask16) -1,
3144                                                      _MM_FROUND_CUR_DIRECTION);
3145}
3146
3147static __inline__ __m512 __DEFAULT_FN_ATTRS
3148_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3149{
3150  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3151                                                      (__v16sf) __B,
3152                                                      -(__v16sf) __C,
3153                                                      (__mmask16) __U,
3154                                                      _MM_FROUND_CUR_DIRECTION);
3155}
3156
3157static __inline__ __m512 __DEFAULT_FN_ATTRS
3158_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3159{
3160  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3161                                                       (__v16sf) __B,
3162                                                       -(__v16sf) __C,
3163                                                       (__mmask16) __U,
3164                                                       _MM_FROUND_CUR_DIRECTION);
3165}
3166
/* mask3 fmsub on 8 x double with caller-supplied rounding argument R; maps
 * directly onto the vfmsubpd512 mask3 builtin. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmsubpd512_mask3( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
3172
3173
3174static __inline__ __m512d __DEFAULT_FN_ATTRS
3175_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3176{
3177  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3178                                                     (__v8df) __B,
3179                                                     (__v8df) __C,
3180                                                     (__mmask8) __U,
3181                                                     _MM_FROUND_CUR_DIRECTION);
3182}
3183
/* mask3 fmsub on 16 x float with caller-supplied rounding argument R; maps
 * directly onto the vfmsubps512 mask3 builtin. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmsubps512_mask3( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
3189
3190
3191static __inline__ __m512 __DEFAULT_FN_ATTRS
3192_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3193{
3194  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3195                                                    (__v16sf) __B,
3196                                                    (__v16sf) __C,
3197                                                    (__mmask16) __U,
3198                                                    _MM_FROUND_CUR_DIRECTION);
3199}
3200
/* mask3 fmsubadd on 8 x double with caller-supplied rounding argument R.
 * Unlike the other fmsubadd macros this one has a dedicated vfmsubaddpd512
 * builtin, so C is passed through without negation. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmsubaddpd512_mask3( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
3206
3207
3208static __inline__ __m512d __DEFAULT_FN_ATTRS
3209_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3210{
3211  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3212                                                        (__v8df) __B,
3213                                                        (__v8df) __C,
3214                                                        (__mmask8) __U,
3215                                                        _MM_FROUND_CUR_DIRECTION);
3216}
3217
/* mask3 fmsubadd on 16 x float with caller-supplied rounding argument R.
 * Uses the dedicated vfmsubaddps512 builtin, so C is passed through without
 * negation. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmsubaddps512_mask3( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
3223
3224
3225static __inline__ __m512 __DEFAULT_FN_ATTRS
3226_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3227{
3228  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3229                                                       (__v16sf) __B,
3230                                                       (__v16sf) __C,
3231                                                       (__mmask16) __U,
3232                                                       _MM_FROUND_CUR_DIRECTION);
3233}
3234
/* mask fnmadd on 8 x double with caller-supplied rounding argument R; maps
 * directly onto the vfnmaddpd512 mask builtin (no operand is negated
 * here). */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfnmaddpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
3240
3241
3242static __inline__ __m512d __DEFAULT_FN_ATTRS
3243_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3244{
3245  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3246                                                     (__v8df) __B,
3247                                                     (__v8df) __C,
3248                                                     (__mmask8) __U,
3249                                                     _MM_FROUND_CUR_DIRECTION);
3250}
3251
/* mask fnmadd on 16 x float with caller-supplied rounding argument R; maps
 * directly onto the vfnmaddps512 mask builtin (no operand is negated
 * here). */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfnmaddps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
3257
3258
3259static __inline__ __m512 __DEFAULT_FN_ATTRS
3260_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3261{
3262  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3263                                                    (__v16sf) __B,
3264                                                    (__v16sf) __C,
3265                                                    (__mmask16) __U,
3266                                                    _MM_FROUND_CUR_DIRECTION);
3267}
3268
/* mask and mask3 fnmsub on 8 x double with caller-supplied rounding
 * argument R.  Both map directly onto the matching vfnmsubpd512 builtin
 * without negating any operand. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfnmsubpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfnmsubpd512_mask3( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
        (__mmask8)(U), (int)(R)); \
  })
3281
3282
3283static __inline__ __m512d __DEFAULT_FN_ATTRS
3284_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3285{
3286  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3287                                                     (__v8df) __B,
3288                                                     (__v8df) __C,
3289                                                     (__mmask8) __U,
3290                                                     _MM_FROUND_CUR_DIRECTION);
3291}
3292
3293static __inline__ __m512d __DEFAULT_FN_ATTRS
3294_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3295{
3296  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3297                                                      (__v8df) __B,
3298                                                      (__v8df) __C,
3299                                                      (__mmask8) __U,
3300                                                      _MM_FROUND_CUR_DIRECTION);
3301}
3302
/* mask and mask3 fnmsub on 16 x float with caller-supplied rounding
 * argument R.  Both map directly onto the matching vfnmsubps512 builtin
 * without negating any operand. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfnmsubps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfnmsubps512_mask3( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
        (__mmask16)(U), (int)(R)); \
  })
3315
3316
3317static __inline__ __m512 __DEFAULT_FN_ATTRS
3318_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3319{
3320  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3321                                                    (__v16sf) __B,
3322                                                    (__v16sf) __C,
3323                                                    (__mmask16) __U,
3324                                                    _MM_FROUND_CUR_DIRECTION);
3325}
3326
3327static __inline__ __m512 __DEFAULT_FN_ATTRS
3328_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3329{
3330  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3331                                                     (__v16sf) __B,
3332                                                     (__v16sf) __C,
3333                                                     (__mmask16) __U,
3334                                                     _MM_FROUND_CUR_DIRECTION);
3335}
3336
3337
3338
3339/* Vector permutations */
3340
3341static __inline __m512i __DEFAULT_FN_ATTRS
3342_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3343{
3344  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3345                                                       /* idx */ ,
3346                                                       (__v16si) __A,
3347                                                       (__v16si) __B,
3348                                                       (__mmask16) -1);
3349}
3350
3351static __inline__ __m512i __DEFAULT_FN_ATTRS
3352_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3353                                __m512i __I, __m512i __B)
3354{
3355  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3356                                                        /* idx */ ,
3357                                                        (__v16si) __A,
3358                                                        (__v16si) __B,
3359                                                        (__mmask16) __U);
3360}
3361
3362static __inline__ __m512i __DEFAULT_FN_ATTRS
3363_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3364                                 __m512i __I, __m512i __B)
3365{
3366  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3367                                                        /* idx */ ,
3368                                                        (__v16si) __A,
3369                                                        (__v16si) __B,
3370                                                        (__mmask16) __U);
3371}
3372
3373static __inline __m512i __DEFAULT_FN_ATTRS
3374_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3375{
3376  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3377                                                       /* idx */ ,
3378                                                       (__v8di) __A,
3379                                                       (__v8di) __B,
3380                                                       (__mmask8) -1);
3381}
3382
3383static __inline__ __m512i __DEFAULT_FN_ATTRS
3384_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3385                                __m512i __B)
3386{
3387  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3388                                                       /* idx */ ,
3389                                                       (__v8di) __A,
3390                                                       (__v8di) __B,
3391                                                       (__mmask8) __U);
3392}
3393
3394
3395static __inline__ __m512i __DEFAULT_FN_ATTRS
3396_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3397         __m512i __I, __m512i __B)
3398{
3399  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3400                                                        /* idx */ ,
3401                                                        (__v8di) __A,
3402                                                        (__v8di) __B,
3403                                                        (__mmask8) __U);
3404}
3405
/* alignr macros for 64-bit (alignq512) and 32-bit (alignd512) elements.
 * All six share one builtin per element width and differ only in the last
 * two arguments: the plain forms pass a zero vector and an all-ones mask,
 * the mask forms pass W and U, and the maskz forms pass a zero vector
 * and U.  The shift count is passed as (int). */
#define _mm512_alignr_epi64(A, B, I) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_alignq512_mask( \
        (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(I), \
        (__v8di)_mm512_setzero_si512(), (__mmask8)-1); \
  })

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  __extension__({ \
    (__m512i)__builtin_ia32_alignq512_mask( \
        (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(imm), \
        (__v8di)(__m512i)(W), (__mmask8)(U)); \
  })

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  __extension__({ \
    (__m512i)__builtin_ia32_alignq512_mask( \
        (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(imm), \
        (__v8di)_mm512_setzero_si512(), (__mmask8)(U)); \
  })

#define _mm512_alignr_epi32(A, B, I) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_alignd512_mask( \
        (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(I), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)-1); \
  })

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_alignd512_mask( \
        (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(imm), \
        (__v16si)(__m512i)(W), (__mmask16)(U)); \
  })

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  __extension__({ \
    (__m512i)__builtin_ia32_alignd512_mask( \
        (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(imm), \
        (__v16si)_mm512_setzero_si512(), (__mmask16)(U)); \
  })
3441/* Vector Extract */
3442
/* Sub-vector extraction macros.
 *
 * _mm512_extractf64x4_pd shuffles out four doubles of A: elements 4..7 when
 * bit 0 of I is set, otherwise elements 0..3.
 * _mm512_extractf32x4_ps shuffles out four floats of A starting at element
 * 4 * (I & 0x3).
 * The mask / maskz forms feed that extracted vector to a select builtin
 * together with U and either W or a zero vector. */
#define _mm512_extractf64x4_pd(A, I) \
  __extension__ ({ \
    (__m256d)__builtin_shufflevector( \
        (__v8df)(__m512d)(A), (__v8df)_mm512_undefined_pd(), \
        ((I) & 1) ? 4 : 0, ((I) & 1) ? 5 : 1, \
        ((I) & 1) ? 6 : 2, ((I) & 1) ? 7 : 3); \
  })

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
        (__v4df)(W)); \
  })

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  __extension__ ({ \
    (__m256d)__builtin_ia32_selectpd_256( \
        (__mmask8)(U), (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
        (__v4df)_mm256_setzero_pd()); \
  })

#define _mm512_extractf32x4_ps(A, I) \
  __extension__ ({ \
    (__m128)__builtin_shufflevector( \
        (__v16sf)(__m512)(A), (__v16sf)_mm512_undefined_ps(), \
        0 + ((I) & 0x3) * 4, 1 + ((I) & 0x3) * 4, \
        2 + ((I) & 0x3) * 4, 3 + ((I) & 0x3) * 4); \
  })

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  __extension__ ({ \
    (__m128)__builtin_ia32_selectps_128( \
        (__mmask8)(U), (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
        (__v4sf)(W)); \
  })

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  __extension__ ({ \
    (__m128)__builtin_ia32_selectps_128( \
        (__mmask8)(U), (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
        (__v4sf)_mm_setzero_ps()); \
  })
3478
3479/* Vector Blend */
3480
3481static __inline __m512d __DEFAULT_FN_ATTRS
3482_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3483{
3484  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3485                 (__v8df) __W,
3486                 (__v8df) __A);
3487}
3488
3489static __inline __m512 __DEFAULT_FN_ATTRS
3490_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3491{
3492  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3493                (__v16sf) __W,
3494                (__v16sf) __A);
3495}
3496
3497static __inline __m512i __DEFAULT_FN_ATTRS
3498_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3499{
3500  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3501                (__v8di) __W,
3502                (__v8di) __A);
3503}
3504
3505static __inline __m512i __DEFAULT_FN_ATTRS
3506_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3507{
3508  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3509                (__v16si) __W,
3510                (__v16si) __A);
3511}
3512
3513/* Compare */
3514
/* Comparison macros for 16 x float vectors; each produces a __mmask16.
 *
 * The two _round_ macros call the cmpps512 builtin directly with predicate P
 * and rounding argument R; the unmasked one supplies an all-ones mask.
 * _mm512_cmp_ps_mask / _mm512_mask_cmp_ps_mask forward to them with
 * _MM_FROUND_CUR_DIRECTION, and the named shorthands below forward in turn
 * with the fixed _CMP_* predicate shown on each line. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  __extension__ ({ \
    (__mmask16)__builtin_ia32_cmpps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
        (__mmask16)-1, (int)(R)); \
  })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  __extension__ ({ \
    (__mmask16)__builtin_ia32_cmpps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(P), \
        (__mmask16)(U), (int)(R)); \
  })

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3569
/* Compare A and B (both cast to __v8df) with predicate P and rounding
 * argument R via __builtin_ia32_cmppd512_mask.  The unmasked form passes an
 * all-ones __mmask8; the result is cast to __mmask8. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(P), (__mmask8)-1, (int)(R)); })

/* Same as above, but the caller-supplied mask U is handed to the builtin. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(P), (__mmask8)(U), (int)(R)); })

/* Non-"round" spellings: fix R to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3584
/* Fixed-predicate shorthands for the packed-double compare macros.  Each
 * unmasked form expands to _mm512_cmp_pd_mask(A, B, pred); each _mask_ form
 * expands to _mm512_mask_cmp_pd_mask(U, A, B, pred).  `pred` is the _CMP_*
 * constant spelled on the expansion line. */

/* predicate: _CMP_EQ_OQ */
#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_EQ_OQ)

/* predicate: _CMP_LT_OS */
#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_LT_OS)

/* predicate: _CMP_LE_OS */
#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_LE_OS)

/* predicate: _CMP_UNORD_Q */
#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_UNORD_Q)

/* predicate: _CMP_NEQ_UQ */
#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_NEQ_UQ)

/* predicate: _CMP_NLT_US */
#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_NLT_US)

/* predicate: _CMP_NLE_US */
#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_NLE_US)

/* predicate: _CMP_ORD_Q */
#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_ORD_Q)
3624
3625/* Conversion */
3626
/* Explicit-rounding wrappers around __builtin_ia32_cvttps2udq512_mask.
 * A is cast to __v16sf and R to int.  The three forms differ only in the
 * second operand and the mask:
 *   plain : _mm512_undefined_epi32(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm512_setzero_si512(), mask U */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      (int)(R)); })
3641
3642
3643static __inline __m512i __DEFAULT_FN_ATTRS
3644_mm512_cvttps_epu32(__m512 __A)
3645{
3646  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3647                  (__v16si)
3648                  _mm512_setzero_si512 (),
3649                  (__mmask16) -1,
3650                  _MM_FROUND_CUR_DIRECTION);
3651}
3652
3653static __inline__ __m512i __DEFAULT_FN_ATTRS
3654_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3655{
3656  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3657                   (__v16si) __W,
3658                   (__mmask16) __U,
3659                   _MM_FROUND_CUR_DIRECTION);
3660}
3661
3662static __inline__ __m512i __DEFAULT_FN_ATTRS
3663_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3664{
3665  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3666                   (__v16si) _mm512_setzero_si512 (),
3667                   (__mmask16) __U,
3668                   _MM_FROUND_CUR_DIRECTION);
3669}
3670
/* Explicit-rounding wrappers around __builtin_ia32_cvtdq2ps512_mask.
 * A is cast to __v16si and R to int.  Second operand / mask per form:
 *   plain : _mm512_setzero_ps(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm512_setzero_ps(), mask U */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
3685
/* Explicit-rounding wrappers around __builtin_ia32_cvtudq2ps512_mask.
 * A is cast to __v16si and R to int.  Second operand / mask per form:
 *   plain : _mm512_setzero_ps(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm512_setzero_ps(), mask U */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
3700
3701static __inline__ __m512 __DEFAULT_FN_ATTRS
3702_mm512_cvtepu32_ps (__m512i __A)
3703{
3704  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3705                 (__v16sf) _mm512_undefined_ps (),
3706                 (__mmask16) -1,
3707                 _MM_FROUND_CUR_DIRECTION);
3708}
3709
3710static __inline__ __m512 __DEFAULT_FN_ATTRS
3711_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3712{
3713  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3714                 (__v16sf) __W,
3715                 (__mmask16) __U,
3716                 _MM_FROUND_CUR_DIRECTION);
3717}
3718
3719static __inline__ __m512 __DEFAULT_FN_ATTRS
3720_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3721{
3722  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3723                 (__v16sf) _mm512_setzero_ps (),
3724                 (__mmask16) __U,
3725                 _MM_FROUND_CUR_DIRECTION);
3726}
3727
3728static __inline __m512d __DEFAULT_FN_ATTRS
3729_mm512_cvtepi32_pd(__m256i __A)
3730{
3731  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
3732                (__v8df)
3733                _mm512_setzero_pd (),
3734                (__mmask8) -1);
3735}
3736
3737static __inline__ __m512d __DEFAULT_FN_ATTRS
3738_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3739{
3740  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
3741                (__v8df) __W,
3742                (__mmask8) __U);
3743}
3744
3745static __inline__ __m512d __DEFAULT_FN_ATTRS
3746_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3747{
3748  return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
3749                (__v8df) _mm512_setzero_pd (),
3750                (__mmask8) __U);
3751}
3752
3753static __inline__ __m512d __DEFAULT_FN_ATTRS
3754_mm512_cvtepi32lo_pd(__m512i __A)
3755{
3756  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3757}
3758
3759static __inline__ __m512d __DEFAULT_FN_ATTRS
3760_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3761{
3762  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3763}
3764
3765static __inline__ __m512 __DEFAULT_FN_ATTRS
3766_mm512_cvtepi32_ps (__m512i __A)
3767{
3768  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3769                (__v16sf) _mm512_undefined_ps (),
3770                (__mmask16) -1,
3771                _MM_FROUND_CUR_DIRECTION);
3772}
3773
3774static __inline__ __m512 __DEFAULT_FN_ATTRS
3775_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3776{
3777  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3778                (__v16sf) __W,
3779                (__mmask16) __U,
3780                _MM_FROUND_CUR_DIRECTION);
3781}
3782
3783static __inline__ __m512 __DEFAULT_FN_ATTRS
3784_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3785{
3786  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3787                (__v16sf) _mm512_setzero_ps (),
3788                (__mmask16) __U,
3789                _MM_FROUND_CUR_DIRECTION);
3790}
3791
3792static __inline __m512d __DEFAULT_FN_ATTRS
3793_mm512_cvtepu32_pd(__m256i __A)
3794{
3795  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
3796                (__v8df)
3797                _mm512_setzero_pd (),
3798                (__mmask8) -1);
3799}
3800
3801static __inline__ __m512d __DEFAULT_FN_ATTRS
3802_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3803{
3804  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
3805                  (__v8df) __W,
3806                  (__mmask8) __U);
3807}
3808
3809static __inline__ __m512d __DEFAULT_FN_ATTRS
3810_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3811{
3812  return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
3813                  (__v8df) _mm512_setzero_pd (),
3814                  (__mmask8) __U);
3815}
3816
3817static __inline__ __m512d __DEFAULT_FN_ATTRS
3818_mm512_cvtepu32lo_pd(__m512i __A)
3819{
3820  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3821}
3822
3823static __inline__ __m512d __DEFAULT_FN_ATTRS
3824_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3825{
3826  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3827}
3828
/* Explicit-rounding wrappers around __builtin_ia32_cvtpd2ps512_mask; the
 * result is a 256-bit __m256.  A is cast to __v8df and R to int.
 * Second operand / mask per form:
 *   plain : _mm256_setzero_ps(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm256_setzero_ps(), mask U */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
3843
3844static __inline__ __m256 __DEFAULT_FN_ATTRS
3845_mm512_cvtpd_ps (__m512d __A)
3846{
3847  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3848                (__v8sf) _mm256_undefined_ps (),
3849                (__mmask8) -1,
3850                _MM_FROUND_CUR_DIRECTION);
3851}
3852
3853static __inline__ __m256 __DEFAULT_FN_ATTRS
3854_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3855{
3856  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3857                (__v8sf) __W,
3858                (__mmask8) __U,
3859                _MM_FROUND_CUR_DIRECTION);
3860}
3861
3862static __inline__ __m256 __DEFAULT_FN_ATTRS
3863_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3864{
3865  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3866                (__v8sf) _mm256_setzero_ps (),
3867                (__mmask8) __U,
3868                _MM_FROUND_CUR_DIRECTION);
3869}
3870
3871static __inline__ __m512 __DEFAULT_FN_ATTRS
3872_mm512_cvtpd_pslo (__m512d __A)
3873{
3874  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3875                (__v8sf) _mm256_setzero_ps (),
3876                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3877}
3878
3879static __inline__ __m512 __DEFAULT_FN_ATTRS
3880_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3881{
3882  return (__m512) __builtin_shufflevector (
3883                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3884                                               __U, __A),
3885                (__v8sf) _mm256_setzero_ps (),
3886                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3887}
3888
/* Wrappers around __builtin_ia32_vcvtps2ph512_mask; the result is a 256-bit
 * __m256i.  A is cast to __v16sf and I to int.
 * Third operand / mask per form:
 *   plain : _mm256_undefined_si256(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm256_setzero_si256(), mask U
 *
 * Macro parameters follow the file-wide convention (W = source vector,
 * U = mask).  Argument positions are unchanged, so callers are unaffected. */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
3903
/* Wrappers around __builtin_ia32_vcvtps2ph512_mask; the result is a 256-bit
 * __m256i.  A is cast to __v16sf and I to int.
 * Third operand / mask per form:
 *   plain : _mm256_setzero_si256(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm256_setzero_si256(), mask U
 *
 * Macro parameters follow the file-wide convention (W = source vector,
 * U = mask).  Argument positions are unchanged, so callers are unaffected. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_cvtps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
3918
/* Explicit-rounding wrappers around __builtin_ia32_vcvtph2ps512_mask.
 * A (a 256-bit integer vector) is cast to __v16hi and R to int.
 * Second operand / mask per form:
 *   plain : _mm512_undefined_ps(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm512_setzero_ps(), mask U */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
3933
3934
3935static  __inline __m512 __DEFAULT_FN_ATTRS
3936_mm512_cvtph_ps(__m256i __A)
3937{
3938  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3939                (__v16sf)
3940                _mm512_setzero_ps (),
3941                (__mmask16) -1,
3942                _MM_FROUND_CUR_DIRECTION);
3943}
3944
3945static __inline__ __m512 __DEFAULT_FN_ATTRS
3946_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3947{
3948  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3949                 (__v16sf) __W,
3950                 (__mmask16) __U,
3951                 _MM_FROUND_CUR_DIRECTION);
3952}
3953
3954static __inline__ __m512 __DEFAULT_FN_ATTRS
3955_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3956{
3957  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3958                 (__v16sf) _mm512_setzero_ps (),
3959                 (__mmask16) __U,
3960                 _MM_FROUND_CUR_DIRECTION);
3961}
3962
/* Explicit-rounding wrappers around __builtin_ia32_cvttpd2dq512_mask; the
 * result is a 256-bit __m256i.  A is cast to __v8df and R to int.
 * Second operand / mask per form:
 *   plain : _mm256_setzero_si256(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm256_setzero_si256(), mask U */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), \
      (int)(R)); })
3977
3978static __inline __m256i __DEFAULT_FN_ATTRS
3979_mm512_cvttpd_epi32(__m512d __a)
3980{
3981  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3982                                                   (__v8si)_mm256_setzero_si256(),
3983                                                   (__mmask8) -1,
3984                                                    _MM_FROUND_CUR_DIRECTION);
3985}
3986
3987static __inline__ __m256i __DEFAULT_FN_ATTRS
3988_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3989{
3990  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3991                  (__v8si) __W,
3992                  (__mmask8) __U,
3993                  _MM_FROUND_CUR_DIRECTION);
3994}
3995
3996static __inline__ __m256i __DEFAULT_FN_ATTRS
3997_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3998{
3999  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4000                  (__v8si) _mm256_setzero_si256 (),
4001                  (__mmask8) __U,
4002                  _MM_FROUND_CUR_DIRECTION);
4003}
4004
/* Explicit-rounding wrappers around __builtin_ia32_cvttps2dq512_mask.
 * A is cast to __v16sf and R to int.  Second operand / mask per form:
 *   plain : _mm512_setzero_si512(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm512_setzero_si512(), mask U */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      (int)(R)); })
4019
4020static __inline __m512i __DEFAULT_FN_ATTRS
4021_mm512_cvttps_epi32(__m512 __a)
4022{
4023  return (__m512i)
4024    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4025                                     (__v16si) _mm512_setzero_si512 (),
4026                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
4027}
4028
4029static __inline__ __m512i __DEFAULT_FN_ATTRS
4030_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4031{
4032  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4033                  (__v16si) __W,
4034                  (__mmask16) __U,
4035                  _MM_FROUND_CUR_DIRECTION);
4036}
4037
4038static __inline__ __m512i __DEFAULT_FN_ATTRS
4039_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
4040{
4041  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4042                  (__v16si) _mm512_setzero_si512 (),
4043                  (__mmask16) __U,
4044                  _MM_FROUND_CUR_DIRECTION);
4045}
4046
/* Explicit-rounding wrappers around __builtin_ia32_cvtps2dq512_mask.
 * A is cast to __v16sf and R to int.  Second operand / mask per form:
 *   plain : _mm512_setzero_si512(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm512_setzero_si512(), mask U */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      (int)(R)); })
4061
4062static __inline__ __m512i __DEFAULT_FN_ATTRS
4063_mm512_cvtps_epi32 (__m512 __A)
4064{
4065  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4066                 (__v16si) _mm512_undefined_epi32 (),
4067                 (__mmask16) -1,
4068                 _MM_FROUND_CUR_DIRECTION);
4069}
4070
4071static __inline__ __m512i __DEFAULT_FN_ATTRS
4072_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4073{
4074  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4075                 (__v16si) __W,
4076                 (__mmask16) __U,
4077                 _MM_FROUND_CUR_DIRECTION);
4078}
4079
4080static __inline__ __m512i __DEFAULT_FN_ATTRS
4081_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
4082{
4083  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4084                 (__v16si)
4085                 _mm512_setzero_si512 (),
4086                 (__mmask16) __U,
4087                 _MM_FROUND_CUR_DIRECTION);
4088}
4089
/* Explicit-rounding wrappers around __builtin_ia32_cvtpd2dq512_mask; the
 * result is a 256-bit __m256i.  A is cast to __v8df and R to int.
 * Second operand / mask per form:
 *   plain : _mm256_setzero_si256(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm256_setzero_si256(), mask U */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), \
      (int)(R)); })
4104
4105static __inline__ __m256i __DEFAULT_FN_ATTRS
4106_mm512_cvtpd_epi32 (__m512d __A)
4107{
4108  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4109                 (__v8si)
4110                 _mm256_undefined_si256 (),
4111                 (__mmask8) -1,
4112                 _MM_FROUND_CUR_DIRECTION);
4113}
4114
4115static __inline__ __m256i __DEFAULT_FN_ATTRS
4116_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4117{
4118  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4119                 (__v8si) __W,
4120                 (__mmask8) __U,
4121                 _MM_FROUND_CUR_DIRECTION);
4122}
4123
4124static __inline__ __m256i __DEFAULT_FN_ATTRS
4125_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4126{
4127  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4128                 (__v8si)
4129                 _mm256_setzero_si256 (),
4130                 (__mmask8) __U,
4131                 _MM_FROUND_CUR_DIRECTION);
4132}
4133
/* Explicit-rounding wrappers around __builtin_ia32_cvtps2udq512_mask.
 * A is cast to __v16sf and R to int.  Second operand / mask per form:
 *   plain : _mm512_setzero_si512(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm512_setzero_si512(), mask U */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), \
      (int)(R)); })
4148
4149static __inline__ __m512i __DEFAULT_FN_ATTRS
4150_mm512_cvtps_epu32 ( __m512 __A)
4151{
4152  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4153                  (__v16si)\
4154                  _mm512_undefined_epi32 (),\
4155                  (__mmask16) -1,\
4156                  _MM_FROUND_CUR_DIRECTION);\
4157}
4158
4159static __inline__ __m512i __DEFAULT_FN_ATTRS
4160_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4161{
4162  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4163                  (__v16si) __W,
4164                  (__mmask16) __U,
4165                  _MM_FROUND_CUR_DIRECTION);
4166}
4167
4168static __inline__ __m512i __DEFAULT_FN_ATTRS
4169_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4170{
4171  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4172                  (__v16si)
4173                  _mm512_setzero_si512 (),
4174                  (__mmask16) __U ,
4175                  _MM_FROUND_CUR_DIRECTION);
4176}
4177
/* Explicit-rounding wrappers around __builtin_ia32_cvtpd2udq512_mask; the
 * result is a 256-bit __m256i.  A is cast to __v8df and R to int.
 * Second operand / mask per form:
 *   plain : _mm256_setzero_si256(), all-ones mask
 *   mask  : W, mask U
 *   maskz : _mm256_setzero_si256(), mask U
 *
 * W goes through an intermediate (__m256i) cast before (__v8si), matching
 * _mm512_mask_cvt_roundpd_epi32 and the other masked conversion macros. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)); })
4192
4193static __inline__ __m256i __DEFAULT_FN_ATTRS
4194_mm512_cvtpd_epu32 (__m512d __A)
4195{
4196  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4197                  (__v8si)
4198                  _mm256_undefined_si256 (),
4199                  (__mmask8) -1,
4200                  _MM_FROUND_CUR_DIRECTION);
4201}
4202
4203static __inline__ __m256i __DEFAULT_FN_ATTRS
4204_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4205{
4206  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4207                  (__v8si) __W,
4208                  (__mmask8) __U,
4209                  _MM_FROUND_CUR_DIRECTION);
4210}
4211
4212static __inline__ __m256i __DEFAULT_FN_ATTRS
4213_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4214{
4215  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4216                  (__v8si)
4217                  _mm256_setzero_si256 (),
4218                  (__mmask8) __U,
4219                  _MM_FROUND_CUR_DIRECTION);
4220}
4221
4222/* Unpack and Interleave */
4223
4224static __inline __m512d __DEFAULT_FN_ATTRS
4225_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4226{
4227  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4228                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4229}
4230
4231static __inline__ __m512d __DEFAULT_FN_ATTRS
4232_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4233{
4234  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4235                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4236                                           (__v8df)__W);
4237}
4238
4239static __inline__ __m512d __DEFAULT_FN_ATTRS
4240_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4241{
4242  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4243                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4244                                           (__v8df)_mm512_setzero_pd());
4245}
4246
4247static __inline __m512d __DEFAULT_FN_ATTRS
4248_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4249{
4250  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4251                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4252}
4253
4254static __inline__ __m512d __DEFAULT_FN_ATTRS
4255_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4256{
4257  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4258                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4259                                           (__v8df)__W);
4260}
4261
4262static __inline__ __m512d __DEFAULT_FN_ATTRS
4263_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4264{
4265  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4266                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4267                                           (__v8df)_mm512_setzero_pd());
4268}
4269
4270static __inline __m512 __DEFAULT_FN_ATTRS
4271_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4272{
4273  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4274                                         2,    18,    3,    19,
4275                                         2+4,  18+4,  3+4,  19+4,
4276                                         2+8,  18+8,  3+8,  19+8,
4277                                         2+12, 18+12, 3+12, 19+12);
4278}
4279
4280static __inline__ __m512 __DEFAULT_FN_ATTRS
4281_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4282{
4283  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4284                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4285                                          (__v16sf)__W);
4286}
4287
4288static __inline__ __m512 __DEFAULT_FN_ATTRS
4289_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4290{
4291  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4292                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4293                                          (__v16sf)_mm512_setzero_ps());
4294}
4295
4296static __inline __m512 __DEFAULT_FN_ATTRS
4297_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4298{
4299  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4300                                         0,    16,    1,    17,
4301                                         0+4,  16+4,  1+4,  17+4,
4302                                         0+8,  16+8,  1+8,  17+8,
4303                                         0+12, 16+12, 1+12, 17+12);
4304}
4305
4306static __inline__ __m512 __DEFAULT_FN_ATTRS
4307_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4308{
4309  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4310                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4311                                          (__v16sf)__W);
4312}
4313
4314static __inline__ __m512 __DEFAULT_FN_ATTRS
4315_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4316{
4317  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4318                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4319                                          (__v16sf)_mm512_setzero_ps());
4320}
4321
4322static __inline__ __m512i __DEFAULT_FN_ATTRS
4323_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4324{
4325  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4326                                          2,    18,    3,    19,
4327                                          2+4,  18+4,  3+4,  19+4,
4328                                          2+8,  18+8,  3+8,  19+8,
4329                                          2+12, 18+12, 3+12, 19+12);
4330}
4331
4332static __inline__ __m512i __DEFAULT_FN_ATTRS
4333_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4334{
4335  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4336                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4337                                       (__v16si)__W);
4338}
4339
4340static __inline__ __m512i __DEFAULT_FN_ATTRS
4341_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4342{
4343  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4344                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4345                                       (__v16si)_mm512_setzero_si512());
4346}
4347
4348static __inline__ __m512i __DEFAULT_FN_ATTRS
4349_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4350{
4351  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4352                                          0,    16,    1,    17,
4353                                          0+4,  16+4,  1+4,  17+4,
4354                                          0+8,  16+8,  1+8,  17+8,
4355                                          0+12, 16+12, 1+12, 17+12);
4356}
4357
4358static __inline__ __m512i __DEFAULT_FN_ATTRS
4359_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4360{
4361  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4362                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4363                                       (__v16si)__W);
4364}
4365
4366static __inline__ __m512i __DEFAULT_FN_ATTRS
4367_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4368{
4369  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4370                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4371                                       (__v16si)_mm512_setzero_si512());
4372}
4373
4374static __inline__ __m512i __DEFAULT_FN_ATTRS
4375_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4376{
4377  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4378                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4379}
4380
4381static __inline__ __m512i __DEFAULT_FN_ATTRS
4382_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4383{
4384  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4385                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4386                                        (__v8di)__W);
4387}
4388
4389static __inline__ __m512i __DEFAULT_FN_ATTRS
4390_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4391{
4392  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4393                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4394                                        (__v8di)_mm512_setzero_si512());
4395}
4396
4397static __inline__ __m512i __DEFAULT_FN_ATTRS
4398_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4399{
4400  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4401                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4402}
4403
4404static __inline__ __m512i __DEFAULT_FN_ATTRS
4405_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4406{
4407  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4408                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4409                                        (__v8di)__W);
4410}
4411
4412static __inline__ __m512i __DEFAULT_FN_ATTRS
4413_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4414{
4415  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4416                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4417                                        (__v8di)_mm512_setzero_si512());
4418}
4419
4420/* Bit Test */
4421
4422static __inline __mmask16 __DEFAULT_FN_ATTRS
4423_mm512_test_epi32_mask(__m512i __A, __m512i __B)
4424{
4425  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4426            (__v16si) __B,
4427            (__mmask16) -1);
4428}
4429
4430static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4431_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
4432{
4433  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4434                 (__v16si) __B, __U);
4435}
4436
4437static __inline __mmask8 __DEFAULT_FN_ATTRS
4438_mm512_test_epi64_mask(__m512i __A, __m512i __B)
4439{
4440  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
4441                 (__v8di) __B,
4442                 (__mmask8) -1);
4443}
4444
4445static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4446_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
4447{
4448  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
4449}
4450
4451
4452/* SIMD load ops */
4453
4454static __inline __m512i __DEFAULT_FN_ATTRS
4455_mm512_loadu_si512 (void const *__P)
4456{
4457  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4458                  (__v16si)
4459                  _mm512_setzero_si512 (),
4460                  (__mmask16) -1);
4461}
4462
4463static __inline __m512i __DEFAULT_FN_ATTRS
4464_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4465{
4466  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4467                  (__v16si) __W,
4468                  (__mmask16) __U);
4469}
4470
4471
4472static __inline __m512i __DEFAULT_FN_ATTRS
4473_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4474{
4475  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4476                                                     (__v16si)
4477                                                     _mm512_setzero_si512 (),
4478                                                     (__mmask16) __U);
4479}
4480
4481static __inline __m512i __DEFAULT_FN_ATTRS
4482_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4483{
4484  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4485                  (__v8di) __W,
4486                  (__mmask8) __U);
4487}
4488
4489static __inline __m512i __DEFAULT_FN_ATTRS
4490_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4491{
4492  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4493                                                     (__v8di)
4494                                                     _mm512_setzero_si512 (),
4495                                                     (__mmask8) __U);
4496}
4497
4498static __inline __m512 __DEFAULT_FN_ATTRS
4499_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4500{
4501  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4502                   (__v16sf) __W,
4503                   (__mmask16) __U);
4504}
4505
4506static __inline __m512 __DEFAULT_FN_ATTRS
4507_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4508{
4509  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4510                                                  (__v16sf)
4511                                                  _mm512_setzero_ps (),
4512                                                  (__mmask16) __U);
4513}
4514
4515static __inline __m512d __DEFAULT_FN_ATTRS
4516_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4517{
4518  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4519                (__v8df) __W,
4520                (__mmask8) __U);
4521}
4522
4523static __inline __m512d __DEFAULT_FN_ATTRS
4524_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4525{
4526  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4527                                                   (__v8df)
4528                                                   _mm512_setzero_pd (),
4529                                                   (__mmask8) __U);
4530}
4531
4532static __inline __m512d __DEFAULT_FN_ATTRS
4533_mm512_loadu_pd(double const *__p)
4534{
4535  struct __loadu_pd {
4536    __m512d __v;
4537  } __attribute__((__packed__, __may_alias__));
4538  return ((struct __loadu_pd*)__p)->__v;
4539}
4540
4541static __inline __m512 __DEFAULT_FN_ATTRS
4542_mm512_loadu_ps(float const *__p)
4543{
4544  struct __loadu_ps {
4545    __m512 __v;
4546  } __attribute__((__packed__, __may_alias__));
4547  return ((struct __loadu_ps*)__p)->__v;
4548}
4549
4550static __inline __m512 __DEFAULT_FN_ATTRS
4551_mm512_load_ps(float const *__p)
4552{
4553  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
4554                                                  (__v16sf)
4555                                                  _mm512_setzero_ps (),
4556                                                  (__mmask16) -1);
4557}
4558
4559static __inline __m512 __DEFAULT_FN_ATTRS
4560_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4561{
4562  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4563                   (__v16sf) __W,
4564                   (__mmask16) __U);
4565}
4566
4567static __inline __m512 __DEFAULT_FN_ATTRS
4568_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4569{
4570  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4571                                                  (__v16sf)
4572                                                  _mm512_setzero_ps (),
4573                                                  (__mmask16) __U);
4574}
4575
4576static __inline __m512d __DEFAULT_FN_ATTRS
4577_mm512_load_pd(double const *__p)
4578{
4579  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
4580                                                   (__v8df)
4581                                                   _mm512_setzero_pd (),
4582                                                   (__mmask8) -1);
4583}
4584
4585static __inline __m512d __DEFAULT_FN_ATTRS
4586_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4587{
4588  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4589                          (__v8df) __W,
4590                          (__mmask8) __U);
4591}
4592
4593static __inline __m512d __DEFAULT_FN_ATTRS
4594_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4595{
4596  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4597                                                   (__v8df)
4598                                                   _mm512_setzero_pd (),
4599                                                   (__mmask8) __U);
4600}
4601
4602static __inline __m512i __DEFAULT_FN_ATTRS
4603_mm512_load_si512 (void const *__P)
4604{
4605  return *(__m512i *) __P;
4606}
4607
4608static __inline __m512i __DEFAULT_FN_ATTRS
4609_mm512_load_epi32 (void const *__P)
4610{
4611  return *(__m512i *) __P;
4612}
4613
4614static __inline __m512i __DEFAULT_FN_ATTRS
4615_mm512_load_epi64 (void const *__P)
4616{
4617  return *(__m512i *) __P;
4618}
4619
4620/* SIMD store ops */
4621
4622static __inline void __DEFAULT_FN_ATTRS
4623_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4624{
4625  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4626                                     (__mmask8) __U);
4627}
4628
4629static __inline void __DEFAULT_FN_ATTRS
4630_mm512_storeu_si512 (void *__P, __m512i __A)
4631{
4632  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
4633            (__mmask16) -1);
4634}
4635
4636static __inline void __DEFAULT_FN_ATTRS
4637_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4638{
4639  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4640                                     (__mmask16) __U);
4641}
4642
4643static __inline void __DEFAULT_FN_ATTRS
4644_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4645{
4646  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4647}
4648
4649static __inline void __DEFAULT_FN_ATTRS
4650_mm512_storeu_pd(void *__P, __m512d __A)
4651{
4652  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
4653}
4654
4655static __inline void __DEFAULT_FN_ATTRS
4656_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4657{
4658  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4659                                   (__mmask16) __U);
4660}
4661
4662static __inline void __DEFAULT_FN_ATTRS
4663_mm512_storeu_ps(void *__P, __m512 __A)
4664{
4665  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
4666}
4667
4668static __inline void __DEFAULT_FN_ATTRS
4669_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4670{
4671  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4672}
4673
4674static __inline void __DEFAULT_FN_ATTRS
4675_mm512_store_pd(void *__P, __m512d __A)
4676{
4677  *(__m512d*)__P = __A;
4678}
4679
4680static __inline void __DEFAULT_FN_ATTRS
4681_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4682{
4683  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4684                                   (__mmask16) __U);
4685}
4686
4687static __inline void __DEFAULT_FN_ATTRS
4688_mm512_store_ps(void *__P, __m512 __A)
4689{
4690  *(__m512*)__P = __A;
4691}
4692
4693static __inline void __DEFAULT_FN_ATTRS
4694_mm512_store_si512 (void *__P, __m512i __A)
4695{
4696  *(__m512i *) __P = __A;
4697}
4698
4699static __inline void __DEFAULT_FN_ATTRS
4700_mm512_store_epi32 (void *__P, __m512i __A)
4701{
4702  *(__m512i *) __P = __A;
4703}
4704
4705static __inline void __DEFAULT_FN_ATTRS
4706_mm512_store_epi64 (void *__P, __m512i __A)
4707{
4708  *(__m512i *) __P = __A;
4709}
4710
4711/* Mask ops */
4712
4713static __inline __mmask16 __DEFAULT_FN_ATTRS
4714_mm512_knot(__mmask16 __M)
4715{
4716  return __builtin_ia32_knothi(__M);
4717}
4718
4719/* Integer compare */
4720
4721static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4722_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
4723  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4724                                                   (__mmask16)-1);
4725}
4726
4727static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4728_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4729  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4730                                                   __u);
4731}
4732
4733static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4734_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
4735  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4736                                                 (__mmask16)-1);
4737}
4738
4739static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4740_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4741  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4742                                                 __u);
4743}
4744
4745static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4746_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4747  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4748                                                  __u);
4749}
4750
4751static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4752_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
4753  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4754                                                  (__mmask8)-1);
4755}
4756
4757static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4758_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
4759  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4760                                                (__mmask8)-1);
4761}
4762
4763static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4764_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4765  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4766                                                __u);
4767}
4768
4769static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4770_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
4771  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4772                                                (__mmask16)-1);
4773}
4774
4775static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4776_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4777  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4778                                                __u);
4779}
4780
4781static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4782_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
4783  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4784                                                 (__mmask16)-1);
4785}
4786
4787static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4788_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4789  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4790                                                 __u);
4791}
4792
4793static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4794_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
4795  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4796                                               (__mmask8)-1);
4797}
4798
4799static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4800_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4801  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4802                                               __u);
4803}
4804
4805static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4806_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
4807  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4808                                                (__mmask8)-1);
4809}
4810
4811static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4812_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4813  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4814                                                __u);
4815}
4816
4817static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4818_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
4819  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4820                                                   (__mmask16)-1);
4821}
4822
4823static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4824_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4825  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4826                                                   __u);
4827}
4828
4829static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4830_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
4831  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4832                                                 (__mmask16)-1);
4833}
4834
4835static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4836_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4837  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4838                                                 __u);
4839}
4840
4841static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4842_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4843  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4844                                                  __u);
4845}
4846
4847static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4848_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
4849  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4850                                                  (__mmask8)-1);
4851}
4852
4853static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4854_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
4855  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4856                                                (__mmask8)-1);
4857}
4858
4859static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4860_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4861  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4862                                                __u);
4863}
4864
4865static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4866_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
4867  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4868                                                (__mmask16)-1);
4869}
4870
4871static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4872_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4873  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4874                                                __u);
4875}
4876
4877static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4878_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
4879  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4880                                                 (__mmask16)-1);
4881}
4882
4883static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4884_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4885  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4886                                                 __u);
4887}
4888
4889static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4890_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
4891  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4892                                               (__mmask8)-1);
4893}
4894
4895static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4896_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4897  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4898                                               __u);
4899}
4900
4901static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4902_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
4903  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4904                                                (__mmask8)-1);
4905}
4906
4907static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4908_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4909  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
4910                                                __u);
4911}
4912
4913static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4914_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
4915  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4916                                                (__mmask16)-1);
4917}
4918
4919static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4920_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4921  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4922                                                __u);
4923}
4924
4925static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4926_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
4927  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4928                                                 (__mmask16)-1);
4929}
4930
4931static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4932_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4933  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
4934                                                 __u);
4935}
4936
4937static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4938_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
4939  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4940                                               (__mmask8)-1);
4941}
4942
4943static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4944_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4945  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4946                                               __u);
4947}
4948
4949static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4950_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
4951  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4952                                                (__mmask8)-1);
4953}
4954
4955static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4956_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4957  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
4958                                                __u);
4959}
4960
4961static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4962_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
4963  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4964                                                (__mmask16)-1);
4965}
4966
4967static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4968_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4969  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4970                                                __u);
4971}
4972
4973static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4974_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
4975  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4976                                                 (__mmask16)-1);
4977}
4978
4979static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4980_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4981  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
4982                                                 __u);
4983}
4984
4985static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4986_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
4987  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
4988                                               (__mmask8)-1);
4989}
4990
4991static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4992_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4993  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
4994                                               __u);
4995}
4996
4997static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4998_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
4999  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5000                                                (__mmask8)-1);
5001}
5002
5003static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5004_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5005  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5006                                                __u);
5007}
5008
5009static __inline__ __m512i __DEFAULT_FN_ATTRS
5010_mm512_cvtepi8_epi32(__m128i __A)
5011{
5012  /* This function always performs a signed extension, but __v16qi is a char
5013     which may be signed or unsigned, so use __v16qs. */
5014  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
5015}
5016
5017static __inline__ __m512i __DEFAULT_FN_ATTRS
5018_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5019{
5020  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5021                                             (__v16si)_mm512_cvtepi8_epi32(__A),
5022                                             (__v16si)__W);
5023}
5024
5025static __inline__ __m512i __DEFAULT_FN_ATTRS
5026_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
5027{
5028  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5029                                             (__v16si)_mm512_cvtepi8_epi32(__A),
5030                                             (__v16si)_mm512_setzero_si512());
5031}
5032
5033static __inline__ __m512i __DEFAULT_FN_ATTRS
5034_mm512_cvtepi8_epi64(__m128i __A)
5035{
5036  /* This function always performs a signed extension, but __v16qi is a char
5037     which may be signed or unsigned, so use __v16qs. */
5038  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5039}
5040
5041static __inline__ __m512i __DEFAULT_FN_ATTRS
5042_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5043{
5044  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5045                                             (__v8di)_mm512_cvtepi8_epi64(__A),
5046                                             (__v8di)__W);
5047}
5048
5049static __inline__ __m512i __DEFAULT_FN_ATTRS
5050_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
5051{
5052  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5053                                             (__v8di)_mm512_cvtepi8_epi64(__A),
5054                                             (__v8di)_mm512_setzero_si512 ());
5055}
5056
5057static __inline__ __m512i __DEFAULT_FN_ATTRS
5058_mm512_cvtepi32_epi64(__m256i __X)
5059{
5060  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
5061}
5062
5063static __inline__ __m512i __DEFAULT_FN_ATTRS
5064_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5065{
5066  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5067                                             (__v8di)_mm512_cvtepi32_epi64(__X),
5068                                             (__v8di)__W);
5069}
5070
5071static __inline__ __m512i __DEFAULT_FN_ATTRS
5072_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
5073{
5074  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5075                                             (__v8di)_mm512_cvtepi32_epi64(__X),
5076                                             (__v8di)_mm512_setzero_si512());
5077}
5078
5079static __inline__ __m512i __DEFAULT_FN_ATTRS
5080_mm512_cvtepi16_epi32(__m256i __A)
5081{
5082  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
5083}
5084
5085static __inline__ __m512i __DEFAULT_FN_ATTRS
5086_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5087{
5088  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5089                                            (__v16si)_mm512_cvtepi16_epi32(__A),
5090                                            (__v16si)__W);
5091}
5092
5093static __inline__ __m512i __DEFAULT_FN_ATTRS
5094_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
5095{
5096  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5097                                            (__v16si)_mm512_cvtepi16_epi32(__A),
5098                                            (__v16si)_mm512_setzero_si512 ());
5099}
5100
5101static __inline__ __m512i __DEFAULT_FN_ATTRS
5102_mm512_cvtepi16_epi64(__m128i __A)
5103{
5104  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
5105}
5106
5107static __inline__ __m512i __DEFAULT_FN_ATTRS
5108_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5109{
5110  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5111                                             (__v8di)_mm512_cvtepi16_epi64(__A),
5112                                             (__v8di)__W);
5113}
5114
5115static __inline__ __m512i __DEFAULT_FN_ATTRS
5116_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
5117{
5118  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5119                                             (__v8di)_mm512_cvtepi16_epi64(__A),
5120                                             (__v8di)_mm512_setzero_si512());
5121}
5122
5123static __inline__ __m512i __DEFAULT_FN_ATTRS
5124_mm512_cvtepu8_epi32(__m128i __A)
5125{
5126  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
5127}
5128
5129static __inline__ __m512i __DEFAULT_FN_ATTRS
5130_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5131{
5132  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5133                                             (__v16si)_mm512_cvtepu8_epi32(__A),
5134                                             (__v16si)__W);
5135}
5136
5137static __inline__ __m512i __DEFAULT_FN_ATTRS
5138_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
5139{
5140  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5141                                             (__v16si)_mm512_cvtepu8_epi32(__A),
5142                                             (__v16si)_mm512_setzero_si512());
5143}
5144
5145static __inline__ __m512i __DEFAULT_FN_ATTRS
5146_mm512_cvtepu8_epi64(__m128i __A)
5147{
5148  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5149}
5150
5151static __inline__ __m512i __DEFAULT_FN_ATTRS
5152_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5153{
5154  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5155                                             (__v8di)_mm512_cvtepu8_epi64(__A),
5156                                             (__v8di)__W);
5157}
5158
5159static __inline__ __m512i __DEFAULT_FN_ATTRS
5160_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
5161{
5162  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5163                                             (__v8di)_mm512_cvtepu8_epi64(__A),
5164                                             (__v8di)_mm512_setzero_si512());
5165}
5166
5167static __inline__ __m512i __DEFAULT_FN_ATTRS
5168_mm512_cvtepu32_epi64(__m256i __X)
5169{
5170  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
5171}
5172
5173static __inline__ __m512i __DEFAULT_FN_ATTRS
5174_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5175{
5176  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5177                                             (__v8di)_mm512_cvtepu32_epi64(__X),
5178                                             (__v8di)__W);
5179}
5180
5181static __inline__ __m512i __DEFAULT_FN_ATTRS
5182_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
5183{
5184  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5185                                             (__v8di)_mm512_cvtepu32_epi64(__X),
5186                                             (__v8di)_mm512_setzero_si512());
5187}
5188
5189static __inline__ __m512i __DEFAULT_FN_ATTRS
5190_mm512_cvtepu16_epi32(__m256i __A)
5191{
5192  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
5193}
5194
5195static __inline__ __m512i __DEFAULT_FN_ATTRS
5196_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5197{
5198  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5199                                            (__v16si)_mm512_cvtepu16_epi32(__A),
5200                                            (__v16si)__W);
5201}
5202
5203static __inline__ __m512i __DEFAULT_FN_ATTRS
5204_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
5205{
5206  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5207                                            (__v16si)_mm512_cvtepu16_epi32(__A),
5208                                            (__v16si)_mm512_setzero_si512());
5209}
5210
5211static __inline__ __m512i __DEFAULT_FN_ATTRS
5212_mm512_cvtepu16_epi64(__m128i __A)
5213{
5214  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
5215}
5216
5217static __inline__ __m512i __DEFAULT_FN_ATTRS
5218_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5219{
5220  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5221                                             (__v8di)_mm512_cvtepu16_epi64(__A),
5222                                             (__v8di)__W);
5223}
5224
5225static __inline__ __m512i __DEFAULT_FN_ATTRS
5226_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
5227{
5228  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5229                                             (__v8di)_mm512_cvtepu16_epi64(__A),
5230                                             (__v8di)_mm512_setzero_si512());
5231}
5232
5233static __inline__ __m512i __DEFAULT_FN_ATTRS
5234_mm512_rorv_epi32 (__m512i __A, __m512i __B)
5235{
5236  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5237              (__v16si) __B,
5238              (__v16si)
5239              _mm512_setzero_si512 (),
5240              (__mmask16) -1);
5241}
5242
5243static __inline__ __m512i __DEFAULT_FN_ATTRS
5244_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5245{
5246  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5247              (__v16si) __B,
5248              (__v16si) __W,
5249              (__mmask16) __U);
5250}
5251
5252static __inline__ __m512i __DEFAULT_FN_ATTRS
5253_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5254{
5255  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5256              (__v16si) __B,
5257              (__v16si)
5258              _mm512_setzero_si512 (),
5259              (__mmask16) __U);
5260}
5261
5262static __inline__ __m512i __DEFAULT_FN_ATTRS
5263_mm512_rorv_epi64 (__m512i __A, __m512i __B)
5264{
5265  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5266              (__v8di) __B,
5267              (__v8di)
5268              _mm512_setzero_si512 (),
5269              (__mmask8) -1);
5270}
5271
5272static __inline__ __m512i __DEFAULT_FN_ATTRS
5273_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5274{
5275  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5276              (__v8di) __B,
5277              (__v8di) __W,
5278              (__mmask8) __U);
5279}
5280
5281static __inline__ __m512i __DEFAULT_FN_ATTRS
5282_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5283{
5284  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5285              (__v8di) __B,
5286              (__v8di)
5287              _mm512_setzero_si512 (),
5288              (__mmask8) __U);
5289}
5290
5291
5292
/* Unmasked 512-bit integer compares.  Each macro forwards A and B (cast to
 * the lane vector type), the predicate P cast to int, and an all-ones mask to
 * the matching cmp/ucmp builtin, then casts the result to the mask type.
 * The epi* forms use the cmp builtins and the epu* forms the ucmp builtins.
 * NOTE(review): P is presumably required to be a compile-time constant --
 * confirm against the builtin declarations. */
#define _mm512_cmp_epi32_mask(A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(P), \
      (__mmask16)-1); })

#define _mm512_cmp_epu32_mask(A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(P), \
      (__mmask16)-1); })

#define _mm512_cmp_epi64_mask(A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(P), \
      (__mmask8)-1); })

#define _mm512_cmp_epu64_mask(A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(P), \
      (__mmask8)-1); })
5312
/* Masked 512-bit integer compares.  Identical to the unmasked forms except
 * that the caller's mask M (cast to the mask type) is forwarded as the
 * builtin's final operand instead of an all-ones mask.
 * NOTE(review): P is presumably required to be a compile-time constant --
 * confirm against the builtin declarations. */
#define _mm512_mask_cmp_epi32_mask(M, A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(P), \
      (__mmask16)(M)); })

#define _mm512_mask_cmp_epu32_mask(M, A, B, P) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(P), \
      (__mmask16)(M)); })

#define _mm512_mask_cmp_epi64_mask(M, A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(P), \
      (__mmask8)(M)); })

#define _mm512_mask_cmp_epu64_mask(M, A, B, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(P), \
      (__mmask8)(M)); })
5332
/* Wrappers around __builtin_ia32_prold512_mask taking A as a __v16si vector
 * and N cast to int (by the name, a rotate-left count -- confirm).  The plain
 * form passes a zero vector with an all-ones mask, the mask form passes W
 * with mask U, and the maskz form passes a zero vector with mask U. */
#define _mm512_rol_epi32(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask( \
      (__v16si)(__m512i)(A), \
      (int)(N), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })

#define _mm512_mask_rol_epi32(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask( \
      (__v16si)(__m512i)(A), \
      (int)(N), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)); })

#define _mm512_maskz_rol_epi32(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask( \
      (__v16si)(__m512i)(A), \
      (int)(N), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); })
5347
/* Wrappers around __builtin_ia32_prolq512_mask taking A as a __v8di vector
 * and N cast to int (by the name, a rotate-left count -- confirm).  The plain
 * form passes a zero vector with an all-ones mask, the mask form passes W
 * with mask U, and the maskz form passes a zero vector with mask U. */
#define _mm512_rol_epi64(A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(N), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })

#define _mm512_mask_rol_epi64(W, U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(N), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); })

#define _mm512_maskz_rol_epi64(U, A, N) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(N), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); })
5361static __inline__ __m512i __DEFAULT_FN_ATTRS
5362_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5363{
5364  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5365              (__v16si) __B,
5366              (__v16si)
5367              _mm512_setzero_si512 (),
5368              (__mmask16) -1);
5369}
5370
5371static __inline__ __m512i __DEFAULT_FN_ATTRS
5372_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5373{
5374  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5375              (__v16si) __B,
5376              (__v16si) __W,
5377              (__mmask16) __U);
5378}
5379
5380static __inline__ __m512i __DEFAULT_FN_ATTRS
5381_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5382{
5383  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5384              (__v16si) __B,
5385              (__v16si)
5386              _mm512_setzero_si512 (),
5387              (__mmask16) __U);
5388}
5389
5390static __inline__ __m512i __DEFAULT_FN_ATTRS
5391_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5392{
5393  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5394              (__v8di) __B,
5395              (__v8di)
5396              _mm512_setzero_si512 (),
5397              (__mmask8) -1);
5398}
5399
5400static __inline__ __m512i __DEFAULT_FN_ATTRS
5401_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5402{
5403  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5404              (__v8di) __B,
5405              (__v8di) __W,
5406              (__mmask8) __U);
5407}
5408
5409static __inline__ __m512i __DEFAULT_FN_ATTRS
5410_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5411{
5412  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5413              (__v8di) __B,
5414              (__v8di)
5415              _mm512_setzero_si512 (),
5416              (__mmask8) __U);
5417}
5418
/* Wrappers around __builtin_ia32_prord512_mask taking a as a __v16si vector
 * and n cast to int (by the name, a rotate-right count -- confirm).  The
 * plain form passes a zero vector with an all-ones mask, the mask form passes
 * w with mask k, and the maskz form passes a zero vector with mask k. */
#define _mm512_ror_epi32(a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })

#define _mm512_mask_ror_epi32(w, k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)(__m512i)(w), \
      (__mmask16)(k)); })

#define _mm512_maskz_ror_epi32(k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(k)); })
5433
/* Wrappers around __builtin_ia32_prorq512_mask taking a as a __v8di vector
 * and n cast to int (by the name, a rotate-right count -- confirm).  The
 * plain form passes a zero vector with an all-ones mask, the mask form passes
 * w with mask k, and the maskz form passes a zero vector with mask k. */
#define _mm512_ror_epi64(a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })

#define _mm512_mask_ror_epi64(w, k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)(__m512i)(w), \
      (__mmask8)(k)); })

#define _mm512_maskz_ror_epi64(k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(k)); })
5447
/* Wrappers around __builtin_ia32_pslldi512_mask taking a as a __v16si vector
 * and n cast to int (by the name, a left-shift count -- confirm).  The plain
 * form passes a zero vector with an all-ones mask, the mask form passes w
 * with mask k, and the maskz form passes a zero vector with mask k. */
#define _mm512_slli_epi32(a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })

#define _mm512_mask_slli_epi32(w, k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)(__m512i)(w), \
      (__mmask16)(k)); })

#define _mm512_maskz_slli_epi32(k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_pslldi512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(k)); })
5462
/* Wrappers around __builtin_ia32_psllqi512_mask taking a as a __v8di vector
 * and n cast to int (by the name, a left-shift count -- confirm).  The plain
 * form passes a zero vector with an all-ones mask, the mask form passes w
 * with mask k, and the maskz form passes a zero vector with mask k. */
#define _mm512_slli_epi64(a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })

#define _mm512_mask_slli_epi64(w, k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)(__m512i)(w), \
      (__mmask8)(k)); })

#define _mm512_maskz_slli_epi64(k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psllqi512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(k)); })
5477
5478
5479
/* Wrappers around __builtin_ia32_psrldi512_mask taking a as a __v16si vector
 * and n cast to int (by the name, a right-shift count -- confirm).  The plain
 * form passes a zero vector with an all-ones mask, the mask form passes w
 * with mask k, and the maskz form passes a zero vector with mask k. */
#define _mm512_srli_epi32(a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })

#define _mm512_mask_srli_epi32(w, k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)(__m512i)(w), \
      (__mmask16)(k)); })

#define _mm512_maskz_srli_epi32(k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psrldi512_mask( \
      (__v16si)(__m512i)(a), \
      (int)(n), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(k)); })
5494
/* Wrappers around __builtin_ia32_psrlqi512_mask taking a as a __v8di vector
 * and n cast to int (by the name, a right-shift count -- confirm).  The plain
 * form passes a zero vector with an all-ones mask, the mask form passes w
 * with mask k, and the maskz form passes a zero vector with mask k. */
#define _mm512_srli_epi64(a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })

#define _mm512_mask_srli_epi64(w, k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)(__m512i)(w), \
      (__mmask8)(k)); })

#define _mm512_maskz_srli_epi64(k, a, n) __extension__ ({ \
  (__m512i)__builtin_ia32_psrlqi512_mask( \
      (__v8di)(__m512i)(a), \
      (int)(n), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(k)); })
5509
5510static __inline__ __m512i __DEFAULT_FN_ATTRS
5511_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5512{
5513  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5514              (__v16si) __W,
5515              (__mmask16) __U);
5516}
5517
5518static __inline__ __m512i __DEFAULT_FN_ATTRS
5519_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5520{
5521  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5522              (__v16si)
5523              _mm512_setzero_si512 (),
5524              (__mmask16) __U);
5525}
5526
5527static __inline__ void __DEFAULT_FN_ATTRS
5528_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5529{
5530  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5531          (__mmask16) __U);
5532}
5533
5534static __inline__ __m512i __DEFAULT_FN_ATTRS
5535_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5536{
5537  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5538                 (__v16si) __A,
5539                 (__v16si) __W);
5540}
5541
5542static __inline__ __m512i __DEFAULT_FN_ATTRS
5543_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5544{
5545  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5546                 (__v16si) __A,
5547                 (__v16si) _mm512_setzero_si512 ());
5548}
5549
5550static __inline__ __m512i __DEFAULT_FN_ATTRS
5551_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5552{
5553  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5554                 (__v8di) __A,
5555                 (__v8di) __W);
5556}
5557
5558static __inline__ __m512i __DEFAULT_FN_ATTRS
5559_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5560{
5561  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5562                 (__v8di) __A,
5563                 (__v8di) _mm512_setzero_si512 ());
5564}
5565
5566static __inline__ __m512i __DEFAULT_FN_ATTRS
5567_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5568{
5569  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5570              (__v8di) __W,
5571              (__mmask8) __U);
5572}
5573
5574static __inline__ __m512i __DEFAULT_FN_ATTRS
5575_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5576{
5577  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5578              (__v8di)
5579              _mm512_setzero_si512 (),
5580              (__mmask8) __U);
5581}
5582
5583static __inline__ void __DEFAULT_FN_ATTRS
5584_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5585{
5586  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5587          (__mmask8) __U);
5588}
5589
5590static __inline__ __m512d __DEFAULT_FN_ATTRS
5591_mm512_movedup_pd (__m512d __A)
5592{
5593  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5594                                          0, 0, 2, 2, 4, 4, 6, 6);
5595}
5596
5597static __inline__ __m512d __DEFAULT_FN_ATTRS
5598_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5599{
5600  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5601                                              (__v8df)_mm512_movedup_pd(__A),
5602                                              (__v8df)__W);
5603}
5604
5605static __inline__ __m512d __DEFAULT_FN_ATTRS
5606_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5607{
5608  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5609                                              (__v8df)_mm512_movedup_pd(__A),
5610                                              (__v8df)_mm512_setzero_pd());
5611}
5612
/* Packed-double fixupimm wrappers built on __builtin_ia32_fixupimmpd512_mask.
 * Operands a and b are passed as __v8df, c as __v8di and imm8 as int.  The
 * unmasked forms pass an all-ones __mmask8, the mask forms pass k.  The
 * *_round_* forms forward rnd as the final operand; the others pass
 * _MM_FROUND_CUR_DIRECTION there.
 * NOTE(review): imm8 and rnd are presumably required to be compile-time
 * constants -- confirm against the builtin declaration. */
#define _mm512_fixupimm_round_pd(a, b, c, imm8, rnd) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(a), \
      (__v8df)(__m512d)(b), \
      (__v8di)(__m512i)(c), \
      (int)(imm8), \
      (__mmask8)-1, \
      (int)(rnd)); })

#define _mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, rnd) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(a), \
      (__v8df)(__m512d)(b), \
      (__v8di)(__m512i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      (int)(rnd)); })

#define _mm512_fixupimm_pd(a, b, c, imm8) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(a), \
      (__v8df)(__m512d)(b), \
      (__v8di)(__m512i)(c), \
      (int)(imm8), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_pd(a, k, b, c, imm8) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(a), \
      (__v8df)(__m512d)(b), \
      (__v8di)(__m512i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      _MM_FROUND_CUR_DIRECTION); })
5638
/* Zero-masking packed-double fixupimm wrappers built on
 * __builtin_ia32_fixupimmpd512_maskz.  Operands a and b are passed as __v8df,
 * c as __v8di, imm8 as int and k as __mmask8; the round form forwards rnd,
 * the other passes _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, rnd) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(a), \
      (__v8df)(__m512d)(b), \
      (__v8di)(__m512i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      (int)(rnd)); })

#define _mm512_maskz_fixupimm_pd(k, a, b, c, imm8) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(a), \
      (__v8df)(__m512d)(b), \
      (__v8di)(__m512i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      _MM_FROUND_CUR_DIRECTION); })
5652
/* Packed-float fixupimm wrappers built on __builtin_ia32_fixupimmps512_mask.
 * Operands a and b are passed as __v16sf, c as __v16si and imm8 as int.  The
 * unmasked forms pass an all-ones __mmask16, the mask forms pass k.  The
 * *_round_* forms forward rnd as the final operand; the others pass
 * _MM_FROUND_CUR_DIRECTION there.
 * NOTE(review): imm8 and rnd are presumably required to be compile-time
 * constants -- confirm against the builtin declaration. */
#define _mm512_fixupimm_round_ps(a, b, c, imm8, rnd) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(a), \
      (__v16sf)(__m512)(b), \
      (__v16si)(__m512i)(c), \
      (int)(imm8), \
      (__mmask16)-1, \
      (int)(rnd)); })

#define _mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, rnd) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(a), \
      (__v16sf)(__m512)(b), \
      (__v16si)(__m512i)(c), \
      (int)(imm8), \
      (__mmask16)(k), \
      (int)(rnd)); })

#define _mm512_fixupimm_ps(a, b, c, imm8) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(a), \
      (__v16sf)(__m512)(b), \
      (__v16si)(__m512i)(c), \
      (int)(imm8), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_fixupimm_ps(a, k, b, c, imm8) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(a), \
      (__v16sf)(__m512)(b), \
      (__v16si)(__m512i)(c), \
      (int)(imm8), \
      (__mmask16)(k), \
      _MM_FROUND_CUR_DIRECTION); })
5678
/* Zero-masking packed-float fixupimm wrappers built on
 * __builtin_ia32_fixupimmps512_maskz.  Operands a and b are passed as
 * __v16sf, c as __v16si, imm8 as int and k as __mmask16; the round form
 * forwards rnd, the other passes _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, rnd) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(a), \
      (__v16sf)(__m512)(b), \
      (__v16si)(__m512i)(c), \
      (int)(imm8), \
      (__mmask16)(k), \
      (int)(rnd)); })

#define _mm512_maskz_fixupimm_ps(k, a, b, c, imm8) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(a), \
      (__v16sf)(__m512)(b), \
      (__v16si)(__m512i)(c), \
      (int)(imm8), \
      (__mmask16)(k), \
      _MM_FROUND_CUR_DIRECTION); })
5692
/* Scalar-double fixupimm wrappers built on __builtin_ia32_fixupimmsd_mask.
 * Operands a and b are passed as __v2df, c as __v2di and imm8 as int.  The
 * unmasked forms pass an all-ones __mmask8, the mask forms pass k.  The
 * *_round_* forms forward rnd as the final operand; the others pass
 * _MM_FROUND_CUR_DIRECTION there.
 * NOTE(review): imm8 and rnd are presumably required to be compile-time
 * constants -- confirm against the builtin declaration. */
#define _mm_fixupimm_round_sd(a, b, c, imm8, rnd) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)-1, \
      (int)(rnd)); })

#define _mm_mask_fixupimm_round_sd(a, k, b, c, imm8, rnd) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      (int)(rnd)); })

#define _mm_fixupimm_sd(a, b, c, imm8) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_sd(a, k, b, c, imm8) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      _MM_FROUND_CUR_DIRECTION); })
5718
/* Zero-masking scalar-double fixupimm wrappers built on
 * __builtin_ia32_fixupimmsd_maskz.  Operands a and b are passed as __v2df,
 * c as __v2di, imm8 as int and k as __mmask8; the round form forwards rnd,
 * the other passes _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, rnd) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      (int)(rnd)); })

#define _mm_maskz_fixupimm_sd(k, a, b, c, imm8) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      (__v2di)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      _MM_FROUND_CUR_DIRECTION); })
5731
/* Scalar-float fixupimm wrappers built on __builtin_ia32_fixupimmss_mask.
 * Operands a and b are passed as __v4sf, c as __v4si and imm8 as int.  The
 * unmasked forms pass an all-ones __mmask8, the mask forms pass k.  The
 * *_round_* forms forward rnd as the final operand; the others pass
 * _MM_FROUND_CUR_DIRECTION there.
 * NOTE(review): imm8 and rnd are presumably required to be compile-time
 * constants -- confirm against the builtin declaration. */
#define _mm_fixupimm_round_ss(a, b, c, imm8, rnd) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(a), \
      (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)-1, \
      (int)(rnd)); })

#define _mm_mask_fixupimm_round_ss(a, k, b, c, imm8, rnd) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(a), \
      (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      (int)(rnd)); })

#define _mm_fixupimm_ss(a, b, c, imm8) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(a), \
      (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_fixupimm_ss(a, k, b, c, imm8) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(a), \
      (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      _MM_FROUND_CUR_DIRECTION); })
5757
/* Zero-masking scalar-float fixupimm wrappers built on
 * __builtin_ia32_fixupimmss_maskz.  Operands a and b are passed as __v4sf,
 * c as __v4si, imm8 as int and k as __mmask8; the round form forwards rnd,
 * the other passes _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, rnd) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(a), \
      (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      (int)(rnd)); })

#define _mm_maskz_fixupimm_ss(k, a, b, c, imm8) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(a), \
      (__v4sf)(__m128)(b), \
      (__v4si)(__m128i)(c), \
      (int)(imm8), \
      (__mmask8)(k), \
      _MM_FROUND_CUR_DIRECTION); })
5770
/* Unmasked getexp wrapper with an explicit rounding operand: a and b are
 * passed as __v2df to __builtin_ia32_getexpsd128_round_mask together with a
 * zero vector, an all-ones __mmask8 and rnd cast to int. */
#define _mm_getexp_round_sd(a, b, rnd) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(a), \
      (__v2df)(__m128d)(b), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(rnd)); })
5776
5777
5778static __inline__ __m128d __DEFAULT_FN_ATTRS
5779_mm_getexp_sd (__m128d __A, __m128d __B)
5780{
5781  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5782                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5783}
5784
5785static __inline__ __m128d __DEFAULT_FN_ATTRS
5786_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5787{
5788 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5789          (__v2df) __B,
5790          (__v2df) __W,
5791          (__mmask8) __U,
5792          _MM_FROUND_CUR_DIRECTION);
5793}
5794
/* Masked scalar-double getexp with explicit rounding argument R: W is the
 * builtin's third operand and U its mask. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
5800
5801static __inline__ __m128d __DEFAULT_FN_ATTRS
5802_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5803{
5804 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5805          (__v2df) __B,
5806          (__v2df) _mm_setzero_pd (),
5807          (__mmask8) __U,
5808          _MM_FROUND_CUR_DIRECTION);
5809}
5810
/* Zero-masked scalar-double getexp with explicit rounding argument R: a zero
 * vector is the builtin's third operand and U its mask. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
5816
/* Unmasked scalar-float getexp with explicit rounding argument R: the third
 * operand is a zero vector and every mask bit is set. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
5822
5823static __inline__ __m128 __DEFAULT_FN_ATTRS
5824_mm_getexp_ss (__m128 __A, __m128 __B)
5825{
5826  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5827                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5828}
5829
5830static __inline__ __m128 __DEFAULT_FN_ATTRS
5831_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5832{
5833 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5834          (__v4sf) __B,
5835          (__v4sf) __W,
5836          (__mmask8) __U,
5837          _MM_FROUND_CUR_DIRECTION);
5838}
5839
/* Masked scalar-float getexp with explicit rounding argument R: W is the
 * builtin's third operand and U its mask. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
5845
5846static __inline__ __m128 __DEFAULT_FN_ATTRS
5847_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5848{
5849 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5850          (__v4sf) __B,
5851          (__v4sf) _mm_setzero_pd (),
5852          (__mmask8) __U,
5853          _MM_FROUND_CUR_DIRECTION);
5854}
5855
/* Zero-masked scalar-float getexp with explicit rounding argument R: a zero
 * vector is the builtin's third operand and U its mask. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
5861
/* Unmasked scalar-double getmant with explicit rounding argument R. C and D
 * are packed into one int immediate as (D << 2) | C; the fourth operand is a
 * zero vector and every mask bit is set. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
5868
/* Unmasked scalar-double getmant. C and D are packed into one int immediate
 * as (D << 2) | C; the fourth operand is a zero vector, every mask bit is
 * set, and the rounding argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
5876
/* Masked scalar-double getmant. C and D are packed as (D << 2) | C; W is the
 * builtin's fourth operand, U its mask, and the rounding argument is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5884
/* Masked scalar-double getmant with explicit rounding argument R. C and D
 * are packed into one int immediate as (D << 2) | C; W is the builtin's
 * fourth operand and U its mask.
 *
 * The statement expression carries the __extension__ prefix like every other
 * statement-expression macro in this header, so that using it does not
 * trigger pedantic diagnostics about the GNU extension. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)); })
5891
/* Zero-masked scalar-double getmant. C and D are packed as (D << 2) | C; a
 * zero vector is the builtin's fourth operand, U its mask, and the rounding
 * argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5899
/* Zero-masked scalar-double getmant with explicit rounding argument R. C and
 * D are packed as (D << 2) | C; a zero vector is the builtin's fourth
 * operand and U its mask. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
5906
/* Unmasked scalar-float getmant with explicit rounding argument R. C and D
 * are packed into one int immediate as (D << 2) | C; the fourth operand is a
 * zero vector and every mask bit is set. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
5913
/* Unmasked scalar-float getmant. C and D are packed into one int immediate
 * as (D << 2) | C; the fourth operand is a zero vector, every mask bit is
 * set, and the rounding argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
5921
/* Masked scalar-float getmant. C and D are packed as (D << 2) | C; W is the
 * builtin's fourth operand, U its mask, and the rounding argument is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5929
/* Masked scalar-float getmant with explicit rounding argument R. C and D are
 * packed into one int immediate as (D << 2) | C; W is the builtin's fourth
 * operand and U its mask.
 *
 * The statement expression carries the __extension__ prefix like every other
 * statement-expression macro in this header, so that using it does not
 * trigger pedantic diagnostics about the GNU extension. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)); })
5936
/* Zero-masked scalar-float getmant. C and D are packed into one int
 * immediate as (D << 2) | C; a zero vector is the builtin's fourth operand,
 * U its mask, and the rounding argument is _MM_FROUND_CUR_DIRECTION.
 *
 * The zero operand comes from _mm_setzero_ps() so that it has the same
 * single-precision element type as the other operands (previously a
 * double-precision zero from _mm_setzero_pd() was bit-cast instead). */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
5944
/* Zero-masked scalar-float getmant with explicit rounding argument R. C and
 * D are packed as (D << 2) | C; a zero vector is the builtin's fourth
 * operand and U its mask. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
5951
5952static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5953_mm512_kmov (__mmask16 __A)
5954{
5955  return  __A;
5956}
5957
/* Calls __builtin_ia32_vcomisd on A and B with the int arguments P and R;
 * the result is cast to int. */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({ \
  (int)__builtin_ia32_vcomisd( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(P), (int)(R)); })
5961
/* Calls __builtin_ia32_vcomiss on A and B with the int arguments P and R;
 * the result is cast to int. */
#define _mm_comi_round_ss(A, B, P, R) __extension__ ({ \
  (int)__builtin_ia32_vcomiss( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(P), (int)(R)); })
5965
#ifdef __x86_64__
/* Only defined on x86-64: calls __builtin_ia32_vcvtsd2si64 on A with the
 * rounding argument R and yields a long long. */
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
#endif
5970
5971static __inline__ __m512i __DEFAULT_FN_ATTRS
5972_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
5973         __mmask16 __U, __m512i __B)
5974{
5975  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
5976                   (__v16si) __I
5977                   /* idx */ ,
5978                   (__v16si) __B,
5979                   (__mmask16) __U);
5980}
5981
5982static __inline__ __m512i __DEFAULT_FN_ATTRS
5983_mm512_sll_epi32 (__m512i __A, __m128i __B)
5984{
5985  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
5986             (__v4si) __B,
5987             (__v16si)
5988             _mm512_setzero_si512 (),
5989             (__mmask16) -1);
5990}
5991
5992static __inline__ __m512i __DEFAULT_FN_ATTRS
5993_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5994{
5995  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
5996             (__v4si) __B,
5997             (__v16si) __W,
5998             (__mmask16) __U);
5999}
6000
6001static __inline__ __m512i __DEFAULT_FN_ATTRS
6002_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
6003{
6004  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
6005             (__v4si) __B,
6006             (__v16si)
6007             _mm512_setzero_si512 (),
6008             (__mmask16) __U);
6009}
6010
6011static __inline__ __m512i __DEFAULT_FN_ATTRS
6012_mm512_sll_epi64 (__m512i __A, __m128i __B)
6013{
6014  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
6015             (__v2di) __B,
6016             (__v8di)
6017             _mm512_setzero_si512 (),
6018             (__mmask8) -1);
6019}
6020
6021static __inline__ __m512i __DEFAULT_FN_ATTRS
6022_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6023{
6024  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
6025             (__v2di) __B,
6026             (__v8di) __W,
6027             (__mmask8) __U);
6028}
6029
6030static __inline__ __m512i __DEFAULT_FN_ATTRS
6031_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
6032{
6033  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
6034             (__v2di) __B,
6035             (__v8di)
6036             _mm512_setzero_si512 (),
6037             (__mmask8) __U);
6038}
6039
6040static __inline__ __m512i __DEFAULT_FN_ATTRS
6041_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
6042{
6043  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
6044              (__v16si) __Y,
6045              (__v16si)
6046              _mm512_setzero_si512 (),
6047              (__mmask16) -1);
6048}
6049
6050static __inline__ __m512i __DEFAULT_FN_ATTRS
6051_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6052{
6053  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
6054              (__v16si) __Y,
6055              (__v16si) __W,
6056              (__mmask16) __U);
6057}
6058
6059static __inline__ __m512i __DEFAULT_FN_ATTRS
6060_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
6061{
6062  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
6063              (__v16si) __Y,
6064              (__v16si)
6065              _mm512_setzero_si512 (),
6066              (__mmask16) __U);
6067}
6068
6069static __inline__ __m512i __DEFAULT_FN_ATTRS
6070_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
6071{
6072  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
6073             (__v8di) __Y,
6074             (__v8di)
6075             _mm512_undefined_pd (),
6076             (__mmask8) -1);
6077}
6078
6079static __inline__ __m512i __DEFAULT_FN_ATTRS
6080_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6081{
6082  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
6083             (__v8di) __Y,
6084             (__v8di) __W,
6085             (__mmask8) __U);
6086}
6087
6088static __inline__ __m512i __DEFAULT_FN_ATTRS
6089_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
6090{
6091  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
6092             (__v8di) __Y,
6093             (__v8di)
6094             _mm512_setzero_si512 (),
6095             (__mmask8) __U);
6096}
6097
6098static __inline__ __m512i __DEFAULT_FN_ATTRS
6099_mm512_sra_epi32 (__m512i __A, __m128i __B)
6100{
6101  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
6102             (__v4si) __B,
6103             (__v16si)
6104             _mm512_setzero_si512 (),
6105             (__mmask16) -1);
6106}
6107
6108static __inline__ __m512i __DEFAULT_FN_ATTRS
6109_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6110{
6111  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
6112             (__v4si) __B,
6113             (__v16si) __W,
6114             (__mmask16) __U);
6115}
6116
6117static __inline__ __m512i __DEFAULT_FN_ATTRS
6118_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
6119{
6120  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
6121             (__v4si) __B,
6122             (__v16si)
6123             _mm512_setzero_si512 (),
6124             (__mmask16) __U);
6125}
6126
6127static __inline__ __m512i __DEFAULT_FN_ATTRS
6128_mm512_sra_epi64 (__m512i __A, __m128i __B)
6129{
6130  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
6131             (__v2di) __B,
6132             (__v8di)
6133             _mm512_setzero_si512 (),
6134             (__mmask8) -1);
6135}
6136
6137static __inline__ __m512i __DEFAULT_FN_ATTRS
6138_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6139{
6140  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
6141             (__v2di) __B,
6142             (__v8di) __W,
6143             (__mmask8) __U);
6144}
6145
6146static __inline__ __m512i __DEFAULT_FN_ATTRS
6147_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
6148{
6149  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
6150             (__v2di) __B,
6151             (__v8di)
6152             _mm512_setzero_si512 (),
6153             (__mmask8) __U);
6154}
6155
6156static __inline__ __m512i __DEFAULT_FN_ATTRS
6157_mm512_srav_epi32 (__m512i __X, __m512i __Y)
6158{
6159  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
6160              (__v16si) __Y,
6161              (__v16si)
6162              _mm512_setzero_si512 (),
6163              (__mmask16) -1);
6164}
6165
6166static __inline__ __m512i __DEFAULT_FN_ATTRS
6167_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6168{
6169  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
6170              (__v16si) __Y,
6171              (__v16si) __W,
6172              (__mmask16) __U);
6173}
6174
6175static __inline__ __m512i __DEFAULT_FN_ATTRS
6176_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
6177{
6178  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
6179              (__v16si) __Y,
6180              (__v16si)
6181              _mm512_setzero_si512 (),
6182              (__mmask16) __U);
6183}
6184
6185static __inline__ __m512i __DEFAULT_FN_ATTRS
6186_mm512_srav_epi64 (__m512i __X, __m512i __Y)
6187{
6188  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
6189             (__v8di) __Y,
6190             (__v8di)
6191             _mm512_setzero_si512 (),
6192             (__mmask8) -1);
6193}
6194
6195static __inline__ __m512i __DEFAULT_FN_ATTRS
6196_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6197{
6198  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
6199             (__v8di) __Y,
6200             (__v8di) __W,
6201             (__mmask8) __U);
6202}
6203
6204static __inline__ __m512i __DEFAULT_FN_ATTRS
6205_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
6206{
6207  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
6208             (__v8di) __Y,
6209             (__v8di)
6210             _mm512_setzero_si512 (),
6211             (__mmask8) __U);
6212}
6213
6214static __inline__ __m512i __DEFAULT_FN_ATTRS
6215_mm512_srl_epi32 (__m512i __A, __m128i __B)
6216{
6217  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
6218             (__v4si) __B,
6219             (__v16si)
6220             _mm512_setzero_si512 (),
6221             (__mmask16) -1);
6222}
6223
6224static __inline__ __m512i __DEFAULT_FN_ATTRS
6225_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6226{
6227  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
6228             (__v4si) __B,
6229             (__v16si) __W,
6230             (__mmask16) __U);
6231}
6232
6233static __inline__ __m512i __DEFAULT_FN_ATTRS
6234_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
6235{
6236  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
6237             (__v4si) __B,
6238             (__v16si)
6239             _mm512_setzero_si512 (),
6240             (__mmask16) __U);
6241}
6242
6243static __inline__ __m512i __DEFAULT_FN_ATTRS
6244_mm512_srl_epi64 (__m512i __A, __m128i __B)
6245{
6246  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
6247             (__v2di) __B,
6248             (__v8di)
6249             _mm512_setzero_si512 (),
6250             (__mmask8) -1);
6251}
6252
6253static __inline__ __m512i __DEFAULT_FN_ATTRS
6254_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6255{
6256  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
6257             (__v2di) __B,
6258             (__v8di) __W,
6259             (__mmask8) __U);
6260}
6261
6262static __inline__ __m512i __DEFAULT_FN_ATTRS
6263_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
6264{
6265  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
6266             (__v2di) __B,
6267             (__v8di)
6268             _mm512_setzero_si512 (),
6269             (__mmask8) __U);
6270}
6271
6272static __inline__ __m512i __DEFAULT_FN_ATTRS
6273_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
6274{
6275  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
6276              (__v16si) __Y,
6277              (__v16si)
6278              _mm512_setzero_si512 (),
6279              (__mmask16) -1);
6280}
6281
6282static __inline__ __m512i __DEFAULT_FN_ATTRS
6283_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6284{
6285  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
6286              (__v16si) __Y,
6287              (__v16si) __W,
6288              (__mmask16) __U);
6289}
6290
6291static __inline__ __m512i __DEFAULT_FN_ATTRS
6292_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
6293{
6294  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
6295              (__v16si) __Y,
6296              (__v16si)
6297              _mm512_setzero_si512 (),
6298              (__mmask16) __U);
6299}
6300
6301static __inline__ __m512i __DEFAULT_FN_ATTRS
6302_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6303{
6304  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
6305             (__v8di) __Y,
6306             (__v8di)
6307             _mm512_setzero_si512 (),
6308             (__mmask8) -1);
6309}
6310
6311static __inline__ __m512i __DEFAULT_FN_ATTRS
6312_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6313{
6314  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
6315             (__v8di) __Y,
6316             (__v8di) __W,
6317             (__mmask8) __U);
6318}
6319
6320static __inline__ __m512i __DEFAULT_FN_ATTRS
6321_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
6322{
6323  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
6324             (__v8di) __Y,
6325             (__v8di)
6326             _mm512_setzero_si512 (),
6327             (__mmask8) __U);
6328}
6329
/* Unmasked form: calls __builtin_ia32_pternlogd512_mask on A, B, C with the
 * int immediate imm and all 16 mask bits set. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)-1); })
6335
/* Masked form: calls __builtin_ia32_pternlogd512_mask on A, B, C with the
 * int immediate imm and the caller's 16-bit mask U. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U)); })
6341
/* Zero-mask variant: calls __builtin_ia32_pternlogd512_maskz on A, B, C with
 * the int immediate imm and the caller's 16-bit mask U. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U)); })
6347
/* Unmasked form: calls __builtin_ia32_pternlogq512_mask on A, B, C with the
 * int immediate imm and all 8 mask bits set. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)-1); })
6353
/* Masked form: calls __builtin_ia32_pternlogq512_mask on A, B, C with the
 * int immediate imm and the caller's 8-bit mask U. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)(U)); })
6359
/* Zero-mask variant: calls __builtin_ia32_pternlogq512_maskz on A, B, C with
 * the int immediate imm and the caller's 8-bit mask U. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)(U)); })
6365
#ifdef __x86_64__
/* Only defined on x86-64: calls __builtin_ia32_vcvtsd2si64 on A with the
 * rounding argument R and yields a long long. */
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
#endif
6370
/* Calls __builtin_ia32_vcvtsd2si32 on A with the rounding argument R and
 * yields an int. */
#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6373
/* Calls __builtin_ia32_vcvtsd2si32 on A with the rounding argument R and
 * yields an int (same expansion as the _si32 spelling). */
#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6376
/* Calls __builtin_ia32_vcvtsd2usi32 on A with the rounding argument R and
 * yields an unsigned int. */
#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtsd2usi32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6379
6380static __inline__ unsigned __DEFAULT_FN_ATTRS
6381_mm_cvtsd_u32 (__m128d __A)
6382{
6383  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6384             _MM_FROUND_CUR_DIRECTION);
6385}
6386
6387#ifdef __x86_64__
6388#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
6389  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
6390                                                  (int)(R)); })
6391
6392static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6393_mm_cvtsd_u64 (__m128d __A)
6394{
6395  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6396                 __A,
6397                 _MM_FROUND_CUR_DIRECTION);
6398}
6399#endif
6400
/* Calls __builtin_ia32_vcvtss2si32 on A with the rounding argument R and
 * yields an int. */
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6403
/* Calls __builtin_ia32_vcvtss2si32 on A with the rounding argument R and
 * yields an int (same expansion as the _si32 spelling). */
#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6406
#ifdef __x86_64__
/* Only defined on x86-64: both spellings call __builtin_ia32_vcvtss2si64 on
 * A with the rounding argument R and yield a long long. */
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })

#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
#endif
6414
/* Calls __builtin_ia32_vcvtss2usi32 on A with the rounding argument R and
 * yields an unsigned int. */
#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtss2usi32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6417
6418static __inline__ unsigned __DEFAULT_FN_ATTRS
6419_mm_cvtss_u32 (__m128 __A)
6420{
6421  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6422             _MM_FROUND_CUR_DIRECTION);
6423}
6424
6425#ifdef __x86_64__
6426#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
6427  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6428                                                  (int)(R)); })
6429
6430static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6431_mm_cvtss_u64 (__m128 __A)
6432{
6433  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6434                 __A,
6435                 _MM_FROUND_CUR_DIRECTION);
6436}
6437#endif
6438
/* Calls __builtin_ia32_vcvttsd2si32 on A with the int argument R and yields
 * an int. */
#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6441
/* Calls __builtin_ia32_vcvttsd2si32 on A with the int argument R and yields
 * an int (same expansion as the _i32 spelling). */
#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6444
6445static __inline__ int __DEFAULT_FN_ATTRS
6446_mm_cvttsd_i32 (__m128d __A)
6447{
6448  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6449              _MM_FROUND_CUR_DIRECTION);
6450}
6451
6452#ifdef __x86_64__
6453#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
6454  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6455
6456#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
6457  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6458
6459static __inline__ long long __DEFAULT_FN_ATTRS
6460_mm_cvttsd_i64 (__m128d __A)
6461{
6462  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6463              _MM_FROUND_CUR_DIRECTION);
6464}
6465#endif
6466
/* Calls __builtin_ia32_vcvttsd2usi32 on A with the int argument R and yields
 * an unsigned int. */
#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttsd2usi32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6469
6470static __inline__ unsigned __DEFAULT_FN_ATTRS
6471_mm_cvttsd_u32 (__m128d __A)
6472{
6473  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6474              _MM_FROUND_CUR_DIRECTION);
6475}
6476
6477#ifdef __x86_64__
6478#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
6479  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6480                                                   (int)(R)); })
6481
6482static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6483_mm_cvttsd_u64 (__m128d __A)
6484{
6485  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6486                  __A,
6487                  _MM_FROUND_CUR_DIRECTION);
6488}
6489#endif
6490
/* Calls __builtin_ia32_vcvttss2si32 on A with the int argument R and yields
 * an int. */
#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6493
/* Calls __builtin_ia32_vcvttss2si32 on A with the int argument R and yields
 * an int (same expansion as the _i32 spelling). */
#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6496
6497static __inline__ int __DEFAULT_FN_ATTRS
6498_mm_cvttss_i32 (__m128 __A)
6499{
6500  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6501              _MM_FROUND_CUR_DIRECTION);
6502}
6503
6504#ifdef __x86_64__
6505#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
6506  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6507
6508#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
6509  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6510
6511static __inline__ long long __DEFAULT_FN_ATTRS
6512_mm_cvttss_i64 (__m128 __A)
6513{
6514  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6515              _MM_FROUND_CUR_DIRECTION);
6516}
6517#endif
6518
/* Calls __builtin_ia32_vcvttss2usi32 on A with the int argument R and yields
 * an unsigned int. */
#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttss2usi32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6521
6522static __inline__ unsigned __DEFAULT_FN_ATTRS
6523_mm_cvttss_u32 (__m128 __A)
6524{
6525  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6526              _MM_FROUND_CUR_DIRECTION);
6527}
6528
6529#ifdef __x86_64__
6530#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
6531  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6532                                                   (int)(R)); })
6533
6534static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6535_mm_cvttss_u64 (__m128 __A)
6536{
6537  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6538                  __A,
6539                  _MM_FROUND_CUR_DIRECTION);
6540}
6541#endif
6542
6543static __inline__ __m512d __DEFAULT_FN_ATTRS
6544_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6545            __m512d __B)
6546{
6547  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6548              (__v8di) __I
6549              /* idx */ ,
6550              (__v8df) __B,
6551              (__mmask8) __U);
6552}
6553
6554static __inline__ __m512 __DEFAULT_FN_ATTRS
6555_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6556            __m512 __B)
6557{
6558  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6559                   (__v16si) __I
6560                   /* idx */ ,
6561                   (__v16sf) __B,
6562                   (__mmask16) __U);
6563}
6564
6565static __inline__ __m512i __DEFAULT_FN_ATTRS
6566_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6567         __mmask8 __U, __m512i __B)
6568{
6569  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6570                   (__v8di) __I
6571                   /* idx */ ,
6572                   (__v8di) __B,
6573                   (__mmask8) __U);
6574}
6575
/* Shuffles X with __builtin_shufflevector. Result element i is taken from X
 * at index (i & ~1) + bit i of C, i.e. one of the two elements of the same
 * adjacent pair. Every index is below 8, so the second operand
 * (_mm512_undefined_pd()) is never selected. */
#define _mm512_permute_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(X), \
      (__v8df)_mm512_undefined_pd(), \
      /* pair 0 */ 0 + (((C) >> 0) & 0x1), 0 + (((C) >> 1) & 0x1), \
      /* pair 1 */ 2 + (((C) >> 2) & 0x1), 2 + (((C) >> 3) & 0x1), \
      /* pair 2 */ 4 + (((C) >> 4) & 0x1), 4 + (((C) >> 5) & 0x1), \
      /* pair 3 */ 6 + (((C) >> 6) & 0x1), 6 + (((C) >> 7) & 0x1)); })
6587
/* Evaluates _mm512_permute_pd(X, C) and passes it, with mask U and vector W,
 * to __builtin_ia32_selectpd_512 in the order (U, permuted, W). */
#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)(__m512d)(W)); })
6592
/* Evaluates _mm512_permute_pd(X, C) and passes it, with mask U and an
 * all-zero vector, to __builtin_ia32_selectpd_512 in the order
 * (U, permuted, zero). */
#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()); })
6597
/* Shuffles X with __builtin_shufflevector. The same four 2-bit fields of C
 * (bits 1:0, 3:2, 5:4, 7:6) are applied to each group of four elements:
 * result element i is X[(i & ~3) + ((C >> (2 * (i & 3))) & 3)]. Every index
 * is below 16, so the second operand (_mm512_undefined_ps()) is never
 * selected. */
#define _mm512_permute_ps(X, C) __extension__ ({ \
  (__m512)__builtin_shufflevector( \
      (__v16sf)(__m512)(X), \
      (__v16sf)_mm512_undefined_ps(), \
      /* elements 0..3 */ \
      0  + (((C) >> 0) & 0x3), 0  + (((C) >> 2) & 0x3), \
      0  + (((C) >> 4) & 0x3), 0  + (((C) >> 6) & 0x3), \
      /* elements 4..7 */ \
      4  + (((C) >> 0) & 0x3), 4  + (((C) >> 2) & 0x3), \
      4  + (((C) >> 4) & 0x3), 4  + (((C) >> 6) & 0x3), \
      /* elements 8..11 */ \
      8  + (((C) >> 0) & 0x3), 8  + (((C) >> 2) & 0x3), \
      8  + (((C) >> 4) & 0x3), 8  + (((C) >> 6) & 0x3), \
      /* elements 12..15 */ \
      12 + (((C) >> 0) & 0x3), 12 + (((C) >> 2) & 0x3), \
      12 + (((C) >> 4) & 0x3), 12 + (((C) >> 6) & 0x3)); })
6617
/* Evaluates _mm512_permute_ps(X, C) and passes it, with mask U and vector W,
 * to __builtin_ia32_selectps_512 in the order (U, permuted, W). */
#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)(__m512)(W)); })
6622
/* Evaluates _mm512_permute_ps(X, C) and passes it, with mask U and an
 * all-zero vector, to __builtin_ia32_selectps_512 in the order
 * (U, permuted, zero). */
#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)_mm512_setzero_ps()); })
6627
6628static __inline__ __m512d __DEFAULT_FN_ATTRS
6629_mm512_permutevar_pd (__m512d __A, __m512i __C)
6630{
6631  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6632              (__v8di) __C,
6633              (__v8df)
6634              _mm512_undefined_pd (),
6635              (__mmask8) -1);
6636}
6637
6638static __inline__ __m512d __DEFAULT_FN_ATTRS
6639_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6640{
6641  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6642              (__v8di) __C,
6643              (__v8df) __W,
6644              (__mmask8) __U);
6645}
6646
6647static __inline__ __m512d __DEFAULT_FN_ATTRS
6648_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
6649{
6650  return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
6651              (__v8di) __C,
6652              (__v8df)
6653              _mm512_setzero_pd (),
6654              (__mmask8) __U);
6655}
6656
6657static __inline__ __m512 __DEFAULT_FN_ATTRS
6658_mm512_permutevar_ps (__m512 __A, __m512i __C)
6659{
6660  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6661                   (__v16si) __C,
6662                   (__v16sf)
6663                   _mm512_undefined_ps (),
6664                   (__mmask16) -1);
6665}
6666
6667static __inline__ __m512 __DEFAULT_FN_ATTRS
6668_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6669{
6670  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6671                   (__v16si) __C,
6672                   (__v16sf) __W,
6673                   (__mmask16) __U);
6674}
6675
6676static __inline__ __m512 __DEFAULT_FN_ATTRS
6677_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
6678{
6679  return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
6680                   (__v16si) __C,
6681                   (__v16sf)
6682                   _mm512_setzero_ps (),
6683                   (__mmask16) __U);
6684}
6685
6686static __inline __m512d __DEFAULT_FN_ATTRS
6687_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6688{
6689  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6690                    /* idx */ ,
6691                    (__v8df) __A,
6692                    (__v8df) __B,
6693                    (__mmask8) -1);
6694}
6695
6696static __inline__ __m512d __DEFAULT_FN_ATTRS
6697_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6698{
6699  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6700                    /* idx */ ,
6701                    (__v8df) __A,
6702                    (__v8df) __B,
6703                    (__mmask8) __U);
6704}
6705
6706static __inline__ __m512d __DEFAULT_FN_ATTRS
6707_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6708            __m512d __B)
6709{
6710  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6711                                                         /* idx */ ,
6712                                                         (__v8df) __A,
6713                                                         (__v8df) __B,
6714                                                         (__mmask8) __U);
6715}
6716
6717static __inline __m512 __DEFAULT_FN_ATTRS
6718_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6719{
6720  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6721                                                         /* idx */ ,
6722                                                         (__v16sf) __A,
6723                                                         (__v16sf) __B,
6724                                                         (__mmask16) -1);
6725}
6726
6727static __inline__ __m512 __DEFAULT_FN_ATTRS
6728_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6729{
6730  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6731                                                         /* idx */ ,
6732                                                         (__v16sf) __A,
6733                                                         (__v16sf) __B,
6734                                                         (__mmask16) __U);
6735}
6736
6737static __inline__ __m512 __DEFAULT_FN_ATTRS
6738_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6739            __m512 __B)
6740{
6741  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6742                                                        /* idx */ ,
6743                                                        (__v16sf) __A,
6744                                                        (__v16sf) __B,
6745                                                        (__mmask16) __U);
6746}
6747
6748static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6749_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
6750{
6751  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6752             (__v16si) __B,
6753             (__mmask16) -1);
6754}
6755
6756static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6757_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
6758{
6759  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6760             (__v16si) __B, __U);
6761}
6762
6763static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6764_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
6765{
6766  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6767            (__v8di) __B,
6768            (__mmask8) -1);
6769}
6770
6771static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6772_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
6773{
6774  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6775            (__v8di) __B, __U);
6776}
6777
/* Expands to __builtin_ia32_cvttpd2udq512_mask on A with an undefined
 * 256-bit second operand, an all-ones mask, and R as the final int
 * argument. */
#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_undefined_si256(), \
      (__mmask8)-1, \
      (int)(R)); })
6782
/* Expands to __builtin_ia32_cvttpd2udq512_mask on A with W as the second
 * operand, mask U, and R as the final int argument. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), \
      (int)(R)); })
6787
/* Expands to __builtin_ia32_cvttpd2udq512_mask on A with an all-zero second
 * operand, mask U, and R as the final int argument. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), \
      (int)(R)); })
6792
6793static __inline__ __m256i __DEFAULT_FN_ATTRS
6794_mm512_cvttpd_epu32 (__m512d __A)
6795{
6796  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6797                  (__v8si)
6798                  _mm256_undefined_si256 (),
6799                  (__mmask8) -1,
6800                  _MM_FROUND_CUR_DIRECTION);
6801}
6802
6803static __inline__ __m256i __DEFAULT_FN_ATTRS
6804_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6805{
6806  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6807                  (__v8si) __W,
6808                  (__mmask8) __U,
6809                  _MM_FROUND_CUR_DIRECTION);
6810}
6811
6812static __inline__ __m256i __DEFAULT_FN_ATTRS
6813_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6814{
6815  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6816                  (__v8si)
6817                  _mm256_setzero_si256 (),
6818                  (__mmask8) __U,
6819                  _MM_FROUND_CUR_DIRECTION);
6820}
6821
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero, all-ones mask,
 * imm, R); imm and R are forwarded as int arguments. */
#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(imm), \
      (int)(R)); })
6828
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero, all-ones mask,
 * imm, _MM_FROUND_CUR_DIRECTION). */
#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(imm), \
      _MM_FROUND_CUR_DIRECTION); })
6835
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, W, U, imm,
 * _MM_FROUND_CUR_DIRECTION). */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(imm), \
      _MM_FROUND_CUR_DIRECTION); })
6842
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, W, U, I, R); I and R
 * are forwarded as int arguments. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(I), \
      (int)(R)); })
6849
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero, U, I,
 * _MM_FROUND_CUR_DIRECTION). */
#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(I), \
      _MM_FROUND_CUR_DIRECTION); })
6856
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero, U, I, R); I
 * and R are forwarded as int arguments. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(I), \
      (int)(R)); })
6863
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero, all-ones mask,
 * imm, R); imm and R are forwarded as int arguments. */
#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(imm), \
      (int)(R)); })
6870
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero, all-ones mask,
 * imm, _MM_FROUND_CUR_DIRECTION). */
#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(imm), \
      _MM_FROUND_CUR_DIRECTION); })
6877
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, W, U, I,
 * _MM_FROUND_CUR_DIRECTION). */
#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(I), \
      _MM_FROUND_CUR_DIRECTION); })
6884
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, W, U, I, R); I and R
 * are forwarded as int arguments. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(I), \
      (int)(R)); })
6891
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero, U, I,
 * _MM_FROUND_CUR_DIRECTION). */
#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(I), \
      _MM_FROUND_CUR_DIRECTION); })
6898
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero, U, I, R); I
 * and R are forwarded as int arguments. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(I), \
      (int)(R)); })
6905
/* Expands to __builtin_ia32_scalefpd512_mask(A, B, undefined, all-ones mask,
 * R); R is forwarded as the final int argument. */
#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
6911
/* Expands to __builtin_ia32_scalefpd512_mask(A, B, W, U, R); R is forwarded
 * as the final int argument. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
6917
/* Expands to __builtin_ia32_scalefpd512_mask(A, B, zero, U, R); R is
 * forwarded as the final int argument. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
6923
6924static __inline__ __m512d __DEFAULT_FN_ATTRS
6925_mm512_scalef_pd (__m512d __A, __m512d __B)
6926{
6927  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6928                (__v8df) __B,
6929                (__v8df)
6930                _mm512_undefined_pd (),
6931                (__mmask8) -1,
6932                _MM_FROUND_CUR_DIRECTION);
6933}
6934
6935static __inline__ __m512d __DEFAULT_FN_ATTRS
6936_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6937{
6938  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6939                (__v8df) __B,
6940                (__v8df) __W,
6941                (__mmask8) __U,
6942                _MM_FROUND_CUR_DIRECTION);
6943}
6944
6945static __inline__ __m512d __DEFAULT_FN_ATTRS
6946_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6947{
6948  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6949                (__v8df) __B,
6950                (__v8df)
6951                _mm512_setzero_pd (),
6952                (__mmask8) __U,
6953                _MM_FROUND_CUR_DIRECTION);
6954}
6955
/* Expands to __builtin_ia32_scalefps512_mask(A, B, undefined, all-ones mask,
 * R); R is forwarded as the final int argument. */
#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
6961
/* Expands to __builtin_ia32_scalefps512_mask(A, B, W, U, R); R is forwarded
 * as the final int argument. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
6967
/* Expands to __builtin_ia32_scalefps512_mask(A, B, zero, U, R); R is
 * forwarded as the final int argument. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
6973
6974static __inline__ __m512 __DEFAULT_FN_ATTRS
6975_mm512_scalef_ps (__m512 __A, __m512 __B)
6976{
6977  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6978               (__v16sf) __B,
6979               (__v16sf)
6980               _mm512_undefined_ps (),
6981               (__mmask16) -1,
6982               _MM_FROUND_CUR_DIRECTION);
6983}
6984
6985static __inline__ __m512 __DEFAULT_FN_ATTRS
6986_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6987{
6988  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6989               (__v16sf) __B,
6990               (__v16sf) __W,
6991               (__mmask16) __U,
6992               _MM_FROUND_CUR_DIRECTION);
6993}
6994
6995static __inline__ __m512 __DEFAULT_FN_ATTRS
6996_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6997{
6998  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6999               (__v16sf) __B,
7000               (__v16sf)
7001               _mm512_setzero_ps (),
7002               (__mmask16) __U,
7003               _MM_FROUND_CUR_DIRECTION);
7004}
7005
/* Expands to __builtin_ia32_scalefsd_round_mask(A, B, zero, all-ones mask,
 * R); R is forwarded as the final int argument. */
#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
7011
7012static __inline__ __m128d __DEFAULT_FN_ATTRS
7013_mm_scalef_sd (__m128d __A, __m128d __B)
7014{
7015  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
7016              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
7017              (__mmask8) -1,
7018              _MM_FROUND_CUR_DIRECTION);
7019}
7020
7021static __inline__ __m128d __DEFAULT_FN_ATTRS
7022_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7023{
7024 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
7025                 (__v2df) __B,
7026                (__v2df) __W,
7027                (__mmask8) __U,
7028                _MM_FROUND_CUR_DIRECTION);
7029}
7030
/* Expands to __builtin_ia32_scalefsd_round_mask(A, B, W, U, R); R is
 * forwarded as the final int argument. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
7036
7037static __inline__ __m128d __DEFAULT_FN_ATTRS
7038_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
7039{
7040 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
7041                 (__v2df) __B,
7042                (__v2df) _mm_setzero_pd (),
7043                (__mmask8) __U,
7044                _MM_FROUND_CUR_DIRECTION);
7045}
7046
/* Expands to __builtin_ia32_scalefsd_round_mask(A, B, zero, U, R); R is
 * forwarded as the final int argument. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
7052
/* Expands to __builtin_ia32_scalefss_round_mask(A, B, zero, all-ones mask,
 * R); R is forwarded as the final int argument. */
#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
7058
7059static __inline__ __m128 __DEFAULT_FN_ATTRS
7060_mm_scalef_ss (__m128 __A, __m128 __B)
7061{
7062  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
7063             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
7064             (__mmask8) -1,
7065             _MM_FROUND_CUR_DIRECTION);
7066}
7067
7068static __inline__ __m128 __DEFAULT_FN_ATTRS
7069_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7070{
7071 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7072                (__v4sf) __B,
7073                (__v4sf) __W,
7074                (__mmask8) __U,
7075                _MM_FROUND_CUR_DIRECTION);
7076}
7077
/* Expands to __builtin_ia32_scalefss_round_mask(A, B, W, U, R); R is
 * forwarded as the final int argument. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
7083
7084static __inline__ __m128 __DEFAULT_FN_ATTRS
7085_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
7086{
7087 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7088                 (__v4sf) __B,
7089                (__v4sf) _mm_setzero_ps (),
7090                (__mmask8) __U,
7091                _MM_FROUND_CUR_DIRECTION);
7092}
7093
/* Expands to __builtin_ia32_scalefss_round_mask(A, B, zero, U, R).
 *
 * R is forwarded as the builtin's final int argument, matching the other
 * *_scalef_round_* macros (e.g. _mm_maskz_scalef_round_sd); the caller's
 * rounding argument must not be replaced by _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), \
                                             (int)(R)); })
7100
/* Expands to __builtin_ia32_psradi512_mask(A, B, zero, all-ones mask); B is
 * forwarded as an int. */
#define _mm512_srai_epi32(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psradi512_mask( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })
7105
/* Expands to __builtin_ia32_psradi512_mask(A, B, W, U); B is forwarded as an
 * int. */
#define _mm512_mask_srai_epi32(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psradi512_mask( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)); })
7110
/* Expands to __builtin_ia32_psradi512_mask(A, B, zero, U); B is forwarded as
 * an int. */
#define _mm512_maskz_srai_epi32(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psradi512_mask( \
      (__v16si)(__m512i)(A), \
      (int)(B), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); })
7115
/* Expands to __builtin_ia32_psraqi512_mask(A, B, zero, all-ones mask); B is
 * forwarded as an int. */
#define _mm512_srai_epi64(A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psraqi512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })
7120
/* Expands to __builtin_ia32_psraqi512_mask(A, B, W, U); B is forwarded as an
 * int. */
#define _mm512_mask_srai_epi64(W, U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psraqi512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); })
7125
/* Expands to __builtin_ia32_psraqi512_mask(A, B, zero, U); B is forwarded as
 * an int. */
#define _mm512_maskz_srai_epi64(U, A, B) __extension__ ({ \
  (__m512i)__builtin_ia32_psraqi512_mask( \
      (__v8di)(__m512i)(A), \
      (int)(B), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); })
7130
/* Expands to __builtin_ia32_shuf_f32x4_mask(A, B, imm, undefined, all-ones
 * mask); imm is forwarded as an int. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1); })
7136
/* Expands to __builtin_ia32_shuf_f32x4_mask(A, B, imm, W, U); imm is
 * forwarded as an int. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U)); })
7142
/* Expands to __builtin_ia32_shuf_f32x4_mask(A, B, imm, zero, U); imm is
 * forwarded as an int. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U)); })
7148
/* Expands to __builtin_ia32_shuf_f64x2_mask(A, B, imm, undefined, all-ones
 * mask); imm is forwarded as an int. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1); })
7154
/* Expands to __builtin_ia32_shuf_f64x2_mask(A, B, imm, W, U); imm is
 * forwarded as an int. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U)); })
7160
/* Expands to __builtin_ia32_shuf_f64x2_mask(A, B, imm, zero, U); imm is
 * forwarded as an int. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U)); })
7166
/* Expands to __builtin_ia32_shuf_i32x4_mask(A, B, imm, zero, all-ones mask);
 * imm is forwarded as an int. */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })
7172
/* Expands to __builtin_ia32_shuf_i32x4_mask(A, B, imm, W, U); imm is
 * forwarded as an int. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)); })
7178
/* Expands to __builtin_ia32_shuf_i32x4_mask(A, B, imm, zero, U); imm is
 * forwarded as an int. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); })
7184
/* Expands to __builtin_ia32_shuf_i64x2_mask(A, B, imm, zero, all-ones mask);
 * imm is forwarded as an int. */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })
7190
/* Expands to __builtin_ia32_shuf_i64x2_mask(A, B, imm, W, U); imm is
 * forwarded as an int. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); })
7196
/* Expands to __builtin_ia32_shuf_i64x2_mask(A, B, imm, zero, U); imm is
 * forwarded as an int. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); })
7202
/* Shuffles A and B with __builtin_shufflevector, whose indices 0..7 address
 * the first operand (A) and 8..15 the second (B). Even result elements come
 * from A and odd ones from B; in both cases bit i of M picks which element
 * of the matching adjacent pair is used:
 *   result[i] = (i even ? A : B)[(i & ~1) + ((M >> i) & 1)]. */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      /* pair 0 */ 0  + (((M) >> 0) & 0x1), 8  + (((M) >> 1) & 0x1), \
      /* pair 1 */ 2  + (((M) >> 2) & 0x1), 10 + (((M) >> 3) & 0x1), \
      /* pair 2 */ 4  + (((M) >> 4) & 0x1), 12 + (((M) >> 5) & 0x1), \
      /* pair 3 */ 6  + (((M) >> 6) & 0x1), 14 + (((M) >> 7) & 0x1)); })
7214
/* Feeds __builtin_ia32_selectpd_512 with mask U, the result of
   _mm512_shuffle_pd(A, B, M) and the vector W.  */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)(__m512d)(W)); })
7219
/* Feeds __builtin_ia32_selectpd_512 with mask U, the result of
   _mm512_shuffle_pd(A, B, M) and an all-zero vector.  */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)_mm512_setzero_pd()); })
7224
/* Builds the result with __builtin_shufflevector, where indices 0-15 name
   elements of A and 16-31 name elements of B.  Within each group of four
   result elements the first two are taken from A and the last two from B;
   each 2-bit field of M is added to the group's base index.  M must be a
   constant expression.

   The result is a vector of 16 floats, so it is cast to __m512 (it was
   previously cast to __m512d, which gave the expression the wrong type
   whenever it was used without a further cast).  */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })
7244
/* Feeds __builtin_ia32_selectps_512 with mask U, the result of
   _mm512_shuffle_ps(A, B, M) and the vector W.  */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)(__m512)(W)); })
7249
/* Feeds __builtin_ia32_selectps_512 with mask U, the result of
   _mm512_shuffle_ps(A, B, M) and an all-zero vector.  */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)_mm512_setzero_ps()); })
7254
/* Unmasked use of __builtin_ia32_sqrtsd_round_mask on A and B with
   rounding argument R: the mask is all ones and the pass-through operand
   is the all-zero vector.  */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
7260
7261static __inline__ __m128d __DEFAULT_FN_ATTRS
7262_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7263{
7264 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7265                 (__v2df) __B,
7266                (__v2df) __W,
7267                (__mmask8) __U,
7268                _MM_FROUND_CUR_DIRECTION);
7269}
7270
/* Expands to __builtin_ia32_sqrtsd_round_mask on A and B with W as the
   pass-through operand, U as the mask and R as the rounding argument.  */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
7276
7277static __inline__ __m128d __DEFAULT_FN_ATTRS
7278_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
7279{
7280 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7281                 (__v2df) __B,
7282                (__v2df) _mm_setzero_pd (),
7283                (__mmask8) __U,
7284                _MM_FROUND_CUR_DIRECTION);
7285}
7286
/* Expands to __builtin_ia32_sqrtsd_round_mask on A and B with an all-zero
   pass-through operand, U as the mask and R as the rounding argument.  */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
7292
/* Unmasked use of __builtin_ia32_sqrtss_round_mask on A and B with
   rounding argument R: the mask is all ones and the pass-through operand
   is the all-zero vector.  */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
7298
7299static __inline__ __m128 __DEFAULT_FN_ATTRS
7300_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7301{
7302 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7303                 (__v4sf) __B,
7304                (__v4sf) __W,
7305                (__mmask8) __U,
7306                _MM_FROUND_CUR_DIRECTION);
7307}
7308
/* Expands to __builtin_ia32_sqrtss_round_mask on A and B with W as the
   pass-through operand, U as the mask and R as the rounding argument.  */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
7314
7315static __inline__ __m128 __DEFAULT_FN_ATTRS
7316_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
7317{
7318 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7319                 (__v4sf) __B,
7320                (__v4sf) _mm_setzero_ps (),
7321                (__mmask8) __U,
7322                _MM_FROUND_CUR_DIRECTION);
7323}
7324
/* Expands to __builtin_ia32_sqrtss_round_mask on A and B with an all-zero
   pass-through operand, U as the mask and R as the rounding argument.  */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
7330
7331static __inline__ __m512 __DEFAULT_FN_ATTRS
7332_mm512_broadcast_f32x4 (__m128 __A)
7333{
7334  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7335                 (__v16sf)
7336                 _mm512_undefined_ps (),
7337                 (__mmask16) -1);
7338}
7339
7340static __inline__ __m512 __DEFAULT_FN_ATTRS
7341_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
7342{
7343  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7344                 (__v16sf) __O,
7345                 __M);
7346}
7347
7348static __inline__ __m512 __DEFAULT_FN_ATTRS
7349_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
7350{
7351  return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7352                 (__v16sf)
7353                 _mm512_setzero_ps (),
7354                 __M);
7355}
7356
7357static __inline__ __m512d __DEFAULT_FN_ATTRS
7358_mm512_broadcast_f64x4 (__m256d __A)
7359{
7360  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7361                  (__v8df)
7362                  _mm512_undefined_pd (),
7363                  (__mmask8) -1);
7364}
7365
7366static __inline__ __m512d __DEFAULT_FN_ATTRS
7367_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
7368{
7369  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7370                  (__v8df) __O,
7371                  __M);
7372}
7373
7374static __inline__ __m512d __DEFAULT_FN_ATTRS
7375_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
7376{
7377  return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7378                  (__v8df)
7379                  _mm512_setzero_pd (),
7380                  __M);
7381}
7382
7383static __inline__ __m512i __DEFAULT_FN_ATTRS
7384_mm512_broadcast_i32x4 (__m128i __A)
7385{
7386  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7387                  (__v16si)
7388                  _mm512_undefined_epi32 (),
7389                  (__mmask16) -1);
7390}
7391
7392static __inline__ __m512i __DEFAULT_FN_ATTRS
7393_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
7394{
7395  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7396                  (__v16si) __O,
7397                  __M);
7398}
7399
7400static __inline__ __m512i __DEFAULT_FN_ATTRS
7401_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
7402{
7403  return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7404                  (__v16si)
7405                  _mm512_setzero_si512 (),
7406                  __M);
7407}
7408
7409static __inline__ __m512i __DEFAULT_FN_ATTRS
7410_mm512_broadcast_i64x4 (__m256i __A)
7411{
7412  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7413                  (__v8di)
7414                  _mm512_undefined_epi32 (),
7415                  (__mmask8) -1);
7416}
7417
7418static __inline__ __m512i __DEFAULT_FN_ATTRS
7419_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
7420{
7421  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7422                  (__v8di) __O,
7423                  __M);
7424}
7425
7426static __inline__ __m512i __DEFAULT_FN_ATTRS
7427_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
7428{
7429  return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7430                  (__v8di)
7431                  _mm512_setzero_si512 (),
7432                  __M);
7433}
7434
7435static __inline__ __m512d __DEFAULT_FN_ATTRS
7436_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7437{
7438  return (__m512d)__builtin_ia32_selectpd_512(__M,
7439                                              (__v8df) _mm512_broadcastsd_pd(__A),
7440                                              (__v8df) __O);
7441}
7442
7443static __inline__ __m512d __DEFAULT_FN_ATTRS
7444_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7445{
7446  return (__m512d)__builtin_ia32_selectpd_512(__M,
7447                                              (__v8df) _mm512_broadcastsd_pd(__A),
7448                                              (__v8df) _mm512_setzero_pd());
7449}
7450
7451static __inline__ __m512 __DEFAULT_FN_ATTRS
7452_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7453{
7454  return (__m512)__builtin_ia32_selectps_512(__M,
7455                                             (__v16sf) _mm512_broadcastss_ps(__A),
7456                                             (__v16sf) __O);
7457}
7458
7459static __inline__ __m512 __DEFAULT_FN_ATTRS
7460_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7461{
7462  return (__m512)__builtin_ia32_selectps_512(__M,
7463                                             (__v16sf) _mm512_broadcastss_ps(__A),
7464                                             (__v16sf) _mm512_setzero_ps());
7465}
7466
7467static __inline__ __m128i __DEFAULT_FN_ATTRS
7468_mm512_cvtsepi32_epi8 (__m512i __A)
7469{
7470  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7471               (__v16qi) _mm_undefined_si128 (),
7472               (__mmask16) -1);
7473}
7474
7475static __inline__ __m128i __DEFAULT_FN_ATTRS
7476_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7477{
7478  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7479               (__v16qi) __O, __M);
7480}
7481
7482static __inline__ __m128i __DEFAULT_FN_ATTRS
7483_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7484{
7485  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7486               (__v16qi) _mm_setzero_si128 (),
7487               __M);
7488}
7489
7490static __inline__ void __DEFAULT_FN_ATTRS
7491_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7492{
7493  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7494}
7495
7496static __inline__ __m256i __DEFAULT_FN_ATTRS
7497_mm512_cvtsepi32_epi16 (__m512i __A)
7498{
7499  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7500               (__v16hi) _mm256_undefined_si256 (),
7501               (__mmask16) -1);
7502}
7503
7504static __inline__ __m256i __DEFAULT_FN_ATTRS
7505_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7506{
7507  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7508               (__v16hi) __O, __M);
7509}
7510
7511static __inline__ __m256i __DEFAULT_FN_ATTRS
7512_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7513{
7514  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7515               (__v16hi) _mm256_setzero_si256 (),
7516               __M);
7517}
7518
7519static __inline__ void __DEFAULT_FN_ATTRS
7520_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7521{
7522  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7523}
7524
7525static __inline__ __m128i __DEFAULT_FN_ATTRS
7526_mm512_cvtsepi64_epi8 (__m512i __A)
7527{
7528  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7529               (__v16qi) _mm_undefined_si128 (),
7530               (__mmask8) -1);
7531}
7532
7533static __inline__ __m128i __DEFAULT_FN_ATTRS
7534_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7535{
7536  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7537               (__v16qi) __O, __M);
7538}
7539
7540static __inline__ __m128i __DEFAULT_FN_ATTRS
7541_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7542{
7543  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7544               (__v16qi) _mm_setzero_si128 (),
7545               __M);
7546}
7547
7548static __inline__ void __DEFAULT_FN_ATTRS
7549_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7550{
7551  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7552}
7553
7554static __inline__ __m256i __DEFAULT_FN_ATTRS
7555_mm512_cvtsepi64_epi32 (__m512i __A)
7556{
7557  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7558               (__v8si) _mm256_undefined_si256 (),
7559               (__mmask8) -1);
7560}
7561
7562static __inline__ __m256i __DEFAULT_FN_ATTRS
7563_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7564{
7565  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7566               (__v8si) __O, __M);
7567}
7568
7569static __inline__ __m256i __DEFAULT_FN_ATTRS
7570_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7571{
7572  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7573               (__v8si) _mm256_setzero_si256 (),
7574               __M);
7575}
7576
7577static __inline__ void __DEFAULT_FN_ATTRS
7578_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7579{
7580  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7581}
7582
7583static __inline__ __m128i __DEFAULT_FN_ATTRS
7584_mm512_cvtsepi64_epi16 (__m512i __A)
7585{
7586  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7587               (__v8hi) _mm_undefined_si128 (),
7588               (__mmask8) -1);
7589}
7590
7591static __inline__ __m128i __DEFAULT_FN_ATTRS
7592_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7593{
7594  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7595               (__v8hi) __O, __M);
7596}
7597
7598static __inline__ __m128i __DEFAULT_FN_ATTRS
7599_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7600{
7601  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7602               (__v8hi) _mm_setzero_si128 (),
7603               __M);
7604}
7605
7606static __inline__ void __DEFAULT_FN_ATTRS
7607_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7608{
7609  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7610}
7611
7612static __inline__ __m128i __DEFAULT_FN_ATTRS
7613_mm512_cvtusepi32_epi8 (__m512i __A)
7614{
7615  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7616                (__v16qi) _mm_undefined_si128 (),
7617                (__mmask16) -1);
7618}
7619
7620static __inline__ __m128i __DEFAULT_FN_ATTRS
7621_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7622{
7623  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7624                (__v16qi) __O,
7625                __M);
7626}
7627
7628static __inline__ __m128i __DEFAULT_FN_ATTRS
7629_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7630{
7631  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7632                (__v16qi) _mm_setzero_si128 (),
7633                __M);
7634}
7635
7636static __inline__ void __DEFAULT_FN_ATTRS
7637_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7638{
7639  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7640}
7641
7642static __inline__ __m256i __DEFAULT_FN_ATTRS
7643_mm512_cvtusepi32_epi16 (__m512i __A)
7644{
7645  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7646                (__v16hi) _mm256_undefined_si256 (),
7647                (__mmask16) -1);
7648}
7649
7650static __inline__ __m256i __DEFAULT_FN_ATTRS
7651_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7652{
7653  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7654                (__v16hi) __O,
7655                __M);
7656}
7657
7658static __inline__ __m256i __DEFAULT_FN_ATTRS
7659_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7660{
7661  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7662                (__v16hi) _mm256_setzero_si256 (),
7663                __M);
7664}
7665
7666static __inline__ void __DEFAULT_FN_ATTRS
7667_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7668{
7669  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7670}
7671
7672static __inline__ __m128i __DEFAULT_FN_ATTRS
7673_mm512_cvtusepi64_epi8 (__m512i __A)
7674{
7675  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7676                (__v16qi) _mm_undefined_si128 (),
7677                (__mmask8) -1);
7678}
7679
7680static __inline__ __m128i __DEFAULT_FN_ATTRS
7681_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7682{
7683  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7684                (__v16qi) __O,
7685                __M);
7686}
7687
7688static __inline__ __m128i __DEFAULT_FN_ATTRS
7689_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7690{
7691  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7692                (__v16qi) _mm_setzero_si128 (),
7693                __M);
7694}
7695
7696static __inline__ void __DEFAULT_FN_ATTRS
7697_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7698{
7699  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7700}
7701
7702static __inline__ __m256i __DEFAULT_FN_ATTRS
7703_mm512_cvtusepi64_epi32 (__m512i __A)
7704{
7705  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7706                (__v8si) _mm256_undefined_si256 (),
7707                (__mmask8) -1);
7708}
7709
7710static __inline__ __m256i __DEFAULT_FN_ATTRS
7711_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7712{
7713  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7714                (__v8si) __O, __M);
7715}
7716
7717static __inline__ __m256i __DEFAULT_FN_ATTRS
7718_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7719{
7720  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7721                (__v8si) _mm256_setzero_si256 (),
7722                __M);
7723}
7724
7725static __inline__ void __DEFAULT_FN_ATTRS
7726_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7727{
7728  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7729}
7730
7731static __inline__ __m128i __DEFAULT_FN_ATTRS
7732_mm512_cvtusepi64_epi16 (__m512i __A)
7733{
7734  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7735                (__v8hi) _mm_undefined_si128 (),
7736                (__mmask8) -1);
7737}
7738
7739static __inline__ __m128i __DEFAULT_FN_ATTRS
7740_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7741{
7742  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7743                (__v8hi) __O, __M);
7744}
7745
7746static __inline__ __m128i __DEFAULT_FN_ATTRS
7747_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7748{
7749  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7750                (__v8hi) _mm_setzero_si128 (),
7751                __M);
7752}
7753
7754static __inline__ void __DEFAULT_FN_ATTRS
7755_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7756{
7757  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7758}
7759
7760static __inline__ __m128i __DEFAULT_FN_ATTRS
7761_mm512_cvtepi32_epi8 (__m512i __A)
7762{
7763  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7764              (__v16qi) _mm_undefined_si128 (),
7765              (__mmask16) -1);
7766}
7767
7768static __inline__ __m128i __DEFAULT_FN_ATTRS
7769_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7770{
7771  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7772              (__v16qi) __O, __M);
7773}
7774
7775static __inline__ __m128i __DEFAULT_FN_ATTRS
7776_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7777{
7778  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7779              (__v16qi) _mm_setzero_si128 (),
7780              __M);
7781}
7782
7783static __inline__ void __DEFAULT_FN_ATTRS
7784_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7785{
7786  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7787}
7788
7789static __inline__ __m256i __DEFAULT_FN_ATTRS
7790_mm512_cvtepi32_epi16 (__m512i __A)
7791{
7792  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7793              (__v16hi) _mm256_undefined_si256 (),
7794              (__mmask16) -1);
7795}
7796
7797static __inline__ __m256i __DEFAULT_FN_ATTRS
7798_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7799{
7800  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7801              (__v16hi) __O, __M);
7802}
7803
7804static __inline__ __m256i __DEFAULT_FN_ATTRS
7805_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7806{
7807  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7808              (__v16hi) _mm256_setzero_si256 (),
7809              __M);
7810}
7811
7812static __inline__ void __DEFAULT_FN_ATTRS
7813_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7814{
7815  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7816}
7817
7818static __inline__ __m128i __DEFAULT_FN_ATTRS
7819_mm512_cvtepi64_epi8 (__m512i __A)
7820{
7821  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7822              (__v16qi) _mm_undefined_si128 (),
7823              (__mmask8) -1);
7824}
7825
7826static __inline__ __m128i __DEFAULT_FN_ATTRS
7827_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7828{
7829  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7830              (__v16qi) __O, __M);
7831}
7832
7833static __inline__ __m128i __DEFAULT_FN_ATTRS
7834_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7835{
7836  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7837              (__v16qi) _mm_setzero_si128 (),
7838              __M);
7839}
7840
7841static __inline__ void __DEFAULT_FN_ATTRS
7842_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7843{
7844  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7845}
7846
7847static __inline__ __m256i __DEFAULT_FN_ATTRS
7848_mm512_cvtepi64_epi32 (__m512i __A)
7849{
7850  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7851              (__v8si) _mm256_undefined_si256 (),
7852              (__mmask8) -1);
7853}
7854
7855static __inline__ __m256i __DEFAULT_FN_ATTRS
7856_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7857{
7858  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7859              (__v8si) __O, __M);
7860}
7861
7862static __inline__ __m256i __DEFAULT_FN_ATTRS
7863_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7864{
7865  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7866              (__v8si) _mm256_setzero_si256 (),
7867              __M);
7868}
7869
7870static __inline__ void __DEFAULT_FN_ATTRS
7871_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7872{
7873  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7874}
7875
7876static __inline__ __m128i __DEFAULT_FN_ATTRS
7877_mm512_cvtepi64_epi16 (__m512i __A)
7878{
7879  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7880              (__v8hi) _mm_undefined_si128 (),
7881              (__mmask8) -1);
7882}
7883
7884static __inline__ __m128i __DEFAULT_FN_ATTRS
7885_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7886{
7887  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7888              (__v8hi) __O, __M);
7889}
7890
7891static __inline__ __m128i __DEFAULT_FN_ATTRS
7892_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7893{
7894  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7895              (__v8hi) _mm_setzero_si128 (),
7896              __M);
7897}
7898
7899static __inline__ void __DEFAULT_FN_ATTRS
7900_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7901{
7902  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7903}
7904
/* Picks four consecutive 32-bit elements of A with __builtin_shufflevector.
   The starting element is 4 * (imm & 3); the second shuffle operand is an
   undefined vector that none of the indices refer to.  imm must be a
   constant expression.  */
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   4 * ((imm) & 0x3) + 0,             \
                                   4 * ((imm) & 0x3) + 1,             \
                                   4 * ((imm) & 0x3) + 2,             \
                                   4 * ((imm) & 0x3) + 3); })
7912
/* Feeds __builtin_ia32_selectd_128 with mask U, the result of
   _mm512_extracti32x4_epi32(A, imm) and the vector W.

   The expansion uses the macro parameters U and W.  It previously named
   __U and __W, which are not parameters of this macro and therefore only
   resolved if the caller happened to have such identifiers in scope.  */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)(__m128i)(W)); })
7917
/* Feeds __builtin_ia32_selectd_128 with mask U, the result of
   _mm512_extracti32x4_epi32(A, imm) and an all-zero vector.

   The expansion uses the macro parameter U.  It previously named __U,
   which is not a parameter of this macro and therefore only resolved if
   the caller happened to have such an identifier in scope.  */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
                                (__v4si)_mm_setzero_si128()); })
7922
/* Picks four consecutive 64-bit elements of A with __builtin_shufflevector:
   elements 0-3 when bit 0 of imm is clear, elements 4-7 when it is set.
   The second shuffle operand is an undefined vector that none of the
   indices refer to.  imm must be a constant expression.  */
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   4 * ((imm) & 1) + 0,              \
                                   4 * ((imm) & 1) + 1,              \
                                   4 * ((imm) & 1) + 2,              \
                                   4 * ((imm) & 1) + 3); })
7930
/* Feeds __builtin_ia32_selectq_256 with mask U, the result of
   _mm512_extracti64x4_epi64(A, imm) and the vector W.

   The expansion uses the macro parameters U and W.  It previously named
   __U and __W, which are not parameters of this macro and therefore only
   resolved if the caller happened to have such identifiers in scope.  */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)(__m256i)(W)); })
7935
/* Feeds __builtin_ia32_selectq_256 with mask U, the result of
   _mm512_extracti64x4_epi64(A, imm) and an all-zero vector.

   The expansion uses the macro parameter U.  It previously named __U,
   which is not a parameter of this macro and therefore only resolved if
   the caller happened to have such an identifier in scope.  */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
                                (__v4di)_mm256_setzero_si256()); })
7940
/* Combines A with the 256-bit vector B (widened by
   _mm512_castpd256_pd512) using __builtin_shufflevector, where indices
   0-7 name elements of A and 8-11 the four elements of B.  With bit 0 of
   imm clear, B supplies result elements 0-3 and A elements 4-7; with it
   set, A supplies elements 0-3 and B elements 4-7.  imm must be a
   constant expression.  */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
                                 (((imm) & 0x1) == 0) ?  8 :  0, \
                                 (((imm) & 0x1) == 0) ?  9 :  1, \
                                 (((imm) & 0x1) == 0) ? 10 :  2, \
                                 (((imm) & 0x1) == 0) ? 11 :  3, \
                                 (((imm) & 0x1) == 0) ?  4 :  8, \
                                 (((imm) & 0x1) == 0) ?  5 :  9, \
                                 (((imm) & 0x1) == 0) ?  6 : 10, \
                                 (((imm) & 0x1) == 0) ?  7 : 11); })
7952
/* Feeds __builtin_ia32_selectpd_512 with mask U, the result of
   _mm512_insertf64x4(A, B, imm) and the vector W.  */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)(W)); })
7957
/* Feeds __builtin_ia32_selectpd_512 with mask U, the result of
   _mm512_insertf64x4(A, B, imm) and an all-zero vector.  */
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()); })
7962
/* Combines A with the 256-bit vector B (widened by
   _mm512_castsi256_si512) using __builtin_shufflevector, where indices
   0-7 name elements of A and 8-11 the four elements of B.  With bit 0 of
   imm clear, B supplies result elements 0-3 and A elements 4-7; with it
   set, A supplies elements 0-3 and B elements 4-7.  imm must be a
   constant expression.  */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
                                 (((imm) & 0x1) == 0) ?  8 :  0, \
                                 (((imm) & 0x1) == 0) ?  9 :  1, \
                                 (((imm) & 0x1) == 0) ? 10 :  2, \
                                 (((imm) & 0x1) == 0) ? 11 :  3, \
                                 (((imm) & 0x1) == 0) ?  4 :  8, \
                                 (((imm) & 0x1) == 0) ?  5 :  9, \
                                 (((imm) & 0x1) == 0) ?  6 : 10, \
                                 (((imm) & 0x1) == 0) ?  7 : 11); })
7974
/* Feeds __builtin_ia32_selectq_512 with mask U, the result of
   _mm512_inserti64x4(A, B, imm) and the vector W.  */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)(W)); })
7979
/* Feeds __builtin_ia32_selectq_512 with mask U, the result of
   _mm512_inserti64x4(A, B, imm) and an all-zero vector.  */
#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()); })
7984
/* Combines A with the 128-bit vector B (widened by
   _mm512_castps128_ps512) using __builtin_shufflevector, where indices
   0-15 name elements of A and 16-19 the four elements of B.  The group of
   four result elements numbered (imm & 3) is taken from B; the other
   three groups are taken from the same positions of A.  imm must be a
   constant expression.  */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
                                  (((imm) & 0x3) != 0) ?  0 : 16, \
                                  (((imm) & 0x3) != 0) ?  1 : 17, \
                                  (((imm) & 0x3) != 0) ?  2 : 18, \
                                  (((imm) & 0x3) != 0) ?  3 : 19, \
                                  (((imm) & 0x3) != 1) ?  4 : 16, \
                                  (((imm) & 0x3) != 1) ?  5 : 17, \
                                  (((imm) & 0x3) != 1) ?  6 : 18, \
                                  (((imm) & 0x3) != 1) ?  7 : 19, \
                                  (((imm) & 0x3) != 2) ?  8 : 16, \
                                  (((imm) & 0x3) != 2) ?  9 : 17, \
                                  (((imm) & 0x3) != 2) ? 10 : 18, \
                                  (((imm) & 0x3) != 2) ? 11 : 19, \
                                  (((imm) & 0x3) != 3) ? 12 : 16, \
                                  (((imm) & 0x3) != 3) ? 13 : 17, \
                                  (((imm) & 0x3) != 3) ? 14 : 18, \
                                  (((imm) & 0x3) != 3) ? 15 : 19); })
8004
/* Merge-masked variant: hands mask U, the result of
 * _mm512_insertf32x4(A, B, imm) and W to __builtin_ia32_selectps_512
 * (presumably a set bit picks the insert result and a clear bit keeps W). */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)(W)); })
8009
/* Zero-masked variant: like _mm512_mask_insertf32x4 but the alternative
 * operand given to __builtin_ia32_selectps_512 is _mm512_setzero_ps(). */
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()); })
8014
/* Shuffle-based insert of a 128-bit integer vector into a 512-bit one.
 * B is widened with _mm512_castsi128_si512 and is addressed by shuffle
 * indices 16..19.  The low two bits of imm name the group of four result
 * elements that is taken from B; every other element is copied from the
 * same position of A. */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v16si)(__m512i)(A), \
      (__v16si)_mm512_castsi128_si512((__m128i)(B)), \
      (((imm) & 0x3) != 0) ?  0 : 16, \
      (((imm) & 0x3) != 0) ?  1 : 17, \
      (((imm) & 0x3) != 0) ?  2 : 18, \
      (((imm) & 0x3) != 0) ?  3 : 19, \
      (((imm) & 0x3) != 1) ?  4 : 16, \
      (((imm) & 0x3) != 1) ?  5 : 17, \
      (((imm) & 0x3) != 1) ?  6 : 18, \
      (((imm) & 0x3) != 1) ?  7 : 19, \
      (((imm) & 0x3) != 2) ?  8 : 16, \
      (((imm) & 0x3) != 2) ?  9 : 17, \
      (((imm) & 0x3) != 2) ? 10 : 18, \
      (((imm) & 0x3) != 2) ? 11 : 19, \
      (((imm) & 0x3) != 3) ? 12 : 16, \
      (((imm) & 0x3) != 3) ? 13 : 17, \
      (((imm) & 0x3) != 3) ? 14 : 18, \
      (((imm) & 0x3) != 3) ? 15 : 19); })
8034
/* Merge-masked variant: hands mask U, the result of
 * _mm512_inserti32x4(A, B, imm) and W to __builtin_ia32_selectd_512
 * (presumably a set bit picks the insert result and a clear bit keeps W). */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)(W)); })
8039
/* Zero-masked variant: like _mm512_mask_inserti32x4 but the alternative
 * operand given to __builtin_ia32_selectd_512 is _mm512_setzero_si512(). */
#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()); })
8044
/* Unmasked getmant on a __m512d with explicit rounding argument R.
 * B and C are packed into one int immediate as (C << 2) | B; the
 * passthrough is an undefined vector and the mask is all ones. */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
8050
/* Merge-masked getmant on a __m512d with explicit rounding argument R.
 * B and C are packed as (C << 2) | B; W is the passthrough vector and U
 * the lane mask handed to the builtin. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8056
/* Zero-masked getmant on a __m512d with explicit rounding argument R.
 * B and C are packed as (C << 2) | B; the passthrough vector is
 * _mm512_setzero_pd() and U is the lane mask handed to the builtin. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
8062
/* Unmasked getmant on a __m512d using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed as (C << 2) | B.  The passthrough here is a zero
 * vector (the _round_ form passes an undefined one); the mask is all ones. */
#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
8069
/* Merge-masked getmant on a __m512d using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed as (C << 2) | B; W is the passthrough vector and U
 * the lane mask handed to the builtin. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
8076
/* Zero-masked getmant on a __m512d using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed as (C << 2) | B; the passthrough vector is
 * _mm512_setzero_pd() and U is the lane mask handed to the builtin. */
#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)((B) | ((C) << 2)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
8083
/* Unmasked getmant on a __m512 with explicit rounding argument R.
 * B and C are packed into one int immediate as (C << 2) | B; the
 * passthrough is an undefined vector and the mask is all ones. */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)((B) | ((C) << 2)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
8089
/* Merge-masked getmant on a __m512 with explicit rounding argument R.
 * B and C are packed as (C << 2) | B; W is the passthrough vector and U
 * the lane mask handed to the builtin. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)((B) | ((C) << 2)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
8095
/* Zero-masked getmant on a __m512 with explicit rounding argument R.
 * B and C are packed as (C << 2) | B; the passthrough vector is
 * _mm512_setzero_ps() and U is the lane mask handed to the builtin. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)((B) | ((C) << 2)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
8101
/* Unmasked getmant on a __m512 using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed as (C << 2) | B; the passthrough is an undefined
 * vector and the mask is all ones. */
#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)((B) | ((C) << 2)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })
8108
/* Merge-masked getmant on a __m512 using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed as (C << 2) | B; W is the passthrough vector and U
 * the lane mask handed to the builtin. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)((B) | ((C) << 2)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
8115
/* Zero-masked getmant on a __m512 using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed as (C << 2) | B; the passthrough vector is
 * _mm512_setzero_ps() and U is the lane mask handed to the builtin. */
#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)((B) | ((C) << 2)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
8122
/* Unmasked getexp on a __m512d with explicit rounding argument R:
 * undefined passthrough vector, all-ones mask. */
#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
8127
/* Merge-masked getexp on a __m512d with explicit rounding argument R:
 * W is the passthrough vector, U the lane mask. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8132
/* Zero-masked getexp on a __m512d with explicit rounding argument R:
 * the passthrough vector is _mm512_setzero_pd(), U the lane mask. */
#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
8137
8138static __inline__ __m512d __DEFAULT_FN_ATTRS
8139_mm512_getexp_pd (__m512d __A)
8140{
8141  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8142                (__v8df) _mm512_undefined_pd (),
8143                (__mmask8) -1,
8144                _MM_FROUND_CUR_DIRECTION);
8145}
8146
8147static __inline__ __m512d __DEFAULT_FN_ATTRS
8148_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
8149{
8150  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8151                (__v8df) __W,
8152                (__mmask8) __U,
8153                _MM_FROUND_CUR_DIRECTION);
8154}
8155
8156static __inline__ __m512d __DEFAULT_FN_ATTRS
8157_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
8158{
8159  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8160                (__v8df) _mm512_setzero_pd (),
8161                (__mmask8) __U,
8162                _MM_FROUND_CUR_DIRECTION);
8163}
8164
/* Unmasked getexp on a __m512 with explicit rounding argument R:
 * undefined passthrough vector, all-ones mask. */
#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
8169
/* Merge-masked getexp on a __m512 with explicit rounding argument R:
 * W is the passthrough vector, U the lane mask. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
8174
/* Zero-masked getexp on a __m512 with explicit rounding argument R:
 * the passthrough vector is _mm512_setzero_ps(), U the lane mask. */
#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
8179
8180static __inline__ __m512 __DEFAULT_FN_ATTRS
8181_mm512_getexp_ps (__m512 __A)
8182{
8183  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8184               (__v16sf) _mm512_undefined_ps (),
8185               (__mmask16) -1,
8186               _MM_FROUND_CUR_DIRECTION);
8187}
8188
8189static __inline__ __m512 __DEFAULT_FN_ATTRS
8190_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
8191{
8192  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8193               (__v16sf) __W,
8194               (__mmask16) __U,
8195               _MM_FROUND_CUR_DIRECTION);
8196}
8197
8198static __inline__ __m512 __DEFAULT_FN_ATTRS
8199_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
8200{
8201  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8202               (__v16sf) _mm512_setzero_ps (),
8203               (__mmask16) __U,
8204               _MM_FROUND_CUR_DIRECTION);
8205}
8206
/* Unmasked gather of floats addressed by 64-bit indices: forwards an
 * undefined __m256 passthrough, addr as float const *, index as __v8di,
 * an all-ones __mmask8 and scale as int to __builtin_ia32_gatherdiv16sf. */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)_mm256_undefined_ps(), \
      (float const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8212
/* Masked gather of floats addressed by 64-bit indices: forwards v1_old as
 * the passthrough vector, addr as float const *, index as __v8di, mask as
 * __mmask8 and scale as int to __builtin_ia32_gatherdiv16sf.
 * Every macro parameter is parenthesized and cast before use, and the
 * result is cast to __m256, so that arguments which are expressions
 * (e.g. a conditional or a sum) expand with the intended precedence
 * instead of having a cast bind to only their first operand. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old), \
                                       (float const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })
8218
/* Unmasked gather of 32-bit integers addressed by 64-bit indices.  The
 * passthrough is _mm256_undefined_ps() reinterpreted as __v8si; addr is
 * passed as int const *, index as __v8di, with an all-ones __mmask8. */
#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)_mm256_undefined_ps(), \
      (int const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8224
/* Masked gather of 32-bit integers addressed by 64-bit indices: v1_old is
 * the passthrough vector, mask the __mmask8 handed to the builtin. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)(__m256i)(v1_old), \
      (int const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8230
/* Unmasked gather of doubles addressed by 64-bit indices: undefined
 * __m512d passthrough, addr as double const *, index as __v8di, all-ones
 * __mmask8, scale as int. */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (double const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8236
/* Masked gather of doubles addressed by 64-bit indices: v1_old is the
 * passthrough vector, mask the __mmask8 handed to the builtin. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (double const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8242
/* Unmasked gather of 64-bit integers addressed by 64-bit indices.  The
 * passthrough is _mm512_undefined_pd() reinterpreted as __v8di; addr is
 * passed as long long const *, index as __v8di, with an all-ones mask. */
#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)_mm512_undefined_pd(), \
      (long long const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8248
/* Masked gather of 64-bit integers addressed by 64-bit indices: v1_old is
 * the passthrough vector, mask the __mmask8 handed to the builtin. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (long long const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8254
/* Unmasked gather of floats addressed by 32-bit indices: undefined __m512
 * passthrough, addr as float const *, all-ones __mmask16, scale as int.
 * NOTE(review): index is cast through __m512/__v16sf here, whereas the
 * epi32 gather casts it to __v16si -- confirm this matches the index
 * type declared for __builtin_ia32_gathersiv16sf. */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({ \
  (__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)_mm512_undefined_ps(), \
      (float const *)(addr), \
      (__v16sf)(__m512)(index), \
      (__mmask16)-1, \
      (int)(scale)); })
8260
/* Masked gather of floats addressed by 32-bit indices: v1_old is the
 * passthrough vector, mask the __mmask16 handed to the builtin.
 * NOTE(review): index is cast through __m512/__v16sf here, whereas the
 * epi32 gather casts it to __v16si -- confirm this matches the index
 * type declared for __builtin_ia32_gathersiv16sf. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)(__m512)(v1_old), \
      (float const *)(addr), \
      (__v16sf)(__m512)(index), \
      (__mmask16)(mask), \
      (int)(scale)); })
8266
/* Unmasked gather of 32-bit integers addressed by 32-bit indices:
 * undefined __m512i passthrough, addr as int const *, index as __v16si,
 * all-ones __mmask16, scale as int. */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)_mm512_undefined_epi32(), \
      (int const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, \
      (int)(scale)); })
8272
/* Masked gather of 32-bit integers addressed by 32-bit indices: v1_old is
 * the passthrough vector, mask the __mmask16 handed to the builtin. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)(__m512i)(v1_old), \
      (int const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), \
      (int)(scale)); })
8278
/* Unmasked gather of doubles addressed by 32-bit indices held in a
 * __m256i: undefined __m512d passthrough, addr as double const *,
 * all-ones __mmask8, scale as int. */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (double const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8284
/* Masked gather of doubles addressed by 32-bit indices held in a
 * __m256i: v1_old is the passthrough vector, mask the __mmask8 handed to
 * the builtin. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (double const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8290
/* Unmasked gather of 64-bit integers addressed by 32-bit indices held in
 * a __m256i: undefined __m512i passthrough (reinterpreted as __v8di),
 * addr as long long const *, all-ones __mmask8, scale as int. */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)_mm512_undefined_epi32(), \
      (long long const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8296
/* Masked gather of 64-bit integers addressed by 32-bit indices held in a
 * __m256i: v1_old is the passthrough vector, mask the __mmask8 handed to
 * the builtin. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (long long const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8302
/* Unmasked scatter of a __m256 of floats using 64-bit indices: addr is
 * passed as float *, the mask is all ones, scale is passed as int. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf( \
      (float *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale)); })
8307
/* Masked scatter of a __m256 of floats using 64-bit indices: mask is
 * forwarded as the builtin's __mmask8 operand. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf( \
      (float *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale)); })
8312
/* Unmasked scatter of a __m256i of 32-bit integers using 64-bit indices:
 * addr is passed as int *, the mask is all ones, scale is passed as int. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si( \
      (int *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale)); })
8317
/* Masked scatter of a __m256i of 32-bit integers using 64-bit indices:
 * mask is forwarded as the builtin's __mmask8 operand. */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si( \
      (int *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale)); })
8322
/* Unmasked scatter of a __m512d of doubles using 64-bit indices: addr is
 * passed as double *, the mask is all ones, scale is passed as int. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df( \
      (double *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
8327
/* Masked scatter of a __m512d of doubles using 64-bit indices: mask is
 * forwarded as the builtin's __mmask8 operand. */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df( \
      (double *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
8332
/* Unmasked scatter of a __m512i of 64-bit integers using 64-bit indices:
 * addr is passed as long long *, the mask is all ones, scale as int. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di( \
      (long long *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
8337
/* Masked scatter of a __m512i of 64-bit integers using 64-bit indices:
 * mask is forwarded as the builtin's __mmask8 operand. */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di( \
      (long long *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
8342
/* Unmasked scatter of a __m512 of floats using 32-bit indices: addr is
 * passed as float *, the __mmask16 is all ones, scale is passed as int. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16sf( \
      (float *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale)); })
8347
/* Masked scatter of a __m512 of floats using 32-bit indices: mask is
 * forwarded as the builtin's __mmask16 operand. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16sf( \
      (float *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale)); })
8352
/* Unmasked scatter of a __m512i of 32-bit integers using 32-bit indices:
 * addr is passed as int *, the __mmask16 is all ones, scale as int. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16si( \
      (int *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale)); })
8357
/* Masked scatter of a __m512i of 32-bit integers using 32-bit indices:
 * mask is forwarded as the builtin's __mmask16 operand. */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16si( \
      (int *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale)); })
8362
/* Unmasked scatter of a __m512d of doubles using 32-bit indices held in a
 * __m256i: addr is passed as double *, the mask is all ones. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8df( \
      (double *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
8367
/* Masked scatter of a __m512d of doubles using 32-bit indices held in a
 * __m256i: mask is forwarded as the builtin's __mmask8 operand. */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8df( \
      (double *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
8372
/* Unmasked scatter of a __m512i of 64-bit integers using 32-bit indices
 * held in a __m256i: addr is passed as long long *, the mask is all ones. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8di( \
      (long long *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
8377
/* Masked scatter of a __m512i of 64-bit integers using 32-bit indices
 * held in a __m256i: mask is forwarded as the builtin's __mmask8 operand. */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8di( \
      (long long *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
8382
8383static __inline__ __m128 __DEFAULT_FN_ATTRS
8384_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8385{
8386 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8387          (__v4sf) __A,
8388          (__v4sf) __B,
8389          (__mmask8) __U,
8390          _MM_FROUND_CUR_DIRECTION);
8391}
8392
/* Macro form of _mm_mask_fmadd_ss taking an explicit rounding argument R:
 * forwards W, A, B, mask U and R to __builtin_ia32_vfmaddss3_mask. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8398
8399static __inline__ __m128 __DEFAULT_FN_ATTRS
8400_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8401{
8402 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8403          (__v4sf) __B,
8404          (__v4sf) __C,
8405          (__mmask8) __U,
8406          _MM_FROUND_CUR_DIRECTION);
8407}
8408
/* Macro form of _mm_maskz_fmadd_ss taking an explicit rounding argument:
 * forwards A, B, C and mask U to __builtin_ia32_vfmaddss3_maskz.
 * The caller-supplied R is passed as the rounding operand; previously it
 * was silently dropped in favour of _MM_FROUND_CUR_DIRECTION, which made
 * this macro indistinguishable from the non-_round_ function. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8414
8415static __inline__ __m128 __DEFAULT_FN_ATTRS
8416_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8417{
8418 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8419          (__v4sf) __X,
8420          (__v4sf) __Y,
8421          (__mmask8) __U,
8422          _MM_FROUND_CUR_DIRECTION);
8423}
8424
/* Macro form of _mm_mask3_fmadd_ss taking an explicit rounding argument R:
 * forwards W, X, Y, mask U and R to __builtin_ia32_vfmaddss3_mask3. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8430
8431static __inline__ __m128 __DEFAULT_FN_ATTRS
8432_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8433{
8434 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8435          (__v4sf) __A,
8436          -(__v4sf) __B,
8437          (__mmask8) __U,
8438          _MM_FROUND_CUR_DIRECTION);
8439}
8440
/* Macro form of _mm_mask_fmsub_ss taking an explicit rounding argument R.
 * The subtraction is expressed through the fmadd builtin, so B must be
 * negated before it is forwarded -- exactly as _mm_mask_fmsub_ss and
 * _mm_mask_fmsub_round_sd do.  Without the negation this macro computed
 * the same thing as _mm_mask_fmadd_round_ss. */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
8446
8447static __inline__ __m128 __DEFAULT_FN_ATTRS
8448_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8449{
8450 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8451          (__v4sf) __B,
8452          -(__v4sf) __C,
8453          (__mmask8) __U,
8454          _MM_FROUND_CUR_DIRECTION);
8455}
8456
/* Macro form of _mm_maskz_fmsub_ss taking an explicit rounding argument R:
 * forwards A, B, negated C, mask U and R to
 * __builtin_ia32_vfmaddss3_maskz. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      -(__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8462
8463static __inline__ __m128 __DEFAULT_FN_ATTRS
8464_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8465{
8466 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8467          (__v4sf) __X,
8468          -(__v4sf) __Y,
8469          (__mmask8) __U,
8470          _MM_FROUND_CUR_DIRECTION);
8471}
8472
/* Macro form of _mm_mask3_fmsub_ss taking an explicit rounding argument R:
 * forwards W, X, negated Y, mask U and R to
 * __builtin_ia32_vfmaddss3_mask3. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      -(__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8478
8479static __inline__ __m128 __DEFAULT_FN_ATTRS
8480_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8481{
8482 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8483          -(__v4sf) __A,
8484          (__v4sf) __B,
8485          (__mmask8) __U,
8486          _MM_FROUND_CUR_DIRECTION);
8487}
8488
/* Macro form of _mm_mask_fnmadd_ss taking an explicit rounding argument R:
 * forwards W, negated A, B, mask U and R to
 * __builtin_ia32_vfmaddss3_mask. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8494
8495static __inline__ __m128 __DEFAULT_FN_ATTRS
8496_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8497{
8498 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8499          (__v4sf) __B,
8500          (__v4sf) __C,
8501          (__mmask8) __U,
8502          _MM_FROUND_CUR_DIRECTION);
8503}
8504
/* Macro form of _mm_maskz_fnmadd_ss taking an explicit rounding argument
 * R: forwards negated A, B, C, mask U and R to
 * __builtin_ia32_vfmaddss3_maskz. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8510
8511static __inline__ __m128 __DEFAULT_FN_ATTRS
8512_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8513{
8514 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8515          (__v4sf) __X,
8516          (__v4sf) __Y,
8517          (__mmask8) __U,
8518          _MM_FROUND_CUR_DIRECTION);
8519}
8520
/* Macro form of _mm_mask3_fnmadd_ss taking an explicit rounding argument
 * R: forwards negated W, X, Y, mask U and R to
 * __builtin_ia32_vfmaddss3_mask3. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      -(__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8526
8527static __inline__ __m128 __DEFAULT_FN_ATTRS
8528_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8529{
8530 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8531          -(__v4sf) __A,
8532          -(__v4sf) __B,
8533          (__mmask8) __U,
8534          _MM_FROUND_CUR_DIRECTION);
8535}
8536
/* Macro form of _mm_mask_fnmsub_ss taking an explicit rounding argument R:
 * forwards W, negated A, negated B, mask U and R to
 * __builtin_ia32_vfmaddss3_mask. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8542
8543static __inline__ __m128 __DEFAULT_FN_ATTRS
8544_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8545{
8546 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8547          (__v4sf) __B,
8548          -(__v4sf) __C,
8549          (__mmask8) __U,
8550          _MM_FROUND_CUR_DIRECTION);
8551}
8552
/* Macro form of _mm_maskz_fnmsub_ss taking an explicit rounding argument:
 * forwards negated A, B, negated C and mask U to
 * __builtin_ia32_vfmaddss3_maskz.
 * The caller-supplied R is passed as the rounding operand; previously it
 * was silently dropped in favour of _MM_FROUND_CUR_DIRECTION, which made
 * this macro indistinguishable from the non-_round_ function. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8558
8559static __inline__ __m128 __DEFAULT_FN_ATTRS
8560_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8561{
8562 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8563          (__v4sf) __X,
8564          -(__v4sf) __Y,
8565          (__mmask8) __U,
8566          _MM_FROUND_CUR_DIRECTION);
8567}
8568
/* Macro form of _mm_mask3_fnmsub_ss taking an explicit rounding argument
 * R: forwards negated W, X, negated Y, mask U and R to
 * __builtin_ia32_vfmaddss3_mask3. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      -(__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      -(__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8574
8575static __inline__ __m128d __DEFAULT_FN_ATTRS
8576_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8577{
8578 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8579          (__v2df) __A,
8580          (__v2df) __B,
8581          (__mmask8) __U,
8582          _MM_FROUND_CUR_DIRECTION);
8583}
8584
/* Macro form of _mm_mask_fmadd_sd taking an explicit rounding argument R:
 * forwards W, A, B, mask U and R to __builtin_ia32_vfmaddsd3_mask. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8590
8591static __inline__ __m128d __DEFAULT_FN_ATTRS
8592_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8593{
8594 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8595          (__v2df) __B,
8596          (__v2df) __C,
8597          (__mmask8) __U,
8598          _MM_FROUND_CUR_DIRECTION);
8599}
8600
/* Macro form of _mm_maskz_fmadd_sd taking an explicit rounding argument:
 * forwards A, B, C and mask U to __builtin_ia32_vfmaddsd3_maskz.
 * The caller-supplied R is passed as the rounding operand; previously it
 * was silently dropped in favour of _MM_FROUND_CUR_DIRECTION, which made
 * this macro indistinguishable from the non-_round_ function. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })
8606
8607static __inline__ __m128d __DEFAULT_FN_ATTRS
8608_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8609{
8610 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8611          (__v2df) __X,
8612          (__v2df) __Y,
8613          (__mmask8) __U,
8614          _MM_FROUND_CUR_DIRECTION);
8615}
8616
/* Explicit-rounding form of _mm_mask3_fmadd_sd: W, X, Y and mask U are
   forwarded unchanged and R becomes the builtin's rounding argument. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8622
8623static __inline__ __m128d __DEFAULT_FN_ATTRS
8624_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8625{
8626 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8627          (__v2df) __A,
8628          -(__v2df) __B,
8629          (__mmask8) __U,
8630          _MM_FROUND_CUR_DIRECTION);
8631}
8632
/* Explicit-rounding form of _mm_mask_fmsub_sd: B is negated, and R is passed
   as the masked FMA builtin's rounding argument. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8638
8639static __inline__ __m128d __DEFAULT_FN_ATTRS
8640_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8641{
8642 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8643          (__v2df) __B,
8644          -(__v2df) __C,
8645          (__mmask8) __U,
8646          _MM_FROUND_CUR_DIRECTION);
8647}
8648
/* Explicit-rounding form of _mm_maskz_fmsub_sd: C is negated, and R is passed
   as the maskz FMA builtin's rounding argument. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      -(__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8654
8655static __inline__ __m128d __DEFAULT_FN_ATTRS
8656_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8657{
8658 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8659          (__v2df) __X,
8660          -(__v2df) __Y,
8661          (__mmask8) __U,
8662          _MM_FROUND_CUR_DIRECTION);
8663}
8664
/* Explicit-rounding form of _mm_mask3_fmsub_sd: Y is negated, and R is passed
   as the mask3 FMA builtin's rounding argument. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      -(__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8670
8671static __inline__ __m128d __DEFAULT_FN_ATTRS
8672_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8673{
8674 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8675          -(__v2df) __A,
8676          (__v2df) __B,
8677          (__mmask8) __U,
8678          _MM_FROUND_CUR_DIRECTION);
8679}
8680
/* Explicit-rounding form of _mm_mask_fnmadd_sd: A is negated, and R is passed
   as the masked FMA builtin's rounding argument. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8686
8687static __inline__ __m128d __DEFAULT_FN_ATTRS
8688_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8689{
8690 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8691          (__v2df) __B,
8692          (__v2df) __C,
8693          (__mmask8) __U,
8694          _MM_FROUND_CUR_DIRECTION);
8695}
8696
/* Explicit-rounding form of _mm_maskz_fnmadd_sd: A is negated, and R is
   passed as the maskz FMA builtin's rounding argument. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8702
8703static __inline__ __m128d __DEFAULT_FN_ATTRS
8704_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8705{
8706 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
8707          (__v2df) __X,
8708          (__v2df) __Y,
8709          (__mmask8) __U,
8710          _MM_FROUND_CUR_DIRECTION);
8711}
8712
/* Explicit-rounding form of _mm_mask3_fnmadd_sd: W is negated, and R is
   passed as the mask3 FMA builtin's rounding argument. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      -(__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8718
8719static __inline__ __m128d __DEFAULT_FN_ATTRS
8720_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8721{
8722 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8723          -(__v2df) __A,
8724          -(__v2df) __B,
8725          (__mmask8) __U,
8726          _MM_FROUND_CUR_DIRECTION);
8727}
8728
/* Explicit-rounding form of _mm_mask_fnmsub_sd: A and B are negated, and R is
   passed as the masked FMA builtin's rounding argument. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8734
8735static __inline__ __m128d __DEFAULT_FN_ATTRS
8736_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8737{
8738 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8739          (__v2df) __B,
8740          -(__v2df) __C,
8741          (__mmask8) __U,
8742          _MM_FROUND_CUR_DIRECTION);
8743}
8744
/* Explicit-rounding form of _mm_maskz_fnmsub_sd: A and C are negated before
   being handed to the maskz scalar-double FMA builtin together with mask U.
   R is passed as the rounding argument; previously it was silently ignored
   and _MM_FROUND_CUR_DIRECTION was always used, unlike the other *_round_sd
   macros in this file. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
8751
8752static __inline__ __m128d __DEFAULT_FN_ATTRS
8753_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8754{
8755 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) (__W),
8756          (__v2df) __X,
8757          -(__v2df) (__Y),
8758          (__mmask8) __U,
8759          _MM_FROUND_CUR_DIRECTION);
8760}
8761
/* Explicit-rounding form of _mm_mask3_fnmsub_sd: W and Y are negated, and R
   is passed as the mask3 FMA builtin's rounding argument. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      -(__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      -(__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8767
/* Implemented with __builtin_shufflevector.  Each 2-bit field of C yields an
   index 0..3; the four fields are used at base 0 for result elements 0-3 and
   again at base 4 for result elements 4-7.  Every index is below 8, so only
   X is ever selected from; the undefined second operand is a placeholder. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(X), \
      (__v8df)_mm512_undefined_pd(), \
      0 + (((C) >> 0) & 0x3), 0 + (((C) >> 2) & 0x3), \
      0 + (((C) >> 4) & 0x3), 0 + (((C) >> 6) & 0x3), \
      4 + (((C) >> 0) & 0x3), 4 + (((C) >> 2) & 0x3), \
      4 + (((C) >> 4) & 0x3), 4 + (((C) >> 6) & 0x3)); })
8779
/* Masked variant: the result of _mm512_permutex_pd(X, C) and the vector W are
   handed to the 512-bit double select builtin under mask U. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)); })
8784
/* Zero-masked variant: the result of _mm512_permutex_pd(X, C) and an all-zero
   vector are handed to the 512-bit double select builtin under mask U. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()); })
8789
/* 64-bit integer counterpart of _mm512_permutex_pd, implemented with
   __builtin_shufflevector.  Each 2-bit field of C yields an index 0..3, used
   at base 0 for result elements 0-3 and at base 4 for result elements 4-7.
   Every index is below 8, so only X is ever selected from. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(X), \
      (__v8di)_mm512_undefined_epi32(), \
      0 + (((C) >> 0) & 0x3), 0 + (((C) >> 2) & 0x3), \
      0 + (((C) >> 4) & 0x3), 0 + (((C) >> 6) & 0x3), \
      4 + (((C) >> 0) & 0x3), 4 + (((C) >> 2) & 0x3), \
      4 + (((C) >> 4) & 0x3), 4 + (((C) >> 6) & 0x3)); })
8801
/* Masked variant: the result of _mm512_permutex_epi64(X, C) and the vector W
   are handed to the 512-bit quadword select builtin under mask U. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)); })
8806
/* Zero-masked variant: the result of _mm512_permutex_epi64(X, C) and an
   all-zero vector are handed to the quadword select builtin under mask U. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()); })
8811
8812static __inline__ __m512d __DEFAULT_FN_ATTRS
8813_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8814{
8815  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8816                 (__v8di) __X,
8817                 (__v8df) _mm512_undefined_pd (),
8818                 (__mmask8) -1);
8819}
8820
8821static __inline__ __m512d __DEFAULT_FN_ATTRS
8822_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8823{
8824  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8825                 (__v8di) __X,
8826                 (__v8df) __W,
8827                 (__mmask8) __U);
8828}
8829
8830static __inline__ __m512d __DEFAULT_FN_ATTRS
8831_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8832{
8833  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8834                 (__v8di) __X,
8835                 (__v8df) _mm512_setzero_pd (),
8836                 (__mmask8) __U);
8837}
8838
8839static __inline__ __m512i __DEFAULT_FN_ATTRS
8840_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8841{
8842  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8843                 (__v8di) __X,
8844                 (__v8di) _mm512_setzero_si512 (),
8845                 __M);
8846}
8847
8848static __inline__ __m512i __DEFAULT_FN_ATTRS
8849_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8850{
8851  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8852                 (__v8di) __X,
8853                 (__v8di) _mm512_undefined_epi32 (),
8854                 (__mmask8) -1);
8855}
8856
8857static __inline__ __m512i __DEFAULT_FN_ATTRS
8858_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8859             __m512i __Y)
8860{
8861  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8862                 (__v8di) __X,
8863                 (__v8di) __W,
8864                 __M);
8865}
8866
8867static __inline__ __m512 __DEFAULT_FN_ATTRS
8868_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8869{
8870  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8871                (__v16si) __X,
8872                (__v16sf) _mm512_undefined_ps (),
8873                (__mmask16) -1);
8874}
8875
8876static __inline__ __m512 __DEFAULT_FN_ATTRS
8877_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8878{
8879  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8880                (__v16si) __X,
8881                (__v16sf) __W,
8882                (__mmask16) __U);
8883}
8884
8885static __inline__ __m512 __DEFAULT_FN_ATTRS
8886_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8887{
8888  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8889                (__v16si) __X,
8890                (__v16sf) _mm512_setzero_ps (),
8891                (__mmask16) __U);
8892}
8893
8894static __inline__ __m512i __DEFAULT_FN_ATTRS
8895_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8896{
8897  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8898                 (__v16si) __X,
8899                 (__v16si) _mm512_setzero_si512 (),
8900                 __M);
8901}
8902
8903static __inline__ __m512i __DEFAULT_FN_ATTRS
8904_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8905{
8906  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8907                 (__v16si) __X,
8908                 (__v16si) _mm512_undefined_epi32 (),
8909                 (__mmask16) -1);
8910}
8911
8912static __inline__ __m512i __DEFAULT_FN_ATTRS
8913_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8914             __m512i __Y)
8915{
8916  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8917                 (__v16si) __X,
8918                 (__v16si) __W,
8919                 __M);
8920}
8921
8922static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8923_mm512_kand (__mmask16 __A, __mmask16 __B)
8924{
8925  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8926}
8927
8928static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8929_mm512_kandn (__mmask16 __A, __mmask16 __B)
8930{
8931  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8932}
8933
8934static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8935_mm512_kor (__mmask16 __A, __mmask16 __B)
8936{
8937  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8938}
8939
8940static __inline__ int __DEFAULT_FN_ATTRS
8941_mm512_kortestc (__mmask16 __A, __mmask16 __B)
8942{
8943  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8944}
8945
8946static __inline__ int __DEFAULT_FN_ATTRS
8947_mm512_kortestz (__mmask16 __A, __mmask16 __B)
8948{
8949  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8950}
8951
8952static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8953_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
8954{
8955  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8956}
8957
8958static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8959_mm512_kxnor (__mmask16 __A, __mmask16 __B)
8960{
8961  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8962}
8963
8964static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8965_mm512_kxor (__mmask16 __A, __mmask16 __B)
8966{
8967  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8968}
8969
8970static __inline__ void __DEFAULT_FN_ATTRS
8971_mm512_stream_si512 (__m512i * __P, __m512i __A)
8972{
8973  __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
8974}
8975
8976static __inline__ __m512i __DEFAULT_FN_ATTRS
8977_mm512_stream_load_si512 (void *__P)
8978{
8979  return __builtin_ia32_movntdqa512 ((__v8di *)__P);
8980}
8981
8982static __inline__ void __DEFAULT_FN_ATTRS
8983_mm512_stream_pd (double *__P, __m512d __A)
8984{
8985  __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
8986}
8987
8988static __inline__ void __DEFAULT_FN_ATTRS
8989_mm512_stream_ps (float *__P, __m512 __A)
8990{
8991  __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
8992}
8993
8994static __inline__ __m512d __DEFAULT_FN_ATTRS
8995_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8996{
8997  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8998                  (__v8df) __W,
8999                  (__mmask8) __U);
9000}
9001
9002static __inline__ __m512d __DEFAULT_FN_ATTRS
9003_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9004{
9005  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9006                  (__v8df)
9007                  _mm512_setzero_pd (),
9008                  (__mmask8) __U);
9009}
9010
9011static __inline__ __m512i __DEFAULT_FN_ATTRS
9012_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9013{
9014  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9015                  (__v8di) __W,
9016                  (__mmask8) __U);
9017}
9018
9019static __inline__ __m512i __DEFAULT_FN_ATTRS
9020_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9021{
9022  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9023                  (__v8di)
9024                  _mm512_setzero_si512 (),
9025                  (__mmask8) __U);
9026}
9027
9028static __inline__ __m512 __DEFAULT_FN_ATTRS
9029_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9030{
9031  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9032                 (__v16sf) __W,
9033                 (__mmask16) __U);
9034}
9035
9036static __inline__ __m512 __DEFAULT_FN_ATTRS
9037_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9038{
9039  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9040                 (__v16sf)
9041                 _mm512_setzero_ps (),
9042                 (__mmask16) __U);
9043}
9044
9045static __inline__ __m512i __DEFAULT_FN_ATTRS
9046_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9047{
9048  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9049                  (__v16si) __W,
9050                  (__mmask16) __U);
9051}
9052
9053static __inline__ __m512i __DEFAULT_FN_ATTRS
9054_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9055{
9056  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9057                  (__v16si)
9058                  _mm512_setzero_si512 (),
9059                  (__mmask16) __U);
9060}
9061
/* Scalar-single compare producing a mask: X, Y and predicate P go to the
   cmpss builtin with an all-ones input mask and R as the final argument. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      (int)(R)); })
9066
/* Scalar-single compare producing a mask: X, Y and predicate P go to the
   cmpss builtin with input mask M and R as the final argument. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      (int)(R)); })
9071
/* Scalar-single compare producing a mask: X, Y and predicate P go to the
   cmpss builtin with an all-ones input mask and the current-direction
   constant as the final argument. */
#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
9077
/* Scalar-single compare producing a mask: X, Y and predicate P go to the
   cmpss builtin with input mask M and the current-direction constant as the
   final argument. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION); })
9083
/* Scalar-double compare producing a mask: X, Y and predicate P go to the
   cmpsd builtin with an all-ones input mask and R as the final argument. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      (int)(R)); })
9088
/* Scalar-double compare producing a mask: X, Y and predicate P go to the
   cmpsd builtin with input mask M and R as the final argument. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      (int)(R)); })
9093
/* Scalar-double compare producing a mask: X, Y and predicate P go to the
   cmpsd builtin with an all-ones input mask and the current-direction
   constant as the final argument. */
#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
9099
/* Scalar-double compare producing a mask: X, Y and predicate P go to the
   cmpsd builtin with input mask M and the current-direction constant as the
   final argument. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION); })
9105
9106static __inline__ __m512 __DEFAULT_FN_ATTRS
9107_mm512_movehdup_ps (__m512 __A)
9108{
9109  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9110                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
9111}
9112
9113static __inline__ __m512 __DEFAULT_FN_ATTRS
9114_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9115{
9116  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9117                                             (__v16sf)_mm512_movehdup_ps(__A),
9118                                             (__v16sf)__W);
9119}
9120
9121static __inline__ __m512 __DEFAULT_FN_ATTRS
9122_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
9123{
9124  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9125                                             (__v16sf)_mm512_movehdup_ps(__A),
9126                                             (__v16sf)_mm512_setzero_ps());
9127}
9128
9129static __inline__ __m512 __DEFAULT_FN_ATTRS
9130_mm512_moveldup_ps (__m512 __A)
9131{
9132  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9133                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
9134}
9135
9136static __inline__ __m512 __DEFAULT_FN_ATTRS
9137_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9138{
9139  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9140                                             (__v16sf)_mm512_moveldup_ps(__A),
9141                                             (__v16sf)__W);
9142}
9143
9144static __inline__ __m512 __DEFAULT_FN_ATTRS
9145_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
9146{
9147  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9148                                             (__v16sf)_mm512_moveldup_ps(__A),
9149                                             (__v16sf)_mm512_setzero_ps());
9150}
9151
9152static __inline__ __m128 __DEFAULT_FN_ATTRS
9153_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9154{
9155  return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B,
9156               (__v4sf) __W,
9157               (__mmask8) __U);
9158}
9159
9160static __inline__ __m128 __DEFAULT_FN_ATTRS
9161_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9162{
9163  return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B,
9164               (__v4sf)
9165               _mm_setzero_si128(),
9166               (__mmask8) __U);
9167}
9168
9169static __inline__ __m128d __DEFAULT_FN_ATTRS
9170_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9171{
9172  return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B,
9173               (__v2df) __W,
9174               (__mmask8) __U);
9175}
9176
9177static __inline__ __m128d __DEFAULT_FN_ATTRS
9178_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9179{
9180  return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B,
9181               (__v2df)
9182               _mm_setzero_pd (),
9183               (__mmask8) __U);
9184}
9185
/* Implemented with __builtin_shufflevector.  Each 2-bit field of I yields an
   index 0..3; the same four fields are applied at bases 0, 4, 8 and 12 to
   fill the sixteen result elements.  Every index is below 16, so only A is
   ever selected from; the undefined second operand is a placeholder. */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v16si)(__m512i)(A), \
      (__v16si)_mm512_undefined_epi32(), \
      0  + (((I) >> 0) & 0x3), 0  + (((I) >> 2) & 0x3), \
      0  + (((I) >> 4) & 0x3), 0  + (((I) >> 6) & 0x3), \
      4  + (((I) >> 0) & 0x3), 4  + (((I) >> 2) & 0x3), \
      4  + (((I) >> 4) & 0x3), 4  + (((I) >> 6) & 0x3), \
      8  + (((I) >> 0) & 0x3), 8  + (((I) >> 2) & 0x3), \
      8  + (((I) >> 4) & 0x3), 8  + (((I) >> 6) & 0x3), \
      12 + (((I) >> 0) & 0x3), 12 + (((I) >> 2) & 0x3), \
      12 + (((I) >> 4) & 0x3), 12 + (((I) >> 6) & 0x3)); })
9205
/* Masked variant: the result of _mm512_shuffle_epi32(A, I) and the vector W
   are handed to the 512-bit doubleword select builtin under mask U. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)(__m512i)(W)); })
9210
/* Zero-masked variant: the result of _mm512_shuffle_epi32(A, I) and an
   all-zero vector are handed to the doubleword select builtin under mask U. */
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)_mm512_setzero_si512()); })
9215
9216static __inline__ __m512d __DEFAULT_FN_ATTRS
9217_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9218{
9219  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9220                (__v8df) __W,
9221                (__mmask8) __U);
9222}
9223
9224static __inline__ __m512d __DEFAULT_FN_ATTRS
9225_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9226{
9227  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9228                (__v8df) _mm512_setzero_pd (),
9229                (__mmask8) __U);
9230}
9231
9232static __inline__ __m512i __DEFAULT_FN_ATTRS
9233_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9234{
9235  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9236                (__v8di) __W,
9237                (__mmask8) __U);
9238}
9239
9240static __inline__ __m512i __DEFAULT_FN_ATTRS
9241_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9242{
9243  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9244                (__v8di) _mm512_setzero_pd (),
9245                (__mmask8) __U);
9246}
9247
9248static __inline__ __m512d __DEFAULT_FN_ATTRS
9249_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
9250{
9251  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9252              (__v8df) __W,
9253              (__mmask8) __U);
9254}
9255
9256static __inline__ __m512d __DEFAULT_FN_ATTRS
9257_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
9258{
9259  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9260              (__v8df) _mm512_setzero_pd(),
9261              (__mmask8) __U);
9262}
9263
9264static __inline__ __m512i __DEFAULT_FN_ATTRS
9265_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9266{
9267  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9268              (__v8di) __W,
9269              (__mmask8) __U);
9270}
9271
9272static __inline__ __m512i __DEFAULT_FN_ATTRS
9273_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9274{
9275  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9276              (__v8di) _mm512_setzero_pd(),
9277              (__mmask8) __U);
9278}
9279
9280static __inline__ __m512 __DEFAULT_FN_ATTRS
9281_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
9282{
9283  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9284                   (__v16sf) __W,
9285                   (__mmask16) __U);
9286}
9287
9288static __inline__ __m512 __DEFAULT_FN_ATTRS
9289_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
9290{
9291  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9292                   (__v16sf) _mm512_setzero_ps(),
9293                   (__mmask16) __U);
9294}
9295
9296static __inline__ __m512i __DEFAULT_FN_ATTRS
9297_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9298{
9299  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9300              (__v16si) __W,
9301              (__mmask16) __U);
9302}
9303
9304static __inline__ __m512i __DEFAULT_FN_ATTRS
9305_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9306{
9307  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9308              (__v16si) _mm512_setzero_ps(),
9309              (__mmask16) __U);
9310}
9311
9312static __inline__ __m512 __DEFAULT_FN_ATTRS
9313_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9314{
9315  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9316               (__v16sf) __W,
9317               (__mmask16) __U);
9318}
9319
9320static __inline__ __m512 __DEFAULT_FN_ATTRS
9321_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9322{
9323  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9324               (__v16sf) _mm512_setzero_ps(),
9325               (__mmask16) __U);
9326}
9327
9328static __inline__ __m512i __DEFAULT_FN_ATTRS
9329_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9330{
9331  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9332                (__v16si) __W,
9333                (__mmask16) __U);
9334}
9335
9336static __inline__ __m512i __DEFAULT_FN_ATTRS
9337_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9338{
9339  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9340                (__v16si) _mm512_setzero_ps(),
9341                (__mmask16) __U);
9342}
9343
/* Unmasked float-to-double conversion with explicit R: the 256-bit source A
   goes to the cvtps2pd builtin with an undefined pass-through vector and an
   all-ones mask. */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
9348
/* Masked float-to-double conversion with explicit R: the 256-bit source A
   goes to the cvtps2pd builtin with W as pass-through and U as the mask. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
9353
/* Zero-masked float-to-double conversion with explicit R: the 256-bit source
   A goes to the cvtps2pd builtin with an all-zero pass-through and U as the
   mask. */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
9358
9359static __inline__ __m512d __DEFAULT_FN_ATTRS
9360_mm512_cvtps_pd (__m256 __A)
9361{
9362  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9363                (__v8df)
9364                _mm512_undefined_pd (),
9365                (__mmask8) -1,
9366                _MM_FROUND_CUR_DIRECTION);
9367}
9368
9369static __inline__ __m512d __DEFAULT_FN_ATTRS
9370_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9371{
9372  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9373                (__v8df) __W,
9374                (__mmask8) __U,
9375                _MM_FROUND_CUR_DIRECTION);
9376}
9377
9378static __inline__ __m512d __DEFAULT_FN_ATTRS
9379_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9380{
9381  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9382                (__v8df)
9383                _mm512_setzero_pd (),
9384                (__mmask8) __U,
9385                _MM_FROUND_CUR_DIRECTION);
9386}
9387
9388static __inline__ __m512 __DEFAULT_FN_ATTRS
9389_mm512_cvtpslo_pd (__m512 __A)
9390{
9391  return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9392}
9393
9394static __inline__ __m512 __DEFAULT_FN_ATTRS
9395_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
9396{
9397  return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9398}
9399
9400static __inline__ __m512d __DEFAULT_FN_ATTRS
9401_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
9402{
9403  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9404              (__v8df) __A,
9405              (__v8df) __W);
9406}
9407
9408static __inline__ __m512d __DEFAULT_FN_ATTRS
9409_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
9410{
9411  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9412              (__v8df) __A,
9413              (__v8df) _mm512_setzero_pd ());
9414}
9415
9416static __inline__ __m512 __DEFAULT_FN_ATTRS
9417_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
9418{
9419  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9420             (__v16sf) __A,
9421             (__v16sf) __W);
9422}
9423
9424static __inline__ __m512 __DEFAULT_FN_ATTRS
9425_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
9426{
9427  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9428             (__v16sf) __A,
9429             (__v16sf) _mm512_setzero_ps ());
9430}
9431
9432static __inline__ void __DEFAULT_FN_ATTRS
9433_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9434{
9435  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9436            (__mmask8) __U);
9437}
9438
9439static __inline__ void __DEFAULT_FN_ATTRS
9440_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9441{
9442  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9443            (__mmask8) __U);
9444}
9445
9446static __inline__ void __DEFAULT_FN_ATTRS
9447_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9448{
9449  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9450            (__mmask16) __U);
9451}
9452
9453static __inline__ void __DEFAULT_FN_ATTRS
9454_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9455{
9456  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9457            (__mmask16) __U);
9458}
9459
/* Unmasked scalar double -> float conversion with rounding argument R:
 * operands A (__m128) and B (__m128d), all-ones mask, undefined
 * pass-through. */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
9465
/* Merge-masked scalar double -> float conversion with rounding argument R:
 * W is the pass-through vector and U the mask. */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
9471
/* Zero-masked scalar double -> float conversion with rounding argument R:
 * an all-zero vector is the pass-through and U the mask. */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
9477
9478static __inline__ __m128 __DEFAULT_FN_ATTRS
9479_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9480{
9481  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9482                                             (__v2df)(__B),
9483                                             (__v4sf)(__W),
9484                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9485}
9486
9487static __inline__ __m128 __DEFAULT_FN_ATTRS
9488_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9489{
9490  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9491                                             (__v2df)(__B),
9492                                             (__v4sf)_mm_setzero_ps(),
9493                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9494}
9495
/* "i32"/"i64" spellings of the scalar conversion intrinsics; each macro is a
 * plain alias for the corresponding "si32"/"si64" name. */
#define _mm_cvtss_i32   _mm_cvtss_si32
#define _mm_cvtsd_i32   _mm_cvtsd_si32
#define _mm_cvti32_sd   _mm_cvtsi32_sd
#define _mm_cvti32_ss   _mm_cvtsi32_ss
#ifdef __x86_64__
/* The 64-bit aliases are only defined when targeting x86-64. */
#define _mm_cvtss_i64   _mm_cvtss_si64
#define _mm_cvtsd_i64   _mm_cvtsd_si64
#define _mm_cvti64_sd   _mm_cvtsi64_sd
#define _mm_cvti64_ss   _mm_cvtsi64_ss
#endif
9506
#ifdef __x86_64__
/* Signed 64-bit integer B -> double with rounding argument R, using A as the
 * vector operand.  Both spellings expand to the same cvtsi2sd64 builtin call. */
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)); })

#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), (long long)(B), (int)(R)); })
#endif
9516
/* Signed 32-bit integer B -> float with rounding argument R, using A as the
 * vector operand (cvtsi2ss32 builtin). */
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9519
/* "i32" spelling of the signed 32-bit integer -> float conversion with
 * rounding argument R (cvtsi2ss32 builtin). */
#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9522
#ifdef __x86_64__
/* Signed 64-bit integer B -> float with rounding argument R, using A as the
 * vector operand.  Both spellings expand to the same cvtsi2ss64 builtin call. */
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)); })

#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), (long long)(B), (int)(R)); })
#endif
9532
/* Unmasked scalar float -> double conversion with rounding argument R:
 * operands A (__m128d) and B (__m128), all-ones mask, undefined
 * pass-through. */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)_mm_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
9538
/* Merge-masked scalar float -> double conversion with rounding argument R:
 * W is the pass-through vector and U the mask. */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
9544
/* Zero-masked scalar float -> double conversion with rounding argument R:
 * an all-zero vector is the pass-through and U the mask. */
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
9550
9551static __inline__ __m128d __DEFAULT_FN_ATTRS
9552_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9553{
9554  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9555                                              (__v4sf)(__B),
9556                                              (__v2df)(__W),
9557                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9558}
9559
9560static __inline__ __m128d __DEFAULT_FN_ATTRS
9561_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9562{
9563  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9564                                              (__v4sf)(__B),
9565                                              (__v2df)_mm_setzero_pd(),
9566                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9567}
9568
9569static __inline__ __m128d __DEFAULT_FN_ATTRS
9570_mm_cvtu32_sd (__m128d __A, unsigned __B)
9571{
9572  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
9573}
9574
9575#ifdef __x86_64__
9576#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
9577  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9578                                      (unsigned long long)(B), (int)(R)); })
9579
9580static __inline__ __m128d __DEFAULT_FN_ATTRS
9581_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9582{
9583  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
9584                 _MM_FROUND_CUR_DIRECTION);
9585}
9586#endif
9587
/* Unsigned 32-bit integer B -> float with rounding argument R, using A as the
 * vector operand (cvtusi2ss32 builtin). */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32( \
      (__v4sf)(__m128)(A), (unsigned int)(B), (int)(R)); })
9591
9592static __inline__ __m128 __DEFAULT_FN_ATTRS
9593_mm_cvtu32_ss (__m128 __A, unsigned __B)
9594{
9595  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
9596                _MM_FROUND_CUR_DIRECTION);
9597}
9598
9599#ifdef __x86_64__
9600#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
9601  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9602                                     (unsigned long long)(B), (int)(R)); })
9603
9604static __inline__ __m128 __DEFAULT_FN_ATTRS
9605_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9606{
9607  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
9608                _MM_FROUND_CUR_DIRECTION);
9609}
9610#endif
9611
9612static __inline__ __m512i __DEFAULT_FN_ATTRS
9613_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9614{
9615  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
9616                 __M);
9617}
9618
9619#ifdef __x86_64__
9620static __inline__ __m512i __DEFAULT_FN_ATTRS
9621_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9622{
9623  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
9624                 __M);
9625}
9626#endif
9627
9628static __inline __m512i __DEFAULT_FN_ATTRS
9629_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9630     int __E, int __F, int __G, int __H,
9631     int __I, int __J, int __K, int __L,
9632     int __M, int __N, int __O, int __P)
9633{
9634  return __extension__ (__m512i)(__v16si)
9635  { __P, __O, __N, __M, __L, __K, __J, __I,
9636    __H, __G, __F, __E, __D, __C, __B, __A };
9637}
9638
/* Reversed-order form of _mm512_set_epi32: e0 is element 0, e15 is
 * element 15.  Forwards the arguments to _mm512_set_epi32 back to front. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,                  \
                          e8,e9,e10,e11,e12,e13,e14,e15)            \
  _mm512_set_epi32((e15),(e14),(e13),(e12),                         \
                   (e11),(e10),(e9),(e8),                           \
                   (e7),(e6),(e5),(e4),                             \
                   (e3),(e2),(e1),(e0))
9643
9644static __inline__ __m512i __DEFAULT_FN_ATTRS
9645_mm512_set_epi64 (long long __A, long long __B, long long __C,
9646     long long __D, long long __E, long long __F,
9647     long long __G, long long __H)
9648{
9649  return __extension__ (__m512i) (__v8di)
9650  { __H, __G, __F, __E, __D, __C, __B, __A };
9651}
9652
/* Reversed-order form of _mm512_set_epi64: e0 is element 0, e7 is
 * element 7. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)                  \
  _mm512_set_epi64((e7),(e6),(e5),(e4),                             \
                   (e3),(e2),(e1),(e0))
9655
9656static __inline__ __m512d __DEFAULT_FN_ATTRS
9657_mm512_set_pd (double __A, double __B, double __C, double __D,
9658        double __E, double __F, double __G, double __H)
9659{
9660  return __extension__ (__m512d)
9661  { __H, __G, __F, __E, __D, __C, __B, __A };
9662}
9663
/* Reversed-order form of _mm512_set_pd: e0 is element 0, e7 is element 7. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)                     \
  _mm512_set_pd((e7),(e6),(e5),(e4),                                \
                (e3),(e2),(e1),(e0))
9666
9667static __inline__ __m512 __DEFAULT_FN_ATTRS
9668_mm512_set_ps (float __A, float __B, float __C, float __D,
9669        float __E, float __F, float __G, float __H,
9670        float __I, float __J, float __K, float __L,
9671        float __M, float __N, float __O, float __P)
9672{
9673  return __extension__ (__m512)
9674  { __P, __O, __N, __M, __L, __K, __J, __I,
9675    __H, __G, __F, __E, __D, __C, __B, __A };
9676}
9677
/* Reversed-order form of _mm512_set_ps: e0 is element 0, e15 is
 * element 15. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,                     \
                       e8,e9,e10,e11,e12,e13,e14,e15)               \
  _mm512_set_ps((e15),(e14),(e13),(e12),                            \
                (e11),(e10),(e9),(e8),                              \
                (e7),(e6),(e5),(e4),                                \
                (e3),(e2),(e1),(e0))
9681
9682static __inline__ __m512 __DEFAULT_FN_ATTRS
9683_mm512_abs_ps(__m512 __A)
9684{
9685  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9686}
9687
9688static __inline__ __m512 __DEFAULT_FN_ATTRS
9689_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9690{
9691  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9692}
9693
9694static __inline__ __m512d __DEFAULT_FN_ATTRS
9695_mm512_abs_pd(__m512d __A)
9696{
9697  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9698}
9699
9700static __inline__ __m512d __DEFAULT_FN_ATTRS
9701_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9702{
9703  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9704}
9705
// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
// outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the result of the evaluation
// is independent of the order of the input elements of V.
9710
9711// Used bisection method. At each step, we partition the vector with previous
9712// step in half, and the operation is performed on its two halves.
9713// This takes log2(n) steps where n is the number of elements in the vector.
9714
9715// Vec512 - Vector with size 512.
9716// Operator - Can be one of following: +,*,&,|
9717// T2  - Can get 'i' for int and 'f' for float.
9718// T1 - Can get 'i' for int and 'd' for double.
9719
/* Reduces the eight 64-bit lanes of Vec512 to a single scalar by repeated
 * halving (8 -> 4 -> 2 -> 1 lanes); at each step the two halves are combined
 * lane-wise with Operator.
 *
 * The expansion ends with a `return` statement, so the macro is meant to be
 * the body of a function returning the lane type.
 *
 * The temporaries are spelled with reserved (`__`-prefixed) names so that a
 * user macro called Vec256 or Vec128 cannot corrupt the expansion of this
 * system header. */
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 __Vec256 = __builtin_shufflevector(                     \
                              (__v8d##T2)Vec512,                       \
                              (__v8d##T2)Vec512,                       \
                              0, 1, 2, 3)                              \
                          Operator                                     \
                          __builtin_shufflevector(                     \
                              (__v8d##T2)Vec512,                       \
                              (__v8d##T2)Vec512,                       \
                              4, 5, 6, 7);                             \
    __m128##T1 __Vec128 = __builtin_shufflevector(                     \
                              (__v4d##T2)__Vec256,                     \
                              (__v4d##T2)__Vec256,                     \
                              0, 1)                                    \
                          Operator                                     \
                          __builtin_shufflevector(                     \
                              (__v4d##T2)__Vec256,                     \
                              (__v4d##T2)__Vec256,                     \
                              2, 3);                                   \
    __Vec128 = __builtin_shufflevector((__v2d##T2)__Vec128,            \
                                       (__v2d##T2)__Vec128, 0, -1)     \
               Operator                                                \
               __builtin_shufflevector((__v2d##T2)__Vec128,            \
                                       (__v2d##T2)__Vec128, 1, -1);    \
    return __Vec128[0];                                                \
  })
9747
9748static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
9749  _mm512_reduce_operator_64bit(__W, +, i, i);
9750}
9751
9752static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
9753  _mm512_reduce_operator_64bit(__W, *, i, i);
9754}
9755
9756static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
9757  _mm512_reduce_operator_64bit(__W, &, i, i);
9758}
9759
9760static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
9761  _mm512_reduce_operator_64bit(__W, |, i, i);
9762}
9763
9764static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
9765  _mm512_reduce_operator_64bit(__W, +, f, d);
9766}
9767
9768static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
9769  _mm512_reduce_operator_64bit(__W, *, f, d);
9770}
9771
9772// Vec512 - Vector with size 512.
9773// Vec512Neutral - All vector elements set to the identity element.
9774// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
9775// Operator - Can be one of following: +,*,&,|
9776// Mask - Intrinsic Mask
9777// T2  - Can get 'i' for int and 'f' for float.
9778// T1 - Can get 'i' for int and 'd' for packed double-precision.
// T3 - Can be 'pd' for packed double or 'q' for q-word.
9780
/* Masked 64-bit reduction.  First overwrites Vec512 (which must therefore be
 * a modifiable lvalue) with a per-lane select between Vec512 and
 * Vec512Neutral under Mask, then reduces the result with
 * _mm512_reduce_operator_64bit, whose expansion ends in `return`.
 *
 * Mask and Vec512Neutral are parenthesised before being cast so that an
 * expression argument (for example `a | b` or a negated vector) is cast as a
 * whole; the select result is cast to __m512{i|d} like the 32-bit variant. */
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)(Mask),                                 \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)(Vec512Neutral));                      \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
  })
9790
9791static __inline__ long long __DEFAULT_FN_ATTRS
9792_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9793  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
9794}
9795
9796static __inline__ long long __DEFAULT_FN_ATTRS
9797_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9798  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
9799}
9800
9801static __inline__ long long __DEFAULT_FN_ATTRS
9802_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9803  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
9804                                    &, __M,  i, i, q);
9805}
9806
9807static __inline__ long long __DEFAULT_FN_ATTRS
9808_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9809  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
9810                                    i, i, q);
9811}
9812
9813static __inline__ double __DEFAULT_FN_ATTRS
9814_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9815  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
9816                                    f, d, pd);
9817}
9818
9819static __inline__ double __DEFAULT_FN_ATTRS
9820_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9821  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
9822                                    f, d, pd);
9823}
9824
9825// Vec512 - Vector with size 512.
9826// Operator - Can be one of following: +,*,&,|
// T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
// T1 - Can get 'i' for int and ' ' for packed single. [__m256{i|}]
9829
/* Reduces the sixteen 32-bit lanes of Vec512 to a single scalar by repeated
 * halving (16 -> 8 -> 4 -> 2 -> 1 lanes); at each step the two halves are
 * combined lane-wise with Operator.  The last two steps stay in a 4-lane
 * vector and leave the unused lanes undefined (shuffle index -1).
 *
 * The expansion ends with a `return` statement, so the macro is meant to be
 * the body of a function returning the lane type.
 *
 * The temporaries are spelled with reserved (`__`-prefixed) names so that a
 * user macro called Vec256 or Vec128 cannot corrupt the expansion of this
 * system header. */
#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
    __m256##T1 __Vec256 =                                                      \
            (__m256##T1)(__builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    0, 1, 2, 3, 4, 5, 6, 7)                    \
                                Operator                                       \
                         __builtin_shufflevector(                              \
                                    (__v16s##T2)Vec512,                        \
                                    (__v16s##T2)Vec512,                        \
                                    8, 9, 10, 11, 12, 13, 14, 15));            \
    __m128##T1 __Vec128 =                                                      \
             (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v8s##T2)__Vec256,                       \
                                    (__v8s##T2)__Vec256,                       \
                                    0, 1, 2, 3)                                \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v8s##T2)__Vec256,                       \
                                    (__v8s##T2)__Vec256,                       \
                                    4, 5, 6, 7));                              \
    __Vec128 = (__m128##T1)(__builtin_shufflevector(                           \
                                    (__v4s##T2)__Vec128,                       \
                                    (__v4s##T2)__Vec128,                       \
                                    0, 1, -1, -1)                              \
                                Operator                                       \
                            __builtin_shufflevector(                           \
                                    (__v4s##T2)__Vec128,                       \
                                    (__v4s##T2)__Vec128,                       \
                                    2, 3, -1, -1));                            \
    __Vec128 = (__m128##T1)(__builtin_shufflevector(                           \
                                    (__v4s##T2)__Vec128,                       \
                                    (__v4s##T2)__Vec128,                       \
                                    0, -1, -1, -1)                             \
                                Operator                                       \
                            __builtin_shufflevector(                           \
                                    (__v4s##T2)__Vec128,                       \
                                    (__v4s##T2)__Vec128,                       \
                                    1, -1, -1, -1));                           \
    return __Vec128[0];                                                        \
  })
9871
9872static __inline__ int __DEFAULT_FN_ATTRS
9873_mm512_reduce_add_epi32(__m512i __W) {
9874  _mm512_reduce_operator_32bit(__W, +, i, i);
9875}
9876
9877static __inline__ int __DEFAULT_FN_ATTRS
9878_mm512_reduce_mul_epi32(__m512i __W) {
9879  _mm512_reduce_operator_32bit(__W, *, i, i);
9880}
9881
9882static __inline__ int __DEFAULT_FN_ATTRS
9883_mm512_reduce_and_epi32(__m512i __W) {
9884  _mm512_reduce_operator_32bit(__W, &, i, i);
9885}
9886
9887static __inline__ int __DEFAULT_FN_ATTRS
9888_mm512_reduce_or_epi32(__m512i __W) {
9889  _mm512_reduce_operator_32bit(__W, |, i, i);
9890}
9891
9892static __inline__ float __DEFAULT_FN_ATTRS
9893_mm512_reduce_add_ps(__m512 __W) {
9894  _mm512_reduce_operator_32bit(__W, +, f, );
9895}
9896
9897static __inline__ float __DEFAULT_FN_ATTRS
9898_mm512_reduce_mul_ps(__m512 __W) {
9899  _mm512_reduce_operator_32bit(__W, *, f, );
9900}
9901
9902// Vec512 - Vector with size 512.
9903// Vec512Neutral - All vector elements set to the identity element.
9904// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
9905// Operator - Can be one of following: +,*,&,|
9906// Mask - Intrinsic Mask
9907// T2  - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and ' ' for packed single.
// T3 - Can be 'ps' for packed single or 'd' for d-word.
9910
/* Masked 32-bit reduction.  First overwrites Vec512 (which must therefore be
 * a modifiable lvalue) with a per-lane select between Vec512 and
 * Vec512Neutral under Mask, then reduces the result with
 * _mm512_reduce_operator_32bit, whose expansion ends in `return`.
 *
 * Mask and Vec512Neutral are parenthesised before being cast so that an
 * expression argument (for example `a | b` or a negated vector) is cast as a
 * whole. */
#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)(Mask),                                \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)(Vec512Neutral));                     \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
  })
9920
9921static __inline__ int __DEFAULT_FN_ATTRS
9922_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
9923  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
9924}
9925
9926static __inline__ int __DEFAULT_FN_ATTRS
9927_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
9928  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
9929}
9930
9931static __inline__ int __DEFAULT_FN_ATTRS
9932_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
9933  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
9934                                    i, i, d);
9935}
9936
9937static __inline__ int __DEFAULT_FN_ATTRS
9938_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
9939  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
9940}
9941
9942static __inline__ float __DEFAULT_FN_ATTRS
9943_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
9944  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
9945}
9946
9947static __inline__ float __DEFAULT_FN_ATTRS
9948_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
9949  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
9950}
9951
9952// Used bisection method. At each step, we partition the vector with previous
9953// step in half, and the operation is performed on its two halves.
9954// This takes log2(n) steps where n is the number of elements in the vector.
9955// This macro uses only intrinsics from the AVX512F feature.
9956
9957// Vec512 - Vector with size of 512.
9958// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
9960// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
9961// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
9962
/* Max/min reduction over the eight 64-bit lanes of Vec512 in three halving
 * steps.  Each step calls _mm512_<IntrinName> on two 512-bit shuffles of
 * Vec512 (the low and high half of the lanes still in play; the remaining
 * lanes are left undefined with shuffle index -1) and stores the result back
 * into Vec512, which must therefore be a modifiable lvalue.  The expansion
 * ends with `return Vec512[0];`, so the macro is meant to be a function
 * body. */
#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector((__v8d##T2)Vec512,                 \
                                            (__v8d##T2)Vec512,                 \
                                            0, 1, 2, 3, -1, -1, -1, -1),       \
        (__m512##T1)__builtin_shufflevector((__v8d##T2)Vec512,                 \
                                            (__v8d##T2)Vec512,                 \
                                            4, 5, 6, 7, -1, -1, -1, -1));      \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector((__v8d##T2)Vec512,                 \
                                            (__v8d##T2)Vec512,                 \
                                            0, 1, -1, -1, -1, -1, -1, -1),     \
        (__m512##T1)__builtin_shufflevector((__v8d##T2)Vec512,                 \
                                            (__v8d##T2)Vec512,                 \
                                            2, 3, -1, -1, -1, -1, -1, -1));    \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector((__v8d##T2)Vec512,                 \
                                            (__v8d##T2)Vec512,                 \
                                            0, -1, -1, -1, -1, -1, -1, -1),    \
        (__m512##T1)__builtin_shufflevector((__v8d##T2)Vec512,                 \
                                            (__v8d##T2)Vec512,                 \
                                            1, -1, -1, -1, -1, -1, -1, -1));   \
    return Vec512[0];                                                          \
  })
9995
9996static __inline__ long long __DEFAULT_FN_ATTRS
9997_mm512_reduce_max_epi64(__m512i __V) {
9998  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
9999}
10000
10001static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10002_mm512_reduce_max_epu64(__m512i __V) {
10003  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
10004}
10005
10006static __inline__ double __DEFAULT_FN_ATTRS
10007_mm512_reduce_max_pd(__m512d __V) {
10008  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
10009}
10010
10011static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
10012(__m512i __V) {
10013  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
10014}
10015
10016static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10017_mm512_reduce_min_epu64(__m512i __V) {
10018  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
10019}
10020
10021static __inline__ double __DEFAULT_FN_ATTRS
10022_mm512_reduce_min_pd(__m512d __V) {
10023  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
10024}
10025
10026// Vec512 - Vector with size 512.
10027// Vec512Neutral - A 512 length vector with elements set to the identity element
10028// Identity element: {max_epi,0x8000000000000000}
10029//                   {max_epu,0x0000000000000000}
10030//                   {max_pd, 0xFFF0000000000000}
10031//                   {min_epi,0x7FFFFFFFFFFFFFFF}
10032//                   {min_epu,0xFFFFFFFFFFFFFFFF}
10033//                   {min_pd, 0x7FF0000000000000}
10034//
10035// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
10037// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
10038// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
10039// T3 - Can get 'q' q word and 'pd' for packed double.
10040//      [__builtin_ia32_select{q|pd}_512]
10041// Mask - Intrinsic Mask
10042
/* Masked 64-bit max/min reduction.  First overwrites Vec512 (which must
 * therefore be a modifiable lvalue) with a per-lane select between Vec512 and
 * Vec512Neutral under Mask, then reduces the result with
 * _mm512_reduce_maxMin_64bit, whose expansion ends in `return`.
 *
 * Mask and Vec512Neutral are parenthesised before being cast: callers pass
 * expressions such as a negated vector for Vec512Neutral, and the cast must
 * apply to the whole argument rather than rely on how a leading operator
 * happens to parse after a cast. */
#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)(Mask),                                 \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)(Vec512Neutral));                      \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
10052
10053static __inline__ long long __DEFAULT_FN_ATTRS
10054_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
10055  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
10056                                  max_epi64, i, i, q, __M);
10057}
10058
10059static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10060_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
10061  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
10062                                  max_epu64, i, i, q, __M);
10063}
10064
10065static __inline__ double __DEFAULT_FN_ATTRS
10066_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
10067  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
10068                                  max_pd, d, f, pd, __M);
10069}
10070
10071static __inline__ long long __DEFAULT_FN_ATTRS
10072_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
10073  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
10074                                  min_epi64, i, i, q, __M);
10075}
10076
10077static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10078_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
10079  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
10080                                  min_epu64, i, i, q, __M);
10081}
10082
10083static __inline__ double __DEFAULT_FN_ATTRS
10084_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
10085  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
10086                                  min_pd, d, f, pd, __M);
10087}
10088
// Helper that expands to the whole body of a 32-bit max/min reduction.
//
// Vec512     - Vector with size 512.  It must be a modifiable lvalue: every
//              folding step stores its intermediate result back into it.
// IntrinName - One of {max|min}_{epi32|epu32|ps}.  It is pasted after
//              "_mm512_" to name the pairwise intrinsic, for example
//              _mm512_max_epi32.
// T1         - 'i' for integer, empty for float        [__m512{i|}]
// T2         - 'i' for integer, 'f' for float          [__v16s{i|f}]
//
// Each step shuffles the lower and the upper half of the lanes that are still
// candidates into the low positions of two vectors (a shuffle index of -1
// marks a lane whose content does not matter) and combines the two with the
// intrinsic, halving the candidates: 16 -> 8 -> 4 -> 2 -> 1.
//
// The expansion ends in a `return` statement, so it returns element 0 of
// Vec512 from the function that uses the macro.
// NOTE(review): for the integer variants Vec512 is an __m512i; if that type
// has 64-bit elements, Vec512[0] spans lanes 0 and 1 and it is the conversion
// to the 32-bit return type that isolates lane 0 - confirm.

#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2)                 \
  __extension__({                                                              \
    /* Fold 1: lanes 0-7 against lanes 8-15. */                                \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1),           \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1));    \
    /* Fold 2: lanes 0-3 against lanes 4-7. */                                 \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),       \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));      \
    /* Fold 3: lanes 0-1 against lanes 2-3. */                                 \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),     \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));    \
    /* Fold 4: lane 0 against lane 1. */                                       \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1),    \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1));   \
    /* Returns from the function that expanded the macro. */                   \
    return Vec512[0];                                                          \
  })
10142
10143static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
10144  _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
10145}
10146
10147static __inline__ unsigned int __DEFAULT_FN_ATTRS
10148_mm512_reduce_max_epu32(__m512i a) {
10149  _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
10150}
10151
10152static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
10153  _mm512_reduce_maxMin_32bit(a, max_ps, , f);
10154}
10155
10156static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
10157  _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
10158}
10159
10160static __inline__ unsigned int __DEFAULT_FN_ATTRS
10161_mm512_reduce_min_epu32(__m512i a) {
10162  _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
10163}
10164
10165static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
10166  _mm512_reduce_maxMin_32bit(a, min_ps, , f);
10167}
10168
// Helper that expands to the whole body of a masked 32-bit max/min reduction.
//
// Vec512        - Vector with size 512.  It is assigned to, so it must be a
//                 modifiable lvalue.
// Vec512Neutral - A 512 length vector with every element set to the identity
//                 element of the operation:
//                   {max_epi, 0x80000000}
//                   {max_epu, 0x00000000}
//                   {max_ps,  0xFF800000}
//                   {min_epi, 0x7FFFFFFF}
//                   {min_epu, 0xFFFFFFFF}
//                   {min_ps,  0x7F800000}
// IntrinName    - One of {max|min}_{epi32|epu32|ps}, naming the pairwise
//                 intrinsic, for example _mm512_max_epi32.
// T1            - 'i' for integer, empty for float  [__m512{i|}]
// T2            - 'i' for integer, 'f' for float    [__v16s{i|f}]
// T3            - 'd' for doubleword, 'ps' for packed single
//                 [__builtin_ia32_select{d|ps}_512]
// Mask          - Intrinsic mask (converted to __mmask16).
//
// Vec512 is first blended with Vec512Neutral by the select builtin under Mask,
// the intent being that lanes excluded by the mask carry the identity element
// and therefore cannot influence the result.  The blended vector is then
// handed to _mm512_reduce_maxMin_32bit, whose expansion returns from the
// enclosing function.
//
// Every value argument is parenthesized before it is cast, so an argument that
// contains an operator is converted as a whole rather than only its first
// operand.

#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                                        (__mmask16)(Mask),                     \
                                        (__v16s##T2)(Vec512),                  \
                                        (__v16s##T2)(Vec512Neutral));          \
    _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                    \
  })
10195
10196static __inline__ int __DEFAULT_FN_ATTRS
10197_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
10198  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
10199                                  i, i, d, __M);
10200}
10201
10202static __inline__ unsigned int __DEFAULT_FN_ATTRS
10203_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
10204  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
10205                                  i, i, d, __M);
10206}
10207
10208static __inline__ float __DEFAULT_FN_ATTRS
10209_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
10210  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
10211                                  ps, __M);
10212}
10213
10214static __inline__ int __DEFAULT_FN_ATTRS
10215_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
10216  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
10217                                  i, i, d, __M);
10218}
10219
10220static __inline__ unsigned int __DEFAULT_FN_ATTRS
10221_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
10222  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
10223                                  i, i, d, __M);
10224}
10225
10226static __inline__ float __DEFAULT_FN_ATTRS
10227_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
10228  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
10229                                  ps, __M);
10230}
10231
10232#undef __DEFAULT_FN_ATTRS
10233
10234#endif // __AVX512FINTRIN_H
10235