1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Internal 64-byte vector types with integer elements
   (char, short, int, long long). */
typedef char      __v64qi __attribute__((__vector_size__(64)));
typedef short     __v32hi __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));

/* Internal 64-byte vector types with floating-point elements. */
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef double    __v8df  __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char      __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short     __v32hu __attribute__((__vector_size__(64)));
typedef unsigned int       __v16su __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du  __attribute__((__vector_size__(64)));

/* 64-byte vector types used in the intrinsic signatures below. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask types (unsigned char / unsigned short). */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
49
/* Rounding mode macros: small integer codes 0x00 through 0x04. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04
56
/* Constants for integer comparison predicates */
typedef enum {
  _MM_CMPINT_EQ     = 0, /* Equal */
  _MM_CMPINT_LT     = 1, /* Less than */
  _MM_CMPINT_LE     = 2, /* Less than or Equal */
  _MM_CMPINT_UNUSED = 3,
  _MM_CMPINT_NE     = 4, /* Not Equal */
  _MM_CMPINT_NLT    = 5, /* Not Less than */
  _MM_CMPINT_NLE    = 6  /* Not Less than or Equal */
} _MM_CMPINT_ENUM;

/* Alternate spellings of the two "not less" predicates. */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
69
/* Permutation selectors.  Each of the four letters encodes a 2-bit field
   (A = 0, B = 1, C = 2, D = 3); the leftmost letter occupies the two most
   significant bits of the 8-bit value.  One row per three-letter prefix. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159
/* Mantissa normalization interval selectors. */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0, /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2    = 1, /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1    = 2, /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3  /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
167
/* Mantissa sign-handling selectors. */
typedef enum
{
  _MM_MANT_SIGN_src  = 0, /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero = 1, /* sign = 0             */
  _MM_MANT_SIGN_nan  = 2  /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
174
/* Attribute set applied to every function defined in this file:
   always inlined, no debug info, compiled for the "avx512f" target. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
177
178/* Create vectors with repeated elements */
179
180static  __inline __m512i __DEFAULT_FN_ATTRS
181_mm512_setzero_si512(void)
182{
183  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
184}
185
186#define _mm512_setzero_epi32 _mm512_setzero_si512
187
188static __inline__ __m512d __DEFAULT_FN_ATTRS
189_mm512_undefined_pd(void)
190{
191  return (__m512d)__builtin_ia32_undef512();
192}
193
194static __inline__ __m512 __DEFAULT_FN_ATTRS
195_mm512_undefined(void)
196{
197  return (__m512)__builtin_ia32_undef512();
198}
199
200static __inline__ __m512 __DEFAULT_FN_ATTRS
201_mm512_undefined_ps(void)
202{
203  return (__m512)__builtin_ia32_undef512();
204}
205
206static __inline__ __m512i __DEFAULT_FN_ATTRS
207_mm512_undefined_epi32(void)
208{
209  return (__m512i)__builtin_ia32_undef512();
210}
211
212static __inline__ __m512i __DEFAULT_FN_ATTRS
213_mm512_broadcastd_epi32 (__m128i __A)
214{
215  return (__m512i)__builtin_shufflevector((__v4si) __A,
216                                          (__v4si)_mm_undefined_si128(),
217                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
218}
219
220static __inline__ __m512i __DEFAULT_FN_ATTRS
221_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
222{
223  return (__m512i)__builtin_ia32_selectd_512(__M,
224                                             (__v16si) _mm512_broadcastd_epi32(__A),
225                                             (__v16si) __O);
226}
227
228static __inline__ __m512i __DEFAULT_FN_ATTRS
229_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
230{
231  return (__m512i)__builtin_ia32_selectd_512(__M,
232                                             (__v16si) _mm512_broadcastd_epi32(__A),
233                                             (__v16si) _mm512_setzero_si512());
234}
235
236static __inline__ __m512i __DEFAULT_FN_ATTRS
237_mm512_broadcastq_epi64 (__m128i __A)
238{
239  return (__m512i)__builtin_shufflevector((__v2di) __A,
240                                          (__v2di) _mm_undefined_si128(),
241                                          0, 0, 0, 0, 0, 0, 0, 0);
242}
243
244static __inline__ __m512i __DEFAULT_FN_ATTRS
245_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246{
247  return (__m512i)__builtin_ia32_selectq_512(__M,
248                                             (__v8di) _mm512_broadcastq_epi64(__A),
249                                             (__v8di) __O);
250
251}
252
253static __inline__ __m512i __DEFAULT_FN_ATTRS
254_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255{
256  return (__m512i)__builtin_ia32_selectq_512(__M,
257                                             (__v8di) _mm512_broadcastq_epi64(__A),
258                                             (__v8di) _mm512_setzero_si512());
259}
260
261static __inline __m512i __DEFAULT_FN_ATTRS
262_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
263{
264  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A,
265                 (__v16si)
266                 _mm512_setzero_si512 (),
267                 __M);
268}
269
270static __inline __m512i __DEFAULT_FN_ATTRS
271_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
272{
273#ifdef __x86_64__
274  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A,
275                 (__v8di)
276                 _mm512_setzero_si512 (),
277                 __M);
278#else
279  return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A,
280                 (__v8di)
281                 _mm512_setzero_si512 (),
282                 __M);
283#endif
284}
285
286static __inline __m512 __DEFAULT_FN_ATTRS
287_mm512_setzero_ps(void)
288{
289  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
290                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
291}
292
293#define _mm512_setzero _mm512_setzero_ps
294
295static  __inline __m512d __DEFAULT_FN_ATTRS
296_mm512_setzero_pd(void)
297{
298  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
299}
300
301static __inline __m512 __DEFAULT_FN_ATTRS
302_mm512_set1_ps(float __w)
303{
304  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
305                   __w, __w, __w, __w, __w, __w, __w, __w  };
306}
307
308static __inline __m512d __DEFAULT_FN_ATTRS
309_mm512_set1_pd(double __w)
310{
311  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
312}
313
314static __inline __m512i __DEFAULT_FN_ATTRS
315_mm512_set1_epi8(char __w)
316{
317  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
318                             __w, __w, __w, __w, __w, __w, __w, __w,
319                             __w, __w, __w, __w, __w, __w, __w, __w,
320                             __w, __w, __w, __w, __w, __w, __w, __w,
321                             __w, __w, __w, __w, __w, __w, __w, __w,
322                             __w, __w, __w, __w, __w, __w, __w, __w,
323                             __w, __w, __w, __w, __w, __w, __w, __w,
324                             __w, __w, __w, __w, __w, __w, __w, __w  };
325}
326
327static __inline __m512i __DEFAULT_FN_ATTRS
328_mm512_set1_epi16(short __w)
329{
330  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
331                             __w, __w, __w, __w, __w, __w, __w, __w,
332                             __w, __w, __w, __w, __w, __w, __w, __w,
333                             __w, __w, __w, __w, __w, __w, __w, __w };
334}
335
336static __inline __m512i __DEFAULT_FN_ATTRS
337_mm512_set1_epi32(int __s)
338{
339  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
340                             __s, __s, __s, __s, __s, __s, __s, __s };
341}
342
343static __inline __m512i __DEFAULT_FN_ATTRS
344_mm512_set1_epi64(long long __d)
345{
346  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
347}
348
349static __inline__ __m512 __DEFAULT_FN_ATTRS
350_mm512_broadcastss_ps(__m128 __A)
351{
352  return (__m512)__builtin_shufflevector((__v4sf) __A,
353                                         (__v4sf)_mm_undefined_ps(),
354                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
355}
356
357static __inline __m512i __DEFAULT_FN_ATTRS
358_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
359{
360  return  (__m512i)(__v16si)
361   { __D, __C, __B, __A, __D, __C, __B, __A,
362     __D, __C, __B, __A, __D, __C, __B, __A };
363}
364
365static __inline __m512i __DEFAULT_FN_ATTRS
366_mm512_set4_epi64 (long long __A, long long __B, long long __C,
367       long long __D)
368{
369  return  (__m512i) (__v8di)
370   { __D, __C, __B, __A, __D, __C, __B, __A };
371}
372
373static __inline __m512d __DEFAULT_FN_ATTRS
374_mm512_set4_pd (double __A, double __B, double __C, double __D)
375{
376  return  (__m512d)
377   { __D, __C, __B, __A, __D, __C, __B, __A };
378}
379
380static __inline __m512 __DEFAULT_FN_ATTRS
381_mm512_set4_ps (float __A, float __B, float __C, float __D)
382{
383  return  (__m512)
384   { __D, __C, __B, __A, __D, __C, __B, __A,
385     __D, __C, __B, __A, __D, __C, __B, __A };
386}
387
/* Reversed-argument forms of the _mm512_set4_* functions: each macro
   forwards its four arguments in the opposite order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
399
400static __inline__ __m512d __DEFAULT_FN_ATTRS
401_mm512_broadcastsd_pd(__m128d __A)
402{
403  return (__m512d)__builtin_shufflevector((__v2df) __A,
404                                          (__v2df) _mm_undefined_pd(),
405                                          0, 0, 0, 0, 0, 0, 0, 0);
406}
407
408/* Cast between vector types */
409
410static __inline __m512d __DEFAULT_FN_ATTRS
411_mm512_castpd256_pd512(__m256d __a)
412{
413  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
414}
415
416static __inline __m512 __DEFAULT_FN_ATTRS
417_mm512_castps256_ps512(__m256 __a)
418{
419  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
420                                          -1, -1, -1, -1, -1, -1, -1, -1);
421}
422
423static __inline __m128d __DEFAULT_FN_ATTRS
424_mm512_castpd512_pd128(__m512d __a)
425{
426  return __builtin_shufflevector(__a, __a, 0, 1);
427}
428
429static __inline __m256d __DEFAULT_FN_ATTRS
430_mm512_castpd512_pd256 (__m512d __A)
431{
432  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
433}
434
435static __inline __m128 __DEFAULT_FN_ATTRS
436_mm512_castps512_ps128(__m512 __a)
437{
438  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
439}
440
441static __inline __m256 __DEFAULT_FN_ATTRS
442_mm512_castps512_ps256 (__m512 __A)
443{
444  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
445}
446
447static __inline __m512 __DEFAULT_FN_ATTRS
448_mm512_castpd_ps (__m512d __A)
449{
450  return (__m512) (__A);
451}
452
453static __inline __m512i __DEFAULT_FN_ATTRS
454_mm512_castpd_si512 (__m512d __A)
455{
456  return (__m512i) (__A);
457}
458
459static __inline__ __m512d __DEFAULT_FN_ATTRS
460_mm512_castpd128_pd512 (__m128d __A)
461{
462  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
463}
464
465static __inline __m512d __DEFAULT_FN_ATTRS
466_mm512_castps_pd (__m512 __A)
467{
468  return (__m512d) (__A);
469}
470
471static __inline __m512i __DEFAULT_FN_ATTRS
472_mm512_castps_si512 (__m512 __A)
473{
474  return (__m512i) (__A);
475}
476
477static __inline__ __m512 __DEFAULT_FN_ATTRS
478_mm512_castps128_ps512 (__m128 __A)
479{
480    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
481}
482
483static __inline__ __m512i __DEFAULT_FN_ATTRS
484_mm512_castsi128_si512 (__m128i __A)
485{
486   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
487}
488
489static __inline__ __m512i __DEFAULT_FN_ATTRS
490_mm512_castsi256_si512 (__m256i __A)
491{
492   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
493}
494
495static __inline __m512 __DEFAULT_FN_ATTRS
496_mm512_castsi512_ps (__m512i __A)
497{
498  return (__m512) (__A);
499}
500
501static __inline __m512d __DEFAULT_FN_ATTRS
502_mm512_castsi512_pd (__m512i __A)
503{
504  return (__m512d) (__A);
505}
506
507static __inline __m128i __DEFAULT_FN_ATTRS
508_mm512_castsi512_si128 (__m512i __A)
509{
510  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
511}
512
513static __inline __m256i __DEFAULT_FN_ATTRS
514_mm512_castsi512_si256 (__m512i __A)
515{
516  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
517}
518
519static __inline__ __mmask16 __DEFAULT_FN_ATTRS
520_mm512_int2mask(int __a)
521{
522  return (__mmask16)__a;
523}
524
525static __inline__ int __DEFAULT_FN_ATTRS
526_mm512_mask2int(__mmask16 __a)
527{
528  return (int)__a;
529}
530
531/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
532///    128-bit floating-point vector of [2 x double]. The lower 128 bits
533///    contain the value of the source vector. The upper 384 bits are set
534///    to zero.
535///
536/// \headerfile <x86intrin.h>
537///
538/// This intrinsic has no corresponding instruction.
539///
540/// \param __a
541///    A 128-bit vector of [2 x double].
542/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
543///    contain the value of the parameter. The upper 384 bits are set to zero.
544static __inline __m512d __DEFAULT_FN_ATTRS
545_mm512_zextpd128_pd512(__m128d __a)
546{
547  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
548}
549
550/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
551///    256-bit floating-point vector of [4 x double]. The lower 256 bits
552///    contain the value of the source vector. The upper 256 bits are set
553///    to zero.
554///
555/// \headerfile <x86intrin.h>
556///
557/// This intrinsic has no corresponding instruction.
558///
559/// \param __a
560///    A 256-bit vector of [4 x double].
561/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
562///    contain the value of the parameter. The upper 256 bits are set to zero.
563static __inline __m512d __DEFAULT_FN_ATTRS
564_mm512_zextpd256_pd512(__m256d __a)
565{
566  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
567}
568
569/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
570///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
571///    the value of the source vector. The upper 384 bits are set to zero.
572///
573/// \headerfile <x86intrin.h>
574///
575/// This intrinsic has no corresponding instruction.
576///
577/// \param __a
578///    A 128-bit vector of [4 x float].
579/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
580///    contain the value of the parameter. The upper 384 bits are set to zero.
581static __inline __m512 __DEFAULT_FN_ATTRS
582_mm512_zextps128_ps512(__m128 __a)
583{
584  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
585}
586
587/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
588///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
589///    the value of the source vector. The upper 256 bits are set to zero.
590///
591/// \headerfile <x86intrin.h>
592///
593/// This intrinsic has no corresponding instruction.
594///
595/// \param __a
596///    A 256-bit vector of [8 x float].
597/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
598///    contain the value of the parameter. The upper 256 bits are set to zero.
599static __inline __m512 __DEFAULT_FN_ATTRS
600_mm512_zextps256_ps512(__m256 __a)
601{
602  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
603}
604
605/// \brief Constructs a 512-bit integer vector from a 128-bit integer vector.
606///    The lower 128 bits contain the value of the source vector. The upper
607///    384 bits are set to zero.
608///
609/// \headerfile <x86intrin.h>
610///
611/// This intrinsic has no corresponding instruction.
612///
613/// \param __a
614///    A 128-bit integer vector.
615/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
616///    the parameter. The upper 384 bits are set to zero.
617static __inline __m512i __DEFAULT_FN_ATTRS
618_mm512_zextsi128_si512(__m128i __a)
619{
620  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
621}
622
623/// \brief Constructs a 512-bit integer vector from a 256-bit integer vector.
624///    The lower 256 bits contain the value of the source vector. The upper
625///    256 bits are set to zero.
626///
627/// \headerfile <x86intrin.h>
628///
629/// This intrinsic has no corresponding instruction.
630///
631/// \param __a
632///    A 256-bit integer vector.
633/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
634///    the parameter. The upper 256 bits are set to zero.
635static __inline __m512i __DEFAULT_FN_ATTRS
636_mm512_zextsi256_si512(__m256i __a)
637{
638  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
639}
640
641/* Bitwise operators */
642static __inline__ __m512i __DEFAULT_FN_ATTRS
643_mm512_and_epi32(__m512i __a, __m512i __b)
644{
645  return (__m512i)((__v16su)__a & (__v16su)__b);
646}
647
648static __inline__ __m512i __DEFAULT_FN_ATTRS
649_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
650{
651  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
652                (__v16si) _mm512_and_epi32(__a, __b),
653                (__v16si) __src);
654}
655
656static __inline__ __m512i __DEFAULT_FN_ATTRS
657_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
658{
659  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
660                                         __k, __a, __b);
661}
662
663static __inline__ __m512i __DEFAULT_FN_ATTRS
664_mm512_and_epi64(__m512i __a, __m512i __b)
665{
666  return (__m512i)((__v8du)__a & (__v8du)__b);
667}
668
669static __inline__ __m512i __DEFAULT_FN_ATTRS
670_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
671{
672    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
673                (__v8di) _mm512_and_epi64(__a, __b),
674                (__v8di) __src);
675}
676
677static __inline__ __m512i __DEFAULT_FN_ATTRS
678_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
679{
680  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
681                                         __k, __a, __b);
682}
683
684static __inline__ __m512i __DEFAULT_FN_ATTRS
685_mm512_andnot_si512 (__m512i __A, __m512i __B)
686{
687  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
688}
689
690static __inline__ __m512i __DEFAULT_FN_ATTRS
691_mm512_andnot_epi32 (__m512i __A, __m512i __B)
692{
693  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
694}
695
696static __inline__ __m512i __DEFAULT_FN_ATTRS
697_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
698{
699  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
700                                         (__v16si)_mm512_andnot_epi32(__A, __B),
701                                         (__v16si)__W);
702}
703
704static __inline__ __m512i __DEFAULT_FN_ATTRS
705_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
706{
707  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
708                                           __U, __A, __B);
709}
710
711static __inline__ __m512i __DEFAULT_FN_ATTRS
712_mm512_andnot_epi64(__m512i __A, __m512i __B)
713{
714  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
715}
716
717static __inline__ __m512i __DEFAULT_FN_ATTRS
718_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
719{
720  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
721                                          (__v8di)_mm512_andnot_epi64(__A, __B),
722                                          (__v8di)__W);
723}
724
725static __inline__ __m512i __DEFAULT_FN_ATTRS
726_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
727{
728  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
729                                           __U, __A, __B);
730}
731
732static __inline__ __m512i __DEFAULT_FN_ATTRS
733_mm512_or_epi32(__m512i __a, __m512i __b)
734{
735  return (__m512i)((__v16su)__a | (__v16su)__b);
736}
737
738static __inline__ __m512i __DEFAULT_FN_ATTRS
739_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
740{
741  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
742                                             (__v16si)_mm512_or_epi32(__a, __b),
743                                             (__v16si)__src);
744}
745
746static __inline__ __m512i __DEFAULT_FN_ATTRS
747_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
748{
749  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
750}
751
752static __inline__ __m512i __DEFAULT_FN_ATTRS
753_mm512_or_epi64(__m512i __a, __m512i __b)
754{
755  return (__m512i)((__v8du)__a | (__v8du)__b);
756}
757
758static __inline__ __m512i __DEFAULT_FN_ATTRS
759_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
760{
761  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
762                                             (__v8di)_mm512_or_epi64(__a, __b),
763                                             (__v8di)__src);
764}
765
766static __inline__ __m512i __DEFAULT_FN_ATTRS
767_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
768{
769  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
770}
771
772static __inline__ __m512i __DEFAULT_FN_ATTRS
773_mm512_xor_epi32(__m512i __a, __m512i __b)
774{
775  return (__m512i)((__v16su)__a ^ (__v16su)__b);
776}
777
778static __inline__ __m512i __DEFAULT_FN_ATTRS
779_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
780{
781  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
782                                            (__v16si)_mm512_xor_epi32(__a, __b),
783                                            (__v16si)__src);
784}
785
786static __inline__ __m512i __DEFAULT_FN_ATTRS
787_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
788{
789  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
790}
791
792static __inline__ __m512i __DEFAULT_FN_ATTRS
793_mm512_xor_epi64(__m512i __a, __m512i __b)
794{
795  return (__m512i)((__v8du)__a ^ (__v8du)__b);
796}
797
798static __inline__ __m512i __DEFAULT_FN_ATTRS
799_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
800{
801  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
802                                             (__v8di)_mm512_xor_epi64(__a, __b),
803                                             (__v8di)__src);
804}
805
806static __inline__ __m512i __DEFAULT_FN_ATTRS
807_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
808{
809  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
810}
811
812static __inline__ __m512i __DEFAULT_FN_ATTRS
813_mm512_and_si512(__m512i __a, __m512i __b)
814{
815  return (__m512i)((__v8du)__a & (__v8du)__b);
816}
817
818static __inline__ __m512i __DEFAULT_FN_ATTRS
819_mm512_or_si512(__m512i __a, __m512i __b)
820{
821  return (__m512i)((__v8du)__a | (__v8du)__b);
822}
823
824static __inline__ __m512i __DEFAULT_FN_ATTRS
825_mm512_xor_si512(__m512i __a, __m512i __b)
826{
827  return (__m512i)((__v8du)__a ^ (__v8du)__b);
828}
829
830/* Arithmetic */
831
832static __inline __m512d __DEFAULT_FN_ATTRS
833_mm512_add_pd(__m512d __a, __m512d __b)
834{
835  return (__m512d)((__v8df)__a + (__v8df)__b);
836}
837
838static __inline __m512 __DEFAULT_FN_ATTRS
839_mm512_add_ps(__m512 __a, __m512 __b)
840{
841  return (__m512)((__v16sf)__a + (__v16sf)__b);
842}
843
844static __inline __m512d __DEFAULT_FN_ATTRS
845_mm512_mul_pd(__m512d __a, __m512d __b)
846{
847  return (__m512d)((__v8df)__a * (__v8df)__b);
848}
849
850static __inline __m512 __DEFAULT_FN_ATTRS
851_mm512_mul_ps(__m512 __a, __m512 __b)
852{
853  return (__m512)((__v16sf)__a * (__v16sf)__b);
854}
855
856static __inline __m512d __DEFAULT_FN_ATTRS
857_mm512_sub_pd(__m512d __a, __m512d __b)
858{
859  return (__m512d)((__v8df)__a - (__v8df)__b);
860}
861
862static __inline __m512 __DEFAULT_FN_ATTRS
863_mm512_sub_ps(__m512 __a, __m512 __b)
864{
865  return (__m512)((__v16sf)__a - (__v16sf)__b);
866}
867
868static __inline__ __m512i __DEFAULT_FN_ATTRS
869_mm512_add_epi64 (__m512i __A, __m512i __B)
870{
871  return (__m512i) ((__v8du) __A + (__v8du) __B);
872}
873
874static __inline__ __m512i __DEFAULT_FN_ATTRS
875_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
876{
877  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
878                                             (__v8di)_mm512_add_epi64(__A, __B),
879                                             (__v8di)__W);
880}
881
882static __inline__ __m512i __DEFAULT_FN_ATTRS
883_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
884{
885  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
886                                             (__v8di)_mm512_add_epi64(__A, __B),
887                                             (__v8di)_mm512_setzero_si512());
888}
889
890static __inline__ __m512i __DEFAULT_FN_ATTRS
891_mm512_sub_epi64 (__m512i __A, __m512i __B)
892{
893  return (__m512i) ((__v8du) __A - (__v8du) __B);
894}
895
896static __inline__ __m512i __DEFAULT_FN_ATTRS
897_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
898{
899  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
900                                             (__v8di)_mm512_sub_epi64(__A, __B),
901                                             (__v8di)__W);
902}
903
904static __inline__ __m512i __DEFAULT_FN_ATTRS
905_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
906{
907  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
908                                             (__v8di)_mm512_sub_epi64(__A, __B),
909                                             (__v8di)_mm512_setzero_si512());
910}
911
912static __inline__ __m512i __DEFAULT_FN_ATTRS
913_mm512_add_epi32 (__m512i __A, __m512i __B)
914{
915  return (__m512i) ((__v16su) __A + (__v16su) __B);
916}
917
918static __inline__ __m512i __DEFAULT_FN_ATTRS
919_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
920{
921  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
922                                             (__v16si)_mm512_add_epi32(__A, __B),
923                                             (__v16si)__W);
924}
925
926static __inline__ __m512i __DEFAULT_FN_ATTRS
927_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
928{
929  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
930                                             (__v16si)_mm512_add_epi32(__A, __B),
931                                             (__v16si)_mm512_setzero_si512());
932}
933
934static __inline__ __m512i __DEFAULT_FN_ATTRS
935_mm512_sub_epi32 (__m512i __A, __m512i __B)
936{
937  return (__m512i) ((__v16su) __A - (__v16su) __B);
938}
939
940static __inline__ __m512i __DEFAULT_FN_ATTRS
941_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
942{
943  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
944                                             (__v16si)_mm512_sub_epi32(__A, __B),
945                                             (__v16si)__W);
946}
947
948static __inline__ __m512i __DEFAULT_FN_ATTRS
949_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
950{
951  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
952                                             (__v16si)_mm512_sub_epi32(__A, __B),
953                                             (__v16si)_mm512_setzero_si512());
954}
955
/* Merge-masked packed-double maximum with an explicit rounding argument.
 * Expands to __builtin_ia32_maxpd512_mask(A, B, W, U, R): W is the
 * pass-through vector, U the 8-bit mask and R the rounding-control value
 * (cast to int and handed straight to the builtin). */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })
961
/* Zero-masked packed-double maximum with an explicit rounding argument.
 * Same builtin as the merge-masked form, but the pass-through operand is
 * _mm512_setzero_pd() instead of a caller-supplied vector. */
#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
967
/* Unmasked packed-double maximum with an explicit rounding argument.
 * Calls the masked builtin with an all-ones mask ((__mmask8)-1) and
 * _mm512_undefined_pd() as the pass-through operand. */
#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
973
974static  __inline__ __m512d __DEFAULT_FN_ATTRS
975_mm512_max_pd(__m512d __A, __m512d __B)
976{
977  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
978             (__v8df) __B,
979             (__v8df)
980             _mm512_setzero_pd (),
981             (__mmask8) -1,
982             _MM_FROUND_CUR_DIRECTION);
983}
984
985static __inline__ __m512d __DEFAULT_FN_ATTRS
986_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
987{
988  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
989                  (__v8df) __B,
990                  (__v8df) __W,
991                  (__mmask8) __U,
992                  _MM_FROUND_CUR_DIRECTION);
993}
994
995static __inline__ __m512d __DEFAULT_FN_ATTRS
996_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
997{
998  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
999                  (__v8df) __B,
1000                  (__v8df)
1001                  _mm512_setzero_pd (),
1002                  (__mmask8) __U,
1003                  _MM_FROUND_CUR_DIRECTION);
1004}
1005
/* Merge-masked packed-float maximum with an explicit rounding argument.
 * Expands to __builtin_ia32_maxps512_mask(A, B, W, U, R): W is the
 * pass-through vector, U the 16-bit mask and R the rounding-control value
 * (cast to int and handed straight to the builtin). */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
1011
/* Zero-masked packed-float maximum with an explicit rounding argument.
 * Same builtin as the merge-masked form, but the pass-through operand is
 * _mm512_setzero_ps() instead of a caller-supplied vector. */
#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
1017
/* Unmasked packed-float maximum with an explicit rounding argument.
 * Calls the masked builtin with an all-ones mask ((__mmask16)-1) and
 * _mm512_undefined_ps() as the pass-through operand. */
#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
1023
1024static  __inline__ __m512 __DEFAULT_FN_ATTRS
1025_mm512_max_ps(__m512 __A, __m512 __B)
1026{
1027  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1028            (__v16sf) __B,
1029            (__v16sf)
1030            _mm512_setzero_ps (),
1031            (__mmask16) -1,
1032            _MM_FROUND_CUR_DIRECTION);
1033}
1034
1035static __inline__ __m512 __DEFAULT_FN_ATTRS
1036_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1037{
1038  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1039                 (__v16sf) __B,
1040                 (__v16sf) __W,
1041                 (__mmask16) __U,
1042                 _MM_FROUND_CUR_DIRECTION);
1043}
1044
1045static __inline__ __m512 __DEFAULT_FN_ATTRS
1046_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1047{
1048  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1049                 (__v16sf) __B,
1050                 (__v16sf)
1051                 _mm512_setzero_ps (),
1052                 (__mmask16) __U,
1053                 _MM_FROUND_CUR_DIRECTION);
1054}
1055
1056static __inline__ __m128 __DEFAULT_FN_ATTRS
1057_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1058  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1059                (__v4sf) __B,
1060                (__v4sf) __W,
1061                (__mmask8) __U,
1062                _MM_FROUND_CUR_DIRECTION);
1063}
1064
1065static __inline__ __m128 __DEFAULT_FN_ATTRS
1066_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1067  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1068                (__v4sf) __B,
1069                (__v4sf)  _mm_setzero_ps (),
1070                (__mmask8) __U,
1071                _MM_FROUND_CUR_DIRECTION);
1072}
1073
/* Unmasked scalar-float maximum with an explicit rounding argument R.
 * Calls __builtin_ia32_maxss_round_mask with an all-ones mask and
 * _mm_setzero_ps() as the pass-through operand. */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })
1079
/* Merge-masked scalar-float maximum with an explicit rounding argument R.
 * Expands to __builtin_ia32_maxss_round_mask(A, B, W, U, R), where W is the
 * pass-through vector and U the mask. */
#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })
1085
/* Zero-masked scalar-float maximum with an explicit rounding argument R.
 * Same builtin as the merge-masked form, with _mm_setzero_ps() as the
 * pass-through operand. */
#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
1091
1092static __inline__ __m128d __DEFAULT_FN_ATTRS
1093_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1094  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1095                (__v2df) __B,
1096                (__v2df) __W,
1097                (__mmask8) __U,
1098                _MM_FROUND_CUR_DIRECTION);
1099}
1100
1101static __inline__ __m128d __DEFAULT_FN_ATTRS
1102_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1103  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1104                (__v2df) __B,
1105                (__v2df)  _mm_setzero_pd (),
1106                (__mmask8) __U,
1107                _MM_FROUND_CUR_DIRECTION);
1108}
1109
/* Unmasked scalar-double maximum with an explicit rounding argument R.
 * Calls __builtin_ia32_maxsd_round_mask with an all-ones mask and
 * _mm_setzero_pd() as the pass-through operand. */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })
1115
/* Merge-masked scalar-double maximum with an explicit rounding argument R.
 * Expands to __builtin_ia32_maxsd_round_mask(A, B, W, U, R), where W is the
 * pass-through vector and U the mask. */
#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })
1121
/* Zero-masked scalar-double maximum with an explicit rounding argument R.
 * Same builtin as the merge-masked form, with _mm_setzero_pd() as the
 * pass-through operand. */
#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
1127
1128static __inline __m512i
1129__DEFAULT_FN_ATTRS
1130_mm512_max_epi32(__m512i __A, __m512i __B)
1131{
1132  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1133              (__v16si) __B,
1134              (__v16si)
1135              _mm512_setzero_si512 (),
1136              (__mmask16) -1);
1137}
1138
1139static __inline__ __m512i __DEFAULT_FN_ATTRS
1140_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1141{
1142  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1143                   (__v16si) __B,
1144                   (__v16si) __W, __M);
1145}
1146
1147static __inline__ __m512i __DEFAULT_FN_ATTRS
1148_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1149{
1150  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1151                   (__v16si) __B,
1152                   (__v16si)
1153                   _mm512_setzero_si512 (),
1154                   __M);
1155}
1156
1157static __inline __m512i __DEFAULT_FN_ATTRS
1158_mm512_max_epu32(__m512i __A, __m512i __B)
1159{
1160  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1161              (__v16si) __B,
1162              (__v16si)
1163              _mm512_setzero_si512 (),
1164              (__mmask16) -1);
1165}
1166
1167static __inline__ __m512i __DEFAULT_FN_ATTRS
1168_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1169{
1170  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1171                   (__v16si) __B,
1172                   (__v16si) __W, __M);
1173}
1174
1175static __inline__ __m512i __DEFAULT_FN_ATTRS
1176_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1177{
1178  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1179                   (__v16si) __B,
1180                   (__v16si)
1181                   _mm512_setzero_si512 (),
1182                   __M);
1183}
1184
1185static __inline __m512i __DEFAULT_FN_ATTRS
1186_mm512_max_epi64(__m512i __A, __m512i __B)
1187{
1188  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1189              (__v8di) __B,
1190              (__v8di)
1191              _mm512_setzero_si512 (),
1192              (__mmask8) -1);
1193}
1194
1195static __inline__ __m512i __DEFAULT_FN_ATTRS
1196_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1197{
1198  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1199                   (__v8di) __B,
1200                   (__v8di) __W, __M);
1201}
1202
1203static __inline__ __m512i __DEFAULT_FN_ATTRS
1204_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1205{
1206  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1207                   (__v8di) __B,
1208                   (__v8di)
1209                   _mm512_setzero_si512 (),
1210                   __M);
1211}
1212
1213static __inline __m512i __DEFAULT_FN_ATTRS
1214_mm512_max_epu64(__m512i __A, __m512i __B)
1215{
1216  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1217              (__v8di) __B,
1218              (__v8di)
1219              _mm512_setzero_si512 (),
1220              (__mmask8) -1);
1221}
1222
1223static __inline__ __m512i __DEFAULT_FN_ATTRS
1224_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1225{
1226  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1227                   (__v8di) __B,
1228                   (__v8di) __W, __M);
1229}
1230
1231static __inline__ __m512i __DEFAULT_FN_ATTRS
1232_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1233{
1234  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1235                   (__v8di) __B,
1236                   (__v8di)
1237                   _mm512_setzero_si512 (),
1238                   __M);
1239}
1240
/* Merge-masked packed-double minimum with an explicit rounding argument.
 * Expands to __builtin_ia32_minpd512_mask(A, B, W, U, R): W is the
 * pass-through vector, U the 8-bit mask and R the rounding-control value
 * (cast to int and handed straight to the builtin). */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
                                        (int)(R)); })
1246
/* Zero-masked packed-double minimum with an explicit rounding argument.
 * Same builtin as the merge-masked form, but the pass-through operand is
 * _mm512_setzero_pd() instead of a caller-supplied vector. */
#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(U), (int)(R)); })
1252
/* Unmasked packed-double minimum with an explicit rounding argument.
 * Calls the masked builtin with an all-ones mask ((__mmask8)-1) and
 * _mm512_undefined_pd() as the pass-through operand. */
#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(B), \
                                        (__v8df)_mm512_undefined_pd(), \
                                        (__mmask8)-1, (int)(R)); })
1258
1259static  __inline__ __m512d __DEFAULT_FN_ATTRS
1260_mm512_min_pd(__m512d __A, __m512d __B)
1261{
1262  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1263             (__v8df) __B,
1264             (__v8df)
1265             _mm512_setzero_pd (),
1266             (__mmask8) -1,
1267             _MM_FROUND_CUR_DIRECTION);
1268}
1269
1270static __inline__ __m512d __DEFAULT_FN_ATTRS
1271_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1272{
1273  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1274                  (__v8df) __B,
1275                  (__v8df) __W,
1276                  (__mmask8) __U,
1277                  _MM_FROUND_CUR_DIRECTION);
1278}
1279
/* Merge-masked packed-float minimum with an explicit rounding argument.
 * Expands to __builtin_ia32_minps512_mask(A, B, W, U, R): W is the
 * pass-through vector, U the 16-bit mask and R the rounding-control value
 * (cast to int and handed straight to the builtin). */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
1285
/* Zero-masked packed-float minimum with an explicit rounding argument.
 * Same builtin as the merge-masked form, but the pass-through operand is
 * _mm512_setzero_ps() instead of a caller-supplied vector. */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
1291
/* Unmasked packed-float minimum with an explicit rounding argument.
 * Calls the masked builtin with an all-ones mask ((__mmask16)-1) and
 * _mm512_undefined_ps() as the pass-through operand. */
#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_undefined_ps(), \
                                       (__mmask16)-1, (int)(R)); })
1297
1298static __inline__ __m512d __DEFAULT_FN_ATTRS
1299_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1300{
1301  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1302                  (__v8df) __B,
1303                  (__v8df)
1304                  _mm512_setzero_pd (),
1305                  (__mmask8) __U,
1306                  _MM_FROUND_CUR_DIRECTION);
1307}
1308
1309static  __inline__ __m512 __DEFAULT_FN_ATTRS
1310_mm512_min_ps(__m512 __A, __m512 __B)
1311{
1312  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1313            (__v16sf) __B,
1314            (__v16sf)
1315            _mm512_setzero_ps (),
1316            (__mmask16) -1,
1317            _MM_FROUND_CUR_DIRECTION);
1318}
1319
1320static __inline__ __m512 __DEFAULT_FN_ATTRS
1321_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1322{
1323  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1324                 (__v16sf) __B,
1325                 (__v16sf) __W,
1326                 (__mmask16) __U,
1327                 _MM_FROUND_CUR_DIRECTION);
1328}
1329
1330static __inline__ __m512 __DEFAULT_FN_ATTRS
1331_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1332{
1333  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1334                 (__v16sf) __B,
1335                 (__v16sf)
1336                 _mm512_setzero_ps (),
1337                 (__mmask16) __U,
1338                 _MM_FROUND_CUR_DIRECTION);
1339}
1340
1341static __inline__ __m128 __DEFAULT_FN_ATTRS
1342_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1343  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1344                (__v4sf) __B,
1345                (__v4sf) __W,
1346                (__mmask8) __U,
1347                _MM_FROUND_CUR_DIRECTION);
1348}
1349
1350static __inline__ __m128 __DEFAULT_FN_ATTRS
1351_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1352  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1353                (__v4sf) __B,
1354                (__v4sf)  _mm_setzero_ps (),
1355                (__mmask8) __U,
1356                _MM_FROUND_CUR_DIRECTION);
1357}
1358
/* Unmasked scalar-float minimum with an explicit rounding argument R.
 * Calls __builtin_ia32_minss_round_mask with an all-ones mask and
 * _mm_setzero_ps() as the pass-through operand. */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })
1364
/* Merge-masked scalar-float minimum with an explicit rounding argument R.
 * Expands to __builtin_ia32_minss_round_mask(A, B, W, U, R), where W is the
 * pass-through vector and U the mask. */
#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R)); })
1370
/* Zero-masked scalar-float minimum with an explicit rounding argument R.
 * Same builtin as the merge-masked form, with _mm_setzero_ps() as the
 * pass-through operand. */
#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
1376
1377static __inline__ __m128d __DEFAULT_FN_ATTRS
1378_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1379  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1380                (__v2df) __B,
1381                (__v2df) __W,
1382                (__mmask8) __U,
1383                _MM_FROUND_CUR_DIRECTION);
1384}
1385
1386static __inline__ __m128d __DEFAULT_FN_ATTRS
1387_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1388  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1389                (__v2df) __B,
1390                (__v2df)  _mm_setzero_pd (),
1391                (__mmask8) __U,
1392                _MM_FROUND_CUR_DIRECTION);
1393}
1394
/* Unmasked scalar-double minimum with an explicit rounding argument R.
 * Calls __builtin_ia32_minsd_round_mask with an all-ones mask and
 * _mm_setzero_pd() as the pass-through operand. */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })
1400
/* Merge-masked scalar-double minimum with an explicit rounding argument R.
 * Expands to __builtin_ia32_minsd_round_mask(A, B, W, U, R), where W is the
 * pass-through vector and U the mask. */
#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R)); })
1406
/* Zero-masked scalar-double minimum with an explicit rounding argument R.
 * Same builtin as the merge-masked form, with _mm_setzero_pd() as the
 * pass-through operand. */
#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
1412
1413static __inline __m512i
1414__DEFAULT_FN_ATTRS
1415_mm512_min_epi32(__m512i __A, __m512i __B)
1416{
1417  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1418              (__v16si) __B,
1419              (__v16si)
1420              _mm512_setzero_si512 (),
1421              (__mmask16) -1);
1422}
1423
1424static __inline__ __m512i __DEFAULT_FN_ATTRS
1425_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1426{
1427  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1428                   (__v16si) __B,
1429                   (__v16si) __W, __M);
1430}
1431
1432static __inline__ __m512i __DEFAULT_FN_ATTRS
1433_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1434{
1435  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1436                   (__v16si) __B,
1437                   (__v16si)
1438                   _mm512_setzero_si512 (),
1439                   __M);
1440}
1441
1442static __inline __m512i __DEFAULT_FN_ATTRS
1443_mm512_min_epu32(__m512i __A, __m512i __B)
1444{
1445  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1446              (__v16si) __B,
1447              (__v16si)
1448              _mm512_setzero_si512 (),
1449              (__mmask16) -1);
1450}
1451
1452static __inline__ __m512i __DEFAULT_FN_ATTRS
1453_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1454{
1455  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1456                   (__v16si) __B,
1457                   (__v16si) __W, __M);
1458}
1459
1460static __inline__ __m512i __DEFAULT_FN_ATTRS
1461_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1462{
1463  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1464                   (__v16si) __B,
1465                   (__v16si)
1466                   _mm512_setzero_si512 (),
1467                   __M);
1468}
1469
1470static __inline __m512i __DEFAULT_FN_ATTRS
1471_mm512_min_epi64(__m512i __A, __m512i __B)
1472{
1473  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1474              (__v8di) __B,
1475              (__v8di)
1476              _mm512_setzero_si512 (),
1477              (__mmask8) -1);
1478}
1479
1480static __inline__ __m512i __DEFAULT_FN_ATTRS
1481_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1482{
1483  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1484                   (__v8di) __B,
1485                   (__v8di) __W, __M);
1486}
1487
1488static __inline__ __m512i __DEFAULT_FN_ATTRS
1489_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1490{
1491  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1492                   (__v8di) __B,
1493                   (__v8di)
1494                   _mm512_setzero_si512 (),
1495                   __M);
1496}
1497
1498static __inline __m512i __DEFAULT_FN_ATTRS
1499_mm512_min_epu64(__m512i __A, __m512i __B)
1500{
1501  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1502              (__v8di) __B,
1503              (__v8di)
1504              _mm512_setzero_si512 (),
1505              (__mmask8) -1);
1506}
1507
1508static __inline__ __m512i __DEFAULT_FN_ATTRS
1509_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1510{
1511  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1512                   (__v8di) __B,
1513                   (__v8di) __W, __M);
1514}
1515
1516static __inline__ __m512i __DEFAULT_FN_ATTRS
1517_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1518{
1519  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1520                   (__v8di) __B,
1521                   (__v8di)
1522                   _mm512_setzero_si512 (),
1523                   __M);
1524}
1525
1526static __inline __m512i __DEFAULT_FN_ATTRS
1527_mm512_mul_epi32(__m512i __X, __m512i __Y)
1528{
1529  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1530}
1531
1532static __inline __m512i __DEFAULT_FN_ATTRS
1533_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1534{
1535  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1536                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1537                                             (__v8di)__W);
1538}
1539
1540static __inline __m512i __DEFAULT_FN_ATTRS
1541_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1542{
1543  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1544                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1545                                             (__v8di)_mm512_setzero_si512 ());
1546}
1547
1548static __inline __m512i __DEFAULT_FN_ATTRS
1549_mm512_mul_epu32(__m512i __X, __m512i __Y)
1550{
1551  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1552}
1553
1554static __inline __m512i __DEFAULT_FN_ATTRS
1555_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1556{
1557  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1558                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1559                                             (__v8di)__W);
1560}
1561
1562static __inline __m512i __DEFAULT_FN_ATTRS
1563_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1564{
1565  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1566                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1567                                             (__v8di)_mm512_setzero_si512 ());
1568}
1569
1570static __inline __m512i __DEFAULT_FN_ATTRS
1571_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1572{
1573  return (__m512i) ((__v16su) __A * (__v16su) __B);
1574}
1575
1576static __inline __m512i __DEFAULT_FN_ATTRS
1577_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1578{
1579  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1580                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1581                                             (__v16si)_mm512_setzero_si512());
1582}
1583
1584static __inline __m512i __DEFAULT_FN_ATTRS
1585_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1586{
1587  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1588                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1589                                             (__v16si)__W);
1590}
1591
/* Merge-masked packed-double square root with an explicit rounding
 * argument.  Expands to __builtin_ia32_sqrtpd512_mask(A, W, U, R): W is the
 * pass-through vector, U the 8-bit mask and R the rounding-control value. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(W), (__mmask8)(U), \
                                         (int)(R)); })
1596
/* Zero-masked packed-double square root with an explicit rounding argument.
 * Same builtin as the merge-masked form, with _mm512_setzero_pd() as the
 * pass-through operand. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(U), (int)(R)); })
1601
/* Unmasked packed-double square root with an explicit rounding argument.
 * Calls the masked builtin with an all-ones mask ((__mmask8)-1) and
 * _mm512_undefined_pd() as the pass-through operand. */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_undefined_pd(), \
                                         (__mmask8)-1, (int)(R)); })
1606
1607static  __inline__ __m512d __DEFAULT_FN_ATTRS
1608_mm512_sqrt_pd(__m512d __a)
1609{
1610  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1611                                                (__v8df) _mm512_setzero_pd (),
1612                                                (__mmask8) -1,
1613                                                _MM_FROUND_CUR_DIRECTION);
1614}
1615
1616static __inline__ __m512d __DEFAULT_FN_ATTRS
1617_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1618{
1619  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1620                   (__v8df) __W,
1621                   (__mmask8) __U,
1622                   _MM_FROUND_CUR_DIRECTION);
1623}
1624
1625static __inline__ __m512d __DEFAULT_FN_ATTRS
1626_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1627{
1628  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1629                   (__v8df)
1630                   _mm512_setzero_pd (),
1631                   (__mmask8) __U,
1632                   _MM_FROUND_CUR_DIRECTION);
1633}
1634
/* Expands to __builtin_ia32_sqrtps512_mask(A, W, U, R) with operand casts. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_sqrtps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(W), \
        (__mmask16)(U), \
        (int)(R)); \
  })
1639
/* Expands to __builtin_ia32_sqrtps512_mask(A, zero vector, U, R). */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_sqrtps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(U), \
        (int)(R)); \
  })
1644
/* Expands to __builtin_ia32_sqrtps512_mask(A, undefined vector, -1, R). */
#define _mm512_sqrt_round_ps(A, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_sqrtps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)_mm512_undefined_ps(), \
        (__mmask16)-1, \
        (int)(R)); \
  })
1649
1650static  __inline__ __m512 __DEFAULT_FN_ATTRS
1651_mm512_sqrt_ps(__m512 __a)
1652{
1653  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1654                                               (__v16sf) _mm512_setzero_ps (),
1655                                               (__mmask16) -1,
1656                                               _MM_FROUND_CUR_DIRECTION);
1657}
1658
1659static  __inline__ __m512 __DEFAULT_FN_ATTRS
1660_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1661{
1662  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1663                                               (__v16sf) __W,
1664                                               (__mmask16) __U,
1665                                               _MM_FROUND_CUR_DIRECTION);
1666}
1667
1668static  __inline__ __m512 __DEFAULT_FN_ATTRS
1669_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1670{
1671  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1672                                               (__v16sf) _mm512_setzero_ps (),
1673                                               (__mmask16) __U,
1674                                               _MM_FROUND_CUR_DIRECTION);
1675}
1676
1677static  __inline__ __m512d __DEFAULT_FN_ATTRS
1678_mm512_rsqrt14_pd(__m512d __A)
1679{
1680  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1681                 (__v8df)
1682                 _mm512_setzero_pd (),
1683                 (__mmask8) -1);}
1684
1685static __inline__ __m512d __DEFAULT_FN_ATTRS
1686_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1687{
1688  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1689                  (__v8df) __W,
1690                  (__mmask8) __U);
1691}
1692
1693static __inline__ __m512d __DEFAULT_FN_ATTRS
1694_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1695{
1696  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1697                  (__v8df)
1698                  _mm512_setzero_pd (),
1699                  (__mmask8) __U);
1700}
1701
1702static  __inline__ __m512 __DEFAULT_FN_ATTRS
1703_mm512_rsqrt14_ps(__m512 __A)
1704{
1705  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1706                (__v16sf)
1707                _mm512_setzero_ps (),
1708                (__mmask16) -1);
1709}
1710
1711static __inline__ __m512 __DEFAULT_FN_ATTRS
1712_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1713{
1714  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1715                 (__v16sf) __W,
1716                 (__mmask16) __U);
1717}
1718
1719static __inline__ __m512 __DEFAULT_FN_ATTRS
1720_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1721{
1722  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1723                 (__v16sf)
1724                 _mm512_setzero_ps (),
1725                 (__mmask16) __U);
1726}
1727
1728static  __inline__ __m128 __DEFAULT_FN_ATTRS
1729_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1730{
1731  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1732             (__v4sf) __B,
1733             (__v4sf)
1734             _mm_setzero_ps (),
1735             (__mmask8) -1);
1736}
1737
1738static __inline__ __m128 __DEFAULT_FN_ATTRS
1739_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1740{
1741 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1742          (__v4sf) __B,
1743          (__v4sf) __W,
1744          (__mmask8) __U);
1745}
1746
1747static __inline__ __m128 __DEFAULT_FN_ATTRS
1748_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1749{
1750 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1751          (__v4sf) __B,
1752          (__v4sf) _mm_setzero_ps (),
1753          (__mmask8) __U);
1754}
1755
1756static  __inline__ __m128d __DEFAULT_FN_ATTRS
1757_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1758{
1759  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1760              (__v2df) __B,
1761              (__v2df)
1762              _mm_setzero_pd (),
1763              (__mmask8) -1);
1764}
1765
1766static __inline__ __m128d __DEFAULT_FN_ATTRS
1767_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1768{
1769 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1770          (__v2df) __B,
1771          (__v2df) __W,
1772          (__mmask8) __U);
1773}
1774
1775static __inline__ __m128d __DEFAULT_FN_ATTRS
1776_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1777{
1778 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1779          (__v2df) __B,
1780          (__v2df) _mm_setzero_pd (),
1781          (__mmask8) __U);
1782}
1783
1784static  __inline__ __m512d __DEFAULT_FN_ATTRS
1785_mm512_rcp14_pd(__m512d __A)
1786{
1787  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1788               (__v8df)
1789               _mm512_setzero_pd (),
1790               (__mmask8) -1);
1791}
1792
1793static __inline__ __m512d __DEFAULT_FN_ATTRS
1794_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1795{
1796  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1797                (__v8df) __W,
1798                (__mmask8) __U);
1799}
1800
1801static __inline__ __m512d __DEFAULT_FN_ATTRS
1802_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1803{
1804  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1805                (__v8df)
1806                _mm512_setzero_pd (),
1807                (__mmask8) __U);
1808}
1809
1810static  __inline__ __m512 __DEFAULT_FN_ATTRS
1811_mm512_rcp14_ps(__m512 __A)
1812{
1813  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1814              (__v16sf)
1815              _mm512_setzero_ps (),
1816              (__mmask16) -1);
1817}
1818
1819static __inline__ __m512 __DEFAULT_FN_ATTRS
1820_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1821{
1822  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1823                   (__v16sf) __W,
1824                   (__mmask16) __U);
1825}
1826
1827static __inline__ __m512 __DEFAULT_FN_ATTRS
1828_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1829{
1830  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1831                   (__v16sf)
1832                   _mm512_setzero_ps (),
1833                   (__mmask16) __U);
1834}
1835
1836static  __inline__ __m128 __DEFAULT_FN_ATTRS
1837_mm_rcp14_ss(__m128 __A, __m128 __B)
1838{
1839  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1840                 (__v4sf) __B,
1841                 (__v4sf)
1842                 _mm_setzero_ps (),
1843                 (__mmask8) -1);
1844}
1845
1846static __inline__ __m128 __DEFAULT_FN_ATTRS
1847_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1848{
1849 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1850          (__v4sf) __B,
1851          (__v4sf) __W,
1852          (__mmask8) __U);
1853}
1854
1855static __inline__ __m128 __DEFAULT_FN_ATTRS
1856_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1857{
1858 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1859          (__v4sf) __B,
1860          (__v4sf) _mm_setzero_ps (),
1861          (__mmask8) __U);
1862}
1863
1864static  __inline__ __m128d __DEFAULT_FN_ATTRS
1865_mm_rcp14_sd(__m128d __A, __m128d __B)
1866{
1867  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1868            (__v2df) __B,
1869            (__v2df)
1870            _mm_setzero_pd (),
1871            (__mmask8) -1);
1872}
1873
1874static __inline__ __m128d __DEFAULT_FN_ATTRS
1875_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1876{
1877 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1878          (__v2df) __B,
1879          (__v2df) __W,
1880          (__mmask8) __U);
1881}
1882
1883static __inline__ __m128d __DEFAULT_FN_ATTRS
1884_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1885{
1886 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1887          (__v2df) __B,
1888          (__v2df) _mm_setzero_pd (),
1889          (__mmask8) __U);
1890}
1891
1892static __inline __m512 __DEFAULT_FN_ATTRS
1893_mm512_floor_ps(__m512 __A)
1894{
1895  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1896                                                  _MM_FROUND_FLOOR,
1897                                                  (__v16sf) __A, -1,
1898                                                  _MM_FROUND_CUR_DIRECTION);
1899}
1900
1901static __inline__ __m512 __DEFAULT_FN_ATTRS
1902_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1903{
1904  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1905                   _MM_FROUND_FLOOR,
1906                   (__v16sf) __W, __U,
1907                   _MM_FROUND_CUR_DIRECTION);
1908}
1909
1910static __inline __m512d __DEFAULT_FN_ATTRS
1911_mm512_floor_pd(__m512d __A)
1912{
1913  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1914                                                   _MM_FROUND_FLOOR,
1915                                                   (__v8df) __A, -1,
1916                                                   _MM_FROUND_CUR_DIRECTION);
1917}
1918
1919static __inline__ __m512d __DEFAULT_FN_ATTRS
1920_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1921{
1922  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1923                _MM_FROUND_FLOOR,
1924                (__v8df) __W, __U,
1925                _MM_FROUND_CUR_DIRECTION);
1926}
1927
1928static __inline__ __m512 __DEFAULT_FN_ATTRS
1929_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1930{
1931  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1932                   _MM_FROUND_CEIL,
1933                   (__v16sf) __W, __U,
1934                   _MM_FROUND_CUR_DIRECTION);
1935}
1936
1937static __inline __m512 __DEFAULT_FN_ATTRS
1938_mm512_ceil_ps(__m512 __A)
1939{
1940  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1941                                                  _MM_FROUND_CEIL,
1942                                                  (__v16sf) __A, -1,
1943                                                  _MM_FROUND_CUR_DIRECTION);
1944}
1945
1946static __inline __m512d __DEFAULT_FN_ATTRS
1947_mm512_ceil_pd(__m512d __A)
1948{
1949  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1950                                                   _MM_FROUND_CEIL,
1951                                                   (__v8df) __A, -1,
1952                                                   _MM_FROUND_CUR_DIRECTION);
1953}
1954
1955static __inline__ __m512d __DEFAULT_FN_ATTRS
1956_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1957{
1958  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1959                _MM_FROUND_CEIL,
1960                (__v8df) __W, __U,
1961                _MM_FROUND_CUR_DIRECTION);
1962}
1963
1964static __inline __m512i __DEFAULT_FN_ATTRS
1965_mm512_abs_epi64(__m512i __A)
1966{
1967  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1968             (__v8di)
1969             _mm512_setzero_si512 (),
1970             (__mmask8) -1);
1971}
1972
1973static __inline__ __m512i __DEFAULT_FN_ATTRS
1974_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1975{
1976  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1977                  (__v8di) __W,
1978                  (__mmask8) __U);
1979}
1980
1981static __inline__ __m512i __DEFAULT_FN_ATTRS
1982_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1983{
1984  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1985                  (__v8di)
1986                  _mm512_setzero_si512 (),
1987                  (__mmask8) __U);
1988}
1989
1990static __inline __m512i __DEFAULT_FN_ATTRS
1991_mm512_abs_epi32(__m512i __A)
1992{
1993  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1994             (__v16si)
1995             _mm512_setzero_si512 (),
1996             (__mmask16) -1);
1997}
1998
1999static __inline__ __m512i __DEFAULT_FN_ATTRS
2000_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
2001{
2002  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2003                  (__v16si) __W,
2004                  (__mmask16) __U);
2005}
2006
2007static __inline__ __m512i __DEFAULT_FN_ATTRS
2008_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
2009{
2010  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2011                  (__v16si)
2012                  _mm512_setzero_si512 (),
2013                  (__mmask16) __U);
2014}
2015
2016static __inline__ __m128 __DEFAULT_FN_ATTRS
2017_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2018  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2019                (__v4sf) __B,
2020                (__v4sf) __W,
2021                (__mmask8) __U,
2022                _MM_FROUND_CUR_DIRECTION);
2023}
2024
2025static __inline__ __m128 __DEFAULT_FN_ATTRS
2026_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2027  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2028                (__v4sf) __B,
2029                (__v4sf)  _mm_setzero_ps (),
2030                (__mmask8) __U,
2031                _MM_FROUND_CUR_DIRECTION);
2032}
2033
/* Expands to __builtin_ia32_addss_round_mask(A, B, zero vector, -1, R). */
#define _mm_add_round_ss(A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_addss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, \
        (int)(R)); \
  })
2039
/* Expands to __builtin_ia32_addss_round_mask(A, B, W, U, R) with casts. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_addss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2045
/* Expands to __builtin_ia32_addss_round_mask(A, B, zero vector, U, R). */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_addss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2051
2052static __inline__ __m128d __DEFAULT_FN_ATTRS
2053_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2054  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2055                (__v2df) __B,
2056                (__v2df) __W,
2057                (__mmask8) __U,
2058                _MM_FROUND_CUR_DIRECTION);
2059}
2060
2061static __inline__ __m128d __DEFAULT_FN_ATTRS
2062_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2063  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2064                (__v2df) __B,
2065                (__v2df)  _mm_setzero_pd (),
2066                (__mmask8) __U,
2067                _MM_FROUND_CUR_DIRECTION);
2068}
/* Expands to __builtin_ia32_addsd_round_mask(A, B, zero vector, -1, R). */
#define _mm_add_round_sd(A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_addsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)); \
  })
2074
/* Expands to __builtin_ia32_addsd_round_mask(A, B, W, U, R) with casts. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_addsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2080
/* Expands to __builtin_ia32_addsd_round_mask(A, B, zero vector, U, R). */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_addsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2086
2087static __inline__ __m512d __DEFAULT_FN_ATTRS
2088_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2089  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2090                                              (__v8df)_mm512_add_pd(__A, __B),
2091                                              (__v8df)__W);
2092}
2093
2094static __inline__ __m512d __DEFAULT_FN_ATTRS
2095_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2096  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2097                                              (__v8df)_mm512_add_pd(__A, __B),
2098                                              (__v8df)_mm512_setzero_pd());
2099}
2100
2101static __inline__ __m512 __DEFAULT_FN_ATTRS
2102_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2103  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2104                                             (__v16sf)_mm512_add_ps(__A, __B),
2105                                             (__v16sf)__W);
2106}
2107
2108static __inline__ __m512 __DEFAULT_FN_ATTRS
2109_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2110  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2111                                             (__v16sf)_mm512_add_ps(__A, __B),
2112                                             (__v16sf)_mm512_setzero_ps());
2113}
2114
/* Expands to __builtin_ia32_addpd512_mask(A, B, zero vector, -1, R). */
#define _mm512_add_round_pd(A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_addpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)); \
  })
2120
/* Expands to __builtin_ia32_addpd512_mask(A, B, W, U, R) with casts. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_addpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2126
/* Expands to __builtin_ia32_addpd512_mask(A, B, zero vector, U, R). */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_addpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2132
/* Expands to __builtin_ia32_addps512_mask(A, B, zero vector, -1, R). */
#define _mm512_add_round_ps(A, B, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_addps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)-1, \
        (int)(R)); \
  })
2138
/* Expands to __builtin_ia32_addps512_mask(A, B, W, U, R) with casts. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_addps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(W), \
        (__mmask16)(U), \
        (int)(R)); \
  })
2144
/* Expands to __builtin_ia32_addps512_mask(A, B, zero vector, U, R). */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_addps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(U), \
        (int)(R)); \
  })
2150
2151static __inline__ __m128 __DEFAULT_FN_ATTRS
2152_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2153  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2154                (__v4sf) __B,
2155                (__v4sf) __W,
2156                (__mmask8) __U,
2157                _MM_FROUND_CUR_DIRECTION);
2158}
2159
2160static __inline__ __m128 __DEFAULT_FN_ATTRS
2161_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2162  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2163                (__v4sf) __B,
2164                (__v4sf)  _mm_setzero_ps (),
2165                (__mmask8) __U,
2166                _MM_FROUND_CUR_DIRECTION);
2167}
/* Expands to __builtin_ia32_subss_round_mask(A, B, zero vector, -1, R). */
#define _mm_sub_round_ss(A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_subss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, \
        (int)(R)); \
  })
2173
/* Expands to __builtin_ia32_subss_round_mask(A, B, W, U, R) with casts. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_subss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2179
/* Expands to __builtin_ia32_subss_round_mask(A, B, zero vector, U, R). */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_subss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2185
2186static __inline__ __m128d __DEFAULT_FN_ATTRS
2187_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2188  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2189                (__v2df) __B,
2190                (__v2df) __W,
2191                (__mmask8) __U,
2192                _MM_FROUND_CUR_DIRECTION);
2193}
2194
2195static __inline__ __m128d __DEFAULT_FN_ATTRS
2196_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2197  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2198                (__v2df) __B,
2199                (__v2df)  _mm_setzero_pd (),
2200                (__mmask8) __U,
2201                _MM_FROUND_CUR_DIRECTION);
2202}
2203
/* Expands to __builtin_ia32_subsd_round_mask(A, B, zero vector, -1, R). */
#define _mm_sub_round_sd(A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_subsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)); \
  })
2209
/* Expands to __builtin_ia32_subsd_round_mask(A, B, W, U, R) with casts. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_subsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2215
/* Expands to __builtin_ia32_subsd_round_mask(A, B, zero vector, U, R). */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_subsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2221
2222static __inline__ __m512d __DEFAULT_FN_ATTRS
2223_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2224  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2225                                              (__v8df)_mm512_sub_pd(__A, __B),
2226                                              (__v8df)__W);
2227}
2228
2229static __inline__ __m512d __DEFAULT_FN_ATTRS
2230_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2231  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2232                                              (__v8df)_mm512_sub_pd(__A, __B),
2233                                              (__v8df)_mm512_setzero_pd());
2234}
2235
2236static __inline__ __m512 __DEFAULT_FN_ATTRS
2237_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2238  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2239                                             (__v16sf)_mm512_sub_ps(__A, __B),
2240                                             (__v16sf)__W);
2241}
2242
2243static __inline__ __m512 __DEFAULT_FN_ATTRS
2244_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2245  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2246                                             (__v16sf)_mm512_sub_ps(__A, __B),
2247                                             (__v16sf)_mm512_setzero_ps());
2248}
2249
/* Expands to __builtin_ia32_subpd512_mask(A, B, zero vector, -1, R). */
#define _mm512_sub_round_pd(A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_subpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)); \
  })
2255
/* Expands to __builtin_ia32_subpd512_mask(A, B, W, U, R) with casts. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_subpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2261
/* Expands to __builtin_ia32_subpd512_mask(A, B, zero vector, U, R). */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_subpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2267
/* Expands to __builtin_ia32_subps512_mask(A, B, zero vector, -1, R). */
#define _mm512_sub_round_ps(A, B, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_subps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)-1, \
        (int)(R)); \
  })
2273
/* Expands to __builtin_ia32_subps512_mask(A, B, W, U, R) with operand casts.
 * The expansion is one statement-expression with no trailing ';', so the macro
 * can be used as an operand or function argument, like the other *_round_*
 * macros in this header. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2279
/* Expands to __builtin_ia32_subps512_mask(A, B, zero vector, U, R).
 * The expansion is one statement-expression with no trailing ';', so the macro
 * can be used as an operand or function argument, like the other *_round_*
 * macros in this header. */
#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2285
2286static __inline__ __m128 __DEFAULT_FN_ATTRS
2287_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2288  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2289                (__v4sf) __B,
2290                (__v4sf) __W,
2291                (__mmask8) __U,
2292                _MM_FROUND_CUR_DIRECTION);
2293}
2294
2295static __inline__ __m128 __DEFAULT_FN_ATTRS
2296_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2297  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2298                (__v4sf) __B,
2299                (__v4sf)  _mm_setzero_ps (),
2300                (__mmask8) __U,
2301                _MM_FROUND_CUR_DIRECTION);
2302}
/* Expands to __builtin_ia32_mulss_round_mask(A, B, zero vector, -1, R). */
#define _mm_mul_round_ss(A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_mulss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, \
        (int)(R)); \
  })
2308
/* Expands to __builtin_ia32_mulss_round_mask(A, B, W, U, R) with casts. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_mulss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2314
/* Expands to __builtin_ia32_mulss_round_mask(A, B, zero vector, U, R). */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_mulss_round_mask( \
        (__v4sf)(__m128)(A), \
        (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2320
2321static __inline__ __m128d __DEFAULT_FN_ATTRS
2322_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2323  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2324                (__v2df) __B,
2325                (__v2df) __W,
2326                (__mmask8) __U,
2327                _MM_FROUND_CUR_DIRECTION);
2328}
2329
2330static __inline__ __m128d __DEFAULT_FN_ATTRS
2331_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2332  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2333                (__v2df) __B,
2334                (__v2df)  _mm_setzero_pd (),
2335                (__mmask8) __U,
2336                _MM_FROUND_CUR_DIRECTION);
2337}
2338
/* Expands to __builtin_ia32_mulsd_round_mask(A, B, zero vector, -1, R). */
#define _mm_mul_round_sd(A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_mulsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, \
        (int)(R)); \
  })
2344
/* Expands to __builtin_ia32_mulsd_round_mask(A, B, W, U, R) with casts. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_mulsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2350
/* Expands to __builtin_ia32_mulsd_round_mask(A, B, zero vector, U, R). */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_mulsd_round_mask( \
        (__v2df)(__m128d)(A), \
        (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U), \
        (int)(R)); \
  })
2356
2357static __inline__ __m512d __DEFAULT_FN_ATTRS
2358_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2359  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2360                                              (__v8df)_mm512_mul_pd(__A, __B),
2361                                              (__v8df)__W);
2362}
2363
2364static __inline__ __m512d __DEFAULT_FN_ATTRS
2365_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2366  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2367                                              (__v8df)_mm512_mul_pd(__A, __B),
2368                                              (__v8df)_mm512_setzero_pd());
2369}
2370
2371static __inline__ __m512 __DEFAULT_FN_ATTRS
2372_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2373  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2374                                             (__v16sf)_mm512_mul_ps(__A, __B),
2375                                             (__v16sf)__W);
2376}
2377
2378static __inline__ __m512 __DEFAULT_FN_ATTRS
2379_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2380  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2381                                             (__v16sf)_mm512_mul_ps(__A, __B),
2382                                             (__v16sf)_mm512_setzero_ps());
2383}
2384
/* 512-bit double multiply with caller-supplied rounding argument R.  Expands
 * to the mulpd512 mask builtin with a zero vector as the third operand and an
 * all-ones mask. */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })
2390
/* Masked 512-bit double multiply with caller-supplied rounding argument R.
 * W is passed as the third (source) operand and U as the mask. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })
2396
/* Zero-source masked 512-bit double multiply with caller-supplied rounding
 * argument R.  The third operand is _mm512_setzero_pd(); U is the mask. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
2402
/* 512-bit float multiply with caller-supplied rounding argument R.  Expands to
 * the mulps512 mask builtin with a zero vector as the third operand and an
 * all-ones mask. */
#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
2408
/* Masked 512-bit float multiply with caller-supplied rounding argument R.
 * W is passed as the third (source) operand and U as the mask.
 *
 * The expansion must not end in a semicolon: a trailing ';' in the macro body
 * makes the macro unusable as a function argument or inside a larger
 * expression. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2414
/* Zero-source masked 512-bit float multiply with caller-supplied rounding
 * argument R.  The third operand is _mm512_setzero_ps(); U is the mask.
 *
 * The expansion must not end in a semicolon: a trailing ';' in the macro body
 * makes the macro unusable as a function argument or inside a larger
 * expression. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2420
2421static __inline__ __m128 __DEFAULT_FN_ATTRS
2422_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2423  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2424                (__v4sf) __B,
2425                (__v4sf) __W,
2426                (__mmask8) __U,
2427                _MM_FROUND_CUR_DIRECTION);
2428}
2429
2430static __inline__ __m128 __DEFAULT_FN_ATTRS
2431_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2432  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2433                (__v4sf) __B,
2434                (__v4sf)  _mm_setzero_ps (),
2435                (__mmask8) __U,
2436                _MM_FROUND_CUR_DIRECTION);
2437}
2438
/* Scalar float divide with caller-supplied rounding argument R.  Expands to
 * the divss round/mask builtin with a zero vector as the third operand and an
 * all-ones mask. */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
2444
/* Masked scalar float divide with caller-supplied rounding argument R.
 * W is passed as the third (source) operand and U as the mask. */
#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
2450
/* Zero-source masked scalar float divide with caller-supplied rounding
 * argument R.  The third operand is _mm_setzero_ps(); U is the mask. */
#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
2456
2457static __inline__ __m128d __DEFAULT_FN_ATTRS
2458_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2459  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2460                (__v2df) __B,
2461                (__v2df) __W,
2462                (__mmask8) __U,
2463                _MM_FROUND_CUR_DIRECTION);
2464}
2465
2466static __inline__ __m128d __DEFAULT_FN_ATTRS
2467_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2468  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2469                (__v2df) __B,
2470                (__v2df)  _mm_setzero_pd (),
2471                (__mmask8) __U,
2472                _MM_FROUND_CUR_DIRECTION);
2473}
2474
/* Scalar double divide with caller-supplied rounding argument R.  Expands to
 * the divsd round/mask builtin with a zero vector as the third operand and an
 * all-ones mask. */
#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
2480
/* Masked scalar double divide with caller-supplied rounding argument R.
 * W is passed as the third (source) operand and U as the mask. */
#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
2486
/* Zero-source masked scalar double divide with caller-supplied rounding
 * argument R.  The third operand is _mm_setzero_pd(); U is the mask. */
#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
2492
2493static __inline __m512d __DEFAULT_FN_ATTRS
2494_mm512_div_pd(__m512d __a, __m512d __b)
2495{
2496  return (__m512d)((__v8df)__a/(__v8df)__b);
2497}
2498
2499static __inline__ __m512d __DEFAULT_FN_ATTRS
2500_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2501  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2502                                              (__v8df)_mm512_div_pd(__A, __B),
2503                                              (__v8df)__W);
2504}
2505
2506static __inline__ __m512d __DEFAULT_FN_ATTRS
2507_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2508  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2509                                              (__v8df)_mm512_div_pd(__A, __B),
2510                                              (__v8df)_mm512_setzero_pd());
2511}
2512
2513static __inline __m512 __DEFAULT_FN_ATTRS
2514_mm512_div_ps(__m512 __a, __m512 __b)
2515{
2516  return (__m512)((__v16sf)__a/(__v16sf)__b);
2517}
2518
2519static __inline__ __m512 __DEFAULT_FN_ATTRS
2520_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2521  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2522                                             (__v16sf)_mm512_div_ps(__A, __B),
2523                                             (__v16sf)__W);
2524}
2525
2526static __inline__ __m512 __DEFAULT_FN_ATTRS
2527_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2528  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2529                                             (__v16sf)_mm512_div_ps(__A, __B),
2530                                             (__v16sf)_mm512_setzero_ps());
2531}
2532
/* 512-bit double divide with caller-supplied rounding argument R.  Expands to
 * the divpd512 mask builtin with a zero vector as the third operand and an
 * all-ones mask. */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })
2538
/* Masked 512-bit double divide with caller-supplied rounding argument R.
 * W is passed as the third (source) operand and U as the mask. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })
2544
/* Zero-source masked 512-bit double divide with caller-supplied rounding
 * argument R.  The third operand is _mm512_setzero_pd(); U is the mask. */
#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
2550
/* 512-bit float divide with caller-supplied rounding argument R.  Expands to
 * the divps512 mask builtin with a zero vector as the third operand and an
 * all-ones mask. */
#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
2556
/* Masked 512-bit float divide with caller-supplied rounding argument R.
 * W is passed as the third (source) operand and U as the mask.
 *
 * The expansion must not end in a semicolon: a trailing ';' in the macro body
 * makes the macro unusable as a function argument or inside a larger
 * expression. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2562
/* Zero-source masked 512-bit float divide with caller-supplied rounding
 * argument R.  The third operand is _mm512_setzero_ps(); U is the mask.
 *
 * The expansion must not end in a semicolon: a trailing ';' in the macro body
 * makes the macro unusable as a function argument or inside a larger
 * expression. */
#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2568
/* Unmasked 512-bit float roundscale: passes A and the immediate B to the
 * rndscaleps mask builtin with an all-ones mask and _MM_FROUND_CUR_DIRECTION.
 *
 * The third (pass-through) operand is _mm512_undefined_ps() rather than a
 * second copy of A, so the macro argument A is expanded, and therefore
 * evaluated, exactly once.  This matches _mm512_roundscale_round_ps.
 * NOTE(review): relies on the pass-through operand being unused when the mask
 * is all-ones -- confirm against the builtin's definition. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })
2573
/* Masked 512-bit float roundscale: C is the input, imm the immediate, A the
 * third (source) operand and B the mask; the rounding argument is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), \
      (__v16sf)(__m512)(A), (__mmask16)(B), \
      _MM_FROUND_CUR_DIRECTION); })
2578
/* Zero-source masked 512-bit float roundscale: B is the input, imm the
 * immediate, A the mask; the third operand is _mm512_setzero_ps() and the
 * rounding argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), \
      _MM_FROUND_CUR_DIRECTION); })
2584
/* Masked 512-bit float roundscale with caller-supplied rounding argument R:
 * C is the input, imm the immediate, A the third (source) operand and B the
 * mask. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), \
      (__v16sf)(__m512)(A), (__mmask16)(B), (int)(R)); })
2589
/* Zero-source masked 512-bit float roundscale with caller-supplied rounding
 * argument R: B is the input, imm the immediate, A the mask; the third operand
 * is _mm512_setzero_ps(). */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), (int)(R)); })
2594
/* Unmasked 512-bit float roundscale with caller-supplied rounding argument R:
 * A is the input, imm the immediate; the third operand is
 * _mm512_undefined_ps() and the mask is all-ones. */
#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R)); })
2599
/* Unmasked 512-bit double roundscale: passes A and the immediate B to the
 * rndscalepd mask builtin with an all-ones mask and _MM_FROUND_CUR_DIRECTION.
 *
 * The third (pass-through) operand is _mm512_undefined_pd() rather than a
 * second copy of A, so the macro argument A is expanded, and therefore
 * evaluated, exactly once.  This matches _mm512_roundscale_round_pd.
 * NOTE(review): relies on the pass-through operand being unused when the mask
 * is all-ones -- confirm against the builtin's definition. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
2604
/* Masked 512-bit double roundscale: C is the input, imm the immediate, A the
 * third (source) operand and B the mask; the rounding argument is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), \
      (__v8df)(__m512d)(A), (__mmask8)(B), \
      _MM_FROUND_CUR_DIRECTION); })
2609
/* Zero-source masked 512-bit double roundscale: B is the input, imm the
 * immediate, A the mask; the third operand is _mm512_setzero_pd() and the
 * rounding argument is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(A), \
      _MM_FROUND_CUR_DIRECTION); })
2615
/* Masked 512-bit double roundscale with caller-supplied rounding argument R:
 * C is the input, imm the immediate, A the third (source) operand and B the
 * mask. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), \
      (__v8df)(__m512d)(A), (__mmask8)(B), (int)(R)); })
2620
/* Zero-source masked 512-bit double roundscale with caller-supplied rounding
 * argument R: B is the input, imm the immediate, A the mask; the third operand
 * is _mm512_setzero_pd(). */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(A), (int)(R)); })
2625
/* Unmasked 512-bit double roundscale with caller-supplied rounding argument R:
 * A is the input, imm the immediate; the third operand is
 * _mm512_undefined_pd() and the mask is all-ones. */
#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(R)); })
2630
/* Unmasked pd fmadd with caller-supplied rounding argument R: passes A, B, C
 * unchanged to the vfmaddpd512 "mask" builtin with an all-ones mask. */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })
2636
2637
/* Masked pd fmadd with caller-supplied rounding argument R: passes A, B, C
 * unchanged to the vfmaddpd512 "mask" builtin with mask U. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })
2643
2644
/* "mask3" pd fmadd with caller-supplied rounding argument R: passes A, B, C
 * unchanged to the vfmaddpd512 "mask3" builtin with mask U. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })
2650
2651
/* "maskz" pd fmadd with caller-supplied rounding argument R: passes A, B, C
 * unchanged to the vfmaddpd512 "maskz" builtin with mask U. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })
2657
2658
/* Unmasked pd fmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddpd512 "mask" builtin with C negated and an all-ones mask. */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })
2664
2665
/* Masked pd fmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddpd512 "mask" builtin with C negated and mask U. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })
2671
2672
/* "maskz" pd fmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddpd512 "maskz" builtin with C negated and mask U. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })
2678
2679
/* Unmasked pd fnmadd with caller-supplied rounding argument R: implemented as
 * the vfmaddpd512 "mask" builtin with A negated and an all-ones mask. */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })
2685
2686
/* "mask3" pd fnmadd with caller-supplied rounding argument R: implemented as
 * the vfmaddpd512 "mask3" builtin with A negated and mask U. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })
2692
2693
/* "maskz" pd fnmadd with caller-supplied rounding argument R: implemented as
 * the vfmaddpd512 "maskz" builtin with A negated and mask U. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })
2699
2700
/* Unmasked pd fnmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddpd512 "mask" builtin with both A and C negated and an all-ones
 * mask. */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), (__mmask8)-1, (int)(R)); })
2706
2707
/* "maskz" pd fnmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddpd512 "maskz" builtin with both A and C negated and mask U. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      -(__v8df)(__m512d)(C), (__mmask8)(U), (int)(R)); })
2713
2714
2715static __inline__ __m512d __DEFAULT_FN_ATTRS
2716_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2717{
2718  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2719                                                    (__v8df) __B,
2720                                                    (__v8df) __C,
2721                                                    (__mmask8) -1,
2722                                                    _MM_FROUND_CUR_DIRECTION);
2723}
2724
2725static __inline__ __m512d __DEFAULT_FN_ATTRS
2726_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2727{
2728  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2729                                                    (__v8df) __B,
2730                                                    (__v8df) __C,
2731                                                    (__mmask8) __U,
2732                                                    _MM_FROUND_CUR_DIRECTION);
2733}
2734
2735static __inline__ __m512d __DEFAULT_FN_ATTRS
2736_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2737{
2738  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2739                                                     (__v8df) __B,
2740                                                     (__v8df) __C,
2741                                                     (__mmask8) __U,
2742                                                     _MM_FROUND_CUR_DIRECTION);
2743}
2744
2745static __inline__ __m512d __DEFAULT_FN_ATTRS
2746_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2747{
2748  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2749                                                     (__v8df) __B,
2750                                                     (__v8df) __C,
2751                                                     (__mmask8) __U,
2752                                                     _MM_FROUND_CUR_DIRECTION);
2753}
2754
2755static __inline__ __m512d __DEFAULT_FN_ATTRS
2756_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2757{
2758  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2759                                                    (__v8df) __B,
2760                                                    -(__v8df) __C,
2761                                                    (__mmask8) -1,
2762                                                    _MM_FROUND_CUR_DIRECTION);
2763}
2764
2765static __inline__ __m512d __DEFAULT_FN_ATTRS
2766_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2767{
2768  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2769                                                    (__v8df) __B,
2770                                                    -(__v8df) __C,
2771                                                    (__mmask8) __U,
2772                                                    _MM_FROUND_CUR_DIRECTION);
2773}
2774
2775static __inline__ __m512d __DEFAULT_FN_ATTRS
2776_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2777{
2778  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2779                                                     (__v8df) __B,
2780                                                     -(__v8df) __C,
2781                                                     (__mmask8) __U,
2782                                                     _MM_FROUND_CUR_DIRECTION);
2783}
2784
2785static __inline__ __m512d __DEFAULT_FN_ATTRS
2786_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2787{
2788  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2789                                                    (__v8df) __B,
2790                                                    (__v8df) __C,
2791                                                    (__mmask8) -1,
2792                                                    _MM_FROUND_CUR_DIRECTION);
2793}
2794
2795static __inline__ __m512d __DEFAULT_FN_ATTRS
2796_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2797{
2798  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2799                                                     (__v8df) __B,
2800                                                     (__v8df) __C,
2801                                                     (__mmask8) __U,
2802                                                     _MM_FROUND_CUR_DIRECTION);
2803}
2804
2805static __inline__ __m512d __DEFAULT_FN_ATTRS
2806_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2807{
2808  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2809                                                     (__v8df) __B,
2810                                                     (__v8df) __C,
2811                                                     (__mmask8) __U,
2812                                                     _MM_FROUND_CUR_DIRECTION);
2813}
2814
2815static __inline__ __m512d __DEFAULT_FN_ATTRS
2816_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2817{
2818  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2819                                                    (__v8df) __B,
2820                                                    -(__v8df) __C,
2821                                                    (__mmask8) -1,
2822                                                    _MM_FROUND_CUR_DIRECTION);
2823}
2824
2825static __inline__ __m512d __DEFAULT_FN_ATTRS
2826_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2827{
2828  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2829                                                     (__v8df) __B,
2830                                                     -(__v8df) __C,
2831                                                     (__mmask8) __U,
2832                                                     _MM_FROUND_CUR_DIRECTION);
2833}
2834
/* Unmasked ps fmadd with caller-supplied rounding argument R: passes A, B, C
 * unchanged to the vfmaddps512 "mask" builtin with an all-ones mask. */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })
2840
2841
/* Masked ps fmadd with caller-supplied rounding argument R: passes A, B, C
 * unchanged to the vfmaddps512 "mask" builtin with mask U. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
2847
2848
/* "mask3" ps fmadd with caller-supplied rounding argument R: passes A, B, C
 * unchanged to the vfmaddps512 "mask3" builtin with mask U. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
2854
2855
/* "maskz" ps fmadd with caller-supplied rounding argument R: passes A, B, C
 * unchanged to the vfmaddps512 "maskz" builtin with mask U. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
2861
2862
/* Unmasked ps fmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddps512 "mask" builtin with C negated and an all-ones mask. */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })
2868
2869
/* Masked ps fmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddps512 "mask" builtin with C negated and mask U. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
2875
2876
/* "maskz" ps fmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddps512 "maskz" builtin with C negated and mask U. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
2882
2883
/* Unmasked ps fnmadd with caller-supplied rounding argument R: implemented as
 * the vfmaddps512 "mask" builtin with A negated and an all-ones mask. */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })
2889
2890
/* "mask3" ps fnmadd with caller-supplied rounding argument R: implemented as
 * the vfmaddps512 "mask3" builtin with A negated and mask U. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
2896
2897
/* "maskz" ps fnmadd with caller-supplied rounding argument R: implemented as
 * the vfmaddps512 "maskz" builtin with A negated and mask U. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
2903
2904
/* Unmasked ps fnmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddps512 "mask" builtin with both A and C negated and an all-ones
 * mask. */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), (__mmask16)-1, (int)(R)); })
2910
2911
/* "maskz" ps fnmsub with caller-supplied rounding argument R: implemented as
 * the vfmaddps512 "maskz" builtin with both A and C negated and mask U. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      -(__v16sf)(__m512)(C), (__mmask16)(U), (int)(R)); })
2917
2918
2919static __inline__ __m512 __DEFAULT_FN_ATTRS
2920_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2921{
2922  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2923                                                   (__v16sf) __B,
2924                                                   (__v16sf) __C,
2925                                                   (__mmask16) -1,
2926                                                   _MM_FROUND_CUR_DIRECTION);
2927}
2928
2929static __inline__ __m512 __DEFAULT_FN_ATTRS
2930_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2931{
2932  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2933                                                   (__v16sf) __B,
2934                                                   (__v16sf) __C,
2935                                                   (__mmask16) __U,
2936                                                   _MM_FROUND_CUR_DIRECTION);
2937}
2938
2939static __inline__ __m512 __DEFAULT_FN_ATTRS
2940_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2941{
2942  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2943                                                    (__v16sf) __B,
2944                                                    (__v16sf) __C,
2945                                                    (__mmask16) __U,
2946                                                    _MM_FROUND_CUR_DIRECTION);
2947}
2948
2949static __inline__ __m512 __DEFAULT_FN_ATTRS
2950_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2951{
2952  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2953                                                    (__v16sf) __B,
2954                                                    (__v16sf) __C,
2955                                                    (__mmask16) __U,
2956                                                    _MM_FROUND_CUR_DIRECTION);
2957}
2958
2959static __inline__ __m512 __DEFAULT_FN_ATTRS
2960_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2961{
2962  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2963                                                   (__v16sf) __B,
2964                                                   -(__v16sf) __C,
2965                                                   (__mmask16) -1,
2966                                                   _MM_FROUND_CUR_DIRECTION);
2967}
2968
2969static __inline__ __m512 __DEFAULT_FN_ATTRS
2970_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2971{
2972  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2973                                                   (__v16sf) __B,
2974                                                   -(__v16sf) __C,
2975                                                   (__mmask16) __U,
2976                                                   _MM_FROUND_CUR_DIRECTION);
2977}
2978
2979static __inline__ __m512 __DEFAULT_FN_ATTRS
2980_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2981{
2982  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2983                                                    (__v16sf) __B,
2984                                                    -(__v16sf) __C,
2985                                                    (__mmask16) __U,
2986                                                    _MM_FROUND_CUR_DIRECTION);
2987}
2988
2989static __inline__ __m512 __DEFAULT_FN_ATTRS
2990_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2991{
2992  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2993                                                   (__v16sf) __B,
2994                                                   (__v16sf) __C,
2995                                                   (__mmask16) -1,
2996                                                   _MM_FROUND_CUR_DIRECTION);
2997}
2998
2999static __inline__ __m512 __DEFAULT_FN_ATTRS
3000_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3001{
3002  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
3003                                                    (__v16sf) __B,
3004                                                    (__v16sf) __C,
3005                                                    (__mmask16) __U,
3006                                                    _MM_FROUND_CUR_DIRECTION);
3007}
3008
3009static __inline__ __m512 __DEFAULT_FN_ATTRS
3010_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3011{
3012  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3013                                                    (__v16sf) __B,
3014                                                    (__v16sf) __C,
3015                                                    (__mmask16) __U,
3016                                                    _MM_FROUND_CUR_DIRECTION);
3017}
3018
3019static __inline__ __m512 __DEFAULT_FN_ATTRS
3020_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
3021{
3022  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3023                                                   (__v16sf) __B,
3024                                                   -(__v16sf) __C,
3025                                                   (__mmask16) -1,
3026                                                   _MM_FROUND_CUR_DIRECTION);
3027}
3028
3029static __inline__ __m512 __DEFAULT_FN_ATTRS
3030_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3031{
3032  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3033                                                    (__v16sf) __B,
3034                                                    -(__v16sf) __C,
3035                                                    (__mmask16) __U,
3036                                                    _MM_FROUND_CUR_DIRECTION);
3037}
3038
/* Expands to __builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R): the vector
 * operands are cast through __m512d to __v8df, the mask argument is
 * (__mmask8)(-1) and R is cast to int.  NOTE(review): R presumably must be
 * a compile-time rounding constant -- confirm. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(-1), (int)(R)); })
3044
3045
/* Expands to __builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R): the vector
 * operands are cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3051
3052
/* Expands to __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R): the vector
 * operands are cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3058
3059
/* Expands to __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R): the vector
 * operands are cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3065
3066
/* Built on the fmaddsub builtin with C negated: expands to
 * __builtin_ia32_vfmaddsubpd512_mask(A, B, -C, -1, R), with the vector
 * operands cast through __m512d to __v8df and R cast to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(-1), (int)(R)); })
3072
3073
/* Built on the fmaddsub builtin with C negated: expands to
 * __builtin_ia32_vfmaddsubpd512_mask(A, B, -C, U, R), with the vector
 * operands cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3079
3080
/* Built on the fmaddsub builtin with C negated: expands to
 * __builtin_ia32_vfmaddsubpd512_maskz(A, B, -C, U, R), with the vector
 * operands cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3086
3087
3088static __inline__ __m512d __DEFAULT_FN_ATTRS
3089_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
3090{
3091  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3092                                                       (__v8df) __B,
3093                                                       (__v8df) __C,
3094                                                       (__mmask8) -1,
3095                                                       _MM_FROUND_CUR_DIRECTION);
3096}
3097
3098static __inline__ __m512d __DEFAULT_FN_ATTRS
3099_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3100{
3101  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3102                                                       (__v8df) __B,
3103                                                       (__v8df) __C,
3104                                                       (__mmask8) __U,
3105                                                       _MM_FROUND_CUR_DIRECTION);
3106}
3107
3108static __inline__ __m512d __DEFAULT_FN_ATTRS
3109_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3110{
3111  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3112                                                        (__v8df) __B,
3113                                                        (__v8df) __C,
3114                                                        (__mmask8) __U,
3115                                                        _MM_FROUND_CUR_DIRECTION);
3116}
3117
3118static __inline__ __m512d __DEFAULT_FN_ATTRS
3119_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3120{
3121  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3122                                                        (__v8df) __B,
3123                                                        (__v8df) __C,
3124                                                        (__mmask8) __U,
3125                                                        _MM_FROUND_CUR_DIRECTION);
3126}
3127
3128static __inline__ __m512d __DEFAULT_FN_ATTRS
3129_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3130{
3131  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3132                                                       (__v8df) __B,
3133                                                       -(__v8df) __C,
3134                                                       (__mmask8) -1,
3135                                                       _MM_FROUND_CUR_DIRECTION);
3136}
3137
3138static __inline__ __m512d __DEFAULT_FN_ATTRS
3139_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3140{
3141  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3142                                                       (__v8df) __B,
3143                                                       -(__v8df) __C,
3144                                                       (__mmask8) __U,
3145                                                       _MM_FROUND_CUR_DIRECTION);
3146}
3147
3148static __inline__ __m512d __DEFAULT_FN_ATTRS
3149_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3150{
3151  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3152                                                        (__v8df) __B,
3153                                                        -(__v8df) __C,
3154                                                        (__mmask8) __U,
3155                                                        _MM_FROUND_CUR_DIRECTION);
3156}
3157
/* Expands to __builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R): the vector
 * operands are cast through __m512 to __v16sf, the mask argument is
 * (__mmask16)(-1) and R is cast to int.  NOTE(review): R presumably must be
 * a compile-time rounding constant -- confirm. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(-1), (int)(R)); })
3163
3164
/* Expands to __builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R): the vector
 * operands are cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3170
3171
/* Expands to __builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R): the vector
 * operands are cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3177
3178
/* Expands to __builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R): the vector
 * operands are cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3184
3185
/* Built on the fmaddsub builtin with C negated: expands to
 * __builtin_ia32_vfmaddsubps512_mask(A, B, -C, -1, R), with the vector
 * operands cast through __m512 to __v16sf and R cast to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(-1), (int)(R)); })
3191
3192
/* Built on the fmaddsub builtin with C negated: expands to
 * __builtin_ia32_vfmaddsubps512_mask(A, B, -C, U, R), with the vector
 * operands cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3198
3199
/* Built on the fmaddsub builtin with C negated: expands to
 * __builtin_ia32_vfmaddsubps512_maskz(A, B, -C, U, R), with the vector
 * operands cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3205
3206
3207static __inline__ __m512 __DEFAULT_FN_ATTRS
3208_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3209{
3210  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3211                                                      (__v16sf) __B,
3212                                                      (__v16sf) __C,
3213                                                      (__mmask16) -1,
3214                                                      _MM_FROUND_CUR_DIRECTION);
3215}
3216
3217static __inline__ __m512 __DEFAULT_FN_ATTRS
3218_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3219{
3220  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3221                                                      (__v16sf) __B,
3222                                                      (__v16sf) __C,
3223                                                      (__mmask16) __U,
3224                                                      _MM_FROUND_CUR_DIRECTION);
3225}
3226
3227static __inline__ __m512 __DEFAULT_FN_ATTRS
3228_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3229{
3230  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3231                                                       (__v16sf) __B,
3232                                                       (__v16sf) __C,
3233                                                       (__mmask16) __U,
3234                                                       _MM_FROUND_CUR_DIRECTION);
3235}
3236
3237static __inline__ __m512 __DEFAULT_FN_ATTRS
3238_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3239{
3240  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3241                                                       (__v16sf) __B,
3242                                                       (__v16sf) __C,
3243                                                       (__mmask16) __U,
3244                                                       _MM_FROUND_CUR_DIRECTION);
3245}
3246
3247static __inline__ __m512 __DEFAULT_FN_ATTRS
3248_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3249{
3250  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3251                                                      (__v16sf) __B,
3252                                                      -(__v16sf) __C,
3253                                                      (__mmask16) -1,
3254                                                      _MM_FROUND_CUR_DIRECTION);
3255}
3256
3257static __inline__ __m512 __DEFAULT_FN_ATTRS
3258_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3259{
3260  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3261                                                      (__v16sf) __B,
3262                                                      -(__v16sf) __C,
3263                                                      (__mmask16) __U,
3264                                                      _MM_FROUND_CUR_DIRECTION);
3265}
3266
3267static __inline__ __m512 __DEFAULT_FN_ATTRS
3268_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3269{
3270  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3271                                                       (__v16sf) __B,
3272                                                       -(__v16sf) __C,
3273                                                       (__mmask16) __U,
3274                                                       _MM_FROUND_CUR_DIRECTION);
3275}
3276
/* Expands to __builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R): the vector
 * operands are cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3282
3283
3284static __inline__ __m512d __DEFAULT_FN_ATTRS
3285_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3286{
3287  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3288                                                     (__v8df) __B,
3289                                                     (__v8df) __C,
3290                                                     (__mmask8) __U,
3291                                                     _MM_FROUND_CUR_DIRECTION);
3292}
3293
/* Expands to __builtin_ia32_vfmsubps512_mask3(A, B, C, U, R): the vector
 * operands are cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3299
3300
3301static __inline__ __m512 __DEFAULT_FN_ATTRS
3302_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3303{
3304  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3305                                                    (__v16sf) __B,
3306                                                    (__v16sf) __C,
3307                                                    (__mmask16) __U,
3308                                                    _MM_FROUND_CUR_DIRECTION);
3309}
3310
/* Expands to __builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R): the vector
 * operands are cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3316
3317
3318static __inline__ __m512d __DEFAULT_FN_ATTRS
3319_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3320{
3321  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3322                                                        (__v8df) __B,
3323                                                        (__v8df) __C,
3324                                                        (__mmask8) __U,
3325                                                        _MM_FROUND_CUR_DIRECTION);
3326}
3327
/* Expands to __builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R): the vector
 * operands are cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3333
3334
3335static __inline__ __m512 __DEFAULT_FN_ATTRS
3336_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3337{
3338  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3339                                                       (__v16sf) __B,
3340                                                       (__v16sf) __C,
3341                                                       (__mmask16) __U,
3342                                                       _MM_FROUND_CUR_DIRECTION);
3343}
3344
/* Expands to __builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R): the vector
 * operands are cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3350
3351
3352static __inline__ __m512d __DEFAULT_FN_ATTRS
3353_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3354{
3355  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3356                                                     (__v8df) __B,
3357                                                     (__v8df) __C,
3358                                                     (__mmask8) __U,
3359                                                     _MM_FROUND_CUR_DIRECTION);
3360}
3361
/* Expands to __builtin_ia32_vfnmaddps512_mask(A, B, C, U, R): the vector
 * operands are cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3367
3368
3369static __inline__ __m512 __DEFAULT_FN_ATTRS
3370_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3371{
3372  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3373                                                    (__v16sf) __B,
3374                                                    (__v16sf) __C,
3375                                                    (__mmask16) __U,
3376                                                    _MM_FROUND_CUR_DIRECTION);
3377}
3378
/* Expands to __builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R): the vector
 * operands are cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3384
3385
/* Expands to __builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R): the vector
 * operands are cast through __m512d to __v8df, U to __mmask8 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfnmsubpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
3391
3392
3393static __inline__ __m512d __DEFAULT_FN_ATTRS
3394_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3395{
3396  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3397                                                     (__v8df) __B,
3398                                                     (__v8df) __C,
3399                                                     (__mmask8) __U,
3400                                                     _MM_FROUND_CUR_DIRECTION);
3401}
3402
3403static __inline__ __m512d __DEFAULT_FN_ATTRS
3404_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3405{
3406  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3407                                                      (__v8df) __B,
3408                                                      (__v8df) __C,
3409                                                      (__mmask8) __U,
3410                                                      _MM_FROUND_CUR_DIRECTION);
3411}
3412
/* Expands to __builtin_ia32_vfnmsubps512_mask(A, B, C, U, R): the vector
 * operands are cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3418
3419
/* Expands to __builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R): the vector
 * operands are cast through __m512 to __v16sf, U to __mmask16 and R to int.
 * NOTE(review): R presumably must be a compile-time rounding constant. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfnmsubps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
3425
3426
3427static __inline__ __m512 __DEFAULT_FN_ATTRS
3428_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3429{
3430  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3431                                                    (__v16sf) __B,
3432                                                    (__v16sf) __C,
3433                                                    (__mmask16) __U,
3434                                                    _MM_FROUND_CUR_DIRECTION);
3435}
3436
3437static __inline__ __m512 __DEFAULT_FN_ATTRS
3438_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3439{
3440  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3441                                                     (__v16sf) __B,
3442                                                     (__v16sf) __C,
3443                                                     (__mmask16) __U,
3444                                                     _MM_FROUND_CUR_DIRECTION);
3445}
3446
3447
3448
3449/* Vector permutations */
3450
3451static __inline __m512i __DEFAULT_FN_ATTRS
3452_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3453{
3454  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3455                                                       /* idx */ ,
3456                                                       (__v16si) __A,
3457                                                       (__v16si) __B,
3458                                                       (__mmask16) -1);
3459}
3460
3461static __inline__ __m512i __DEFAULT_FN_ATTRS
3462_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3463                                __m512i __I, __m512i __B)
3464{
3465  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3466                                                        /* idx */ ,
3467                                                        (__v16si) __A,
3468                                                        (__v16si) __B,
3469                                                        (__mmask16) __U);
3470}
3471
3472static __inline__ __m512i __DEFAULT_FN_ATTRS
3473_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3474                                 __m512i __I, __m512i __B)
3475{
3476  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3477                                                        /* idx */ ,
3478                                                        (__v16si) __A,
3479                                                        (__v16si) __B,
3480                                                        (__mmask16) __U);
3481}
3482
3483static __inline __m512i __DEFAULT_FN_ATTRS
3484_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3485{
3486  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3487                                                       /* idx */ ,
3488                                                       (__v8di) __A,
3489                                                       (__v8di) __B,
3490                                                       (__mmask8) -1);
3491}
3492
3493static __inline__ __m512i __DEFAULT_FN_ATTRS
3494_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3495                                __m512i __B)
3496{
3497  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3498                                                       /* idx */ ,
3499                                                       (__v8di) __A,
3500                                                       (__v8di) __B,
3501                                                       (__mmask8) __U);
3502}
3503
3504
3505static __inline__ __m512i __DEFAULT_FN_ATTRS
3506_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3507         __m512i __I, __m512i __B)
3508{
3509  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3510                                                        /* idx */ ,
3511                                                        (__v8di) __A,
3512                                                        (__v8di) __B,
3513                                                        (__mmask8) __U);
3514}
3515
/* Shuffle of B (first operand) and A (second operand), both viewed as
 * __v8di.  The eight result indices are (I & 7) + 0 .. (I & 7) + 7, so the
 * selected window starts inside B and continues into A.  I is used directly
 * as a shuffle index, so it has to be an integer constant expression. */
#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(B), (__v8di)(__m512i)(A), \
      ((int)(I) & 0x7) + 0, ((int)(I) & 0x7) + 1, \
      ((int)(I) & 0x7) + 2, ((int)(I) & 0x7) + 3, \
      ((int)(I) & 0x7) + 4, ((int)(I) & 0x7) + 5, \
      ((int)(I) & 0x7) + 6, ((int)(I) & 0x7) + 7); })
3527
/* Feeds mask U, the result of _mm512_alignr_epi64(A, B, imm) and W (in that
 * order) to __builtin_ia32_selectq_512.  NOTE(review): lanes whose mask bit
 * is set presumably take the alignr result and the rest take W. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)(__m512i)(W)); })
3532
/* Feeds mask U, the result of _mm512_alignr_epi64(A, B, imm) and
 * _mm512_setzero_si512() (in that order) to __builtin_ia32_selectq_512.
 * NOTE(review): lanes whose mask bit is set presumably take the alignr
 * result and the rest take the zero vector. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()); })
3537
/* Shuffle of B (first operand) and A (second operand), both viewed as
 * __v16si.  The sixteen result indices are (I & 0xf) + 0 .. (I & 0xf) + 15,
 * so the selected window starts inside B and continues into A.  I is used
 * directly as a shuffle index, so it has to be an integer constant
 * expression. */
#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v16si)(__m512i)(B), (__v16si)(__m512i)(A), \
      ((int)(I) & 0xf) + 0,  ((int)(I) & 0xf) + 1, \
      ((int)(I) & 0xf) + 2,  ((int)(I) & 0xf) + 3, \
      ((int)(I) & 0xf) + 4,  ((int)(I) & 0xf) + 5, \
      ((int)(I) & 0xf) + 6,  ((int)(I) & 0xf) + 7, \
      ((int)(I) & 0xf) + 8,  ((int)(I) & 0xf) + 9, \
      ((int)(I) & 0xf) + 10, ((int)(I) & 0xf) + 11, \
      ((int)(I) & 0xf) + 12, ((int)(I) & 0xf) + 13, \
      ((int)(I) & 0xf) + 14, ((int)(I) & 0xf) + 15); })
3557
/* Feeds mask U, the result of _mm512_alignr_epi32(A, B, imm) and W (in that
 * order) to __builtin_ia32_selectd_512.  NOTE(review): lanes whose mask bit
 * is set presumably take the alignr result and the rest take W. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)(__m512i)(W)); })
3562
/* Feeds mask U, the result of _mm512_alignr_epi32(A, B, imm) and
 * _mm512_setzero_si512() (in that order) to __builtin_ia32_selectd_512.
 * NOTE(review): lanes whose mask bit is set presumably take the alignr
 * result and the rest take the zero vector. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()); })
3567/* Vector Extract */
3568
/* Four-element shuffle out of A (viewed as __v8df); the second shuffle
 * operand is _mm512_undefined_pd() and is never indexed.  Bit 0 of I picks
 * elements 4..7 when set and elements 0..3 when clear.  I feeds the shuffle
 * indices, so it has to be an integer constant expression. */
#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
  (__m256d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), (__v8df)_mm512_undefined_pd(), \
      ((I) & 1) ? 4 : 0, ((I) & 1) ? 5 : 1, \
      ((I) & 1) ? 6 : 2, ((I) & 1) ? 7 : 3); })
3576
/* Merge-masked 256-bit double extract: passes mask U, the result of
 * _mm512_extractf64x4_pd(A, imm) and the fallback vector W to the 256-bit
 * double select builtin. W is routed through __m256d first, matching how
 * the other merge-masked macros in this file treat their W operand. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(__m256d)(W)); })
3581
/* Zero-masked 256-bit double extract: the select builtin falls back to
 * _mm256_setzero_pd() instead of a caller-supplied vector. */
#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
      (__v4df)_mm256_setzero_pd()); })
3586
/* Extracts four consecutive floats from A with a constant shuffle. The low
 * two bits of I choose the group: the indices are (I & 3) * 4 + {0,1,2,3},
 * so only the first shuffle operand is ever referenced. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_shufflevector( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      0 + ((I) & 0x3) * 4, 1 + ((I) & 0x3) * 4, \
      2 + ((I) & 0x3) * 4, 3 + ((I) & 0x3) * 4); })
3594
/* Merge-masked 128-bit float extract: passes mask U, the result of
 * _mm512_extractf32x4_ps(A, imm) and the fallback vector W to the 128-bit
 * float select builtin. W is routed through __m128 first, matching how the
 * other merge-masked macros in this file treat their W operand. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)(__m128)(W)); })
3599
/* Zero-masked 128-bit float extract: the select builtin falls back to
 * _mm_setzero_ps() instead of a caller-supplied vector. */
#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
      (__v4sf)_mm_setzero_ps()); })
3604
3605/* Vector Blend */
3606
3607static __inline __m512d __DEFAULT_FN_ATTRS
3608_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3609{
3610  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3611                 (__v8df) __W,
3612                 (__v8df) __A);
3613}
3614
3615static __inline __m512 __DEFAULT_FN_ATTRS
3616_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3617{
3618  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3619                (__v16sf) __W,
3620                (__v16sf) __A);
3621}
3622
3623static __inline __m512i __DEFAULT_FN_ATTRS
3624_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3625{
3626  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3627                (__v8di) __W,
3628                (__v8di) __A);
3629}
3630
3631static __inline __m512i __DEFAULT_FN_ATTRS
3632_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3633{
3634  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3635                (__v16si) __W,
3636                (__v16si) __A);
3637}
3638
3639/* Compare */
3640
/* Float compare producing a 16-bit mask: forwards A, B, predicate P and
 * rounding argument R to the cmpps512 builtin with an all-ones input mask. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (int)(P), (__mmask16)-1, (int)(R)); })
3645
/* Masked float compare: identical to _mm512_cmp_round_ps_mask except that
 * the caller's mask U is passed to the cmpps512 builtin. */
#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (int)(P), (__mmask16)(U), (int)(R)); })
3650
/* Non-rounding float compares: expand to the *_round_* macros with the
 * rounding argument fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_ps_mask(A, B, P) \
    _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
    _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), \
                                  _MM_FROUND_CUR_DIRECTION)
3655
/* "Equal" float compare shorthands: predicate fixed to _CMP_EQ_OQ. */
#define _mm512_cmpeq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3660
/* "Less than" float compare shorthands: predicate fixed to _CMP_LT_OS. */
#define _mm512_cmplt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3665
/* "Less or equal" float compare shorthands: predicate fixed to _CMP_LE_OS. */
#define _mm512_cmple_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3670
/* "Unordered" float compare shorthands: predicate fixed to _CMP_UNORD_Q. */
#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3675
/* "Not equal" float compare shorthands: predicate fixed to _CMP_NEQ_UQ. */
#define _mm512_cmpneq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3680
/* "Not less than" float compare shorthands: predicate fixed to _CMP_NLT_US. */
#define _mm512_cmpnlt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3685
/* "Not less or equal" float compare shorthands: predicate fixed to
 * _CMP_NLE_US. */
#define _mm512_cmpnle_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3690
/* "Ordered" float compare shorthands: predicate fixed to _CMP_ORD_Q. */
#define _mm512_cmpord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3695
/* Double compare producing an 8-bit mask: forwards A, B, predicate P and
 * rounding argument R to the cmppd512 builtin with an all-ones input mask. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(P), (__mmask8)-1, (int)(R)); })
3700
/* Masked double compare: identical to _mm512_cmp_round_pd_mask except that
 * the caller's mask U is passed to the cmppd512 builtin. */
#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(P), (__mmask8)(U), (int)(R)); })
3705
/* Non-rounding double compares: expand to the *_round_* macros with the
 * rounding argument fixed to _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_pd_mask(A, B, P) \
    _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
    _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), \
                                  _MM_FROUND_CUR_DIRECTION)
3710
/* "Equal" double compare shorthands: predicate fixed to _CMP_EQ_OQ. */
#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3715
/* "Less than" double compare shorthands: predicate fixed to _CMP_LT_OS. */
#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3720
/* "Less or equal" double compare shorthands: predicate fixed to _CMP_LE_OS. */
#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3725
/* "Unordered" double compare shorthands: predicate fixed to _CMP_UNORD_Q. */
#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3730
/* "Not equal" double compare shorthands: predicate fixed to _CMP_NEQ_UQ. */
#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3735
/* "Not less than" double compare shorthands: predicate fixed to
 * _CMP_NLT_US. */
#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3740
/* "Not less or equal" double compare shorthands: predicate fixed to
 * _CMP_NLE_US. */
#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3745
/* "Ordered" double compare shorthands: predicate fixed to _CMP_ORD_Q. */
#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3750
3751/* Conversion */
3752
/* Unmasked cvttps2udq512 call on A with rounding argument R; the second
 * operand is an undefined vector and the mask is all ones. */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, (int)(R)); })
3757
/* Masked cvttps2udq512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
3762
/* Masked cvttps2udq512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
3767
3768
3769static __inline __m512i __DEFAULT_FN_ATTRS
3770_mm512_cvttps_epu32(__m512 __A)
3771{
3772  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3773                  (__v16si)
3774                  _mm512_setzero_si512 (),
3775                  (__mmask16) -1,
3776                  _MM_FROUND_CUR_DIRECTION);
3777}
3778
3779static __inline__ __m512i __DEFAULT_FN_ATTRS
3780_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3781{
3782  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3783                   (__v16si) __W,
3784                   (__mmask16) __U,
3785                   _MM_FROUND_CUR_DIRECTION);
3786}
3787
3788static __inline__ __m512i __DEFAULT_FN_ATTRS
3789_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3790{
3791  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3792                   (__v16si) _mm512_setzero_si512 (),
3793                   (__mmask16) __U,
3794                   _MM_FROUND_CUR_DIRECTION);
3795}
3796
/* Unmasked cvtdq2ps512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)); })
3801
/* Masked cvtdq2ps512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })
3806
/* Masked cvtdq2ps512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
3811
/* Unmasked cvtudq2ps512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)); })
3816
/* Masked cvtudq2ps512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })
3821
/* Masked cvtudq2ps512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
3826
3827static __inline__ __m512 __DEFAULT_FN_ATTRS
3828_mm512_cvtepu32_ps (__m512i __A)
3829{
3830  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3831                 (__v16sf) _mm512_undefined_ps (),
3832                 (__mmask16) -1,
3833                 _MM_FROUND_CUR_DIRECTION);
3834}
3835
3836static __inline__ __m512 __DEFAULT_FN_ATTRS
3837_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3838{
3839  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3840                 (__v16sf) __W,
3841                 (__mmask16) __U,
3842                 _MM_FROUND_CUR_DIRECTION);
3843}
3844
3845static __inline__ __m512 __DEFAULT_FN_ATTRS
3846_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3847{
3848  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3849                 (__v16sf) _mm512_setzero_ps (),
3850                 (__mmask16) __U,
3851                 _MM_FROUND_CUR_DIRECTION);
3852}
3853
3854static __inline __m512d __DEFAULT_FN_ATTRS
3855_mm512_cvtepi32_pd(__m256i __A)
3856{
3857  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3858}
3859
3860static __inline__ __m512d __DEFAULT_FN_ATTRS
3861_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3862{
3863  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3864                                              (__v8df)_mm512_cvtepi32_pd(__A),
3865                                              (__v8df)__W);
3866}
3867
3868static __inline__ __m512d __DEFAULT_FN_ATTRS
3869_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3870{
3871  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3872                                              (__v8df)_mm512_cvtepi32_pd(__A),
3873                                              (__v8df)_mm512_setzero_pd());
3874}
3875
3876static __inline__ __m512d __DEFAULT_FN_ATTRS
3877_mm512_cvtepi32lo_pd(__m512i __A)
3878{
3879  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3880}
3881
3882static __inline__ __m512d __DEFAULT_FN_ATTRS
3883_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3884{
3885  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3886}
3887
3888static __inline__ __m512 __DEFAULT_FN_ATTRS
3889_mm512_cvtepi32_ps (__m512i __A)
3890{
3891  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3892                (__v16sf) _mm512_undefined_ps (),
3893                (__mmask16) -1,
3894                _MM_FROUND_CUR_DIRECTION);
3895}
3896
3897static __inline__ __m512 __DEFAULT_FN_ATTRS
3898_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3899{
3900  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3901                (__v16sf) __W,
3902                (__mmask16) __U,
3903                _MM_FROUND_CUR_DIRECTION);
3904}
3905
3906static __inline__ __m512 __DEFAULT_FN_ATTRS
3907_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3908{
3909  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3910                (__v16sf) _mm512_setzero_ps (),
3911                (__mmask16) __U,
3912                _MM_FROUND_CUR_DIRECTION);
3913}
3914
3915static __inline __m512d __DEFAULT_FN_ATTRS
3916_mm512_cvtepu32_pd(__m256i __A)
3917{
3918  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3919}
3920
3921static __inline__ __m512d __DEFAULT_FN_ATTRS
3922_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3923{
3924  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3925                                              (__v8df)_mm512_cvtepu32_pd(__A),
3926                                              (__v8df)__W);
3927}
3928
3929static __inline__ __m512d __DEFAULT_FN_ATTRS
3930_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3931{
3932  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3933                                              (__v8df)_mm512_cvtepu32_pd(__A),
3934                                              (__v8df)_mm512_setzero_pd());
3935}
3936
3937static __inline__ __m512d __DEFAULT_FN_ATTRS
3938_mm512_cvtepu32lo_pd(__m512i __A)
3939{
3940  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3941}
3942
3943static __inline__ __m512d __DEFAULT_FN_ATTRS
3944_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3945{
3946  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3947}
3948
/* Unmasked cvtpd2ps512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (int)(R)); })
3953
/* Masked cvtpd2ps512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)); })
3958
/* Masked cvtpd2ps512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), (int)(R)); })
3963
3964static __inline__ __m256 __DEFAULT_FN_ATTRS
3965_mm512_cvtpd_ps (__m512d __A)
3966{
3967  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3968                (__v8sf) _mm256_undefined_ps (),
3969                (__mmask8) -1,
3970                _MM_FROUND_CUR_DIRECTION);
3971}
3972
3973static __inline__ __m256 __DEFAULT_FN_ATTRS
3974_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3975{
3976  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3977                (__v8sf) __W,
3978                (__mmask8) __U,
3979                _MM_FROUND_CUR_DIRECTION);
3980}
3981
3982static __inline__ __m256 __DEFAULT_FN_ATTRS
3983_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3984{
3985  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3986                (__v8sf) _mm256_setzero_ps (),
3987                (__mmask8) __U,
3988                _MM_FROUND_CUR_DIRECTION);
3989}
3990
3991static __inline__ __m512 __DEFAULT_FN_ATTRS
3992_mm512_cvtpd_pslo (__m512d __A)
3993{
3994  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3995                (__v8sf) _mm256_setzero_ps (),
3996                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3997}
3998
3999static __inline__ __m512 __DEFAULT_FN_ATTRS
4000_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
4001{
4002  return (__m512) __builtin_shufflevector (
4003                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
4004                                               __U, __A),
4005                (__v8sf) _mm256_setzero_ps (),
4006                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4007}
4008
/* Unmasked vcvtps2ph512 call: A is the float input, I the immediate; the
 * third operand is an undefined vector and the mask is all ones. */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(__m512)(A), (int)(I), \
      (__v16hi)_mm256_undefined_si256(), \
      (__mmask16)-1); })
4013
/* Masked vcvtps2ph512 call. Arguments, in order: W is the 256-bit vector
 * passed as the builtin's third operand, U is the 16-bit mask, A the float
 * input and I the immediate. The first two parameters were previously named
 * the other way round (U for the vector, W for the mask); the names now
 * follow the W/U convention of the other masked macros. Argument positions
 * are unchanged. */
#define _mm512_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })
4018
/* Masked vcvtps2ph512 call whose third operand is an all-zero vector.
 * U is the 16-bit mask (previously named W, unlike the other maskz macros
 * in this file), A the float input and I the immediate. */
#define _mm512_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
4023
/* Unmasked vcvtps2ph512 call: A is the float input, I the immediate; the
 * third operand is a zero vector and the mask is all ones. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask( \
      (__v16sf)(__m512)(A), (int)(I), \
      (__v16hi)_mm256_setzero_si256(), \
      (__mmask16)-1); })
4028
/* Masked vcvtps2ph512 call. Arguments, in order: W is the 256-bit vector
 * passed as the builtin's third operand, U is the 16-bit mask, A the float
 * input and I the immediate. The first two parameters were previously named
 * the other way round (U for the vector, W for the mask); the names now
 * follow the W/U convention of the other masked macros. Argument positions
 * are unchanged. */
#define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })
4033
/* Masked vcvtps2ph512 call whose third operand is an all-zero vector.
 * U is the 16-bit mask (previously named W, unlike the other maskz macros
 * in this file), A the float input and I the immediate. */
#define _mm512_maskz_cvtps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
4038
/* Unmasked vcvtph2ps512 call on A with rounding argument R; the second
 * operand is an undefined vector and the mask is all ones. */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)); })
4043
/* Masked vcvtph2ps512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })
4048
/* Masked vcvtph2ps512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
4053
4054
4055static  __inline __m512 __DEFAULT_FN_ATTRS
4056_mm512_cvtph_ps(__m256i __A)
4057{
4058  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4059                (__v16sf)
4060                _mm512_setzero_ps (),
4061                (__mmask16) -1,
4062                _MM_FROUND_CUR_DIRECTION);
4063}
4064
4065static __inline__ __m512 __DEFAULT_FN_ATTRS
4066_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
4067{
4068  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4069                 (__v16sf) __W,
4070                 (__mmask16) __U,
4071                 _MM_FROUND_CUR_DIRECTION);
4072}
4073
4074static __inline__ __m512 __DEFAULT_FN_ATTRS
4075_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
4076{
4077  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4078                 (__v16sf) _mm512_setzero_ps (),
4079                 (__mmask16) __U,
4080                 _MM_FROUND_CUR_DIRECTION);
4081}
4082
/* Unmasked cvttpd2dq512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })
4087
/* Masked cvttpd2dq512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)); })
4092
/* Masked cvttpd2dq512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4097
4098static __inline __m256i __DEFAULT_FN_ATTRS
4099_mm512_cvttpd_epi32(__m512d __a)
4100{
4101  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
4102                                                   (__v8si)_mm256_setzero_si256(),
4103                                                   (__mmask8) -1,
4104                                                    _MM_FROUND_CUR_DIRECTION);
4105}
4106
4107static __inline__ __m256i __DEFAULT_FN_ATTRS
4108_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4109{
4110  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4111                  (__v8si) __W,
4112                  (__mmask8) __U,
4113                  _MM_FROUND_CUR_DIRECTION);
4114}
4115
4116static __inline__ __m256i __DEFAULT_FN_ATTRS
4117_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
4118{
4119  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4120                  (__v8si) _mm256_setzero_si256 (),
4121                  (__mmask8) __U,
4122                  _MM_FROUND_CUR_DIRECTION);
4123}
4124
/* Unmasked cvttps2dq512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })
4129
/* Masked cvttps2dq512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
4134
/* Masked cvttps2dq512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
4139
4140static __inline __m512i __DEFAULT_FN_ATTRS
4141_mm512_cvttps_epi32(__m512 __a)
4142{
4143  return (__m512i)
4144    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4145                                     (__v16si) _mm512_setzero_si512 (),
4146                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
4147}
4148
4149static __inline__ __m512i __DEFAULT_FN_ATTRS
4150_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4151{
4152  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4153                  (__v16si) __W,
4154                  (__mmask16) __U,
4155                  _MM_FROUND_CUR_DIRECTION);
4156}
4157
4158static __inline__ __m512i __DEFAULT_FN_ATTRS
4159_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
4160{
4161  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4162                  (__v16si) _mm512_setzero_si512 (),
4163                  (__mmask16) __U,
4164                  _MM_FROUND_CUR_DIRECTION);
4165}
4166
/* Unmasked cvtps2dq512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })
4171
/* Masked cvtps2dq512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
4176
/* Masked cvtps2dq512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
4181
4182static __inline__ __m512i __DEFAULT_FN_ATTRS
4183_mm512_cvtps_epi32 (__m512 __A)
4184{
4185  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4186                 (__v16si) _mm512_undefined_epi32 (),
4187                 (__mmask16) -1,
4188                 _MM_FROUND_CUR_DIRECTION);
4189}
4190
4191static __inline__ __m512i __DEFAULT_FN_ATTRS
4192_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4193{
4194  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4195                 (__v16si) __W,
4196                 (__mmask16) __U,
4197                 _MM_FROUND_CUR_DIRECTION);
4198}
4199
4200static __inline__ __m512i __DEFAULT_FN_ATTRS
4201_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
4202{
4203  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4204                 (__v16si)
4205                 _mm512_setzero_si512 (),
4206                 (__mmask16) __U,
4207                 _MM_FROUND_CUR_DIRECTION);
4208}
4209
/* Unmasked cvtpd2dq512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })
4214
/* Masked cvtpd2dq512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)); })
4219
/* Masked cvtpd2dq512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4224
4225static __inline__ __m256i __DEFAULT_FN_ATTRS
4226_mm512_cvtpd_epi32 (__m512d __A)
4227{
4228  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4229                 (__v8si)
4230                 _mm256_undefined_si256 (),
4231                 (__mmask8) -1,
4232                 _MM_FROUND_CUR_DIRECTION);
4233}
4234
4235static __inline__ __m256i __DEFAULT_FN_ATTRS
4236_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4237{
4238  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4239                 (__v8si) __W,
4240                 (__mmask8) __U,
4241                 _MM_FROUND_CUR_DIRECTION);
4242}
4243
4244static __inline__ __m256i __DEFAULT_FN_ATTRS
4245_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4246{
4247  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4248                 (__v8si)
4249                 _mm256_setzero_si256 (),
4250                 (__mmask8) __U,
4251                 _MM_FROUND_CUR_DIRECTION);
4252}
4253
/* Unmasked cvtps2udq512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })
4258
/* Masked cvtps2udq512 call: W is the builtin's second operand, U its mask,
 * R its rounding argument. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })
4263
/* Masked cvtps2udq512 call whose second operand is an all-zero vector. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
4268
4269static __inline__ __m512i __DEFAULT_FN_ATTRS
4270_mm512_cvtps_epu32 ( __m512 __A)
4271{
4272  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4273                  (__v16si)\
4274                  _mm512_undefined_epi32 (),\
4275                  (__mmask16) -1,\
4276                  _MM_FROUND_CUR_DIRECTION);\
4277}
4278
4279static __inline__ __m512i __DEFAULT_FN_ATTRS
4280_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4281{
4282  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4283                  (__v16si) __W,
4284                  (__mmask16) __U,
4285                  _MM_FROUND_CUR_DIRECTION);
4286}
4287
4288static __inline__ __m512i __DEFAULT_FN_ATTRS
4289_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4290{
4291  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4292                  (__v16si)
4293                  _mm512_setzero_si512 (),
4294                  (__mmask16) __U ,
4295                  _MM_FROUND_CUR_DIRECTION);
4296}
4297
/* Unmasked cvtpd2udq512 call on A with rounding argument R; the second
 * operand is a zero vector and the mask is all ones. */
#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })
4302
/* Expands to __builtin_ia32_cvtpd2udq512_mask applied to A (as __v8df),
   with W as second operand, U as the __mmask8 mask and R as the int
   rounding argument; the result is cast to __m256i.
   W is first cast to __m256i and then to __v8si, matching how
   _mm512_mask_cvt_roundps_epu32 routes its W through the public vector
   type before the internal element type. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })
4307
/* Expands to __builtin_ia32_cvtpd2udq512_mask applied to A (as __v8df),
   with _mm256_setzero_si256() as second operand, U as the __mmask8 mask
   and R as the int rounding argument; the result is cast to __m256i. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask( \
      (__v8df)(__m512d)(A), (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4312
4313static __inline__ __m256i __DEFAULT_FN_ATTRS
4314_mm512_cvtpd_epu32 (__m512d __A)
4315{
4316  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4317                  (__v8si)
4318                  _mm256_undefined_si256 (),
4319                  (__mmask8) -1,
4320                  _MM_FROUND_CUR_DIRECTION);
4321}
4322
4323static __inline__ __m256i __DEFAULT_FN_ATTRS
4324_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4325{
4326  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4327                  (__v8si) __W,
4328                  (__mmask8) __U,
4329                  _MM_FROUND_CUR_DIRECTION);
4330}
4331
4332static __inline__ __m256i __DEFAULT_FN_ATTRS
4333_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4334{
4335  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4336                  (__v8si)
4337                  _mm256_setzero_si256 (),
4338                  (__mmask8) __U,
4339                  _MM_FROUND_CUR_DIRECTION);
4340}
4341
4342static __inline__ double __DEFAULT_FN_ATTRS
4343_mm512_cvtsd_f64(__m512d __a)
4344{
4345  return __a[0];
4346}
4347
4348static __inline__ float __DEFAULT_FN_ATTRS
4349_mm512_cvtss_f32(__m512 __a)
4350{
4351  return __a[0];
4352}
4353
4354/* Unpack and Interleave */
4355
4356static __inline __m512d __DEFAULT_FN_ATTRS
4357_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4358{
4359  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4360                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4361}
4362
4363static __inline__ __m512d __DEFAULT_FN_ATTRS
4364_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4365{
4366  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4367                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4368                                           (__v8df)__W);
4369}
4370
4371static __inline__ __m512d __DEFAULT_FN_ATTRS
4372_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4373{
4374  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4375                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4376                                           (__v8df)_mm512_setzero_pd());
4377}
4378
4379static __inline __m512d __DEFAULT_FN_ATTRS
4380_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4381{
4382  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4383                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4384}
4385
4386static __inline__ __m512d __DEFAULT_FN_ATTRS
4387_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4388{
4389  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4390                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4391                                           (__v8df)__W);
4392}
4393
4394static __inline__ __m512d __DEFAULT_FN_ATTRS
4395_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4396{
4397  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4398                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4399                                           (__v8df)_mm512_setzero_pd());
4400}
4401
4402static __inline __m512 __DEFAULT_FN_ATTRS
4403_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4404{
4405  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4406                                         2,    18,    3,    19,
4407                                         2+4,  18+4,  3+4,  19+4,
4408                                         2+8,  18+8,  3+8,  19+8,
4409                                         2+12, 18+12, 3+12, 19+12);
4410}
4411
4412static __inline__ __m512 __DEFAULT_FN_ATTRS
4413_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4414{
4415  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4416                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4417                                          (__v16sf)__W);
4418}
4419
4420static __inline__ __m512 __DEFAULT_FN_ATTRS
4421_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4422{
4423  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4424                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4425                                          (__v16sf)_mm512_setzero_ps());
4426}
4427
4428static __inline __m512 __DEFAULT_FN_ATTRS
4429_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4430{
4431  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4432                                         0,    16,    1,    17,
4433                                         0+4,  16+4,  1+4,  17+4,
4434                                         0+8,  16+8,  1+8,  17+8,
4435                                         0+12, 16+12, 1+12, 17+12);
4436}
4437
4438static __inline__ __m512 __DEFAULT_FN_ATTRS
4439_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4440{
4441  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4442                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4443                                          (__v16sf)__W);
4444}
4445
4446static __inline__ __m512 __DEFAULT_FN_ATTRS
4447_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4448{
4449  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4450                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4451                                          (__v16sf)_mm512_setzero_ps());
4452}
4453
4454static __inline__ __m512i __DEFAULT_FN_ATTRS
4455_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4456{
4457  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4458                                          2,    18,    3,    19,
4459                                          2+4,  18+4,  3+4,  19+4,
4460                                          2+8,  18+8,  3+8,  19+8,
4461                                          2+12, 18+12, 3+12, 19+12);
4462}
4463
4464static __inline__ __m512i __DEFAULT_FN_ATTRS
4465_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4466{
4467  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4468                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4469                                       (__v16si)__W);
4470}
4471
4472static __inline__ __m512i __DEFAULT_FN_ATTRS
4473_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4474{
4475  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4476                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4477                                       (__v16si)_mm512_setzero_si512());
4478}
4479
4480static __inline__ __m512i __DEFAULT_FN_ATTRS
4481_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4482{
4483  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4484                                          0,    16,    1,    17,
4485                                          0+4,  16+4,  1+4,  17+4,
4486                                          0+8,  16+8,  1+8,  17+8,
4487                                          0+12, 16+12, 1+12, 17+12);
4488}
4489
4490static __inline__ __m512i __DEFAULT_FN_ATTRS
4491_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4492{
4493  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4494                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4495                                       (__v16si)__W);
4496}
4497
4498static __inline__ __m512i __DEFAULT_FN_ATTRS
4499_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4500{
4501  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4502                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4503                                       (__v16si)_mm512_setzero_si512());
4504}
4505
4506static __inline__ __m512i __DEFAULT_FN_ATTRS
4507_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4508{
4509  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4510                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4511}
4512
4513static __inline__ __m512i __DEFAULT_FN_ATTRS
4514_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4515{
4516  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4517                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4518                                        (__v8di)__W);
4519}
4520
4521static __inline__ __m512i __DEFAULT_FN_ATTRS
4522_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4523{
4524  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4525                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4526                                        (__v8di)_mm512_setzero_si512());
4527}
4528
4529static __inline__ __m512i __DEFAULT_FN_ATTRS
4530_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4531{
4532  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4533                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4534}
4535
4536static __inline__ __m512i __DEFAULT_FN_ATTRS
4537_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4538{
4539  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4540                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4541                                        (__v8di)__W);
4542}
4543
4544static __inline__ __m512i __DEFAULT_FN_ATTRS
4545_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4546{
4547  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4548                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4549                                        (__v8di)_mm512_setzero_si512());
4550}
4551
4552/* Bit Test */
4553
4554static __inline __mmask16 __DEFAULT_FN_ATTRS
4555_mm512_test_epi32_mask(__m512i __A, __m512i __B)
4556{
4557  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4558            (__v16si) __B,
4559            (__mmask16) -1);
4560}
4561
4562static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4563_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
4564{
4565  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4566                 (__v16si) __B, __U);
4567}
4568
4569static __inline __mmask8 __DEFAULT_FN_ATTRS
4570_mm512_test_epi64_mask(__m512i __A, __m512i __B)
4571{
4572  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
4573                 (__v8di) __B,
4574                 (__mmask8) -1);
4575}
4576
4577static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4578_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
4579{
4580  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
4581}
4582
4583
4584/* SIMD load ops */
4585
4586static __inline __m512i __DEFAULT_FN_ATTRS
4587_mm512_loadu_si512 (void const *__P)
4588{
4589  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4590                  (__v16si)
4591                  _mm512_setzero_si512 (),
4592                  (__mmask16) -1);
4593}
4594
4595static __inline __m512i __DEFAULT_FN_ATTRS
4596_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4597{
4598  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4599                  (__v16si) __W,
4600                  (__mmask16) __U);
4601}
4602
4603
4604static __inline __m512i __DEFAULT_FN_ATTRS
4605_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4606{
4607  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4608                                                     (__v16si)
4609                                                     _mm512_setzero_si512 (),
4610                                                     (__mmask16) __U);
4611}
4612
4613static __inline __m512i __DEFAULT_FN_ATTRS
4614_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4615{
4616  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4617                  (__v8di) __W,
4618                  (__mmask8) __U);
4619}
4620
4621static __inline __m512i __DEFAULT_FN_ATTRS
4622_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4623{
4624  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4625                                                     (__v8di)
4626                                                     _mm512_setzero_si512 (),
4627                                                     (__mmask8) __U);
4628}
4629
4630static __inline __m512 __DEFAULT_FN_ATTRS
4631_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4632{
4633  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4634                   (__v16sf) __W,
4635                   (__mmask16) __U);
4636}
4637
4638static __inline __m512 __DEFAULT_FN_ATTRS
4639_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4640{
4641  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4642                                                  (__v16sf)
4643                                                  _mm512_setzero_ps (),
4644                                                  (__mmask16) __U);
4645}
4646
4647static __inline __m512d __DEFAULT_FN_ATTRS
4648_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4649{
4650  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4651                (__v8df) __W,
4652                (__mmask8) __U);
4653}
4654
4655static __inline __m512d __DEFAULT_FN_ATTRS
4656_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4657{
4658  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4659                                                   (__v8df)
4660                                                   _mm512_setzero_pd (),
4661                                                   (__mmask8) __U);
4662}
4663
4664static __inline __m512d __DEFAULT_FN_ATTRS
4665_mm512_loadu_pd(void const *__p)
4666{
4667  struct __loadu_pd {
4668    __m512d __v;
4669  } __attribute__((__packed__, __may_alias__));
4670  return ((struct __loadu_pd*)__p)->__v;
4671}
4672
4673static __inline __m512 __DEFAULT_FN_ATTRS
4674_mm512_loadu_ps(void const *__p)
4675{
4676  struct __loadu_ps {
4677    __m512 __v;
4678  } __attribute__((__packed__, __may_alias__));
4679  return ((struct __loadu_ps*)__p)->__v;
4680}
4681
4682static __inline __m512 __DEFAULT_FN_ATTRS
4683_mm512_load_ps(void const *__p)
4684{
4685  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
4686                                                  (__v16sf)
4687                                                  _mm512_setzero_ps (),
4688                                                  (__mmask16) -1);
4689}
4690
4691static __inline __m512 __DEFAULT_FN_ATTRS
4692_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4693{
4694  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4695                   (__v16sf) __W,
4696                   (__mmask16) __U);
4697}
4698
4699static __inline __m512 __DEFAULT_FN_ATTRS
4700_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4701{
4702  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4703                                                  (__v16sf)
4704                                                  _mm512_setzero_ps (),
4705                                                  (__mmask16) __U);
4706}
4707
4708static __inline __m512d __DEFAULT_FN_ATTRS
4709_mm512_load_pd(void const *__p)
4710{
4711  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
4712                                                   (__v8df)
4713                                                   _mm512_setzero_pd (),
4714                                                   (__mmask8) -1);
4715}
4716
4717static __inline __m512d __DEFAULT_FN_ATTRS
4718_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4719{
4720  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4721                          (__v8df) __W,
4722                          (__mmask8) __U);
4723}
4724
4725static __inline __m512d __DEFAULT_FN_ATTRS
4726_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4727{
4728  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4729                                                   (__v8df)
4730                                                   _mm512_setzero_pd (),
4731                                                   (__mmask8) __U);
4732}
4733
4734static __inline __m512i __DEFAULT_FN_ATTRS
4735_mm512_load_si512 (void const *__P)
4736{
4737  return *(__m512i *) __P;
4738}
4739
4740static __inline __m512i __DEFAULT_FN_ATTRS
4741_mm512_load_epi32 (void const *__P)
4742{
4743  return *(__m512i *) __P;
4744}
4745
4746static __inline __m512i __DEFAULT_FN_ATTRS
4747_mm512_load_epi64 (void const *__P)
4748{
4749  return *(__m512i *) __P;
4750}
4751
4752/* SIMD store ops */
4753
4754static __inline void __DEFAULT_FN_ATTRS
4755_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4756{
4757  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4758                                     (__mmask8) __U);
4759}
4760
4761static __inline void __DEFAULT_FN_ATTRS
4762_mm512_storeu_si512 (void *__P, __m512i __A)
4763{
4764  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
4765            (__mmask16) -1);
4766}
4767
4768static __inline void __DEFAULT_FN_ATTRS
4769_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4770{
4771  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4772                                     (__mmask16) __U);
4773}
4774
4775static __inline void __DEFAULT_FN_ATTRS
4776_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4777{
4778  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4779}
4780
4781static __inline void __DEFAULT_FN_ATTRS
4782_mm512_storeu_pd(void *__P, __m512d __A)
4783{
4784  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
4785}
4786
4787static __inline void __DEFAULT_FN_ATTRS
4788_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4789{
4790  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4791                                   (__mmask16) __U);
4792}
4793
4794static __inline void __DEFAULT_FN_ATTRS
4795_mm512_storeu_ps(void *__P, __m512 __A)
4796{
4797  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
4798}
4799
4800static __inline void __DEFAULT_FN_ATTRS
4801_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4802{
4803  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4804}
4805
4806static __inline void __DEFAULT_FN_ATTRS
4807_mm512_store_pd(void *__P, __m512d __A)
4808{
4809  *(__m512d*)__P = __A;
4810}
4811
4812static __inline void __DEFAULT_FN_ATTRS
4813_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4814{
4815  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4816                                   (__mmask16) __U);
4817}
4818
4819static __inline void __DEFAULT_FN_ATTRS
4820_mm512_store_ps(void *__P, __m512 __A)
4821{
4822  *(__m512*)__P = __A;
4823}
4824
4825static __inline void __DEFAULT_FN_ATTRS
4826_mm512_store_si512 (void *__P, __m512i __A)
4827{
4828  *(__m512i *) __P = __A;
4829}
4830
4831static __inline void __DEFAULT_FN_ATTRS
4832_mm512_store_epi32 (void *__P, __m512i __A)
4833{
4834  *(__m512i *) __P = __A;
4835}
4836
4837static __inline void __DEFAULT_FN_ATTRS
4838_mm512_store_epi64 (void *__P, __m512i __A)
4839{
4840  *(__m512i *) __P = __A;
4841}
4842
4843/* Mask ops */
4844
4845static __inline __mmask16 __DEFAULT_FN_ATTRS
4846_mm512_knot(__mmask16 __M)
4847{
4848  return __builtin_ia32_knothi(__M);
4849}
4850
4851/* Integer compare */
4852
4853static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4854_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
4855  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4856                                                   (__mmask16)-1);
4857}
4858
4859static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4860_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4861  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4862                                                   __u);
4863}
4864
4865static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4866_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
4867  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4868                                                 (__mmask16)-1);
4869}
4870
4871static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4872_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4873  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4874                                                 __u);
4875}
4876
4877static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4878_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4879  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4880                                                  __u);
4881}
4882
4883static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4884_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
4885  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4886                                                  (__mmask8)-1);
4887}
4888
4889static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4890_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
4891  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4892                                                (__mmask8)-1);
4893}
4894
4895static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4896_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4897  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4898                                                __u);
4899}
4900
4901static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4902_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
4903  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4904                                                (__mmask16)-1);
4905}
4906
4907static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4908_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4909  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4910                                                __u);
4911}
4912
4913static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4914_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
4915  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4916                                                 (__mmask16)-1);
4917}
4918
4919static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4920_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4921  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4922                                                 __u);
4923}
4924
4925static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4926_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
4927  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4928                                               (__mmask8)-1);
4929}
4930
4931static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4932_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4933  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4934                                               __u);
4935}
4936
4937static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4938_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
4939  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4940                                                (__mmask8)-1);
4941}
4942
4943static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4944_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4945  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4946                                                __u);
4947}
4948
4949static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4950_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
4951  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4952                                                   (__mmask16)-1);
4953}
4954
4955static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4956_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4957  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4958                                                   __u);
4959}
4960
4961static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4962_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
4963  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4964                                                 (__mmask16)-1);
4965}
4966
4967static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4968_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4969  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4970                                                 __u);
4971}
4972
4973static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4974_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4975  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4976                                                  __u);
4977}
4978
4979static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4980_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
4981  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4982                                                  (__mmask8)-1);
4983}
4984
4985static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4986_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
4987  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4988                                                (__mmask8)-1);
4989}
4990
4991static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4992_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4993  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4994                                                __u);
4995}
4996
4997static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4998_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
4999  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
5000                                                (__mmask16)-1);
5001}
5002
5003static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5004_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5005  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
5006                                                __u);
5007}
5008
5009static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5010_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
5011  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
5012                                                 (__mmask16)-1);
5013}
5014
5015static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5016_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5017  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
5018                                                 __u);
5019}
5020
5021static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5022_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
5023  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
5024                                               (__mmask8)-1);
5025}
5026
5027static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5028_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5029  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
5030                                               __u);
5031}
5032
5033static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5034_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
5035  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
5036                                                (__mmask8)-1);
5037}
5038
5039static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5040_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5041  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
5042                                                __u);
5043}
5044
5045static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5046_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
5047  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
5048                                                (__mmask16)-1);
5049}
5050
5051static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5052_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5053  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
5054                                                __u);
5055}
5056
5057static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5058_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
5059  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
5060                                                 (__mmask16)-1);
5061}
5062
5063static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5064_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5065  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
5066                                                 __u);
5067}
5068
5069static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5070_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
5071  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
5072                                               (__mmask8)-1);
5073}
5074
5075static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5076_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5077  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
5078                                               __u);
5079}
5080
5081static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5082_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
5083  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
5084                                                (__mmask8)-1);
5085}
5086
5087static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5088_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5089  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
5090                                                __u);
5091}
5092
5093static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5094_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
5095  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
5096                                                (__mmask16)-1);
5097}
5098
5099static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5100_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5101  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
5102                                                __u);
5103}
5104
5105static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5106_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
5107  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
5108                                                 (__mmask16)-1);
5109}
5110
5111static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5112_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5113  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
5114                                                 __u);
5115}
5116
5117static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5118_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
5119  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5120                                               (__mmask8)-1);
5121}
5122
5123static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5124_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5125  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5126                                               __u);
5127}
5128
5129static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5130_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
5131  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5132                                                (__mmask8)-1);
5133}
5134
5135static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5136_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5137  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5138                                                __u);
5139}
5140
5141static __inline__ __m512i __DEFAULT_FN_ATTRS
5142_mm512_cvtepi8_epi32(__m128i __A)
5143{
5144  /* This function always performs a signed extension, but __v16qi is a char
5145     which may be signed or unsigned, so use __v16qs. */
5146  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
5147}
5148
5149static __inline__ __m512i __DEFAULT_FN_ATTRS
5150_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5151{
5152  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5153                                             (__v16si)_mm512_cvtepi8_epi32(__A),
5154                                             (__v16si)__W);
5155}
5156
5157static __inline__ __m512i __DEFAULT_FN_ATTRS
5158_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
5159{
5160  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5161                                             (__v16si)_mm512_cvtepi8_epi32(__A),
5162                                             (__v16si)_mm512_setzero_si512());
5163}
5164
5165static __inline__ __m512i __DEFAULT_FN_ATTRS
5166_mm512_cvtepi8_epi64(__m128i __A)
5167{
5168  /* This function always performs a signed extension, but __v16qi is a char
5169     which may be signed or unsigned, so use __v16qs. */
5170  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5171}
5172
5173static __inline__ __m512i __DEFAULT_FN_ATTRS
5174_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5175{
5176  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5177                                             (__v8di)_mm512_cvtepi8_epi64(__A),
5178                                             (__v8di)__W);
5179}
5180
5181static __inline__ __m512i __DEFAULT_FN_ATTRS
5182_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
5183{
5184  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5185                                             (__v8di)_mm512_cvtepi8_epi64(__A),
5186                                             (__v8di)_mm512_setzero_si512 ());
5187}
5188
5189static __inline__ __m512i __DEFAULT_FN_ATTRS
5190_mm512_cvtepi32_epi64(__m256i __X)
5191{
5192  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
5193}
5194
5195static __inline__ __m512i __DEFAULT_FN_ATTRS
5196_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5197{
5198  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5199                                             (__v8di)_mm512_cvtepi32_epi64(__X),
5200                                             (__v8di)__W);
5201}
5202
5203static __inline__ __m512i __DEFAULT_FN_ATTRS
5204_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
5205{
5206  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5207                                             (__v8di)_mm512_cvtepi32_epi64(__X),
5208                                             (__v8di)_mm512_setzero_si512());
5209}
5210
5211static __inline__ __m512i __DEFAULT_FN_ATTRS
5212_mm512_cvtepi16_epi32(__m256i __A)
5213{
5214  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
5215}
5216
5217static __inline__ __m512i __DEFAULT_FN_ATTRS
5218_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5219{
5220  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5221                                            (__v16si)_mm512_cvtepi16_epi32(__A),
5222                                            (__v16si)__W);
5223}
5224
5225static __inline__ __m512i __DEFAULT_FN_ATTRS
5226_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
5227{
5228  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5229                                            (__v16si)_mm512_cvtepi16_epi32(__A),
5230                                            (__v16si)_mm512_setzero_si512 ());
5231}
5232
5233static __inline__ __m512i __DEFAULT_FN_ATTRS
5234_mm512_cvtepi16_epi64(__m128i __A)
5235{
5236  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
5237}
5238
5239static __inline__ __m512i __DEFAULT_FN_ATTRS
5240_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5241{
5242  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5243                                             (__v8di)_mm512_cvtepi16_epi64(__A),
5244                                             (__v8di)__W);
5245}
5246
5247static __inline__ __m512i __DEFAULT_FN_ATTRS
5248_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
5249{
5250  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5251                                             (__v8di)_mm512_cvtepi16_epi64(__A),
5252                                             (__v8di)_mm512_setzero_si512());
5253}
5254
5255static __inline__ __m512i __DEFAULT_FN_ATTRS
5256_mm512_cvtepu8_epi32(__m128i __A)
5257{
5258  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
5259}
5260
5261static __inline__ __m512i __DEFAULT_FN_ATTRS
5262_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5263{
5264  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5265                                             (__v16si)_mm512_cvtepu8_epi32(__A),
5266                                             (__v16si)__W);
5267}
5268
5269static __inline__ __m512i __DEFAULT_FN_ATTRS
5270_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
5271{
5272  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5273                                             (__v16si)_mm512_cvtepu8_epi32(__A),
5274                                             (__v16si)_mm512_setzero_si512());
5275}
5276
5277static __inline__ __m512i __DEFAULT_FN_ATTRS
5278_mm512_cvtepu8_epi64(__m128i __A)
5279{
5280  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5281}
5282
5283static __inline__ __m512i __DEFAULT_FN_ATTRS
5284_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5285{
5286  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5287                                             (__v8di)_mm512_cvtepu8_epi64(__A),
5288                                             (__v8di)__W);
5289}
5290
5291static __inline__ __m512i __DEFAULT_FN_ATTRS
5292_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
5293{
5294  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5295                                             (__v8di)_mm512_cvtepu8_epi64(__A),
5296                                             (__v8di)_mm512_setzero_si512());
5297}
5298
5299static __inline__ __m512i __DEFAULT_FN_ATTRS
5300_mm512_cvtepu32_epi64(__m256i __X)
5301{
5302  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
5303}
5304
5305static __inline__ __m512i __DEFAULT_FN_ATTRS
5306_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5307{
5308  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5309                                             (__v8di)_mm512_cvtepu32_epi64(__X),
5310                                             (__v8di)__W);
5311}
5312
5313static __inline__ __m512i __DEFAULT_FN_ATTRS
5314_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
5315{
5316  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5317                                             (__v8di)_mm512_cvtepu32_epi64(__X),
5318                                             (__v8di)_mm512_setzero_si512());
5319}
5320
5321static __inline__ __m512i __DEFAULT_FN_ATTRS
5322_mm512_cvtepu16_epi32(__m256i __A)
5323{
5324  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
5325}
5326
5327static __inline__ __m512i __DEFAULT_FN_ATTRS
5328_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5329{
5330  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5331                                            (__v16si)_mm512_cvtepu16_epi32(__A),
5332                                            (__v16si)__W);
5333}
5334
5335static __inline__ __m512i __DEFAULT_FN_ATTRS
5336_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
5337{
5338  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5339                                            (__v16si)_mm512_cvtepu16_epi32(__A),
5340                                            (__v16si)_mm512_setzero_si512());
5341}
5342
5343static __inline__ __m512i __DEFAULT_FN_ATTRS
5344_mm512_cvtepu16_epi64(__m128i __A)
5345{
5346  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
5347}
5348
5349static __inline__ __m512i __DEFAULT_FN_ATTRS
5350_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5351{
5352  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5353                                             (__v8di)_mm512_cvtepu16_epi64(__A),
5354                                             (__v8di)__W);
5355}
5356
5357static __inline__ __m512i __DEFAULT_FN_ATTRS
5358_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
5359{
5360  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5361                                             (__v8di)_mm512_cvtepu16_epi64(__A),
5362                                             (__v8di)_mm512_setzero_si512());
5363}
5364
5365static __inline__ __m512i __DEFAULT_FN_ATTRS
5366_mm512_rorv_epi32 (__m512i __A, __m512i __B)
5367{
5368  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5369              (__v16si) __B,
5370              (__v16si)
5371              _mm512_setzero_si512 (),
5372              (__mmask16) -1);
5373}
5374
5375static __inline__ __m512i __DEFAULT_FN_ATTRS
5376_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5377{
5378  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5379              (__v16si) __B,
5380              (__v16si) __W,
5381              (__mmask16) __U);
5382}
5383
5384static __inline__ __m512i __DEFAULT_FN_ATTRS
5385_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5386{
5387  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5388              (__v16si) __B,
5389              (__v16si)
5390              _mm512_setzero_si512 (),
5391              (__mmask16) __U);
5392}
5393
5394static __inline__ __m512i __DEFAULT_FN_ATTRS
5395_mm512_rorv_epi64 (__m512i __A, __m512i __B)
5396{
5397  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5398              (__v8di) __B,
5399              (__v8di)
5400              _mm512_setzero_si512 (),
5401              (__mmask8) -1);
5402}
5403
5404static __inline__ __m512i __DEFAULT_FN_ATTRS
5405_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5406{
5407  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5408              (__v8di) __B,
5409              (__v8di) __W,
5410              (__mmask8) __U);
5411}
5412
5413static __inline__ __m512i __DEFAULT_FN_ATTRS
5414_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5415{
5416  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5417              (__v8di) __B,
5418              (__v8di)
5419              _mm512_setzero_si512 (),
5420              (__mmask8) __U);
5421}
5422
5423
5424
/* Signed 32-bit lane compare of x and y under predicate immediate pred, with
   all lanes enabled; the expansion has type __mmask16. */
#define _mm512_cmp_epi32_mask(x, y, pred) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(x), (__v16si)(__m512i)(y), (int)(pred), \
      (__mmask16)-1); })
5429
/* Unsigned 32-bit lane compare of x and y under predicate immediate pred,
   with all lanes enabled; the expansion has type __mmask16. */
#define _mm512_cmp_epu32_mask(x, y, pred) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(x), (__v16si)(__m512i)(y), (int)(pred), \
      (__mmask16)-1); })
5434
/* Signed 64-bit lane compare of x and y under predicate immediate pred, with
   all lanes enabled; the expansion has type __mmask8. */
#define _mm512_cmp_epi64_mask(x, y, pred) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(x), (__v8di)(__m512i)(y), (int)(pred), \
      (__mmask8)-1); })
5439
/* Unsigned 64-bit lane compare of x and y under predicate immediate pred,
   with all lanes enabled; the expansion has type __mmask8. */
#define _mm512_cmp_epu64_mask(x, y, pred) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(x), (__v8di)(__m512i)(y), (int)(pred), \
      (__mmask8)-1); })
5444
/* Signed 32-bit lane compare of x and y under predicate immediate pred; k is
   forwarded as the builtin's mask operand. Expansion type is __mmask16. */
#define _mm512_mask_cmp_epi32_mask(k, x, y, pred) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpd512_mask( \
      (__v16si)(__m512i)(x), (__v16si)(__m512i)(y), (int)(pred), \
      (__mmask16)(k)); })
5449
/* Unsigned 32-bit lane compare of x and y under predicate immediate pred; k
   is forwarded as the builtin's mask operand. Expansion type is __mmask16. */
#define _mm512_mask_cmp_epu32_mask(k, x, y, pred) __extension__ ({ \
  (__mmask16)__builtin_ia32_ucmpd512_mask( \
      (__v16si)(__m512i)(x), (__v16si)(__m512i)(y), (int)(pred), \
      (__mmask16)(k)); })
5454
/* Signed 64-bit lane compare of x and y under predicate immediate pred; k is
   forwarded as the builtin's mask operand. Expansion type is __mmask8. */
#define _mm512_mask_cmp_epi64_mask(k, x, y, pred) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq512_mask( \
      (__v8di)(__m512i)(x), (__v8di)(__m512i)(y), (int)(pred), \
      (__mmask8)(k)); })
5459
/* Unsigned 64-bit lane compare of x and y under predicate immediate pred; k
   is forwarded as the builtin's mask operand. Expansion type is __mmask8. */
#define _mm512_mask_cmp_epu64_mask(k, x, y, pred) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq512_mask( \
      (__v8di)(__m512i)(x), (__v8di)(__m512i)(y), (int)(pred), \
      (__mmask8)(k)); })
5464
/* Rotate-left of the 32-bit lanes of v by the count n (cast to int). Uses the
   masked builtin with an all-ones mask and a zero pass-through vector. */
#define _mm512_rol_epi32(v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask( \
      (__v16si)(__m512i)(v), (int)(n), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)-1); })
5469
/* Rotate-left of the 32-bit lanes of v by the count n (cast to int), with w
   as the builtin's pass-through operand and u as its mask. */
#define _mm512_mask_rol_epi32(w, u, v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask( \
      (__v16si)(__m512i)(v), (int)(n), \
      (__v16si)(__m512i)(w), (__mmask16)(u)); })
5474
/* Rotate-left of the 32-bit lanes of v by the count n (cast to int), with a
   zero vector as the builtin's pass-through operand and u as its mask. */
#define _mm512_maskz_rol_epi32(u, v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prold512_mask( \
      (__v16si)(__m512i)(v), (int)(n), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)(u)); })
5479
/* Rotate-left of the 64-bit lanes of v by the count n (cast to int). Uses the
   masked builtin with an all-ones mask and a zero pass-through vector. */
#define _mm512_rol_epi64(v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask( \
      (__v8di)(__m512i)(v), (int)(n), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)-1); })
5484
/* Rotate-left of the 64-bit lanes of v by the count n (cast to int), with w
   as the builtin's pass-through operand and u as its mask. */
#define _mm512_mask_rol_epi64(w, u, v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask( \
      (__v8di)(__m512i)(v), (int)(n), \
      (__v8di)(__m512i)(w), (__mmask8)(u)); })
5488
/* Rotate-left of the 64-bit lanes of v by the count n (cast to int), with a
   zero vector as the builtin's pass-through operand and u as its mask. */
#define _mm512_maskz_rol_epi64(u, v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prolq512_mask( \
      (__v8di)(__m512i)(v), (int)(n), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)(u)); })
5493static __inline__ __m512i __DEFAULT_FN_ATTRS
5494_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5495{
5496  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5497              (__v16si) __B,
5498              (__v16si)
5499              _mm512_setzero_si512 (),
5500              (__mmask16) -1);
5501}
5502
5503static __inline__ __m512i __DEFAULT_FN_ATTRS
5504_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5505{
5506  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5507              (__v16si) __B,
5508              (__v16si) __W,
5509              (__mmask16) __U);
5510}
5511
5512static __inline__ __m512i __DEFAULT_FN_ATTRS
5513_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5514{
5515  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5516              (__v16si) __B,
5517              (__v16si)
5518              _mm512_setzero_si512 (),
5519              (__mmask16) __U);
5520}
5521
5522static __inline__ __m512i __DEFAULT_FN_ATTRS
5523_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5524{
5525  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5526              (__v8di) __B,
5527              (__v8di)
5528              _mm512_setzero_si512 (),
5529              (__mmask8) -1);
5530}
5531
5532static __inline__ __m512i __DEFAULT_FN_ATTRS
5533_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5534{
5535  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5536              (__v8di) __B,
5537              (__v8di) __W,
5538              (__mmask8) __U);
5539}
5540
5541static __inline__ __m512i __DEFAULT_FN_ATTRS
5542_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5543{
5544  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5545              (__v8di) __B,
5546              (__v8di)
5547              _mm512_setzero_si512 (),
5548              (__mmask8) __U);
5549}
5550
/* Rotate-right of the 32-bit lanes of v by the count n (cast to int). Uses
   the masked builtin with an all-ones mask and a zero pass-through vector. */
#define _mm512_ror_epi32(v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask( \
      (__v16si)(__m512i)(v), (int)(n), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)-1); })
5555
/* Rotate-right of the 32-bit lanes of v by the count n (cast to int), with w
   as the builtin's pass-through operand and u as its mask. */
#define _mm512_mask_ror_epi32(w, u, v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask( \
      (__v16si)(__m512i)(v), (int)(n), \
      (__v16si)(__m512i)(w), (__mmask16)(u)); })
5560
/* Rotate-right of the 32-bit lanes of v by the count n (cast to int), with a
   zero vector as the builtin's pass-through operand and u as its mask. */
#define _mm512_maskz_ror_epi32(u, v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prord512_mask( \
      (__v16si)(__m512i)(v), (int)(n), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)(u)); })
5565
/* Rotate-right of the 64-bit lanes of v by the count n (cast to int). Uses
   the masked builtin with an all-ones mask and a zero pass-through vector. */
#define _mm512_ror_epi64(v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask( \
      (__v8di)(__m512i)(v), (int)(n), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)-1); })
5570
/* Rotate-right of the 64-bit lanes of v by the count n (cast to int), with w
   as the builtin's pass-through operand and u as its mask. */
#define _mm512_mask_ror_epi64(w, u, v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask( \
      (__v8di)(__m512i)(v), (int)(n), \
      (__v8di)(__m512i)(w), (__mmask8)(u)); })
5574
/* Rotate-right of the 64-bit lanes of v by the count n (cast to int), with a
   zero vector as the builtin's pass-through operand and u as its mask. */
#define _mm512_maskz_ror_epi64(u, v, n) __extension__ ({ \
  (__m512i)__builtin_ia32_prorq512_mask( \
      (__v8di)(__m512i)(v), (int)(n), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)(u)); })
5579
5580static __inline__ __m512i __DEFAULT_FN_ATTRS
5581_mm512_slli_epi32(__m512i __A, int __B)
5582{
5583  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5584}
5585
5586static __inline__ __m512i __DEFAULT_FN_ATTRS
5587_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5588{
5589  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5590                                         (__v16si)_mm512_slli_epi32(__A, __B),
5591                                         (__v16si)__W);
5592}
5593
5594static __inline__ __m512i __DEFAULT_FN_ATTRS
5595_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5596  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5597                                         (__v16si)_mm512_slli_epi32(__A, __B),
5598                                         (__v16si)_mm512_setzero_si512());
5599}
5600
5601static __inline__ __m512i __DEFAULT_FN_ATTRS
5602_mm512_slli_epi64(__m512i __A, int __B)
5603{
5604  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5605}
5606
5607static __inline__ __m512i __DEFAULT_FN_ATTRS
5608_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5609{
5610  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5611                                          (__v8di)_mm512_slli_epi64(__A, __B),
5612                                          (__v8di)__W);
5613}
5614
5615static __inline__ __m512i __DEFAULT_FN_ATTRS
5616_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5617{
5618  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5619                                          (__v8di)_mm512_slli_epi64(__A, __B),
5620                                          (__v8di)_mm512_setzero_si512());
5621}
5622
5623static __inline__ __m512i __DEFAULT_FN_ATTRS
5624_mm512_srli_epi32(__m512i __A, int __B)
5625{
5626  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5627}
5628
5629static __inline__ __m512i __DEFAULT_FN_ATTRS
5630_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5631{
5632  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5633                                         (__v16si)_mm512_srli_epi32(__A, __B),
5634                                         (__v16si)__W);
5635}
5636
5637static __inline__ __m512i __DEFAULT_FN_ATTRS
5638_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5639  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5640                                         (__v16si)_mm512_srli_epi32(__A, __B),
5641                                         (__v16si)_mm512_setzero_si512());
5642}
5643
5644static __inline__ __m512i __DEFAULT_FN_ATTRS
5645_mm512_srli_epi64(__m512i __A, int __B)
5646{
5647  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5648}
5649
5650static __inline__ __m512i __DEFAULT_FN_ATTRS
5651_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5652{
5653  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5654                                          (__v8di)_mm512_srli_epi64(__A, __B),
5655                                          (__v8di)__W);
5656}
5657
5658static __inline__ __m512i __DEFAULT_FN_ATTRS
5659_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5660{
5661  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5662                                          (__v8di)_mm512_srli_epi64(__A, __B),
5663                                          (__v8di)_mm512_setzero_si512());
5664}
5665
5666static __inline__ __m512i __DEFAULT_FN_ATTRS
5667_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5668{
5669  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5670              (__v16si) __W,
5671              (__mmask16) __U);
5672}
5673
5674static __inline__ __m512i __DEFAULT_FN_ATTRS
5675_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5676{
5677  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5678              (__v16si)
5679              _mm512_setzero_si512 (),
5680              (__mmask16) __U);
5681}
5682
5683static __inline__ void __DEFAULT_FN_ATTRS
5684_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5685{
5686  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5687          (__mmask16) __U);
5688}
5689
5690static __inline__ __m512i __DEFAULT_FN_ATTRS
5691_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5692{
5693  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5694                 (__v16si) __A,
5695                 (__v16si) __W);
5696}
5697
5698static __inline__ __m512i __DEFAULT_FN_ATTRS
5699_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5700{
5701  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5702                 (__v16si) __A,
5703                 (__v16si) _mm512_setzero_si512 ());
5704}
5705
5706static __inline__ __m512i __DEFAULT_FN_ATTRS
5707_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5708{
5709  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5710                 (__v8di) __A,
5711                 (__v8di) __W);
5712}
5713
5714static __inline__ __m512i __DEFAULT_FN_ATTRS
5715_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5716{
5717  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5718                 (__v8di) __A,
5719                 (__v8di) _mm512_setzero_si512 ());
5720}
5721
5722static __inline__ __m512i __DEFAULT_FN_ATTRS
5723_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5724{
5725  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5726              (__v8di) __W,
5727              (__mmask8) __U);
5728}
5729
5730static __inline__ __m512i __DEFAULT_FN_ATTRS
5731_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5732{
5733  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5734              (__v8di)
5735              _mm512_setzero_si512 (),
5736              (__mmask8) __U);
5737}
5738
5739static __inline__ void __DEFAULT_FN_ATTRS
5740_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5741{
5742  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5743          (__mmask8) __U);
5744}
5745
5746static __inline__ __m512d __DEFAULT_FN_ATTRS
5747_mm512_movedup_pd (__m512d __A)
5748{
5749  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5750                                          0, 0, 2, 2, 4, 4, 6, 6);
5751}
5752
5753static __inline__ __m512d __DEFAULT_FN_ATTRS
5754_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5755{
5756  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5757                                              (__v8df)_mm512_movedup_pd(__A),
5758                                              (__v8df)__W);
5759}
5760
5761static __inline__ __m512d __DEFAULT_FN_ATTRS
5762_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5763{
5764  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5765                                              (__v8df)_mm512_movedup_pd(__A),
5766                                              (__v8df)_mm512_setzero_pd());
5767}
5768
/* Packed-double fixupimm on a, b and the integer vector c with immediate imm
   and rounding argument r; every lane is enabled (mask of all ones). */
#define _mm512_fixupimm_round_pd(a, b, c, imm, r) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(a), (__v8df)(__m512d)(b), (__v8di)(__m512i)(c), \
      (int)(imm), (__mmask8)-1, (int)(r)); })
5774
/* Packed-double fixupimm on a, b and the integer vector c with immediate imm
   and rounding argument r; u is forwarded as the builtin's mask operand. */
#define _mm512_mask_fixupimm_round_pd(a, u, b, c, imm, r) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(a), (__v8df)(__m512d)(b), (__v8di)(__m512i)(c), \
      (int)(imm), (__mmask8)(u), (int)(r)); })
5780
/* Packed-double fixupimm on a, b and the integer vector c with immediate imm;
   every lane is enabled and rounding is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_pd(a, b, c, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(a), (__v8df)(__m512d)(b), (__v8di)(__m512i)(c), \
      (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); })
5787
/* Packed-double fixupimm on a, b and the integer vector c with immediate imm;
   u is forwarded as the builtin's mask operand and rounding is
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_fixupimm_pd(a, u, b, c, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(a), (__v8df)(__m512d)(b), (__v8di)(__m512i)(c), \
      (int)(imm), (__mmask8)(u), _MM_FROUND_CUR_DIRECTION); })
5794
/* Packed-double fixupimm via the _maskz builtin on a, b and the integer
   vector c with immediate imm, mask u and rounding argument r. */
#define _mm512_maskz_fixupimm_round_pd(u, a, b, c, imm, r) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(a), (__v8df)(__m512d)(b), (__v8di)(__m512i)(c), \
      (int)(imm), (__mmask8)(u), (int)(r)); })
5801
/* Packed-double fixupimm via the _maskz builtin on a, b and the integer
   vector c with immediate imm and mask u; rounding is
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_fixupimm_pd(u, a, b, c, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
      (__v8df)(__m512d)(a), (__v8df)(__m512d)(b), (__v8di)(__m512i)(c), \
      (int)(imm), (__mmask8)(u), _MM_FROUND_CUR_DIRECTION); })
5808
/* Packed-float fixupimm on a, b and the integer vector c with immediate imm
   and rounding argument r; every lane is enabled (mask of all ones). */
#define _mm512_fixupimm_round_ps(a, b, c, imm, r) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(a), (__v16sf)(__m512)(b), (__v16si)(__m512i)(c), \
      (int)(imm), (__mmask16)-1, (int)(r)); })
5814
/* Packed-float fixupimm on a, b and the integer vector c with immediate imm
   and rounding argument r; u is forwarded as the builtin's mask operand. */
#define _mm512_mask_fixupimm_round_ps(a, u, b, c, imm, r) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(a), (__v16sf)(__m512)(b), (__v16si)(__m512i)(c), \
      (int)(imm), (__mmask16)(u), (int)(r)); })
5820
/* Packed-float fixupimm on a, b and the integer vector c with immediate imm;
   every lane is enabled and rounding is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_ps(a, b, c, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(a), (__v16sf)(__m512)(b), (__v16si)(__m512i)(c), \
      (int)(imm), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); })
5827
/* Packed-float fixupimm on a, b and the integer vector c with immediate imm;
   u is forwarded as the builtin's mask operand and rounding is
   _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_fixupimm_ps(a, u, b, c, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_mask( \
      (__v16sf)(__m512)(a), (__v16sf)(__m512)(b), (__v16si)(__m512i)(c), \
      (int)(imm), (__mmask16)(u), _MM_FROUND_CUR_DIRECTION); })
5834
/* Expands to __builtin_ia32_fixupimmps512_maskz on A and B (viewed as
 * __v16sf) and C (viewed as __v16si) with immediate imm, mask U and R as the
 * final argument. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), (int)(R)); })
5841
/* Expands to __builtin_ia32_fixupimmps512_maskz on A and B (viewed as
 * __v16sf) and C (viewed as __v16si) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION); })
5848
/* Expands to __builtin_ia32_fixupimmsd_mask on A and B (viewed as __v2df)
 * and C (viewed as __v2di) with immediate imm, a fixed mask of (__mmask8)-1
 * and R as the final argument. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, (int)(R)); })
5854
/* Expands to __builtin_ia32_fixupimmsd_mask on A and B (viewed as __v2df)
 * and C (viewed as __v2di) with immediate imm, mask U and R as the final
 * argument. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)); })
5860
/* Same builtin call as _mm_fixupimm_round_sd, but with the final argument
 * fixed to _MM_FROUND_CUR_DIRECTION; the mask is (__mmask8)-1. */
#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); })
5867
/* Expands to __builtin_ia32_fixupimmsd_mask on A and B (viewed as __v2df)
 * and C (viewed as __v2di) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); })
5874
/* Expands to __builtin_ia32_fixupimmsd_maskz on A and B (viewed as __v2df)
 * and C (viewed as __v2di) with immediate imm, mask U and R as the final
 * argument. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)); })
5880
/* Expands to __builtin_ia32_fixupimmsd_maskz on A and B (viewed as __v2df)
 * and C (viewed as __v2di) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); })
5887
/* Expands to __builtin_ia32_fixupimmss_mask on A and B (viewed as __v4sf)
 * and C (viewed as __v4si) with immediate imm, a fixed mask of (__mmask8)-1
 * and R as the final argument. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, (int)(R)); })
5893
/* Expands to __builtin_ia32_fixupimmss_mask on A and B (viewed as __v4sf)
 * and C (viewed as __v4si) with immediate imm, mask U and R as the final
 * argument. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)); })
5899
/* Same builtin call as _mm_fixupimm_round_ss, but with the final argument
 * fixed to _MM_FROUND_CUR_DIRECTION; the mask is (__mmask8)-1. */
#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); })
5906
/* Expands to __builtin_ia32_fixupimmss_mask on A and B (viewed as __v4sf)
 * and C (viewed as __v4si) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); })
5913
/* Expands to __builtin_ia32_fixupimmss_maskz on A and B (viewed as __v4sf)
 * and C (viewed as __v4si) with immediate imm, mask U and R as the final
 * argument. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), (int)(R)); })
5919
/* Expands to __builtin_ia32_fixupimmss_maskz on A and B (viewed as __v4sf)
 * and C (viewed as __v4si) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (__v4si)(__m128i)(C), \
      (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); })
5926
/* Expands to __builtin_ia32_getexpsd128_round_mask on A and B (viewed as
 * __v2df) with _mm_setzero_pd() as the third operand, a fixed mask of
 * (__mmask8)-1 and R as the final argument. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
5932
5933
5934static __inline__ __m128d __DEFAULT_FN_ATTRS
5935_mm_getexp_sd (__m128d __A, __m128d __B)
5936{
5937  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5938                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5939}
5940
5941static __inline__ __m128d __DEFAULT_FN_ATTRS
5942_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5943{
5944 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5945          (__v2df) __B,
5946          (__v2df) __W,
5947          (__mmask8) __U,
5948          _MM_FROUND_CUR_DIRECTION);
5949}
5950
/* Expands to __builtin_ia32_getexpsd128_round_mask on A and B (viewed as
 * __v2df) with W as the third operand, mask U and R as the final argument. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
5956
5957static __inline__ __m128d __DEFAULT_FN_ATTRS
5958_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5959{
5960 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5961          (__v2df) __B,
5962          (__v2df) _mm_setzero_pd (),
5963          (__mmask8) __U,
5964          _MM_FROUND_CUR_DIRECTION);
5965}
5966
/* Expands to __builtin_ia32_getexpsd128_round_mask on A and B (viewed as
 * __v2df) with _mm_setzero_pd() as the third operand, mask U and R as the
 * final argument. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
5972
/* Expands to __builtin_ia32_getexpss128_round_mask on A and B (viewed as
 * __v4sf) with _mm_setzero_ps() as the third operand, a fixed mask of
 * (__mmask8)-1 and R as the final argument. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
5978
5979static __inline__ __m128 __DEFAULT_FN_ATTRS
5980_mm_getexp_ss (__m128 __A, __m128 __B)
5981{
5982  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5983                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5984}
5985
5986static __inline__ __m128 __DEFAULT_FN_ATTRS
5987_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5988{
5989 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5990          (__v4sf) __B,
5991          (__v4sf) __W,
5992          (__mmask8) __U,
5993          _MM_FROUND_CUR_DIRECTION);
5994}
5995
/* Expands to __builtin_ia32_getexpss128_round_mask on A and B (viewed as
 * __v4sf) with W as the third operand, mask U and R as the final argument. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
6001
6002static __inline__ __m128 __DEFAULT_FN_ATTRS
6003_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
6004{
6005 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
6006          (__v4sf) __B,
6007          (__v4sf) _mm_setzero_pd (),
6008          (__mmask8) __U,
6009          _MM_FROUND_CUR_DIRECTION);
6010}
6011
/* Expands to __builtin_ia32_getexpss128_round_mask on A and B (viewed as
 * __v4sf) with _mm_setzero_ps() as the third operand, mask U and R as the
 * final argument. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
6017
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B (viewed as
 * __v2df).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are _mm_setzero_pd(), a fixed mask of (__mmask8)-1
 * and R. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
6024
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B (viewed as
 * __v2df).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are _mm_setzero_pd(), a fixed mask of (__mmask8)-1
 * and _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); })
6032
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B (viewed as
 * __v2df).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are W, mask U and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); })
6040
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B (viewed as
 * __v2df).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are W, mask U and R.
 *
 * The statement expression carries the __extension__ prefix like the other
 * getmant macros in this file, so using this macro does not trigger
 * pedantic diagnostics about the GNU ({ ... }) extension. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)); })
6047
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B (viewed as
 * __v2df).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are _mm_setzero_pd(), mask U and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); })
6055
/* Expands to __builtin_ia32_getmantsd_round_mask on A and B (viewed as
 * __v2df).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are _mm_setzero_pd(), mask U and R. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
6062
/* Expands to __builtin_ia32_getmantss_round_mask on A and B (viewed as
 * __v4sf).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are _mm_setzero_ps(), a fixed mask of (__mmask8)-1
 * and R. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
6069
/* Expands to __builtin_ia32_getmantss_round_mask on A and B (viewed as
 * __v4sf).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are _mm_setzero_ps(), a fixed mask of (__mmask8)-1
 * and _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); })
6077
/* Expands to __builtin_ia32_getmantss_round_mask on A and B (viewed as
 * __v4sf).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are W, mask U and _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); })
6085
/* Expands to __builtin_ia32_getmantss_round_mask on A and B (viewed as
 * __v4sf).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are W, mask U and R.
 *
 * The statement expression carries the __extension__ prefix like the other
 * getmant macros in this file, so using this macro does not trigger
 * pedantic diagnostics about the GNU ({ ... }) extension. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)); })
6092
/* Expands to __builtin_ia32_getmantss_round_mask on A and B (viewed as
 * __v4sf).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are a zero vector, mask U and
 * _MM_FROUND_CUR_DIRECTION.
 *
 * The zero operand is built with _mm_setzero_ps(), the single-precision
 * constructor, so its type matches the __v4sf operands (the same way
 * _mm_getmant_ss and _mm_maskz_getmant_round_ss build it) instead of
 * reinterpreting a double-precision zero vector. */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
6100
/* Expands to __builtin_ia32_getmantss_round_mask on A and B (viewed as
 * __v4sf).  C and D are folded into one int immediate as ((D)<<2) | (C);
 * the remaining arguments are _mm_setzero_ps(), mask U and R. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
6107
6108static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6109_mm512_kmov (__mmask16 __A)
6110{
6111  return  __A;
6112}
6113
/* Expands to __builtin_ia32_vcomisd on A and B (viewed as __v2df) with P and
 * R passed as int arguments; the result is cast to int. */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({ \
  (int)__builtin_ia32_vcomisd( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(P), (int)(R)); })
6117
/* Expands to __builtin_ia32_vcomiss on A and B (viewed as __v4sf) with P and
 * R passed as int arguments; the result is cast to int. */
#define _mm_comi_round_ss(A, B, P, R) __extension__ ({ \
  (int)__builtin_ia32_vcomiss( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(P), (int)(R)); })
6121
6122#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvtsd2si64 on A (viewed as __v2df) with R as
 * the second argument; the result is cast to long long.  Guarded by
 * __x86_64__. */
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), (int)(R)); })
6125#endif
6126
6127static __inline__ __m512i __DEFAULT_FN_ATTRS
6128_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6129         __mmask16 __U, __m512i __B)
6130{
6131  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6132                   (__v16si) __I
6133                   /* idx */ ,
6134                   (__v16si) __B,
6135                   (__mmask16) __U);
6136}
6137
6138static __inline__ __m512i __DEFAULT_FN_ATTRS
6139_mm512_sll_epi32(__m512i __A, __m128i __B)
6140{
6141  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
6142}
6143
6144static __inline__ __m512i __DEFAULT_FN_ATTRS
6145_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6146{
6147  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6148                                          (__v16si)_mm512_sll_epi32(__A, __B),
6149                                          (__v16si)__W);
6150}
6151
6152static __inline__ __m512i __DEFAULT_FN_ATTRS
6153_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6154{
6155  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6156                                          (__v16si)_mm512_sll_epi32(__A, __B),
6157                                          (__v16si)_mm512_setzero_si512());
6158}
6159
6160static __inline__ __m512i __DEFAULT_FN_ATTRS
6161_mm512_sll_epi64(__m512i __A, __m128i __B)
6162{
6163  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
6164}
6165
6166static __inline__ __m512i __DEFAULT_FN_ATTRS
6167_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6168{
6169  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6170                                             (__v8di)_mm512_sll_epi64(__A, __B),
6171                                             (__v8di)__W);
6172}
6173
6174static __inline__ __m512i __DEFAULT_FN_ATTRS
6175_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6176{
6177  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6178                                           (__v8di)_mm512_sll_epi64(__A, __B),
6179                                           (__v8di)_mm512_setzero_si512());
6180}
6181
6182static __inline__ __m512i __DEFAULT_FN_ATTRS
6183_mm512_sllv_epi32(__m512i __X, __m512i __Y)
6184{
6185  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
6186}
6187
6188static __inline__ __m512i __DEFAULT_FN_ATTRS
6189_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6190{
6191  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6192                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
6193                                           (__v16si)__W);
6194}
6195
6196static __inline__ __m512i __DEFAULT_FN_ATTRS
6197_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6198{
6199  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6200                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
6201                                           (__v16si)_mm512_setzero_si512());
6202}
6203
6204static __inline__ __m512i __DEFAULT_FN_ATTRS
6205_mm512_sllv_epi64(__m512i __X, __m512i __Y)
6206{
6207  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
6208}
6209
6210static __inline__ __m512i __DEFAULT_FN_ATTRS
6211_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6212{
6213  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6214                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
6215                                            (__v8di)__W);
6216}
6217
6218static __inline__ __m512i __DEFAULT_FN_ATTRS
6219_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6220{
6221  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6222                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
6223                                            (__v8di)_mm512_setzero_si512());
6224}
6225
6226static __inline__ __m512i __DEFAULT_FN_ATTRS
6227_mm512_sra_epi32(__m512i __A, __m128i __B)
6228{
6229  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
6230}
6231
6232static __inline__ __m512i __DEFAULT_FN_ATTRS
6233_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6234{
6235  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6236                                          (__v16si)_mm512_sra_epi32(__A, __B),
6237                                          (__v16si)__W);
6238}
6239
6240static __inline__ __m512i __DEFAULT_FN_ATTRS
6241_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6242{
6243  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6244                                          (__v16si)_mm512_sra_epi32(__A, __B),
6245                                          (__v16si)_mm512_setzero_si512());
6246}
6247
6248static __inline__ __m512i __DEFAULT_FN_ATTRS
6249_mm512_sra_epi64(__m512i __A, __m128i __B)
6250{
6251  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
6252}
6253
6254static __inline__ __m512i __DEFAULT_FN_ATTRS
6255_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6256{
6257  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6258                                           (__v8di)_mm512_sra_epi64(__A, __B),
6259                                           (__v8di)__W);
6260}
6261
6262static __inline__ __m512i __DEFAULT_FN_ATTRS
6263_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6264{
6265  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6266                                           (__v8di)_mm512_sra_epi64(__A, __B),
6267                                           (__v8di)_mm512_setzero_si512());
6268}
6269
6270static __inline__ __m512i __DEFAULT_FN_ATTRS
6271_mm512_srav_epi32(__m512i __X, __m512i __Y)
6272{
6273  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
6274}
6275
6276static __inline__ __m512i __DEFAULT_FN_ATTRS
6277_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6278{
6279  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6280                                           (__v16si)_mm512_srav_epi32(__X, __Y),
6281                                           (__v16si)__W);
6282}
6283
6284static __inline__ __m512i __DEFAULT_FN_ATTRS
6285_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6286{
6287  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6288                                           (__v16si)_mm512_srav_epi32(__X, __Y),
6289                                           (__v16si)_mm512_setzero_si512());
6290}
6291
6292static __inline__ __m512i __DEFAULT_FN_ATTRS
6293_mm512_srav_epi64(__m512i __X, __m512i __Y)
6294{
6295  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
6296}
6297
6298static __inline__ __m512i __DEFAULT_FN_ATTRS
6299_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6300{
6301  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6302                                            (__v8di)_mm512_srav_epi64(__X, __Y),
6303                                            (__v8di)__W);
6304}
6305
6306static __inline__ __m512i __DEFAULT_FN_ATTRS
6307_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6308{
6309  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6310                                            (__v8di)_mm512_srav_epi64(__X, __Y),
6311                                            (__v8di)_mm512_setzero_si512());
6312}
6313
6314static __inline__ __m512i __DEFAULT_FN_ATTRS
6315_mm512_srl_epi32(__m512i __A, __m128i __B)
6316{
6317  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
6318}
6319
6320static __inline__ __m512i __DEFAULT_FN_ATTRS
6321_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6322{
6323  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6324                                          (__v16si)_mm512_srl_epi32(__A, __B),
6325                                          (__v16si)__W);
6326}
6327
6328static __inline__ __m512i __DEFAULT_FN_ATTRS
6329_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6330{
6331  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6332                                          (__v16si)_mm512_srl_epi32(__A, __B),
6333                                          (__v16si)_mm512_setzero_si512());
6334}
6335
6336static __inline__ __m512i __DEFAULT_FN_ATTRS
6337_mm512_srl_epi64(__m512i __A, __m128i __B)
6338{
6339  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
6340}
6341
6342static __inline__ __m512i __DEFAULT_FN_ATTRS
6343_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6344{
6345  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6346                                           (__v8di)_mm512_srl_epi64(__A, __B),
6347                                           (__v8di)__W);
6348}
6349
6350static __inline__ __m512i __DEFAULT_FN_ATTRS
6351_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6352{
6353  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6354                                           (__v8di)_mm512_srl_epi64(__A, __B),
6355                                           (__v8di)_mm512_setzero_si512());
6356}
6357
6358static __inline__ __m512i __DEFAULT_FN_ATTRS
6359_mm512_srlv_epi32(__m512i __X, __m512i __Y)
6360{
6361  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
6362}
6363
6364static __inline__ __m512i __DEFAULT_FN_ATTRS
6365_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6366{
6367  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6368                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
6369                                           (__v16si)__W);
6370}
6371
6372static __inline__ __m512i __DEFAULT_FN_ATTRS
6373_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6374{
6375  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6376                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
6377                                           (__v16si)_mm512_setzero_si512());
6378}
6379
6380static __inline__ __m512i __DEFAULT_FN_ATTRS
6381_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6382{
6383  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
6384}
6385
6386static __inline__ __m512i __DEFAULT_FN_ATTRS
6387_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6388{
6389  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6390                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
6391                                            (__v8di)__W);
6392}
6393
6394static __inline__ __m512i __DEFAULT_FN_ATTRS
6395_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6396{
6397  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6398                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
6399                                            (__v8di)_mm512_setzero_si512());
6400}
6401
/* Expands to __builtin_ia32_pternlogd512_mask on A, B and C (viewed as
 * __v16si) with immediate imm and a fixed mask of (__mmask16)-1. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)-1); })
6407
/* Expands to __builtin_ia32_pternlogd512_mask on A, B and C (viewed as
 * __v16si) with immediate imm and mask U. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U)); })
6413
/* Expands to __builtin_ia32_pternlogd512_maskz on A, B and C (viewed as
 * __v16si) with immediate imm and mask U. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), \
      (int)(imm), (__mmask16)(U)); })
6419
/* Expands to __builtin_ia32_pternlogq512_mask on A, B and C (viewed as
 * __v8di) with immediate imm and a fixed mask of (__mmask8)-1. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)-1); })
6425
/* Expands to __builtin_ia32_pternlogq512_mask on A, B and C (viewed as
 * __v8di) with immediate imm and mask U. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)(U)); })
6431
/* Expands to __builtin_ia32_pternlogq512_maskz on A, B and C (viewed as
 * __v8di) with immediate imm and mask U. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), \
      (int)(imm), (__mmask8)(U)); })
6437
6438#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvtsd2si64 on A (viewed as __v2df) with R as
 * the second argument; the result is cast to long long.  Guarded by
 * __x86_64__. */
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), (int)(R)); })
6441#endif
6442
/* Expands to __builtin_ia32_vcvtsd2si32 on A (viewed as __v2df) with R as
 * the second argument; the result is cast to int. */
#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), (int)(R)); })
6445
/* Expands to __builtin_ia32_vcvtsd2si32 on A (viewed as __v2df) with R as
 * the second argument; the result is cast to int. */
#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), (int)(R)); })
6448
/* Expands to __builtin_ia32_vcvtsd2usi32 on A (viewed as __v2df) with R as
 * the second argument; the result is cast to unsigned int. */
#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtsd2usi32( \
      (__v2df)(__m128d)(A), (int)(R)); })
6451
6452static __inline__ unsigned __DEFAULT_FN_ATTRS
6453_mm_cvtsd_u32 (__m128d __A)
6454{
6455  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6456             _MM_FROUND_CUR_DIRECTION);
6457}
6458
6459#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvtsd2usi64 on A (viewed as __v2df) with R as
 * the second argument; the result is cast to unsigned long long.  Guarded by
 * __x86_64__. */
#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtsd2usi64( \
      (__v2df)(__m128d)(A), (int)(R)); })
6463
6464static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6465_mm_cvtsd_u64 (__m128d __A)
6466{
6467  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6468                 __A,
6469                 _MM_FROUND_CUR_DIRECTION);
6470}
6471#endif
6472
/* Expands to __builtin_ia32_vcvtss2si32 on A (viewed as __v4sf) with R as
 * the second argument; the result is cast to int. */
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), (int)(R)); })
6475
/* Expands to __builtin_ia32_vcvtss2si32 on A (viewed as __v4sf) with R as
 * the second argument; the result is cast to int. */
#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), (int)(R)); })
6478
6479#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvtss2si64 on A (viewed as __v4sf) with R as
 * the second argument; the result is cast to long long.  Guarded by
 * __x86_64__. */
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), (int)(R)); })
6482
/* Expands to __builtin_ia32_vcvtss2si64 on A (viewed as __v4sf) with R as
 * the second argument; the result is cast to long long.  Guarded by
 * __x86_64__. */
#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), (int)(R)); })
6485#endif
6486
/* Expands to __builtin_ia32_vcvtss2usi32 on A (viewed as __v4sf) with R
   forwarded as the int second argument; the result is cast to unsigned int. */
#define _mm_cvt_roundss_u32(A, R) \
  __extension__ ({ \
    (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), \
                                              (int)(R)); \
  })
6489
6490static __inline__ unsigned __DEFAULT_FN_ATTRS
6491_mm_cvtss_u32 (__m128 __A)
6492{
6493  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6494             _MM_FROUND_CUR_DIRECTION);
6495}
6496
6497#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvtss2usi64 on A (viewed as __v4sf) with R
   forwarded as the int second argument; the result is cast to
   unsigned long long. */
#define _mm_cvt_roundss_u64(A, R) \
  __extension__ ({ \
    (unsigned long long) \
        __builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), (int)(R)); \
  })
6501
6502static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6503_mm_cvtss_u64 (__m128 __A)
6504{
6505  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6506                 __A,
6507                 _MM_FROUND_CUR_DIRECTION);
6508}
6509#endif
6510
/* Expands to __builtin_ia32_vcvttsd2si32 on A (viewed as __v2df) with R
   forwarded as the int second argument; the result is cast to int. */
#define _mm_cvtt_roundsd_i32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), \
                                     (int)(R)); \
  })
6513
/* Alternate spelling: same expansion as _mm_cvtt_roundsd_i32, i.e. a call to
   __builtin_ia32_vcvttsd2si32 with A as __v2df and R as int, cast to int. */
#define _mm_cvtt_roundsd_si32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), \
                                     (int)(R)); \
  })
6516
6517static __inline__ int __DEFAULT_FN_ATTRS
6518_mm_cvttsd_i32 (__m128d __A)
6519{
6520  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6521              _MM_FROUND_CUR_DIRECTION);
6522}
6523
6524#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvttsd2si64 on A (viewed as __v2df) with R
   forwarded as the int second argument; the result is cast to long long. */
#define _mm_cvtt_roundsd_si64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), \
                                           (int)(R)); \
  })
6527
/* Alternate spelling: same expansion as _mm_cvtt_roundsd_si64, i.e. a call
   to __builtin_ia32_vcvttsd2si64 with A as __v2df and R as int, cast to
   long long. */
#define _mm_cvtt_roundsd_i64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), \
                                           (int)(R)); \
  })
6530
6531static __inline__ long long __DEFAULT_FN_ATTRS
6532_mm_cvttsd_i64 (__m128d __A)
6533{
6534  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6535              _MM_FROUND_CUR_DIRECTION);
6536}
6537#endif
6538
/* Expands to __builtin_ia32_vcvttsd2usi32 on A (viewed as __v2df) with R
   forwarded as the int second argument; the result is cast to unsigned int. */
#define _mm_cvtt_roundsd_u32(A, R) \
  __extension__ ({ \
    (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), \
                                               (int)(R)); \
  })
6541
6542static __inline__ unsigned __DEFAULT_FN_ATTRS
6543_mm_cvttsd_u32 (__m128d __A)
6544{
6545  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6546              _MM_FROUND_CUR_DIRECTION);
6547}
6548
6549#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvttsd2usi64 on A (viewed as __v2df) with R
   forwarded as the int second argument; the result is cast to
   unsigned long long. */
#define _mm_cvtt_roundsd_u64(A, R) \
  __extension__ ({ \
    (unsigned long long) \
        __builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), (int)(R)); \
  })
6553
6554static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6555_mm_cvttsd_u64 (__m128d __A)
6556{
6557  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6558                  __A,
6559                  _MM_FROUND_CUR_DIRECTION);
6560}
6561#endif
6562
/* Expands to __builtin_ia32_vcvttss2si32 on A (viewed as __v4sf) with R
   forwarded as the int second argument; the result is cast to int. */
#define _mm_cvtt_roundss_i32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), \
                                     (int)(R)); \
  })
6565
/* Alternate spelling: same expansion as _mm_cvtt_roundss_i32, i.e. a call to
   __builtin_ia32_vcvttss2si32 with A as __v4sf and R as int, cast to int. */
#define _mm_cvtt_roundss_si32(A, R) \
  __extension__ ({ \
    (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), \
                                     (int)(R)); \
  })
6568
6569static __inline__ int __DEFAULT_FN_ATTRS
6570_mm_cvttss_i32 (__m128 __A)
6571{
6572  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6573              _MM_FROUND_CUR_DIRECTION);
6574}
6575
6576#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvttss2si64 on A (viewed as __v4sf) with R
   forwarded as the int second argument; the result is cast to long long. */
#define _mm_cvtt_roundss_i64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), \
                                           (int)(R)); \
  })
6579
/* Alternate spelling: same expansion as _mm_cvtt_roundss_i64, i.e. a call to
   __builtin_ia32_vcvttss2si64 with A as __v4sf and R as int, cast to
   long long. */
#define _mm_cvtt_roundss_si64(A, R) \
  __extension__ ({ \
    (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), \
                                           (int)(R)); \
  })
6582
6583static __inline__ long long __DEFAULT_FN_ATTRS
6584_mm_cvttss_i64 (__m128 __A)
6585{
6586  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6587              _MM_FROUND_CUR_DIRECTION);
6588}
6589#endif
6590
/* Expands to __builtin_ia32_vcvttss2usi32 on A (viewed as __v4sf) with R
   forwarded as the int second argument; the result is cast to unsigned int. */
#define _mm_cvtt_roundss_u32(A, R) \
  __extension__ ({ \
    (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), \
                                               (int)(R)); \
  })
6593
6594static __inline__ unsigned __DEFAULT_FN_ATTRS
6595_mm_cvttss_u32 (__m128 __A)
6596{
6597  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6598              _MM_FROUND_CUR_DIRECTION);
6599}
6600
6601#ifdef __x86_64__
/* Expands to __builtin_ia32_vcvttss2usi64 on A (viewed as __v4sf) with R
   forwarded as the int second argument; the result is cast to
   unsigned long long. */
#define _mm_cvtt_roundss_u64(A, R) \
  __extension__ ({ \
    (unsigned long long) \
        __builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), (int)(R)); \
  })
6605
6606static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6607_mm_cvttss_u64 (__m128 __A)
6608{
6609  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6610                  __A,
6611                  _MM_FROUND_CUR_DIRECTION);
6612}
6613#endif
6614
6615static __inline__ __m512d __DEFAULT_FN_ATTRS
6616_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6617            __m512d __B)
6618{
6619  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6620              (__v8di) __I
6621              /* idx */ ,
6622              (__v8df) __B,
6623              (__mmask8) __U);
6624}
6625
6626static __inline__ __m512 __DEFAULT_FN_ATTRS
6627_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6628            __m512 __B)
6629{
6630  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6631                   (__v16si) __I
6632                   /* idx */ ,
6633                   (__v16sf) __B,
6634                   (__mmask16) __U);
6635}
6636
6637static __inline__ __m512i __DEFAULT_FN_ATTRS
6638_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6639         __mmask8 __U, __m512i __B)
6640{
6641  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6642                   (__v8di) __I
6643                   /* idx */ ,
6644                   (__v8di) __B,
6645                   (__mmask8) __U);
6646}
6647
/* Shuffles X with __builtin_shufflevector (second operand is an undefined
   vector; every index is below 8, so only X is read).  Result element j is
   X[2*(j/2) + ((C >> j) & 1)]: bit j of C picks the low or high element of
   the pair that contains j. */
#define _mm512_permute_pd(X, C) \
  __extension__ ({ \
    (__m512d)__builtin_shufflevector( \
        (__v8df)(__m512d)(X), (__v8df)_mm512_undefined_pd(), \
        0 + (((C) >> 0) & 0x1), 0 + (((C) >> 1) & 0x1), \
        2 + (((C) >> 2) & 0x1), 2 + (((C) >> 3) & 0x1), \
        4 + (((C) >> 4) & 0x1), 4 + (((C) >> 5) & 0x1), \
        6 + (((C) >> 6) & 0x1), 6 + (((C) >> 7) & 0x1)); \
  })
6659
/* Passes mask U, the result of _mm512_permute_pd(X, C) and W, in that order,
   to __builtin_ia32_selectpd_512. */
#define _mm512_mask_permute_pd(W, U, X, C) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_selectpd_512( \
        (__mmask8)(U), \
        (__v8df)_mm512_permute_pd((X), (C)), \
        (__v8df)(__m512d)(W)); \
  })
6664
/* Passes mask U, the result of _mm512_permute_pd(X, C) and
   _mm512_setzero_pd(), in that order, to __builtin_ia32_selectpd_512. */
#define _mm512_maskz_permute_pd(U, X, C) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_selectpd_512( \
        (__mmask8)(U), \
        (__v8df)_mm512_permute_pd((X), (C)), \
        (__v8df)_mm512_setzero_pd()); \
  })
6669
/* Shuffles X with __builtin_shufflevector (second operand is an undefined
   vector; every index is below 16, so only X is read).  Result element j is
   X[4*(j/4) + ((C >> (2*(j%4))) & 3)]: the four 2-bit fields of C are
   reused for each group of four elements. */
#define _mm512_permute_ps(X, C) \
  __extension__ ({ \
    (__m512)__builtin_shufflevector( \
        (__v16sf)(__m512)(X), (__v16sf)_mm512_undefined_ps(), \
        0  + (((C) >> 0) & 0x3), 0  + (((C) >> 2) & 0x3), \
        0  + (((C) >> 4) & 0x3), 0  + (((C) >> 6) & 0x3), \
        4  + (((C) >> 0) & 0x3), 4  + (((C) >> 2) & 0x3), \
        4  + (((C) >> 4) & 0x3), 4  + (((C) >> 6) & 0x3), \
        8  + (((C) >> 0) & 0x3), 8  + (((C) >> 2) & 0x3), \
        8  + (((C) >> 4) & 0x3), 8  + (((C) >> 6) & 0x3), \
        12 + (((C) >> 0) & 0x3), 12 + (((C) >> 2) & 0x3), \
        12 + (((C) >> 4) & 0x3), 12 + (((C) >> 6) & 0x3)); \
  })
6689
/* Passes mask U, the result of _mm512_permute_ps(X, C) and W, in that order,
   to __builtin_ia32_selectps_512. */
#define _mm512_mask_permute_ps(W, U, X, C) \
  __extension__ ({ \
    (__m512)__builtin_ia32_selectps_512( \
        (__mmask16)(U), \
        (__v16sf)_mm512_permute_ps((X), (C)), \
        (__v16sf)(__m512)(W)); \
  })
6694
/* Passes mask U, the result of _mm512_permute_ps(X, C) and
   _mm512_setzero_ps(), in that order, to __builtin_ia32_selectps_512. */
#define _mm512_maskz_permute_ps(U, X, C) \
  __extension__ ({ \
    (__m512)__builtin_ia32_selectps_512( \
        (__mmask16)(U), \
        (__v16sf)_mm512_permute_ps((X), (C)), \
        (__v16sf)_mm512_setzero_ps()); \
  })
6699
6700static __inline__ __m512d __DEFAULT_FN_ATTRS
6701_mm512_permutevar_pd(__m512d __A, __m512i __C)
6702{
6703  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6704}
6705
6706static __inline__ __m512d __DEFAULT_FN_ATTRS
6707_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6708{
6709  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6710                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6711                                         (__v8df)__W);
6712}
6713
6714static __inline__ __m512d __DEFAULT_FN_ATTRS
6715_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6716{
6717  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6718                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6719                                         (__v8df)_mm512_setzero_pd());
6720}
6721
6722static __inline__ __m512 __DEFAULT_FN_ATTRS
6723_mm512_permutevar_ps(__m512 __A, __m512i __C)
6724{
6725  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6726}
6727
6728static __inline__ __m512 __DEFAULT_FN_ATTRS
6729_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6730{
6731  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6732                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6733                                        (__v16sf)__W);
6734}
6735
6736static __inline__ __m512 __DEFAULT_FN_ATTRS
6737_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6738{
6739  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6740                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6741                                        (__v16sf)_mm512_setzero_ps());
6742}
6743
6744static __inline __m512d __DEFAULT_FN_ATTRS
6745_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6746{
6747  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6748                    /* idx */ ,
6749                    (__v8df) __A,
6750                    (__v8df) __B,
6751                    (__mmask8) -1);
6752}
6753
6754static __inline__ __m512d __DEFAULT_FN_ATTRS
6755_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6756{
6757  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6758                    /* idx */ ,
6759                    (__v8df) __A,
6760                    (__v8df) __B,
6761                    (__mmask8) __U);
6762}
6763
6764static __inline__ __m512d __DEFAULT_FN_ATTRS
6765_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6766            __m512d __B)
6767{
6768  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6769                                                         /* idx */ ,
6770                                                         (__v8df) __A,
6771                                                         (__v8df) __B,
6772                                                         (__mmask8) __U);
6773}
6774
6775static __inline __m512 __DEFAULT_FN_ATTRS
6776_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6777{
6778  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6779                                                         /* idx */ ,
6780                                                         (__v16sf) __A,
6781                                                         (__v16sf) __B,
6782                                                         (__mmask16) -1);
6783}
6784
6785static __inline__ __m512 __DEFAULT_FN_ATTRS
6786_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6787{
6788  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6789                                                         /* idx */ ,
6790                                                         (__v16sf) __A,
6791                                                         (__v16sf) __B,
6792                                                         (__mmask16) __U);
6793}
6794
6795static __inline__ __m512 __DEFAULT_FN_ATTRS
6796_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6797            __m512 __B)
6798{
6799  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6800                                                        /* idx */ ,
6801                                                        (__v16sf) __A,
6802                                                        (__v16sf) __B,
6803                                                        (__mmask16) __U);
6804}
6805
6806static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6807_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
6808{
6809  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6810             (__v16si) __B,
6811             (__mmask16) -1);
6812}
6813
6814static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6815_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
6816{
6817  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6818             (__v16si) __B, __U);
6819}
6820
6821static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6822_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
6823{
6824  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6825            (__v8di) __B,
6826            (__mmask8) -1);
6827}
6828
6829static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6830_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
6831{
6832  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6833            (__v8di) __B, __U);
6834}
6835
/* Expands to __builtin_ia32_cvttpd2udq512_mask(A, undefined __v8si,
   (__mmask8)-1, R); the result is cast to __m256i. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8si)_mm256_undefined_si256(), \
        (__mmask8)-1, (int)(R)); \
  })
6840
/* Expands to __builtin_ia32_cvttpd2udq512_mask(A, W, U, R) with W viewed as
   __v8si; the result is cast to __m256i. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8si)(__m256i)(W), \
        (__mmask8)(U), (int)(R)); \
  })
6845
/* Expands to __builtin_ia32_cvttpd2udq512_mask(A, zero __v8si, U, R); the
   second operand comes from _mm256_setzero_si256() and the result is cast to
   __m256i. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  __extension__ ({ \
    (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8si)_mm256_setzero_si256(), \
        (__mmask8)(U), (int)(R)); \
  })
6850
6851static __inline__ __m256i __DEFAULT_FN_ATTRS
6852_mm512_cvttpd_epu32 (__m512d __A)
6853{
6854  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6855                  (__v8si)
6856                  _mm256_undefined_si256 (),
6857                  (__mmask8) -1,
6858                  _MM_FROUND_CUR_DIRECTION);
6859}
6860
6861static __inline__ __m256i __DEFAULT_FN_ATTRS
6862_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6863{
6864  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6865                  (__v8si) __W,
6866                  (__mmask8) __U,
6867                  _MM_FROUND_CUR_DIRECTION);
6868}
6869
6870static __inline__ __m256i __DEFAULT_FN_ATTRS
6871_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6872{
6873  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6874                  (__v8si)
6875                  _mm256_setzero_si256 (),
6876                  (__mmask8) __U,
6877                  _MM_FROUND_CUR_DIRECTION);
6878}
6879
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero vector,
   (__mmask8)-1, imm, R); the result is cast to __m128d. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalesd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, (int)(imm), (int)(R)); \
  })
6886
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero vector,
   (__mmask8)-1, imm, _MM_FROUND_CUR_DIRECTION); the result is cast to
   __m128d. */
#define _mm_roundscale_sd(A, B, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalesd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION); \
  })
6893
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, W, U, imm,
   _MM_FROUND_CUR_DIRECTION); the result is cast to __m128d. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalesd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U), (int)(imm), _MM_FROUND_CUR_DIRECTION); \
  })
6900
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, W, U, I, R); the
   result is cast to __m128d. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalesd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U), (int)(I), (int)(R)); \
  })
6907
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero vector, U, I,
   _MM_FROUND_CUR_DIRECTION); the result is cast to __m128d. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalesd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION); \
  })
6914
/* Expands to __builtin_ia32_rndscalesd_round_mask(A, B, zero vector, U, I,
   R); the result is cast to __m128d. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_rndscalesd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U), (int)(I), (int)(R)); \
  })
6921
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero vector,
   (__mmask8)-1, imm, R); the result is cast to __m128. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaless_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, (int)(imm), (int)(R)); \
  })
6928
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero vector,
   (__mmask8)-1, imm, _MM_FROUND_CUR_DIRECTION); the result is cast to
   __m128. */
#define _mm_roundscale_ss(A, B, imm) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaless_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION); \
  })
6935
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, W, U, I,
   _MM_FROUND_CUR_DIRECTION); the result is cast to __m128. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaless_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION); \
  })
6942
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, W, U, I, R); the
   result is cast to __m128. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaless_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U), (int)(I), (int)(R)); \
  })
6949
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero vector, U, I,
   _MM_FROUND_CUR_DIRECTION); the result is cast to __m128. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaless_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION); \
  })
6956
/* Expands to __builtin_ia32_rndscaless_round_mask(A, B, zero vector, U, I,
   R); the result is cast to __m128. */
#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_rndscaless_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)(U), (int)(I), (int)(R)); \
  })
6963
/* Expands to __builtin_ia32_scalefpd512_mask(A, B, undefined vector,
   (__mmask8)-1, R); the result is cast to __m512d. */
#define _mm512_scalef_round_pd(A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_scalefpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
        (__v8df)_mm512_undefined_pd(), \
        (__mmask8)-1, (int)(R)); \
  })
6969
/* Expands to __builtin_ia32_scalefpd512_mask(A, B, W, U, R); the result is
   cast to __m512d. */
#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_scalefpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(W), \
        (__mmask8)(U), (int)(R)); \
  })
6975
/* Expands to __builtin_ia32_scalefpd512_mask(A, B, zero vector, U, R); the
   result is cast to __m512d. */
#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_scalefpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
        (__v8df)_mm512_setzero_pd(), \
        (__mmask8)(U), (int)(R)); \
  })
6981
6982static __inline__ __m512d __DEFAULT_FN_ATTRS
6983_mm512_scalef_pd (__m512d __A, __m512d __B)
6984{
6985  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6986                (__v8df) __B,
6987                (__v8df)
6988                _mm512_undefined_pd (),
6989                (__mmask8) -1,
6990                _MM_FROUND_CUR_DIRECTION);
6991}
6992
6993static __inline__ __m512d __DEFAULT_FN_ATTRS
6994_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6995{
6996  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6997                (__v8df) __B,
6998                (__v8df) __W,
6999                (__mmask8) __U,
7000                _MM_FROUND_CUR_DIRECTION);
7001}
7002
7003static __inline__ __m512d __DEFAULT_FN_ATTRS
7004_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
7005{
7006  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
7007                (__v8df) __B,
7008                (__v8df)
7009                _mm512_setzero_pd (),
7010                (__mmask8) __U,
7011                _MM_FROUND_CUR_DIRECTION);
7012}
7013
/* Expands to __builtin_ia32_scalefps512_mask(A, B, undefined vector,
   (__mmask16)-1, R); the result is cast to __m512. */
#define _mm512_scalef_round_ps(A, B, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_scalefps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
        (__v16sf)_mm512_undefined_ps(), \
        (__mmask16)-1, (int)(R)); \
  })
7019
/* Expands to __builtin_ia32_scalefps512_mask(A, B, W, U, R); the result is
   cast to __m512. */
#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_scalefps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(W), \
        (__mmask16)(U), (int)(R)); \
  })
7025
/* Expands to __builtin_ia32_scalefps512_mask(A, B, zero vector, U, R); the
   result is cast to __m512. */
#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_scalefps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
        (__v16sf)_mm512_setzero_ps(), \
        (__mmask16)(U), (int)(R)); \
  })
7031
7032static __inline__ __m512 __DEFAULT_FN_ATTRS
7033_mm512_scalef_ps (__m512 __A, __m512 __B)
7034{
7035  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
7036               (__v16sf) __B,
7037               (__v16sf)
7038               _mm512_undefined_ps (),
7039               (__mmask16) -1,
7040               _MM_FROUND_CUR_DIRECTION);
7041}
7042
7043static __inline__ __m512 __DEFAULT_FN_ATTRS
7044_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7045{
7046  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
7047               (__v16sf) __B,
7048               (__v16sf) __W,
7049               (__mmask16) __U,
7050               _MM_FROUND_CUR_DIRECTION);
7051}
7052
7053static __inline__ __m512 __DEFAULT_FN_ATTRS
7054_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
7055{
7056  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
7057               (__v16sf) __B,
7058               (__v16sf)
7059               _mm512_setzero_ps (),
7060               (__mmask16) __U,
7061               _MM_FROUND_CUR_DIRECTION);
7062}
7063
/* Expands to __builtin_ia32_scalefsd_round_mask(A, B, zero vector,
   (__mmask8)-1, R); the result is cast to __m128d. */
#define _mm_scalef_round_sd(A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_scalefsd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)-1, (int)(R)); \
  })
7069
7070static __inline__ __m128d __DEFAULT_FN_ATTRS
7071_mm_scalef_sd (__m128d __A, __m128d __B)
7072{
7073  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
7074              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
7075              (__mmask8) -1,
7076              _MM_FROUND_CUR_DIRECTION);
7077}
7078
7079static __inline__ __m128d __DEFAULT_FN_ATTRS
7080_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7081{
7082 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
7083                 (__v2df) __B,
7084                (__v2df) __W,
7085                (__mmask8) __U,
7086                _MM_FROUND_CUR_DIRECTION);
7087}
7088
/* Expands to __builtin_ia32_scalefsd_round_mask(A, B, W, U, R); the result
   is cast to __m128d. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_scalefsd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)(__m128d)(W), \
        (__mmask8)(U), (int)(R)); \
  })
7094
7095static __inline__ __m128d __DEFAULT_FN_ATTRS
7096_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
7097{
7098 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
7099                 (__v2df) __B,
7100                (__v2df) _mm_setzero_pd (),
7101                (__mmask8) __U,
7102                _MM_FROUND_CUR_DIRECTION);
7103}
7104
/* Expands to __builtin_ia32_scalefsd_round_mask(A, B, zero vector, U, R);
   the result is cast to __m128d. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  __extension__ ({ \
    (__m128d)__builtin_ia32_scalefsd_round_mask( \
        (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
        (__v2df)_mm_setzero_pd(), \
        (__mmask8)(U), (int)(R)); \
  })
7110
/* Expands to __builtin_ia32_scalefss_round_mask(A, B, zero vector,
   (__mmask8)-1, R); the result is cast to __m128. */
#define _mm_scalef_round_ss(A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_scalefss_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (__v4sf)_mm_setzero_ps(), \
        (__mmask8)-1, (int)(R)); \
  })
7116
7117static __inline__ __m128 __DEFAULT_FN_ATTRS
7118_mm_scalef_ss (__m128 __A, __m128 __B)
7119{
7120  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
7121             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
7122             (__mmask8) -1,
7123             _MM_FROUND_CUR_DIRECTION);
7124}
7125
7126static __inline__ __m128 __DEFAULT_FN_ATTRS
7127_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7128{
7129 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7130                (__v4sf) __B,
7131                (__v4sf) __W,
7132                (__mmask8) __U,
7133                _MM_FROUND_CUR_DIRECTION);
7134}
7135
/* Expands to __builtin_ia32_scalefss_round_mask(A, B, W, U, R); the result
   is cast to __m128. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  __extension__ ({ \
    (__m128)__builtin_ia32_scalefss_round_mask( \
        (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
        (__v4sf)(__m128)(W), \
        (__mmask8)(U), (int)(R)); \
  })
7141
7142static __inline__ __m128 __DEFAULT_FN_ATTRS
7143_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
7144{
7145 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7146                 (__v4sf) __B,
7147                (__v4sf) _mm_setzero_ps (),
7148                (__mmask8) __U,
7149                _MM_FROUND_CUR_DIRECTION);
7150}
7151
/* Expands to __builtin_ia32_scalefss_round_mask(A, B, zero vector, U, R);
   the result is cast to __m128.
   R is forwarded as the builtin's final argument, as in the other *_round_*
   scalef macros; it was previously dropped in favour of
   _MM_FROUND_CUR_DIRECTION, so the caller's rounding argument had no
   effect. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
7158
7159static __inline__ __m512i __DEFAULT_FN_ATTRS
7160_mm512_srai_epi32(__m512i __A, int __B)
7161{
7162  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
7163}
7164
7165static __inline__ __m512i __DEFAULT_FN_ATTRS
7166_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
7167{
7168  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
7169                                         (__v16si)_mm512_srai_epi32(__A, __B), \
7170                                         (__v16si)__W);
7171}
7172
7173static __inline__ __m512i __DEFAULT_FN_ATTRS
7174_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
7175  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
7176                                         (__v16si)_mm512_srai_epi32(__A, __B), \
7177                                         (__v16si)_mm512_setzero_si512());
7178}
7179
7180static __inline__ __m512i __DEFAULT_FN_ATTRS
7181_mm512_srai_epi64(__m512i __A, int __B)
7182{
7183  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
7184}
7185
7186static __inline__ __m512i __DEFAULT_FN_ATTRS
7187_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
7188{
7189  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
7190                                          (__v8di)_mm512_srai_epi64(__A, __B), \
7191                                          (__v8di)__W);
7192}
7193
7194static __inline__ __m512i __DEFAULT_FN_ATTRS
7195_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
7196{
7197  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
7198                                          (__v8di)_mm512_srai_epi64(__A, __B), \
7199                                          (__v8di)_mm512_setzero_si512());
7200}
7201
/* Macro wrapper: shuf_f32x4_mask builtin on A and B with control (int)(imm),
 * an undefined fourth operand and an all-ones 16-bit mask; yields __m512. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1); })
7207
/* Macro wrapper: shuf_f32x4_mask builtin on A and B with control (int)(imm),
 * W as the fourth operand and U as the 16-bit mask; yields __m512. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)(__m512)(W), (__mmask16)(U)); })
7213
/* Macro wrapper: shuf_f32x4_mask builtin on A and B with control (int)(imm),
 * a zero vector as the fourth operand and U as the 16-bit mask. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(U)); })
7219
/* Macro wrapper: shuf_f64x2_mask builtin on A and B with control (int)(imm),
 * an undefined fourth operand and an all-ones 8-bit mask; yields __m512d. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1); })
7225
/* Macro wrapper: shuf_f64x2_mask builtin on A and B with control (int)(imm),
 * W as the fourth operand and U as the 8-bit mask; yields __m512d. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)(__m512d)(W), (__mmask8)(U)); })
7231
/* Macro wrapper: shuf_f64x2_mask builtin on A and B with control (int)(imm),
 * a zero vector as the fourth operand and U as the 8-bit mask. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U)); })
7237
/* Macro wrapper: shuf_i32x4_mask builtin on A and B with control (int)(imm),
 * a zero vector as the fourth operand and an all-ones 16-bit mask. */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)-1); })
7243
/* Macro wrapper: shuf_i32x4_mask builtin on A and B with control (int)(imm),
 * W as the fourth operand and U as the 16-bit mask; yields __m512i. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)(__m512i)(W), (__mmask16)(U)); })
7249
/* Macro wrapper: shuf_i32x4_mask builtin on A and B with control (int)(imm),
 * a zero vector as the fourth operand and U as the 16-bit mask. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), (__mmask16)(U)); })
7255
/* Macro wrapper: shuf_i64x2_mask builtin on A and B with control (int)(imm),
 * a zero vector as the fourth operand and an all-ones 8-bit mask. */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)-1); })
7261
/* Macro wrapper: shuf_i64x2_mask builtin on A and B with control (int)(imm),
 * W as the fourth operand and U as the 8-bit mask; yields __m512i. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)(__m512i)(W), (__mmask8)(U)); })
7267
/* Macro wrapper: shuf_i64x2_mask builtin on A and B with control (int)(imm),
 * a zero vector as the fourth operand and U as the 8-bit mask. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), (__mmask8)(U)); })
7273
/* Double-precision shuffle built on __builtin_shufflevector over A and B.
 * Result slot i uses bit i of M as a 0/1 offset added to a fixed base:
 * even slots use bases 0, 2, 4, 6 and odd slots use bases 8, 10, 12, 14
 * (indices of 8 and above address the second vector, B). */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (((M) >> 0) & 0x1) + 0,  (((M) >> 1) & 0x1) + 8, \
      (((M) >> 2) & 0x1) + 2,  (((M) >> 3) & 0x1) + 10, \
      (((M) >> 4) & 0x1) + 4,  (((M) >> 5) & 0x1) + 12, \
      (((M) >> 6) & 0x1) + 6,  (((M) >> 7) & 0x1) + 14); })
7285
/* Evaluates _mm512_shuffle_pd(A, B, M) and combines it with W through the
 * selectpd_512 builtin under the 8-bit mask U. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)(__m512d)(W)); })
7290
/* Evaluates _mm512_shuffle_pd(A, B, M) and combines it with a zero vector
 * through the selectpd_512 builtin under the 8-bit mask U. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)_mm512_setzero_pd()); })
7295
/* Single-precision shuffle built on __builtin_shufflevector over A and B.
 * In every group of four result slots, the first two indices are relative
 * to A (bases 0/4/8/12) and the last two relative to B (bases 16/20/24/28);
 * each base is offset by a 2-bit field of M (bits 1:0, 3:2, 5:4 and 7:6, in
 * that order).  The shuffle yields 16 floats, so the result is cast to
 * __m512; a cast to __m512d would give the macro the wrong static type. */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })
7315
/* Evaluates _mm512_shuffle_ps(A, B, M) and combines it with W through the
 * selectps_512 builtin under the 16-bit mask U. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)(__m512)(W)); })
7320
/* Evaluates _mm512_shuffle_ps(A, B, M) and combines it with a zero vector
 * through the selectps_512 builtin under the 16-bit mask U. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)_mm512_setzero_ps()); })
7325
/* Macro wrapper: sqrtsd_round_mask builtin on A and B with a zeroed third
 * operand, an all-ones mask and rounding argument (int)(R). */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R)); })
7331
7332static __inline__ __m128d __DEFAULT_FN_ATTRS
7333_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7334{
7335 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7336                 (__v2df) __B,
7337                (__v2df) __W,
7338                (__mmask8) __U,
7339                _MM_FROUND_CUR_DIRECTION);
7340}
7341
/* Macro wrapper: sqrtsd_round_mask builtin on A and B with W as the third
 * operand, mask U and rounding argument (int)(R). */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R)); })
7347
7348static __inline__ __m128d __DEFAULT_FN_ATTRS
7349_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
7350{
7351 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7352                 (__v2df) __B,
7353                (__v2df) _mm_setzero_pd (),
7354                (__mmask8) __U,
7355                _MM_FROUND_CUR_DIRECTION);
7356}
7357
/* Macro wrapper: sqrtsd_round_mask builtin on A and B with a zeroed third
 * operand, mask U and rounding argument (int)(R). */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R)); })
7363
/* Macro wrapper: sqrtss_round_mask builtin on A and B with a zeroed third
 * operand, an all-ones mask and rounding argument (int)(R). */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R)); })
7369
7370static __inline__ __m128 __DEFAULT_FN_ATTRS
7371_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7372{
7373 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7374                 (__v4sf) __B,
7375                (__v4sf) __W,
7376                (__mmask8) __U,
7377                _MM_FROUND_CUR_DIRECTION);
7378}
7379
/* Macro wrapper: sqrtss_round_mask builtin on A and B with W as the third
 * operand, mask U and rounding argument (int)(R). */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R)); })
7385
7386static __inline__ __m128 __DEFAULT_FN_ATTRS
7387_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
7388{
7389 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7390                 (__v4sf) __B,
7391                (__v4sf) _mm_setzero_ps (),
7392                (__mmask8) __U,
7393                _MM_FROUND_CUR_DIRECTION);
7394}
7395
/* Macro wrapper: sqrtss_round_mask builtin on A and B with a zeroed third
 * operand, mask U and rounding argument (int)(R). */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R)); })
7401
7402static __inline__ __m512 __DEFAULT_FN_ATTRS
7403_mm512_broadcast_f32x4(__m128 __A)
7404{
7405  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
7406                                         0, 1, 2, 3, 0, 1, 2, 3,
7407                                         0, 1, 2, 3, 0, 1, 2, 3);
7408}
7409
7410static __inline__ __m512 __DEFAULT_FN_ATTRS
7411_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
7412{
7413  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7414                                           (__v16sf)_mm512_broadcast_f32x4(__A),
7415                                           (__v16sf)__O);
7416}
7417
7418static __inline__ __m512 __DEFAULT_FN_ATTRS
7419_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
7420{
7421  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7422                                           (__v16sf)_mm512_broadcast_f32x4(__A),
7423                                           (__v16sf)_mm512_setzero_ps());
7424}
7425
7426static __inline__ __m512d __DEFAULT_FN_ATTRS
7427_mm512_broadcast_f64x4(__m256d __A)
7428{
7429  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
7430                                          0, 1, 2, 3, 0, 1, 2, 3);
7431}
7432
7433static __inline__ __m512d __DEFAULT_FN_ATTRS
7434_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
7435{
7436  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7437                                            (__v8df)_mm512_broadcast_f64x4(__A),
7438                                            (__v8df)__O);
7439}
7440
7441static __inline__ __m512d __DEFAULT_FN_ATTRS
7442_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
7443{
7444  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7445                                            (__v8df)_mm512_broadcast_f64x4(__A),
7446                                            (__v8df)_mm512_setzero_pd());
7447}
7448
7449static __inline__ __m512i __DEFAULT_FN_ATTRS
7450_mm512_broadcast_i32x4(__m128i __A)
7451{
7452  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
7453                                          0, 1, 2, 3, 0, 1, 2, 3,
7454                                          0, 1, 2, 3, 0, 1, 2, 3);
7455}
7456
7457static __inline__ __m512i __DEFAULT_FN_ATTRS
7458_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
7459{
7460  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7461                                           (__v16si)_mm512_broadcast_i32x4(__A),
7462                                           (__v16si)__O);
7463}
7464
7465static __inline__ __m512i __DEFAULT_FN_ATTRS
7466_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
7467{
7468  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7469                                           (__v16si)_mm512_broadcast_i32x4(__A),
7470                                           (__v16si)_mm512_setzero_si512());
7471}
7472
7473static __inline__ __m512i __DEFAULT_FN_ATTRS
7474_mm512_broadcast_i64x4(__m256i __A)
7475{
7476  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
7477                                          0, 1, 2, 3, 0, 1, 2, 3);
7478}
7479
7480static __inline__ __m512i __DEFAULT_FN_ATTRS
7481_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
7482{
7483  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7484                                            (__v8di)_mm512_broadcast_i64x4(__A),
7485                                            (__v8di)__O);
7486}
7487
7488static __inline__ __m512i __DEFAULT_FN_ATTRS
7489_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
7490{
7491  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7492                                            (__v8di)_mm512_broadcast_i64x4(__A),
7493                                            (__v8di)_mm512_setzero_si512());
7494}
7495
7496static __inline__ __m512d __DEFAULT_FN_ATTRS
7497_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7498{
7499  return (__m512d)__builtin_ia32_selectpd_512(__M,
7500                                              (__v8df) _mm512_broadcastsd_pd(__A),
7501                                              (__v8df) __O);
7502}
7503
7504static __inline__ __m512d __DEFAULT_FN_ATTRS
7505_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7506{
7507  return (__m512d)__builtin_ia32_selectpd_512(__M,
7508                                              (__v8df) _mm512_broadcastsd_pd(__A),
7509                                              (__v8df) _mm512_setzero_pd());
7510}
7511
7512static __inline__ __m512 __DEFAULT_FN_ATTRS
7513_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7514{
7515  return (__m512)__builtin_ia32_selectps_512(__M,
7516                                             (__v16sf) _mm512_broadcastss_ps(__A),
7517                                             (__v16sf) __O);
7518}
7519
7520static __inline__ __m512 __DEFAULT_FN_ATTRS
7521_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7522{
7523  return (__m512)__builtin_ia32_selectps_512(__M,
7524                                             (__v16sf) _mm512_broadcastss_ps(__A),
7525                                             (__v16sf) _mm512_setzero_ps());
7526}
7527
7528static __inline__ __m128i __DEFAULT_FN_ATTRS
7529_mm512_cvtsepi32_epi8 (__m512i __A)
7530{
7531  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7532               (__v16qi) _mm_undefined_si128 (),
7533               (__mmask16) -1);
7534}
7535
7536static __inline__ __m128i __DEFAULT_FN_ATTRS
7537_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7538{
7539  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7540               (__v16qi) __O, __M);
7541}
7542
7543static __inline__ __m128i __DEFAULT_FN_ATTRS
7544_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7545{
7546  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7547               (__v16qi) _mm_setzero_si128 (),
7548               __M);
7549}
7550
7551static __inline__ void __DEFAULT_FN_ATTRS
7552_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7553{
7554  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7555}
7556
7557static __inline__ __m256i __DEFAULT_FN_ATTRS
7558_mm512_cvtsepi32_epi16 (__m512i __A)
7559{
7560  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7561               (__v16hi) _mm256_undefined_si256 (),
7562               (__mmask16) -1);
7563}
7564
7565static __inline__ __m256i __DEFAULT_FN_ATTRS
7566_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7567{
7568  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7569               (__v16hi) __O, __M);
7570}
7571
7572static __inline__ __m256i __DEFAULT_FN_ATTRS
7573_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7574{
7575  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7576               (__v16hi) _mm256_setzero_si256 (),
7577               __M);
7578}
7579
7580static __inline__ void __DEFAULT_FN_ATTRS
7581_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7582{
7583  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7584}
7585
7586static __inline__ __m128i __DEFAULT_FN_ATTRS
7587_mm512_cvtsepi64_epi8 (__m512i __A)
7588{
7589  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7590               (__v16qi) _mm_undefined_si128 (),
7591               (__mmask8) -1);
7592}
7593
7594static __inline__ __m128i __DEFAULT_FN_ATTRS
7595_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7596{
7597  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7598               (__v16qi) __O, __M);
7599}
7600
7601static __inline__ __m128i __DEFAULT_FN_ATTRS
7602_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7603{
7604  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7605               (__v16qi) _mm_setzero_si128 (),
7606               __M);
7607}
7608
7609static __inline__ void __DEFAULT_FN_ATTRS
7610_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7611{
7612  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7613}
7614
7615static __inline__ __m256i __DEFAULT_FN_ATTRS
7616_mm512_cvtsepi64_epi32 (__m512i __A)
7617{
7618  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7619               (__v8si) _mm256_undefined_si256 (),
7620               (__mmask8) -1);
7621}
7622
7623static __inline__ __m256i __DEFAULT_FN_ATTRS
7624_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7625{
7626  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7627               (__v8si) __O, __M);
7628}
7629
7630static __inline__ __m256i __DEFAULT_FN_ATTRS
7631_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7632{
7633  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7634               (__v8si) _mm256_setzero_si256 (),
7635               __M);
7636}
7637
7638static __inline__ void __DEFAULT_FN_ATTRS
7639_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7640{
7641  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7642}
7643
7644static __inline__ __m128i __DEFAULT_FN_ATTRS
7645_mm512_cvtsepi64_epi16 (__m512i __A)
7646{
7647  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7648               (__v8hi) _mm_undefined_si128 (),
7649               (__mmask8) -1);
7650}
7651
7652static __inline__ __m128i __DEFAULT_FN_ATTRS
7653_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7654{
7655  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7656               (__v8hi) __O, __M);
7657}
7658
7659static __inline__ __m128i __DEFAULT_FN_ATTRS
7660_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7661{
7662  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7663               (__v8hi) _mm_setzero_si128 (),
7664               __M);
7665}
7666
7667static __inline__ void __DEFAULT_FN_ATTRS
7668_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7669{
7670  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7671}
7672
7673static __inline__ __m128i __DEFAULT_FN_ATTRS
7674_mm512_cvtusepi32_epi8 (__m512i __A)
7675{
7676  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7677                (__v16qi) _mm_undefined_si128 (),
7678                (__mmask16) -1);
7679}
7680
7681static __inline__ __m128i __DEFAULT_FN_ATTRS
7682_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7683{
7684  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7685                (__v16qi) __O,
7686                __M);
7687}
7688
7689static __inline__ __m128i __DEFAULT_FN_ATTRS
7690_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7691{
7692  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7693                (__v16qi) _mm_setzero_si128 (),
7694                __M);
7695}
7696
7697static __inline__ void __DEFAULT_FN_ATTRS
7698_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7699{
7700  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7701}
7702
7703static __inline__ __m256i __DEFAULT_FN_ATTRS
7704_mm512_cvtusepi32_epi16 (__m512i __A)
7705{
7706  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7707                (__v16hi) _mm256_undefined_si256 (),
7708                (__mmask16) -1);
7709}
7710
7711static __inline__ __m256i __DEFAULT_FN_ATTRS
7712_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7713{
7714  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7715                (__v16hi) __O,
7716                __M);
7717}
7718
7719static __inline__ __m256i __DEFAULT_FN_ATTRS
7720_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7721{
7722  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7723                (__v16hi) _mm256_setzero_si256 (),
7724                __M);
7725}
7726
7727static __inline__ void __DEFAULT_FN_ATTRS
7728_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7729{
7730  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7731}
7732
7733static __inline__ __m128i __DEFAULT_FN_ATTRS
7734_mm512_cvtusepi64_epi8 (__m512i __A)
7735{
7736  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7737                (__v16qi) _mm_undefined_si128 (),
7738                (__mmask8) -1);
7739}
7740
7741static __inline__ __m128i __DEFAULT_FN_ATTRS
7742_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7743{
7744  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7745                (__v16qi) __O,
7746                __M);
7747}
7748
7749static __inline__ __m128i __DEFAULT_FN_ATTRS
7750_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7751{
7752  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7753                (__v16qi) _mm_setzero_si128 (),
7754                __M);
7755}
7756
7757static __inline__ void __DEFAULT_FN_ATTRS
7758_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7759{
7760  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7761}
7762
7763static __inline__ __m256i __DEFAULT_FN_ATTRS
7764_mm512_cvtusepi64_epi32 (__m512i __A)
7765{
7766  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7767                (__v8si) _mm256_undefined_si256 (),
7768                (__mmask8) -1);
7769}
7770
7771static __inline__ __m256i __DEFAULT_FN_ATTRS
7772_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7773{
7774  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7775                (__v8si) __O, __M);
7776}
7777
7778static __inline__ __m256i __DEFAULT_FN_ATTRS
7779_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7780{
7781  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7782                (__v8si) _mm256_setzero_si256 (),
7783                __M);
7784}
7785
7786static __inline__ void __DEFAULT_FN_ATTRS
7787_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7788{
7789  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7790}
7791
7792static __inline__ __m128i __DEFAULT_FN_ATTRS
7793_mm512_cvtusepi64_epi16 (__m512i __A)
7794{
7795  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7796                (__v8hi) _mm_undefined_si128 (),
7797                (__mmask8) -1);
7798}
7799
7800static __inline__ __m128i __DEFAULT_FN_ATTRS
7801_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7802{
7803  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7804                (__v8hi) __O, __M);
7805}
7806
7807static __inline__ __m128i __DEFAULT_FN_ATTRS
7808_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7809{
7810  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7811                (__v8hi) _mm_setzero_si128 (),
7812                __M);
7813}
7814
7815static __inline__ void __DEFAULT_FN_ATTRS
7816_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7817{
7818  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7819}
7820
7821static __inline__ __m128i __DEFAULT_FN_ATTRS
7822_mm512_cvtepi32_epi8 (__m512i __A)
7823{
7824  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7825              (__v16qi) _mm_undefined_si128 (),
7826              (__mmask16) -1);
7827}
7828
7829static __inline__ __m128i __DEFAULT_FN_ATTRS
7830_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7831{
7832  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7833              (__v16qi) __O, __M);
7834}
7835
7836static __inline__ __m128i __DEFAULT_FN_ATTRS
7837_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7838{
7839  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7840              (__v16qi) _mm_setzero_si128 (),
7841              __M);
7842}
7843
7844static __inline__ void __DEFAULT_FN_ATTRS
7845_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7846{
7847  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7848}
7849
7850static __inline__ __m256i __DEFAULT_FN_ATTRS
7851_mm512_cvtepi32_epi16 (__m512i __A)
7852{
7853  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7854              (__v16hi) _mm256_undefined_si256 (),
7855              (__mmask16) -1);
7856}
7857
7858static __inline__ __m256i __DEFAULT_FN_ATTRS
7859_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7860{
7861  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7862              (__v16hi) __O, __M);
7863}
7864
7865static __inline__ __m256i __DEFAULT_FN_ATTRS
7866_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7867{
7868  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7869              (__v16hi) _mm256_setzero_si256 (),
7870              __M);
7871}
7872
7873static __inline__ void __DEFAULT_FN_ATTRS
7874_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7875{
7876  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7877}
7878
7879static __inline__ __m128i __DEFAULT_FN_ATTRS
7880_mm512_cvtepi64_epi8 (__m512i __A)
7881{
7882  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7883              (__v16qi) _mm_undefined_si128 (),
7884              (__mmask8) -1);
7885}
7886
7887static __inline__ __m128i __DEFAULT_FN_ATTRS
7888_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7889{
7890  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7891              (__v16qi) __O, __M);
7892}
7893
7894static __inline__ __m128i __DEFAULT_FN_ATTRS
7895_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7896{
7897  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7898              (__v16qi) _mm_setzero_si128 (),
7899              __M);
7900}
7901
7902static __inline__ void __DEFAULT_FN_ATTRS
7903_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7904{
7905  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7906}
7907
7908static __inline__ __m256i __DEFAULT_FN_ATTRS
7909_mm512_cvtepi64_epi32 (__m512i __A)
7910{
7911  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7912              (__v8si) _mm256_undefined_si256 (),
7913              (__mmask8) -1);
7914}
7915
7916static __inline__ __m256i __DEFAULT_FN_ATTRS
7917_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7918{
7919  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7920              (__v8si) __O, __M);
7921}
7922
7923static __inline__ __m256i __DEFAULT_FN_ATTRS
7924_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7925{
7926  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7927              (__v8si) _mm256_setzero_si256 (),
7928              __M);
7929}
7930
7931static __inline__ void __DEFAULT_FN_ATTRS
7932_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7933{
7934  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7935}
7936
7937static __inline__ __m128i __DEFAULT_FN_ATTRS
7938_mm512_cvtepi64_epi16 (__m512i __A)
7939{
7940  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7941              (__v8hi) _mm_undefined_si128 (),
7942              (__mmask8) -1);
7943}
7944
7945static __inline__ __m128i __DEFAULT_FN_ATTRS
7946_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7947{
7948  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7949              (__v8hi) __O, __M);
7950}
7951
7952static __inline__ __m128i __DEFAULT_FN_ATTRS
7953_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7954{
7955  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7956              (__v8hi) _mm_setzero_si128 (),
7957              __M);
7958}
7959
7960static __inline__ void __DEFAULT_FN_ATTRS
7961_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7962{
7963  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7964}
7965
/* Picks four consecutive 32-bit elements of A, starting at element
   4 * (imm & 3), with __builtin_shufflevector.  The second shuffle operand is
   an undefined vector; the indices (at most 15) never reach it. */
#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector( \
      (__v16si)(__m512i)(A), \
      (__v16si)_mm512_undefined_epi32(), \
      ((imm) & 0x3) * 4 + 0, \
      ((imm) & 0x3) * 4 + 1, \
      ((imm) & 0x3) * 4 + 2, \
      ((imm) & 0x3) * 4 + 3); })
7973
/* Passes mask U, the result of _mm512_extracti32x4_epi32(A, imm) and W (in
   that order) to __builtin_ia32_selectd_128. */
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
      (__v4si)(W)); })
7978
/* Passes mask U, the result of _mm512_extracti32x4_epi32(A, imm) and an
   all-zero vector (in that order) to __builtin_ia32_selectd_128. */
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
      (__v4si)_mm_setzero_si128()); })
7983
/* Picks 64-bit elements 4-7 of A when bit 0 of imm is set, elements 0-3
   otherwise, using __builtin_shufflevector.  The second shuffle operand is an
   undefined vector; the indices (at most 7) never reach it. */
#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector( \
      (__v8di)(__m512i)(A), \
      (__v8di)_mm512_undefined_epi32(), \
      ((imm) & 1) ? 4 : 0, ((imm) & 1) ? 5 : 1, \
      ((imm) & 1) ? 6 : 2, ((imm) & 1) ? 7 : 3); })
7991
/* Passes mask U, the result of _mm512_extracti64x4_epi64(A, imm) and W (in
   that order) to __builtin_ia32_selectq_256. */
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
      (__v4di)(W)); })
7996
/* Passes mask U, the result of _mm512_extracti64x4_epi64(A, imm) and an
   all-zero vector (in that order) to __builtin_ia32_selectq_256. */
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
      (__v4di)_mm256_setzero_si256()); })
8001
/* Builds a 512-bit double vector from A with one 256-bit half taken from B:
     bit 0 of imm set   -> elements 0-3 from A, elements 4-7 from B;
     bit 0 of imm clear -> elements 0-3 from B, elements 4-7 from A.
   B is widened with _mm512_castpd256_pd512 so it can serve as the second
   shuffle operand (indices 8-11). */
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
      ((imm) & 0x1) ?  0 :  8, ((imm) & 0x1) ?  1 :  9, \
      ((imm) & 0x1) ?  2 : 10, ((imm) & 0x1) ?  3 : 11, \
      ((imm) & 0x1) ?  8 :  4, ((imm) & 0x1) ?  9 :  5, \
      ((imm) & 0x1) ? 10 :  6, ((imm) & 0x1) ? 11 :  7); })
8013
/* Passes mask U, the result of _mm512_insertf64x4(A, B, imm) and W (in that
   order) to __builtin_ia32_selectpd_512. */
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)(W)); })
8018
/* Passes mask U, the result of _mm512_insertf64x4(A, B, imm) and an all-zero
   vector (in that order) to __builtin_ia32_selectpd_512. */
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
      (__v8df)_mm512_setzero_pd()); })
8023
/* Builds a 512-bit integer vector from A with one 256-bit half taken from B:
     bit 0 of imm set   -> elements 0-3 from A, elements 4-7 from B;
     bit 0 of imm clear -> elements 0-3 from B, elements 4-7 from A.
   B is widened with _mm512_castsi256_si512 so it can serve as the second
   shuffle operand (indices 8-11). */
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(A), \
      (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
      ((imm) & 0x1) ?  0 :  8, ((imm) & 0x1) ?  1 :  9, \
      ((imm) & 0x1) ?  2 : 10, ((imm) & 0x1) ?  3 : 11, \
      ((imm) & 0x1) ?  8 :  4, ((imm) & 0x1) ?  9 :  5, \
      ((imm) & 0x1) ? 10 :  6, ((imm) & 0x1) ? 11 :  7); })
8035
/* Passes mask U, the result of _mm512_inserti64x4(A, B, imm) and W (in that
   order) to __builtin_ia32_selectq_512. */
#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)(W)); })
8040
/* Passes mask U, the result of _mm512_inserti64x4(A, B, imm) and an all-zero
   vector (in that order) to __builtin_ia32_selectq_512. */
#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
      (__v8di)_mm512_setzero_si512()); })
8045
/* Copies A and replaces the group of four floats numbered (imm & 3) with B.
   B is widened with _mm512_castps128_ps512 so it can serve as the second
   shuffle operand; indices 16-19 address its four elements.  Each row below
   covers one four-element group of the result. */
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_castps128_ps512((__m128)(B)), \
      (((imm) & 0x3) == 0) ? 16 :  0, (((imm) & 0x3) == 0) ? 17 :  1, \
      (((imm) & 0x3) == 0) ? 18 :  2, (((imm) & 0x3) == 0) ? 19 :  3, \
      (((imm) & 0x3) == 1) ? 16 :  4, (((imm) & 0x3) == 1) ? 17 :  5, \
      (((imm) & 0x3) == 1) ? 18 :  6, (((imm) & 0x3) == 1) ? 19 :  7, \
      (((imm) & 0x3) == 2) ? 16 :  8, (((imm) & 0x3) == 2) ? 17 :  9, \
      (((imm) & 0x3) == 2) ? 18 : 10, (((imm) & 0x3) == 2) ? 19 : 11, \
      (((imm) & 0x3) == 3) ? 16 : 12, (((imm) & 0x3) == 3) ? 17 : 13, \
      (((imm) & 0x3) == 3) ? 18 : 14, (((imm) & 0x3) == 3) ? 19 : 15); })
8065
/* Passes mask U, the result of _mm512_insertf32x4(A, B, imm) and W (in that
   order) to __builtin_ia32_selectps_512. */
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)(W)); })
8070
/* Passes mask U, the result of _mm512_insertf32x4(A, B, imm) and an all-zero
   vector (in that order) to __builtin_ia32_selectps_512. */
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
      (__v16sf)_mm512_setzero_ps()); })
8075
/* Copies A and replaces the group of four 32-bit integers numbered (imm & 3)
   with B.  B is widened with _mm512_castsi128_si512 so it can serve as the
   second shuffle operand; indices 16-19 address its four elements.  Each row
   below covers one four-element group of the result. */
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v16si)(__m512i)(A), \
      (__v16si)_mm512_castsi128_si512((__m128i)(B)), \
      (((imm) & 0x3) == 0) ? 16 :  0, (((imm) & 0x3) == 0) ? 17 :  1, \
      (((imm) & 0x3) == 0) ? 18 :  2, (((imm) & 0x3) == 0) ? 19 :  3, \
      (((imm) & 0x3) == 1) ? 16 :  4, (((imm) & 0x3) == 1) ? 17 :  5, \
      (((imm) & 0x3) == 1) ? 18 :  6, (((imm) & 0x3) == 1) ? 19 :  7, \
      (((imm) & 0x3) == 2) ? 16 :  8, (((imm) & 0x3) == 2) ? 17 :  9, \
      (((imm) & 0x3) == 2) ? 18 : 10, (((imm) & 0x3) == 2) ? 19 : 11, \
      (((imm) & 0x3) == 3) ? 16 : 12, (((imm) & 0x3) == 3) ? 17 : 13, \
      (((imm) & 0x3) == 3) ? 18 : 14, (((imm) & 0x3) == 3) ? 19 : 15); })
8095
/* Passes mask U, the result of _mm512_inserti32x4(A, B, imm) and W (in that
   order) to __builtin_ia32_selectd_512. */
#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)(W)); })
8100
/* Passes mask U, the result of _mm512_inserti32x4(A, B, imm) and an all-zero
   vector (in that order) to __builtin_ia32_selectd_512. */
#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
      (__v16si)_mm512_setzero_si512()); })
8105
/* Unmasked form with explicit rounding operand R.  B and C are packed into a
   single immediate as (C << 2) | B; the passthrough is an undefined vector
   and the mask is all ones. */
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
8111
/* Merge-masked form with explicit rounding operand R.  B and C are packed
   into a single immediate as (C << 2) | B; W is the passthrough vector and U
   the mask. */
#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8117
/* Zero-masked form with explicit rounding operand R.  B and C are packed into
   a single immediate as (C << 2) | B; the passthrough is an all-zero vector
   and U the mask. */
#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
8123
/* Unmasked form using _MM_FROUND_CUR_DIRECTION.  B and C are packed into a
   single immediate as (C << 2) | B.  The mask is all ones, so the passthrough
   operand never contributes to the result; it is passed as an undefined
   vector (as in _mm512_getmant_round_pd and _mm512_getmant_ps) instead of
   materializing a zero vector. */
#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION); })
8130
/* Merge-masked form using _MM_FROUND_CUR_DIRECTION.  B and C are packed into
   a single immediate as (C << 2) | B; W is the passthrough vector and U the
   mask. */
#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
8137
/* Zero-masked form using _MM_FROUND_CUR_DIRECTION.  B and C are packed into a
   single immediate as (C << 2) | B; the passthrough is an all-zero vector and
   U the mask. */
#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_getmantpd512_mask( \
      (__v8df)(__m512d)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
8144
/* Unmasked form with explicit rounding operand R.  B and C are packed into a
   single immediate as (C << 2) | B; the passthrough is an undefined vector
   and the mask is all ones. */
#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
8150
/* Merge-masked form with explicit rounding operand R.  B and C are packed
   into a single immediate as (C << 2) | B; W is the passthrough vector and U
   the mask. */
#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
8156
/* Zero-masked form with explicit rounding operand R.  B and C are packed into
   a single immediate as (C << 2) | B; the passthrough is an all-zero vector
   and U the mask. */
#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2) | (B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
8162
/* Unmasked form using _MM_FROUND_CUR_DIRECTION.  B and C are packed into a
   single immediate as (C << 2) | B; the passthrough is an undefined vector
   and the mask is all ones. */
#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      _MM_FROUND_CUR_DIRECTION); })
8169
/* Merge-masked form using _MM_FROUND_CUR_DIRECTION.  B and C are packed into
   a single immediate as (C << 2) | B; W is the passthrough vector and U the
   mask. */
#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
8176
/* Zero-masked form using _MM_FROUND_CUR_DIRECTION.  B and C are packed into a
   single immediate as (C << 2) | B; the passthrough is an all-zero vector and
   U the mask. */
#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_getmantps512_mask( \
      (__v16sf)(__m512)(A), \
      (int)(((C)<<2)|(B)), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
8183
/* Unmasked form with explicit rounding operand R: calls
   __builtin_ia32_getexppd512_mask on A with an undefined passthrough vector
   and an all-ones mask. */
#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
8188
/* Merge-masked form with explicit rounding operand R: calls
   __builtin_ia32_getexppd512_mask on A with W as the passthrough vector and U
   as the mask. */
#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
8193
/* Zero-masked form with explicit rounding operand R: calls
   __builtin_ia32_getexppd512_mask on A with an all-zero passthrough vector
   and U as the mask. */
#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_getexppd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
8198
8199static __inline__ __m512d __DEFAULT_FN_ATTRS
8200_mm512_getexp_pd (__m512d __A)
8201{
8202  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8203                (__v8df) _mm512_undefined_pd (),
8204                (__mmask8) -1,
8205                _MM_FROUND_CUR_DIRECTION);
8206}
8207
8208static __inline__ __m512d __DEFAULT_FN_ATTRS
8209_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
8210{
8211  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8212                (__v8df) __W,
8213                (__mmask8) __U,
8214                _MM_FROUND_CUR_DIRECTION);
8215}
8216
8217static __inline__ __m512d __DEFAULT_FN_ATTRS
8218_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
8219{
8220  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8221                (__v8df) _mm512_setzero_pd (),
8222                (__mmask8) __U,
8223                _MM_FROUND_CUR_DIRECTION);
8224}
8225
/* Unmasked form with explicit rounding operand R: calls
   __builtin_ia32_getexpps512_mask on A with an undefined passthrough vector
   and an all-ones mask. */
#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
8230
/* Merge-masked form with explicit rounding operand R: calls
   __builtin_ia32_getexpps512_mask on A with W as the passthrough vector and U
   as the mask. */
#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
8235
/* Zero-masked form with explicit rounding operand R: calls
   __builtin_ia32_getexpps512_mask on A with an all-zero passthrough vector
   and U as the mask. */
#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_getexpps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
8240
8241static __inline__ __m512 __DEFAULT_FN_ATTRS
8242_mm512_getexp_ps (__m512 __A)
8243{
8244  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8245               (__v16sf) _mm512_undefined_ps (),
8246               (__mmask16) -1,
8247               _MM_FROUND_CUR_DIRECTION);
8248}
8249
8250static __inline__ __m512 __DEFAULT_FN_ATTRS
8251_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
8252{
8253  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8254               (__v16sf) __W,
8255               (__mmask16) __U,
8256               _MM_FROUND_CUR_DIRECTION);
8257}
8258
8259static __inline__ __m512 __DEFAULT_FN_ATTRS
8260_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
8261{
8262  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8263               (__v16sf) _mm512_setzero_ps (),
8264               (__mmask16) __U,
8265               _MM_FROUND_CUR_DIRECTION);
8266}
8267
/* Unmasked form: passes an undefined __v8sf source, addr (as float const *),
   the index vector (as __v8di), an all-ones __mmask8 and scale to
   __builtin_ia32_gatherdiv16sf. */
#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)_mm256_undefined_ps(), \
      (float const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8273
/* Masked form: passes v1_old as the source operand, addr (as float const *),
   the index vector (as __v8di), mask (as __mmask8) and scale to
   __builtin_ia32_gatherdiv16sf. */
#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256)__builtin_ia32_gatherdiv16sf( \
      (__v8sf)(__m256)(v1_old), \
      (float const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8279
/* Unmasked form: passes an undefined __v8si source, addr (as int const *),
   the index vector (as __v8di), an all-ones __mmask8 and scale to
   __builtin_ia32_gatherdiv16si.  The undefined source comes from the integer
   helper _mm256_undefined_si256() rather than a bit-cast float vector; with
   an all-ones mask its contents are not meant to be observed either way. */
#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)); })
8285
/* Masked form: passes v1_old as the source operand, addr (as int const *),
   the index vector (as __v8di), mask (as __mmask8) and scale to
   __builtin_ia32_gatherdiv16si. */
#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m256i)__builtin_ia32_gatherdiv16si( \
      (__v8si)(__m256i)(v1_old), \
      (int const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8291
/* Unmasked form: passes an undefined __v8df source, addr (as double const *),
   the index vector (as __v8di), an all-ones __mmask8 and scale to
   __builtin_ia32_gatherdiv8df. */
#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (double const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8297
/* Masked form: passes v1_old as the source operand, addr (as double const *),
   the index vector (as __v8di), mask (as __mmask8) and scale to
   __builtin_ia32_gatherdiv8df. */
#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gatherdiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (double const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8303
/* Unmasked form: passes an undefined __v8di source, addr (as
   long long const *), the index vector (as __v8di), an all-ones __mmask8 and
   scale to __builtin_ia32_gatherdiv8di.  The undefined source comes from the
   integer helper _mm512_undefined_epi32() (as in _mm512_i32gather_epi64)
   rather than a bit-cast double vector; with an all-ones mask its contents
   are not meant to be observed either way. */
#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })
8309
/* Masked form: passes v1_old as the source operand, addr (as
   long long const *), the index vector (as __v8di), mask (as __mmask8) and
   scale to __builtin_ia32_gatherdiv8di. */
#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gatherdiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (long long const *)(addr), \
      (__v8di)(__m512i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8315
/* Unmasked form: passes an undefined __v16sf source, addr (as float const *),
   the index vector, an all-ones __mmask16 and scale to
   __builtin_ia32_gathersiv16sf.
   NOTE(review): index is cast through __m512/__v16sf here, whereas the other
   32-bit-index macros cast to integer vector types; presumably this matches
   the builtin's declared parameter type -- confirm against its prototype. */
#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({ \
  (__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)_mm512_undefined_ps(), \
      (float const *)(addr), \
      (__v16sf)(__m512)(index), \
      (__mmask16)-1, \
      (int)(scale)); })
8321
/* Masked form: passes v1_old as the source operand, addr (as float const *),
   the index vector, mask (as __mmask16) and scale to
   __builtin_ia32_gathersiv16sf.
   NOTE(review): index is cast through __m512/__v16sf here, whereas the other
   32-bit-index macros cast to integer vector types; presumably this matches
   the builtin's declared parameter type -- confirm against its prototype. */
#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512)__builtin_ia32_gathersiv16sf( \
      (__v16sf)(__m512)(v1_old), \
      (float const *)(addr), \
      (__v16sf)(__m512)(index), \
      (__mmask16)(mask), \
      (int)(scale)); })
8327
/* Unmasked form: passes an undefined __v16si source, addr (as int const *),
   the index vector (as __v16si), an all-ones __mmask16 and scale to
   __builtin_ia32_gathersiv16si. */
#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)_mm512_undefined_epi32(), \
      (int const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)-1, \
      (int)(scale)); })
8333
/* Masked form: passes v1_old as the source operand, addr (as int const *),
   the index vector (as __v16si), mask (as __mmask16) and scale to
   __builtin_ia32_gathersiv16si. */
#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv16si( \
      (__v16si)(__m512i)(v1_old), \
      (int const *)(addr), \
      (__v16si)(__m512i)(index), \
      (__mmask16)(mask), \
      (int)(scale)); })
8339
/* Unmasked form: passes an undefined __v8df source, addr (as double const *),
   the 256-bit index vector (as __v8si), an all-ones __mmask8 and scale to
   __builtin_ia32_gathersiv8df. */
#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)_mm512_undefined_pd(), \
      (double const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8345
/* Masked form: passes v1_old as the source operand, addr (as double const *),
   the 256-bit index vector (as __v8si), mask (as __mmask8) and scale to
   __builtin_ia32_gathersiv8df. */
#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512d)__builtin_ia32_gathersiv8df( \
      (__v8df)(__m512d)(v1_old), \
      (double const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8351
/* Unmasked form: passes an undefined __v8di source, addr (as
   long long const *), the 256-bit index vector (as __v8si), an all-ones
   __mmask8 and scale to __builtin_ia32_gathersiv8di. */
#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)_mm512_undefined_epi32(), \
      (long long const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)-1, \
      (int)(scale)); })
8357
/* Masked form: passes v1_old as the source operand, addr (as
   long long const *), the 256-bit index vector (as __v8si), mask (as
   __mmask8) and scale to __builtin_ia32_gathersiv8di. */
#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({ \
  (__m512i)__builtin_ia32_gathersiv8di( \
      (__v8di)(__m512i)(v1_old), \
      (long long const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), \
      (int)(scale)); })
8363
/* Unmasked form: passes addr (as float *), an all-ones __mmask8, the index
   vector (as __v8di), the data v1 (as __v8sf) and scale to
   __builtin_ia32_scatterdiv16sf. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf( \
      (float *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale)); })
8368
/* Masked form: passes addr (as float *), mask (as __mmask8), the index vector
   (as __v8di), the data v1 (as __v8sf) and scale to
   __builtin_ia32_scatterdiv16sf. */
#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16sf( \
      (float *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8sf)(__m256)(v1), \
      (int)(scale)); })
8373
/* Unmasked form: passes addr (as int *), an all-ones __mmask8, the index
   vector (as __v8di), the data v1 (as __v8si) and scale to
   __builtin_ia32_scatterdiv16si. */
#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si( \
      (int *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale)); })
8378
/* Masked form: passes addr (as int *), mask (as __mmask8), the index vector
   (as __v8di), the data v1 (as __v8si) and scale to
   __builtin_ia32_scatterdiv16si. */
#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv16si( \
      (int *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8si)(__m256i)(v1), \
      (int)(scale)); })
8383
/* Unmasked form: passes addr (as double *), an all-ones __mmask8, the index
   vector (as __v8di), the data v1 (as __v8df) and scale to
   __builtin_ia32_scatterdiv8df. */
#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df( \
      (double *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
8388
/* Masked form: passes addr (as double *), mask (as __mmask8), the index
   vector (as __v8di), the data v1 (as __v8df) and scale to
   __builtin_ia32_scatterdiv8df. */
#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8df( \
      (double *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
8393
/* Unmasked form: passes addr (as long long *), an all-ones __mmask8, the
   index vector (as __v8di), the data v1 (as __v8di) and scale to
   __builtin_ia32_scatterdiv8di. */
#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di( \
      (long long *)(addr), \
      (__mmask8)-1, \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
8398
/* Masked form: passes addr (as long long *), mask (as __mmask8), the index
   vector (as __v8di), the data v1 (as __v8di) and scale to
   __builtin_ia32_scatterdiv8di. */
#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8di( \
      (long long *)(addr), \
      (__mmask8)(mask), \
      (__v8di)(__m512i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
8403
/* Unmasked form: passes addr (as float *), an all-ones __mmask16, the index
   vector (as __v16si), the data v1 (as __v16sf) and scale to
   __builtin_ia32_scattersiv16sf. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16sf( \
      (float *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale)); })
8408
/* Masked form: passes addr (as float *), mask (as __mmask16), the index
   vector (as __v16si), the data v1 (as __v16sf) and scale to
   __builtin_ia32_scattersiv16sf. */
#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16sf( \
      (float *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16sf)(__m512)(v1), \
      (int)(scale)); })
8413
/* Unmasked form: passes addr (as int *), an all-ones __mmask16, the index
   vector (as __v16si), the data v1 (as __v16si) and scale to
   __builtin_ia32_scattersiv16si. */
#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16si( \
      (int *)(addr), \
      (__mmask16)-1, \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale)); })
8418
/* Masked form: passes addr (as int *), mask (as __mmask16), the index vector
   (as __v16si), the data v1 (as __v16si) and scale to
   __builtin_ia32_scattersiv16si. */
#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv16si( \
      (int *)(addr), \
      (__mmask16)(mask), \
      (__v16si)(__m512i)(index), \
      (__v16si)(__m512i)(v1), \
      (int)(scale)); })
8423
/* Unmasked form: passes addr (as double *), an all-ones __mmask8, the 256-bit
   index vector (as __v8si), the data v1 (as __v8df) and scale to
   __builtin_ia32_scattersiv8df. */
#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8df( \
      (double *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
8428
/* Masked form: passes addr (as double *), mask (as __mmask8), the 256-bit
   index vector (as __v8si), the data v1 (as __v8df) and scale to
   __builtin_ia32_scattersiv8df. */
#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8df( \
      (double *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8df)(__m512d)(v1), \
      (int)(scale)); })
8433
/* Unmasked form: passes addr (as long long *), an all-ones __mmask8, the
   256-bit index vector (as __v8si), the data v1 (as __v8di) and scale to
   __builtin_ia32_scattersiv8di. */
#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8di( \
      (long long *)(addr), \
      (__mmask8)-1, \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
8438
/* Masked form: passes addr (as long long *), mask (as __mmask8), the 256-bit
   index vector (as __v8si), the data v1 (as __v8di) and scale to
   __builtin_ia32_scattersiv8di. */
#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8di( \
      (long long *)(addr), \
      (__mmask8)(mask), \
      (__v8si)(__m256i)(index), \
      (__v8di)(__m512i)(v1), \
      (int)(scale)); })
8443
8444static __inline__ __m128 __DEFAULT_FN_ATTRS
8445_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8446{
8447 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8448          (__v4sf) __A,
8449          (__v4sf) __B,
8450          (__mmask8) __U,
8451          _MM_FROUND_CUR_DIRECTION);
8452}
8453
/* Passes W, A and B (in that order) to __builtin_ia32_vfmaddss3_mask with
   mask U and rounding operand R. */
#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8459
8460static __inline__ __m128 __DEFAULT_FN_ATTRS
8461_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8462{
8463 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8464          (__v4sf) __B,
8465          (__v4sf) __C,
8466          (__mmask8) __U,
8467          _MM_FROUND_CUR_DIRECTION);
8468}
8469
/* Passes A, B and C (in that order) to __builtin_ia32_vfmaddss3_maskz with
   mask U and the caller-supplied rounding operand R.  R must be forwarded to
   the builtin: hard-coding _MM_FROUND_CUR_DIRECTION here would silently drop
   the rounding mode requested by the caller and make this macro equivalent
   to _mm_maskz_fmadd_ss. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8475
8476static __inline__ __m128 __DEFAULT_FN_ATTRS
8477_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8478{
8479 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8480          (__v4sf) __X,
8481          (__v4sf) __Y,
8482          (__mmask8) __U,
8483          _MM_FROUND_CUR_DIRECTION);
8484}
8485
/* Passes W, X and Y (in that order) to __builtin_ia32_vfmaddss3_mask3 with
   mask U and rounding operand R. */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8491
8492static __inline__ __m128 __DEFAULT_FN_ATTRS
8493_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8494{
8495 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8496          (__v4sf) __A,
8497          -(__v4sf) __B,
8498          (__mmask8) __U,
8499          _MM_FROUND_CUR_DIRECTION);
8500}
8501
/* Passes W, A and the negation of B (in that order) to
   __builtin_ia32_vfmaddss3_mask with mask U and rounding operand R.  The
   subtraction is expressed by negating the third operand of the fmadd
   builtin, exactly as _mm_mask_fmsub_ss and _mm_maskz_fmsub_round_ss do;
   without the negation this macro would behave like
   _mm_mask_fmadd_round_ss. */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
8507
8508static __inline__ __m128 __DEFAULT_FN_ATTRS
8509_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8510{
8511 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8512          (__v4sf) __B,
8513          -(__v4sf) __C,
8514          (__mmask8) __U,
8515          _MM_FROUND_CUR_DIRECTION);
8516}
8517
/* Passes A, B and the negation of C (in that order) to
   __builtin_ia32_vfmaddss3_maskz with mask U and rounding operand R. */
#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      -(__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8523
8524static __inline__ __m128 __DEFAULT_FN_ATTRS
8525_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8526{
8527 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
8528          (__v4sf) __X,
8529          (__v4sf) __Y,
8530          (__mmask8) __U,
8531          _MM_FROUND_CUR_DIRECTION);
8532}
8533
/* Explicit-rounding form of _mm_mask3_fmsub_ss.
 * Expands to __builtin_ia32_vfmsubss3_mask3(W, X, Y, U, R) with no operand
 * negated; U is converted to __mmask8 and R to int. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmsubss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); \
})
8539
8540static __inline__ __m128 __DEFAULT_FN_ATTRS
8541_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8542{
8543 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8544          -(__v4sf) __A,
8545          (__v4sf) __B,
8546          (__mmask8) __U,
8547          _MM_FROUND_CUR_DIRECTION);
8548}
8549
/* Explicit-rounding form of _mm_mask_fnmadd_ss.
 * Expands to __builtin_ia32_vfmaddss3_mask(W, -A, B, U, R): only A is
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); \
})
8555
8556static __inline__ __m128 __DEFAULT_FN_ATTRS
8557_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8558{
8559 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8560          (__v4sf) __B,
8561          (__v4sf) __C,
8562          (__mmask8) __U,
8563          _MM_FROUND_CUR_DIRECTION);
8564}
8565
/* Explicit-rounding form of _mm_maskz_fnmadd_ss.
 * Expands to __builtin_ia32_vfmaddss3_maskz(-A, B, C, U, R): only A is
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); \
})
8571
8572static __inline__ __m128 __DEFAULT_FN_ATTRS
8573_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8574{
8575 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8576          (__v4sf) __X,
8577          (__v4sf) __Y,
8578          (__mmask8) __U,
8579          _MM_FROUND_CUR_DIRECTION);
8580}
8581
/* Explicit-rounding form of _mm_mask3_fnmadd_ss.
 * Expands to __builtin_ia32_vfmaddss3_mask3(-W, X, Y, U, R): only W is
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      -(__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); \
})
8587
8588static __inline__ __m128 __DEFAULT_FN_ATTRS
8589_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8590{
8591 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8592          -(__v4sf) __A,
8593          -(__v4sf) __B,
8594          (__mmask8) __U,
8595          _MM_FROUND_CUR_DIRECTION);
8596}
8597
/* Explicit-rounding form of _mm_mask_fnmsub_ss.
 * Expands to __builtin_ia32_vfmaddss3_mask(W, -A, -B, U, R): A and B are
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); \
})
8603
8604static __inline__ __m128 __DEFAULT_FN_ATTRS
8605_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8606{
8607 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8608          (__v4sf) __B,
8609          -(__v4sf) __C,
8610          (__mmask8) __U,
8611          _MM_FROUND_CUR_DIRECTION);
8612}
8613
/* Explicit-rounding form of _mm_maskz_fnmsub_ss.
 * Expands to __builtin_ia32_vfmaddss3_maskz(-A, B, -C, U, R).
 *   A, B, C - __m128 operands (viewed as __v4sf); A and C are negated
 *   U       - mask, converted to __mmask8
 *   R       - rounding control, forwarded as int
 * R must reach the builtin: hard-coding _MM_FROUND_CUR_DIRECTION here would
 * make this macro behave like the non-round _mm_maskz_fnmsub_ss and silently
 * discard the caller's rounding mode. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8619
8620static __inline__ __m128 __DEFAULT_FN_ATTRS
8621_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8622{
8623 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
8624          (__v4sf) __X,
8625          (__v4sf) __Y,
8626          (__mmask8) __U,
8627          _MM_FROUND_CUR_DIRECTION);
8628}
8629
/* Explicit-rounding form of _mm_mask3_fnmsub_ss.
 * Expands to __builtin_ia32_vfnmsubss3_mask3(W, X, Y, U, R) with no operand
 * negated; U is converted to __mmask8 and R to int. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfnmsubss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); \
})
8635
8636static __inline__ __m128d __DEFAULT_FN_ATTRS
8637_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8638{
8639 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8640          (__v2df) __A,
8641          (__v2df) __B,
8642          (__mmask8) __U,
8643          _MM_FROUND_CUR_DIRECTION);
8644}
8645
/* Explicit-rounding form of _mm_mask_fmadd_sd.
 * Expands to __builtin_ia32_vfmaddsd3_mask(W, A, B, U, R) with no operand
 * negated; U is converted to __mmask8 and R to int. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); \
})
8651
8652static __inline__ __m128d __DEFAULT_FN_ATTRS
8653_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8654{
8655 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8656          (__v2df) __B,
8657          (__v2df) __C,
8658          (__mmask8) __U,
8659          _MM_FROUND_CUR_DIRECTION);
8660}
8661
/* Explicit-rounding form of _mm_maskz_fmadd_sd.
 * Expands to __builtin_ia32_vfmaddsd3_maskz(A, B, C, U, R).
 *   A, B, C - __m128d operands (viewed as __v2df), none negated
 *   U       - mask, converted to __mmask8
 *   R       - rounding control, forwarded as int
 * R must reach the builtin: hard-coding _MM_FROUND_CUR_DIRECTION here would
 * make this macro behave like the non-round _mm_maskz_fmadd_sd and silently
 * discard the caller's rounding mode. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })
8667
8668static __inline__ __m128d __DEFAULT_FN_ATTRS
8669_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8670{
8671 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8672          (__v2df) __X,
8673          (__v2df) __Y,
8674          (__mmask8) __U,
8675          _MM_FROUND_CUR_DIRECTION);
8676}
8677
/* Explicit-rounding form of _mm_mask3_fmadd_sd.
 * Expands to __builtin_ia32_vfmaddsd3_mask3(W, X, Y, U, R) with no operand
 * negated; U is converted to __mmask8 and R to int. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); \
})
8683
8684static __inline__ __m128d __DEFAULT_FN_ATTRS
8685_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8686{
8687 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8688          (__v2df) __A,
8689          -(__v2df) __B,
8690          (__mmask8) __U,
8691          _MM_FROUND_CUR_DIRECTION);
8692}
8693
/* Explicit-rounding form of _mm_mask_fmsub_sd.
 * Expands to __builtin_ia32_vfmaddsd3_mask(W, A, -B, U, R): only B is
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); \
})
8699
8700static __inline__ __m128d __DEFAULT_FN_ATTRS
8701_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8702{
8703 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8704          (__v2df) __B,
8705          -(__v2df) __C,
8706          (__mmask8) __U,
8707          _MM_FROUND_CUR_DIRECTION);
8708}
8709
/* Explicit-rounding form of _mm_maskz_fmsub_sd.
 * Expands to __builtin_ia32_vfmaddsd3_maskz(A, B, -C, U, R): only C is
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      -(__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); \
})
8715
8716static __inline__ __m128d __DEFAULT_FN_ATTRS
8717_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8718{
8719 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
8720          (__v2df) __X,
8721          (__v2df) __Y,
8722          (__mmask8) __U,
8723          _MM_FROUND_CUR_DIRECTION);
8724}
8725
/* Explicit-rounding form of _mm_mask3_fmsub_sd.
 * Expands to __builtin_ia32_vfmsubsd3_mask3(W, X, Y, U, R) with no operand
 * negated; U is converted to __mmask8 and R to int. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); \
})
8731
8732static __inline__ __m128d __DEFAULT_FN_ATTRS
8733_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8734{
8735 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8736          -(__v2df) __A,
8737          (__v2df) __B,
8738          (__mmask8) __U,
8739          _MM_FROUND_CUR_DIRECTION);
8740}
8741
/* Explicit-rounding form of _mm_mask_fnmadd_sd.
 * Expands to __builtin_ia32_vfmaddsd3_mask(W, -A, B, U, R): only A is
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); \
})
8747
8748static __inline__ __m128d __DEFAULT_FN_ATTRS
8749_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8750{
8751 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8752          (__v2df) __B,
8753          (__v2df) __C,
8754          (__mmask8) __U,
8755          _MM_FROUND_CUR_DIRECTION);
8756}
8757
/* Explicit-rounding form of _mm_maskz_fnmadd_sd.
 * Expands to __builtin_ia32_vfmaddsd3_maskz(-A, B, C, U, R): only A is
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); \
})
8763
8764static __inline__ __m128d __DEFAULT_FN_ATTRS
8765_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8766{
8767 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
8768          (__v2df) __X,
8769          (__v2df) __Y,
8770          (__mmask8) __U,
8771          _MM_FROUND_CUR_DIRECTION);
8772}
8773
/* Explicit-rounding form of _mm_mask3_fnmadd_sd.
 * Expands to __builtin_ia32_vfmaddsd3_mask3(-W, X, Y, U, R): only W is
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      -(__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); \
})
8779
8780static __inline__ __m128d __DEFAULT_FN_ATTRS
8781_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8782{
8783 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8784          -(__v2df) __A,
8785          -(__v2df) __B,
8786          (__mmask8) __U,
8787          _MM_FROUND_CUR_DIRECTION);
8788}
8789
/* Explicit-rounding form of _mm_mask_fnmsub_sd.
 * Expands to __builtin_ia32_vfmaddsd3_mask(W, -A, -B, U, R): A and B are
 * negated, U is converted to __mmask8 and R to int. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); \
})
8795
8796static __inline__ __m128d __DEFAULT_FN_ATTRS
8797_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8798{
8799 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8800          (__v2df) __B,
8801          -(__v2df) __C,
8802          (__mmask8) __U,
8803          _MM_FROUND_CUR_DIRECTION);
8804}
8805
/* Explicit-rounding form of _mm_maskz_fnmsub_sd.
 * Expands to __builtin_ia32_vfmaddsd3_maskz(-A, B, -C, U, R).
 *   A, B, C - __m128d operands (viewed as __v2df); A and C are negated
 *   U       - mask, converted to __mmask8
 *   R       - rounding control, forwarded as int
 * R must reach the builtin: hard-coding _MM_FROUND_CUR_DIRECTION here would
 * make this macro behave like the non-round _mm_maskz_fnmsub_sd and silently
 * discard the caller's rounding mode. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
8812
8813static __inline__ __m128d __DEFAULT_FN_ATTRS
8814_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8815{
8816 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
8817          (__v2df) __X,
8818          (__v2df) (__Y),
8819          (__mmask8) __U,
8820          _MM_FROUND_CUR_DIRECTION);
8821}
8822
/* Explicit-rounding form of _mm_mask3_fnmsub_sd.
 * Expands to __builtin_ia32_vfnmsubsd3_mask3(W, X, Y, U, R) with no operand
 * negated; U is converted to __mmask8 and R to int. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); \
})
8828
/* Shuffles the eight doubles of X with __builtin_shufflevector.  C packs four
 * 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6); the same four selectors pick
 * from elements 0-3 for result elements 0-3 and from elements 4-7 for result
 * elements 4-7.  The second shuffle operand (_mm512_undefined_pd()) is never
 * selected because every index is at most 7. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(X), \
      (__v8df)_mm512_undefined_pd(), \
      (((C) >> 0) & 0x3), \
      (((C) >> 2) & 0x3), \
      (((C) >> 4) & 0x3), \
      (((C) >> 6) & 0x3), \
      (((C) >> 0) & 0x3) + 4, \
      (((C) >> 2) & 0x3) + 4, \
      (((C) >> 4) & 0x3) + 4, \
      (((C) >> 6) & 0x3) + 4); \
})
8840
/* Masked form of _mm512_permutex_pd: hands mask U, the permuted vector and W
 * to __builtin_ia32_selectpd_512.  Presumably elements whose mask bit is
 * clear come from W (TODO confirm against the builtin's definition). */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)); \
})
8845
/* Zero-masked form of _mm512_permutex_pd: hands mask U, the permuted vector
 * and _mm512_setzero_pd() to __builtin_ia32_selectpd_512.  Presumably
 * elements whose mask bit is clear become zero (TODO confirm). */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()); \
})
8850
/* Shuffles the eight long long elements of X with __builtin_shufflevector.
 * C packs four 2-bit selectors (bits 1:0, 3:2, 5:4, 7:6); the same four
 * selectors pick from elements 0-3 for result elements 0-3 and from elements
 * 4-7 for result elements 4-7.  The second shuffle operand
 * (_mm512_undefined_epi32()) is never selected because every index is at
 * most 7. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(X), \
      (__v8di)_mm512_undefined_epi32(), \
      (((C) >> 0) & 0x3), \
      (((C) >> 2) & 0x3), \
      (((C) >> 4) & 0x3), \
      (((C) >> 6) & 0x3), \
      (((C) >> 0) & 0x3) + 4, \
      (((C) >> 2) & 0x3) + 4, \
      (((C) >> 4) & 0x3) + 4, \
      (((C) >> 6) & 0x3) + 4); \
})
8862
/* Masked form of _mm512_permutex_epi64: hands mask U, the permuted vector and
 * W to __builtin_ia32_selectq_512.  Presumably elements whose mask bit is
 * clear come from W (TODO confirm against the builtin's definition). */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)); \
})
8867
/* Zero-masked form of _mm512_permutex_epi64: hands mask U, the permuted
 * vector and _mm512_setzero_si512() to __builtin_ia32_selectq_512.
 * Presumably elements whose mask bit is clear become zero (TODO confirm). */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()); \
})
8872
8873static __inline__ __m512d __DEFAULT_FN_ATTRS
8874_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8875{
8876  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8877                 (__v8di) __X,
8878                 (__v8df) _mm512_undefined_pd (),
8879                 (__mmask8) -1);
8880}
8881
8882static __inline__ __m512d __DEFAULT_FN_ATTRS
8883_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8884{
8885  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8886                 (__v8di) __X,
8887                 (__v8df) __W,
8888                 (__mmask8) __U);
8889}
8890
8891static __inline__ __m512d __DEFAULT_FN_ATTRS
8892_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8893{
8894  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8895                 (__v8di) __X,
8896                 (__v8df) _mm512_setzero_pd (),
8897                 (__mmask8) __U);
8898}
8899
8900static __inline__ __m512i __DEFAULT_FN_ATTRS
8901_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8902{
8903  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8904                 (__v8di) __X,
8905                 (__v8di) _mm512_setzero_si512 (),
8906                 __M);
8907}
8908
8909static __inline__ __m512i __DEFAULT_FN_ATTRS
8910_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8911{
8912  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8913                 (__v8di) __X,
8914                 (__v8di) _mm512_undefined_epi32 (),
8915                 (__mmask8) -1);
8916}
8917
8918static __inline__ __m512i __DEFAULT_FN_ATTRS
8919_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8920             __m512i __Y)
8921{
8922  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8923                 (__v8di) __X,
8924                 (__v8di) __W,
8925                 __M);
8926}
8927
8928static __inline__ __m512 __DEFAULT_FN_ATTRS
8929_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8930{
8931  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8932                (__v16si) __X,
8933                (__v16sf) _mm512_undefined_ps (),
8934                (__mmask16) -1);
8935}
8936
8937static __inline__ __m512 __DEFAULT_FN_ATTRS
8938_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8939{
8940  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8941                (__v16si) __X,
8942                (__v16sf) __W,
8943                (__mmask16) __U);
8944}
8945
8946static __inline__ __m512 __DEFAULT_FN_ATTRS
8947_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8948{
8949  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8950                (__v16si) __X,
8951                (__v16sf) _mm512_setzero_ps (),
8952                (__mmask16) __U);
8953}
8954
8955static __inline__ __m512i __DEFAULT_FN_ATTRS
8956_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8957{
8958  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8959                 (__v16si) __X,
8960                 (__v16si) _mm512_setzero_si512 (),
8961                 __M);
8962}
8963
8964static __inline__ __m512i __DEFAULT_FN_ATTRS
8965_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8966{
8967  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8968                 (__v16si) __X,
8969                 (__v16si) _mm512_undefined_epi32 (),
8970                 (__mmask16) -1);
8971}
8972
/* Alias: _mm512_permutevar_epi32 expands to _mm512_permutexvar_epi32, so both
 * spellings name the same function. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8974
8975static __inline__ __m512i __DEFAULT_FN_ATTRS
8976_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8977             __m512i __Y)
8978{
8979  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8980                 (__v16si) __X,
8981                 (__v16si) __W,
8982                 __M);
8983}
8984
/* Alias: _mm512_mask_permutevar_epi32 expands to
 * _mm512_mask_permutexvar_epi32, so both spellings name the same function. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8986
8987static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8988_mm512_kand (__mmask16 __A, __mmask16 __B)
8989{
8990  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8991}
8992
8993static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8994_mm512_kandn (__mmask16 __A, __mmask16 __B)
8995{
8996  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8997}
8998
8999static __inline__ __mmask16 __DEFAULT_FN_ATTRS
9000_mm512_kor (__mmask16 __A, __mmask16 __B)
9001{
9002  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
9003}
9004
9005static __inline__ int __DEFAULT_FN_ATTRS
9006_mm512_kortestc (__mmask16 __A, __mmask16 __B)
9007{
9008  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
9009}
9010
9011static __inline__ int __DEFAULT_FN_ATTRS
9012_mm512_kortestz (__mmask16 __A, __mmask16 __B)
9013{
9014  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
9015}
9016
9017static __inline__ __mmask16 __DEFAULT_FN_ATTRS
9018_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
9019{
9020  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
9021}
9022
9023static __inline__ __mmask16 __DEFAULT_FN_ATTRS
9024_mm512_kxnor (__mmask16 __A, __mmask16 __B)
9025{
9026  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
9027}
9028
9029static __inline__ __mmask16 __DEFAULT_FN_ATTRS
9030_mm512_kxor (__mmask16 __A, __mmask16 __B)
9031{
9032  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
9033}
9034
9035static __inline__ void __DEFAULT_FN_ATTRS
9036_mm512_stream_si512 (__m512i * __P, __m512i __A)
9037{
9038  __builtin_nontemporal_store((__v8di)__A, (__v8di*)__P);
9039}
9040
9041static __inline__ __m512i __DEFAULT_FN_ATTRS
9042_mm512_stream_load_si512 (void *__P)
9043{
9044  return (__m512i) __builtin_nontemporal_load((const __v8di *)__P);
9045}
9046
9047static __inline__ void __DEFAULT_FN_ATTRS
9048_mm512_stream_pd (double *__P, __m512d __A)
9049{
9050  __builtin_nontemporal_store((__v8df)__A, (__v8df*)__P);
9051}
9052
9053static __inline__ void __DEFAULT_FN_ATTRS
9054_mm512_stream_ps (float *__P, __m512 __A)
9055{
9056  __builtin_nontemporal_store((__v16sf)__A, (__v16sf*)__P);
9057}
9058
9059static __inline__ __m512d __DEFAULT_FN_ATTRS
9060_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9061{
9062  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9063                  (__v8df) __W,
9064                  (__mmask8) __U);
9065}
9066
9067static __inline__ __m512d __DEFAULT_FN_ATTRS
9068_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9069{
9070  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9071                  (__v8df)
9072                  _mm512_setzero_pd (),
9073                  (__mmask8) __U);
9074}
9075
9076static __inline__ __m512i __DEFAULT_FN_ATTRS
9077_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9078{
9079  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9080                  (__v8di) __W,
9081                  (__mmask8) __U);
9082}
9083
9084static __inline__ __m512i __DEFAULT_FN_ATTRS
9085_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9086{
9087  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9088                  (__v8di)
9089                  _mm512_setzero_si512 (),
9090                  (__mmask8) __U);
9091}
9092
9093static __inline__ __m512 __DEFAULT_FN_ATTRS
9094_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9095{
9096  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9097                 (__v16sf) __W,
9098                 (__mmask16) __U);
9099}
9100
9101static __inline__ __m512 __DEFAULT_FN_ATTRS
9102_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9103{
9104  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9105                 (__v16sf)
9106                 _mm512_setzero_ps (),
9107                 (__mmask16) __U);
9108}
9109
9110static __inline__ __m512i __DEFAULT_FN_ATTRS
9111_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9112{
9113  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9114                  (__v16si) __W,
9115                  (__mmask16) __U);
9116}
9117
9118static __inline__ __m512i __DEFAULT_FN_ATTRS
9119_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9120{
9121  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9122                  (__v16si)
9123                  _mm512_setzero_si512 (),
9124                  (__mmask16) __U);
9125}
9126
/* Scalar float compare producing a mask.  Expands to
 * __builtin_ia32_cmpss_mask(X, Y, P, (__mmask8)-1, R): P is the comparison
 * predicate and R the rounding/exception control, both forwarded as int. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      (int)(R)); \
})
9131
/* Masked scalar float compare producing a mask.  Expands to
 * __builtin_ia32_cmpss_mask(X, Y, P, M, R): M is converted to __mmask8, P and
 * R are forwarded as int. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      (int)(R)); \
})
9136
/* Scalar float compare producing a mask.  Expands to
 * __builtin_ia32_cmpss_mask(X, Y, P, (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
 * P is forwarded as int. */
#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
9142
/* Masked scalar float compare producing a mask.  Expands to
 * __builtin_ia32_cmpss_mask(X, Y, P, M, _MM_FROUND_CUR_DIRECTION); M is
 * converted to __mmask8 and P is forwarded as int. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION); \
})
9148
/* Scalar double compare producing a mask.  Expands to
 * __builtin_ia32_cmpsd_mask(X, Y, P, (__mmask8)-1, R): P is the comparison
 * predicate and R the rounding/exception control, both forwarded as int. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      (int)(R)); \
})
9153
/* Masked scalar double compare producing a mask.  Expands to
 * __builtin_ia32_cmpsd_mask(X, Y, P, M, R): M is converted to __mmask8, P and
 * R are forwarded as int. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      (int)(R)); \
})
9158
/* Scalar double compare producing a mask.  Expands to
 * __builtin_ia32_cmpsd_mask(X, Y, P, (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
 * P is forwarded as int. */
#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); \
})
9164
/* Masked scalar double compare producing a mask.  Expands to
 * __builtin_ia32_cmpsd_mask(X, Y, P, M, _MM_FROUND_CUR_DIRECTION); M is
 * converted to __mmask8 and P is forwarded as int. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION); \
})
9170
9171static __inline__ __m512 __DEFAULT_FN_ATTRS
9172_mm512_movehdup_ps (__m512 __A)
9173{
9174  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9175                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
9176}
9177
9178static __inline__ __m512 __DEFAULT_FN_ATTRS
9179_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9180{
9181  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9182                                             (__v16sf)_mm512_movehdup_ps(__A),
9183                                             (__v16sf)__W);
9184}
9185
9186static __inline__ __m512 __DEFAULT_FN_ATTRS
9187_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
9188{
9189  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9190                                             (__v16sf)_mm512_movehdup_ps(__A),
9191                                             (__v16sf)_mm512_setzero_ps());
9192}
9193
9194static __inline__ __m512 __DEFAULT_FN_ATTRS
9195_mm512_moveldup_ps (__m512 __A)
9196{
9197  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9198                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
9199}
9200
9201static __inline__ __m512 __DEFAULT_FN_ATTRS
9202_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9203{
9204  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9205                                             (__v16sf)_mm512_moveldup_ps(__A),
9206                                             (__v16sf)__W);
9207}
9208
9209static __inline__ __m512 __DEFAULT_FN_ATTRS
9210_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
9211{
9212  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9213                                             (__v16sf)_mm512_moveldup_ps(__A),
9214                                             (__v16sf)_mm512_setzero_ps());
9215}
9216
9217static __inline__ __m128 __DEFAULT_FN_ATTRS
9218_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9219{
9220  __m128 res = __A;
9221  res[0] = (__U & 1) ? __B[0] : __W[0];
9222  return res;
9223}
9224
9225static __inline__ __m128 __DEFAULT_FN_ATTRS
9226_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9227{
9228  __m128 res = __A;
9229  res[0] = (__U & 1) ? __B[0] : 0;
9230  return res;
9231}
9232
9233static __inline__ __m128d __DEFAULT_FN_ATTRS
9234_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9235{
9236  __m128d res = __A;
9237  res[0] = (__U & 1) ? __B[0] : __W[0];
9238  return res;
9239}
9240
9241static __inline__ __m128d __DEFAULT_FN_ATTRS
9242_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9243{
9244  __m128d res = __A;
9245  res[0] = (__U & 1) ? __B[0] : 0;
9246  return res;
9247}
9248
9249static __inline__ void __DEFAULT_FN_ATTRS
9250_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
9251{
9252  __builtin_ia32_storess128_mask ((__v16sf *)__W,
9253                (__v16sf) _mm512_castps128_ps512(__A),
9254                (__mmask16) __U & (__mmask16)1);
9255}
9256
9257static __inline__ void __DEFAULT_FN_ATTRS
9258_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
9259{
9260  __builtin_ia32_storesd128_mask ((__v8df *)__W,
9261                (__v8df) _mm512_castpd128_pd512(__A),
9262                (__mmask8) __U & 1);
9263}
9264
9265static __inline__ __m128 __DEFAULT_FN_ATTRS
9266_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
9267{
9268  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
9269                                                (__v4sf) {0.0, 0.0, 0.0, 0.0},
9270                                                0, 4, 4, 4);
9271
9272  return (__m128) __builtin_shufflevector(
9273                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9274                                      (__v16sf) _mm512_castps128_ps512(src),
9275                                      (__mmask16) __U & 1),
9276                           _mm512_undefined_ps(), 0, 1, 2, 3);
9277}
9278
9279static __inline__ __m128 __DEFAULT_FN_ATTRS
9280_mm_maskz_load_ss (__mmask8 __U, const float* __A)
9281{
9282  return (__m128) __builtin_shufflevector(
9283                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9284                                      (__v16sf) _mm512_setzero_ps(),
9285                                      (__mmask16) __U & 1),
9286                           _mm512_undefined_ps(), 0, 1, 2, 3);
9287}
9288
9289static __inline__ __m128d __DEFAULT_FN_ATTRS
9290_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
9291{
9292  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
9293                                                 (__v2df) {0.0, 0.0}, 0, 2);
9294
9295  return (__m128d) __builtin_shufflevector(
9296                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9297                                      (__v8df) _mm512_castpd128_pd512(src),
9298                                      (__mmask8) __U & 1),
9299                            _mm512_undefined_pd(), 0, 1);
9300}
9301
9302static __inline__ __m128d __DEFAULT_FN_ATTRS
9303_mm_maskz_load_sd (__mmask8 __U, const double* __A)
9304{
9305  return (__m128d) __builtin_shufflevector(
9306                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9307                                      (__v8df) _mm512_setzero_pd(),
9308                                      (__mmask8) __U & 1),
9309                            _mm512_undefined_pd(), 0, 1);
9310}
9311
/* Shuffles the 32-bit elements of A within each group of four elements.
 *
 * Bits [1:0], [3:2], [5:4] and [7:6] of I are four 2-bit selectors.  Selector
 * k chooses which element of a group is placed at position k of that group;
 * the same selectors are applied to the groups based at elements 0, 4, 8 and
 * 12.  All indices are below 16, so only A is ever read; the second shuffle
 * operand is undefined filler.  I is expected to be an integer constant
 * expression because it forms __builtin_shufflevector indices.
 */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v16si)(__m512i)(A), \
      (__v16si)_mm512_undefined_epi32(), \
      (((I) >> 0) & 0x3) + 0,  (((I) >> 2) & 0x3) + 0, \
      (((I) >> 4) & 0x3) + 0,  (((I) >> 6) & 0x3) + 0, \
      (((I) >> 0) & 0x3) + 4,  (((I) >> 2) & 0x3) + 4, \
      (((I) >> 4) & 0x3) + 4,  (((I) >> 6) & 0x3) + 4, \
      (((I) >> 0) & 0x3) + 8,  (((I) >> 2) & 0x3) + 8, \
      (((I) >> 4) & 0x3) + 8,  (((I) >> 6) & 0x3) + 8, \
      (((I) >> 0) & 0x3) + 12, (((I) >> 2) & 0x3) + 12, \
      (((I) >> 4) & 0x3) + 12, (((I) >> 6) & 0x3) + 12); })
9331
/* Merge-masked form of _mm512_shuffle_epi32: the shuffled A and W are handed
 * to __builtin_ia32_selectd_512 with U as the 16-bit mask. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)(__m512i)(W)); })
9336
/* Zero-masked form of _mm512_shuffle_epi32: the shuffled A and an all-zero
 * vector are handed to __builtin_ia32_selectd_512 with U as the 16-bit mask. */
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), \
      (__v16si)_mm512_shuffle_epi32((A), (I)), \
      (__v16si)_mm512_setzero_si512()); })
9341
9342static __inline__ __m512d __DEFAULT_FN_ATTRS
9343_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9344{
9345  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9346                (__v8df) __W,
9347                (__mmask8) __U);
9348}
9349
9350static __inline__ __m512d __DEFAULT_FN_ATTRS
9351_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9352{
9353  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9354                (__v8df) _mm512_setzero_pd (),
9355                (__mmask8) __U);
9356}
9357
9358static __inline__ __m512i __DEFAULT_FN_ATTRS
9359_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9360{
9361  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9362                (__v8di) __W,
9363                (__mmask8) __U);
9364}
9365
9366static __inline__ __m512i __DEFAULT_FN_ATTRS
9367_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9368{
9369  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9370                (__v8di) _mm512_setzero_pd (),
9371                (__mmask8) __U);
9372}
9373
9374static __inline__ __m512d __DEFAULT_FN_ATTRS
9375_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
9376{
9377  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9378              (__v8df) __W,
9379              (__mmask8) __U);
9380}
9381
9382static __inline__ __m512d __DEFAULT_FN_ATTRS
9383_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
9384{
9385  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9386              (__v8df) _mm512_setzero_pd(),
9387              (__mmask8) __U);
9388}
9389
9390static __inline__ __m512i __DEFAULT_FN_ATTRS
9391_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9392{
9393  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9394              (__v8di) __W,
9395              (__mmask8) __U);
9396}
9397
9398static __inline__ __m512i __DEFAULT_FN_ATTRS
9399_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9400{
9401  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9402              (__v8di) _mm512_setzero_pd(),
9403              (__mmask8) __U);
9404}
9405
9406static __inline__ __m512 __DEFAULT_FN_ATTRS
9407_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
9408{
9409  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9410                   (__v16sf) __W,
9411                   (__mmask16) __U);
9412}
9413
9414static __inline__ __m512 __DEFAULT_FN_ATTRS
9415_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
9416{
9417  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9418                   (__v16sf) _mm512_setzero_ps(),
9419                   (__mmask16) __U);
9420}
9421
9422static __inline__ __m512i __DEFAULT_FN_ATTRS
9423_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9424{
9425  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9426              (__v16si) __W,
9427              (__mmask16) __U);
9428}
9429
9430static __inline__ __m512i __DEFAULT_FN_ATTRS
9431_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9432{
9433  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9434              (__v16si) _mm512_setzero_ps(),
9435              (__mmask16) __U);
9436}
9437
9438static __inline__ __m512 __DEFAULT_FN_ATTRS
9439_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9440{
9441  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9442               (__v16sf) __W,
9443               (__mmask16) __U);
9444}
9445
9446static __inline__ __m512 __DEFAULT_FN_ATTRS
9447_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9448{
9449  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9450               (__v16sf) _mm512_setzero_ps(),
9451               (__mmask16) __U);
9452}
9453
9454static __inline__ __m512i __DEFAULT_FN_ATTRS
9455_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9456{
9457  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9458                (__v16si) __W,
9459                (__mmask16) __U);
9460}
9461
9462static __inline__ __m512i __DEFAULT_FN_ATTRS
9463_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9464{
9465  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9466                (__v16si) _mm512_setzero_ps(),
9467                (__mmask16) __U);
9468}
9469
/* Calls __builtin_ia32_cvtps2pd512_mask on the __m256 value A with an
 * undefined second operand, an all-ones 8-bit mask and R as the final int
 * argument. */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
9474
/* Calls __builtin_ia32_cvtps2pd512_mask on the __m256 value A with W as the
 * second operand, U as the 8-bit mask and R as the final int argument. */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
9479
/* Calls __builtin_ia32_cvtps2pd512_mask on the __m256 value A with an
 * all-zero second operand, U as the 8-bit mask and R as the final int
 * argument. */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask( \
      (__v8sf)(__m256)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
9484
9485static __inline__ __m512d __DEFAULT_FN_ATTRS
9486_mm512_cvtps_pd (__m256 __A)
9487{
9488  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9489                (__v8df)
9490                _mm512_undefined_pd (),
9491                (__mmask8) -1,
9492                _MM_FROUND_CUR_DIRECTION);
9493}
9494
9495static __inline__ __m512d __DEFAULT_FN_ATTRS
9496_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9497{
9498  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9499                (__v8df) __W,
9500                (__mmask8) __U,
9501                _MM_FROUND_CUR_DIRECTION);
9502}
9503
9504static __inline__ __m512d __DEFAULT_FN_ATTRS
9505_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9506{
9507  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9508                (__v8df)
9509                _mm512_setzero_pd (),
9510                (__mmask8) __U,
9511                _MM_FROUND_CUR_DIRECTION);
9512}
9513
9514static __inline__ __m512 __DEFAULT_FN_ATTRS
9515_mm512_cvtpslo_pd (__m512 __A)
9516{
9517  return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9518}
9519
9520static __inline__ __m512 __DEFAULT_FN_ATTRS
9521_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
9522{
9523  return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9524}
9525
9526static __inline__ __m512d __DEFAULT_FN_ATTRS
9527_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
9528{
9529  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9530              (__v8df) __A,
9531              (__v8df) __W);
9532}
9533
9534static __inline__ __m512d __DEFAULT_FN_ATTRS
9535_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
9536{
9537  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9538              (__v8df) __A,
9539              (__v8df) _mm512_setzero_pd ());
9540}
9541
9542static __inline__ __m512 __DEFAULT_FN_ATTRS
9543_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
9544{
9545  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9546             (__v16sf) __A,
9547             (__v16sf) __W);
9548}
9549
9550static __inline__ __m512 __DEFAULT_FN_ATTRS
9551_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
9552{
9553  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9554             (__v16sf) __A,
9555             (__v16sf) _mm512_setzero_ps ());
9556}
9557
9558static __inline__ void __DEFAULT_FN_ATTRS
9559_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9560{
9561  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9562            (__mmask8) __U);
9563}
9564
9565static __inline__ void __DEFAULT_FN_ATTRS
9566_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9567{
9568  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9569            (__mmask8) __U);
9570}
9571
9572static __inline__ void __DEFAULT_FN_ATTRS
9573_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9574{
9575  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9576            (__mmask16) __U);
9577}
9578
9579static __inline__ void __DEFAULT_FN_ATTRS
9580_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9581{
9582  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9583            (__mmask16) __U);
9584}
9585
/* Calls __builtin_ia32_cvtsd2ss_round_mask with A (__m128), B (__m128d), an
 * undefined third operand, an all-ones 8-bit mask and R as the final int
 * argument. */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)_mm_undefined_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
9591
/* Calls __builtin_ia32_cvtsd2ss_round_mask with A (__m128), B (__m128d), W as
 * the third operand, U as the 8-bit mask and R as the final int argument. */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
9597
/* Calls __builtin_ia32_cvtsd2ss_round_mask with A (__m128), B (__m128d), an
 * all-zero third operand, U as the 8-bit mask and R as the final int
 * argument. */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v2df)(__m128d)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
9603
9604static __inline__ __m128 __DEFAULT_FN_ATTRS
9605_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9606{
9607  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9608                                             (__v2df)(__B),
9609                                             (__v4sf)(__W),
9610                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9611}
9612
9613static __inline__ __m128 __DEFAULT_FN_ATTRS
9614_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9615{
9616  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9617                                             (__v2df)(__B),
9618                                             (__v4sf)_mm_setzero_ps(),
9619                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9620}
9621
/* "i32"/"i64" spellings that are plain aliases for the corresponding
 * "si32"/"si64" scalar conversion intrinsics. */

/* Scalar floating point -> 32-bit integer. */
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvtss_i32 _mm_cvtss_si32

/* 32-bit integer -> scalar floating point. */
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss

#ifdef __x86_64__
/* Scalar floating point -> 64-bit integer (x86-64 only). */
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvtss_i64 _mm_cvtss_si64

/* 64-bit integer -> scalar floating point (x86-64 only). */
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
9632
#ifdef __x86_64__
/* Calls __builtin_ia32_cvtsi2sd64 with A (__m128d), B converted to long long
 * and R as the final int argument.  Only defined on x86-64. */
#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64( \
      (__v2df)(__m128d)(A), \
      (long long)(B), \
      (int)(R)); })

/* Alternate spelling; expands to exactly the same builtin call. */
#define _mm_cvt_roundi64_sd(A, B, R) _mm_cvt_roundsi64_sd((A), (B), (R))
#endif
9642
/* Calls __builtin_ia32_cvtsi2ss32 with A (__m128), B converted to int and R
 * as the final int argument. */
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32( \
      (__v4sf)(__m128)(A), \
      (int)(B), \
      (int)(R)); })

/* Alternate spelling; expands to exactly the same builtin call. */
#define _mm_cvt_roundi32_ss(A, B, R) _mm_cvt_roundsi32_ss((A), (B), (R))
9648
#ifdef __x86_64__
/* Calls __builtin_ia32_cvtsi2ss64 with A (__m128), B converted to long long
 * and R as the final int argument.  Only defined on x86-64. */
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64( \
      (__v4sf)(__m128)(A), \
      (long long)(B), \
      (int)(R)); })

/* Alternate spelling; expands to exactly the same builtin call. */
#define _mm_cvt_roundi64_ss(A, B, R) _mm_cvt_roundsi64_ss((A), (B), (R))
#endif
9658
/* Calls __builtin_ia32_cvtss2sd_round_mask with A (__m128d), B (__m128), an
 * undefined third operand, an all-ones 8-bit mask and R as the final int
 * argument. */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)_mm_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
9664
/* Calls __builtin_ia32_cvtss2sd_round_mask with A (__m128d), B (__m128), W as
 * the third operand, U as the 8-bit mask and R as the final int argument. */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
9670
/* Calls __builtin_ia32_cvtss2sd_round_mask with A (__m128d), B (__m128), an
 * all-zero third operand, U as the 8-bit mask and R as the final int
 * argument. */
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v4sf)(__m128)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
9676
9677static __inline__ __m128d __DEFAULT_FN_ATTRS
9678_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9679{
9680  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9681                                              (__v4sf)(__B),
9682                                              (__v2df)(__W),
9683                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9684}
9685
9686static __inline__ __m128d __DEFAULT_FN_ATTRS
9687_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9688{
9689  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9690                                              (__v4sf)(__B),
9691                                              (__v2df)_mm_setzero_pd(),
9692                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9693}
9694
9695static __inline__ __m128d __DEFAULT_FN_ATTRS
9696_mm_cvtu32_sd (__m128d __A, unsigned __B)
9697{
9698  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
9699}
9700
9701#ifdef __x86_64__
9702#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
9703  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9704                                      (unsigned long long)(B), (int)(R)); })
9705
9706static __inline__ __m128d __DEFAULT_FN_ATTRS
9707_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9708{
9709  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
9710                 _MM_FROUND_CUR_DIRECTION);
9711}
9712#endif
9713
/* Calls __builtin_ia32_cvtusi2ss32 with A (__m128), B converted to unsigned
 * int and R as the final int argument. */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32( \
      (__v4sf)(__m128)(A), \
      (unsigned int)(B), \
      (int)(R)); })
9717
9718static __inline__ __m128 __DEFAULT_FN_ATTRS
9719_mm_cvtu32_ss (__m128 __A, unsigned __B)
9720{
9721  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
9722                _MM_FROUND_CUR_DIRECTION);
9723}
9724
9725#ifdef __x86_64__
9726#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
9727  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9728                                     (unsigned long long)(B), (int)(R)); })
9729
9730static __inline__ __m128 __DEFAULT_FN_ATTRS
9731_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9732{
9733  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
9734                _MM_FROUND_CUR_DIRECTION);
9735}
9736#endif
9737
9738static __inline__ __m512i __DEFAULT_FN_ATTRS
9739_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9740{
9741  return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
9742                 __M);
9743}
9744
9745#ifdef __x86_64__
9746static __inline__ __m512i __DEFAULT_FN_ATTRS
9747_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9748{
9749  return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
9750                 __M);
9751}
9752#endif
9753
9754static  __inline __m512i __DEFAULT_FN_ATTRS
9755_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9756    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9757    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9758    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9759    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9760    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9761    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9762    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9763    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9764    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9765    char __e4, char __e3, char __e2, char __e1, char __e0) {
9766
9767  return __extension__ (__m512i)(__v64qi)
9768    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9769     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9770     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9771     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9772     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9773     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9774     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9775     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9776}
9777
9778static  __inline __m512i __DEFAULT_FN_ATTRS
9779_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9780    short __e27, short __e26, short __e25, short __e24, short __e23,
9781    short __e22, short __e21, short __e20, short __e19, short __e18,
9782    short __e17, short __e16, short __e15, short __e14, short __e13,
9783    short __e12, short __e11, short __e10, short __e9, short __e8,
9784    short __e7, short __e6, short __e5, short __e4, short __e3,
9785    short __e2, short __e1, short __e0) {
9786  return __extension__ (__m512i)(__v32hi)
9787    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9788     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9789     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9790     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9791}
9792
9793static __inline __m512i __DEFAULT_FN_ATTRS
9794_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9795     int __E, int __F, int __G, int __H,
9796     int __I, int __J, int __K, int __L,
9797     int __M, int __N, int __O, int __P)
9798{
9799  return __extension__ (__m512i)(__v16si)
9800  { __P, __O, __N, __M, __L, __K, __J, __I,
9801    __H, __G, __F, __E, __D, __C, __B, __A };
9802}
9803
/* Reversed-order form of _mm512_set_epi32: e0 is given first and is passed
 * as the last argument of _mm512_set_epi32, e15 as the first. */
#define _mm512_setr_epi32(e0, e1, e2, e3, e4, e5, e6, e7, \
                          e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_epi32((e15), (e14), (e13), (e12), \
                   (e11), (e10), (e9),  (e8), \
                   (e7),  (e6),  (e5),  (e4), \
                   (e3),  (e2),  (e1),  (e0))
9808
9809static __inline__ __m512i __DEFAULT_FN_ATTRS
9810_mm512_set_epi64 (long long __A, long long __B, long long __C,
9811     long long __D, long long __E, long long __F,
9812     long long __G, long long __H)
9813{
9814  return __extension__ (__m512i) (__v8di)
9815  { __H, __G, __F, __E, __D, __C, __B, __A };
9816}
9817
/* Reversed-order form of _mm512_set_epi64: e0 is given first and is passed
 * as the last argument of _mm512_set_epi64, e7 as the first. */
#define _mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_epi64((e7), (e6), (e5), (e4), \
                   (e3), (e2), (e1), (e0))
9820
9821static __inline__ __m512d __DEFAULT_FN_ATTRS
9822_mm512_set_pd (double __A, double __B, double __C, double __D,
9823        double __E, double __F, double __G, double __H)
9824{
9825  return __extension__ (__m512d)
9826  { __H, __G, __F, __E, __D, __C, __B, __A };
9827}
9828
/* Reversed-order form of _mm512_set_pd: e0 is given first and is passed as
 * the last argument of _mm512_set_pd, e7 as the first. */
#define _mm512_setr_pd(e0, e1, e2, e3, e4, e5, e6, e7) \
  _mm512_set_pd((e7), (e6), (e5), (e4), \
                (e3), (e2), (e1), (e0))
9831
9832static __inline__ __m512 __DEFAULT_FN_ATTRS
9833_mm512_set_ps (float __A, float __B, float __C, float __D,
9834        float __E, float __F, float __G, float __H,
9835        float __I, float __J, float __K, float __L,
9836        float __M, float __N, float __O, float __P)
9837{
9838  return __extension__ (__m512)
9839  { __P, __O, __N, __M, __L, __K, __J, __I,
9840    __H, __G, __F, __E, __D, __C, __B, __A };
9841}
9842
/* Reversed-order form of _mm512_set_ps: e0 is given first and is passed as
 * the last argument of _mm512_set_ps, e15 as the first. */
#define _mm512_setr_ps(e0, e1, e2, e3, e4, e5, e6, e7, \
                       e8, e9, e10, e11, e12, e13, e14, e15) \
  _mm512_set_ps((e15), (e14), (e13), (e12), \
                (e11), (e10), (e9),  (e8), \
                (e7),  (e6),  (e5),  (e4), \
                (e3),  (e2),  (e1),  (e0))
9846
9847static __inline__ __m512 __DEFAULT_FN_ATTRS
9848_mm512_abs_ps(__m512 __A)
9849{
9850  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9851}
9852
9853static __inline__ __m512 __DEFAULT_FN_ATTRS
9854_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9855{
9856  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9857}
9858
9859static __inline__ __m512d __DEFAULT_FN_ATTRS
9860_mm512_abs_pd(__m512d __A)
9861{
9862  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9863}
9864
9865static __inline__ __m512d __DEFAULT_FN_ATTRS
9866_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9867{
9868  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9869}
9870
// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
// outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation is
// independent of the order of the input elements of the vector V.
9875
// The reductions use the bisection method. At each step, the vector produced
// by the previous step is split in half and the operation is applied to its
// two halves.
// This takes log2(n) steps, where n is the number of elements in the vector.
9879
// Reduces the eight 64-bit elements of Vec512 to one scalar: 512 -> 256 ->
// 128 bits, combining the two halves with Operator at each step, then
// combining the last two elements.
//
// Vec512   - Vector with size 512. It is expanded several times, so the
//            argument must be free of side effects. It is parenthesized in
//            the expansion so that any expression is cast as a whole.
// Operator - Can be one of following: +,*,&,|
// T2       - Pasted onto __v8d/__v4d/__v2d: 'i' for int and 'f' for float.
// T1       - Pasted onto __m256/__m128: 'i' for int and 'd' for double.
//
// NOTE: the expansion ends with `return Vec128[0];`, so this macro can only be
// used as the body of a function that returns the element type.

#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 Vec256 = __builtin_shufflevector(                       \
                            (__v8d##T2)(Vec512),                       \
                            (__v8d##T2)(Vec512),                       \
                            0, 1, 2, 3)                                \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v8d##T2)(Vec512),                       \
                            (__v8d##T2)(Vec512),                       \
                            4, 5, 6, 7);                               \
    __m128##T1 Vec128 = __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            0, 1)                                      \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            2, 3);                                     \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 0, -1)         \
             Operator                                                  \
             __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 1, -1);        \
    return Vec128[0];                                                  \
  })
9912
9913static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
9914  _mm512_reduce_operator_64bit(__W, +, i, i);
9915}
9916
9917static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
9918  _mm512_reduce_operator_64bit(__W, *, i, i);
9919}
9920
9921static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
9922  _mm512_reduce_operator_64bit(__W, &, i, i);
9923}
9924
9925static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
9926  _mm512_reduce_operator_64bit(__W, |, i, i);
9927}
9928
9929static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
9930  _mm512_reduce_operator_64bit(__W, +, f, d);
9931}
9932
9933static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
9934  _mm512_reduce_operator_64bit(__W, *, f, d);
9935}
9936
// Masked 64-bit reduction: first overwrites Vec512 with the result of
// __builtin_ia32_select<T3>_512(Mask, Vec512, Vec512Neutral), then reduces it
// with _mm512_reduce_operator_64bit.
//
// Vec512        - Vector with size 512. Must be a modifiable lvalue without
//                 side effects: it is assigned to and expanded several times.
// Vec512Neutral - All vector elements set to the identity element.
//                 Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
// Operator      - Can be one of following: +,*,&,|
// Mask          - Intrinsic Mask
// T2            - Can get 'i' for int and 'f' for float.
// T1            - Can get 'i' for int and 'd' for packed double-precision.
// T3            - Pasted into the select builtin name: 'pd' for packed double
//                 or 'q' for q-word.
//
// Mask, Vec512 and Vec512Neutral are parenthesized where they are cast so
// that an expression argument is converted as a whole.
//
// NOTE: the nested reduction macro executes `return`, so this macro can only
// be used as the body of a function that returns the element type.

#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = __builtin_ia32_select##T3##_512(                                  \
                 (__mmask8)(Mask),                                             \
                 (__v8d##T2)(Vec512),                                          \
                 (__v8d##T2)(Vec512Neutral));                                  \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
  })
9955
9956static __inline__ long long __DEFAULT_FN_ATTRS
9957_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
9958  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
9959}
9960
9961static __inline__ long long __DEFAULT_FN_ATTRS
9962_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
9963  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
9964}
9965
9966static __inline__ long long __DEFAULT_FN_ATTRS
9967_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
9968  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
9969                                    &, __M,  i, i, q);
9970}
9971
9972static __inline__ long long __DEFAULT_FN_ATTRS
9973_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
9974  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
9975                                    i, i, q);
9976}
9977
9978static __inline__ double __DEFAULT_FN_ATTRS
9979_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
9980  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
9981                                    f, d, pd);
9982}
9983
9984static __inline__ double __DEFAULT_FN_ATTRS
9985_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
9986  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
9987                                    f, d, pd);
9988}
9989
// Reduces the sixteen 32-bit elements of Vec512 to one scalar: 512 -> 256 ->
// 128 bits, combining the two halves with Operator at each step, then
// combining elements {0,1} with {2,3} and finally element 0 with element 1.
//
// Vec512   - Vector with size 512. It is expanded several times, so the
//            argument must be free of side effects. It is parenthesized in
//            the expansion so that any expression is cast as a whole.
// Operator - Can be one of following: +,*,&,|
// T2       - Pasted onto __v16s/__v8s/__v4s: 'i' for int and 'f' for float.
// T1       - Pasted onto __m256/__m128: 'i' for int and ' ' (empty) for
//            packed single.
//
// NOTE: the expansion ends with `return Vec128[0];`, so this macro can only be
// used as the body of a function that returns the element type.

#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
    __m256##T1 Vec256 =                                                        \
            (__m256##T1)(__builtin_shufflevector(                              \
                                    (__v16s##T2)(Vec512),                      \
                                    (__v16s##T2)(Vec512),                      \
                                    0, 1, 2, 3, 4, 5, 6, 7)                    \
                                Operator                                       \
                         __builtin_shufflevector(                              \
                                    (__v16s##T2)(Vec512),                      \
                                    (__v16s##T2)(Vec512),                      \
                                    8, 9, 10, 11, 12, 13, 14, 15));            \
    __m128##T1 Vec128 =                                                        \
             (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    0, 1, 2, 3)                                \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v8s##T2)Vec256,                         \
                                    (__v8s##T2)Vec256,                         \
                                    4, 5, 6, 7));                              \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, 1, -1, -1)                              \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    2, 3, -1, -1));                            \
    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    0, -1, -1, -1)                             \
                                Operator                                       \
                          __builtin_shufflevector(                             \
                                    (__v4s##T2)Vec128,                         \
                                    (__v4s##T2)Vec128,                         \
                                    1, -1, -1, -1));                           \
    return Vec128[0];                                                          \
  })
10036
// Reduces the sixteen 32-bit integer lanes of __W with '+'.
// The expansion of _mm512_reduce_operator_32bit ends in the return statement.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_add_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, +, i, i);
}
10041
// Reduces the sixteen 32-bit integer lanes of __W with '*'.
// The expansion of _mm512_reduce_operator_32bit ends in the return statement.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_mul_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, *, i, i);
}
10046
// Reduces the sixteen 32-bit integer lanes of __W with bitwise '&'.
// The expansion of _mm512_reduce_operator_32bit ends in the return statement.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_and_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, &, i, i);
}
10051
// Reduces the sixteen 32-bit integer lanes of __W with bitwise '|'.
// The expansion of _mm512_reduce_operator_32bit ends in the return statement.
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_reduce_or_epi32(__m512i __W) {
  _mm512_reduce_operator_32bit(__W, |, i, i);
}
10056
// Reduces the sixteen float lanes of __W with '+'.
// The last macro argument is intentionally empty: it is pasted after
// __m256/__m128 to name the packed-single vector types. The macro expansion
// ends in the return statement.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_add_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, +, f, );
}
10061
// Reduces the sixteen float lanes of __W with '*'.
// The last macro argument is intentionally empty: it is pasted after
// __m256/__m128 to name the packed-single vector types. The macro expansion
// ends in the return statement.
static __inline__ float __DEFAULT_FN_ATTRS
_mm512_reduce_mul_ps(__m512 __W) {
  _mm512_reduce_operator_32bit(__W, *, f, );
}
10066
// Masked variant of _mm512_reduce_operator_32bit.
//
// Vec512 - Vector with size 512. It is overwritten by the macro.
// Vec512Neutral - All vector elements set to the identity element.
// Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
// Operator - Can be one of following: +,*,&,|
// Mask - Intrinsic Mask
// T2 - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
// T1 - Can get 'i' for int and empty for packed single. [__m512{i|}]
// T3 - Can be 'ps' for packed single or 'd' for d-word.
//      [__builtin_ia32_select{ps|d}_512]
//
// The select builtin receives Mask, Vec512 and Vec512Neutral (in that order)
// and its result is stored back into Vec512 before the unmasked reduction is
// expanded; presumably lanes whose mask bit is clear take the identity
// element and therefore do not influence the result.

#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask16)Mask,                                  \
                             (__v16s##T2)Vec512,                               \
                             (__v16s##T2)Vec512Neutral);                       \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
  })
10085
10086static __inline__ int __DEFAULT_FN_ATTRS
10087_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
10088  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
10089}
10090
10091static __inline__ int __DEFAULT_FN_ATTRS
10092_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
10093  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
10094}
10095
10096static __inline__ int __DEFAULT_FN_ATTRS
10097_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
10098  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
10099                                    i, i, d);
10100}
10101
10102static __inline__ int __DEFAULT_FN_ATTRS
10103_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
10104  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
10105}
10106
10107static __inline__ float __DEFAULT_FN_ATTRS
10108_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
10109  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
10110}
10111
10112static __inline__ float __DEFAULT_FN_ATTRS
10113_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
10114  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
10115}
10116
// The reduction uses a bisection method. At each step, the vector produced by
// the previous step is split in half and the operation is performed on its
// two halves.
// This takes log2(n) steps where n is the number of elements in the vector.
// This macro uses only intrinsics from the AVX512F feature.

// Vec512 - Vector with size of 512. It is overwritten at every step.
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
//
// Shuffle index -1 fills the lanes that are not needed for the final value.
// NOTE: the expansion ends with `return Vec512[0];`, so it returns from the
// function in which the macro is expanded.

#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, 2, 3, -1, -1, -1, -1),  \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 4, 5, 6, 7, -1, -1, -1, -1)); \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 0, 1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                 2, 3, -1, -1, -1, -1, -1,     \
                                                 -1));                         \
        Vec512 = _mm512_##IntrinName(                                          \
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                0, -1, -1, -1, -1, -1, -1, -1),\
                                (__m512##T1)__builtin_shufflevector(           \
                                                (__v8d##T2)Vec512,             \
                                                (__v8d##T2)Vec512,             \
                                                1, -1, -1, -1, -1, -1, -1, -1))\
                                                ;                              \
    return Vec512[0];                                                          \
  })
10160
// Reduces the eight 64-bit lanes of __V with _mm512_max_epi64.
// The expansion of _mm512_reduce_maxMin_64bit overwrites __V and ends in the
// return statement.
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epi64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
}
10165
// Reduces the eight 64-bit lanes of __V with _mm512_max_epu64.
// The expansion of _mm512_reduce_maxMin_64bit overwrites __V and ends in the
// return statement; the value is converted to unsigned long long on return.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_max_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
}
10170
// Reduces the eight double lanes of __V with _mm512_max_pd.
// The expansion of _mm512_reduce_maxMin_64bit overwrites __V and ends in the
// return statement.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_max_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
}
10175
// Reduces the eight 64-bit lanes of __V with _mm512_min_epi64.
// The expansion of _mm512_reduce_maxMin_64bit overwrites __V and ends in the
// return statement.
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
}
10180
// Reduces the eight 64-bit lanes of __V with _mm512_min_epu64.
// The expansion of _mm512_reduce_maxMin_64bit overwrites __V and ends in the
// return statement; the value is converted to unsigned long long on return.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm512_reduce_min_epu64(__m512i __V) {
  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
}
10185
// Reduces the eight double lanes of __V with _mm512_min_pd.
// The expansion of _mm512_reduce_maxMin_64bit overwrites __V and ends in the
// return statement.
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_reduce_min_pd(__m512d __V) {
  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
}
10190
// Masked variant of _mm512_reduce_maxMin_64bit.
//
// Vec512 - Vector with size 512. It is overwritten by the macro.
// Vec512Neutral - A 512 length vector with elements set to the identity element
// Identity element: {max_epi,0x8000000000000000}
//                   {max_epu,0x0000000000000000}
//                   {max_pd, 0xFFF0000000000000}
//                   {min_epi,0x7FFFFFFFFFFFFFFF}
//                   {min_epu,0xFFFFFFFFFFFFFFFF}
//                   {min_pd, 0x7FF0000000000000}
//
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd} for example:
//              _mm512_max_epi64
// T1 - Can get 'i' for int and 'd' for double.[__m512{i|d}]
// T2 - Can get 'i' for int and 'f' for float. [__v8d{i|f}]
// T3 - Can get 'q' q word and 'pd' for packed double.
//      [__builtin_ia32_select{q|pd}_512]
// Mask - Intrinsic Mask
//
// The select builtin receives Mask, Vec512 and Vec512Neutral (in that order)
// and its result is stored back into Vec512 before the unmasked reduction is
// expanded; presumably lanes whose mask bit is clear take the identity
// element and therefore do not influence the result.

#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                             (__mmask8)Mask,                                   \
                             (__v8d##T2)Vec512,                                \
                             (__v8d##T2)Vec512Neutral);                        \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
10217
10218static __inline__ long long __DEFAULT_FN_ATTRS
10219_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
10220  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
10221                                  max_epi64, i, i, q, __M);
10222}
10223
10224static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10225_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
10226  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
10227                                  max_epu64, i, i, q, __M);
10228}
10229
10230static __inline__ double __DEFAULT_FN_ATTRS
10231_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
10232  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
10233                                  max_pd, d, f, pd, __M);
10234}
10235
10236static __inline__ long long __DEFAULT_FN_ATTRS
10237_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
10238  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
10239                                  min_epi64, i, i, q, __M);
10240}
10241
10242static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10243_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
10244  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
10245                                  min_epu64, i, i, q, __M);
10246}
10247
10248static __inline__ double __DEFAULT_FN_ATTRS
10249_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
10250  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
10251                                  min_pd, d, f, pd, __M);
10252}
10253
// Max/min reduction over the sixteen 32-bit lanes of a 512-bit vector.
//
// Vec512 - Vector with size 512. It is overwritten at every step.
// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
//              _mm512_max_epi32
// T1 - Can get 'i' for int and empty for packed single. [__m512{i|}]
// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
//
// Four halving steps (16 -> 8 -> 4 -> 2 -> 1 lanes): each step applies
// _mm512_##IntrinName to the low and the high half of the previous result.
// Shuffle index -1 fills the lanes that are not needed for the final value.
// NOTE: the expansion ends with `return Vec512[0];`, so it returns from the
// function in which the macro is expanded.
// NOTE(review): Vec512 keeps the caller's vector type (e.g. __m512i), so the
// element type of Vec512[0] depends on how that type is declared; confirm the
// implicit conversion to the 32-bit return type is intended.

#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, 4, 5, 6, 7,                      \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  8, 9, 10, 11, 12, 13, 14, 15,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, 2, 3, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  4, 5, 6, 7, -1, -1, -1, -1,                  \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0, 1, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  2, 3, -1, -1, -1, -1, -1, -1,                \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    Vec512 = _mm512_##IntrinName(                                              \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  0,  -1, -1, -1, -1, -1, -1, -1,              \
                                  -1, -1, -1, -1, -1, -1, -1, -1),             \
                  (__m512##T1)__builtin_shufflevector(                         \
                                  (__v16s##T2)Vec512,                          \
                                  (__v16s##T2)Vec512,                          \
                                  1, -1, -1, -1, -1, -1, -1, -1,               \
                                  -1, -1, -1, -1, -1, -1, -1, -1));            \
    return Vec512[0];                                                          \
  })
10307
10308static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
10309  _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
10310}
10311
10312static __inline__ unsigned int __DEFAULT_FN_ATTRS
10313_mm512_reduce_max_epu32(__m512i a) {
10314  _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
10315}
10316
10317static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
10318  _mm512_reduce_maxMin_32bit(a, max_ps, , f);
10319}
10320
10321static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
10322  _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
10323}
10324
10325static __inline__ unsigned int __DEFAULT_FN_ATTRS
10326_mm512_reduce_min_epu32(__m512i a) {
10327  _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
10328}
10329
10330static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
10331  _mm512_reduce_maxMin_32bit(a, min_ps, , f);
10332}
10333
// Masked variant of _mm512_reduce_maxMin_32bit.
//
// Vec512 - Vector with size 512. It is overwritten by the macro.
// Vec512Neutral - A 512 length vector with elements set to the identity element
// Identity element: {max_epi,0x80000000}
//                   {max_epu,0x00000000}
//                   {max_ps, 0xFF800000}
//                   {min_epi,0x7FFFFFFF}
//                   {min_epu,0xFFFFFFFF}
//                   {min_ps, 0x7F800000}
//
// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps} for example:
//              _mm512_max_epi32
// T1 - Can get 'i' for int and empty for packed single. [__m512{i|}]
// T2 - Can get 'i' for int and 'f' for float.[__v16s{i|f}]
// T3 - Can get 'd' for d-word and 'ps' for packed single.
//      [__builtin_ia32_select{d|ps}_512]
// Mask - Intrinsic Mask
//
// The select builtin receives Mask, Vec512 and Vec512Neutral (in that order)
// and its result is stored back into Vec512 before the unmasked reduction is
// expanded; presumably lanes whose mask bit is clear take the identity
// element and therefore do not influence the result.

#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
                                        (__mmask16)Mask,                       \
                                        (__v16s##T2)Vec512,                    \
                                        (__v16s##T2)Vec512Neutral);            \
   _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                     \
   })
10360
10361static __inline__ int __DEFAULT_FN_ATTRS
10362_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
10363  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
10364                                  i, i, d, __M);
10365}
10366
10367static __inline__ unsigned int __DEFAULT_FN_ATTRS
10368_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
10369  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
10370                                  i, i, d, __M);
10371}
10372
10373static __inline__ float __DEFAULT_FN_ATTRS
10374_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
10375  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
10376                                  ps, __M);
10377}
10378
10379static __inline__ int __DEFAULT_FN_ATTRS
10380_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
10381  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
10382                                  i, i, d, __M);
10383}
10384
10385static __inline__ unsigned int __DEFAULT_FN_ATTRS
10386_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
10387  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
10388                                  i, i, d, __M);
10389}
10390
10391static __inline__ float __DEFAULT_FN_ATTRS
10392_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
10393  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
10394                                  ps, __M);
10395}
10396
10397#undef __DEFAULT_FN_ATTRS
10398
10399#endif // __AVX512FINTRIN_H
10400