1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512FINTRIN_H
28#define __AVX512FINTRIN_H
29
/* Internal 64-byte vector views with integer elements. */
typedef char      __v64qi __attribute__((__vector_size__(64)));
typedef short     __v32hi __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));

/* Internal 64-byte vector views with floating-point elements. */
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef double    __v8df  __attribute__((__vector_size__(64)));

/* Internal 64-byte vector views with unsigned integer elements. */
typedef unsigned char      __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short     __v32hu __attribute__((__vector_size__(64)));
typedef unsigned int       __v16su __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du  __attribute__((__vector_size__(64)));

/* Public 64-byte vector types: float, double and integer (long long) lanes. */
typedef float     __m512  __attribute__((__vector_size__(64)));
typedef double    __m512d __attribute__((__vector_size__(64)));
typedef long long __m512i __attribute__((__vector_size__(64)));

/* Mask types: plain unsigned integers of 8 and 16 bits' worth of storage. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
49
/* Rounding mode macros.  Judging by the names, the values select rounding to
   nearest, toward negative infinity, toward positive infinity, toward zero,
   or "use the current direction" respectively.
   NOTE(review): other x86 intrinsic headers may define these same names; if
   so, the replacement lists must stay token-identical to avoid macro
   redefinition diagnostics -- confirm before respelling any value.  */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
56
/* Constants for integer comparison predicates.  Values are written out
   explicitly; slot 3 is reserved by _MM_CMPINT_UNUSED. */
typedef enum {
    _MM_CMPINT_EQ     = 0,  /* Equal */
    _MM_CMPINT_LT     = 1,  /* Less than */
    _MM_CMPINT_LE     = 2,  /* Less than or Equal */
    _MM_CMPINT_UNUSED = 3,
    _MM_CMPINT_NE     = 4,  /* Not Equal */
    _MM_CMPINT_NLT    = 5,  /* Not Less than */
    _MM_CMPINT_NLE    = 6   /* Not Less than or Equal */
} _MM_CMPINT_ENUM;

/* Positive spellings of the two "not less" predicates. */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
69
/* Permutation selectors.  Each name carries four letters; every letter
   (A=0, B=1, C=2, D=3) fills one 2-bit field of the 8-bit value, with the
   first letter in the most significant field.  One row per three-letter
   prefix. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
159
/* Mantissa normalization interval selectors (values 0..3). */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0,  /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2    = 1,  /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1    = 2,  /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
167
/* Mantissa sign-handling selectors (values 0..2). */
typedef enum
{
  _MM_MANT_SIGN_src  = 0,  /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero = 1,  /* sign = 0             */
  _MM_MANT_SIGN_nan  = 2   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
174
/* Attribute set shared by the functions in this file: always inline, omit
   debug info, and compile for the "avx512f" target feature. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
177
178/* Create vectors with repeated elements */
179
180static  __inline __m512i __DEFAULT_FN_ATTRS
181_mm512_setzero_si512(void)
182{
183  return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
184}
185
186#define _mm512_setzero_epi32 _mm512_setzero_si512
187
188static __inline__ __m512d __DEFAULT_FN_ATTRS
189_mm512_undefined_pd(void)
190{
191  return (__m512d)__builtin_ia32_undef512();
192}
193
194static __inline__ __m512 __DEFAULT_FN_ATTRS
195_mm512_undefined(void)
196{
197  return (__m512)__builtin_ia32_undef512();
198}
199
200static __inline__ __m512 __DEFAULT_FN_ATTRS
201_mm512_undefined_ps(void)
202{
203  return (__m512)__builtin_ia32_undef512();
204}
205
206static __inline__ __m512i __DEFAULT_FN_ATTRS
207_mm512_undefined_epi32(void)
208{
209  return (__m512i)__builtin_ia32_undef512();
210}
211
212static __inline__ __m512i __DEFAULT_FN_ATTRS
213_mm512_broadcastd_epi32 (__m128i __A)
214{
215  return (__m512i)__builtin_shufflevector((__v4si) __A,
216                                          (__v4si)_mm_undefined_si128(),
217                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
218}
219
220static __inline__ __m512i __DEFAULT_FN_ATTRS
221_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
222{
223  return (__m512i)__builtin_ia32_selectd_512(__M,
224                                             (__v16si) _mm512_broadcastd_epi32(__A),
225                                             (__v16si) __O);
226}
227
228static __inline__ __m512i __DEFAULT_FN_ATTRS
229_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
230{
231  return (__m512i)__builtin_ia32_selectd_512(__M,
232                                             (__v16si) _mm512_broadcastd_epi32(__A),
233                                             (__v16si) _mm512_setzero_si512());
234}
235
236static __inline__ __m512i __DEFAULT_FN_ATTRS
237_mm512_broadcastq_epi64 (__m128i __A)
238{
239  return (__m512i)__builtin_shufflevector((__v2di) __A,
240                                          (__v2di) _mm_undefined_si128(),
241                                          0, 0, 0, 0, 0, 0, 0, 0);
242}
243
244static __inline__ __m512i __DEFAULT_FN_ATTRS
245_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
246{
247  return (__m512i)__builtin_ia32_selectq_512(__M,
248                                             (__v8di) _mm512_broadcastq_epi64(__A),
249                                             (__v8di) __O);
250
251}
252
253static __inline__ __m512i __DEFAULT_FN_ATTRS
254_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
255{
256  return (__m512i)__builtin_ia32_selectq_512(__M,
257                                             (__v8di) _mm512_broadcastq_epi64(__A),
258                                             (__v8di) _mm512_setzero_si512());
259}
260
261
262static __inline __m512 __DEFAULT_FN_ATTRS
263_mm512_setzero_ps(void)
264{
265  return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266                   0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
267}
268
269#define _mm512_setzero _mm512_setzero_ps
270
271static  __inline __m512d __DEFAULT_FN_ATTRS
272_mm512_setzero_pd(void)
273{
274  return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
275}
276
277static __inline __m512 __DEFAULT_FN_ATTRS
278_mm512_set1_ps(float __w)
279{
280  return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281                   __w, __w, __w, __w, __w, __w, __w, __w  };
282}
283
284static __inline __m512d __DEFAULT_FN_ATTRS
285_mm512_set1_pd(double __w)
286{
287  return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
288}
289
290static __inline __m512i __DEFAULT_FN_ATTRS
291_mm512_set1_epi8(char __w)
292{
293  return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w,
294                             __w, __w, __w, __w, __w, __w, __w, __w,
295                             __w, __w, __w, __w, __w, __w, __w, __w,
296                             __w, __w, __w, __w, __w, __w, __w, __w,
297                             __w, __w, __w, __w, __w, __w, __w, __w,
298                             __w, __w, __w, __w, __w, __w, __w, __w,
299                             __w, __w, __w, __w, __w, __w, __w, __w,
300                             __w, __w, __w, __w, __w, __w, __w, __w  };
301}
302
303static __inline __m512i __DEFAULT_FN_ATTRS
304_mm512_set1_epi16(short __w)
305{
306  return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w,
307                             __w, __w, __w, __w, __w, __w, __w, __w,
308                             __w, __w, __w, __w, __w, __w, __w, __w,
309                             __w, __w, __w, __w, __w, __w, __w, __w };
310}
311
312static __inline __m512i __DEFAULT_FN_ATTRS
313_mm512_set1_epi32(int __s)
314{
315  return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s,
316                             __s, __s, __s, __s, __s, __s, __s, __s };
317}
318
319static __inline __m512i __DEFAULT_FN_ATTRS
320_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
321{
322  return (__m512i)__builtin_ia32_selectd_512(__M,
323                                             (__v16si)_mm512_set1_epi32(__A),
324                                             (__v16si)_mm512_setzero_si512());
325}
326
327static __inline __m512i __DEFAULT_FN_ATTRS
328_mm512_set1_epi64(long long __d)
329{
330  return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
331}
332
333#ifdef __x86_64__
334static __inline __m512i __DEFAULT_FN_ATTRS
335_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
336{
337  return (__m512i)__builtin_ia32_selectq_512(__M,
338                                             (__v8di)_mm512_set1_epi64(__A),
339                                             (__v8di)_mm512_setzero_si512());
340}
341#endif
342
343static __inline__ __m512 __DEFAULT_FN_ATTRS
344_mm512_broadcastss_ps(__m128 __A)
345{
346  return (__m512)__builtin_shufflevector((__v4sf) __A,
347                                         (__v4sf)_mm_undefined_ps(),
348                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
349}
350
351static __inline __m512i __DEFAULT_FN_ATTRS
352_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
353{
354  return  (__m512i)(__v16si)
355   { __D, __C, __B, __A, __D, __C, __B, __A,
356     __D, __C, __B, __A, __D, __C, __B, __A };
357}
358
359static __inline __m512i __DEFAULT_FN_ATTRS
360_mm512_set4_epi64 (long long __A, long long __B, long long __C,
361       long long __D)
362{
363  return  (__m512i) (__v8di)
364   { __D, __C, __B, __A, __D, __C, __B, __A };
365}
366
367static __inline __m512d __DEFAULT_FN_ATTRS
368_mm512_set4_pd (double __A, double __B, double __C, double __D)
369{
370  return  (__m512d)
371   { __D, __C, __B, __A, __D, __C, __B, __A };
372}
373
374static __inline __m512 __DEFAULT_FN_ATTRS
375_mm512_set4_ps (float __A, float __B, float __C, float __D)
376{
377  return  (__m512)
378   { __D, __C, __B, __A, __D, __C, __B, __A,
379     __D, __C, __B, __A, __D, __C, __B, __A };
380}
381
/* "Reversed" variants of the set4 family: each forwards to the matching
   _mm512_set4_* with its four arguments in the opposite order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3) _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3) _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3) _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3) _mm512_set4_ps((e3),(e2),(e1),(e0))
393
394static __inline__ __m512d __DEFAULT_FN_ATTRS
395_mm512_broadcastsd_pd(__m128d __A)
396{
397  return (__m512d)__builtin_shufflevector((__v2df) __A,
398                                          (__v2df) _mm_undefined_pd(),
399                                          0, 0, 0, 0, 0, 0, 0, 0);
400}
401
402/* Cast between vector types */
403
404static __inline __m512d __DEFAULT_FN_ATTRS
405_mm512_castpd256_pd512(__m256d __a)
406{
407  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
408}
409
410static __inline __m512 __DEFAULT_FN_ATTRS
411_mm512_castps256_ps512(__m256 __a)
412{
413  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
414                                          -1, -1, -1, -1, -1, -1, -1, -1);
415}
416
417static __inline __m128d __DEFAULT_FN_ATTRS
418_mm512_castpd512_pd128(__m512d __a)
419{
420  return __builtin_shufflevector(__a, __a, 0, 1);
421}
422
423static __inline __m256d __DEFAULT_FN_ATTRS
424_mm512_castpd512_pd256 (__m512d __A)
425{
426  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
427}
428
429static __inline __m128 __DEFAULT_FN_ATTRS
430_mm512_castps512_ps128(__m512 __a)
431{
432  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
433}
434
435static __inline __m256 __DEFAULT_FN_ATTRS
436_mm512_castps512_ps256 (__m512 __A)
437{
438  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
439}
440
441static __inline __m512 __DEFAULT_FN_ATTRS
442_mm512_castpd_ps (__m512d __A)
443{
444  return (__m512) (__A);
445}
446
447static __inline __m512i __DEFAULT_FN_ATTRS
448_mm512_castpd_si512 (__m512d __A)
449{
450  return (__m512i) (__A);
451}
452
453static __inline__ __m512d __DEFAULT_FN_ATTRS
454_mm512_castpd128_pd512 (__m128d __A)
455{
456  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
457}
458
459static __inline __m512d __DEFAULT_FN_ATTRS
460_mm512_castps_pd (__m512 __A)
461{
462  return (__m512d) (__A);
463}
464
465static __inline __m512i __DEFAULT_FN_ATTRS
466_mm512_castps_si512 (__m512 __A)
467{
468  return (__m512i) (__A);
469}
470
471static __inline__ __m512 __DEFAULT_FN_ATTRS
472_mm512_castps128_ps512 (__m128 __A)
473{
474    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
475}
476
477static __inline__ __m512i __DEFAULT_FN_ATTRS
478_mm512_castsi128_si512 (__m128i __A)
479{
480   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
481}
482
483static __inline__ __m512i __DEFAULT_FN_ATTRS
484_mm512_castsi256_si512 (__m256i __A)
485{
486   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
487}
488
489static __inline __m512 __DEFAULT_FN_ATTRS
490_mm512_castsi512_ps (__m512i __A)
491{
492  return (__m512) (__A);
493}
494
495static __inline __m512d __DEFAULT_FN_ATTRS
496_mm512_castsi512_pd (__m512i __A)
497{
498  return (__m512d) (__A);
499}
500
501static __inline __m128i __DEFAULT_FN_ATTRS
502_mm512_castsi512_si128 (__m512i __A)
503{
504  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
505}
506
507static __inline __m256i __DEFAULT_FN_ATTRS
508_mm512_castsi512_si256 (__m512i __A)
509{
510  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
511}
512
513static __inline__ __mmask16 __DEFAULT_FN_ATTRS
514_mm512_int2mask(int __a)
515{
516  return (__mmask16)__a;
517}
518
519static __inline__ int __DEFAULT_FN_ATTRS
520_mm512_mask2int(__mmask16 __a)
521{
522  return (int)__a;
523}
524
525/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
526///    128-bit floating-point vector of [2 x double]. The lower 128 bits
527///    contain the value of the source vector. The upper 384 bits are set
528///    to zero.
529///
530/// \headerfile <x86intrin.h>
531///
532/// This intrinsic has no corresponding instruction.
533///
534/// \param __a
535///    A 128-bit vector of [2 x double].
536/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
537///    contain the value of the parameter. The upper 384 bits are set to zero.
538static __inline __m512d __DEFAULT_FN_ATTRS
539_mm512_zextpd128_pd512(__m128d __a)
540{
541  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
542}
543
544/// \brief Constructs a 512-bit floating-point vector of [8 x double] from a
545///    256-bit floating-point vector of [4 x double]. The lower 256 bits
546///    contain the value of the source vector. The upper 256 bits are set
547///    to zero.
548///
549/// \headerfile <x86intrin.h>
550///
551/// This intrinsic has no corresponding instruction.
552///
553/// \param __a
554///    A 256-bit vector of [4 x double].
555/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
556///    contain the value of the parameter. The upper 256 bits are set to zero.
557static __inline __m512d __DEFAULT_FN_ATTRS
558_mm512_zextpd256_pd512(__m256d __a)
559{
560  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
561}
562
563/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
564///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
565///    the value of the source vector. The upper 384 bits are set to zero.
566///
567/// \headerfile <x86intrin.h>
568///
569/// This intrinsic has no corresponding instruction.
570///
571/// \param __a
572///    A 128-bit vector of [4 x float].
573/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
574///    contain the value of the parameter. The upper 384 bits are set to zero.
575static __inline __m512 __DEFAULT_FN_ATTRS
576_mm512_zextps128_ps512(__m128 __a)
577{
578  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
579}
580
581/// \brief Constructs a 512-bit floating-point vector of [16 x float] from a
582///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
583///    the value of the source vector. The upper 256 bits are set to zero.
584///
585/// \headerfile <x86intrin.h>
586///
587/// This intrinsic has no corresponding instruction.
588///
589/// \param __a
590///    A 256-bit vector of [8 x float].
591/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
592///    contain the value of the parameter. The upper 256 bits are set to zero.
593static __inline __m512 __DEFAULT_FN_ATTRS
594_mm512_zextps256_ps512(__m256 __a)
595{
596  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
597}
598
599/// \brief Constructs a 512-bit integer vector from a 128-bit integer vector.
600///    The lower 128 bits contain the value of the source vector. The upper
601///    384 bits are set to zero.
602///
603/// \headerfile <x86intrin.h>
604///
605/// This intrinsic has no corresponding instruction.
606///
607/// \param __a
608///    A 128-bit integer vector.
609/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
610///    the parameter. The upper 384 bits are set to zero.
611static __inline __m512i __DEFAULT_FN_ATTRS
612_mm512_zextsi128_si512(__m128i __a)
613{
614  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
615}
616
617/// \brief Constructs a 512-bit integer vector from a 256-bit integer vector.
618///    The lower 256 bits contain the value of the source vector. The upper
619///    256 bits are set to zero.
620///
621/// \headerfile <x86intrin.h>
622///
623/// This intrinsic has no corresponding instruction.
624///
625/// \param __a
626///    A 256-bit integer vector.
627/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
628///    the parameter. The upper 256 bits are set to zero.
629static __inline __m512i __DEFAULT_FN_ATTRS
630_mm512_zextsi256_si512(__m256i __a)
631{
632  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
633}
634
635/* Bitwise operators */
636static __inline__ __m512i __DEFAULT_FN_ATTRS
637_mm512_and_epi32(__m512i __a, __m512i __b)
638{
639  return (__m512i)((__v16su)__a & (__v16su)__b);
640}
641
642static __inline__ __m512i __DEFAULT_FN_ATTRS
643_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
644{
645  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
646                (__v16si) _mm512_and_epi32(__a, __b),
647                (__v16si) __src);
648}
649
650static __inline__ __m512i __DEFAULT_FN_ATTRS
651_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
652{
653  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
654                                         __k, __a, __b);
655}
656
657static __inline__ __m512i __DEFAULT_FN_ATTRS
658_mm512_and_epi64(__m512i __a, __m512i __b)
659{
660  return (__m512i)((__v8du)__a & (__v8du)__b);
661}
662
663static __inline__ __m512i __DEFAULT_FN_ATTRS
664_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
665{
666    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
667                (__v8di) _mm512_and_epi64(__a, __b),
668                (__v8di) __src);
669}
670
671static __inline__ __m512i __DEFAULT_FN_ATTRS
672_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
673{
674  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
675                                         __k, __a, __b);
676}
677
678static __inline__ __m512i __DEFAULT_FN_ATTRS
679_mm512_andnot_si512 (__m512i __A, __m512i __B)
680{
681  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
682}
683
684static __inline__ __m512i __DEFAULT_FN_ATTRS
685_mm512_andnot_epi32 (__m512i __A, __m512i __B)
686{
687  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
688}
689
690static __inline__ __m512i __DEFAULT_FN_ATTRS
691_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
692{
693  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
694                                         (__v16si)_mm512_andnot_epi32(__A, __B),
695                                         (__v16si)__W);
696}
697
698static __inline__ __m512i __DEFAULT_FN_ATTRS
699_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
700{
701  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
702                                           __U, __A, __B);
703}
704
705static __inline__ __m512i __DEFAULT_FN_ATTRS
706_mm512_andnot_epi64(__m512i __A, __m512i __B)
707{
708  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
709}
710
711static __inline__ __m512i __DEFAULT_FN_ATTRS
712_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
713{
714  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
715                                          (__v8di)_mm512_andnot_epi64(__A, __B),
716                                          (__v8di)__W);
717}
718
719static __inline__ __m512i __DEFAULT_FN_ATTRS
720_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
721{
722  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
723                                           __U, __A, __B);
724}
725
726static __inline__ __m512i __DEFAULT_FN_ATTRS
727_mm512_or_epi32(__m512i __a, __m512i __b)
728{
729  return (__m512i)((__v16su)__a | (__v16su)__b);
730}
731
732static __inline__ __m512i __DEFAULT_FN_ATTRS
733_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
734{
735  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
736                                             (__v16si)_mm512_or_epi32(__a, __b),
737                                             (__v16si)__src);
738}
739
740static __inline__ __m512i __DEFAULT_FN_ATTRS
741_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
742{
743  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
744}
745
746static __inline__ __m512i __DEFAULT_FN_ATTRS
747_mm512_or_epi64(__m512i __a, __m512i __b)
748{
749  return (__m512i)((__v8du)__a | (__v8du)__b);
750}
751
752static __inline__ __m512i __DEFAULT_FN_ATTRS
753_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
754{
755  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
756                                             (__v8di)_mm512_or_epi64(__a, __b),
757                                             (__v8di)__src);
758}
759
760static __inline__ __m512i __DEFAULT_FN_ATTRS
761_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
762{
763  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
764}
765
766static __inline__ __m512i __DEFAULT_FN_ATTRS
767_mm512_xor_epi32(__m512i __a, __m512i __b)
768{
769  return (__m512i)((__v16su)__a ^ (__v16su)__b);
770}
771
772static __inline__ __m512i __DEFAULT_FN_ATTRS
773_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
774{
775  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
776                                            (__v16si)_mm512_xor_epi32(__a, __b),
777                                            (__v16si)__src);
778}
779
780static __inline__ __m512i __DEFAULT_FN_ATTRS
781_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
782{
783  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
784}
785
786static __inline__ __m512i __DEFAULT_FN_ATTRS
787_mm512_xor_epi64(__m512i __a, __m512i __b)
788{
789  return (__m512i)((__v8du)__a ^ (__v8du)__b);
790}
791
792static __inline__ __m512i __DEFAULT_FN_ATTRS
793_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
794{
795  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
796                                             (__v8di)_mm512_xor_epi64(__a, __b),
797                                             (__v8di)__src);
798}
799
800static __inline__ __m512i __DEFAULT_FN_ATTRS
801_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
802{
803  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
804}
805
806static __inline__ __m512i __DEFAULT_FN_ATTRS
807_mm512_and_si512(__m512i __a, __m512i __b)
808{
809  return (__m512i)((__v8du)__a & (__v8du)__b);
810}
811
812static __inline__ __m512i __DEFAULT_FN_ATTRS
813_mm512_or_si512(__m512i __a, __m512i __b)
814{
815  return (__m512i)((__v8du)__a | (__v8du)__b);
816}
817
818static __inline__ __m512i __DEFAULT_FN_ATTRS
819_mm512_xor_si512(__m512i __a, __m512i __b)
820{
821  return (__m512i)((__v8du)__a ^ (__v8du)__b);
822}
823
824/* Arithmetic */
825
826static __inline __m512d __DEFAULT_FN_ATTRS
827_mm512_add_pd(__m512d __a, __m512d __b)
828{
829  return (__m512d)((__v8df)__a + (__v8df)__b);
830}
831
832static __inline __m512 __DEFAULT_FN_ATTRS
833_mm512_add_ps(__m512 __a, __m512 __b)
834{
835  return (__m512)((__v16sf)__a + (__v16sf)__b);
836}
837
838static __inline __m512d __DEFAULT_FN_ATTRS
839_mm512_mul_pd(__m512d __a, __m512d __b)
840{
841  return (__m512d)((__v8df)__a * (__v8df)__b);
842}
843
844static __inline __m512 __DEFAULT_FN_ATTRS
845_mm512_mul_ps(__m512 __a, __m512 __b)
846{
847  return (__m512)((__v16sf)__a * (__v16sf)__b);
848}
849
850static __inline __m512d __DEFAULT_FN_ATTRS
851_mm512_sub_pd(__m512d __a, __m512d __b)
852{
853  return (__m512d)((__v8df)__a - (__v8df)__b);
854}
855
856static __inline __m512 __DEFAULT_FN_ATTRS
857_mm512_sub_ps(__m512 __a, __m512 __b)
858{
859  return (__m512)((__v16sf)__a - (__v16sf)__b);
860}
861
862static __inline__ __m512i __DEFAULT_FN_ATTRS
863_mm512_add_epi64 (__m512i __A, __m512i __B)
864{
865  return (__m512i) ((__v8du) __A + (__v8du) __B);
866}
867
868static __inline__ __m512i __DEFAULT_FN_ATTRS
869_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
870{
871  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
872                                             (__v8di)_mm512_add_epi64(__A, __B),
873                                             (__v8di)__W);
874}
875
876static __inline__ __m512i __DEFAULT_FN_ATTRS
877_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
878{
879  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
880                                             (__v8di)_mm512_add_epi64(__A, __B),
881                                             (__v8di)_mm512_setzero_si512());
882}
883
884static __inline__ __m512i __DEFAULT_FN_ATTRS
885_mm512_sub_epi64 (__m512i __A, __m512i __B)
886{
887  return (__m512i) ((__v8du) __A - (__v8du) __B);
888}
889
890static __inline__ __m512i __DEFAULT_FN_ATTRS
891_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
892{
893  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
894                                             (__v8di)_mm512_sub_epi64(__A, __B),
895                                             (__v8di)__W);
896}
897
898static __inline__ __m512i __DEFAULT_FN_ATTRS
899_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
900{
901  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
902                                             (__v8di)_mm512_sub_epi64(__A, __B),
903                                             (__v8di)_mm512_setzero_si512());
904}
905
906static __inline__ __m512i __DEFAULT_FN_ATTRS
907_mm512_add_epi32 (__m512i __A, __m512i __B)
908{
909  return (__m512i) ((__v16su) __A + (__v16su) __B);
910}
911
912static __inline__ __m512i __DEFAULT_FN_ATTRS
913_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
914{
915  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
916                                             (__v16si)_mm512_add_epi32(__A, __B),
917                                             (__v16si)__W);
918}
919
920static __inline__ __m512i __DEFAULT_FN_ATTRS
921_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
922{
923  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
924                                             (__v16si)_mm512_add_epi32(__A, __B),
925                                             (__v16si)_mm512_setzero_si512());
926}
927
928static __inline__ __m512i __DEFAULT_FN_ATTRS
929_mm512_sub_epi32 (__m512i __A, __m512i __B)
930{
931  return (__m512i) ((__v16su) __A - (__v16su) __B);
932}
933
934static __inline__ __m512i __DEFAULT_FN_ATTRS
935_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
936{
937  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
938                                             (__v16si)_mm512_sub_epi32(__A, __B),
939                                             (__v16si)__W);
940}
941
942static __inline__ __m512i __DEFAULT_FN_ATTRS
943_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
944{
945  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
946                                             (__v16si)_mm512_sub_epi32(__A, __B),
947                                             (__v16si)_mm512_setzero_si512());
948}
949
/* Calls __builtin_ia32_maxpd512_mask with A, B and W as __v8df operands,
 * U as the __mmask8 mask and R (cast to int) as the last argument. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
955
/* Calls __builtin_ia32_maxpd512_mask with A and B as __v8df operands, a zero
 * vector as the third operand, U as the mask and R (cast to int) last. */
#define _mm512_maskz_max_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
961
/* Calls __builtin_ia32_maxpd512_mask with A and B as __v8df operands, an
 * undefined vector as the third operand, mask -1 and R (cast to int) last. */
#define _mm512_max_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_maxpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
967
968static  __inline__ __m512d __DEFAULT_FN_ATTRS
969_mm512_max_pd(__m512d __A, __m512d __B)
970{
971  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
972             (__v8df) __B,
973             (__v8df)
974             _mm512_setzero_pd (),
975             (__mmask8) -1,
976             _MM_FROUND_CUR_DIRECTION);
977}
978
979static __inline__ __m512d __DEFAULT_FN_ATTRS
980_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
981{
982  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
983                  (__v8df) __B,
984                  (__v8df) __W,
985                  (__mmask8) __U,
986                  _MM_FROUND_CUR_DIRECTION);
987}
988
989static __inline__ __m512d __DEFAULT_FN_ATTRS
990_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
991{
992  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
993                  (__v8df) __B,
994                  (__v8df)
995                  _mm512_setzero_pd (),
996                  (__mmask8) __U,
997                  _MM_FROUND_CUR_DIRECTION);
998}
999
/* Calls __builtin_ia32_maxps512_mask with A, B and W as __v16sf operands,
 * U as the __mmask16 mask and R (cast to int) as the last argument. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
1005
/* Calls __builtin_ia32_maxps512_mask with A and B as __v16sf operands, a zero
 * vector as the third operand, U as the mask and R (cast to int) last. */
#define _mm512_maskz_max_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
1011
/* Calls __builtin_ia32_maxps512_mask with A and B as __v16sf operands, an
 * undefined vector as the third operand, mask -1 and R (cast to int) last. */
#define _mm512_max_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_maxps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
1017
1018static  __inline__ __m512 __DEFAULT_FN_ATTRS
1019_mm512_max_ps(__m512 __A, __m512 __B)
1020{
1021  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1022            (__v16sf) __B,
1023            (__v16sf)
1024            _mm512_setzero_ps (),
1025            (__mmask16) -1,
1026            _MM_FROUND_CUR_DIRECTION);
1027}
1028
1029static __inline__ __m512 __DEFAULT_FN_ATTRS
1030_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1031{
1032  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1033                 (__v16sf) __B,
1034                 (__v16sf) __W,
1035                 (__mmask16) __U,
1036                 _MM_FROUND_CUR_DIRECTION);
1037}
1038
1039static __inline__ __m512 __DEFAULT_FN_ATTRS
1040_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1041{
1042  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
1043                 (__v16sf) __B,
1044                 (__v16sf)
1045                 _mm512_setzero_ps (),
1046                 (__mmask16) __U,
1047                 _MM_FROUND_CUR_DIRECTION);
1048}
1049
1050static __inline__ __m128 __DEFAULT_FN_ATTRS
1051_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1052  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1053                (__v4sf) __B,
1054                (__v4sf) __W,
1055                (__mmask8) __U,
1056                _MM_FROUND_CUR_DIRECTION);
1057}
1058
1059static __inline__ __m128 __DEFAULT_FN_ATTRS
1060_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1061  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1062                (__v4sf) __B,
1063                (__v4sf)  _mm_setzero_ps (),
1064                (__mmask8) __U,
1065                _MM_FROUND_CUR_DIRECTION);
1066}
1067
/* Calls __builtin_ia32_maxss_round_mask with A and B as __v4sf operands, a
 * zero vector as the third operand, mask -1 and R (cast to int) last. */
#define _mm_max_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
1073
/* Calls __builtin_ia32_maxss_round_mask with A, B and W as __v4sf operands,
 * U as the __mmask8 mask and R (cast to int) as the last argument. */
#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1079
/* Calls __builtin_ia32_maxss_round_mask with A and B as __v4sf operands, a
 * zero vector as the third operand, U as the mask and R (cast to int) last. */
#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_maxss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
1085
1086static __inline__ __m128d __DEFAULT_FN_ATTRS
1087_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1088  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1089                (__v2df) __B,
1090                (__v2df) __W,
1091                (__mmask8) __U,
1092                _MM_FROUND_CUR_DIRECTION);
1093}
1094
1095static __inline__ __m128d __DEFAULT_FN_ATTRS
1096_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1097  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1098                (__v2df) __B,
1099                (__v2df)  _mm_setzero_pd (),
1100                (__mmask8) __U,
1101                _MM_FROUND_CUR_DIRECTION);
1102}
1103
/* Calls __builtin_ia32_maxsd_round_mask with A and B as __v2df operands, a
 * zero vector as the third operand, mask -1 and R (cast to int) last. */
#define _mm_max_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
1109
/* Calls __builtin_ia32_maxsd_round_mask with A, B and W as __v2df operands,
 * U as the __mmask8 mask and R (cast to int) as the last argument. */
#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1115
/* Calls __builtin_ia32_maxsd_round_mask with A and B as __v2df operands, a
 * zero vector as the third operand, U as the mask and R (cast to int) last. */
#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_maxsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
1121
1122static __inline __m512i
1123__DEFAULT_FN_ATTRS
1124_mm512_max_epi32(__m512i __A, __m512i __B)
1125{
1126  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1127              (__v16si) __B,
1128              (__v16si)
1129              _mm512_setzero_si512 (),
1130              (__mmask16) -1);
1131}
1132
1133static __inline__ __m512i __DEFAULT_FN_ATTRS
1134_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1135{
1136  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1137                   (__v16si) __B,
1138                   (__v16si) __W, __M);
1139}
1140
1141static __inline__ __m512i __DEFAULT_FN_ATTRS
1142_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1143{
1144  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
1145                   (__v16si) __B,
1146                   (__v16si)
1147                   _mm512_setzero_si512 (),
1148                   __M);
1149}
1150
1151static __inline __m512i __DEFAULT_FN_ATTRS
1152_mm512_max_epu32(__m512i __A, __m512i __B)
1153{
1154  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1155              (__v16si) __B,
1156              (__v16si)
1157              _mm512_setzero_si512 (),
1158              (__mmask16) -1);
1159}
1160
1161static __inline__ __m512i __DEFAULT_FN_ATTRS
1162_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1163{
1164  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1165                   (__v16si) __B,
1166                   (__v16si) __W, __M);
1167}
1168
1169static __inline__ __m512i __DEFAULT_FN_ATTRS
1170_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1171{
1172  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
1173                   (__v16si) __B,
1174                   (__v16si)
1175                   _mm512_setzero_si512 (),
1176                   __M);
1177}
1178
1179static __inline __m512i __DEFAULT_FN_ATTRS
1180_mm512_max_epi64(__m512i __A, __m512i __B)
1181{
1182  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1183              (__v8di) __B,
1184              (__v8di)
1185              _mm512_setzero_si512 (),
1186              (__mmask8) -1);
1187}
1188
1189static __inline__ __m512i __DEFAULT_FN_ATTRS
1190_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1191{
1192  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1193                   (__v8di) __B,
1194                   (__v8di) __W, __M);
1195}
1196
1197static __inline__ __m512i __DEFAULT_FN_ATTRS
1198_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1199{
1200  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
1201                   (__v8di) __B,
1202                   (__v8di)
1203                   _mm512_setzero_si512 (),
1204                   __M);
1205}
1206
1207static __inline __m512i __DEFAULT_FN_ATTRS
1208_mm512_max_epu64(__m512i __A, __m512i __B)
1209{
1210  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1211              (__v8di) __B,
1212              (__v8di)
1213              _mm512_setzero_si512 (),
1214              (__mmask8) -1);
1215}
1216
1217static __inline__ __m512i __DEFAULT_FN_ATTRS
1218_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1219{
1220  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1221                   (__v8di) __B,
1222                   (__v8di) __W, __M);
1223}
1224
1225static __inline__ __m512i __DEFAULT_FN_ATTRS
1226_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1227{
1228  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
1229                   (__v8di) __B,
1230                   (__v8di)
1231                   _mm512_setzero_si512 (),
1232                   __M);
1233}
1234
/* Calls __builtin_ia32_minpd512_mask with A, B and W as __v8df operands,
 * U as the __mmask8 mask and R (cast to int) as the last argument. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1240
/* Calls __builtin_ia32_minpd512_mask with A and B as __v8df operands, a zero
 * vector as the third operand, U as the mask and R (cast to int) last. */
#define _mm512_maskz_min_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
1246
/* Calls __builtin_ia32_minpd512_mask with A and B as __v8df operands, an
 * undefined vector as the third operand, mask -1 and R (cast to int) last. */
#define _mm512_min_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_minpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
1252
1253static  __inline__ __m512d __DEFAULT_FN_ATTRS
1254_mm512_min_pd(__m512d __A, __m512d __B)
1255{
1256  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1257             (__v8df) __B,
1258             (__v8df)
1259             _mm512_setzero_pd (),
1260             (__mmask8) -1,
1261             _MM_FROUND_CUR_DIRECTION);
1262}
1263
1264static __inline__ __m512d __DEFAULT_FN_ATTRS
1265_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1266{
1267  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1268                  (__v8df) __B,
1269                  (__v8df) __W,
1270                  (__mmask8) __U,
1271                  _MM_FROUND_CUR_DIRECTION);
1272}
1273
/* Calls __builtin_ia32_minps512_mask with A, B and W as __v16sf operands,
 * U as the __mmask16 mask and R (cast to int) as the last argument. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
1279
/* Calls __builtin_ia32_minps512_mask with A and B as __v16sf operands, a zero
 * vector as the third operand, U as the mask and R (cast to int) last. */
#define _mm512_maskz_min_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
1285
/* Calls __builtin_ia32_minps512_mask with A and B as __v16sf operands, an
 * undefined vector as the third operand, mask -1 and R (cast to int) last. */
#define _mm512_min_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_minps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
1291
1292static __inline__ __m512d __DEFAULT_FN_ATTRS
1293_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1294{
1295  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
1296                  (__v8df) __B,
1297                  (__v8df)
1298                  _mm512_setzero_pd (),
1299                  (__mmask8) __U,
1300                  _MM_FROUND_CUR_DIRECTION);
1301}
1302
1303static  __inline__ __m512 __DEFAULT_FN_ATTRS
1304_mm512_min_ps(__m512 __A, __m512 __B)
1305{
1306  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1307            (__v16sf) __B,
1308            (__v16sf)
1309            _mm512_setzero_ps (),
1310            (__mmask16) -1,
1311            _MM_FROUND_CUR_DIRECTION);
1312}
1313
1314static __inline__ __m512 __DEFAULT_FN_ATTRS
1315_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1316{
1317  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1318                 (__v16sf) __B,
1319                 (__v16sf) __W,
1320                 (__mmask16) __U,
1321                 _MM_FROUND_CUR_DIRECTION);
1322}
1323
1324static __inline__ __m512 __DEFAULT_FN_ATTRS
1325_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1326{
1327  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
1328                 (__v16sf) __B,
1329                 (__v16sf)
1330                 _mm512_setzero_ps (),
1331                 (__mmask16) __U,
1332                 _MM_FROUND_CUR_DIRECTION);
1333}
1334
1335static __inline__ __m128 __DEFAULT_FN_ATTRS
1336_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1337  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1338                (__v4sf) __B,
1339                (__v4sf) __W,
1340                (__mmask8) __U,
1341                _MM_FROUND_CUR_DIRECTION);
1342}
1343
1344static __inline__ __m128 __DEFAULT_FN_ATTRS
1345_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1346  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1347                (__v4sf) __B,
1348                (__v4sf)  _mm_setzero_ps (),
1349                (__mmask8) __U,
1350                _MM_FROUND_CUR_DIRECTION);
1351}
1352
/* Calls __builtin_ia32_minss_round_mask with A and B as __v4sf operands, a
 * zero vector as the third operand, mask -1 and R (cast to int) last. */
#define _mm_min_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
1358
/* Calls __builtin_ia32_minss_round_mask with A, B and W as __v4sf operands,
 * U as the __mmask8 mask and R (cast to int) as the last argument. */
#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1364
/* Calls __builtin_ia32_minss_round_mask with A and B as __v4sf operands, a
 * zero vector as the third operand, U as the mask and R (cast to int) last. */
#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_minss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
1370
1371static __inline__ __m128d __DEFAULT_FN_ATTRS
1372_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1373  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1374                (__v2df) __B,
1375                (__v2df) __W,
1376                (__mmask8) __U,
1377                _MM_FROUND_CUR_DIRECTION);
1378}
1379
1380static __inline__ __m128d __DEFAULT_FN_ATTRS
1381_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1382  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1383                (__v2df) __B,
1384                (__v2df)  _mm_setzero_pd (),
1385                (__mmask8) __U,
1386                _MM_FROUND_CUR_DIRECTION);
1387}
1388
/* Calls __builtin_ia32_minsd_round_mask with A and B as __v2df operands, a
 * zero vector as the third operand, mask -1 and R (cast to int) last. */
#define _mm_min_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
1394
/* Calls __builtin_ia32_minsd_round_mask with A, B and W as __v2df operands,
 * U as the __mmask8 mask and R (cast to int) as the last argument. */
#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1400
/* Calls __builtin_ia32_minsd_round_mask with A and B as __v2df operands, a
 * zero vector as the third operand, U as the mask and R (cast to int) last. */
#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_minsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
1406
1407static __inline __m512i
1408__DEFAULT_FN_ATTRS
1409_mm512_min_epi32(__m512i __A, __m512i __B)
1410{
1411  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1412              (__v16si) __B,
1413              (__v16si)
1414              _mm512_setzero_si512 (),
1415              (__mmask16) -1);
1416}
1417
1418static __inline__ __m512i __DEFAULT_FN_ATTRS
1419_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1420{
1421  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1422                   (__v16si) __B,
1423                   (__v16si) __W, __M);
1424}
1425
1426static __inline__ __m512i __DEFAULT_FN_ATTRS
1427_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1428{
1429  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
1430                   (__v16si) __B,
1431                   (__v16si)
1432                   _mm512_setzero_si512 (),
1433                   __M);
1434}
1435
1436static __inline __m512i __DEFAULT_FN_ATTRS
1437_mm512_min_epu32(__m512i __A, __m512i __B)
1438{
1439  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1440              (__v16si) __B,
1441              (__v16si)
1442              _mm512_setzero_si512 (),
1443              (__mmask16) -1);
1444}
1445
1446static __inline__ __m512i __DEFAULT_FN_ATTRS
1447_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1448{
1449  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1450                   (__v16si) __B,
1451                   (__v16si) __W, __M);
1452}
1453
1454static __inline__ __m512i __DEFAULT_FN_ATTRS
1455_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1456{
1457  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
1458                   (__v16si) __B,
1459                   (__v16si)
1460                   _mm512_setzero_si512 (),
1461                   __M);
1462}
1463
1464static __inline __m512i __DEFAULT_FN_ATTRS
1465_mm512_min_epi64(__m512i __A, __m512i __B)
1466{
1467  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1468              (__v8di) __B,
1469              (__v8di)
1470              _mm512_setzero_si512 (),
1471              (__mmask8) -1);
1472}
1473
1474static __inline__ __m512i __DEFAULT_FN_ATTRS
1475_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1476{
1477  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1478                   (__v8di) __B,
1479                   (__v8di) __W, __M);
1480}
1481
1482static __inline__ __m512i __DEFAULT_FN_ATTRS
1483_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1484{
1485  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
1486                   (__v8di) __B,
1487                   (__v8di)
1488                   _mm512_setzero_si512 (),
1489                   __M);
1490}
1491
1492static __inline __m512i __DEFAULT_FN_ATTRS
1493_mm512_min_epu64(__m512i __A, __m512i __B)
1494{
1495  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1496              (__v8di) __B,
1497              (__v8di)
1498              _mm512_setzero_si512 (),
1499              (__mmask8) -1);
1500}
1501
1502static __inline__ __m512i __DEFAULT_FN_ATTRS
1503_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1504{
1505  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1506                   (__v8di) __B,
1507                   (__v8di) __W, __M);
1508}
1509
1510static __inline__ __m512i __DEFAULT_FN_ATTRS
1511_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1512{
1513  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
1514                   (__v8di) __B,
1515                   (__v8di)
1516                   _mm512_setzero_si512 (),
1517                   __M);
1518}
1519
1520static __inline __m512i __DEFAULT_FN_ATTRS
1521_mm512_mul_epi32(__m512i __X, __m512i __Y)
1522{
1523  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1524}
1525
1526static __inline __m512i __DEFAULT_FN_ATTRS
1527_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1528{
1529  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1530                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1531                                             (__v8di)__W);
1532}
1533
1534static __inline __m512i __DEFAULT_FN_ATTRS
1535_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1536{
1537  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1538                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1539                                             (__v8di)_mm512_setzero_si512 ());
1540}
1541
1542static __inline __m512i __DEFAULT_FN_ATTRS
1543_mm512_mul_epu32(__m512i __X, __m512i __Y)
1544{
1545  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1546}
1547
1548static __inline __m512i __DEFAULT_FN_ATTRS
1549_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1550{
1551  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1552                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1553                                             (__v8di)__W);
1554}
1555
1556static __inline __m512i __DEFAULT_FN_ATTRS
1557_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1558{
1559  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1560                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1561                                             (__v8di)_mm512_setzero_si512 ());
1562}
1563
1564static __inline __m512i __DEFAULT_FN_ATTRS
1565_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1566{
1567  return (__m512i) ((__v16su) __A * (__v16su) __B);
1568}
1569
1570static __inline __m512i __DEFAULT_FN_ATTRS
1571_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1572{
1573  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1574                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1575                                             (__v16si)_mm512_setzero_si512());
1576}
1577
1578static __inline __m512i __DEFAULT_FN_ATTRS
1579_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1580{
1581  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1582                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1583                                             (__v16si)__W);
1584}
1585
/* Calls __builtin_ia32_sqrtpd512_mask with A and W as __v8df operands, U as
 * the __mmask8 mask and R (cast to int) as the last argument. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
1590
/* Calls __builtin_ia32_sqrtpd512_mask with A as the __v8df operand, a zero
 * vector as the second operand, U as the mask and R (cast to int) last. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
1595
/* Calls __builtin_ia32_sqrtpd512_mask with A as the __v8df operand, an
 * undefined vector as the second operand, mask -1 and R (cast to int) last. */
#define _mm512_sqrt_round_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_sqrtpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
1600
1601static  __inline__ __m512d __DEFAULT_FN_ATTRS
1602_mm512_sqrt_pd(__m512d __a)
1603{
1604  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
1605                                                (__v8df) _mm512_setzero_pd (),
1606                                                (__mmask8) -1,
1607                                                _MM_FROUND_CUR_DIRECTION);
1608}
1609
1610static __inline__ __m512d __DEFAULT_FN_ATTRS
1611_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1612{
1613  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1614                   (__v8df) __W,
1615                   (__mmask8) __U,
1616                   _MM_FROUND_CUR_DIRECTION);
1617}
1618
1619static __inline__ __m512d __DEFAULT_FN_ATTRS
1620_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1621{
1622  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
1623                   (__v8df)
1624                   _mm512_setzero_pd (),
1625                   (__mmask8) __U,
1626                   _MM_FROUND_CUR_DIRECTION);
1627}
1628
/* Square root of the float vector A with rounding control R; W is passed as
 * the pass-through operand and U as the write mask. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
1633
/* Square root of the float vector A with rounding control R; a zero vector
 * is passed as the pass-through operand and U as the write mask. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
1638
/* Square root of the float vector A with rounding control R.  The mask is
 * all-ones and the pass-through operand is an undefined vector. */
#define _mm512_sqrt_round_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_sqrtps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
1643
1644static  __inline__ __m512 __DEFAULT_FN_ATTRS
1645_mm512_sqrt_ps(__m512 __a)
1646{
1647  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
1648                                               (__v16sf) _mm512_setzero_ps (),
1649                                               (__mmask16) -1,
1650                                               _MM_FROUND_CUR_DIRECTION);
1651}
1652
1653static  __inline__ __m512 __DEFAULT_FN_ATTRS
1654_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1655{
1656  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1657                                               (__v16sf) __W,
1658                                               (__mmask16) __U,
1659                                               _MM_FROUND_CUR_DIRECTION);
1660}
1661
1662static  __inline__ __m512 __DEFAULT_FN_ATTRS
1663_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1664{
1665  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
1666                                               (__v16sf) _mm512_setzero_ps (),
1667                                               (__mmask16) __U,
1668                                               _MM_FROUND_CUR_DIRECTION);
1669}
1670
1671static  __inline__ __m512d __DEFAULT_FN_ATTRS
1672_mm512_rsqrt14_pd(__m512d __A)
1673{
1674  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1675                 (__v8df)
1676                 _mm512_setzero_pd (),
1677                 (__mmask8) -1);}
1678
1679static __inline__ __m512d __DEFAULT_FN_ATTRS
1680_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1681{
1682  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1683                  (__v8df) __W,
1684                  (__mmask8) __U);
1685}
1686
1687static __inline__ __m512d __DEFAULT_FN_ATTRS
1688_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1689{
1690  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1691                  (__v8df)
1692                  _mm512_setzero_pd (),
1693                  (__mmask8) __U);
1694}
1695
1696static  __inline__ __m512 __DEFAULT_FN_ATTRS
1697_mm512_rsqrt14_ps(__m512 __A)
1698{
1699  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1700                (__v16sf)
1701                _mm512_setzero_ps (),
1702                (__mmask16) -1);
1703}
1704
1705static __inline__ __m512 __DEFAULT_FN_ATTRS
1706_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1707{
1708  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1709                 (__v16sf) __W,
1710                 (__mmask16) __U);
1711}
1712
1713static __inline__ __m512 __DEFAULT_FN_ATTRS
1714_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1715{
1716  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1717                 (__v16sf)
1718                 _mm512_setzero_ps (),
1719                 (__mmask16) __U);
1720}
1721
1722static  __inline__ __m128 __DEFAULT_FN_ATTRS
1723_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1724{
1725  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1726             (__v4sf) __B,
1727             (__v4sf)
1728             _mm_setzero_ps (),
1729             (__mmask8) -1);
1730}
1731
1732static __inline__ __m128 __DEFAULT_FN_ATTRS
1733_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1734{
1735 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1736          (__v4sf) __B,
1737          (__v4sf) __W,
1738          (__mmask8) __U);
1739}
1740
1741static __inline__ __m128 __DEFAULT_FN_ATTRS
1742_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1743{
1744 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1745          (__v4sf) __B,
1746          (__v4sf) _mm_setzero_ps (),
1747          (__mmask8) __U);
1748}
1749
1750static  __inline__ __m128d __DEFAULT_FN_ATTRS
1751_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1752{
1753  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1754              (__v2df) __B,
1755              (__v2df)
1756              _mm_setzero_pd (),
1757              (__mmask8) -1);
1758}
1759
1760static __inline__ __m128d __DEFAULT_FN_ATTRS
1761_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1762{
1763 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1764          (__v2df) __B,
1765          (__v2df) __W,
1766          (__mmask8) __U);
1767}
1768
1769static __inline__ __m128d __DEFAULT_FN_ATTRS
1770_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1771{
1772 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1773          (__v2df) __B,
1774          (__v2df) _mm_setzero_pd (),
1775          (__mmask8) __U);
1776}
1777
1778static  __inline__ __m512d __DEFAULT_FN_ATTRS
1779_mm512_rcp14_pd(__m512d __A)
1780{
1781  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1782               (__v8df)
1783               _mm512_setzero_pd (),
1784               (__mmask8) -1);
1785}
1786
1787static __inline__ __m512d __DEFAULT_FN_ATTRS
1788_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1789{
1790  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1791                (__v8df) __W,
1792                (__mmask8) __U);
1793}
1794
1795static __inline__ __m512d __DEFAULT_FN_ATTRS
1796_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1797{
1798  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1799                (__v8df)
1800                _mm512_setzero_pd (),
1801                (__mmask8) __U);
1802}
1803
1804static  __inline__ __m512 __DEFAULT_FN_ATTRS
1805_mm512_rcp14_ps(__m512 __A)
1806{
1807  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1808              (__v16sf)
1809              _mm512_setzero_ps (),
1810              (__mmask16) -1);
1811}
1812
1813static __inline__ __m512 __DEFAULT_FN_ATTRS
1814_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1815{
1816  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1817                   (__v16sf) __W,
1818                   (__mmask16) __U);
1819}
1820
1821static __inline__ __m512 __DEFAULT_FN_ATTRS
1822_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1823{
1824  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1825                   (__v16sf)
1826                   _mm512_setzero_ps (),
1827                   (__mmask16) __U);
1828}
1829
1830static  __inline__ __m128 __DEFAULT_FN_ATTRS
1831_mm_rcp14_ss(__m128 __A, __m128 __B)
1832{
1833  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1834                 (__v4sf) __B,
1835                 (__v4sf)
1836                 _mm_setzero_ps (),
1837                 (__mmask8) -1);
1838}
1839
1840static __inline__ __m128 __DEFAULT_FN_ATTRS
1841_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1842{
1843 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1844          (__v4sf) __B,
1845          (__v4sf) __W,
1846          (__mmask8) __U);
1847}
1848
1849static __inline__ __m128 __DEFAULT_FN_ATTRS
1850_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1851{
1852 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1853          (__v4sf) __B,
1854          (__v4sf) _mm_setzero_ps (),
1855          (__mmask8) __U);
1856}
1857
1858static  __inline__ __m128d __DEFAULT_FN_ATTRS
1859_mm_rcp14_sd(__m128d __A, __m128d __B)
1860{
1861  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1862            (__v2df) __B,
1863            (__v2df)
1864            _mm_setzero_pd (),
1865            (__mmask8) -1);
1866}
1867
1868static __inline__ __m128d __DEFAULT_FN_ATTRS
1869_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1870{
1871 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1872          (__v2df) __B,
1873          (__v2df) __W,
1874          (__mmask8) __U);
1875}
1876
1877static __inline__ __m128d __DEFAULT_FN_ATTRS
1878_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1879{
1880 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1881          (__v2df) __B,
1882          (__v2df) _mm_setzero_pd (),
1883          (__mmask8) __U);
1884}
1885
1886static __inline __m512 __DEFAULT_FN_ATTRS
1887_mm512_floor_ps(__m512 __A)
1888{
1889  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1890                                                  _MM_FROUND_FLOOR,
1891                                                  (__v16sf) __A, -1,
1892                                                  _MM_FROUND_CUR_DIRECTION);
1893}
1894
1895static __inline__ __m512 __DEFAULT_FN_ATTRS
1896_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1897{
1898  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1899                   _MM_FROUND_FLOOR,
1900                   (__v16sf) __W, __U,
1901                   _MM_FROUND_CUR_DIRECTION);
1902}
1903
1904static __inline __m512d __DEFAULT_FN_ATTRS
1905_mm512_floor_pd(__m512d __A)
1906{
1907  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1908                                                   _MM_FROUND_FLOOR,
1909                                                   (__v8df) __A, -1,
1910                                                   _MM_FROUND_CUR_DIRECTION);
1911}
1912
1913static __inline__ __m512d __DEFAULT_FN_ATTRS
1914_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1915{
1916  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1917                _MM_FROUND_FLOOR,
1918                (__v8df) __W, __U,
1919                _MM_FROUND_CUR_DIRECTION);
1920}
1921
1922static __inline__ __m512 __DEFAULT_FN_ATTRS
1923_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1924{
1925  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1926                   _MM_FROUND_CEIL,
1927                   (__v16sf) __W, __U,
1928                   _MM_FROUND_CUR_DIRECTION);
1929}
1930
1931static __inline __m512 __DEFAULT_FN_ATTRS
1932_mm512_ceil_ps(__m512 __A)
1933{
1934  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1935                                                  _MM_FROUND_CEIL,
1936                                                  (__v16sf) __A, -1,
1937                                                  _MM_FROUND_CUR_DIRECTION);
1938}
1939
1940static __inline __m512d __DEFAULT_FN_ATTRS
1941_mm512_ceil_pd(__m512d __A)
1942{
1943  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1944                                                   _MM_FROUND_CEIL,
1945                                                   (__v8df) __A, -1,
1946                                                   _MM_FROUND_CUR_DIRECTION);
1947}
1948
1949static __inline__ __m512d __DEFAULT_FN_ATTRS
1950_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1951{
1952  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1953                _MM_FROUND_CEIL,
1954                (__v8df) __W, __U,
1955                _MM_FROUND_CUR_DIRECTION);
1956}
1957
1958static __inline __m512i __DEFAULT_FN_ATTRS
1959_mm512_abs_epi64(__m512i __A)
1960{
1961  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1962             (__v8di)
1963             _mm512_setzero_si512 (),
1964             (__mmask8) -1);
1965}
1966
1967static __inline__ __m512i __DEFAULT_FN_ATTRS
1968_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1969{
1970  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1971                  (__v8di) __W,
1972                  (__mmask8) __U);
1973}
1974
1975static __inline__ __m512i __DEFAULT_FN_ATTRS
1976_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1977{
1978  return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
1979                  (__v8di)
1980                  _mm512_setzero_si512 (),
1981                  (__mmask8) __U);
1982}
1983
1984static __inline __m512i __DEFAULT_FN_ATTRS
1985_mm512_abs_epi32(__m512i __A)
1986{
1987  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1988             (__v16si)
1989             _mm512_setzero_si512 (),
1990             (__mmask16) -1);
1991}
1992
1993static __inline__ __m512i __DEFAULT_FN_ATTRS
1994_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1995{
1996  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
1997                  (__v16si) __W,
1998                  (__mmask16) __U);
1999}
2000
2001static __inline__ __m512i __DEFAULT_FN_ATTRS
2002_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
2003{
2004  return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
2005                  (__v16si)
2006                  _mm512_setzero_si512 (),
2007                  (__mmask16) __U);
2008}
2009
2010static __inline__ __m128 __DEFAULT_FN_ATTRS
2011_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2012  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2013                (__v4sf) __B,
2014                (__v4sf) __W,
2015                (__mmask8) __U,
2016                _MM_FROUND_CUR_DIRECTION);
2017}
2018
2019static __inline__ __m128 __DEFAULT_FN_ATTRS
2020_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2021  return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A,
2022                (__v4sf) __B,
2023                (__v4sf)  _mm_setzero_ps (),
2024                (__mmask8) __U,
2025                _MM_FROUND_CUR_DIRECTION);
2026}
2027
/* Scalar single-precision add of A and B with rounding control R, using an
 * all-ones mask and a zero pass-through vector. */
#define _mm_add_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
2033
/* Scalar single-precision add of A and B with rounding control R; W is the
 * pass-through operand and U the write mask. */
#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2039
/* Scalar single-precision add of A and B with rounding control R; a zero
 * vector is the pass-through operand and U the write mask. */
#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_addss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
2045
2046static __inline__ __m128d __DEFAULT_FN_ATTRS
2047_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2048  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2049                (__v2df) __B,
2050                (__v2df) __W,
2051                (__mmask8) __U,
2052                _MM_FROUND_CUR_DIRECTION);
2053}
2054
2055static __inline__ __m128d __DEFAULT_FN_ATTRS
2056_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2057  return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A,
2058                (__v2df) __B,
2059                (__v2df)  _mm_setzero_pd (),
2060                (__mmask8) __U,
2061                _MM_FROUND_CUR_DIRECTION);
2062}
/* Scalar double-precision add of A and B with rounding control R, using an
 * all-ones mask and a zero pass-through vector. */
#define _mm_add_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2068
/* Scalar double-precision add of A and B with rounding control R; W is the
 * pass-through operand and U the write mask. */
#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2074
/* Scalar double-precision add of A and B with rounding control R; a zero
 * vector is the pass-through operand and U the write mask. */
#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_addsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2080
2081static __inline__ __m512d __DEFAULT_FN_ATTRS
2082_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2083  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2084                                              (__v8df)_mm512_add_pd(__A, __B),
2085                                              (__v8df)__W);
2086}
2087
2088static __inline__ __m512d __DEFAULT_FN_ATTRS
2089_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2090  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2091                                              (__v8df)_mm512_add_pd(__A, __B),
2092                                              (__v8df)_mm512_setzero_pd());
2093}
2094
2095static __inline__ __m512 __DEFAULT_FN_ATTRS
2096_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2097  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2098                                             (__v16sf)_mm512_add_ps(__A, __B),
2099                                             (__v16sf)__W);
2100}
2101
2102static __inline__ __m512 __DEFAULT_FN_ATTRS
2103_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2104  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2105                                             (__v16sf)_mm512_add_ps(__A, __B),
2106                                             (__v16sf)_mm512_setzero_ps());
2107}
2108
/* Packed double add of A and B with rounding control R, using an all-ones
 * mask and a zero pass-through vector. */
#define _mm512_add_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2114
/* Packed double add of A and B with rounding control R; W is the
 * pass-through operand and U the write mask. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2120
/* Packed double add of A and B with rounding control R; a zero vector is
 * the pass-through operand and U the write mask. */
#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_addpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2126
/* Packed float add of A and B with rounding control R, using an all-ones
 * mask and a zero pass-through vector. */
#define _mm512_add_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
2132
/* Packed float add of A and B with rounding control R; W is the
 * pass-through operand and U the write mask. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })
2138
/* Packed float add of A and B with rounding control R; a zero vector is the
 * pass-through operand and U the write mask. */
#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_addps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
2144
2145static __inline__ __m128 __DEFAULT_FN_ATTRS
2146_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2147  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2148                (__v4sf) __B,
2149                (__v4sf) __W,
2150                (__mmask8) __U,
2151                _MM_FROUND_CUR_DIRECTION);
2152}
2153
2154static __inline__ __m128 __DEFAULT_FN_ATTRS
2155_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2156  return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A,
2157                (__v4sf) __B,
2158                (__v4sf)  _mm_setzero_ps (),
2159                (__mmask8) __U,
2160                _MM_FROUND_CUR_DIRECTION);
2161}
/* Scalar single-precision subtract (A, B) with rounding control R, using an
 * all-ones mask and a zero pass-through vector. */
#define _mm_sub_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
2167
/* Scalar single-precision subtract (A, B) with rounding control R; W is the
 * pass-through operand and U the write mask. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2173
/* Scalar single-precision subtract (A, B) with rounding control R; a zero
 * vector is the pass-through operand and U the write mask. */
#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_subss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
2179
2180static __inline__ __m128d __DEFAULT_FN_ATTRS
2181_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2182  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2183                (__v2df) __B,
2184                (__v2df) __W,
2185                (__mmask8) __U,
2186                _MM_FROUND_CUR_DIRECTION);
2187}
2188
2189static __inline__ __m128d __DEFAULT_FN_ATTRS
2190_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2191  return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A,
2192                (__v2df) __B,
2193                (__v2df)  _mm_setzero_pd (),
2194                (__mmask8) __U,
2195                _MM_FROUND_CUR_DIRECTION);
2196}
2197
/* Scalar double-precision subtract (A, B) with rounding control R, using an
 * all-ones mask and a zero pass-through vector. */
#define _mm_sub_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2203
/* Scalar double-precision subtract (A, B) with rounding control R; W is the
 * pass-through operand and U the write mask. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2209
/* Scalar double-precision subtract (A, B) with rounding control R; a zero
 * vector is the pass-through operand and U the write mask. */
#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_subsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2215
2216static __inline__ __m512d __DEFAULT_FN_ATTRS
2217_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2218  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2219                                              (__v8df)_mm512_sub_pd(__A, __B),
2220                                              (__v8df)__W);
2221}
2222
2223static __inline__ __m512d __DEFAULT_FN_ATTRS
2224_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2225  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2226                                              (__v8df)_mm512_sub_pd(__A, __B),
2227                                              (__v8df)_mm512_setzero_pd());
2228}
2229
2230static __inline__ __m512 __DEFAULT_FN_ATTRS
2231_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2232  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2233                                             (__v16sf)_mm512_sub_ps(__A, __B),
2234                                             (__v16sf)__W);
2235}
2236
2237static __inline__ __m512 __DEFAULT_FN_ATTRS
2238_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2239  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2240                                             (__v16sf)_mm512_sub_ps(__A, __B),
2241                                             (__v16sf)_mm512_setzero_ps());
2242}
2243
/* Packed double subtract (A, B) with rounding control R, using an all-ones
 * mask and a zero pass-through vector. */
#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2249
/* Packed double subtract (A, B) with rounding control R; W is the
 * pass-through operand and U the write mask. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2255
/* Packed double subtract (A, B) with rounding control R; a zero vector is
 * the pass-through operand and U the write mask. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_subpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2261
/* Packed float subtract (A, B) with rounding control R, using an all-ones
 * mask and a zero pass-through vector. */
#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, \
      (int)(R)); })
2267
/* Packed float subtract (A, B) with rounding control R; W is the
 * pass-through operand and U the write mask.
 *
 * The expansion is a bare statement expression with no terminating ';', so
 * the macro can be used wherever an expression is allowed (initializer,
 * function argument, operand of another intrinsic). */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2273
/* Packed float subtract (A, B) with rounding control R; a zero vector is
 * the pass-through operand and U the write mask.
 *
 * The expansion is a bare statement expression with no terminating ';', so
 * the macro can be used wherever an expression is allowed (initializer,
 * function argument, operand of another intrinsic). */
#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2279
2280static __inline__ __m128 __DEFAULT_FN_ATTRS
2281_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2282  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2283                (__v4sf) __B,
2284                (__v4sf) __W,
2285                (__mmask8) __U,
2286                _MM_FROUND_CUR_DIRECTION);
2287}
2288
2289static __inline__ __m128 __DEFAULT_FN_ATTRS
2290_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2291  return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A,
2292                (__v4sf) __B,
2293                (__v4sf)  _mm_setzero_ps (),
2294                (__mmask8) __U,
2295                _MM_FROUND_CUR_DIRECTION);
2296}
/* Scalar single-precision multiply of A and B with rounding control R,
 * using an all-ones mask and a zero pass-through vector. */
#define _mm_mul_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
2302
/* Scalar single-precision multiply of A and B with rounding control R; W is
 * the pass-through operand and U the write mask. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2308
/* Scalar single-precision multiply of A and B with rounding control R; a
 * zero vector is the pass-through operand and U the write mask. */
#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_mulss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
2314
2315static __inline__ __m128d __DEFAULT_FN_ATTRS
2316_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2317  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2318                (__v2df) __B,
2319                (__v2df) __W,
2320                (__mmask8) __U,
2321                _MM_FROUND_CUR_DIRECTION);
2322}
2323
2324static __inline__ __m128d __DEFAULT_FN_ATTRS
2325_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2326  return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A,
2327                (__v2df) __B,
2328                (__v2df)  _mm_setzero_pd (),
2329                (__mmask8) __U,
2330                _MM_FROUND_CUR_DIRECTION);
2331}
2332
/* Scalar double-precision multiply of A and B with rounding control R,
 * using an all-ones mask and a zero pass-through vector. */
#define _mm_mul_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
2338
/* Scalar double-precision multiply of A and B with rounding control R; W is
 * the pass-through operand and U the write mask. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
2344
/* Scalar double-precision multiply of A and B with rounding control R; a
 * zero vector is the pass-through operand and U the write mask. */
#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_mulsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
2350
2351static __inline__ __m512d __DEFAULT_FN_ATTRS
2352_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2353  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2354                                              (__v8df)_mm512_mul_pd(__A, __B),
2355                                              (__v8df)__W);
2356}
2357
2358static __inline__ __m512d __DEFAULT_FN_ATTRS
2359_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2360  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2361                                              (__v8df)_mm512_mul_pd(__A, __B),
2362                                              (__v8df)_mm512_setzero_pd());
2363}
2364
2365static __inline__ __m512 __DEFAULT_FN_ATTRS
2366_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2367  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2368                                             (__v16sf)_mm512_mul_ps(__A, __B),
2369                                             (__v16sf)__W);
2370}
2371
2372static __inline__ __m512 __DEFAULT_FN_ATTRS
2373_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2374  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2375                                             (__v16sf)_mm512_mul_ps(__A, __B),
2376                                             (__v16sf)_mm512_setzero_ps());
2377}
2378
/* 512-bit double-precision multiply with rounding operand R.  Expands to
 * __builtin_ia32_mulpd512_mask(A, B, zero vector, all-ones mask, R). */
#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })
2384
/* Masked 512-bit double-precision multiply with rounding operand R.
 * Expands to __builtin_ia32_mulpd512_mask(A, B, W, U, R). */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })
2390
/* 512-bit double-precision multiply with rounding operand R, zero-operand
 * masked form.  Expands to __builtin_ia32_mulpd512_mask(A, B, zero vector,
 * U, R). */
#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_mulpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
2396
/* 512-bit single-precision multiply with rounding operand R.  Expands to
 * __builtin_ia32_mulps512_mask(A, B, zero vector, all-ones mask, R). */
#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
2402
/* Masked 512-bit single-precision multiply with rounding operand R.
 * Expands to __builtin_ia32_mulps512_mask(A, B, W, U, R): A and B are the
 * sources, W is forwarded as the third operand, U as the 16-bit mask and R
 * as the final int operand.
 * The expansion must not end in a semicolon: a trailing ';' would make the
 * macro unusable as an expression (function argument, initializer, the
 * statement before an 'else'). */
#define _mm512_mask_mul_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2408
/* 512-bit single-precision multiply with rounding operand R, zero-operand
 * masked form.  Expands to __builtin_ia32_mulps512_mask(A, B, zero vector,
 * U, R): A and B are the sources, U the 16-bit mask and R the final int
 * operand.
 * The expansion must not end in a semicolon: a trailing ';' would make the
 * macro unusable as an expression (function argument, initializer, the
 * statement before an 'else'). */
#define _mm512_maskz_mul_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2414
2415static __inline__ __m128 __DEFAULT_FN_ATTRS
2416_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2417  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2418                (__v4sf) __B,
2419                (__v4sf) __W,
2420                (__mmask8) __U,
2421                _MM_FROUND_CUR_DIRECTION);
2422}
2423
2424static __inline__ __m128 __DEFAULT_FN_ATTRS
2425_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2426  return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
2427                (__v4sf) __B,
2428                (__v4sf)  _mm_setzero_ps (),
2429                (__mmask8) __U,
2430                _MM_FROUND_CUR_DIRECTION);
2431}
2432
/* Scalar single-precision divide with rounding operand R.  Expands to
 * __builtin_ia32_divss_round_mask(A, B, zero vector, all-ones mask, R). */
#define _mm_div_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R)); })
2438
/* Masked scalar single-precision divide with rounding operand R.  Expands
 * to __builtin_ia32_divss_round_mask(A, B, W, U, R). */
#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R)); })
2444
/* Scalar single-precision divide with rounding operand R, zero-operand
 * masked form.  Expands to __builtin_ia32_divss_round_mask(A, B, zero
 * vector, U, R). */
#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_divss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R)); })
2450
2451static __inline__ __m128d __DEFAULT_FN_ATTRS
2452_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2453  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2454                (__v2df) __B,
2455                (__v2df) __W,
2456                (__mmask8) __U,
2457                _MM_FROUND_CUR_DIRECTION);
2458}
2459
2460static __inline__ __m128d __DEFAULT_FN_ATTRS
2461_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2462  return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
2463                (__v2df) __B,
2464                (__v2df)  _mm_setzero_pd (),
2465                (__mmask8) __U,
2466                _MM_FROUND_CUR_DIRECTION);
2467}
2468
/* Scalar double-precision divide with rounding operand R.  Expands to
 * __builtin_ia32_divsd_round_mask(A, B, zero vector, all-ones mask, R). */
#define _mm_div_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R)); })
2474
/* Masked scalar double-precision divide with rounding operand R.  Expands
 * to __builtin_ia32_divsd_round_mask(A, B, W, U, R). */
#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R)); })
2480
/* Scalar double-precision divide with rounding operand R, zero-operand
 * masked form.  Expands to __builtin_ia32_divsd_round_mask(A, B, zero
 * vector, U, R). */
#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_divsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R)); })
2486
2487static __inline __m512d __DEFAULT_FN_ATTRS
2488_mm512_div_pd(__m512d __a, __m512d __b)
2489{
2490  return (__m512d)((__v8df)__a/(__v8df)__b);
2491}
2492
2493static __inline__ __m512d __DEFAULT_FN_ATTRS
2494_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2495  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2496                                              (__v8df)_mm512_div_pd(__A, __B),
2497                                              (__v8df)__W);
2498}
2499
2500static __inline__ __m512d __DEFAULT_FN_ATTRS
2501_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2502  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2503                                              (__v8df)_mm512_div_pd(__A, __B),
2504                                              (__v8df)_mm512_setzero_pd());
2505}
2506
2507static __inline __m512 __DEFAULT_FN_ATTRS
2508_mm512_div_ps(__m512 __a, __m512 __b)
2509{
2510  return (__m512)((__v16sf)__a/(__v16sf)__b);
2511}
2512
2513static __inline__ __m512 __DEFAULT_FN_ATTRS
2514_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2515  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2516                                             (__v16sf)_mm512_div_ps(__A, __B),
2517                                             (__v16sf)__W);
2518}
2519
2520static __inline__ __m512 __DEFAULT_FN_ATTRS
2521_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2522  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2523                                             (__v16sf)_mm512_div_ps(__A, __B),
2524                                             (__v16sf)_mm512_setzero_ps());
2525}
2526
/* 512-bit double-precision divide with rounding operand R.  Expands to
 * __builtin_ia32_divpd512_mask(A, B, zero vector, all-ones mask, R). */
#define _mm512_div_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)-1, (int)(R)); })
2532
/* Masked 512-bit double-precision divide with rounding operand R.  Expands
 * to __builtin_ia32_divpd512_mask(A, B, W, U, R). */
#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R)); })
2538
/* 512-bit double-precision divide with rounding operand R, zero-operand
 * masked form.  Expands to __builtin_ia32_divpd512_mask(A, B, zero vector,
 * U, R). */
#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_divpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)); })
2544
/* 512-bit single-precision divide with rounding operand R.  Expands to
 * __builtin_ia32_divps512_mask(A, B, zero vector, all-ones mask, R). */
#define _mm512_div_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)-1, (int)(R)); })
2550
/* Masked 512-bit single-precision divide with rounding operand R.
 * Expands to __builtin_ia32_divps512_mask(A, B, W, U, R): A and B are the
 * sources, W is forwarded as the third operand, U as the 16-bit mask and R
 * as the final int operand.
 * The expansion must not end in a semicolon: a trailing ';' would make the
 * macro unusable as an expression (function argument, initializer, the
 * statement before an 'else'). */
#define _mm512_mask_div_round_ps(W, U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
                                       (int)(R)); })
2556
/* 512-bit single-precision divide with rounding operand R, zero-operand
 * masked form.  Expands to __builtin_ia32_divps512_mask(A, B, zero vector,
 * U, R): A and B are the sources, U the 16-bit mask and R the final int
 * operand.
 * The expansion must not end in a semicolon: a trailing ';' would make the
 * macro unusable as an expression (function argument, initializer, the
 * statement before an 'else'). */
#define _mm512_maskz_div_round_ps(U, A, B, R)  __extension__ ({ \
  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(B), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(U), (int)(R)); })
2562
/* Unmasked 512-bit single-precision round-scale.  Expands to
 * __builtin_ia32_rndscaleps_mask(A, B, undefined vector, all-ones mask,
 * _MM_FROUND_CUR_DIRECTION): A is the source vector and B the int
 * immediate operand.
 * The third operand is _mm512_undefined_ps() (as in
 * _mm512_roundscale_round_ps) rather than a second copy of A, so that A is
 * expanded, and therefore evaluated, exactly once. */
#define _mm512_roundscale_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })
2567
/* Masked 512-bit single-precision round-scale.  Expands to
 * __builtin_ia32_rndscaleps_mask(C, imm, A, B, _MM_FROUND_CUR_DIRECTION):
 * C is the source, imm the int immediate, A the third operand, B the mask. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), \
      (__v16sf)(__m512)(A), (__mmask16)(B), \
      _MM_FROUND_CUR_DIRECTION); })
2572
/* 512-bit single-precision round-scale, zero-operand masked form.  Expands
 * to __builtin_ia32_rndscaleps_mask(B, imm, zero vector, A,
 * _MM_FROUND_CUR_DIRECTION): B is the source, A the mask. */
#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), \
      _MM_FROUND_CUR_DIRECTION); })
2578
/* Masked 512-bit single-precision round-scale with explicit final operand R.
 * Expands to __builtin_ia32_rndscaleps_mask(C, imm, A, B, R): C is the
 * source, imm the int immediate, A the third operand, B the mask. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(C), (int)(imm), \
      (__v16sf)(__m512)(A), (__mmask16)(B), (int)(R)); })
2583
/* 512-bit single-precision round-scale with explicit final operand R,
 * zero-operand masked form.  Expands to
 * __builtin_ia32_rndscaleps_mask(B, imm, zero vector, A, R): B is the
 * source, A the mask. */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(B), (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), (__mmask16)(A), (int)(R)); })
2588
/* Unmasked 512-bit single-precision round-scale with explicit final operand
 * R.  Expands to __builtin_ia32_rndscaleps_mask(A, imm, undefined vector,
 * all-ones mask, R). */
#define _mm512_roundscale_round_ps(A, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_rndscaleps_mask( \
      (__v16sf)(__m512)(A), (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R)); })
2593
/* Unmasked 512-bit double-precision round-scale.  Expands to
 * __builtin_ia32_rndscalepd_mask(A, B, undefined vector, all-ones mask,
 * _MM_FROUND_CUR_DIRECTION): A is the source vector and B the int
 * immediate operand.
 * The third operand is _mm512_undefined_pd() (as in
 * _mm512_roundscale_round_pd) rather than a second copy of A, so that A is
 * expanded, and therefore evaluated, exactly once. */
#define _mm512_roundscale_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })
2598
/* Masked 512-bit double-precision round-scale.  Expands to
 * __builtin_ia32_rndscalepd_mask(C, imm, A, B, _MM_FROUND_CUR_DIRECTION):
 * C is the source, imm the int immediate, A the third operand, B the mask. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), \
      (__v8df)(__m512d)(A), (__mmask8)(B), \
      _MM_FROUND_CUR_DIRECTION); })
2603
/* 512-bit double-precision round-scale, zero-operand masked form.  Expands
 * to __builtin_ia32_rndscalepd_mask(B, imm, zero vector, A,
 * _MM_FROUND_CUR_DIRECTION): B is the source, A the mask. */
#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(A), \
      _MM_FROUND_CUR_DIRECTION); })
2609
/* Masked 512-bit double-precision round-scale with explicit final operand R.
 * Expands to __builtin_ia32_rndscalepd_mask(C, imm, A, B, R): C is the
 * source, imm the int immediate, A the third operand, B the mask. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(C), (int)(imm), \
      (__v8df)(__m512d)(A), (__mmask8)(B), (int)(R)); })
2614
/* 512-bit double-precision round-scale with explicit final operand R,
 * zero-operand masked form.  Expands to
 * __builtin_ia32_rndscalepd_mask(B, imm, zero vector, A, R): B is the
 * source, A the mask. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(B), (int)(imm), \
      (__v8df)_mm512_setzero_pd(), (__mmask8)(A), (int)(R)); })
2619
/* Unmasked 512-bit double-precision round-scale with explicit final operand
 * R.  Expands to __builtin_ia32_rndscalepd_mask(A, imm, undefined vector,
 * all-ones mask, R). */
#define _mm512_roundscale_round_pd(A, imm, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rndscalepd_mask( \
      (__v8df)(__m512d)(A), (int)(imm), \
      (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(R)); })
2624
/* Unmasked 512-bit double-precision fused multiply-add with rounding operand
 * R.  Expands to __builtin_ia32_vfmaddpd512_mask(A, B, C, all-ones, R). */
#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
2630
2631
/* Masked 512-bit double-precision fused multiply-add with rounding operand
 * R.  Expands to __builtin_ia32_vfmaddpd512_mask(A, B, C, U, R). */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
2637
2638
/* "mask3" 512-bit double-precision fused multiply-add with rounding operand
 * R.  Expands to __builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R). */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
2644
2645
/* "maskz" 512-bit double-precision fused multiply-add with rounding operand
 * R.  Expands to __builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R). */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
2651
2652
/* Unmasked 512-bit double-precision fused multiply-subtract with rounding
 * operand R, built on the fmadd builtin by negating C:
 * __builtin_ia32_vfmaddpd512_mask(A, B, -C, all-ones, R). */
#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
2658
2659
/* Masked 512-bit double-precision fused multiply-subtract with rounding
 * operand R, built on the fmadd builtin by negating C:
 * __builtin_ia32_vfmaddpd512_mask(A, B, -C, U, R). */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
2665
2666
/* "maskz" 512-bit double-precision fused multiply-subtract with rounding
 * operand R, built on the fmadd builtin by negating C:
 * __builtin_ia32_vfmaddpd512_maskz(A, B, -C, U, R). */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
2672
2673
/* Unmasked 512-bit double-precision negated fused multiply-add with rounding
 * operand R, built on the fmadd builtin by negating A:
 * __builtin_ia32_vfmaddpd512_mask(-A, B, C, all-ones, R). */
#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
2679
2680
/* "mask3" 512-bit double-precision negated fused multiply-add with rounding
 * operand R, built on the fmadd builtin by negating A:
 * __builtin_ia32_vfmaddpd512_mask3(-A, B, C, U, R). */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
2686
2687
/* "maskz" 512-bit double-precision negated fused multiply-add with rounding
 * operand R, built on the fmadd builtin by negating A:
 * __builtin_ia32_vfmaddpd512_maskz(-A, B, C, U, R). */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
2693
2694
/* Unmasked 512-bit double-precision negated fused multiply-subtract with
 * rounding operand R, built on the fmadd builtin by negating A and C:
 * __builtin_ia32_vfmaddpd512_mask(-A, B, -C, all-ones, R). */
#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_mask( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)-1, (int)(R)); })
2700
2701
/* "maskz" 512-bit double-precision negated fused multiply-subtract with
 * rounding operand R, built on the fmadd builtin by negating A and C:
 * __builtin_ia32_vfmaddpd512_maskz(-A, B, -C, U, R). */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz( \
      -(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), -(__v8df)(__m512d)(C), \
      (__mmask8)(U), (int)(R)); })
2707
2708
2709static __inline__ __m512d __DEFAULT_FN_ATTRS
2710_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2711{
2712  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2713                                                    (__v8df) __B,
2714                                                    (__v8df) __C,
2715                                                    (__mmask8) -1,
2716                                                    _MM_FROUND_CUR_DIRECTION);
2717}
2718
2719static __inline__ __m512d __DEFAULT_FN_ATTRS
2720_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2721{
2722  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2723                                                    (__v8df) __B,
2724                                                    (__v8df) __C,
2725                                                    (__mmask8) __U,
2726                                                    _MM_FROUND_CUR_DIRECTION);
2727}
2728
2729static __inline__ __m512d __DEFAULT_FN_ATTRS
2730_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2731{
2732  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2733                                                     (__v8df) __B,
2734                                                     (__v8df) __C,
2735                                                     (__mmask8) __U,
2736                                                     _MM_FROUND_CUR_DIRECTION);
2737}
2738
2739static __inline__ __m512d __DEFAULT_FN_ATTRS
2740_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2741{
2742  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2743                                                     (__v8df) __B,
2744                                                     (__v8df) __C,
2745                                                     (__mmask8) __U,
2746                                                     _MM_FROUND_CUR_DIRECTION);
2747}
2748
2749static __inline__ __m512d __DEFAULT_FN_ATTRS
2750_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2751{
2752  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2753                                                    (__v8df) __B,
2754                                                    -(__v8df) __C,
2755                                                    (__mmask8) -1,
2756                                                    _MM_FROUND_CUR_DIRECTION);
2757}
2758
2759static __inline__ __m512d __DEFAULT_FN_ATTRS
2760_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2761{
2762  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2763                                                    (__v8df) __B,
2764                                                    -(__v8df) __C,
2765                                                    (__mmask8) __U,
2766                                                    _MM_FROUND_CUR_DIRECTION);
2767}
2768
2769static __inline__ __m512d __DEFAULT_FN_ATTRS
2770_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2771{
2772  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2773                                                     (__v8df) __B,
2774                                                     -(__v8df) __C,
2775                                                     (__mmask8) __U,
2776                                                     _MM_FROUND_CUR_DIRECTION);
2777}
2778
2779static __inline__ __m512d __DEFAULT_FN_ATTRS
2780_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2781{
2782  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2783                                                    (__v8df) __B,
2784                                                    (__v8df) __C,
2785                                                    (__mmask8) -1,
2786                                                    _MM_FROUND_CUR_DIRECTION);
2787}
2788
2789static __inline__ __m512d __DEFAULT_FN_ATTRS
2790_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2791{
2792  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2793                                                     (__v8df) __B,
2794                                                     (__v8df) __C,
2795                                                     (__mmask8) __U,
2796                                                     _MM_FROUND_CUR_DIRECTION);
2797}
2798
2799static __inline__ __m512d __DEFAULT_FN_ATTRS
2800_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2801{
2802  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2803                                                     (__v8df) __B,
2804                                                     (__v8df) __C,
2805                                                     (__mmask8) __U,
2806                                                     _MM_FROUND_CUR_DIRECTION);
2807}
2808
2809static __inline__ __m512d __DEFAULT_FN_ATTRS
2810_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2811{
2812  return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
2813                                                    (__v8df) __B,
2814                                                    -(__v8df) __C,
2815                                                    (__mmask8) -1,
2816                                                    _MM_FROUND_CUR_DIRECTION);
2817}
2818
2819static __inline__ __m512d __DEFAULT_FN_ATTRS
2820_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2821{
2822  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2823                                                     (__v8df) __B,
2824                                                     -(__v8df) __C,
2825                                                     (__mmask8) __U,
2826                                                     _MM_FROUND_CUR_DIRECTION);
2827}
2828
/* Unmasked 512-bit single-precision fused multiply-add with rounding operand
 * R.  Expands to __builtin_ia32_vfmaddps512_mask(A, B, C, all-ones, R). */
#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
2834
2835
/* Masked 512-bit single-precision fused multiply-add with rounding operand
 * R.  Expands to __builtin_ia32_vfmaddps512_mask(A, B, C, U, R). */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
2841
2842
/* "mask3" 512-bit single-precision fused multiply-add with rounding operand
 * R.  Expands to __builtin_ia32_vfmaddps512_mask3(A, B, C, U, R). */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
2848
2849
/* "maskz" 512-bit single-precision fused multiply-add with rounding operand
 * R.  Expands to __builtin_ia32_vfmaddps512_maskz(A, B, C, U, R). */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
2855
2856
/* Unmasked 512-bit single-precision fused multiply-subtract with rounding
 * operand R, built on the fmadd builtin by negating C:
 * __builtin_ia32_vfmaddps512_mask(A, B, -C, all-ones, R). */
#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
2862
2863
/* Masked 512-bit single-precision fused multiply-subtract with rounding
 * operand R, built on the fmadd builtin by negating C:
 * __builtin_ia32_vfmaddps512_mask(A, B, -C, U, R). */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
2869
2870
/* "maskz" 512-bit single-precision fused multiply-subtract with rounding
 * operand R, built on the fmadd builtin by negating C:
 * __builtin_ia32_vfmaddps512_maskz(A, B, -C, U, R). */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
2876
2877
/* Unmasked 512-bit single-precision negated fused multiply-add with rounding
 * operand R, built on the fmadd builtin by negating A:
 * __builtin_ia32_vfmaddps512_mask(-A, B, C, all-ones, R). */
#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
2883
2884
/* "mask3" 512-bit single-precision negated fused multiply-add with rounding
 * operand R, built on the fmadd builtin by negating A:
 * __builtin_ia32_vfmaddps512_mask3(-A, B, C, U, R). */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask3( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
2890
2891
/* "maskz" 512-bit single-precision negated fused multiply-add with rounding
 * operand R, built on the fmadd builtin by negating A:
 * __builtin_ia32_vfmaddps512_maskz(-A, B, C, U, R). */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
2897
2898
/* Unmasked 512-bit single-precision negated fused multiply-subtract with
 * rounding operand R, built on the fmadd builtin by negating A and C:
 * __builtin_ia32_vfmaddps512_mask(-A, B, -C, all-ones, R). */
#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_mask( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)-1, (int)(R)); })
2904
2905
/* "maskz" 512-bit single-precision negated fused multiply-subtract with
 * rounding operand R, built on the fmadd builtin by negating A and C:
 * __builtin_ia32_vfmaddps512_maskz(-A, B, -C, U, R). */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_vfmaddps512_maskz( \
      -(__v16sf)(__m512)(A), (__v16sf)(__m512)(B), -(__v16sf)(__m512)(C), \
      (__mmask16)(U), (int)(R)); })
2911
2912
2913static __inline__ __m512 __DEFAULT_FN_ATTRS
2914_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2915{
2916  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2917                                                   (__v16sf) __B,
2918                                                   (__v16sf) __C,
2919                                                   (__mmask16) -1,
2920                                                   _MM_FROUND_CUR_DIRECTION);
2921}
2922
2923static __inline__ __m512 __DEFAULT_FN_ATTRS
2924_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2925{
2926  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2927                                                   (__v16sf) __B,
2928                                                   (__v16sf) __C,
2929                                                   (__mmask16) __U,
2930                                                   _MM_FROUND_CUR_DIRECTION);
2931}
2932
2933static __inline__ __m512 __DEFAULT_FN_ATTRS
2934_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2935{
2936  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2937                                                    (__v16sf) __B,
2938                                                    (__v16sf) __C,
2939                                                    (__mmask16) __U,
2940                                                    _MM_FROUND_CUR_DIRECTION);
2941}
2942
2943static __inline__ __m512 __DEFAULT_FN_ATTRS
2944_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2945{
2946  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2947                                                    (__v16sf) __B,
2948                                                    (__v16sf) __C,
2949                                                    (__mmask16) __U,
2950                                                    _MM_FROUND_CUR_DIRECTION);
2951}
2952
2953static __inline__ __m512 __DEFAULT_FN_ATTRS
2954_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2955{
2956  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2957                                                   (__v16sf) __B,
2958                                                   -(__v16sf) __C,
2959                                                   (__mmask16) -1,
2960                                                   _MM_FROUND_CUR_DIRECTION);
2961}
2962
2963static __inline__ __m512 __DEFAULT_FN_ATTRS
2964_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2965{
2966  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2967                                                   (__v16sf) __B,
2968                                                   -(__v16sf) __C,
2969                                                   (__mmask16) __U,
2970                                                   _MM_FROUND_CUR_DIRECTION);
2971}
2972
2973static __inline__ __m512 __DEFAULT_FN_ATTRS
2974_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2975{
2976  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2977                                                    (__v16sf) __B,
2978                                                    -(__v16sf) __C,
2979                                                    (__mmask16) __U,
2980                                                    _MM_FROUND_CUR_DIRECTION);
2981}
2982
2983static __inline__ __m512 __DEFAULT_FN_ATTRS
2984_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2985{
2986  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
2987                                                   (__v16sf) __B,
2988                                                   (__v16sf) __C,
2989                                                   (__mmask16) -1,
2990                                                   _MM_FROUND_CUR_DIRECTION);
2991}
2992
2993static __inline__ __m512 __DEFAULT_FN_ATTRS
2994_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2995{
2996  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2997                                                    (__v16sf) __B,
2998                                                    (__v16sf) __C,
2999                                                    (__mmask16) __U,
3000                                                    _MM_FROUND_CUR_DIRECTION);
3001}
3002
3003static __inline__ __m512 __DEFAULT_FN_ATTRS
3004_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3005{
3006  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3007                                                    (__v16sf) __B,
3008                                                    (__v16sf) __C,
3009                                                    (__mmask16) __U,
3010                                                    _MM_FROUND_CUR_DIRECTION);
3011}
3012
3013static __inline__ __m512 __DEFAULT_FN_ATTRS
3014_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
3015{
3016  return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
3017                                                   (__v16sf) __B,
3018                                                   -(__v16sf) __C,
3019                                                   (__mmask16) -1,
3020                                                   _MM_FROUND_CUR_DIRECTION);
3021}
3022
3023static __inline__ __m512 __DEFAULT_FN_ATTRS
3024_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3025{
3026  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
3027                                                    (__v16sf) __B,
3028                                                    -(__v16sf) __C,
3029                                                    (__mmask16) __U,
3030                                                    _MM_FROUND_CUR_DIRECTION);
3031}
3032
/* Unmasked 8 x double fmaddsub with caller-supplied rounding.
 * Expands to __builtin_ia32_vfmaddsubpd512_mask(A, B, C) with an all-ones
 * mask; R is cast to int and passed as the final (rounding) argument. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)-1, \
        (int)(R)); \
  })
3038
3039
/* mask form of the 8 x double fmaddsub with caller-supplied rounding.
 * Expands to __builtin_ia32_vfmaddsubpd512_mask(A, B, C) under mask U;
 * R is cast to int and passed as the final (rounding) argument. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3045
3046
/* mask3 form of the 8 x double fmaddsub with caller-supplied rounding.
 * Expands to __builtin_ia32_vfmaddsubpd512_mask3(A, B, C) under mask U;
 * R is cast to int and passed as the final (rounding) argument. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask3( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3052
3053
/* maskz form of the 8 x double fmaddsub with caller-supplied rounding.
 * Expands to __builtin_ia32_vfmaddsubpd512_maskz(A, B, C) under mask U;
 * R is cast to int and passed as the final (rounding) argument. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3059
3060
/* Unmasked 8 x double fmsubadd with caller-supplied rounding.
 * Built on the fmaddsub builtin: C is negated before being passed to
 * __builtin_ia32_vfmaddsubpd512_mask with an all-ones mask; R is cast to
 * int and passed as the final (rounding) argument. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)-1, \
        (int)(R)); \
  })
3066
3067
/* mask form of the 8 x double fmsubadd with caller-supplied rounding.
 * Built on the fmaddsub builtin: C is negated before being passed to
 * __builtin_ia32_vfmaddsubpd512_mask under mask U; R is cast to int and
 * passed as the final (rounding) argument. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3073
3074
/* maskz form of the 8 x double fmsubadd with caller-supplied rounding.
 * Built on the fmaddsub builtin: C is negated before being passed to
 * __builtin_ia32_vfmaddsubpd512_maskz under mask U; R is cast to int and
 * passed as the final (rounding) argument. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmaddsubpd512_maskz( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        -(__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3080
3081
3082static __inline__ __m512d __DEFAULT_FN_ATTRS
3083_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
3084{
3085  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3086                                                       (__v8df) __B,
3087                                                       (__v8df) __C,
3088                                                       (__mmask8) -1,
3089                                                       _MM_FROUND_CUR_DIRECTION);
3090}
3091
3092static __inline__ __m512d __DEFAULT_FN_ATTRS
3093_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3094{
3095  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3096                                                       (__v8df) __B,
3097                                                       (__v8df) __C,
3098                                                       (__mmask8) __U,
3099                                                       _MM_FROUND_CUR_DIRECTION);
3100}
3101
3102static __inline__ __m512d __DEFAULT_FN_ATTRS
3103_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3104{
3105  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
3106                                                        (__v8df) __B,
3107                                                        (__v8df) __C,
3108                                                        (__mmask8) __U,
3109                                                        _MM_FROUND_CUR_DIRECTION);
3110}
3111
3112static __inline__ __m512d __DEFAULT_FN_ATTRS
3113_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3114{
3115  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3116                                                        (__v8df) __B,
3117                                                        (__v8df) __C,
3118                                                        (__mmask8) __U,
3119                                                        _MM_FROUND_CUR_DIRECTION);
3120}
3121
3122static __inline__ __m512d __DEFAULT_FN_ATTRS
3123_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
3124{
3125  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3126                                                       (__v8df) __B,
3127                                                       -(__v8df) __C,
3128                                                       (__mmask8) -1,
3129                                                       _MM_FROUND_CUR_DIRECTION);
3130}
3131
3132static __inline__ __m512d __DEFAULT_FN_ATTRS
3133_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3134{
3135  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
3136                                                       (__v8df) __B,
3137                                                       -(__v8df) __C,
3138                                                       (__mmask8) __U,
3139                                                       _MM_FROUND_CUR_DIRECTION);
3140}
3141
3142static __inline__ __m512d __DEFAULT_FN_ATTRS
3143_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
3144{
3145  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
3146                                                        (__v8df) __B,
3147                                                        -(__v8df) __C,
3148                                                        (__mmask8) __U,
3149                                                        _MM_FROUND_CUR_DIRECTION);
3150}
3151
/* Unmasked 16 x float fmaddsub with caller-supplied rounding.
 * Expands to __builtin_ia32_vfmaddsubps512_mask(A, B, C) with an all-ones
 * mask; R is cast to int and passed as the final (rounding) argument. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)-1, \
        (int)(R)); \
  })
3157
3158
/* mask form of the 16 x float fmaddsub with caller-supplied rounding.
 * Expands to __builtin_ia32_vfmaddsubps512_mask(A, B, C) under mask U;
 * R is cast to int and passed as the final (rounding) argument. */
#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3164
3165
/* mask3 form of the 16 x float fmaddsub with caller-supplied rounding.
 * Expands to __builtin_ia32_vfmaddsubps512_mask3(A, B, C) under mask U;
 * R is cast to int and passed as the final (rounding) argument. */
#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask3( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3171
3172
/* maskz form of the 16 x float fmaddsub with caller-supplied rounding.
 * Expands to __builtin_ia32_vfmaddsubps512_maskz(A, B, C) under mask U;
 * R is cast to int and passed as the final (rounding) argument. */
#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3178
3179
/* Unmasked 16 x float fmsubadd with caller-supplied rounding.
 * Built on the fmaddsub builtin: C is negated before being passed to
 * __builtin_ia32_vfmaddsubps512_mask with an all-ones mask; R is cast to
 * int and passed as the final (rounding) argument. */
#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        -(__v16sf)(__m512)(C), \
        (__mmask16)-1, \
        (int)(R)); \
  })
3185
3186
/* mask form of the 16 x float fmsubadd with caller-supplied rounding.
 * Built on the fmaddsub builtin: C is negated before being passed to
 * __builtin_ia32_vfmaddsubps512_mask under mask U; R is cast to int and
 * passed as the final (rounding) argument. */
#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        -(__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3192
3193
/* maskz form of the 16 x float fmsubadd with caller-supplied rounding.
 * Built on the fmaddsub builtin: C is negated before being passed to
 * __builtin_ia32_vfmaddsubps512_maskz under mask U; R is cast to int and
 * passed as the final (rounding) argument. */
#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmaddsubps512_maskz( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        -(__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3199
3200
3201static __inline__ __m512 __DEFAULT_FN_ATTRS
3202_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3203{
3204  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3205                                                      (__v16sf) __B,
3206                                                      (__v16sf) __C,
3207                                                      (__mmask16) -1,
3208                                                      _MM_FROUND_CUR_DIRECTION);
3209}
3210
3211static __inline__ __m512 __DEFAULT_FN_ATTRS
3212_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3213{
3214  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3215                                                      (__v16sf) __B,
3216                                                      (__v16sf) __C,
3217                                                      (__mmask16) __U,
3218                                                      _MM_FROUND_CUR_DIRECTION);
3219}
3220
3221static __inline__ __m512 __DEFAULT_FN_ATTRS
3222_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3223{
3224  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3225                                                       (__v16sf) __B,
3226                                                       (__v16sf) __C,
3227                                                       (__mmask16) __U,
3228                                                       _MM_FROUND_CUR_DIRECTION);
3229}
3230
3231static __inline__ __m512 __DEFAULT_FN_ATTRS
3232_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3233{
3234  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3235                                                       (__v16sf) __B,
3236                                                       (__v16sf) __C,
3237                                                       (__mmask16) __U,
3238                                                       _MM_FROUND_CUR_DIRECTION);
3239}
3240
3241static __inline__ __m512 __DEFAULT_FN_ATTRS
3242_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3243{
3244  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3245                                                      (__v16sf) __B,
3246                                                      -(__v16sf) __C,
3247                                                      (__mmask16) -1,
3248                                                      _MM_FROUND_CUR_DIRECTION);
3249}
3250
3251static __inline__ __m512 __DEFAULT_FN_ATTRS
3252_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3253{
3254  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3255                                                      (__v16sf) __B,
3256                                                      -(__v16sf) __C,
3257                                                      (__mmask16) __U,
3258                                                      _MM_FROUND_CUR_DIRECTION);
3259}
3260
3261static __inline__ __m512 __DEFAULT_FN_ATTRS
3262_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3263{
3264  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3265                                                       (__v16sf) __B,
3266                                                       -(__v16sf) __C,
3267                                                       (__mmask16) __U,
3268                                                       _MM_FROUND_CUR_DIRECTION);
3269}
3270
/* mask3 form of the 8 x double fmsub with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfmsubpd512_mask3(A, B, C) under
 * mask U (no operand is negated here); R is cast to int and passed as the
 * final (rounding) argument. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmsubpd512_mask3( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3276
3277
3278static __inline__ __m512d __DEFAULT_FN_ATTRS
3279_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3280{
3281  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3282                                                     (__v8df) __B,
3283                                                     (__v8df) __C,
3284                                                     (__mmask8) __U,
3285                                                     _MM_FROUND_CUR_DIRECTION);
3286}
3287
/* mask3 form of the 16 x float fmsub with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfmsubps512_mask3(A, B, C) under
 * mask U (no operand is negated here); R is cast to int and passed as the
 * final (rounding) argument. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmsubps512_mask3( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3293
3294
3295static __inline__ __m512 __DEFAULT_FN_ATTRS
3296_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3297{
3298  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3299                                                    (__v16sf) __B,
3300                                                    (__v16sf) __C,
3301                                                    (__mmask16) __U,
3302                                                    _MM_FROUND_CUR_DIRECTION);
3303}
3304
/* mask3 form of the 8 x double fmsubadd with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfmsubaddpd512_mask3(A, B, C)
 * under mask U (no operand is negated here); R is cast to int and passed
 * as the final (rounding) argument. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfmsubaddpd512_mask3( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3310
3311
3312static __inline__ __m512d __DEFAULT_FN_ATTRS
3313_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3314{
3315  return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3316                                                        (__v8df) __B,
3317                                                        (__v8df) __C,
3318                                                        (__mmask8) __U,
3319                                                        _MM_FROUND_CUR_DIRECTION);
3320}
3321
/* mask3 form of the 16 x float fmsubadd with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfmsubaddps512_mask3(A, B, C)
 * under mask U (no operand is negated here); R is cast to int and passed
 * as the final (rounding) argument. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfmsubaddps512_mask3( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3327
3328
3329static __inline__ __m512 __DEFAULT_FN_ATTRS
3330_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3331{
3332  return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3333                                                       (__v16sf) __B,
3334                                                       (__v16sf) __C,
3335                                                       (__mmask16) __U,
3336                                                       _MM_FROUND_CUR_DIRECTION);
3337}
3338
/* mask form of the 8 x double fnmadd with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfnmaddpd512_mask(A, B, C) under
 * mask U (no operand is negated here); R is cast to int and passed as the
 * final (rounding) argument. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfnmaddpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3344
3345
3346static __inline__ __m512d __DEFAULT_FN_ATTRS
3347_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3348{
3349  return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
3350                                                     (__v8df) __B,
3351                                                     (__v8df) __C,
3352                                                     (__mmask8) __U,
3353                                                     _MM_FROUND_CUR_DIRECTION);
3354}
3355
/* mask form of the 16 x float fnmadd with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfnmaddps512_mask(A, B, C) under
 * mask U (no operand is negated here); R is cast to int and passed as the
 * final (rounding) argument. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfnmaddps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3361
3362
3363static __inline__ __m512 __DEFAULT_FN_ATTRS
3364_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3365{
3366  return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
3367                                                    (__v16sf) __B,
3368                                                    (__v16sf) __C,
3369                                                    (__mmask16) __U,
3370                                                    _MM_FROUND_CUR_DIRECTION);
3371}
3372
/* mask form of the 8 x double fnmsub with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfnmsubpd512_mask(A, B, C) under
 * mask U (no operand is negated here); R is cast to int and passed as the
 * final (rounding) argument. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfnmsubpd512_mask( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3378
3379
/* mask3 form of the 8 x double fnmsub with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfnmsubpd512_mask3(A, B, C)
 * under mask U (no operand is negated here); R is cast to int and passed
 * as the final (rounding) argument. */
#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  __extension__ ({ \
    (__m512d)__builtin_ia32_vfnmsubpd512_mask3( \
        (__v8df)(__m512d)(A), \
        (__v8df)(__m512d)(B), \
        (__v8df)(__m512d)(C), \
        (__mmask8)(U), \
        (int)(R)); \
  })
3385
3386
3387static __inline__ __m512d __DEFAULT_FN_ATTRS
3388_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3389{
3390  return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
3391                                                     (__v8df) __B,
3392                                                     (__v8df) __C,
3393                                                     (__mmask8) __U,
3394                                                     _MM_FROUND_CUR_DIRECTION);
3395}
3396
3397static __inline__ __m512d __DEFAULT_FN_ATTRS
3398_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3399{
3400  return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
3401                                                      (__v8df) __B,
3402                                                      (__v8df) __C,
3403                                                      (__mmask8) __U,
3404                                                      _MM_FROUND_CUR_DIRECTION);
3405}
3406
/* mask form of the 16 x float fnmsub with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfnmsubps512_mask(A, B, C) under
 * mask U (no operand is negated here); R is cast to int and passed as the
 * final (rounding) argument. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfnmsubps512_mask( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3412
3413
/* mask3 form of the 16 x float fnmsub with caller-supplied rounding.
 * Expands to the dedicated __builtin_ia32_vfnmsubps512_mask3(A, B, C)
 * under mask U (no operand is negated here); R is cast to int and passed
 * as the final (rounding) argument. */
#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  __extension__ ({ \
    (__m512)__builtin_ia32_vfnmsubps512_mask3( \
        (__v16sf)(__m512)(A), \
        (__v16sf)(__m512)(B), \
        (__v16sf)(__m512)(C), \
        (__mmask16)(U), \
        (int)(R)); \
  })
3419
3420
3421static __inline__ __m512 __DEFAULT_FN_ATTRS
3422_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3423{
3424  return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
3425                                                    (__v16sf) __B,
3426                                                    (__v16sf) __C,
3427                                                    (__mmask16) __U,
3428                                                    _MM_FROUND_CUR_DIRECTION);
3429}
3430
3431static __inline__ __m512 __DEFAULT_FN_ATTRS
3432_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3433{
3434  return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
3435                                                     (__v16sf) __B,
3436                                                     (__v16sf) __C,
3437                                                     (__mmask16) __U,
3438                                                     _MM_FROUND_CUR_DIRECTION);
3439}
3440
3441
3442
3443/* Vector permutations */
3444
3445static __inline __m512i __DEFAULT_FN_ATTRS
3446_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3447{
3448  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3449                                                       /* idx */ ,
3450                                                       (__v16si) __A,
3451                                                       (__v16si) __B,
3452                                                       (__mmask16) -1);
3453}
3454
3455static __inline__ __m512i __DEFAULT_FN_ATTRS
3456_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
3457                                __m512i __I, __m512i __B)
3458{
3459  return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
3460                                                        /* idx */ ,
3461                                                        (__v16si) __A,
3462                                                        (__v16si) __B,
3463                                                        (__mmask16) __U);
3464}
3465
3466static __inline__ __m512i __DEFAULT_FN_ATTRS
3467_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
3468                                 __m512i __I, __m512i __B)
3469{
3470  return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
3471                                                        /* idx */ ,
3472                                                        (__v16si) __A,
3473                                                        (__v16si) __B,
3474                                                        (__mmask16) __U);
3475}
3476
3477static __inline __m512i __DEFAULT_FN_ATTRS
3478_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3479{
3480  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3481                                                       /* idx */ ,
3482                                                       (__v8di) __A,
3483                                                       (__v8di) __B,
3484                                                       (__mmask8) -1);
3485}
3486
3487static __inline__ __m512i __DEFAULT_FN_ATTRS
3488_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
3489                                __m512i __B)
3490{
3491  return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
3492                                                       /* idx */ ,
3493                                                       (__v8di) __A,
3494                                                       (__v8di) __B,
3495                                                       (__mmask8) __U);
3496}
3497
3498
3499static __inline__ __m512i __DEFAULT_FN_ATTRS
3500_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
3501         __m512i __I, __m512i __B)
3502{
3503  return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
3504                                                        /* idx */ ,
3505                                                        (__v8di) __A,
3506                                                        (__v8di) __B,
3507                                                        (__mmask8) __U);
3508}
3509
/* alignr on 64-bit elements, expressed as a shuffle.
 * B is the first shuffle operand and A the second, so indices 0-7 name
 * elements of B and 8-15 name elements of A. The result is the eight
 * consecutive elements starting at index (I & 0x7) of that pair. */
#define _mm512_alignr_epi64(A, B, I) \
  __extension__ ({ \
    (__m512i)__builtin_shufflevector( \
        (__v8di)(__m512i)(B), (__v8di)(__m512i)(A), \
        ((int)(I) & 0x7) + 0, ((int)(I) & 0x7) + 1, \
        ((int)(I) & 0x7) + 2, ((int)(I) & 0x7) + 3, \
        ((int)(I) & 0x7) + 4, ((int)(I) & 0x7) + 5, \
        ((int)(I) & 0x7) + 6, ((int)(I) & 0x7) + 7); \
  })
3521
/* mask form of _mm512_alignr_epi64.
 * Passes mask U, the result of _mm512_alignr_epi64(A, B, imm) and W, in
 * that order, to __builtin_ia32_selectq_512. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_selectq_512( \
        (__mmask8)(U), \
        (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
        (__v8di)(__m512i)(W)); \
  })
3526
/* maskz form of _mm512_alignr_epi64.
 * Passes mask U, the result of _mm512_alignr_epi64(A, B, imm) and an
 * all-zero vector from _mm512_setzero_si512(), in that order, to
 * __builtin_ia32_selectq_512. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_selectq_512( \
        (__mmask8)(U), \
        (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
        (__v8di)_mm512_setzero_si512()); \
  })
3531
/* alignr on 32-bit elements, expressed as a shuffle.
 * B is the first shuffle operand and A the second, so indices 0-15 name
 * elements of B and 16-31 name elements of A. The result is the sixteen
 * consecutive elements starting at index (I & 0xf) of that pair. */
#define _mm512_alignr_epi32(A, B, I) \
  __extension__ ({ \
    (__m512i)__builtin_shufflevector( \
        (__v16si)(__m512i)(B), (__v16si)(__m512i)(A), \
        ((int)(I) & 0xf) + 0,  ((int)(I) & 0xf) + 1, \
        ((int)(I) & 0xf) + 2,  ((int)(I) & 0xf) + 3, \
        ((int)(I) & 0xf) + 4,  ((int)(I) & 0xf) + 5, \
        ((int)(I) & 0xf) + 6,  ((int)(I) & 0xf) + 7, \
        ((int)(I) & 0xf) + 8,  ((int)(I) & 0xf) + 9, \
        ((int)(I) & 0xf) + 10, ((int)(I) & 0xf) + 11, \
        ((int)(I) & 0xf) + 12, ((int)(I) & 0xf) + 13, \
        ((int)(I) & 0xf) + 14, ((int)(I) & 0xf) + 15); \
  })
3551
/* mask form of _mm512_alignr_epi32.
 * Passes mask U, the result of _mm512_alignr_epi32(A, B, imm) and W, in
 * that order, to __builtin_ia32_selectd_512. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_selectd_512( \
        (__mmask16)(U), \
        (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
        (__v16si)(__m512i)(W)); \
  })
3556
/* maskz form of _mm512_alignr_epi32.
 * Passes mask U, the result of _mm512_alignr_epi32(A, B, imm) and an
 * all-zero vector from _mm512_setzero_si512(), in that order, to
 * __builtin_ia32_selectd_512. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  __extension__ ({ \
    (__m512i)__builtin_ia32_selectd_512( \
        (__mmask16)(U), \
        (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
        (__v16si)_mm512_setzero_si512()); \
  })
3561/* Vector Extract */
3562
/* Extracts four doubles from the 8 x double vector A as a __m256d.
 * Bit 0 of I picks the half: elements 4-7 when set, elements 0-3 when
 * clear. The second shuffle operand is _mm512_undefined_pd(); every index
 * used here is below 8, so only elements of A are selected. */
#define _mm512_extractf64x4_pd(A, I) \
  __extension__ ({ \
    (__m256d)__builtin_shufflevector( \
        (__v8df)(__m512d)(A), \
        (__v8df)_mm512_undefined_pd(), \
        ((I) & 1) ? 4 : 0, \
        ((I) & 1) ? 5 : 1, \
        ((I) & 1) ? 6 : 2, \
        ((I) & 1) ? 7 : 3); \
  })
3570
/* Masked 4 x double extract: the result of _mm512_extractf64x4_pd(A, imm)
   and W are handed, in that order, to __builtin_ia32_selectpd_256 with
   mask U.  W is cast through __m256d first, matching the two-step cast used
   for vector operands by the other masked macros in this file.
   NOTE(review): presumably a clear mask bit keeps the lane from W -- confirm. */
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                   (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
                                   (__v4df)(__m256d)(W)); })
3575
/* Zero-masked 4 x double extract: the extract result and
   _mm256_setzero_pd() go to __builtin_ia32_selectpd_256 with mask U. */
#define _mm512_maskz_extractf64x4_pd(U, A, I) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm512_extractf64x4_pd((A), (I)), \
      (__v4df)_mm256_setzero_pd()); })
3580
/* Extracts four consecutive floats from A, starting at element
   4 * (I & 3).  The second shuffle operand is an undefined vector; every
   index is below 16, so it is never selected. */
#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
  (__m128)__builtin_shufflevector( \
      (__v16sf)(__m512)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      0 + ((I) & 0x3) * 4, 1 + ((I) & 0x3) * 4, \
      2 + ((I) & 0x3) * 4, 3 + ((I) & 0x3) * 4); })
3588
/* Masked 4 x float extract: the result of _mm512_extractf32x4_ps(A, imm)
   and W are handed, in that order, to __builtin_ia32_selectps_128 with
   mask U.  W is cast through __m128 first, matching the two-step cast used
   for vector operands by the other masked macros in this file.
   NOTE(review): presumably a clear mask bit keeps the lane from W -- confirm. */
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                   (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
                                   (__v4sf)(__m128)(W)); })
3593
/* Zero-masked 4 x float extract: the extract result and _mm_setzero_ps()
   go to __builtin_ia32_selectps_128 with mask U. */
#define _mm512_maskz_extractf32x4_ps(U, A, I) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm512_extractf32x4_ps((A), (I)), \
      (__v4sf)_mm_setzero_ps()); })
3598
3599/* Vector Blend */
3600
3601static __inline __m512d __DEFAULT_FN_ATTRS
3602_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3603{
3604  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3605                 (__v8df) __W,
3606                 (__v8df) __A);
3607}
3608
3609static __inline __m512 __DEFAULT_FN_ATTRS
3610_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3611{
3612  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3613                (__v16sf) __W,
3614                (__v16sf) __A);
3615}
3616
3617static __inline __m512i __DEFAULT_FN_ATTRS
3618_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3619{
3620  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3621                (__v8di) __W,
3622                (__v8di) __A);
3623}
3624
3625static __inline __m512i __DEFAULT_FN_ATTRS
3626_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3627{
3628  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3629                (__v16si) __W,
3630                (__v16si) __A);
3631}
3632
3633/* Compare */
3634
/* 16 x float compares producing a __mmask16.  Both macros forward A, B, the
   predicate P and the trailing operand R to __builtin_ia32_cmpps512_mask;
   the plain form supplies an all-ones mask, the mask_ form supplies U. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (int)(P), (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask16)__builtin_ia32_cmpps512_mask( \
      (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), \
      (int)(P), (__mmask16)(U), (int)(R)); })
3644
/* Generic float compares: the _round_ forms with _MM_FROUND_CUR_DIRECTION
   supplied as the last operand. */
#define _mm512_cmp_ps_mask(A, B, PRED) \
  _mm512_cmp_round_ps_mask((A), (B), (PRED), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, PRED) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (PRED), \
                                _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands.  Each pair (plain / masked) fixes the
   predicate argument of the generic compare above to one _CMP_* constant. */

/* _CMP_EQ_OQ */
#define _mm512_cmpeq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(U, A, B) \
  _mm512_mask_cmp_ps_mask((U), (A), (B), _CMP_EQ_OQ)

/* _CMP_LT_OS */
#define _mm512_cmplt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(U, A, B) \
  _mm512_mask_cmp_ps_mask((U), (A), (B), _CMP_LT_OS)

/* _CMP_LE_OS */
#define _mm512_cmple_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(U, A, B) \
  _mm512_mask_cmp_ps_mask((U), (A), (B), _CMP_LE_OS)

/* _CMP_UNORD_Q */
#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(U, A, B) \
  _mm512_mask_cmp_ps_mask((U), (A), (B), _CMP_UNORD_Q)

/* _CMP_NEQ_UQ */
#define _mm512_cmpneq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(U, A, B) \
  _mm512_mask_cmp_ps_mask((U), (A), (B), _CMP_NEQ_UQ)

/* _CMP_NLT_US */
#define _mm512_cmpnlt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(U, A, B) \
  _mm512_mask_cmp_ps_mask((U), (A), (B), _CMP_NLT_US)

/* _CMP_NLE_US */
#define _mm512_cmpnle_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(U, A, B) \
  _mm512_mask_cmp_ps_mask((U), (A), (B), _CMP_NLE_US)

/* _CMP_ORD_Q */
#define _mm512_cmpord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(U, A, B) \
  _mm512_mask_cmp_ps_mask((U), (A), (B), _CMP_ORD_Q)
3689
/* 8 x double compares producing a __mmask8.  Both macros forward A, B, the
   predicate P and the trailing operand R to __builtin_ia32_cmppd512_mask;
   the plain form supplies an all-ones mask, the mask_ form supplies U. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(P), (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (int)(P), (__mmask8)(U), (int)(R)); })
3699
/* Generic double compares: the _round_ forms with _MM_FROUND_CUR_DIRECTION
   supplied as the last operand. */
#define _mm512_cmp_pd_mask(A, B, PRED) \
  _mm512_cmp_round_pd_mask((A), (B), (PRED), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, PRED) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (PRED), \
                                _MM_FROUND_CUR_DIRECTION)

/* Named-predicate shorthands.  Each pair (plain / masked) fixes the
   predicate argument of the generic compare above to one _CMP_* constant. */

/* _CMP_EQ_OQ */
#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_EQ_OQ)

/* _CMP_LT_OS */
#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_LT_OS)

/* _CMP_LE_OS */
#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_LE_OS)

/* _CMP_UNORD_Q */
#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_UNORD_Q)

/* _CMP_NEQ_UQ */
#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_NEQ_UQ)

/* _CMP_NLT_US */
#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_NLT_US)

/* _CMP_NLE_US */
#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_NLE_US)

/* _CMP_ORD_Q */
#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(U, A, B) \
  _mm512_mask_cmp_pd_mask((U), (A), (B), _CMP_ORD_Q)
3744
3745/* Conversion */
3746
/* 16 x float -> 16 x 32-bit integer via __builtin_ia32_cvttps2udq512_mask,
   with the trailing operand R forwarded as an int.
     plain : undefined second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_undefined_epi32(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
3761
3762
3763static __inline __m512i __DEFAULT_FN_ATTRS
3764_mm512_cvttps_epu32(__m512 __A)
3765{
3766  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3767                  (__v16si)
3768                  _mm512_setzero_si512 (),
3769                  (__mmask16) -1,
3770                  _MM_FROUND_CUR_DIRECTION);
3771}
3772
3773static __inline__ __m512i __DEFAULT_FN_ATTRS
3774_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3775{
3776  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3777                   (__v16si) __W,
3778                   (__mmask16) __U,
3779                   _MM_FROUND_CUR_DIRECTION);
3780}
3781
3782static __inline__ __m512i __DEFAULT_FN_ATTRS
3783_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3784{
3785  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3786                   (__v16si) _mm512_setzero_si512 (),
3787                   (__mmask16) __U,
3788                   _MM_FROUND_CUR_DIRECTION);
3789}
3790
/* 16 x 32-bit integer -> 16 x float via __builtin_ia32_cvtdq2ps512_mask,
   with the trailing operand R forwarded as an int.
     plain : all-zero second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtdq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
3805
/* 16 x 32-bit integer -> 16 x float via __builtin_ia32_cvtudq2ps512_mask,
   with the trailing operand R forwarded as an int.
     plain : all-zero second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_cvtudq2ps512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
3820
3821static __inline__ __m512 __DEFAULT_FN_ATTRS
3822_mm512_cvtepu32_ps (__m512i __A)
3823{
3824  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3825                 (__v16sf) _mm512_undefined_ps (),
3826                 (__mmask16) -1,
3827                 _MM_FROUND_CUR_DIRECTION);
3828}
3829
3830static __inline__ __m512 __DEFAULT_FN_ATTRS
3831_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3832{
3833  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3834                 (__v16sf) __W,
3835                 (__mmask16) __U,
3836                 _MM_FROUND_CUR_DIRECTION);
3837}
3838
3839static __inline__ __m512 __DEFAULT_FN_ATTRS
3840_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3841{
3842  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
3843                 (__v16sf) _mm512_setzero_ps (),
3844                 (__mmask16) __U,
3845                 _MM_FROUND_CUR_DIRECTION);
3846}
3847
3848static __inline __m512d __DEFAULT_FN_ATTRS
3849_mm512_cvtepi32_pd(__m256i __A)
3850{
3851  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3852}
3853
3854static __inline__ __m512d __DEFAULT_FN_ATTRS
3855_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3856{
3857  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3858                                              (__v8df)_mm512_cvtepi32_pd(__A),
3859                                              (__v8df)__W);
3860}
3861
3862static __inline__ __m512d __DEFAULT_FN_ATTRS
3863_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3864{
3865  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3866                                              (__v8df)_mm512_cvtepi32_pd(__A),
3867                                              (__v8df)_mm512_setzero_pd());
3868}
3869
3870static __inline__ __m512d __DEFAULT_FN_ATTRS
3871_mm512_cvtepi32lo_pd(__m512i __A)
3872{
3873  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3874}
3875
3876static __inline__ __m512d __DEFAULT_FN_ATTRS
3877_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3878{
3879  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3880}
3881
3882static __inline__ __m512 __DEFAULT_FN_ATTRS
3883_mm512_cvtepi32_ps (__m512i __A)
3884{
3885  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3886                (__v16sf) _mm512_undefined_ps (),
3887                (__mmask16) -1,
3888                _MM_FROUND_CUR_DIRECTION);
3889}
3890
3891static __inline__ __m512 __DEFAULT_FN_ATTRS
3892_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3893{
3894  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3895                (__v16sf) __W,
3896                (__mmask16) __U,
3897                _MM_FROUND_CUR_DIRECTION);
3898}
3899
3900static __inline__ __m512 __DEFAULT_FN_ATTRS
3901_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3902{
3903  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
3904                (__v16sf) _mm512_setzero_ps (),
3905                (__mmask16) __U,
3906                _MM_FROUND_CUR_DIRECTION);
3907}
3908
3909static __inline __m512d __DEFAULT_FN_ATTRS
3910_mm512_cvtepu32_pd(__m256i __A)
3911{
3912  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3913}
3914
3915static __inline__ __m512d __DEFAULT_FN_ATTRS
3916_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3917{
3918  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3919                                              (__v8df)_mm512_cvtepu32_pd(__A),
3920                                              (__v8df)__W);
3921}
3922
3923static __inline__ __m512d __DEFAULT_FN_ATTRS
3924_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3925{
3926  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3927                                              (__v8df)_mm512_cvtepu32_pd(__A),
3928                                              (__v8df)_mm512_setzero_pd());
3929}
3930
3931static __inline__ __m512d __DEFAULT_FN_ATTRS
3932_mm512_cvtepu32lo_pd(__m512i __A)
3933{
3934  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3935}
3936
3937static __inline__ __m512d __DEFAULT_FN_ATTRS
3938_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3939{
3940  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3941}
3942
/* 8 x double -> 8 x float via __builtin_ia32_cvtpd2ps512_mask, with the
   trailing operand R forwarded as an int.
     plain : all-zero second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtpd2ps512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U), (int)(R)); })
3957
3958static __inline__ __m256 __DEFAULT_FN_ATTRS
3959_mm512_cvtpd_ps (__m512d __A)
3960{
3961  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3962                (__v8sf) _mm256_undefined_ps (),
3963                (__mmask8) -1,
3964                _MM_FROUND_CUR_DIRECTION);
3965}
3966
3967static __inline__ __m256 __DEFAULT_FN_ATTRS
3968_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3969{
3970  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3971                (__v8sf) __W,
3972                (__mmask8) __U,
3973                _MM_FROUND_CUR_DIRECTION);
3974}
3975
3976static __inline__ __m256 __DEFAULT_FN_ATTRS
3977_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3978{
3979  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3980                (__v8sf) _mm256_setzero_ps (),
3981                (__mmask8) __U,
3982                _MM_FROUND_CUR_DIRECTION);
3983}
3984
3985static __inline__ __m512 __DEFAULT_FN_ATTRS
3986_mm512_cvtpd_pslo (__m512d __A)
3987{
3988  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3989                (__v8sf) _mm256_setzero_ps (),
3990                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3991}
3992
3993static __inline__ __m512 __DEFAULT_FN_ATTRS
3994_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3995{
3996  return (__m512) __builtin_shufflevector (
3997                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3998                                               __U, __A),
3999                (__v8sf) _mm256_setzero_ps (),
4000                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
4001}
4002
/* 16 x float -> 16 x 16-bit value via __builtin_ia32_vcvtps2ph512_mask, with
   I forwarded as the builtin's int operand.
     plain : undefined third operand, all-ones mask
     mask_ : W as third operand, U as mask
     maskz_: all-zero third operand, U as mask
   Macro parameters follow the rest of this file: W names the vector operand
   and U the mask.  The argument positions are unchanged, so existing callers
   expand exactly as before. */
#define _mm512_cvt_roundps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
4017
/* 16 x float -> 16 x 16-bit value via __builtin_ia32_vcvtps2ph512_mask, with
   I forwarded as the builtin's int operand.
     plain : all-zero third operand, all-ones mask
     mask_ : W as third operand, U as mask
     maskz_: all-zero third operand, U as mask
   Macro parameters follow the rest of this file: W names the vector operand
   and U the mask.  The argument positions are unchanged, so existing callers
   expand exactly as before. */
#define _mm512_cvtps_ph(A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1); })

#define _mm512_mask_cvtps_ph(W, U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(W), \
                                            (__mmask16)(U)); })

#define _mm512_maskz_cvtps_ph(U, A, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(U)); })
4032
/* 16 x 16-bit value -> 16 x float via __builtin_ia32_vcvtph2ps512_mask, with
   the trailing operand R forwarded as an int.
     plain : undefined second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvt_roundph_ps(A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundph_ps(U, A, R) __extension__ ({ \
  (__m512)__builtin_ia32_vcvtph2ps512_mask( \
      (__v16hi)(__m256i)(A), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), (int)(R)); })
4047
4048
4049static  __inline __m512 __DEFAULT_FN_ATTRS
4050_mm512_cvtph_ps(__m256i __A)
4051{
4052  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4053                (__v16sf)
4054                _mm512_setzero_ps (),
4055                (__mmask16) -1,
4056                _MM_FROUND_CUR_DIRECTION);
4057}
4058
4059static __inline__ __m512 __DEFAULT_FN_ATTRS
4060_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
4061{
4062  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4063                 (__v16sf) __W,
4064                 (__mmask16) __U,
4065                 _MM_FROUND_CUR_DIRECTION);
4066}
4067
4068static __inline__ __m512 __DEFAULT_FN_ATTRS
4069_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
4070{
4071  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
4072                 (__v16sf) _mm512_setzero_ps (),
4073                 (__mmask16) __U,
4074                 _MM_FROUND_CUR_DIRECTION);
4075}
4076
/* 8 x double -> 8 x 32-bit integer via __builtin_ia32_cvttpd2dq512_mask,
   with the trailing operand R forwarded as an int.
     plain : all-zero second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4091
4092static __inline __m256i __DEFAULT_FN_ATTRS
4093_mm512_cvttpd_epi32(__m512d __a)
4094{
4095  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
4096                                                   (__v8si)_mm256_setzero_si256(),
4097                                                   (__mmask8) -1,
4098                                                    _MM_FROUND_CUR_DIRECTION);
4099}
4100
4101static __inline__ __m256i __DEFAULT_FN_ATTRS
4102_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4103{
4104  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4105                  (__v8si) __W,
4106                  (__mmask8) __U,
4107                  _MM_FROUND_CUR_DIRECTION);
4108}
4109
4110static __inline__ __m256i __DEFAULT_FN_ATTRS
4111_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
4112{
4113  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
4114                  (__v8si) _mm256_setzero_si256 (),
4115                  (__mmask8) __U,
4116                  _MM_FROUND_CUR_DIRECTION);
4117}
4118
/* 16 x float -> 16 x 32-bit integer via __builtin_ia32_cvttps2dq512_mask,
   with the trailing operand R forwarded as an int.
     plain : all-zero second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
4133
4134static __inline __m512i __DEFAULT_FN_ATTRS
4135_mm512_cvttps_epi32(__m512 __a)
4136{
4137  return (__m512i)
4138    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
4139                                     (__v16si) _mm512_setzero_si512 (),
4140                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
4141}
4142
4143static __inline__ __m512i __DEFAULT_FN_ATTRS
4144_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4145{
4146  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4147                  (__v16si) __W,
4148                  (__mmask16) __U,
4149                  _MM_FROUND_CUR_DIRECTION);
4150}
4151
4152static __inline__ __m512i __DEFAULT_FN_ATTRS
4153_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
4154{
4155  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
4156                  (__v16si) _mm512_setzero_si512 (),
4157                  (__mmask16) __U,
4158                  _MM_FROUND_CUR_DIRECTION);
4159}
4160
/* 16 x float -> 16 x 32-bit integer via __builtin_ia32_cvtps2dq512_mask,
   with the trailing operand R forwarded as an int.
     plain : all-zero second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2dq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
4175
4176static __inline__ __m512i __DEFAULT_FN_ATTRS
4177_mm512_cvtps_epi32 (__m512 __A)
4178{
4179  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4180                 (__v16si) _mm512_undefined_epi32 (),
4181                 (__mmask16) -1,
4182                 _MM_FROUND_CUR_DIRECTION);
4183}
4184
4185static __inline__ __m512i __DEFAULT_FN_ATTRS
4186_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
4187{
4188  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4189                 (__v16si) __W,
4190                 (__mmask16) __U,
4191                 _MM_FROUND_CUR_DIRECTION);
4192}
4193
4194static __inline__ __m512i __DEFAULT_FN_ATTRS
4195_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
4196{
4197  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
4198                 (__v16si)
4199                 _mm512_setzero_si512 (),
4200                 (__mmask16) __U,
4201                 _MM_FROUND_CUR_DIRECTION);
4202}
4203
/* 8 x double -> 8 x 32-bit integer via __builtin_ia32_cvtpd2dq512_mask, with
   the trailing operand R forwarded as an int.
     plain : all-zero second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), (int)(R)); })
4218
4219static __inline__ __m256i __DEFAULT_FN_ATTRS
4220_mm512_cvtpd_epi32 (__m512d __A)
4221{
4222  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4223                 (__v8si)
4224                 _mm256_undefined_si256 (),
4225                 (__mmask8) -1,
4226                 _MM_FROUND_CUR_DIRECTION);
4227}
4228
4229static __inline__ __m256i __DEFAULT_FN_ATTRS
4230_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
4231{
4232  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4233                 (__v8si) __W,
4234                 (__mmask8) __U,
4235                 _MM_FROUND_CUR_DIRECTION);
4236}
4237
4238static __inline__ __m256i __DEFAULT_FN_ATTRS
4239_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
4240{
4241  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4242                 (__v8si)
4243                 _mm256_setzero_si256 (),
4244                 (__mmask8) __U,
4245                 _MM_FROUND_CUR_DIRECTION);
4246}
4247
/* 16 x float -> 16 x 32-bit integer via __builtin_ia32_cvtps2udq512_mask,
   with the trailing operand R forwarded as an int.
     plain : all-zero second operand, all-ones mask
     mask_ : W as second operand, U as mask
     maskz_: all-zero second operand, U as mask */
#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2udq512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U), (int)(R)); })
4262
4263static __inline__ __m512i __DEFAULT_FN_ATTRS
4264_mm512_cvtps_epu32 ( __m512 __A)
4265{
4266  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4267                  (__v16si)\
4268                  _mm512_undefined_epi32 (),\
4269                  (__mmask16) -1,\
4270                  _MM_FROUND_CUR_DIRECTION);\
4271}
4272
4273static __inline__ __m512i __DEFAULT_FN_ATTRS
4274_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4275{
4276  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4277                  (__v16si) __W,
4278                  (__mmask16) __U,
4279                  _MM_FROUND_CUR_DIRECTION);
4280}
4281
4282static __inline__ __m512i __DEFAULT_FN_ATTRS
4283_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4284{
4285  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4286                  (__v16si)
4287                  _mm512_setzero_si512 (),
4288                  (__mmask16) __U ,
4289                  _MM_FROUND_CUR_DIRECTION);
4290}
4291
/* Forwards A (as 8 x double), an all-zero 256-bit vector, an all-ones mask
   and the rounding immediate R to the cvtpd2udq512 builtin; yields an
   __m256i. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))
4296
/* Forwards A (as 8 x double), W (as 8 x int), the mask U and the rounding
   immediate R to the cvtpd2udq512 builtin; yields an __m256i.
   W is first cast to the public __m256i type, matching how the ps variant
   of this macro treats its W argument, so the argument is checked against
   the interface type before being reinterpreted as __v8si. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)); })
4301
/* Same builtin as the W-taking form, but the second vector operand is the
   all-zero vector from _mm256_setzero_si256(). */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
4306
4307static __inline__ __m256i __DEFAULT_FN_ATTRS
4308_mm512_cvtpd_epu32 (__m512d __A)
4309{
4310  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4311                  (__v8si)
4312                  _mm256_undefined_si256 (),
4313                  (__mmask8) -1,
4314                  _MM_FROUND_CUR_DIRECTION);
4315}
4316
4317static __inline__ __m256i __DEFAULT_FN_ATTRS
4318_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4319{
4320  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4321                  (__v8si) __W,
4322                  (__mmask8) __U,
4323                  _MM_FROUND_CUR_DIRECTION);
4324}
4325
4326static __inline__ __m256i __DEFAULT_FN_ATTRS
4327_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4328{
4329  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4330                  (__v8si)
4331                  _mm256_setzero_si256 (),
4332                  (__mmask8) __U,
4333                  _MM_FROUND_CUR_DIRECTION);
4334}
4335
4336static __inline__ double __DEFAULT_FN_ATTRS
4337_mm512_cvtsd_f64(__m512d __a)
4338{
4339  return __a[0];
4340}
4341
4342static __inline__ float __DEFAULT_FN_ATTRS
4343_mm512_cvtss_f32(__m512 __a)
4344{
4345  return __a[0];
4346}
4347
4348/* Unpack and Interleave */
4349
4350static __inline __m512d __DEFAULT_FN_ATTRS
4351_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4352{
4353  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4354                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4355}
4356
4357static __inline__ __m512d __DEFAULT_FN_ATTRS
4358_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4359{
4360  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4361                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4362                                           (__v8df)__W);
4363}
4364
4365static __inline__ __m512d __DEFAULT_FN_ATTRS
4366_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4367{
4368  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4369                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4370                                           (__v8df)_mm512_setzero_pd());
4371}
4372
4373static __inline __m512d __DEFAULT_FN_ATTRS
4374_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4375{
4376  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4377                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4378}
4379
4380static __inline__ __m512d __DEFAULT_FN_ATTRS
4381_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4382{
4383  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4384                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4385                                           (__v8df)__W);
4386}
4387
4388static __inline__ __m512d __DEFAULT_FN_ATTRS
4389_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4390{
4391  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4392                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4393                                           (__v8df)_mm512_setzero_pd());
4394}
4395
4396static __inline __m512 __DEFAULT_FN_ATTRS
4397_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4398{
4399  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4400                                         2,    18,    3,    19,
4401                                         2+4,  18+4,  3+4,  19+4,
4402                                         2+8,  18+8,  3+8,  19+8,
4403                                         2+12, 18+12, 3+12, 19+12);
4404}
4405
4406static __inline__ __m512 __DEFAULT_FN_ATTRS
4407_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4408{
4409  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4410                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4411                                          (__v16sf)__W);
4412}
4413
4414static __inline__ __m512 __DEFAULT_FN_ATTRS
4415_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4416{
4417  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4418                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4419                                          (__v16sf)_mm512_setzero_ps());
4420}
4421
4422static __inline __m512 __DEFAULT_FN_ATTRS
4423_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4424{
4425  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4426                                         0,    16,    1,    17,
4427                                         0+4,  16+4,  1+4,  17+4,
4428                                         0+8,  16+8,  1+8,  17+8,
4429                                         0+12, 16+12, 1+12, 17+12);
4430}
4431
4432static __inline__ __m512 __DEFAULT_FN_ATTRS
4433_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4434{
4435  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4436                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4437                                          (__v16sf)__W);
4438}
4439
4440static __inline__ __m512 __DEFAULT_FN_ATTRS
4441_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4442{
4443  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4444                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4445                                          (__v16sf)_mm512_setzero_ps());
4446}
4447
4448static __inline__ __m512i __DEFAULT_FN_ATTRS
4449_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4450{
4451  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4452                                          2,    18,    3,    19,
4453                                          2+4,  18+4,  3+4,  19+4,
4454                                          2+8,  18+8,  3+8,  19+8,
4455                                          2+12, 18+12, 3+12, 19+12);
4456}
4457
4458static __inline__ __m512i __DEFAULT_FN_ATTRS
4459_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4460{
4461  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4462                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4463                                       (__v16si)__W);
4464}
4465
4466static __inline__ __m512i __DEFAULT_FN_ATTRS
4467_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4468{
4469  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4470                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4471                                       (__v16si)_mm512_setzero_si512());
4472}
4473
4474static __inline__ __m512i __DEFAULT_FN_ATTRS
4475_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4476{
4477  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4478                                          0,    16,    1,    17,
4479                                          0+4,  16+4,  1+4,  17+4,
4480                                          0+8,  16+8,  1+8,  17+8,
4481                                          0+12, 16+12, 1+12, 17+12);
4482}
4483
4484static __inline__ __m512i __DEFAULT_FN_ATTRS
4485_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4486{
4487  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4488                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4489                                       (__v16si)__W);
4490}
4491
4492static __inline__ __m512i __DEFAULT_FN_ATTRS
4493_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4494{
4495  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4496                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4497                                       (__v16si)_mm512_setzero_si512());
4498}
4499
4500static __inline__ __m512i __DEFAULT_FN_ATTRS
4501_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4502{
4503  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4504                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4505}
4506
4507static __inline__ __m512i __DEFAULT_FN_ATTRS
4508_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4509{
4510  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4511                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4512                                        (__v8di)__W);
4513}
4514
4515static __inline__ __m512i __DEFAULT_FN_ATTRS
4516_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4517{
4518  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4519                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4520                                        (__v8di)_mm512_setzero_si512());
4521}
4522
4523static __inline__ __m512i __DEFAULT_FN_ATTRS
4524_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4525{
4526  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4527                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4528}
4529
4530static __inline__ __m512i __DEFAULT_FN_ATTRS
4531_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4532{
4533  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4534                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4535                                        (__v8di)__W);
4536}
4537
4538static __inline__ __m512i __DEFAULT_FN_ATTRS
4539_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4540{
4541  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4542                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4543                                        (__v8di)_mm512_setzero_si512());
4544}
4545
4546/* Bit Test */
4547
4548static __inline __mmask16 __DEFAULT_FN_ATTRS
4549_mm512_test_epi32_mask(__m512i __A, __m512i __B)
4550{
4551  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4552            (__v16si) __B,
4553            (__mmask16) -1);
4554}
4555
4556static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4557_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
4558{
4559  return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
4560                 (__v16si) __B, __U);
4561}
4562
4563static __inline __mmask8 __DEFAULT_FN_ATTRS
4564_mm512_test_epi64_mask(__m512i __A, __m512i __B)
4565{
4566  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
4567                 (__v8di) __B,
4568                 (__mmask8) -1);
4569}
4570
4571static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4572_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
4573{
4574  return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
4575}
4576
4577
4578/* SIMD load ops */
4579
4580static __inline __m512i __DEFAULT_FN_ATTRS
4581_mm512_loadu_si512 (void const *__P)
4582{
4583  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4584                  (__v16si)
4585                  _mm512_setzero_si512 (),
4586                  (__mmask16) -1);
4587}
4588
4589static __inline __m512i __DEFAULT_FN_ATTRS
4590_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4591{
4592  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4593                  (__v16si) __W,
4594                  (__mmask16) __U);
4595}
4596
4597
4598static __inline __m512i __DEFAULT_FN_ATTRS
4599_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4600{
4601  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4602                                                     (__v16si)
4603                                                     _mm512_setzero_si512 (),
4604                                                     (__mmask16) __U);
4605}
4606
4607static __inline __m512i __DEFAULT_FN_ATTRS
4608_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4609{
4610  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4611                  (__v8di) __W,
4612                  (__mmask8) __U);
4613}
4614
4615static __inline __m512i __DEFAULT_FN_ATTRS
4616_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4617{
4618  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4619                                                     (__v8di)
4620                                                     _mm512_setzero_si512 (),
4621                                                     (__mmask8) __U);
4622}
4623
4624static __inline __m512 __DEFAULT_FN_ATTRS
4625_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4626{
4627  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4628                   (__v16sf) __W,
4629                   (__mmask16) __U);
4630}
4631
4632static __inline __m512 __DEFAULT_FN_ATTRS
4633_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4634{
4635  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4636                                                  (__v16sf)
4637                                                  _mm512_setzero_ps (),
4638                                                  (__mmask16) __U);
4639}
4640
4641static __inline __m512d __DEFAULT_FN_ATTRS
4642_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4643{
4644  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4645                (__v8df) __W,
4646                (__mmask8) __U);
4647}
4648
4649static __inline __m512d __DEFAULT_FN_ATTRS
4650_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4651{
4652  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4653                                                   (__v8df)
4654                                                   _mm512_setzero_pd (),
4655                                                   (__mmask8) __U);
4656}
4657
4658static __inline __m512d __DEFAULT_FN_ATTRS
4659_mm512_loadu_pd(void const *__p)
4660{
4661  struct __loadu_pd {
4662    __m512d __v;
4663  } __attribute__((__packed__, __may_alias__));
4664  return ((struct __loadu_pd*)__p)->__v;
4665}
4666
4667static __inline __m512 __DEFAULT_FN_ATTRS
4668_mm512_loadu_ps(void const *__p)
4669{
4670  struct __loadu_ps {
4671    __m512 __v;
4672  } __attribute__((__packed__, __may_alias__));
4673  return ((struct __loadu_ps*)__p)->__v;
4674}
4675
4676static __inline __m512 __DEFAULT_FN_ATTRS
4677_mm512_load_ps(void const *__p)
4678{
4679  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p,
4680                                                  (__v16sf)
4681                                                  _mm512_setzero_ps (),
4682                                                  (__mmask16) -1);
4683}
4684
4685static __inline __m512 __DEFAULT_FN_ATTRS
4686_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4687{
4688  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4689                   (__v16sf) __W,
4690                   (__mmask16) __U);
4691}
4692
4693static __inline __m512 __DEFAULT_FN_ATTRS
4694_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4695{
4696  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4697                                                  (__v16sf)
4698                                                  _mm512_setzero_ps (),
4699                                                  (__mmask16) __U);
4700}
4701
4702static __inline __m512d __DEFAULT_FN_ATTRS
4703_mm512_load_pd(void const *__p)
4704{
4705  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p,
4706                                                   (__v8df)
4707                                                   _mm512_setzero_pd (),
4708                                                   (__mmask8) -1);
4709}
4710
4711static __inline __m512d __DEFAULT_FN_ATTRS
4712_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4713{
4714  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4715                          (__v8df) __W,
4716                          (__mmask8) __U);
4717}
4718
4719static __inline __m512d __DEFAULT_FN_ATTRS
4720_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4721{
4722  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4723                                                   (__v8df)
4724                                                   _mm512_setzero_pd (),
4725                                                   (__mmask8) __U);
4726}
4727
4728static __inline __m512i __DEFAULT_FN_ATTRS
4729_mm512_load_si512 (void const *__P)
4730{
4731  return *(__m512i *) __P;
4732}
4733
4734static __inline __m512i __DEFAULT_FN_ATTRS
4735_mm512_load_epi32 (void const *__P)
4736{
4737  return *(__m512i *) __P;
4738}
4739
4740static __inline __m512i __DEFAULT_FN_ATTRS
4741_mm512_load_epi64 (void const *__P)
4742{
4743  return *(__m512i *) __P;
4744}
4745
4746/* SIMD store ops */
4747
4748static __inline void __DEFAULT_FN_ATTRS
4749_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4750{
4751  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4752                                     (__mmask8) __U);
4753}
4754
4755static __inline void __DEFAULT_FN_ATTRS
4756_mm512_storeu_si512 (void *__P, __m512i __A)
4757{
4758  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
4759            (__mmask16) -1);
4760}
4761
4762static __inline void __DEFAULT_FN_ATTRS
4763_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4764{
4765  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4766                                     (__mmask16) __U);
4767}
4768
4769static __inline void __DEFAULT_FN_ATTRS
4770_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4771{
4772  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4773}
4774
4775static __inline void __DEFAULT_FN_ATTRS
4776_mm512_storeu_pd(void *__P, __m512d __A)
4777{
4778  __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1);
4779}
4780
4781static __inline void __DEFAULT_FN_ATTRS
4782_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4783{
4784  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4785                                   (__mmask16) __U);
4786}
4787
4788static __inline void __DEFAULT_FN_ATTRS
4789_mm512_storeu_ps(void *__P, __m512 __A)
4790{
4791  __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1);
4792}
4793
4794static __inline void __DEFAULT_FN_ATTRS
4795_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4796{
4797  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4798}
4799
4800static __inline void __DEFAULT_FN_ATTRS
4801_mm512_store_pd(void *__P, __m512d __A)
4802{
4803  *(__m512d*)__P = __A;
4804}
4805
4806static __inline void __DEFAULT_FN_ATTRS
4807_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4808{
4809  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4810                                   (__mmask16) __U);
4811}
4812
4813static __inline void __DEFAULT_FN_ATTRS
4814_mm512_store_ps(void *__P, __m512 __A)
4815{
4816  *(__m512*)__P = __A;
4817}
4818
4819static __inline void __DEFAULT_FN_ATTRS
4820_mm512_store_si512 (void *__P, __m512i __A)
4821{
4822  *(__m512i *) __P = __A;
4823}
4824
4825static __inline void __DEFAULT_FN_ATTRS
4826_mm512_store_epi32 (void *__P, __m512i __A)
4827{
4828  *(__m512i *) __P = __A;
4829}
4830
4831static __inline void __DEFAULT_FN_ATTRS
4832_mm512_store_epi64 (void *__P, __m512i __A)
4833{
4834  *(__m512i *) __P = __A;
4835}
4836
4837/* Mask ops */
4838
4839static __inline __mmask16 __DEFAULT_FN_ATTRS
4840_mm512_knot(__mmask16 __M)
4841{
4842  return __builtin_ia32_knothi(__M);
4843}
4844
4845/* Integer compare */
4846
4847static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4848_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) {
4849  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4850                                                   (__mmask16)-1);
4851}
4852
4853static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4854_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4855  return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b,
4856                                                   __u);
4857}
4858
4859static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4860_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) {
4861  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4862                                                 (__mmask16)-1);
4863}
4864
4865static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4866_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4867  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0,
4868                                                 __u);
4869}
4870
4871static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4872_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4873  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4874                                                  __u);
4875}
4876
4877static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4878_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) {
4879  return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b,
4880                                                  (__mmask8)-1);
4881}
4882
4883static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4884_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) {
4885  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4886                                                (__mmask8)-1);
4887}
4888
4889static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4890_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4891  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0,
4892                                                __u);
4893}
4894
4895static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4896_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) {
4897  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4898                                                (__mmask16)-1);
4899}
4900
4901static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4902_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4903  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4904                                                __u);
4905}
4906
4907static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4908_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) {
4909  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4910                                                 (__mmask16)-1);
4911}
4912
4913static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4914_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4915  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5,
4916                                                 __u);
4917}
4918
4919static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4920_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) {
4921  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4922                                               (__mmask8)-1);
4923}
4924
4925static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4926_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4927  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4928                                               __u);
4929}
4930
4931static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4932_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) {
4933  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4934                                                (__mmask8)-1);
4935}
4936
4937static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4938_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4939  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5,
4940                                                __u);
4941}
4942
4943static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4944_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) {
4945  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4946                                                   (__mmask16)-1);
4947}
4948
4949static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4950_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4951  return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b,
4952                                                   __u);
4953}
4954
4955static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4956_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) {
4957  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4958                                                 (__mmask16)-1);
4959}
4960
4961static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4962_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4963  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6,
4964                                                 __u);
4965}
4966
4967static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4968_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4969  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4970                                                  __u);
4971}
4972
4973static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4974_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) {
4975  return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b,
4976                                                  (__mmask8)-1);
4977}
4978
4979static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4980_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) {
4981  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4982                                                (__mmask8)-1);
4983}
4984
4985static __inline__ __mmask8 __DEFAULT_FN_ATTRS
4986_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
4987  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6,
4988                                                __u);
4989}
4990
4991static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4992_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) {
4993  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
4994                                                (__mmask16)-1);
4995}
4996
4997static __inline__ __mmask16 __DEFAULT_FN_ATTRS
4998_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
4999  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2,
5000                                                __u);
5001}
5002
5003static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5004_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) {
5005  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
5006                                                 (__mmask16)-1);
5007}
5008
5009static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5010_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5011  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2,
5012                                                 __u);
5013}
5014
5015static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5016_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) {
5017  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
5018                                               (__mmask8)-1);
5019}
5020
5021static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5022_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5023  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2,
5024                                               __u);
5025}
5026
5027static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5028_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) {
5029  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
5030                                                (__mmask8)-1);
5031}
5032
5033static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5034_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5035  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2,
5036                                                __u);
5037}
5038
5039static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5040_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) {
5041  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
5042                                                (__mmask16)-1);
5043}
5044
5045static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5046_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5047  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1,
5048                                                __u);
5049}
5050
5051static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5052_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) {
5053  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
5054                                                 (__mmask16)-1);
5055}
5056
5057static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5058_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5059  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1,
5060                                                 __u);
5061}
5062
5063static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5064_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) {
5065  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
5066                                               (__mmask8)-1);
5067}
5068
5069static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5070_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5071  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1,
5072                                               __u);
5073}
5074
5075static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5076_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) {
5077  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
5078                                                (__mmask8)-1);
5079}
5080
5081static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5082_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5083  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1,
5084                                                __u);
5085}
5086
5087static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5088_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) {
5089  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
5090                                                (__mmask16)-1);
5091}
5092
5093static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5094_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5095  return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4,
5096                                                __u);
5097}
5098
5099static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5100_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) {
5101  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
5102                                                 (__mmask16)-1);
5103}
5104
5105static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5106_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) {
5107  return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4,
5108                                                 __u);
5109}
5110
5111static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5112_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) {
5113  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5114                                               (__mmask8)-1);
5115}
5116
5117static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5118_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5119  return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5120                                               __u);
5121}
5122
5123static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5124_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) {
5125  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5126                                                (__mmask8)-1);
5127}
5128
5129static __inline__ __mmask8 __DEFAULT_FN_ATTRS
5130_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) {
5131  return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4,
5132                                                __u);
5133}
5134
5135static __inline__ __m512i __DEFAULT_FN_ATTRS
5136_mm512_cvtepi8_epi32(__m128i __A)
5137{
5138  /* This function always performs a signed extension, but __v16qi is a char
5139     which may be signed or unsigned, so use __v16qs. */
5140  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
5141}
5142
5143static __inline__ __m512i __DEFAULT_FN_ATTRS
5144_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5145{
5146  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5147                                             (__v16si)_mm512_cvtepi8_epi32(__A),
5148                                             (__v16si)__W);
5149}
5150
5151static __inline__ __m512i __DEFAULT_FN_ATTRS
5152_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
5153{
5154  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5155                                             (__v16si)_mm512_cvtepi8_epi32(__A),
5156                                             (__v16si)_mm512_setzero_si512());
5157}
5158
5159static __inline__ __m512i __DEFAULT_FN_ATTRS
5160_mm512_cvtepi8_epi64(__m128i __A)
5161{
5162  /* This function always performs a signed extension, but __v16qi is a char
5163     which may be signed or unsigned, so use __v16qs. */
5164  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5165}
5166
5167static __inline__ __m512i __DEFAULT_FN_ATTRS
5168_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5169{
5170  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5171                                             (__v8di)_mm512_cvtepi8_epi64(__A),
5172                                             (__v8di)__W);
5173}
5174
5175static __inline__ __m512i __DEFAULT_FN_ATTRS
5176_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
5177{
5178  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5179                                             (__v8di)_mm512_cvtepi8_epi64(__A),
5180                                             (__v8di)_mm512_setzero_si512 ());
5181}
5182
5183static __inline__ __m512i __DEFAULT_FN_ATTRS
5184_mm512_cvtepi32_epi64(__m256i __X)
5185{
5186  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
5187}
5188
5189static __inline__ __m512i __DEFAULT_FN_ATTRS
5190_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5191{
5192  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5193                                             (__v8di)_mm512_cvtepi32_epi64(__X),
5194                                             (__v8di)__W);
5195}
5196
5197static __inline__ __m512i __DEFAULT_FN_ATTRS
5198_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
5199{
5200  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5201                                             (__v8di)_mm512_cvtepi32_epi64(__X),
5202                                             (__v8di)_mm512_setzero_si512());
5203}
5204
5205static __inline__ __m512i __DEFAULT_FN_ATTRS
5206_mm512_cvtepi16_epi32(__m256i __A)
5207{
5208  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
5209}
5210
5211static __inline__ __m512i __DEFAULT_FN_ATTRS
5212_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5213{
5214  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5215                                            (__v16si)_mm512_cvtepi16_epi32(__A),
5216                                            (__v16si)__W);
5217}
5218
5219static __inline__ __m512i __DEFAULT_FN_ATTRS
5220_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
5221{
5222  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5223                                            (__v16si)_mm512_cvtepi16_epi32(__A),
5224                                            (__v16si)_mm512_setzero_si512 ());
5225}
5226
5227static __inline__ __m512i __DEFAULT_FN_ATTRS
5228_mm512_cvtepi16_epi64(__m128i __A)
5229{
5230  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
5231}
5232
5233static __inline__ __m512i __DEFAULT_FN_ATTRS
5234_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5235{
5236  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5237                                             (__v8di)_mm512_cvtepi16_epi64(__A),
5238                                             (__v8di)__W);
5239}
5240
5241static __inline__ __m512i __DEFAULT_FN_ATTRS
5242_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
5243{
5244  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5245                                             (__v8di)_mm512_cvtepi16_epi64(__A),
5246                                             (__v8di)_mm512_setzero_si512());
5247}
5248
5249static __inline__ __m512i __DEFAULT_FN_ATTRS
5250_mm512_cvtepu8_epi32(__m128i __A)
5251{
5252  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
5253}
5254
5255static __inline__ __m512i __DEFAULT_FN_ATTRS
5256_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
5257{
5258  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5259                                             (__v16si)_mm512_cvtepu8_epi32(__A),
5260                                             (__v16si)__W);
5261}
5262
5263static __inline__ __m512i __DEFAULT_FN_ATTRS
5264_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
5265{
5266  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5267                                             (__v16si)_mm512_cvtepu8_epi32(__A),
5268                                             (__v16si)_mm512_setzero_si512());
5269}
5270
5271static __inline__ __m512i __DEFAULT_FN_ATTRS
5272_mm512_cvtepu8_epi64(__m128i __A)
5273{
5274  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
5275}
5276
5277static __inline__ __m512i __DEFAULT_FN_ATTRS
5278_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5279{
5280  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5281                                             (__v8di)_mm512_cvtepu8_epi64(__A),
5282                                             (__v8di)__W);
5283}
5284
5285static __inline__ __m512i __DEFAULT_FN_ATTRS
5286_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
5287{
5288  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5289                                             (__v8di)_mm512_cvtepu8_epi64(__A),
5290                                             (__v8di)_mm512_setzero_si512());
5291}
5292
5293static __inline__ __m512i __DEFAULT_FN_ATTRS
5294_mm512_cvtepu32_epi64(__m256i __X)
5295{
5296  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
5297}
5298
5299static __inline__ __m512i __DEFAULT_FN_ATTRS
5300_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
5301{
5302  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5303                                             (__v8di)_mm512_cvtepu32_epi64(__X),
5304                                             (__v8di)__W);
5305}
5306
5307static __inline__ __m512i __DEFAULT_FN_ATTRS
5308_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
5309{
5310  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5311                                             (__v8di)_mm512_cvtepu32_epi64(__X),
5312                                             (__v8di)_mm512_setzero_si512());
5313}
5314
5315static __inline__ __m512i __DEFAULT_FN_ATTRS
5316_mm512_cvtepu16_epi32(__m256i __A)
5317{
5318  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
5319}
5320
5321static __inline__ __m512i __DEFAULT_FN_ATTRS
5322_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
5323{
5324  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5325                                            (__v16si)_mm512_cvtepu16_epi32(__A),
5326                                            (__v16si)__W);
5327}
5328
5329static __inline__ __m512i __DEFAULT_FN_ATTRS
5330_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
5331{
5332  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5333                                            (__v16si)_mm512_cvtepu16_epi32(__A),
5334                                            (__v16si)_mm512_setzero_si512());
5335}
5336
5337static __inline__ __m512i __DEFAULT_FN_ATTRS
5338_mm512_cvtepu16_epi64(__m128i __A)
5339{
5340  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
5341}
5342
5343static __inline__ __m512i __DEFAULT_FN_ATTRS
5344_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
5345{
5346  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5347                                             (__v8di)_mm512_cvtepu16_epi64(__A),
5348                                             (__v8di)__W);
5349}
5350
5351static __inline__ __m512i __DEFAULT_FN_ATTRS
5352_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
5353{
5354  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5355                                             (__v8di)_mm512_cvtepu16_epi64(__A),
5356                                             (__v8di)_mm512_setzero_si512());
5357}
5358
5359static __inline__ __m512i __DEFAULT_FN_ATTRS
5360_mm512_rorv_epi32 (__m512i __A, __m512i __B)
5361{
5362  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5363              (__v16si) __B,
5364              (__v16si)
5365              _mm512_setzero_si512 (),
5366              (__mmask16) -1);
5367}
5368
5369static __inline__ __m512i __DEFAULT_FN_ATTRS
5370_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5371{
5372  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5373              (__v16si) __B,
5374              (__v16si) __W,
5375              (__mmask16) __U);
5376}
5377
5378static __inline__ __m512i __DEFAULT_FN_ATTRS
5379_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5380{
5381  return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
5382              (__v16si) __B,
5383              (__v16si)
5384              _mm512_setzero_si512 (),
5385              (__mmask16) __U);
5386}
5387
5388static __inline__ __m512i __DEFAULT_FN_ATTRS
5389_mm512_rorv_epi64 (__m512i __A, __m512i __B)
5390{
5391  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5392              (__v8di) __B,
5393              (__v8di)
5394              _mm512_setzero_si512 (),
5395              (__mmask8) -1);
5396}
5397
5398static __inline__ __m512i __DEFAULT_FN_ATTRS
5399_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5400{
5401  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5402              (__v8di) __B,
5403              (__v8di) __W,
5404              (__mmask8) __U);
5405}
5406
5407static __inline__ __m512i __DEFAULT_FN_ATTRS
5408_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5409{
5410  return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
5411              (__v8di) __B,
5412              (__v8di)
5413              _mm512_setzero_si512 (),
5414              (__mmask8) __U);
5415}
5416
5417
5418
/* Statement-expression macro: a and b are cast to __m512i then __v16si, p is
   cast to int, and the mask operand of __builtin_ia32_cmpd512_mask is all
   ones. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  __extension__({ \
    (__mmask16)__builtin_ia32_cmpd512_mask( \
        (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), \
        (__mmask16)-1); \
  })
5423
/* Statement-expression macro: a and b are cast to __m512i then __v16si, p is
   cast to int, and the mask operand of __builtin_ia32_ucmpd512_mask is all
   ones. */
#define _mm512_cmp_epu32_mask(a, b, p) \
  __extension__({ \
    (__mmask16)__builtin_ia32_ucmpd512_mask( \
        (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), \
        (__mmask16)-1); \
  })
5428
/* Statement-expression macro: a and b are cast to __m512i then __v8di, p is
   cast to int, and the mask operand of __builtin_ia32_cmpq512_mask is all
   ones. */
#define _mm512_cmp_epi64_mask(a, b, p) \
  __extension__({ \
    (__mmask8)__builtin_ia32_cmpq512_mask( \
        (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), \
        (__mmask8)-1); \
  })
5433
/* Statement-expression macro: a and b are cast to __m512i then __v8di, p is
   cast to int, and the mask operand of __builtin_ia32_ucmpq512_mask is all
   ones. */
#define _mm512_cmp_epu64_mask(a, b, p) \
  __extension__({ \
    (__mmask8)__builtin_ia32_ucmpq512_mask( \
        (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), \
        (__mmask8)-1); \
  })
5438
/* Statement-expression macro: a and b are cast to __m512i then __v16si, p is
   cast to int, and m (cast to __mmask16) is the mask operand of
   __builtin_ia32_cmpd512_mask. */
#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  __extension__({ \
    (__mmask16)__builtin_ia32_cmpd512_mask( \
        (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), \
        (__mmask16)(m)); \
  })
5443
/* Statement-expression macro: a and b are cast to __m512i then __v16si, p is
   cast to int, and m (cast to __mmask16) is the mask operand of
   __builtin_ia32_ucmpd512_mask. */
#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  __extension__({ \
    (__mmask16)__builtin_ia32_ucmpd512_mask( \
        (__v16si)(__m512i)(a), (__v16si)(__m512i)(b), (int)(p), \
        (__mmask16)(m)); \
  })
5448
/* Statement-expression macro: a and b are cast to __m512i then __v8di, p is
   cast to int, and m (cast to __mmask8) is the mask operand of
   __builtin_ia32_cmpq512_mask. */
#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  __extension__({ \
    (__mmask8)__builtin_ia32_cmpq512_mask( \
        (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), \
        (__mmask8)(m)); \
  })
5453
/* Statement-expression macro: a and b are cast to __m512i then __v8di, p is
   cast to int, and m (cast to __mmask8) is the mask operand of
   __builtin_ia32_ucmpq512_mask. */
#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  __extension__({ \
    (__mmask8)__builtin_ia32_ucmpq512_mask( \
        (__v8di)(__m512i)(a), (__v8di)(__m512i)(b), (int)(p), \
        (__mmask8)(m)); \
  })
5458
/* Statement-expression macro over __builtin_ia32_prold512_mask: a is cast to
   __m512i then __v16si, b to int; the third operand is an all-zero vector
   and the mask is all ones. */
#define _mm512_rol_epi32(a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prold512_mask( \
        (__v16si)(__m512i)(a), (int)(b), \
        (__v16si)_mm512_setzero_si512(), \
        (__mmask16)-1); \
  })
5463
/* Statement-expression macro over __builtin_ia32_prold512_mask: a is cast to
   __m512i then __v16si, b to int; W is the third operand and U the mask. */
#define _mm512_mask_rol_epi32(W, U, a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prold512_mask( \
        (__v16si)(__m512i)(a), (int)(b), \
        (__v16si)(__m512i)(W), \
        (__mmask16)(U)); \
  })
5468
/* Statement-expression macro over __builtin_ia32_prold512_mask: a is cast to
   __m512i then __v16si, b to int; the third operand is an all-zero vector
   and U is the mask. */
#define _mm512_maskz_rol_epi32(U, a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prold512_mask( \
        (__v16si)(__m512i)(a), (int)(b), \
        (__v16si)_mm512_setzero_si512(), \
        (__mmask16)(U)); \
  })
5473
/* Statement-expression macro over __builtin_ia32_prolq512_mask: a is cast to
   __m512i then __v8di, b to int; the third operand is an all-zero vector
   and the mask is all ones. */
#define _mm512_rol_epi64(a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prolq512_mask( \
        (__v8di)(__m512i)(a), (int)(b), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1); \
  })
5478
/* Statement-expression macro over __builtin_ia32_prolq512_mask: a is cast to
   __m512i then __v8di, b to int; W is the third operand and U the mask. */
#define _mm512_mask_rol_epi64(W, U, a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prolq512_mask( \
        (__v8di)(__m512i)(a), (int)(b), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U)); \
  })
5482
/* Statement-expression macro over __builtin_ia32_prolq512_mask: a is cast to
   __m512i then __v8di, b to int; the third operand is an all-zero vector
   and U is the mask. */
#define _mm512_maskz_rol_epi64(U, a, b) \
  __extension__({ \
    (__m512i)__builtin_ia32_prolq512_mask( \
        (__v8di)(__m512i)(a), (int)(b), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U)); \
  })
5487static __inline__ __m512i __DEFAULT_FN_ATTRS
5488_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5489{
5490  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5491              (__v16si) __B,
5492              (__v16si)
5493              _mm512_setzero_si512 (),
5494              (__mmask16) -1);
5495}
5496
5497static __inline__ __m512i __DEFAULT_FN_ATTRS
5498_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5499{
5500  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5501              (__v16si) __B,
5502              (__v16si) __W,
5503              (__mmask16) __U);
5504}
5505
5506static __inline__ __m512i __DEFAULT_FN_ATTRS
5507_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5508{
5509  return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
5510              (__v16si) __B,
5511              (__v16si)
5512              _mm512_setzero_si512 (),
5513              (__mmask16) __U);
5514}
5515
5516static __inline__ __m512i __DEFAULT_FN_ATTRS
5517_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5518{
5519  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5520              (__v8di) __B,
5521              (__v8di)
5522              _mm512_setzero_si512 (),
5523              (__mmask8) -1);
5524}
5525
5526static __inline__ __m512i __DEFAULT_FN_ATTRS
5527_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5528{
5529  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5530              (__v8di) __B,
5531              (__v8di) __W,
5532              (__mmask8) __U);
5533}
5534
5535static __inline__ __m512i __DEFAULT_FN_ATTRS
5536_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5537{
5538  return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
5539              (__v8di) __B,
5540              (__v8di)
5541              _mm512_setzero_si512 (),
5542              (__mmask8) __U);
5543}
5544
/* Statement-expression macro over __builtin_ia32_prord512_mask: A is cast to
   __m512i then __v16si, B to int; the third operand is an all-zero vector
   and the mask is all ones. */
#define _mm512_ror_epi32(A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prord512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)_mm512_setzero_si512(), \
        (__mmask16)-1); \
  })
5549
/* Statement-expression macro over __builtin_ia32_prord512_mask: A is cast to
   __m512i then __v16si, B to int; W is the third operand and U the mask. */
#define _mm512_mask_ror_epi32(W, U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prord512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)(__m512i)(W), \
        (__mmask16)(U)); \
  })
5554
/* Statement-expression macro over __builtin_ia32_prord512_mask: A is cast to
   __m512i then __v16si, B to int; the third operand is an all-zero vector
   and U is the mask. */
#define _mm512_maskz_ror_epi32(U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prord512_mask( \
        (__v16si)(__m512i)(A), (int)(B), \
        (__v16si)_mm512_setzero_si512(), \
        (__mmask16)(U)); \
  })
5559
/* Statement-expression macro over __builtin_ia32_prorq512_mask: A is cast to
   __m512i then __v8di, B to int; the third operand is an all-zero vector
   and the mask is all ones. */
#define _mm512_ror_epi64(A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prorq512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)-1); \
  })
5564
/* Statement-expression macro over __builtin_ia32_prorq512_mask: A is cast to
   __m512i then __v8di, B to int; W is the third operand and U the mask. */
#define _mm512_mask_ror_epi64(W, U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prorq512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)(__m512i)(W), \
        (__mmask8)(U)); \
  })
5568
/* Statement-expression macro over __builtin_ia32_prorq512_mask: A is cast to
   __m512i then __v8di, B to int; the third operand is an all-zero vector
   and U is the mask. */
#define _mm512_maskz_ror_epi64(U, A, B) \
  __extension__({ \
    (__m512i)__builtin_ia32_prorq512_mask( \
        (__v8di)(__m512i)(A), (int)(B), \
        (__v8di)_mm512_setzero_si512(), \
        (__mmask8)(U)); \
  })
5573
5574static __inline__ __m512i __DEFAULT_FN_ATTRS
5575_mm512_slli_epi32(__m512i __A, int __B)
5576{
5577  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5578}
5579
5580static __inline__ __m512i __DEFAULT_FN_ATTRS
5581_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5582{
5583  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5584                                         (__v16si)_mm512_slli_epi32(__A, __B),
5585                                         (__v16si)__W);
5586}
5587
5588static __inline__ __m512i __DEFAULT_FN_ATTRS
5589_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
5590  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5591                                         (__v16si)_mm512_slli_epi32(__A, __B),
5592                                         (__v16si)_mm512_setzero_si512());
5593}
5594
5595static __inline__ __m512i __DEFAULT_FN_ATTRS
5596_mm512_slli_epi64(__m512i __A, int __B)
5597{
5598  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5599}
5600
5601static __inline__ __m512i __DEFAULT_FN_ATTRS
5602_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5603{
5604  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5605                                          (__v8di)_mm512_slli_epi64(__A, __B),
5606                                          (__v8di)__W);
5607}
5608
5609static __inline__ __m512i __DEFAULT_FN_ATTRS
5610_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
5611{
5612  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5613                                          (__v8di)_mm512_slli_epi64(__A, __B),
5614                                          (__v8di)_mm512_setzero_si512());
5615}
5616
5617static __inline__ __m512i __DEFAULT_FN_ATTRS
5618_mm512_srli_epi32(__m512i __A, int __B)
5619{
5620  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5621}
5622
5623static __inline__ __m512i __DEFAULT_FN_ATTRS
5624_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
5625{
5626  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5627                                         (__v16si)_mm512_srli_epi32(__A, __B),
5628                                         (__v16si)__W);
5629}
5630
5631static __inline__ __m512i __DEFAULT_FN_ATTRS
5632_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
5633  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5634                                         (__v16si)_mm512_srli_epi32(__A, __B),
5635                                         (__v16si)_mm512_setzero_si512());
5636}
5637
5638static __inline__ __m512i __DEFAULT_FN_ATTRS
5639_mm512_srli_epi64(__m512i __A, int __B)
5640{
5641  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5642}
5643
5644static __inline__ __m512i __DEFAULT_FN_ATTRS
5645_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
5646{
5647  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5648                                          (__v8di)_mm512_srli_epi64(__A, __B),
5649                                          (__v8di)__W);
5650}
5651
5652static __inline__ __m512i __DEFAULT_FN_ATTRS
5653_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
5654{
5655  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5656                                          (__v8di)_mm512_srli_epi64(__A, __B),
5657                                          (__v8di)_mm512_setzero_si512());
5658}
5659
5660static __inline__ __m512i __DEFAULT_FN_ATTRS
5661_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5662{
5663  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5664              (__v16si) __W,
5665              (__mmask16) __U);
5666}
5667
5668static __inline__ __m512i __DEFAULT_FN_ATTRS
5669_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
5670{
5671  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5672              (__v16si)
5673              _mm512_setzero_si512 (),
5674              (__mmask16) __U);
5675}
5676
5677static __inline__ void __DEFAULT_FN_ATTRS
5678_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5679{
5680  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5681          (__mmask16) __U);
5682}
5683
5684static __inline__ __m512i __DEFAULT_FN_ATTRS
5685_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5686{
5687  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5688                 (__v16si) __A,
5689                 (__v16si) __W);
5690}
5691
5692static __inline__ __m512i __DEFAULT_FN_ATTRS
5693_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
5694{
5695  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5696                 (__v16si) __A,
5697                 (__v16si) _mm512_setzero_si512 ());
5698}
5699
5700static __inline__ __m512i __DEFAULT_FN_ATTRS
5701_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5702{
5703  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5704                 (__v8di) __A,
5705                 (__v8di) __W);
5706}
5707
5708static __inline__ __m512i __DEFAULT_FN_ATTRS
5709_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
5710{
5711  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5712                 (__v8di) __A,
5713                 (__v8di) _mm512_setzero_si512 ());
5714}
5715
5716static __inline__ __m512i __DEFAULT_FN_ATTRS
5717_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5718{
5719  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5720              (__v8di) __W,
5721              (__mmask8) __U);
5722}
5723
5724static __inline__ __m512i __DEFAULT_FN_ATTRS
5725_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5726{
5727  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5728              (__v8di)
5729              _mm512_setzero_si512 (),
5730              (__mmask8) __U);
5731}
5732
5733static __inline__ void __DEFAULT_FN_ATTRS
5734_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5735{
5736  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5737          (__mmask8) __U);
5738}
5739
5740static __inline__ __m512d __DEFAULT_FN_ATTRS
5741_mm512_movedup_pd (__m512d __A)
5742{
5743  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5744                                          0, 0, 2, 2, 4, 4, 6, 6);
5745}
5746
5747static __inline__ __m512d __DEFAULT_FN_ATTRS
5748_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5749{
5750  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5751                                              (__v8df)_mm512_movedup_pd(__A),
5752                                              (__v8df)__W);
5753}
5754
5755static __inline__ __m512d __DEFAULT_FN_ATTRS
5756_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
5757{
5758  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5759                                              (__v8df)_mm512_movedup_pd(__A),
5760                                              (__v8df)_mm512_setzero_pd());
5761}
5762
/* Statement-expression macro over __builtin_ia32_fixupimmpd512_mask: A and B
   go in as __v8df, C as __v8di, imm and R as int; the mask is all ones. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_fixupimmpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)-1, (int)(R)); \
  })
5768
/* Statement-expression macro over __builtin_ia32_fixupimmpd512_mask: A and B
   go in as __v8df, C as __v8di, imm and R as int; U is the mask. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_fixupimmpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)(U), (int)(R)); \
  })
5774
/* Statement-expression macro over __builtin_ia32_fixupimmpd512_mask: A and B
   go in as __v8df, C as __v8di, imm as int; the mask is all ones and the
   final operand is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_pd(A, B, C, imm) \
  __extension__({ \
    (__m512d)__builtin_ia32_fixupimmpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION); \
  })
5781
/* Statement-expression macro over __builtin_ia32_fixupimmpd512_mask: A and B
   go in as __v8df, C as __v8di, imm as int; U is the mask and the final
   operand is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  __extension__({ \
    (__m512d)__builtin_ia32_fixupimmpd512_mask( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); \
  })
5788
/* Statement-expression macro over __builtin_ia32_fixupimmpd512_maskz: A and
   B go in as __v8df, C as __v8di, imm and R as int; U is the mask. */
#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  __extension__({ \
    (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)(U), (int)(R)); \
  })
5795
/* Statement-expression macro over __builtin_ia32_fixupimmpd512_maskz: A and
   B go in as __v8df, C as __v8di, imm as int; U is the mask and the final
   operand is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  __extension__({ \
    (__m512d)__builtin_ia32_fixupimmpd512_maskz( \
        (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (__v8di)(__m512i)(C), \
        (int)(imm), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION); \
  })
5802
/* Statement-expression macro over __builtin_ia32_fixupimmps512_mask: A and B
   go in as __v16sf, C as __v16si, imm and R as int; the mask is all ones. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  __extension__({ \
    (__m512)__builtin_ia32_fixupimmps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
        (int)(imm), (__mmask16)-1, (int)(R)); \
  })
5808
/* Statement-expression macro over __builtin_ia32_fixupimmps512_mask: A and B
   go in as __v16sf, C as __v16si, imm and R as int; U is the mask. */
#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  __extension__({ \
    (__m512)__builtin_ia32_fixupimmps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
        (int)(imm), (__mmask16)(U), (int)(R)); \
  })
5814
/* Statement-expression macro over __builtin_ia32_fixupimmps512_mask: A and B
   go in as __v16sf, C as __v16si, imm as int; the mask is all ones and the
   final operand is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_fixupimm_ps(A, B, C, imm) \
  __extension__({ \
    (__m512)__builtin_ia32_fixupimmps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
        (int)(imm), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION); \
  })
5821
/* Statement-expression macro over __builtin_ia32_fixupimmps512_mask: A and B
   go in as __v16sf, C as __v16si, imm as int; U is the mask and the final
   operand is _MM_FROUND_CUR_DIRECTION. */
#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  __extension__({ \
    (__m512)__builtin_ia32_fixupimmps512_mask( \
        (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (__v16si)(__m512i)(C), \
        (int)(imm), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION); \
  })
5828
/* maskz variant with explicit rounding operand R: calls
 * __builtin_ia32_fixupimmps512_maskz on A, B (as __v16sf) and C (as __v16si)
 * with immediate imm and mask U. */
#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      (int)(R)); })
5835
/* maskz variant: calls __builtin_ia32_fixupimmps512_maskz on A, B (as
 * __v16sf) and C (as __v16si) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m512)__builtin_ia32_fixupimmps512_maskz( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5842
/* Unmasked 128-bit double form with explicit rounding operand R: calls
 * __builtin_ia32_fixupimmsd_mask on A, B (as __v2df) and C (as __v2di) with
 * immediate imm and an all-ones __mmask8. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)); })
5848
/* Masked 128-bit double form with explicit rounding operand R: calls
 * __builtin_ia32_fixupimmsd_mask on A, B (as __v2df) and C (as __v2di) with
 * immediate imm and mask U. */
#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); })
5854
/* Unmasked 128-bit double form: calls __builtin_ia32_fixupimmsd_mask on A, B
 * (as __v2df) and C (as __v2di) with immediate imm, an all-ones __mmask8 and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
5861
/* Masked 128-bit double form: calls __builtin_ia32_fixupimmsd_mask on A, B
 * (as __v2df) and C (as __v2di) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5868
/* maskz 128-bit double variant with explicit rounding operand R: calls
 * __builtin_ia32_fixupimmsd_maskz on A, B (as __v2df) and C (as __v2di) with
 * immediate imm and mask U. */
#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); })
5874
/* maskz 128-bit double variant: calls __builtin_ia32_fixupimmsd_maskz on A, B
 * (as __v2df) and C (as __v2di) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmsd_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2di)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5881
/* Unmasked 128-bit float form with explicit rounding operand R: calls
 * __builtin_ia32_fixupimmss_mask on A, B (as __v4sf) and C (as __v4si) with
 * immediate imm and an all-ones __mmask8. */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      (int)(R)); })
5887
/* Masked 128-bit float form with explicit rounding operand R: calls
 * __builtin_ia32_fixupimmss_mask on A, B (as __v4sf) and C (as __v4si) with
 * immediate imm and mask U. */
#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); })
5893
/* Unmasked 128-bit float form: calls __builtin_ia32_fixupimmss_mask on A, B
 * (as __v4sf) and C (as __v4si) with immediate imm, an all-ones __mmask8 and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
5900
/* Masked 128-bit float form: calls __builtin_ia32_fixupimmss_mask on A, B
 * (as __v4sf) and C (as __v4si) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5907
/* maskz 128-bit float variant with explicit rounding operand R: calls
 * __builtin_ia32_fixupimmss_maskz on A, B (as __v4sf) and C (as __v4si) with
 * immediate imm and mask U. */
#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      (int)(R)); })
5913
/* maskz 128-bit float variant: calls __builtin_ia32_fixupimmss_maskz on A, B
 * (as __v4sf) and C (as __v4si) with immediate imm, mask U and
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmss_maskz( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4si)(__m128i)(C), \
      (int)(imm), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
5920
/* Unmasked form with explicit rounding operand R: calls
 * __builtin_ia32_getexpsd128_round_mask on A and B (as __v2df) with a zero
 * vector as third operand and an all-ones __mmask8. */
#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
5926
5927
5928static __inline__ __m128d __DEFAULT_FN_ATTRS
5929_mm_getexp_sd (__m128d __A, __m128d __B)
5930{
5931  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5932                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5933}
5934
5935static __inline__ __m128d __DEFAULT_FN_ATTRS
5936_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5937{
5938 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5939          (__v2df) __B,
5940          (__v2df) __W,
5941          (__mmask8) __U,
5942          _MM_FROUND_CUR_DIRECTION);
5943}
5944
/* Masked form with explicit rounding operand R: calls
 * __builtin_ia32_getexpsd128_round_mask on A and B (as __v2df) with W as
 * third operand and mask U. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
5950
5951static __inline__ __m128d __DEFAULT_FN_ATTRS
5952_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5953{
5954 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5955          (__v2df) __B,
5956          (__v2df) _mm_setzero_pd (),
5957          (__mmask8) __U,
5958          _MM_FROUND_CUR_DIRECTION);
5959}
5960
/* maskz form with explicit rounding operand R: calls
 * __builtin_ia32_getexpsd128_round_mask on A and B (as __v2df) with a zero
 * vector as third operand and mask U. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
5966
/* Unmasked form with explicit rounding operand R: calls
 * __builtin_ia32_getexpss128_round_mask on A and B (as __v4sf) with a zero
 * vector as third operand and an all-ones __mmask8. */
#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
5972
5973static __inline__ __m128 __DEFAULT_FN_ATTRS
5974_mm_getexp_ss (__m128 __A, __m128 __B)
5975{
5976  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5977                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5978}
5979
5980static __inline__ __m128 __DEFAULT_FN_ATTRS
5981_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5982{
5983 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5984          (__v4sf) __B,
5985          (__v4sf) __W,
5986          (__mmask8) __U,
5987          _MM_FROUND_CUR_DIRECTION);
5988}
5989
/* Masked form with explicit rounding operand R: calls
 * __builtin_ia32_getexpss128_round_mask on A and B (as __v4sf) with W as
 * third operand and mask U. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
5995
5996static __inline__ __m128 __DEFAULT_FN_ATTRS
5997_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5998{
5999 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
6000          (__v4sf) __B,
6001          (__v4sf) _mm_setzero_pd (),
6002          (__mmask8) __U,
6003          _MM_FROUND_CUR_DIRECTION);
6004}
6005
/* maskz form with explicit rounding operand R: calls
 * __builtin_ia32_getexpss128_round_mask on A and B (as __v4sf) with a zero
 * vector as third operand and mask U. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
6011
/* Unmasked form with explicit rounding operand R: calls
 * __builtin_ia32_getmantsd_round_mask on A and B (as __v2df). C and D are
 * packed into one int as ((D) << 2) | (C); the fourth operand is a zero
 * vector and the mask is all-ones. */
#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
6018
/* Unmasked form: calls __builtin_ia32_getmantsd_round_mask on A and B (as
 * __v2df). C and D are packed into one int as ((D) << 2) | (C); the fourth
 * operand is a zero vector, the mask is all-ones and the last operand is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_sd(A, B, C, D)  __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
6026
/* Masked form: calls __builtin_ia32_getmantsd_round_mask on A and B (as
 * __v2df). C and D are packed into one int as ((D) << 2) | (C); W is the
 * fourth operand, U the mask, and the last operand is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
6034
/* Masked form with explicit rounding operand R: calls
 * __builtin_ia32_getmantsd_round_mask on A and B (as __v2df). C and D are
 * packed into one int as ((D) << 2) | (C); W is the fourth operand and U the
 * mask.
 *
 * The statement expression carries the __extension__ prefix like every other
 * macro in this header, so it is not diagnosed under pedantic warnings. */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) __extension__ ({\
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)); })
6041
/* maskz form: calls __builtin_ia32_getmantsd_round_mask on A and B (as
 * __v2df). C and D are packed into one int as ((D) << 2) | (C); the fourth
 * operand is a zero vector, U the mask, and the last operand is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
6049
/* maskz form with explicit rounding operand R: calls
 * __builtin_ia32_getmantsd_round_mask on A and B (as __v2df). C and D are
 * packed into one int as ((D) << 2) | (C); the fourth operand is a zero
 * vector and U the mask. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({ \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
6056
/* Unmasked form with explicit rounding operand R: calls
 * __builtin_ia32_getmantss_round_mask on A and B (as __v4sf). C and D are
 * packed into one int as ((D) << 2) | (C); the fourth operand is a zero
 * vector and the mask is all-ones. */
#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
6063
/* Unmasked form: calls __builtin_ia32_getmantss_round_mask on A and B (as
 * __v4sf). C and D are packed into one int as ((D) << 2) | (C); the fourth
 * operand is a zero vector, the mask is all-ones and the last operand is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
6071
/* Masked form: calls __builtin_ia32_getmantss_round_mask on A and B (as
 * __v4sf). C and D are packed into one int as ((D) << 2) | (C); W is the
 * fourth operand, U the mask, and the last operand is
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      _MM_FROUND_CUR_DIRECTION); })
6079
/* Masked form with explicit rounding operand R: calls
 * __builtin_ia32_getmantss_round_mask on A and B (as __v4sf). C and D are
 * packed into one int as ((D) << 2) | (C); W is the fourth operand and U the
 * mask.
 *
 * The statement expression carries the __extension__ prefix like every other
 * macro in this header, so it is not diagnosed under pedantic warnings. */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)); })
6086
/* maskz form: calls __builtin_ia32_getmantss_round_mask on A and B (as
 * __v4sf). C and D are packed into one int as ((D) << 2) | (C); the fourth
 * operand is a zero vector, U the mask, and the last operand is
 * _MM_FROUND_CUR_DIRECTION.
 *
 * The zero operand is built with the single-precision helper
 * _mm_setzero_ps(), matching the __v4sf operand type and the sibling
 * _mm_maskz_getmant_round_ss macro (previously the double-precision
 * _mm_setzero_pd() was cast to __v4sf). */
#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION); })
6094
/* maskz form with explicit rounding operand R: calls
 * __builtin_ia32_getmantss_round_mask on A and B (as __v4sf). C and D are
 * packed into one int as ((D) << 2) | (C); the fourth operand is a zero
 * vector and U the mask. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({ \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
6101
6102static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6103_mm512_kmov (__mmask16 __A)
6104{
6105  return  __A;
6106}
6107
/* Calls __builtin_ia32_vcomisd on A and B (as __v2df) with the int operands
 * P and R, and yields the result as int. */
#define _mm_comi_round_sd(A, B, P, R) __extension__ ({ \
  (int)__builtin_ia32_vcomisd( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (int)(P), \
      (int)(R)); })
6111
/* Calls __builtin_ia32_vcomiss on A and B (as __v4sf) with the int operands
 * P and R, and yields the result as int. */
#define _mm_comi_round_ss(A, B, P, R) __extension__ ({ \
  (int)__builtin_ia32_vcomiss( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (int)(P), \
      (int)(R)); })
6115
6116#ifdef __x86_64__
/* Calls __builtin_ia32_vcvtsd2si64 on A (as __v2df) with rounding operand R
 * and yields the result as long long. */
#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6119#endif
6120
6121static __inline__ __m512i __DEFAULT_FN_ATTRS
6122_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
6123         __mmask16 __U, __m512i __B)
6124{
6125  return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
6126                   (__v16si) __I
6127                   /* idx */ ,
6128                   (__v16si) __B,
6129                   (__mmask16) __U);
6130}
6131
6132static __inline__ __m512i __DEFAULT_FN_ATTRS
6133_mm512_sll_epi32(__m512i __A, __m128i __B)
6134{
6135  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
6136}
6137
6138static __inline__ __m512i __DEFAULT_FN_ATTRS
6139_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6140{
6141  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6142                                          (__v16si)_mm512_sll_epi32(__A, __B),
6143                                          (__v16si)__W);
6144}
6145
6146static __inline__ __m512i __DEFAULT_FN_ATTRS
6147_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6148{
6149  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6150                                          (__v16si)_mm512_sll_epi32(__A, __B),
6151                                          (__v16si)_mm512_setzero_si512());
6152}
6153
6154static __inline__ __m512i __DEFAULT_FN_ATTRS
6155_mm512_sll_epi64(__m512i __A, __m128i __B)
6156{
6157  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
6158}
6159
6160static __inline__ __m512i __DEFAULT_FN_ATTRS
6161_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6162{
6163  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6164                                             (__v8di)_mm512_sll_epi64(__A, __B),
6165                                             (__v8di)__W);
6166}
6167
6168static __inline__ __m512i __DEFAULT_FN_ATTRS
6169_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6170{
6171  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6172                                           (__v8di)_mm512_sll_epi64(__A, __B),
6173                                           (__v8di)_mm512_setzero_si512());
6174}
6175
6176static __inline__ __m512i __DEFAULT_FN_ATTRS
6177_mm512_sllv_epi32(__m512i __X, __m512i __Y)
6178{
6179  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
6180}
6181
6182static __inline__ __m512i __DEFAULT_FN_ATTRS
6183_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6184{
6185  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6186                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
6187                                           (__v16si)__W);
6188}
6189
6190static __inline__ __m512i __DEFAULT_FN_ATTRS
6191_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6192{
6193  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6194                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
6195                                           (__v16si)_mm512_setzero_si512());
6196}
6197
6198static __inline__ __m512i __DEFAULT_FN_ATTRS
6199_mm512_sllv_epi64(__m512i __X, __m512i __Y)
6200{
6201  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
6202}
6203
6204static __inline__ __m512i __DEFAULT_FN_ATTRS
6205_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6206{
6207  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6208                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
6209                                            (__v8di)__W);
6210}
6211
6212static __inline__ __m512i __DEFAULT_FN_ATTRS
6213_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6214{
6215  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6216                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
6217                                            (__v8di)_mm512_setzero_si512());
6218}
6219
6220static __inline__ __m512i __DEFAULT_FN_ATTRS
6221_mm512_sra_epi32(__m512i __A, __m128i __B)
6222{
6223  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
6224}
6225
6226static __inline__ __m512i __DEFAULT_FN_ATTRS
6227_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6228{
6229  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6230                                          (__v16si)_mm512_sra_epi32(__A, __B),
6231                                          (__v16si)__W);
6232}
6233
6234static __inline__ __m512i __DEFAULT_FN_ATTRS
6235_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6236{
6237  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6238                                          (__v16si)_mm512_sra_epi32(__A, __B),
6239                                          (__v16si)_mm512_setzero_si512());
6240}
6241
6242static __inline__ __m512i __DEFAULT_FN_ATTRS
6243_mm512_sra_epi64(__m512i __A, __m128i __B)
6244{
6245  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
6246}
6247
6248static __inline__ __m512i __DEFAULT_FN_ATTRS
6249_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6250{
6251  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6252                                           (__v8di)_mm512_sra_epi64(__A, __B),
6253                                           (__v8di)__W);
6254}
6255
6256static __inline__ __m512i __DEFAULT_FN_ATTRS
6257_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6258{
6259  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6260                                           (__v8di)_mm512_sra_epi64(__A, __B),
6261                                           (__v8di)_mm512_setzero_si512());
6262}
6263
6264static __inline__ __m512i __DEFAULT_FN_ATTRS
6265_mm512_srav_epi32(__m512i __X, __m512i __Y)
6266{
6267  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
6268}
6269
6270static __inline__ __m512i __DEFAULT_FN_ATTRS
6271_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6272{
6273  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6274                                           (__v16si)_mm512_srav_epi32(__X, __Y),
6275                                           (__v16si)__W);
6276}
6277
6278static __inline__ __m512i __DEFAULT_FN_ATTRS
6279_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6280{
6281  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6282                                           (__v16si)_mm512_srav_epi32(__X, __Y),
6283                                           (__v16si)_mm512_setzero_si512());
6284}
6285
6286static __inline__ __m512i __DEFAULT_FN_ATTRS
6287_mm512_srav_epi64(__m512i __X, __m512i __Y)
6288{
6289  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
6290}
6291
6292static __inline__ __m512i __DEFAULT_FN_ATTRS
6293_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6294{
6295  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6296                                            (__v8di)_mm512_srav_epi64(__X, __Y),
6297                                            (__v8di)__W);
6298}
6299
6300static __inline__ __m512i __DEFAULT_FN_ATTRS
6301_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6302{
6303  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6304                                            (__v8di)_mm512_srav_epi64(__X, __Y),
6305                                            (__v8di)_mm512_setzero_si512());
6306}
6307
6308static __inline__ __m512i __DEFAULT_FN_ATTRS
6309_mm512_srl_epi32(__m512i __A, __m128i __B)
6310{
6311  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
6312}
6313
6314static __inline__ __m512i __DEFAULT_FN_ATTRS
6315_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
6316{
6317  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6318                                          (__v16si)_mm512_srl_epi32(__A, __B),
6319                                          (__v16si)__W);
6320}
6321
6322static __inline__ __m512i __DEFAULT_FN_ATTRS
6323_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
6324{
6325  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6326                                          (__v16si)_mm512_srl_epi32(__A, __B),
6327                                          (__v16si)_mm512_setzero_si512());
6328}
6329
6330static __inline__ __m512i __DEFAULT_FN_ATTRS
6331_mm512_srl_epi64(__m512i __A, __m128i __B)
6332{
6333  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
6334}
6335
6336static __inline__ __m512i __DEFAULT_FN_ATTRS
6337_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
6338{
6339  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6340                                           (__v8di)_mm512_srl_epi64(__A, __B),
6341                                           (__v8di)__W);
6342}
6343
6344static __inline__ __m512i __DEFAULT_FN_ATTRS
6345_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
6346{
6347  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6348                                           (__v8di)_mm512_srl_epi64(__A, __B),
6349                                           (__v8di)_mm512_setzero_si512());
6350}
6351
6352static __inline__ __m512i __DEFAULT_FN_ATTRS
6353_mm512_srlv_epi32(__m512i __X, __m512i __Y)
6354{
6355  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
6356}
6357
6358static __inline__ __m512i __DEFAULT_FN_ATTRS
6359_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
6360{
6361  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6362                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
6363                                           (__v16si)__W);
6364}
6365
6366static __inline__ __m512i __DEFAULT_FN_ATTRS
6367_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
6368{
6369  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6370                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
6371                                           (__v16si)_mm512_setzero_si512());
6372}
6373
6374static __inline__ __m512i __DEFAULT_FN_ATTRS
6375_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
6376{
6377  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
6378}
6379
6380static __inline__ __m512i __DEFAULT_FN_ATTRS
6381_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
6382{
6383  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6384                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
6385                                            (__v8di)__W);
6386}
6387
6388static __inline__ __m512i __DEFAULT_FN_ATTRS
6389_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
6390{
6391  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6392                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
6393                                            (__v8di)_mm512_setzero_si512());
6394}
6395
/* Unmasked form: calls __builtin_ia32_pternlogd512_mask on A, B and C (as
 * __v16si) with immediate imm and an all-ones __mmask16. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)-1); })
6401
/* Masked form: calls __builtin_ia32_pternlogd512_mask on A, B and C (as
 * __v16si) with immediate imm and mask U. */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U)); })
6407
/* maskz form: calls __builtin_ia32_pternlogd512_maskz on A, B and C (as
 * __v16si) with immediate imm and mask U. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogd512_maskz( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (__v16si)(__m512i)(C), \
      (int)(imm), \
      (__mmask16)(U)); })
6413
/* Unmasked form: calls __builtin_ia32_pternlogq512_mask on A, B and C (as
 * __v8di) with immediate imm and an all-ones __mmask8. */
#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)-1); })
6419
/* Masked form: calls __builtin_ia32_pternlogq512_mask on A, B and C (as
 * __v8di) with immediate imm and mask U. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
6425
/* maskz form: calls __builtin_ia32_pternlogq512_maskz on A, B and C (as
 * __v8di) with immediate imm and mask U. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_pternlogq512_maskz( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (__v8di)(__m512i)(C), \
      (int)(imm), \
      (__mmask8)(U)); })
6431
6432#ifdef __x86_64__
/* Calls __builtin_ia32_vcvtsd2si64 on A (as __v2df) with rounding operand R
 * and yields the result as long long. */
#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtsd2si64( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6435#endif
6436
/* Calls __builtin_ia32_vcvtsd2si32 on A (as __v2df) with rounding operand R
 * and yields the result as int. */
#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6439
/* Calls __builtin_ia32_vcvtsd2si32 on A (as __v2df) with rounding operand R
 * and yields the result as int. */
#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6442
/* Calls __builtin_ia32_vcvtsd2usi32 on A (as __v2df) with rounding operand R
 * and yields the result as unsigned int. */
#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtsd2usi32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6445
6446static __inline__ unsigned __DEFAULT_FN_ATTRS
6447_mm_cvtsd_u32 (__m128d __A)
6448{
6449  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6450             _MM_FROUND_CUR_DIRECTION);
6451}
6452
6453#ifdef __x86_64__
/* Calls __builtin_ia32_vcvtsd2usi64 on A (as __v2df) with rounding operand R
 * and yields the result as unsigned long long. */
#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \
  (unsigned long long)__builtin_ia32_vcvtsd2usi64( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6457
6458static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6459_mm_cvtsd_u64 (__m128d __A)
6460{
6461  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6462                 __A,
6463                 _MM_FROUND_CUR_DIRECTION);
6464}
6465#endif
6466
/* Calls __builtin_ia32_vcvtss2si32 on A (as __v4sf) with rounding operand R
 * and yields the result as int. */
#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6469
/* Calls __builtin_ia32_vcvtss2si32 on A (as __v4sf) with rounding operand R
 * and yields the result as int. */
#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvtss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6472
6473#ifdef __x86_64__
/* Calls __builtin_ia32_vcvtss2si64 on A (as __v4sf) with rounding operand R
 * and yields the result as long long. */
#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6476
/* Calls __builtin_ia32_vcvtss2si64 on A (as __v4sf) with rounding operand R
 * and yields the result as long long. */
#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \
  (long long)__builtin_ia32_vcvtss2si64( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6479#endif
6480
/* Expands to the vcvtss2usi32 builtin applied to A (as __v4sf) with R (as
 * int) as its second argument; the result is cast to unsigned int. */
#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvtss2usi32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6483
6484static __inline__ unsigned __DEFAULT_FN_ATTRS
6485_mm_cvtss_u32 (__m128 __A)
6486{
6487  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6488             _MM_FROUND_CUR_DIRECTION);
6489}
6490
6491#ifdef __x86_64__
6492#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \
6493  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6494                                                  (int)(R)); })
6495
6496static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6497_mm_cvtss_u64 (__m128 __A)
6498{
6499  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6500                 __A,
6501                 _MM_FROUND_CUR_DIRECTION);
6502}
6503#endif
6504
/* _mm_cvtt_roundsd_i32 and _mm_cvtt_roundsd_si32 have identical expansions:
 * each passes A (as __v2df) and R (as int) to the vcvttsd2si32 builtin and
 * casts the result to int. */
#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })

#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttsd2si32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6510
6511static __inline__ int __DEFAULT_FN_ATTRS
6512_mm_cvttsd_i32 (__m128d __A)
6513{
6514  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6515              _MM_FROUND_CUR_DIRECTION);
6516}
6517
6518#ifdef __x86_64__
6519#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \
6520  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6521
6522#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \
6523  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); })
6524
6525static __inline__ long long __DEFAULT_FN_ATTRS
6526_mm_cvttsd_i64 (__m128d __A)
6527{
6528  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6529              _MM_FROUND_CUR_DIRECTION);
6530}
6531#endif
6532
/* Expands to the vcvttsd2usi32 builtin applied to A (as __v2df) with R (as
 * int) as its second argument; the result is cast to unsigned int. */
#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttsd2usi32( \
      (__v2df)(__m128d)(A), \
      (int)(R)); })
6535
6536static __inline__ unsigned __DEFAULT_FN_ATTRS
6537_mm_cvttsd_u32 (__m128d __A)
6538{
6539  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6540              _MM_FROUND_CUR_DIRECTION);
6541}
6542
6543#ifdef __x86_64__
6544#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \
6545  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6546                                                   (int)(R)); })
6547
6548static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6549_mm_cvttsd_u64 (__m128d __A)
6550{
6551  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6552                  __A,
6553                  _MM_FROUND_CUR_DIRECTION);
6554}
6555#endif
6556
/* _mm_cvtt_roundss_i32 and _mm_cvtt_roundss_si32 have identical expansions:
 * each passes A (as __v4sf) and R (as int) to the vcvttss2si32 builtin and
 * casts the result to int. */
#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })

#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \
  (int)__builtin_ia32_vcvttss2si32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6562
6563static __inline__ int __DEFAULT_FN_ATTRS
6564_mm_cvttss_i32 (__m128 __A)
6565{
6566  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6567              _MM_FROUND_CUR_DIRECTION);
6568}
6569
6570#ifdef __x86_64__
6571#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \
6572  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6573
6574#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \
6575  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); })
6576
6577static __inline__ long long __DEFAULT_FN_ATTRS
6578_mm_cvttss_i64 (__m128 __A)
6579{
6580  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6581              _MM_FROUND_CUR_DIRECTION);
6582}
6583#endif
6584
/* Expands to the vcvttss2usi32 builtin applied to A (as __v4sf) with R (as
 * int) as its second argument; the result is cast to unsigned int. */
#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \
  (unsigned int)__builtin_ia32_vcvttss2usi32( \
      (__v4sf)(__m128)(A), \
      (int)(R)); })
6587
6588static __inline__ unsigned __DEFAULT_FN_ATTRS
6589_mm_cvttss_u32 (__m128 __A)
6590{
6591  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6592              _MM_FROUND_CUR_DIRECTION);
6593}
6594
6595#ifdef __x86_64__
6596#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \
6597  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6598                                                   (int)(R)); })
6599
6600static __inline__ unsigned long long __DEFAULT_FN_ATTRS
6601_mm_cvttss_u64 (__m128 __A)
6602{
6603  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6604                  __A,
6605                  _MM_FROUND_CUR_DIRECTION);
6606}
6607#endif
6608
6609static __inline__ __m512d __DEFAULT_FN_ATTRS
6610_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
6611            __m512d __B)
6612{
6613  return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
6614              (__v8di) __I
6615              /* idx */ ,
6616              (__v8df) __B,
6617              (__mmask8) __U);
6618}
6619
6620static __inline__ __m512 __DEFAULT_FN_ATTRS
6621_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
6622            __m512 __B)
6623{
6624  return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
6625                   (__v16si) __I
6626                   /* idx */ ,
6627                   (__v16sf) __B,
6628                   (__mmask16) __U);
6629}
6630
6631static __inline__ __m512i __DEFAULT_FN_ATTRS
6632_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
6633         __mmask8 __U, __m512i __B)
6634{
6635  return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
6636                   (__v8di) __I
6637                   /* idx */ ,
6638                   (__v8di) __B,
6639                   (__mmask8) __U);
6640}
6641
/* Shuffles X (as __v8df) with __builtin_shufflevector.  Result element i
 * reads source element (i rounded down to even) + bit i of C, so every index
 * stays in 0..7 and the second operand, _mm512_undefined_pd(), is never
 * selected.  C is expanded eight times. */
#define _mm512_permute_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(X), (__v8df)_mm512_undefined_pd(), \
      0 + (((C) >> 0) & 0x1), 0 + (((C) >> 1) & 0x1), \
      2 + (((C) >> 2) & 0x1), 2 + (((C) >> 3) & 0x1), \
      4 + (((C) >> 4) & 0x1), 4 + (((C) >> 5) & 0x1), \
      6 + (((C) >> 6) & 0x1), 6 + (((C) >> 7) & 0x1)); })
6653
/* Masked forms of _mm512_permute_pd.  Each passes (U, permuted X, fallback)
 * to the selectpd_512 builtin; the fallback vector is W for the mask form
 * and _mm512_setzero_pd() for the maskz form. */
#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)(__m512d)(W)); })

#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permute_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()); })
6663
/* Shuffles X (as __v16sf) with __builtin_shufflevector.  The sixteen indices
 * come in four groups with bases 0, 4, 8 and 12; within every group the four
 * offsets are the 2-bit fields of C at bits 1:0, 3:2, 5:4 and 7:6.  All
 * indices stay in 0..15, so the second operand, _mm512_undefined_ps(), is
 * never selected.  C is expanded sixteen times. */
#define _mm512_permute_ps(X, C) __extension__ ({ \
  (__m512)__builtin_shufflevector( \
      (__v16sf)(__m512)(X), (__v16sf)_mm512_undefined_ps(), \
      0  + (((C) >> 0) & 0x3), 0  + (((C) >> 2) & 0x3), \
      0  + (((C) >> 4) & 0x3), 0  + (((C) >> 6) & 0x3), \
      4  + (((C) >> 0) & 0x3), 4  + (((C) >> 2) & 0x3), \
      4  + (((C) >> 4) & 0x3), 4  + (((C) >> 6) & 0x3), \
      8  + (((C) >> 0) & 0x3), 8  + (((C) >> 2) & 0x3), \
      8  + (((C) >> 4) & 0x3), 8  + (((C) >> 6) & 0x3), \
      12 + (((C) >> 0) & 0x3), 12 + (((C) >> 2) & 0x3), \
      12 + (((C) >> 4) & 0x3), 12 + (((C) >> 6) & 0x3)); })
6683
/* Masked forms of _mm512_permute_ps.  Each passes (U, permuted X, fallback)
 * to the selectps_512 builtin; the fallback vector is W for the mask form
 * and _mm512_setzero_ps() for the maskz form. */
#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_permute_ps((X), (C)), \
      (__v16sf)_mm512_setzero_ps()); })
6693
6694static __inline__ __m512d __DEFAULT_FN_ATTRS
6695_mm512_permutevar_pd(__m512d __A, __m512i __C)
6696{
6697  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6698}
6699
6700static __inline__ __m512d __DEFAULT_FN_ATTRS
6701_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6702{
6703  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6704                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6705                                         (__v8df)__W);
6706}
6707
6708static __inline__ __m512d __DEFAULT_FN_ATTRS
6709_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6710{
6711  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6712                                         (__v8df)_mm512_permutevar_pd(__A, __C),
6713                                         (__v8df)_mm512_setzero_pd());
6714}
6715
6716static __inline__ __m512 __DEFAULT_FN_ATTRS
6717_mm512_permutevar_ps(__m512 __A, __m512i __C)
6718{
6719  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6720}
6721
6722static __inline__ __m512 __DEFAULT_FN_ATTRS
6723_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6724{
6725  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6726                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6727                                        (__v16sf)__W);
6728}
6729
6730static __inline__ __m512 __DEFAULT_FN_ATTRS
6731_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
6732{
6733  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6734                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
6735                                        (__v16sf)_mm512_setzero_ps());
6736}
6737
6738static __inline __m512d __DEFAULT_FN_ATTRS
6739_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6740{
6741  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6742                    /* idx */ ,
6743                    (__v8df) __A,
6744                    (__v8df) __B,
6745                    (__mmask8) -1);
6746}
6747
6748static __inline__ __m512d __DEFAULT_FN_ATTRS
6749_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6750{
6751  return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
6752                    /* idx */ ,
6753                    (__v8df) __A,
6754                    (__v8df) __B,
6755                    (__mmask8) __U);
6756}
6757
6758static __inline__ __m512d __DEFAULT_FN_ATTRS
6759_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
6760            __m512d __B)
6761{
6762  return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
6763                                                         /* idx */ ,
6764                                                         (__v8df) __A,
6765                                                         (__v8df) __B,
6766                                                         (__mmask8) __U);
6767}
6768
6769static __inline __m512 __DEFAULT_FN_ATTRS
6770_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6771{
6772  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6773                                                         /* idx */ ,
6774                                                         (__v16sf) __A,
6775                                                         (__v16sf) __B,
6776                                                         (__mmask16) -1);
6777}
6778
6779static __inline__ __m512 __DEFAULT_FN_ATTRS
6780_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6781{
6782  return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
6783                                                         /* idx */ ,
6784                                                         (__v16sf) __A,
6785                                                         (__v16sf) __B,
6786                                                         (__mmask16) __U);
6787}
6788
6789static __inline__ __m512 __DEFAULT_FN_ATTRS
6790_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
6791            __m512 __B)
6792{
6793  return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
6794                                                        /* idx */ ,
6795                                                        (__v16sf) __A,
6796                                                        (__v16sf) __B,
6797                                                        (__mmask16) __U);
6798}
6799
6800static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6801_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
6802{
6803  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6804             (__v16si) __B,
6805             (__mmask16) -1);
6806}
6807
6808static __inline__ __mmask16 __DEFAULT_FN_ATTRS
6809_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
6810{
6811  return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
6812             (__v16si) __B, __U);
6813}
6814
6815static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6816_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
6817{
6818  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6819            (__v8di) __B,
6820            (__mmask8) -1);
6821}
6822
6823static __inline__ __mmask8 __DEFAULT_FN_ATTRS
6824_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
6825{
6826  return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
6827            (__v8di) __B, __U);
6828}
6829
/* Three forms over the cvttpd2udq512_mask builtin.  Each passes A (as
 * __v8df) first and R (as int) last; they differ only in the second vector
 * argument and the mask:
 *   plain: _mm256_undefined_si256() with mask (__mmask8)-1
 *   mask:  W                        with mask U
 *   maskz: _mm256_setzero_si256()   with mask U */
#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_undefined_si256(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)(__m256i)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8si)_mm256_setzero_si256(), \
      (__mmask8)(U), \
      (int)(R)); })
6844
6845static __inline__ __m256i __DEFAULT_FN_ATTRS
6846_mm512_cvttpd_epu32 (__m512d __A)
6847{
6848  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6849                  (__v8si)
6850                  _mm256_undefined_si256 (),
6851                  (__mmask8) -1,
6852                  _MM_FROUND_CUR_DIRECTION);
6853}
6854
6855static __inline__ __m256i __DEFAULT_FN_ATTRS
6856_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6857{
6858  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6859                  (__v8si) __W,
6860                  (__mmask8) __U,
6861                  _MM_FROUND_CUR_DIRECTION);
6862}
6863
6864static __inline__ __m256i __DEFAULT_FN_ATTRS
6865_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
6866{
6867  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6868                  (__v8si)
6869                  _mm256_setzero_si256 (),
6870                  (__mmask8) __U,
6871                  _MM_FROUND_CUR_DIRECTION);
6872}
6873
/* Unmasked forms over the rndscalesd_round_mask builtin.  Argument order is
 * (A, B, _mm_setzero_pd(), (__mmask8)-1, imm, rounding).  The _round_ macro
 * forwards R as the last argument; the other fixes it to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(imm), \
      (int)(R)); })

#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(imm), \
      _MM_FROUND_CUR_DIRECTION); })
6887
/* Mask forms over the rndscalesd_round_mask builtin.  Argument order is
 * (A, B, W, U, immediate, rounding).  The plain macro fixes the last
 * argument to _MM_FROUND_CUR_DIRECTION; the _round_ macro forwards R. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(imm), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(I), \
      (int)(R)); })
6901
/* Maskz forms over the rndscalesd_round_mask builtin.  Argument order is
 * (A, B, _mm_setzero_pd(), U, I, rounding).  The plain macro fixes the last
 * argument to _MM_FROUND_CUR_DIRECTION; the _round_ macro forwards R. */
#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(I), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalesd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(I), \
      (int)(R)); })
6915
/* Unmasked forms over the rndscaless_round_mask builtin.  Argument order is
 * (A, B, _mm_setzero_ps(), (__mmask8)-1, imm, rounding).  The _round_ macro
 * forwards R as the last argument; the other fixes it to
 * _MM_FROUND_CUR_DIRECTION. */
#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(imm), \
      (int)(R)); })

#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(imm), \
      _MM_FROUND_CUR_DIRECTION); })
6929
/* Mask forms over the rndscaless_round_mask builtin.  Argument order is
 * (A, B, W, U, I, rounding).  The plain macro fixes the last argument to
 * _MM_FROUND_CUR_DIRECTION; the _round_ macro forwards R. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(I), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(I), \
      (int)(R)); })
6943
/* Maskz forms over the rndscaless_round_mask builtin.  Argument order is
 * (A, B, _mm_setzero_ps(), U, I, rounding).  The plain macro fixes the last
 * argument to _MM_FROUND_CUR_DIRECTION; the _round_ macro forwards R. */
#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(I), \
      _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaless_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(I), \
      (int)(R)); })
6957
/* Three forms over the scalefpd512_mask builtin.  Each passes A and B (as
 * __v8df) first and R (as int) last; they differ only in the third vector
 * argument and the mask:
 *   plain: _mm512_undefined_pd() with mask (__mmask8)-1
 *   mask:  W                     with mask U
 *   maskz: _mm512_setzero_pd()   with mask U */
#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1, \
      (int)(R)); })

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U), \
      (int)(R)); })

#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \
  (__m512d)__builtin_ia32_scalefpd512_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
6975
6976static __inline__ __m512d __DEFAULT_FN_ATTRS
6977_mm512_scalef_pd (__m512d __A, __m512d __B)
6978{
6979  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6980                (__v8df) __B,
6981                (__v8df)
6982                _mm512_undefined_pd (),
6983                (__mmask8) -1,
6984                _MM_FROUND_CUR_DIRECTION);
6985}
6986
6987static __inline__ __m512d __DEFAULT_FN_ATTRS
6988_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6989{
6990  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6991                (__v8df) __B,
6992                (__v8df) __W,
6993                (__mmask8) __U,
6994                _MM_FROUND_CUR_DIRECTION);
6995}
6996
6997static __inline__ __m512d __DEFAULT_FN_ATTRS
6998_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6999{
7000  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
7001                (__v8df) __B,
7002                (__v8df)
7003                _mm512_setzero_pd (),
7004                (__mmask8) __U,
7005                _MM_FROUND_CUR_DIRECTION);
7006}
7007
/* Three forms over the scalefps512_mask builtin.  Each passes A and B (as
 * __v16sf) first and R (as int) last; they differ only in the third vector
 * argument and the mask:
 *   plain: _mm512_undefined_ps() with mask (__mmask16)-1
 *   mask:  W                     with mask U
 *   maskz: _mm512_setzero_ps()   with mask U */
#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1, \
      (int)(R)); })

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U), \
      (int)(R)); })

#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \
  (__m512)__builtin_ia32_scalefps512_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U), \
      (int)(R)); })
7025
7026static __inline__ __m512 __DEFAULT_FN_ATTRS
7027_mm512_scalef_ps (__m512 __A, __m512 __B)
7028{
7029  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
7030               (__v16sf) __B,
7031               (__v16sf)
7032               _mm512_undefined_ps (),
7033               (__mmask16) -1,
7034               _MM_FROUND_CUR_DIRECTION);
7035}
7036
7037static __inline__ __m512 __DEFAULT_FN_ATTRS
7038_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
7039{
7040  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
7041               (__v16sf) __B,
7042               (__v16sf) __W,
7043               (__mmask16) __U,
7044               _MM_FROUND_CUR_DIRECTION);
7045}
7046
7047static __inline__ __m512 __DEFAULT_FN_ATTRS
7048_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
7049{
7050  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
7051               (__v16sf) __B,
7052               (__v16sf)
7053               _mm512_setzero_ps (),
7054               (__mmask16) __U,
7055               _MM_FROUND_CUR_DIRECTION);
7056}
7057
/* Expands to the scalefsd_round_mask builtin with arguments
 * (A, B, _mm_setzero_pd(), (__mmask8)-1, R). */
#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
7063
7064static __inline__ __m128d __DEFAULT_FN_ATTRS
7065_mm_scalef_sd (__m128d __A, __m128d __B)
7066{
7067  return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
7068              (__v2df)( __B), (__v2df) _mm_setzero_pd(),
7069              (__mmask8) -1,
7070              _MM_FROUND_CUR_DIRECTION);
7071}
7072
7073static __inline__ __m128d __DEFAULT_FN_ATTRS
7074_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7075{
7076 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
7077                 (__v2df) __B,
7078                (__v2df) __W,
7079                (__mmask8) __U,
7080                _MM_FROUND_CUR_DIRECTION);
7081}
7082
/* Expands to the scalefsd_round_mask builtin with arguments
 * (A, B, W, U, R). */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
7088
7089static __inline__ __m128d __DEFAULT_FN_ATTRS
7090_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
7091{
7092 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
7093                 (__v2df) __B,
7094                (__v2df) _mm_setzero_pd (),
7095                (__mmask8) __U,
7096                _MM_FROUND_CUR_DIRECTION);
7097}
7098
/* Expands to the scalefsd_round_mask builtin with arguments
 * (A, B, _mm_setzero_pd(), U, R). */
#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_scalefsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
7104
/* Expands to the scalefss_round_mask builtin with arguments
 * (A, B, _mm_setzero_ps(), (__mmask8)-1, R). */
#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
7110
7111static __inline__ __m128 __DEFAULT_FN_ATTRS
7112_mm_scalef_ss (__m128 __A, __m128 __B)
7113{
7114  return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
7115             (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
7116             (__mmask8) -1,
7117             _MM_FROUND_CUR_DIRECTION);
7118}
7119
7120static __inline__ __m128 __DEFAULT_FN_ATTRS
7121_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7122{
7123 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7124                (__v4sf) __B,
7125                (__v4sf) __W,
7126                (__mmask8) __U,
7127                _MM_FROUND_CUR_DIRECTION);
7128}
7129
/* Expands to the scalefss_round_mask builtin with arguments
 * (A, B, W, U, R). */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
7135
7136static __inline__ __m128 __DEFAULT_FN_ATTRS
7137_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
7138{
7139 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
7140                 (__v4sf) __B,
7141                (__v4sf) _mm_setzero_ps (),
7142                (__mmask8) __U,
7143                _MM_FROUND_CUR_DIRECTION);
7144}
7145
/* Expands to the scalefss_round_mask builtin with arguments
 * (A, B, _mm_setzero_ps(), U, R).
 *
 * R must reach the builtin's final (rounding) argument, exactly as in
 * _mm_scalef_round_ss, _mm_mask_scalef_round_ss and
 * _mm_maskz_scalef_round_sd; passing _MM_FROUND_CUR_DIRECTION here would
 * silently discard the caller's rounding request. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), \
                                             (int)(R)); })
7152
7153static __inline__ __m512i __DEFAULT_FN_ATTRS
7154_mm512_srai_epi32(__m512i __A, int __B)
7155{
7156  return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
7157}
7158
7159static __inline__ __m512i __DEFAULT_FN_ATTRS
7160_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
7161{
7162  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
7163                                         (__v16si)_mm512_srai_epi32(__A, __B), \
7164                                         (__v16si)__W);
7165}
7166
7167static __inline__ __m512i __DEFAULT_FN_ATTRS
7168_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
7169  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
7170                                         (__v16si)_mm512_srai_epi32(__A, __B), \
7171                                         (__v16si)_mm512_setzero_si512());
7172}
7173
7174static __inline__ __m512i __DEFAULT_FN_ATTRS
7175_mm512_srai_epi64(__m512i __A, int __B)
7176{
7177  return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
7178}
7179
7180static __inline__ __m512i __DEFAULT_FN_ATTRS
7181_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
7182{
7183  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
7184                                          (__v8di)_mm512_srai_epi64(__A, __B), \
7185                                          (__v8di)__W);
7186}
7187
7188static __inline__ __m512i __DEFAULT_FN_ATTRS
7189_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
7190{
7191  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
7192                                          (__v8di)_mm512_srai_epi64(__A, __B), \
7193                                          (__v8di)_mm512_setzero_si512());
7194}
7195
/* Unmasked form: calls __builtin_ia32_shuf_f32x4_mask on A and B with the
   immediate imm, an undefined pass-through vector and an all-ones mask. */
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)_mm512_undefined_ps(), \
      (__mmask16)-1); })
7201
/* Merge-masking form: calls __builtin_ia32_shuf_f32x4_mask on A and B with
   the immediate imm, W as the pass-through vector and U as the mask. */
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)(__m512)(W), \
      (__mmask16)(U)); })
7207
/* Zero-masking form: calls __builtin_ia32_shuf_f32x4_mask on A and B with
   the immediate imm, an all-zero pass-through vector and U as the mask. */
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_shuf_f32x4_mask( \
      (__v16sf)(__m512)(A), \
      (__v16sf)(__m512)(B), \
      (int)(imm), \
      (__v16sf)_mm512_setzero_ps(), \
      (__mmask16)(U)); })
7213
/* Unmasked form: calls __builtin_ia32_shuf_f64x2_mask on A and B with the
   immediate imm, an undefined pass-through vector and an all-ones mask. */
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_undefined_pd(), \
      (__mmask8)-1); })
7219
/* Merge-masking form: calls __builtin_ia32_shuf_f64x2_mask on A and B with
   the immediate imm, W as the pass-through vector and U as the mask. */
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)(__m512d)(W), \
      (__mmask8)(U)); })
7225
/* Zero-masking form: calls __builtin_ia32_shuf_f64x2_mask on A and B with
   the immediate imm, an all-zero pass-through vector and U as the mask. */
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_shuf_f64x2_mask( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      (int)(imm), \
      (__v8df)_mm512_setzero_pd(), \
      (__mmask8)(U)); })
7231
/* Unmasked form: calls __builtin_ia32_shuf_i32x4_mask on A and B with the
   immediate imm, an all-zero pass-through vector and an all-ones mask. */
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)-1); })
7237
/* Merge-masking form: calls __builtin_ia32_shuf_i32x4_mask on A and B with
   the immediate imm, W as the pass-through vector and U as the mask. */
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)(__m512i)(W), \
      (__mmask16)(U)); })
7243
/* Zero-masking form: calls __builtin_ia32_shuf_i32x4_mask on A and B with
   the immediate imm, an all-zero pass-through vector and U as the mask. */
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i32x4_mask( \
      (__v16si)(__m512i)(A), \
      (__v16si)(__m512i)(B), \
      (int)(imm), \
      (__v16si)_mm512_setzero_si512(), \
      (__mmask16)(U)); })
7249
/* Unmasked form: calls __builtin_ia32_shuf_i64x2_mask on A and B with the
   immediate imm, an all-zero pass-through vector and an all-ones mask. */
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)-1); })
7255
/* Merge-masking form: calls __builtin_ia32_shuf_i64x2_mask on A and B with
   the immediate imm, W as the pass-through vector and U as the mask. */
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)(__m512i)(W), \
      (__mmask8)(U)); })
7261
/* Zero-masking form: calls __builtin_ia32_shuf_i64x2_mask on A and B with
   the immediate imm, an all-zero pass-through vector and U as the mask. */
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_shuf_i64x2_mask( \
      (__v8di)(__m512i)(A), \
      (__v8di)(__m512i)(B), \
      (int)(imm), \
      (__v8di)_mm512_setzero_si512(), \
      (__mmask8)(U)); })
7267
/* Shuffles doubles from A (indices 0-7) and B (indices 8-15) with
   __builtin_shufflevector.  Result element i takes bit i of M as a 0/1
   offset: even result elements come from A's pair, odd ones from B's pair. */
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(A), \
      (__v8df)(__m512d)(B), \
      0  + (((M) >> 0) & 0x1), \
      8  + (((M) >> 1) & 0x1), \
      2  + (((M) >> 2) & 0x1), \
      10 + (((M) >> 3) & 0x1), \
      4  + (((M) >> 4) & 0x1), \
      12 + (((M) >> 5) & 0x1), \
      6  + (((M) >> 6) & 0x1), \
      14 + (((M) >> 7) & 0x1)); })
7279
/* Merge-masking form of _mm512_shuffle_pd: combines the shuffle result with
   W through __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)(__m512d)(W)); })
7284
/* Zero-masking form of _mm512_shuffle_pd: combines the shuffle result with an
   all-zero vector through __builtin_ia32_selectpd_512 under mask U. */
#define _mm512_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
      (__v8df)_mm512_setzero_pd()); })
7289
/* Shuffles floats from A (indices 0-15) and B (indices 16-31) with
   __builtin_shufflevector.  Within each group of four result elements, the
   first two are picked from A and the last two from B, each using a 2-bit
   field of M (bits 1:0, 3:2, 5:4, 7:6) as the offset inside that group.
   The shuffle operates on __v16sf operands, so the result is cast to __m512
   (it was previously mis-cast to the double-precision type __m512d). */
#define _mm512_shuffle_ps(A, B, M) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), \
                                  0  + (((M) >> 0) & 0x3), \
                                  0  + (((M) >> 2) & 0x3), \
                                  16 + (((M) >> 4) & 0x3), \
                                  16 + (((M) >> 6) & 0x3), \
                                  4  + (((M) >> 0) & 0x3), \
                                  4  + (((M) >> 2) & 0x3), \
                                  20 + (((M) >> 4) & 0x3), \
                                  20 + (((M) >> 6) & 0x3), \
                                  8  + (((M) >> 0) & 0x3), \
                                  8  + (((M) >> 2) & 0x3), \
                                  24 + (((M) >> 4) & 0x3), \
                                  24 + (((M) >> 6) & 0x3), \
                                  12 + (((M) >> 0) & 0x3), \
                                  12 + (((M) >> 2) & 0x3), \
                                  28 + (((M) >> 4) & 0x3), \
                                  28 + (((M) >> 6) & 0x3)); })
7309
/* Merge-masking form of _mm512_shuffle_ps: combines the shuffle result with
   W through __builtin_ia32_selectps_512 under mask U. */
#define _mm512_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)(__m512)(W)); })
7314
/* Zero-masking form of _mm512_shuffle_ps: combines the shuffle result with an
   all-zero vector through __builtin_ia32_selectps_512 under mask U. */
#define _mm512_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512( \
      (__mmask16)(U), \
      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
      (__v16sf)_mm512_setzero_ps()); })
7319
/* Unmasked form: calls __builtin_ia32_sqrtsd_round_mask with A, B, an
   all-zero pass-through vector, an all-ones mask and rounding argument R. */
#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, \
      (int)(R)); })
7325
7326static __inline__ __m128d __DEFAULT_FN_ATTRS
7327_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7328{
7329 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7330                 (__v2df) __B,
7331                (__v2df) __W,
7332                (__mmask8) __U,
7333                _MM_FROUND_CUR_DIRECTION);
7334}
7335
/* Merge-masking form: calls __builtin_ia32_sqrtsd_round_mask with A, B, W as
   the pass-through vector, mask U and rounding argument R. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), \
      (__mmask8)(U), \
      (int)(R)); })
7341
7342static __inline__ __m128d __DEFAULT_FN_ATTRS
7343_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
7344{
7345 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
7346                 (__v2df) __B,
7347                (__v2df) _mm_setzero_pd (),
7348                (__mmask8) __U,
7349                _MM_FROUND_CUR_DIRECTION);
7350}
7351
/* Zero-masking form: calls __builtin_ia32_sqrtsd_round_mask with A, B, an
   all-zero pass-through vector, mask U and rounding argument R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_sqrtsd_round_mask( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), \
      (int)(R)); })
7357
/* Unmasked form: calls __builtin_ia32_sqrtss_round_mask with A, B, an
   all-zero pass-through vector, an all-ones mask and rounding argument R. */
#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, \
      (int)(R)); })
7363
7364static __inline__ __m128 __DEFAULT_FN_ATTRS
7365_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7366{
7367 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7368                 (__v4sf) __B,
7369                (__v4sf) __W,
7370                (__mmask8) __U,
7371                _MM_FROUND_CUR_DIRECTION);
7372}
7373
/* Merge-masking form: calls __builtin_ia32_sqrtss_round_mask with A, B, W as
   the pass-through vector, mask U and rounding argument R. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U), \
      (int)(R)); })
7379
7380static __inline__ __m128 __DEFAULT_FN_ATTRS
7381_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
7382{
7383 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
7384                 (__v4sf) __B,
7385                (__v4sf) _mm_setzero_ps (),
7386                (__mmask8) __U,
7387                _MM_FROUND_CUR_DIRECTION);
7388}
7389
/* Zero-masking form: calls __builtin_ia32_sqrtss_round_mask with A, B, an
   all-zero pass-through vector, mask U and rounding argument R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_sqrtss_round_mask( \
      (__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), \
      (int)(R)); })
7395
7396static __inline__ __m512 __DEFAULT_FN_ATTRS
7397_mm512_broadcast_f32x4(__m128 __A)
7398{
7399  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
7400                                         0, 1, 2, 3, 0, 1, 2, 3,
7401                                         0, 1, 2, 3, 0, 1, 2, 3);
7402}
7403
7404static __inline__ __m512 __DEFAULT_FN_ATTRS
7405_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
7406{
7407  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7408                                           (__v16sf)_mm512_broadcast_f32x4(__A),
7409                                           (__v16sf)__O);
7410}
7411
7412static __inline__ __m512 __DEFAULT_FN_ATTRS
7413_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
7414{
7415  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
7416                                           (__v16sf)_mm512_broadcast_f32x4(__A),
7417                                           (__v16sf)_mm512_setzero_ps());
7418}
7419
7420static __inline__ __m512d __DEFAULT_FN_ATTRS
7421_mm512_broadcast_f64x4(__m256d __A)
7422{
7423  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
7424                                          0, 1, 2, 3, 0, 1, 2, 3);
7425}
7426
7427static __inline__ __m512d __DEFAULT_FN_ATTRS
7428_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
7429{
7430  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7431                                            (__v8df)_mm512_broadcast_f64x4(__A),
7432                                            (__v8df)__O);
7433}
7434
7435static __inline__ __m512d __DEFAULT_FN_ATTRS
7436_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
7437{
7438  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
7439                                            (__v8df)_mm512_broadcast_f64x4(__A),
7440                                            (__v8df)_mm512_setzero_pd());
7441}
7442
7443static __inline__ __m512i __DEFAULT_FN_ATTRS
7444_mm512_broadcast_i32x4(__m128i __A)
7445{
7446  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
7447                                          0, 1, 2, 3, 0, 1, 2, 3,
7448                                          0, 1, 2, 3, 0, 1, 2, 3);
7449}
7450
7451static __inline__ __m512i __DEFAULT_FN_ATTRS
7452_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
7453{
7454  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7455                                           (__v16si)_mm512_broadcast_i32x4(__A),
7456                                           (__v16si)__O);
7457}
7458
7459static __inline__ __m512i __DEFAULT_FN_ATTRS
7460_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
7461{
7462  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
7463                                           (__v16si)_mm512_broadcast_i32x4(__A),
7464                                           (__v16si)_mm512_setzero_si512());
7465}
7466
7467static __inline__ __m512i __DEFAULT_FN_ATTRS
7468_mm512_broadcast_i64x4(__m256i __A)
7469{
7470  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
7471                                          0, 1, 2, 3, 0, 1, 2, 3);
7472}
7473
7474static __inline__ __m512i __DEFAULT_FN_ATTRS
7475_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
7476{
7477  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7478                                            (__v8di)_mm512_broadcast_i64x4(__A),
7479                                            (__v8di)__O);
7480}
7481
7482static __inline__ __m512i __DEFAULT_FN_ATTRS
7483_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
7484{
7485  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
7486                                            (__v8di)_mm512_broadcast_i64x4(__A),
7487                                            (__v8di)_mm512_setzero_si512());
7488}
7489
7490static __inline__ __m512d __DEFAULT_FN_ATTRS
7491_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7492{
7493  return (__m512d)__builtin_ia32_selectpd_512(__M,
7494                                              (__v8df) _mm512_broadcastsd_pd(__A),
7495                                              (__v8df) __O);
7496}
7497
7498static __inline__ __m512d __DEFAULT_FN_ATTRS
7499_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7500{
7501  return (__m512d)__builtin_ia32_selectpd_512(__M,
7502                                              (__v8df) _mm512_broadcastsd_pd(__A),
7503                                              (__v8df) _mm512_setzero_pd());
7504}
7505
7506static __inline__ __m512 __DEFAULT_FN_ATTRS
7507_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7508{
7509  return (__m512)__builtin_ia32_selectps_512(__M,
7510                                             (__v16sf) _mm512_broadcastss_ps(__A),
7511                                             (__v16sf) __O);
7512}
7513
7514static __inline__ __m512 __DEFAULT_FN_ATTRS
7515_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7516{
7517  return (__m512)__builtin_ia32_selectps_512(__M,
7518                                             (__v16sf) _mm512_broadcastss_ps(__A),
7519                                             (__v16sf) _mm512_setzero_ps());
7520}
7521
7522static __inline__ __m128i __DEFAULT_FN_ATTRS
7523_mm512_cvtsepi32_epi8 (__m512i __A)
7524{
7525  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7526               (__v16qi) _mm_undefined_si128 (),
7527               (__mmask16) -1);
7528}
7529
7530static __inline__ __m128i __DEFAULT_FN_ATTRS
7531_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7532{
7533  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7534               (__v16qi) __O, __M);
7535}
7536
7537static __inline__ __m128i __DEFAULT_FN_ATTRS
7538_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
7539{
7540  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
7541               (__v16qi) _mm_setzero_si128 (),
7542               __M);
7543}
7544
7545static __inline__ void __DEFAULT_FN_ATTRS
7546_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7547{
7548  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7549}
7550
7551static __inline__ __m256i __DEFAULT_FN_ATTRS
7552_mm512_cvtsepi32_epi16 (__m512i __A)
7553{
7554  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7555               (__v16hi) _mm256_undefined_si256 (),
7556               (__mmask16) -1);
7557}
7558
7559static __inline__ __m256i __DEFAULT_FN_ATTRS
7560_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7561{
7562  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7563               (__v16hi) __O, __M);
7564}
7565
7566static __inline__ __m256i __DEFAULT_FN_ATTRS
7567_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
7568{
7569  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7570               (__v16hi) _mm256_setzero_si256 (),
7571               __M);
7572}
7573
7574static __inline__ void __DEFAULT_FN_ATTRS
7575_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7576{
7577  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7578}
7579
7580static __inline__ __m128i __DEFAULT_FN_ATTRS
7581_mm512_cvtsepi64_epi8 (__m512i __A)
7582{
7583  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7584               (__v16qi) _mm_undefined_si128 (),
7585               (__mmask8) -1);
7586}
7587
7588static __inline__ __m128i __DEFAULT_FN_ATTRS
7589_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7590{
7591  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7592               (__v16qi) __O, __M);
7593}
7594
7595static __inline__ __m128i __DEFAULT_FN_ATTRS
7596_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
7597{
7598  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7599               (__v16qi) _mm_setzero_si128 (),
7600               __M);
7601}
7602
7603static __inline__ void __DEFAULT_FN_ATTRS
7604_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7605{
7606  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7607}
7608
7609static __inline__ __m256i __DEFAULT_FN_ATTRS
7610_mm512_cvtsepi64_epi32 (__m512i __A)
7611{
7612  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7613               (__v8si) _mm256_undefined_si256 (),
7614               (__mmask8) -1);
7615}
7616
7617static __inline__ __m256i __DEFAULT_FN_ATTRS
7618_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7619{
7620  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7621               (__v8si) __O, __M);
7622}
7623
7624static __inline__ __m256i __DEFAULT_FN_ATTRS
7625_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
7626{
7627  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7628               (__v8si) _mm256_setzero_si256 (),
7629               __M);
7630}
7631
7632static __inline__ void __DEFAULT_FN_ATTRS
7633_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
7634{
7635  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7636}
7637
7638static __inline__ __m128i __DEFAULT_FN_ATTRS
7639_mm512_cvtsepi64_epi16 (__m512i __A)
7640{
7641  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7642               (__v8hi) _mm_undefined_si128 (),
7643               (__mmask8) -1);
7644}
7645
7646static __inline__ __m128i __DEFAULT_FN_ATTRS
7647_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7648{
7649  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7650               (__v8hi) __O, __M);
7651}
7652
7653static __inline__ __m128i __DEFAULT_FN_ATTRS
7654_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
7655{
7656  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7657               (__v8hi) _mm_setzero_si128 (),
7658               __M);
7659}
7660
7661static __inline__ void __DEFAULT_FN_ATTRS
7662_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7663{
7664  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7665}
7666
7667static __inline__ __m128i __DEFAULT_FN_ATTRS
7668_mm512_cvtusepi32_epi8 (__m512i __A)
7669{
7670  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7671                (__v16qi) _mm_undefined_si128 (),
7672                (__mmask16) -1);
7673}
7674
7675static __inline__ __m128i __DEFAULT_FN_ATTRS
7676_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7677{
7678  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7679                (__v16qi) __O,
7680                __M);
7681}
7682
7683static __inline__ __m128i __DEFAULT_FN_ATTRS
7684_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
7685{
7686  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7687                (__v16qi) _mm_setzero_si128 (),
7688                __M);
7689}
7690
7691static __inline__ void __DEFAULT_FN_ATTRS
7692_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7693{
7694  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7695}
7696
7697static __inline__ __m256i __DEFAULT_FN_ATTRS
7698_mm512_cvtusepi32_epi16 (__m512i __A)
7699{
7700  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7701                (__v16hi) _mm256_undefined_si256 (),
7702                (__mmask16) -1);
7703}
7704
7705static __inline__ __m256i __DEFAULT_FN_ATTRS
7706_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7707{
7708  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7709                (__v16hi) __O,
7710                __M);
7711}
7712
7713static __inline__ __m256i __DEFAULT_FN_ATTRS
7714_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
7715{
7716  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7717                (__v16hi) _mm256_setzero_si256 (),
7718                __M);
7719}
7720
7721static __inline__ void __DEFAULT_FN_ATTRS
7722_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
7723{
7724  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7725}
7726
7727static __inline__ __m128i __DEFAULT_FN_ATTRS
7728_mm512_cvtusepi64_epi8 (__m512i __A)
7729{
7730  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7731                (__v16qi) _mm_undefined_si128 (),
7732                (__mmask8) -1);
7733}
7734
7735static __inline__ __m128i __DEFAULT_FN_ATTRS
7736_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7737{
7738  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7739                (__v16qi) __O,
7740                __M);
7741}
7742
7743static __inline__ __m128i __DEFAULT_FN_ATTRS
7744_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
7745{
7746  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7747                (__v16qi) _mm_setzero_si128 (),
7748                __M);
7749}
7750
7751static __inline__ void __DEFAULT_FN_ATTRS
7752_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7753{
7754  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7755}
7756
7757static __inline__ __m256i __DEFAULT_FN_ATTRS
7758_mm512_cvtusepi64_epi32 (__m512i __A)
7759{
7760  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7761                (__v8si) _mm256_undefined_si256 (),
7762                (__mmask8) -1);
7763}
7764
7765static __inline__ __m256i __DEFAULT_FN_ATTRS
7766_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7767{
7768  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7769                (__v8si) __O, __M);
7770}
7771
7772static __inline__ __m256i __DEFAULT_FN_ATTRS
7773_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
7774{
7775  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7776                (__v8si) _mm256_setzero_si256 (),
7777                __M);
7778}
7779
7780static __inline__ void __DEFAULT_FN_ATTRS
7781_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7782{
7783  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7784}
7785
7786static __inline__ __m128i __DEFAULT_FN_ATTRS
7787_mm512_cvtusepi64_epi16 (__m512i __A)
7788{
7789  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7790                (__v8hi) _mm_undefined_si128 (),
7791                (__mmask8) -1);
7792}
7793
7794static __inline__ __m128i __DEFAULT_FN_ATTRS
7795_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7796{
7797  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7798                (__v8hi) __O, __M);
7799}
7800
7801static __inline__ __m128i __DEFAULT_FN_ATTRS
7802_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
7803{
7804  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7805                (__v8hi) _mm_setzero_si128 (),
7806                __M);
7807}
7808
7809static __inline__ void __DEFAULT_FN_ATTRS
7810_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7811{
7812  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7813}
7814
7815static __inline__ __m128i __DEFAULT_FN_ATTRS
7816_mm512_cvtepi32_epi8 (__m512i __A)
7817{
7818  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7819              (__v16qi) _mm_undefined_si128 (),
7820              (__mmask16) -1);
7821}
7822
7823static __inline__ __m128i __DEFAULT_FN_ATTRS
7824_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7825{
7826  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7827              (__v16qi) __O, __M);
7828}
7829
7830static __inline__ __m128i __DEFAULT_FN_ATTRS
7831_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
7832{
7833  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7834              (__v16qi) _mm_setzero_si128 (),
7835              __M);
7836}
7837
7838static __inline__ void __DEFAULT_FN_ATTRS
7839_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7840{
7841  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7842}
7843
7844static __inline__ __m256i __DEFAULT_FN_ATTRS
7845_mm512_cvtepi32_epi16 (__m512i __A)
7846{
7847  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7848              (__v16hi) _mm256_undefined_si256 (),
7849              (__mmask16) -1);
7850}
7851
7852static __inline__ __m256i __DEFAULT_FN_ATTRS
7853_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7854{
7855  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7856              (__v16hi) __O, __M);
7857}
7858
7859static __inline__ __m256i __DEFAULT_FN_ATTRS
7860_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
7861{
7862  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7863              (__v16hi) _mm256_setzero_si256 (),
7864              __M);
7865}
7866
7867static __inline__ void __DEFAULT_FN_ATTRS
7868_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7869{
7870  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7871}
7872
7873static __inline__ __m128i __DEFAULT_FN_ATTRS
7874_mm512_cvtepi64_epi8 (__m512i __A)
7875{
7876  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7877              (__v16qi) _mm_undefined_si128 (),
7878              (__mmask8) -1);
7879}
7880
7881static __inline__ __m128i __DEFAULT_FN_ATTRS
7882_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7883{
7884  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7885              (__v16qi) __O, __M);
7886}
7887
7888static __inline__ __m128i __DEFAULT_FN_ATTRS
7889_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
7890{
7891  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7892              (__v16qi) _mm_setzero_si128 (),
7893              __M);
7894}
7895
7896static __inline__ void __DEFAULT_FN_ATTRS
7897_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7898{
7899  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7900}
7901
7902static __inline__ __m256i __DEFAULT_FN_ATTRS
7903_mm512_cvtepi64_epi32 (__m512i __A)
7904{
7905  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7906              (__v8si) _mm256_undefined_si256 (),
7907              (__mmask8) -1);
7908}
7909
7910static __inline__ __m256i __DEFAULT_FN_ATTRS
7911_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7912{
7913  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7914              (__v8si) __O, __M);
7915}
7916
7917static __inline__ __m256i __DEFAULT_FN_ATTRS
7918_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
7919{
7920  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7921              (__v8si) _mm256_setzero_si256 (),
7922              __M);
7923}
7924
7925static __inline__ void __DEFAULT_FN_ATTRS
7926_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7927{
7928  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7929}
7930
7931static __inline__ __m128i __DEFAULT_FN_ATTRS
7932_mm512_cvtepi64_epi16 (__m512i __A)
7933{
7934  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7935              (__v8hi) _mm_undefined_si128 (),
7936              (__mmask8) -1);
7937}
7938
7939static __inline__ __m128i __DEFAULT_FN_ATTRS
7940_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7941{
7942  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7943              (__v8hi) __O, __M);
7944}
7945
7946static __inline__ __m128i __DEFAULT_FN_ATTRS
7947_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
7948{
7949  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7950              (__v8hi) _mm_setzero_si128 (),
7951              __M);
7952}
7953
7954static __inline__ void __DEFAULT_FN_ATTRS
7955_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7956{
7957  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7958}
7959
/* Extracts one group of four 32-bit elements from A as a __m128i: the
 * shuffle picks elements 4*(imm & 3) .. 4*(imm & 3) + 3 of A.  All indices
 * are below 16, so the undefined second shuffle operand is never selected.
 * imm must be a compile-time constant (it forms shuffle indices).  */
7960#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({            \
7961  (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A),             \
7962                                   (__v16si)_mm512_undefined_epi32(), \
7963                                   0 + ((imm) & 0x3) * 4,             \
7964                                   1 + ((imm) & 0x3) * 4,             \
7965                                   2 + ((imm) & 0x3) * 4,             \
7966                                   3 + ((imm) & 0x3) * 4); })
7967
/* Masked form of _mm512_extracti32x4_epi32: the extracted vector, the mask U
 * and the alternative vector W are handed to __builtin_ia32_selectd_128.  */
7968#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
7969  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
7970                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
7971                                (__v4si)(W)); })
7972
/* Zero-masked form of _mm512_extracti32x4_epi32: the extracted vector, the
 * mask U and an all-zero alternative vector are handed to
 * __builtin_ia32_selectd_128.  */
7973#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
7974  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
7975                                (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
7976                                (__v4si)_mm_setzero_si128()); })
7977
/* Extracts four 64-bit elements from A as a __m256i: elements 4..7 when bit
 * 0 of imm is set, elements 0..3 otherwise.  The undefined second shuffle
 * operand is never selected.  imm must be a compile-time constant.  */
7978#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({           \
7979  (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A),             \
7980                                   (__v8di)_mm512_undefined_epi32(), \
7981                                   ((imm) & 1) ? 4 : 0,              \
7982                                   ((imm) & 1) ? 5 : 1,              \
7983                                   ((imm) & 1) ? 6 : 2,              \
7984                                   ((imm) & 1) ? 7 : 3); })
7985
/* Masked form of _mm512_extracti64x4_epi64: the extracted vector, the mask U
 * and the alternative vector W are handed to __builtin_ia32_selectq_256.  */
7986#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
7987  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
7988                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
7989                                (__v4di)(W)); })
7990
/* Zero-masked form of _mm512_extracti64x4_epi64: the extracted vector, the
 * mask U and an all-zero alternative vector are handed to
 * __builtin_ia32_selectq_256.  */
7991#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
7992  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
7993                                (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
7994                                (__v4di)_mm256_setzero_si256()); })
7995
/* Builds a __m512d from A with one group of four doubles replaced by B.
 * Shuffle indices 8..11 name the first four elements of the widened B.
 * With bit 0 of imm set the result is A[0..3] followed by B; otherwise it
 * is B followed by A[4..7].  imm must be a compile-time constant.  */
7996#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
7997  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
7998                                 (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
7999                                 ((imm) & 0x1) ?  0 :  8, \
8000                                 ((imm) & 0x1) ?  1 :  9, \
8001                                 ((imm) & 0x1) ?  2 : 10, \
8002                                 ((imm) & 0x1) ?  3 : 11, \
8003                                 ((imm) & 0x1) ?  8 :  4, \
8004                                 ((imm) & 0x1) ?  9 :  5, \
8005                                 ((imm) & 0x1) ? 10 :  6, \
8006                                 ((imm) & 0x1) ? 11 :  7); })
8007
/* Masked form of _mm512_insertf64x4: the inserted result, the mask U and the
 * alternative vector W are handed to __builtin_ia32_selectpd_512.  */
8008#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
8009  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8010                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
8011                                  (__v8df)(W)); })
8012
/* Zero-masked form of _mm512_insertf64x4: the inserted result, the mask U
 * and an all-zero alternative vector are handed to
 * __builtin_ia32_selectpd_512.  */
8013#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
8014  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
8015                                  (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
8016                                  (__v8df)_mm512_setzero_pd()); })
8017
/* Builds a __m512i from A with one group of four 64-bit elements replaced by
 * B.  Shuffle indices 8..11 name the first four elements of the widened B.
 * With bit 0 of imm set the result is A[0..3] followed by B; otherwise it
 * is B followed by A[4..7].  imm must be a compile-time constant.  */
8018#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
8019  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
8020                                 (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
8021                                 ((imm) & 0x1) ?  0 :  8, \
8022                                 ((imm) & 0x1) ?  1 :  9, \
8023                                 ((imm) & 0x1) ?  2 : 10, \
8024                                 ((imm) & 0x1) ?  3 : 11, \
8025                                 ((imm) & 0x1) ?  8 :  4, \
8026                                 ((imm) & 0x1) ?  9 :  5, \
8027                                 ((imm) & 0x1) ? 10 :  6, \
8028                                 ((imm) & 0x1) ? 11 :  7); })
8029
/* Masked form of _mm512_inserti64x4: the inserted result, the mask U and the
 * alternative vector W are handed to __builtin_ia32_selectq_512.  */
8030#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
8031  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8032                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
8033                                  (__v8di)(W)); })
8034
/* Zero-masked form of _mm512_inserti64x4: the inserted result, the mask U
 * and an all-zero alternative vector are handed to
 * __builtin_ia32_selectq_512.  */
8035#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
8036  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
8037                                  (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
8038                                  (__v8di)_mm512_setzero_si512()); })
8039
/* Builds a __m512 from A with one group of four floats replaced by B.  The
 * sixteen shuffle indices are laid out as four groups of four; the group
 * whose number equals (imm & 3) takes indices 16..19 (the first four
 * elements of the widened B), every other group keeps A's own elements.
 * imm must be a compile-time constant.  */
8040#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
8041  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
8042                                  (__v16sf)_mm512_castps128_ps512((__m128)(B)),\
8043                                  (((imm) & 0x3) == 0) ? 16 :  0, \
8044                                  (((imm) & 0x3) == 0) ? 17 :  1, \
8045                                  (((imm) & 0x3) == 0) ? 18 :  2, \
8046                                  (((imm) & 0x3) == 0) ? 19 :  3, \
8047                                  (((imm) & 0x3) == 1) ? 16 :  4, \
8048                                  (((imm) & 0x3) == 1) ? 17 :  5, \
8049                                  (((imm) & 0x3) == 1) ? 18 :  6, \
8050                                  (((imm) & 0x3) == 1) ? 19 :  7, \
8051                                  (((imm) & 0x3) == 2) ? 16 :  8, \
8052                                  (((imm) & 0x3) == 2) ? 17 :  9, \
8053                                  (((imm) & 0x3) == 2) ? 18 : 10, \
8054                                  (((imm) & 0x3) == 2) ? 19 : 11, \
8055                                  (((imm) & 0x3) == 3) ? 16 : 12, \
8056                                  (((imm) & 0x3) == 3) ? 17 : 13, \
8057                                  (((imm) & 0x3) == 3) ? 18 : 14, \
8058                                  (((imm) & 0x3) == 3) ? 19 : 15); })
8059
/* Masked form of _mm512_insertf32x4: the inserted result, the 16-bit mask U
 * and the alternative vector W are handed to __builtin_ia32_selectps_512.  */
8060#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
8061  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
8062                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
8063                                 (__v16sf)(W)); })
8064
/* Zero-masked form of _mm512_insertf32x4: the inserted result, the 16-bit
 * mask U and an all-zero alternative vector are handed to
 * __builtin_ia32_selectps_512.  */
8065#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
8066  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
8067                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
8068                                 (__v16sf)_mm512_setzero_ps()); })
8069
/* Builds a __m512i from A with one group of four 32-bit elements replaced by
 * B.  The sixteen shuffle indices are laid out as four groups of four; the
 * group whose number equals (imm & 3) takes indices 16..19 (the first four
 * elements of the widened B), every other group keeps A's own elements.
 * imm must be a compile-time constant.  */
8070#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
8071  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
8072                                 (__v16si)_mm512_castsi128_si512((__m128i)(B)),\
8073                                 (((imm) & 0x3) == 0) ? 16 :  0, \
8074                                 (((imm) & 0x3) == 0) ? 17 :  1, \
8075                                 (((imm) & 0x3) == 0) ? 18 :  2, \
8076                                 (((imm) & 0x3) == 0) ? 19 :  3, \
8077                                 (((imm) & 0x3) == 1) ? 16 :  4, \
8078                                 (((imm) & 0x3) == 1) ? 17 :  5, \
8079                                 (((imm) & 0x3) == 1) ? 18 :  6, \
8080                                 (((imm) & 0x3) == 1) ? 19 :  7, \
8081                                 (((imm) & 0x3) == 2) ? 16 :  8, \
8082                                 (((imm) & 0x3) == 2) ? 17 :  9, \
8083                                 (((imm) & 0x3) == 2) ? 18 : 10, \
8084                                 (((imm) & 0x3) == 2) ? 19 : 11, \
8085                                 (((imm) & 0x3) == 3) ? 16 : 12, \
8086                                 (((imm) & 0x3) == 3) ? 17 : 13, \
8087                                 (((imm) & 0x3) == 3) ? 18 : 14, \
8088                                 (((imm) & 0x3) == 3) ? 19 : 15); })
8089
/* Masked form of _mm512_inserti32x4: the inserted result, the 16-bit mask U
 * and the alternative vector W are handed to __builtin_ia32_selectd_512.  */
8090#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
8091  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8092                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
8093                                 (__v16si)(W)); })
8094
/* Zero-masked form of _mm512_inserti32x4: the inserted result, the 16-bit
 * mask U and an all-zero alternative vector are handed to
 * __builtin_ia32_selectd_512.  */
8095#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
8096  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
8097                                 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
8098                                 (__v16si)_mm512_setzero_si512()); })
8099
/* Unmasked getmant on eight doubles with an explicit rounding operand R.
 * B and C are packed into one immediate as (C << 2) | B.  The mask is all
 * ones and the source operand is an undefined vector.  */
8100#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
8101  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8102                                            (int)(((C)<<2) | (B)), \
8103                                            (__v8df)_mm512_undefined_pd(), \
8104                                            (__mmask8)-1, (int)(R)); })
8105
/* Masked getmant on eight doubles with an explicit rounding operand R.
 * B and C are packed into one immediate as (C << 2) | B; W is passed as the
 * source operand and U as the mask.  */
8106#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \
8107  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8108                                            (int)(((C)<<2) | (B)), \
8109                                            (__v8df)(__m512d)(W), \
8110                                            (__mmask8)(U), (int)(R)); })
8111
/* Zero-masked getmant on eight doubles with an explicit rounding operand R.
 * B and C are packed into one immediate as (C << 2) | B; an all-zero vector
 * is passed as the source operand and U as the mask.  */
8112#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \
8113  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8114                                            (int)(((C)<<2) | (B)), \
8115                                            (__v8df)_mm512_setzero_pd(), \
8116                                            (__mmask8)(U), (int)(R)); })
8117
/* Unmasked getmant on eight doubles using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed into one immediate as (C << 2) | B.  The mask is all
 * ones; the source operand here is an all-zero vector, whereas
 * _mm512_getmant_round_pd and _mm512_getmant_ps pass an undefined one.  */
8118#define _mm512_getmant_pd(A, B, C) __extension__ ({ \
8119  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8120                                            (int)(((C)<<2) | (B)), \
8121                                            (__v8df)_mm512_setzero_pd(), \
8122                                            (__mmask8)-1, \
8123                                            _MM_FROUND_CUR_DIRECTION); })
8124
/* Masked getmant on eight doubles using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed into one immediate as (C << 2) | B; W is passed as the
 * source operand and U as the mask.  */
8125#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
8126  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8127                                            (int)(((C)<<2) | (B)), \
8128                                            (__v8df)(__m512d)(W), \
8129                                            (__mmask8)(U), \
8130                                            _MM_FROUND_CUR_DIRECTION); })
8131
/* Zero-masked getmant on eight doubles using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed into one immediate as (C << 2) | B; an all-zero vector
 * is passed as the source operand and U as the mask.  */
8132#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
8133  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
8134                                            (int)(((C)<<2) | (B)), \
8135                                            (__v8df)_mm512_setzero_pd(), \
8136                                            (__mmask8)(U), \
8137                                            _MM_FROUND_CUR_DIRECTION); })
8138
/* Unmasked getmant on sixteen floats with an explicit rounding operand R.
 * B and C are packed into one immediate as (C << 2) | B.  The mask is all
 * ones and the source operand is an undefined vector.  */
8139#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \
8140  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8141                                           (int)(((C)<<2) | (B)), \
8142                                           (__v16sf)_mm512_undefined_ps(), \
8143                                           (__mmask16)-1, (int)(R)); })
8144
/* Masked getmant on sixteen floats with an explicit rounding operand R.
 * B and C are packed into one immediate as (C << 2) | B; W is passed as the
 * source operand and U as the 16-bit mask.  */
8145#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \
8146  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8147                                           (int)(((C)<<2) | (B)), \
8148                                           (__v16sf)(__m512)(W), \
8149                                           (__mmask16)(U), (int)(R)); })
8150
/* Zero-masked getmant on sixteen floats with an explicit rounding operand R.
 * B and C are packed into one immediate as (C << 2) | B; an all-zero vector
 * is passed as the source operand and U as the 16-bit mask.  */
8151#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \
8152  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8153                                           (int)(((C)<<2) | (B)), \
8154                                           (__v16sf)_mm512_setzero_ps(), \
8155                                           (__mmask16)(U), (int)(R)); })
8156
/* Unmasked getmant on sixteen floats using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed into one immediate as (C << 2) | B.  The mask is all
 * ones and the source operand is an undefined vector.  */
8157#define _mm512_getmant_ps(A, B, C) __extension__ ({ \
8158  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8159                                           (int)(((C)<<2)|(B)), \
8160                                           (__v16sf)_mm512_undefined_ps(), \
8161                                           (__mmask16)-1, \
8162                                           _MM_FROUND_CUR_DIRECTION); })
8163
/* Masked getmant on sixteen floats using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed into one immediate as (C << 2) | B; W is passed as the
 * source operand and U as the 16-bit mask.  */
8164#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
8165  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8166                                           (int)(((C)<<2)|(B)), \
8167                                           (__v16sf)(__m512)(W), \
8168                                           (__mmask16)(U), \
8169                                           _MM_FROUND_CUR_DIRECTION); })
8170
/* Zero-masked getmant on sixteen floats using _MM_FROUND_CUR_DIRECTION.
 * B and C are packed into one immediate as (C << 2) | B; an all-zero vector
 * is passed as the source operand and U as the 16-bit mask.  */
8171#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
8172  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
8173                                           (int)(((C)<<2)|(B)), \
8174                                           (__v16sf)_mm512_setzero_ps(), \
8175                                           (__mmask16)(U), \
8176                                           _MM_FROUND_CUR_DIRECTION); })
8177
/* Unmasked getexp on eight doubles with an explicit rounding operand R: the
 * mask is all ones and the source operand is an undefined vector.  */
8178#define _mm512_getexp_round_pd(A, R) __extension__ ({ \
8179  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8180                                           (__v8df)_mm512_undefined_pd(), \
8181                                           (__mmask8)-1, (int)(R)); })
8182
/* Masked getexp on eight doubles with an explicit rounding operand R: W is
 * passed as the source operand and U as the mask.  */
8183#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \
8184  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8185                                           (__v8df)(__m512d)(W), \
8186                                           (__mmask8)(U), (int)(R)); })
8187
/* Zero-masked getexp on eight doubles with an explicit rounding operand R:
 * an all-zero vector is passed as the source operand and U as the mask.  */
8188#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \
8189  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
8190                                           (__v8df)_mm512_setzero_pd(), \
8191                                           (__mmask8)(U), (int)(R)); })
8192
8193static __inline__ __m512d __DEFAULT_FN_ATTRS
8194_mm512_getexp_pd (__m512d __A)
8195{
8196  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8197                (__v8df) _mm512_undefined_pd (),
8198                (__mmask8) -1,
8199                _MM_FROUND_CUR_DIRECTION);
8200}
8201
8202static __inline__ __m512d __DEFAULT_FN_ATTRS
8203_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
8204{
8205  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8206                (__v8df) __W,
8207                (__mmask8) __U,
8208                _MM_FROUND_CUR_DIRECTION);
8209}
8210
8211static __inline__ __m512d __DEFAULT_FN_ATTRS
8212_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
8213{
8214  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
8215                (__v8df) _mm512_setzero_pd (),
8216                (__mmask8) __U,
8217                _MM_FROUND_CUR_DIRECTION);
8218}
8219
/* Unmasked getexp on sixteen floats with an explicit rounding operand R: the
 * mask is all ones and the source operand is an undefined vector.  */
8220#define _mm512_getexp_round_ps(A, R) __extension__ ({ \
8221  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8222                                          (__v16sf)_mm512_undefined_ps(), \
8223                                          (__mmask16)-1, (int)(R)); })
8224
/* Masked getexp on sixteen floats with an explicit rounding operand R: W is
 * passed as the source operand and U as the 16-bit mask.  */
8225#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \
8226  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8227                                          (__v16sf)(__m512)(W), \
8228                                          (__mmask16)(U), (int)(R)); })
8229
/* Zero-masked getexp on sixteen floats with an explicit rounding operand R:
 * an all-zero vector is passed as the source operand and U as the 16-bit
 * mask.  */
8230#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \
8231  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
8232                                          (__v16sf)_mm512_setzero_ps(), \
8233                                          (__mmask16)(U), (int)(R)); })
8234
8235static __inline__ __m512 __DEFAULT_FN_ATTRS
8236_mm512_getexp_ps (__m512 __A)
8237{
8238  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8239               (__v16sf) _mm512_undefined_ps (),
8240               (__mmask16) -1,
8241               _MM_FROUND_CUR_DIRECTION);
8242}
8243
8244static __inline__ __m512 __DEFAULT_FN_ATTRS
8245_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
8246{
8247  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8248               (__v16sf) __W,
8249               (__mmask16) __U,
8250               _MM_FROUND_CUR_DIRECTION);
8251}
8252
8253static __inline__ __m512 __DEFAULT_FN_ATTRS
8254_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
8255{
8256  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
8257               (__v16sf) _mm512_setzero_ps (),
8258               (__mmask16) __U,
8259               _MM_FROUND_CUR_DIRECTION);
8260}
8261
/* Unmasked gather of floats with eight 64-bit indices: passes an undefined
 * source vector, addr as float const *, the index vector, an all-ones mask
 * and scale to __builtin_ia32_gatherdiv16sf.  The result is a __m256.  */
8262#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \
8263  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
8264                                       (float const *)(addr), \
8265                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
8266                                       (int)(scale)); })
8267
/* Masked gather of floats with eight 64-bit indices: v1_old is passed as the
 * source vector and mask as the 8-bit mask to __builtin_ia32_gatherdiv16sf.
 * The result is a __m256.  */
8268#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__({\
8269  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
8270                                       (float const *)(addr), \
8271                                       (__v8di)(__m512i)(index), \
8272                                       (__mmask8)(mask), (int)(scale)); })
8273
/* Unmasked gather of 32-bit integers with eight 64-bit indices: passes an
 * undefined source vector, addr as int const *, the index vector, an
 * all-ones mask and scale to __builtin_ia32_gatherdiv16si.  The result is a
 * __m256i.  The undefined source comes from the integer helper
 * _mm256_undefined_si256() rather than a float helper, so no float-to-int
 * vector reinterpretation is involved.  */
#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (int const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale)); })
8279
/* Masked gather of 32-bit integers with eight 64-bit indices: v1_old is
 * passed as the source vector and mask as the 8-bit mask to
 * __builtin_ia32_gatherdiv16si.  The result is a __m256i.  */
8280#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8281  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
8282                                        (int const *)(addr), \
8283                                        (__v8di)(__m512i)(index), \
8284                                        (__mmask8)(mask), (int)(scale)); })
8285
/* Unmasked gather of doubles with eight 64-bit indices: passes an undefined
 * source vector, addr as double const *, the index vector, an all-ones mask
 * and scale to __builtin_ia32_gatherdiv8df.  */
8286#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\
8287  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
8288                                       (double const *)(addr), \
8289                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
8290                                       (int)(scale)); })
8291
/* Masked gather of doubles with eight 64-bit indices: v1_old is passed as
 * the source vector and mask as the 8-bit mask to
 * __builtin_ia32_gatherdiv8df.  */
8292#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8293  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
8294                                       (double const *)(addr), \
8295                                       (__v8di)(__m512i)(index), \
8296                                       (__mmask8)(mask), (int)(scale)); })
8297
/* Unmasked gather of 64-bit integers with eight 64-bit indices: passes an
 * undefined source vector, addr as long long const *, the index vector, an
 * all-ones mask and scale to __builtin_ia32_gatherdiv8di.  The undefined
 * source comes from the integer helper _mm512_undefined_epi32() rather than
 * a double-precision helper, so no float-to-int vector reinterpretation is
 * involved.  */
#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (long long const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale)); })
8303
/* Masked gather of 64-bit integers with eight 64-bit indices: v1_old is
 * passed as the source vector and mask as the 8-bit mask to
 * __builtin_ia32_gatherdiv8di.  */
8304#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8305  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
8306                                       (long long const *)(addr), \
8307                                       (__v8di)(__m512i)(index), \
8308                                       (__mmask8)(mask), (int)(scale)); })
8309
/* Unmasked gather of sixteen floats: passes an undefined source vector, addr
 * as float const *, the index operand, an all-ones 16-bit mask and scale to
 * __builtin_ia32_gathersiv16sf.
 * NOTE(review): index is cast through __m512/__v16sf here, while the other
 * 32-bit-index macros in this file cast it to an integer vector
 * (__v16si / __v8si).  Presumably this mirrors the builtin's declared
 * parameter type - confirm against the builtin prototype before changing.  */
8310#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\
8311  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
8312                                       (float const *)(addr), \
8313                                       (__v16sf)(__m512)(index), \
8314                                       (__mmask16)-1, (int)(scale)); })
8315
/* Masked gather of sixteen floats: v1_old is passed as the source vector and
 * mask as the 16-bit mask to __builtin_ia32_gathersiv16sf.
 * NOTE(review): index is cast through __m512/__v16sf here, while the other
 * 32-bit-index macros in this file cast it to an integer vector.  Presumably
 * this mirrors the builtin's declared parameter type - confirm against the
 * builtin prototype before changing.  */
8316#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
8317  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
8318                                       (float const *)(addr), \
8319                                       (__v16sf)(__m512)(index), \
8320                                       (__mmask16)(mask), (int)(scale)); })
8321
/* Unmasked gather of sixteen 32-bit integers with sixteen 32-bit indices:
 * passes an undefined source vector, addr as int const *, the index vector,
 * an all-ones 16-bit mask and scale to __builtin_ia32_gathersiv16si.  */
8322#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\
8323  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
8324                                        (int const *)(addr), \
8325                                        (__v16si)(__m512i)(index), \
8326                                        (__mmask16)-1, (int)(scale)); })
8327
/* Masked gather of sixteen 32-bit integers with sixteen 32-bit indices:
 * v1_old is passed as the source vector and mask as the 16-bit mask to
 * __builtin_ia32_gathersiv16si.  */
8328#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
8329  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
8330                                        (int const *)(addr), \
8331                                        (__v16si)(__m512i)(index), \
8332                                        (__mmask16)(mask), (int)(scale)); })
8333
/* Unmasked gather of eight doubles with eight 32-bit indices (a __m256i):
 * passes an undefined source vector, addr as double const *, the index
 * vector, an all-ones mask and scale to __builtin_ia32_gathersiv8df.  */
8334#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\
8335  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
8336                                       (double const *)(addr), \
8337                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
8338                                       (int)(scale)); })
8339
/* Masked gather of eight doubles with eight 32-bit indices (a __m256i):
 * v1_old is passed as the source vector and mask as the 8-bit mask to
 * __builtin_ia32_gathersiv8df.  */
8340#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
8341  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
8342                                       (double const *)(addr), \
8343                                       (__v8si)(__m256i)(index), \
8344                                       (__mmask8)(mask), (int)(scale)); })
8345
/* Unmasked gather of eight 64-bit integers with eight 32-bit indices (a
 * __m256i): passes an undefined source vector, addr as long long const *,
 * the index vector, an all-ones mask and scale to
 * __builtin_ia32_gathersiv8di.  */
8346#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\
8347  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
8348                                       (long long const *)(addr), \
8349                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
8350                                       (int)(scale)); })
8351
/* Masked gather of eight 64-bit integers with eight 32-bit indices (a
 * __m256i): v1_old is passed as the source vector and mask as the 8-bit mask
 * to __builtin_ia32_gathersiv8di.  */
8352#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
8353  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
8354                                       (long long const *)(addr), \
8355                                       (__v8si)(__m256i)(index), \
8356                                       (__mmask8)(mask), (int)(scale)); })
8357
/* Unmasked scatter of the eight floats in v1 (a __m256) with eight 64-bit
 * indices: passes addr as float *, an all-ones mask, the index vector, v1
 * and scale to __builtin_ia32_scatterdiv16sf.  */
8358#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\
8359  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \
8360                                (__v8di)(__m512i)(index), \
8361                                (__v8sf)(__m256)(v1), (int)(scale)); })
8362
/* Masked scatter of the eight floats in v1 (a __m256) with eight 64-bit
 * indices: same operands as _mm512_i64scatter_ps but with the caller's mask
 * passed to __builtin_ia32_scatterdiv16sf.  */
8363#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
8364  __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \
8365                                (__v8di)(__m512i)(index), \
8366                                (__v8sf)(__m256)(v1), (int)(scale)); })
8367
/* Unmasked scatter of the eight 32-bit integers in v1 (a __m256i) with eight
 * 64-bit indices: passes addr as int *, an all-ones mask, the index vector,
 * v1 and scale to __builtin_ia32_scatterdiv16si.  */
8368#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\
8369  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \
8370                                (__v8di)(__m512i)(index), \
8371                                (__v8si)(__m256i)(v1), (int)(scale)); })
8372
/* Masked scatter of the eight 32-bit integers in v1 (a __m256i) with eight
 * 64-bit indices: same operands as _mm512_i64scatter_epi32 but with the
 * caller's mask passed to __builtin_ia32_scatterdiv16si.  */
8373#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
8374  __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \
8375                                (__v8di)(__m512i)(index), \
8376                                (__v8si)(__m256i)(v1), (int)(scale)); })
8377
/* Unmasked scatter of the eight doubles in v1 with eight 64-bit indices:
 * passes addr as double *, an all-ones mask, the index vector, v1 and scale
 * to __builtin_ia32_scatterdiv8df.  */
8378#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\
8379  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \
8380                               (__v8di)(__m512i)(index), \
8381                               (__v8df)(__m512d)(v1), (int)(scale)); })
8382
/* Masked scatter of the eight doubles in v1 with eight 64-bit indices: same
 * operands as _mm512_i64scatter_pd but with the caller's mask passed to
 * __builtin_ia32_scatterdiv8df.  */
8383#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
8384  __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \
8385                               (__v8di)(__m512i)(index), \
8386                               (__v8df)(__m512d)(v1), (int)(scale)); })
8387
/* Unmasked scatter of the eight 64-bit integers in v1 with eight 64-bit
 * indices: passes addr as long long *, an all-ones mask, the index vector,
 * v1 and scale to __builtin_ia32_scatterdiv8di.  */
8388#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\
8389  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \
8390                               (__v8di)(__m512i)(index), \
8391                               (__v8di)(__m512i)(v1), (int)(scale)); })
8392
/* Masked scatter of the eight 64-bit integers in v1 with eight 64-bit
 * indices: same operands as _mm512_i64scatter_epi64 but with the caller's
 * mask passed to __builtin_ia32_scatterdiv8di.  */
8393#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
8394  __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \
8395                               (__v8di)(__m512i)(index), \
8396                               (__v8di)(__m512i)(v1), (int)(scale)); })
8397
/* Unmasked scatter of the sixteen floats in v1 with sixteen 32-bit indices:
 * passes addr as float *, an all-ones 16-bit mask, the index vector, v1 and
 * scale to __builtin_ia32_scattersiv16sf.  */
8398#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\
8399  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \
8400                                (__v16si)(__m512i)(index), \
8401                                (__v16sf)(__m512)(v1), (int)(scale)); })
8402
/* Masked scatter of the sixteen floats in v1 with sixteen 32-bit indices:
 * same operands as _mm512_i32scatter_ps but with the caller's 16-bit mask
 * passed to __builtin_ia32_scattersiv16sf.  */
8403#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\
8404  __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \
8405                                (__v16si)(__m512i)(index), \
8406                                (__v16sf)(__m512)(v1), (int)(scale)); })
8407
/* Unmasked scatter of the sixteen 32-bit integers in v1 with sixteen 32-bit
 * indices: passes addr as int *, an all-ones 16-bit mask, the index vector,
 * v1 and scale to __builtin_ia32_scattersiv16si.  */
8408#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\
8409  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \
8410                                (__v16si)(__m512i)(index), \
8411                                (__v16si)(__m512i)(v1), (int)(scale)); })
8412
/* Masked scatter of the sixteen 32-bit integers in v1 with sixteen 32-bit
 * indices: same operands as _mm512_i32scatter_epi32 but with the caller's
 * 16-bit mask passed to __builtin_ia32_scattersiv16si.  */
8413#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\
8414  __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \
8415                                (__v16si)(__m512i)(index), \
8416                                (__v16si)(__m512i)(v1), (int)(scale)); })
8417
/* Unmasked scatter of the eight doubles in v1 with eight 32-bit indices (a
 * __m256i): passes addr as double *, an all-ones mask, the index vector, v1
 * and scale to __builtin_ia32_scattersiv8df.  */
8418#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\
8419  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \
8420                               (__v8si)(__m256i)(index), \
8421                               (__v8df)(__m512d)(v1), (int)(scale)); })
8422
/* Masked scatter of the eight doubles in v1 with eight 32-bit indices (a
 * __m256i): same operands as _mm512_i32scatter_pd but with the caller's
 * mask passed to __builtin_ia32_scattersiv8df.  */
8423#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\
8424  __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \
8425                               (__v8si)(__m256i)(index), \
8426                               (__v8df)(__m512d)(v1), (int)(scale)); })
8427
/* Unmasked scatter of the eight 64-bit integers in v1 with eight 32-bit
 * indices (a __m256i): passes addr as long long *, an all-ones mask, the
 * index vector, v1 and scale to __builtin_ia32_scattersiv8di.  */
8428#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\
8429  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \
8430                               (__v8si)(__m256i)(index), \
8431                               (__v8di)(__m512i)(v1), (int)(scale)); })
8432
/* Masked scatter of the eight 64-bit integers in v1 with eight 32-bit
 * indices (a __m256i): same operands as _mm512_i32scatter_epi64 but with the
 * caller's mask passed to __builtin_ia32_scattersiv8di.  */
8433#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\
8434  __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \
8435                               (__v8si)(__m256i)(index), \
8436                               (__v8di)(__m512i)(v1), (int)(scale)); })
8437
8438static __inline__ __m128 __DEFAULT_FN_ATTRS
8439_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8440{
8441 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8442          (__v4sf) __A,
8443          (__v4sf) __B,
8444          (__mmask8) __U,
8445          _MM_FROUND_CUR_DIRECTION);
8446}
8447
/* Masked scalar single-precision fused multiply-add with an explicit
 * rounding operand: passes W, A, B, the mask U and R to
 * __builtin_ia32_vfmaddss3_mask.  */
8448#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\
8449  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
8450                                        (__v4sf)(__m128)(A), \
8451                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
8452                                        (int)(R)); })
8453
8454static __inline__ __m128 __DEFAULT_FN_ATTRS
8455_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8456{
8457 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8458          (__v4sf) __B,
8459          (__v4sf) __C,
8460          (__mmask8) __U,
8461          _MM_FROUND_CUR_DIRECTION);
8462}
8463
/* Zero-masked scalar single-precision fused multiply-add with an explicit
 * rounding operand: passes A, B, C, the mask U and R to
 * __builtin_ia32_vfmaddss3_maskz.
 *
 * R must be forwarded to the builtin; substituting
 * _MM_FROUND_CUR_DIRECTION here would silently discard the rounding mode
 * the caller asked for.  R has to be a compile-time constant.  */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8469
8470static __inline__ __m128 __DEFAULT_FN_ATTRS
8471_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8472{
8473 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
8474          (__v4sf) __X,
8475          (__v4sf) __Y,
8476          (__mmask8) __U,
8477          _MM_FROUND_CUR_DIRECTION);
8478}
8479
/* mask3 form of the scalar single-precision fused multiply-add with an
 * explicit rounding operand: passes W, X, Y, the mask U and R to
 * __builtin_ia32_vfmaddss3_mask3.  */
8480#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\
8481  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
8482                                         (__v4sf)(__m128)(X), \
8483                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
8484                                         (int)(R)); })
8485
8486static __inline__ __m128 __DEFAULT_FN_ATTRS
8487_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8488{
8489 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8490          (__v4sf) __A,
8491          -(__v4sf) __B,
8492          (__mmask8) __U,
8493          _MM_FROUND_CUR_DIRECTION);
8494}
8495
/* Masked scalar single-precision fused multiply-subtract with an explicit
 * rounding operand, expressed through the multiply-add builtin: passes W,
 * A, the negated B, the mask U and R to __builtin_ia32_vfmaddss3_mask.
 *
 * B must be negated, exactly as _mm_mask_fmsub_ss does with __B; passing it
 * unchanged would make this macro compute a multiply-add instead.  R has to
 * be a compile-time constant.  */
#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R)); })
8501
8502static __inline__ __m128 __DEFAULT_FN_ATTRS
8503_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8504{
8505 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A,
8506          (__v4sf) __B,
8507          -(__v4sf) __C,
8508          (__mmask8) __U,
8509          _MM_FROUND_CUR_DIRECTION);
8510}
8511
/* Zero-masked scalar single-precision fused multiply-subtract with an
 * explicit rounding operand, expressed through the multiply-add builtin:
 * passes A, B, the negated C, the mask U and R to
 * __builtin_ia32_vfmaddss3_maskz.  */
8512#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\
8513  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
8514                                         (__v4sf)(__m128)(B), \
8515                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
8516                                         (int)(R)); })
8517
8518static __inline__ __m128 __DEFAULT_FN_ATTRS
8519_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8520{
8521 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
8522          (__v4sf) __X,
8523          (__v4sf) __Y,
8524          (__mmask8) __U,
8525          _MM_FROUND_CUR_DIRECTION);
8526}
8527
/* Forwards W, X, Y and mask U unchanged to __builtin_ia32_vfmsubss3_mask3;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmsubss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8533
8534static __inline__ __m128 __DEFAULT_FN_ATTRS
8535_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8536{
8537 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8538          -(__v4sf) __A,
8539          (__v4sf) __B,
8540          (__mmask8) __U,
8541          _MM_FROUND_CUR_DIRECTION);
8542}
8543
/* Passes W, the negated A, and B to __builtin_ia32_vfmaddss3_mask with mask U;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8549
8550static __inline__ __m128 __DEFAULT_FN_ATTRS
8551_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8552{
8553 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8554          (__v4sf) __B,
8555          (__v4sf) __C,
8556          (__mmask8) __U,
8557          _MM_FROUND_CUR_DIRECTION);
8558}
8559
/* Passes the negated A, B and C to __builtin_ia32_vfmaddss3_maskz with mask U;
 * R is the builtin's final (rounding) argument. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_maskz( \
      -(__v4sf)(__m128)(A), \
      (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8565
8566static __inline__ __m128 __DEFAULT_FN_ATTRS
8567_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8568{
8569 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W,
8570          (__v4sf) __X,
8571          (__v4sf) __Y,
8572          (__mmask8) __U,
8573          _MM_FROUND_CUR_DIRECTION);
8574}
8575
/* Passes the negated W, X and Y to __builtin_ia32_vfmaddss3_mask3 with mask U;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({ \
  (__m128)__builtin_ia32_vfmaddss3_mask3( \
      -(__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8581
8582static __inline__ __m128 __DEFAULT_FN_ATTRS
8583_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8584{
8585 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
8586          -(__v4sf) __A,
8587          -(__v4sf) __B,
8588          (__mmask8) __U,
8589          _MM_FROUND_CUR_DIRECTION);
8590}
8591
/* Passes W plus the negated A and negated B to __builtin_ia32_vfmaddss3_mask
 * with mask U; R is the builtin's final (rounding) argument. */
#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_vfmaddss3_mask( \
      (__v4sf)(__m128)(W), \
      -(__v4sf)(__m128)(A), \
      -(__v4sf)(__m128)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8597
8598static __inline__ __m128 __DEFAULT_FN_ATTRS
8599_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
8600{
8601 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A,
8602          (__v4sf) __B,
8603          -(__v4sf) __C,
8604          (__mmask8) __U,
8605          _MM_FROUND_CUR_DIRECTION);
8606}
8607
/* Rounding-control form of _mm_maskz_fnmsub_ss: passes the negated A, B and
 * the negated C to __builtin_ia32_vfmaddss3_maskz with mask U.  R is forwarded
 * as the rounding argument (it was previously ignored in favour of a
 * hard-coded _MM_FROUND_CUR_DIRECTION). */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\
  (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R)); })
8613
8614static __inline__ __m128 __DEFAULT_FN_ATTRS
8615_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
8616{
8617 return (__m128) __builtin_ia32_vfnmsubss3_mask3 ((__v4sf) __W,
8618          (__v4sf) __X,
8619          (__v4sf) __Y,
8620          (__mmask8) __U,
8621          _MM_FROUND_CUR_DIRECTION);
8622}
8623
/* Forwards W, X, Y and mask U unchanged to __builtin_ia32_vfnmsubss3_mask3;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({ \
  (__m128)__builtin_ia32_vfnmsubss3_mask3( \
      (__v4sf)(__m128)(W), \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8629
8630static __inline__ __m128d __DEFAULT_FN_ATTRS
8631_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8632{
8633 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8634          (__v2df) __A,
8635          (__v2df) __B,
8636          (__mmask8) __U,
8637          _MM_FROUND_CUR_DIRECTION);
8638}
8639
/* Forwards W, A, B and mask U unchanged to __builtin_ia32_vfmaddsd3_mask;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8645
8646static __inline__ __m128d __DEFAULT_FN_ATTRS
8647_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8648{
8649 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8650          (__v2df) __B,
8651          (__v2df) __C,
8652          (__mmask8) __U,
8653          _MM_FROUND_CUR_DIRECTION);
8654}
8655
/* Rounding-control form of _mm_maskz_fmadd_sd: forwards A, B, C and mask U to
 * __builtin_ia32_vfmaddsd3_maskz.  R is forwarded as the rounding argument
 * (it was previously ignored in favour of a hard-coded
 * _MM_FROUND_CUR_DIRECTION). */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R)); })
8661
8662static __inline__ __m128d __DEFAULT_FN_ATTRS
8663_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8664{
8665 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
8666          (__v2df) __X,
8667          (__v2df) __Y,
8668          (__mmask8) __U,
8669          _MM_FROUND_CUR_DIRECTION);
8670}
8671
/* Forwards W, X, Y and mask U unchanged to __builtin_ia32_vfmaddsd3_mask3;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8677
8678static __inline__ __m128d __DEFAULT_FN_ATTRS
8679_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8680{
8681 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8682          (__v2df) __A,
8683          -(__v2df) __B,
8684          (__mmask8) __U,
8685          _MM_FROUND_CUR_DIRECTION);
8686}
8687
/* Passes W, A and the negated B to __builtin_ia32_vfmaddsd3_mask with mask U;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8693
8694static __inline__ __m128d __DEFAULT_FN_ATTRS
8695_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8696{
8697 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A,
8698          (__v2df) __B,
8699          -(__v2df) __C,
8700          (__mmask8) __U,
8701          _MM_FROUND_CUR_DIRECTION);
8702}
8703
/* Passes A, B and the negated C to __builtin_ia32_vfmaddsd3_maskz with mask U;
 * R is the builtin's final (rounding) argument. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      (__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      -(__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8709
8710static __inline__ __m128d __DEFAULT_FN_ATTRS
8711_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8712{
8713 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
8714          (__v2df) __X,
8715          (__v2df) __Y,
8716          (__mmask8) __U,
8717          _MM_FROUND_CUR_DIRECTION);
8718}
8719
/* Forwards W, X, Y and mask U unchanged to __builtin_ia32_vfmsubsd3_mask3;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmsubsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8725
8726static __inline__ __m128d __DEFAULT_FN_ATTRS
8727_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8728{
8729 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8730          -(__v2df) __A,
8731          (__v2df) __B,
8732          (__mmask8) __U,
8733          _MM_FROUND_CUR_DIRECTION);
8734}
8735
/* Passes W, the negated A, and B to __builtin_ia32_vfmaddsd3_mask with mask U;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8741
8742static __inline__ __m128d __DEFAULT_FN_ATTRS
8743_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8744{
8745 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8746          (__v2df) __B,
8747          (__v2df) __C,
8748          (__mmask8) __U,
8749          _MM_FROUND_CUR_DIRECTION);
8750}
8751
/* Passes the negated A, B and C to __builtin_ia32_vfmaddsd3_maskz with mask U;
 * R is the builtin's final (rounding) argument. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz( \
      -(__v2df)(__m128d)(A), \
      (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(C), \
      (__mmask8)(U), \
      (int)(R)); })
8757
8758static __inline__ __m128d __DEFAULT_FN_ATTRS
8759_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8760{
8761 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W,
8762          (__v2df) __X,
8763          (__v2df) __Y,
8764          (__mmask8) __U,
8765          _MM_FROUND_CUR_DIRECTION);
8766}
8767
/* Passes the negated W, X and Y to __builtin_ia32_vfmaddsd3_mask3 with mask U;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3( \
      -(__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8773
8774static __inline__ __m128d __DEFAULT_FN_ATTRS
8775_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8776{
8777 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __W,
8778          -(__v2df) __A,
8779          -(__v2df) __B,
8780          (__mmask8) __U,
8781          _MM_FROUND_CUR_DIRECTION);
8782}
8783
/* Passes W plus the negated A and negated B to __builtin_ia32_vfmaddsd3_mask
 * with mask U; R is the builtin's final (rounding) argument. */
#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_vfmaddsd3_mask( \
      (__v2df)(__m128d)(W), \
      -(__v2df)(__m128d)(A), \
      -(__v2df)(__m128d)(B), \
      (__mmask8)(U), \
      (int)(R)); })
8789
8790static __inline__ __m128d __DEFAULT_FN_ATTRS
8791_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8792{
8793 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A,
8794          (__v2df) __B,
8795          -(__v2df) __C,
8796          (__mmask8) __U,
8797          _MM_FROUND_CUR_DIRECTION);
8798}
8799
/* Rounding-control form of _mm_maskz_fnmsub_sd: passes the negated A, B and
 * the negated C to __builtin_ia32_vfmaddsd3_maskz with mask U.  R is forwarded
 * as the rounding argument (it was previously ignored in favour of a
 * hard-coded _MM_FROUND_CUR_DIRECTION). */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\
  (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), \
                                          (int)(R)); })
8806
8807static __inline__ __m128d __DEFAULT_FN_ATTRS
8808_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8809{
8810 return (__m128d) __builtin_ia32_vfnmsubsd3_mask3 ((__v2df) (__W),
8811          (__v2df) __X,
8812          (__v2df) (__Y),
8813          (__mmask8) __U,
8814          _MM_FROUND_CUR_DIRECTION);
8815}
8816
/* Forwards W, X, Y and mask U unchanged to __builtin_ia32_vfnmsubsd3_mask3;
 * R is the builtin's final (rounding) argument. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({ \
  (__m128d)__builtin_ia32_vfnmsubsd3_mask3( \
      (__v2df)(__m128d)(W), \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (__mmask8)(U), \
      (int)(R)); })
8822
/* Shuffles X with itself-style indices derived from the 8-bit control C: each
 * 2-bit field of C picks an element, and the same four selections are applied
 * at base index 0 and again at base index 4. */
#define _mm512_permutex_pd(X, C) __extension__ ({ \
  (__m512d)__builtin_shufflevector( \
      (__v8df)(__m512d)(X), \
      (__v8df)_mm512_undefined_pd(), \
      0 + (((C) >> 0) & 0x3), 0 + (((C) >> 2) & 0x3), \
      0 + (((C) >> 4) & 0x3), 0 + (((C) >> 6) & 0x3), \
      4 + (((C) >> 0) & 0x3), 4 + (((C) >> 2) & 0x3), \
      4 + (((C) >> 4) & 0x3), 4 + (((C) >> 6) & 0x3)); })
8834
/* Calls __builtin_ia32_selectpd_512 with mask U, the result of
 * _mm512_permutex_pd(X, C), and W as the third operand. */
#define _mm512_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)(__m512d)(W)); })
8839
/* Calls __builtin_ia32_selectpd_512 with mask U, the result of
 * _mm512_permutex_pd(X, C), and a zero vector as the third operand. */
#define _mm512_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512( \
      (__mmask8)(U), \
      (__v8df)_mm512_permutex_pd((X), (C)), \
      (__v8df)_mm512_setzero_pd()); })
8844
/* Shuffles X using indices derived from the 8-bit control C: each 2-bit field
 * of C picks an element, and the same four selections are applied at base
 * index 0 and again at base index 4. */
#define _mm512_permutex_epi64(X, C) __extension__ ({ \
  (__m512i)__builtin_shufflevector( \
      (__v8di)(__m512i)(X), \
      (__v8di)_mm512_undefined_epi32(), \
      0 + (((C) >> 0) & 0x3), 0 + (((C) >> 2) & 0x3), \
      0 + (((C) >> 4) & 0x3), 0 + (((C) >> 6) & 0x3), \
      4 + (((C) >> 0) & 0x3), 4 + (((C) >> 2) & 0x3), \
      4 + (((C) >> 4) & 0x3), 4 + (((C) >> 6) & 0x3)); })
8856
/* Calls __builtin_ia32_selectq_512 with mask U, the result of
 * _mm512_permutex_epi64(X, C), and W as the third operand. */
#define _mm512_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)(__m512i)(W)); })
8861
/* Calls __builtin_ia32_selectq_512 with mask U, the result of
 * _mm512_permutex_epi64(X, C), and a zero vector as the third operand. */
#define _mm512_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), \
      (__v8di)_mm512_permutex_epi64((X), (C)), \
      (__v8di)_mm512_setzero_si512()); })
8866
8867static __inline__ __m512d __DEFAULT_FN_ATTRS
8868_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
8869{
8870  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8871                 (__v8di) __X,
8872                 (__v8df) _mm512_undefined_pd (),
8873                 (__mmask8) -1);
8874}
8875
8876static __inline__ __m512d __DEFAULT_FN_ATTRS
8877_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
8878{
8879  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8880                 (__v8di) __X,
8881                 (__v8df) __W,
8882                 (__mmask8) __U);
8883}
8884
8885static __inline__ __m512d __DEFAULT_FN_ATTRS
8886_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
8887{
8888  return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
8889                 (__v8di) __X,
8890                 (__v8df) _mm512_setzero_pd (),
8891                 (__mmask8) __U);
8892}
8893
8894static __inline__ __m512i __DEFAULT_FN_ATTRS
8895_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
8896{
8897  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8898                 (__v8di) __X,
8899                 (__v8di) _mm512_setzero_si512 (),
8900                 __M);
8901}
8902
8903static __inline__ __m512i __DEFAULT_FN_ATTRS
8904_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
8905{
8906  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8907                 (__v8di) __X,
8908                 (__v8di) _mm512_undefined_epi32 (),
8909                 (__mmask8) -1);
8910}
8911
8912static __inline__ __m512i __DEFAULT_FN_ATTRS
8913_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
8914             __m512i __Y)
8915{
8916  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
8917                 (__v8di) __X,
8918                 (__v8di) __W,
8919                 __M);
8920}
8921
8922static __inline__ __m512 __DEFAULT_FN_ATTRS
8923_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
8924{
8925  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8926                (__v16si) __X,
8927                (__v16sf) _mm512_undefined_ps (),
8928                (__mmask16) -1);
8929}
8930
8931static __inline__ __m512 __DEFAULT_FN_ATTRS
8932_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
8933{
8934  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8935                (__v16si) __X,
8936                (__v16sf) __W,
8937                (__mmask16) __U);
8938}
8939
8940static __inline__ __m512 __DEFAULT_FN_ATTRS
8941_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
8942{
8943  return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
8944                (__v16si) __X,
8945                (__v16sf) _mm512_setzero_ps (),
8946                (__mmask16) __U);
8947}
8948
8949static __inline__ __m512i __DEFAULT_FN_ATTRS
8950_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
8951{
8952  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8953                 (__v16si) __X,
8954                 (__v16si) _mm512_setzero_si512 (),
8955                 __M);
8956}
8957
8958static __inline__ __m512i __DEFAULT_FN_ATTRS
8959_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
8960{
8961  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8962                 (__v16si) __X,
8963                 (__v16si) _mm512_undefined_epi32 (),
8964                 (__mmask16) -1);
8965}
8966
8967#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8968
8969static __inline__ __m512i __DEFAULT_FN_ATTRS
8970_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8971             __m512i __Y)
8972{
8973  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
8974                 (__v16si) __X,
8975                 (__v16si) __W,
8976                 __M);
8977}
8978
8979#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8980
8981static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8982_mm512_kand (__mmask16 __A, __mmask16 __B)
8983{
8984  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8985}
8986
8987static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8988_mm512_kandn (__mmask16 __A, __mmask16 __B)
8989{
8990  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8991}
8992
8993static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8994_mm512_kor (__mmask16 __A, __mmask16 __B)
8995{
8996  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8997}
8998
8999static __inline__ int __DEFAULT_FN_ATTRS
9000_mm512_kortestc (__mmask16 __A, __mmask16 __B)
9001{
9002  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
9003}
9004
9005static __inline__ int __DEFAULT_FN_ATTRS
9006_mm512_kortestz (__mmask16 __A, __mmask16 __B)
9007{
9008  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
9009}
9010
9011static __inline__ __mmask16 __DEFAULT_FN_ATTRS
9012_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
9013{
9014  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
9015}
9016
9017static __inline__ __mmask16 __DEFAULT_FN_ATTRS
9018_mm512_kxnor (__mmask16 __A, __mmask16 __B)
9019{
9020  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
9021}
9022
9023static __inline__ __mmask16 __DEFAULT_FN_ATTRS
9024_mm512_kxor (__mmask16 __A, __mmask16 __B)
9025{
9026  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
9027}
9028
9029static __inline__ void __DEFAULT_FN_ATTRS
9030_mm512_stream_si512 (__m512i * __P, __m512i __A)
9031{
9032  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
9033  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
9034}
9035
9036static __inline__ __m512i __DEFAULT_FN_ATTRS
9037_mm512_stream_load_si512 (void const *__P)
9038{
9039  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
9040  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
9041}
9042
9043static __inline__ void __DEFAULT_FN_ATTRS
9044_mm512_stream_pd (double *__P, __m512d __A)
9045{
9046  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
9047  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
9048}
9049
9050static __inline__ void __DEFAULT_FN_ATTRS
9051_mm512_stream_ps (float *__P, __m512 __A)
9052{
9053  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
9054  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
9055}
9056
9057static __inline__ __m512d __DEFAULT_FN_ATTRS
9058_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
9059{
9060  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9061                  (__v8df) __W,
9062                  (__mmask8) __U);
9063}
9064
9065static __inline__ __m512d __DEFAULT_FN_ATTRS
9066_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
9067{
9068  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
9069                  (__v8df)
9070                  _mm512_setzero_pd (),
9071                  (__mmask8) __U);
9072}
9073
9074static __inline__ __m512i __DEFAULT_FN_ATTRS
9075_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9076{
9077  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9078                  (__v8di) __W,
9079                  (__mmask8) __U);
9080}
9081
9082static __inline__ __m512i __DEFAULT_FN_ATTRS
9083_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
9084{
9085  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
9086                  (__v8di)
9087                  _mm512_setzero_si512 (),
9088                  (__mmask8) __U);
9089}
9090
9091static __inline__ __m512 __DEFAULT_FN_ATTRS
9092_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
9093{
9094  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9095                 (__v16sf) __W,
9096                 (__mmask16) __U);
9097}
9098
9099static __inline__ __m512 __DEFAULT_FN_ATTRS
9100_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
9101{
9102  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
9103                 (__v16sf)
9104                 _mm512_setzero_ps (),
9105                 (__mmask16) __U);
9106}
9107
9108static __inline__ __m512i __DEFAULT_FN_ATTRS
9109_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9110{
9111  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9112                  (__v16si) __W,
9113                  (__mmask16) __U);
9114}
9115
9116static __inline__ __m512i __DEFAULT_FN_ATTRS
9117_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
9118{
9119  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
9120                  (__v16si)
9121                  _mm512_setzero_si512 (),
9122                  (__mmask16) __U);
9123}
9124
/* Calls __builtin_ia32_cmpss_mask on X and Y with predicate P, an all-ones
 * mask, and R as the final argument. */
#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      (int)(R)); })
9129
/* Calls __builtin_ia32_cmpss_mask on X and Y with predicate P, mask M, and R
 * as the final argument. */
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      (int)(R)); })
9134
/* Calls __builtin_ia32_cmpss_mask on X and Y with predicate P, an all-ones
 * mask, and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
9140
/* Calls __builtin_ia32_cmpss_mask on X and Y with predicate P, mask M, and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpss_mask( \
      (__v4sf)(__m128)(X), \
      (__v4sf)(__m128)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION); })
9146
/* Calls __builtin_ia32_cmpsd_mask on X and Y with predicate P, an all-ones
 * mask, and R as the final argument. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      (int)(R)); })
9151
/* Calls __builtin_ia32_cmpsd_mask on X and Y with predicate P, mask M, and R
 * as the final argument. */
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      (int)(R)); })
9156
/* Calls __builtin_ia32_cmpsd_mask on X and Y with predicate P, an all-ones
 * mask, and _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)-1, \
      _MM_FROUND_CUR_DIRECTION); })
9162
/* Calls __builtin_ia32_cmpsd_mask on X and Y with predicate P, mask M, and
 * _MM_FROUND_CUR_DIRECTION as the final argument. */
#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpsd_mask( \
      (__v2df)(__m128d)(X), \
      (__v2df)(__m128d)(Y), \
      (int)(P), \
      (__mmask8)(M), \
      _MM_FROUND_CUR_DIRECTION); })
9168
9169static __inline__ __m512 __DEFAULT_FN_ATTRS
9170_mm512_movehdup_ps (__m512 __A)
9171{
9172  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9173                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
9174}
9175
9176static __inline__ __m512 __DEFAULT_FN_ATTRS
9177_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9178{
9179  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9180                                             (__v16sf)_mm512_movehdup_ps(__A),
9181                                             (__v16sf)__W);
9182}
9183
9184static __inline__ __m512 __DEFAULT_FN_ATTRS
9185_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
9186{
9187  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9188                                             (__v16sf)_mm512_movehdup_ps(__A),
9189                                             (__v16sf)_mm512_setzero_ps());
9190}
9191
9192static __inline__ __m512 __DEFAULT_FN_ATTRS
9193_mm512_moveldup_ps (__m512 __A)
9194{
9195  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
9196                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
9197}
9198
9199static __inline__ __m512 __DEFAULT_FN_ATTRS
9200_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
9201{
9202  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9203                                             (__v16sf)_mm512_moveldup_ps(__A),
9204                                             (__v16sf)__W);
9205}
9206
9207static __inline__ __m512 __DEFAULT_FN_ATTRS
9208_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
9209{
9210  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
9211                                             (__v16sf)_mm512_moveldup_ps(__A),
9212                                             (__v16sf)_mm512_setzero_ps());
9213}
9214
9215static __inline__ __m128 __DEFAULT_FN_ATTRS
9216_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
9217{
9218  __m128 res = __A;
9219  res[0] = (__U & 1) ? __B[0] : __W[0];
9220  return res;
9221}
9222
9223static __inline__ __m128 __DEFAULT_FN_ATTRS
9224_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
9225{
9226  __m128 res = __A;
9227  res[0] = (__U & 1) ? __B[0] : 0;
9228  return res;
9229}
9230
9231static __inline__ __m128d __DEFAULT_FN_ATTRS
9232_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
9233{
9234  __m128d res = __A;
9235  res[0] = (__U & 1) ? __B[0] : __W[0];
9236  return res;
9237}
9238
9239static __inline__ __m128d __DEFAULT_FN_ATTRS
9240_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
9241{
9242  __m128d res = __A;
9243  res[0] = (__U & 1) ? __B[0] : 0;
9244  return res;
9245}
9246
9247static __inline__ void __DEFAULT_FN_ATTRS
9248_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
9249{
9250  __builtin_ia32_storess128_mask ((__v16sf *)__W,
9251                (__v16sf) _mm512_castps128_ps512(__A),
9252                (__mmask16) __U & (__mmask16)1);
9253}
9254
9255static __inline__ void __DEFAULT_FN_ATTRS
9256_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
9257{
9258  __builtin_ia32_storesd128_mask ((__v8df *)__W,
9259                (__v8df) _mm512_castpd128_pd512(__A),
9260                (__mmask8) __U & 1);
9261}
9262
9263static __inline__ __m128 __DEFAULT_FN_ATTRS
9264_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
9265{
9266  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
9267                                                (__v4sf) {0.0, 0.0, 0.0, 0.0},
9268                                                0, 4, 4, 4);
9269
9270  return (__m128) __builtin_shufflevector(
9271                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9272                                      (__v16sf) _mm512_castps128_ps512(src),
9273                                      (__mmask16) __U & 1),
9274                           _mm512_undefined_ps(), 0, 1, 2, 3);
9275}
9276
9277static __inline__ __m128 __DEFAULT_FN_ATTRS
9278_mm_maskz_load_ss (__mmask8 __U, const float* __A)
9279{
9280  return (__m128) __builtin_shufflevector(
9281                           __builtin_ia32_loadss128_mask ((__v16sf *) __A,
9282                                      (__v16sf) _mm512_setzero_ps(),
9283                                      (__mmask16) __U & 1),
9284                           _mm512_undefined_ps(), 0, 1, 2, 3);
9285}
9286
9287static __inline__ __m128d __DEFAULT_FN_ATTRS
9288_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
9289{
9290  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
9291                                                 (__v2df) {0.0, 0.0}, 0, 2);
9292
9293  return (__m128d) __builtin_shufflevector(
9294                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9295                                      (__v8df) _mm512_castpd128_pd512(src),
9296                                      (__mmask8) __U & 1),
9297                            _mm512_undefined_pd(), 0, 1);
9298}
9299
9300static __inline__ __m128d __DEFAULT_FN_ATTRS
9301_mm_maskz_load_sd (__mmask8 __U, const double* __A)
9302{
9303  return (__m128d) __builtin_shufflevector(
9304                            __builtin_ia32_loadsd128_mask ((__v8df *) __A,
9305                                      (__v8df) _mm512_setzero_pd(),
9306                                      (__mmask8) __U & 1),
9307                            _mm512_undefined_pd(), 0, 1);
9308}
9309
/* Shuffle the 32-bit elements of A inside each group of four elements.
 *
 * I is an 8-bit control made of four 2-bit fields.  For every group base
 * g in {0, 4, 8, 12} and position j in {0..3}, result element g + j is
 * source element g + ((I >> (2 * j)) & 3).  All indices are below 16, so the
 * undefined second shuffle operand is never selected.  I must be a constant
 * expression because it feeds __builtin_shufflevector indices.
 */
#define _mm512_shuffle_epi32(A, I) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   0  + (((I) >> 0) & 0x3), \
                                   0  + (((I) >> 2) & 0x3), \
                                   0  + (((I) >> 4) & 0x3), \
                                   0  + (((I) >> 6) & 0x3), \
                                   4  + (((I) >> 0) & 0x3), \
                                   4  + (((I) >> 2) & 0x3), \
                                   4  + (((I) >> 4) & 0x3), \
                                   4  + (((I) >> 6) & 0x3), \
                                   8  + (((I) >> 0) & 0x3), \
                                   8  + (((I) >> 2) & 0x3), \
                                   8  + (((I) >> 4) & 0x3), \
                                   8  + (((I) >> 6) & 0x3), \
                                   12 + (((I) >> 0) & 0x3), \
                                   12 + (((I) >> 2) & 0x3), \
                                   12 + (((I) >> 4) & 0x3), \
                                   12 + (((I) >> 6) & 0x3)); })
9329
/* Merge-masked form of _mm512_shuffle_epi32: the shuffled vector and W are
 * handed to __builtin_ia32_selectd_512 under mask U (presumably the shuffled
 * element is kept where the mask bit is set and W's element otherwise).
 */
#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W)); })
9334
/* Zero-masked form of _mm512_shuffle_epi32: the shuffled vector and an
 * all-zero vector are handed to __builtin_ia32_selectd_512 under mask U
 * (presumably elements with a clear mask bit become zero).
 */
#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512()); })
9339
9340static __inline__ __m512d __DEFAULT_FN_ATTRS
9341_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
9342{
9343  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9344                (__v8df) __W,
9345                (__mmask8) __U);
9346}
9347
9348static __inline__ __m512d __DEFAULT_FN_ATTRS
9349_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
9350{
9351  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
9352                (__v8df) _mm512_setzero_pd (),
9353                (__mmask8) __U);
9354}
9355
9356static __inline__ __m512i __DEFAULT_FN_ATTRS
9357_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
9358{
9359  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9360                (__v8di) __W,
9361                (__mmask8) __U);
9362}
9363
9364static __inline__ __m512i __DEFAULT_FN_ATTRS
9365_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
9366{
9367  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
9368                (__v8di) _mm512_setzero_pd (),
9369                (__mmask8) __U);
9370}
9371
9372static __inline__ __m512d __DEFAULT_FN_ATTRS
9373_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
9374{
9375  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9376              (__v8df) __W,
9377              (__mmask8) __U);
9378}
9379
9380static __inline__ __m512d __DEFAULT_FN_ATTRS
9381_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
9382{
9383  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
9384              (__v8df) _mm512_setzero_pd(),
9385              (__mmask8) __U);
9386}
9387
9388static __inline__ __m512i __DEFAULT_FN_ATTRS
9389_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
9390{
9391  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9392              (__v8di) __W,
9393              (__mmask8) __U);
9394}
9395
9396static __inline__ __m512i __DEFAULT_FN_ATTRS
9397_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
9398{
9399  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
9400              (__v8di) _mm512_setzero_pd(),
9401              (__mmask8) __U);
9402}
9403
9404static __inline__ __m512 __DEFAULT_FN_ATTRS
9405_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
9406{
9407  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9408                   (__v16sf) __W,
9409                   (__mmask16) __U);
9410}
9411
9412static __inline__ __m512 __DEFAULT_FN_ATTRS
9413_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
9414{
9415  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
9416                   (__v16sf) _mm512_setzero_ps(),
9417                   (__mmask16) __U);
9418}
9419
9420static __inline__ __m512i __DEFAULT_FN_ATTRS
9421_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
9422{
9423  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9424              (__v16si) __W,
9425              (__mmask16) __U);
9426}
9427
9428static __inline__ __m512i __DEFAULT_FN_ATTRS
9429_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
9430{
9431  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
9432              (__v16si) _mm512_setzero_ps(),
9433              (__mmask16) __U);
9434}
9435
9436static __inline__ __m512 __DEFAULT_FN_ATTRS
9437_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
9438{
9439  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9440               (__v16sf) __W,
9441               (__mmask16) __U);
9442}
9443
9444static __inline__ __m512 __DEFAULT_FN_ATTRS
9445_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
9446{
9447  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
9448               (__v16sf) _mm512_setzero_ps(),
9449               (__mmask16) __U);
9450}
9451
9452static __inline__ __m512i __DEFAULT_FN_ATTRS
9453_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
9454{
9455  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9456                (__v16si) __W,
9457                (__mmask16) __U);
9458}
9459
9460static __inline__ __m512i __DEFAULT_FN_ATTRS
9461_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
9462{
9463  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
9464                (__v16si) _mm512_setzero_ps(),
9465                (__mmask16) __U);
9466}
9467
/* Convert the eight floats of A (a __m256) to doubles via
 * __builtin_ia32_cvtps2pd512_mask with an all-ones mask, an undefined
 * pass-through and R as the rounding/exception-control argument.
 */
#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)); })
9472
/* Merge-masked float-to-double conversion: like _mm512_cvt_roundps_pd, but
 * with W as the pass-through operand and U as the mask.
 */
#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })
9477
/* Zero-masked float-to-double conversion: like _mm512_cvt_roundps_pd, but
 * with an all-zero pass-through operand and U as the mask.
 */
#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })
9482
9483static __inline__ __m512d __DEFAULT_FN_ATTRS
9484_mm512_cvtps_pd (__m256 __A)
9485{
9486  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9487                (__v8df)
9488                _mm512_undefined_pd (),
9489                (__mmask8) -1,
9490                _MM_FROUND_CUR_DIRECTION);
9491}
9492
9493static __inline__ __m512d __DEFAULT_FN_ATTRS
9494_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
9495{
9496  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9497                (__v8df) __W,
9498                (__mmask8) __U,
9499                _MM_FROUND_CUR_DIRECTION);
9500}
9501
9502static __inline__ __m512d __DEFAULT_FN_ATTRS
9503_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
9504{
9505  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
9506                (__v8df)
9507                _mm512_setzero_pd (),
9508                (__mmask8) __U,
9509                _MM_FROUND_CUR_DIRECTION);
9510}
9511
9512static __inline__ __m512 __DEFAULT_FN_ATTRS
9513_mm512_cvtpslo_pd (__m512 __A)
9514{
9515  return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
9516}
9517
9518static __inline__ __m512 __DEFAULT_FN_ATTRS
9519_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
9520{
9521  return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
9522}
9523
9524static __inline__ __m512d __DEFAULT_FN_ATTRS
9525_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
9526{
9527  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9528              (__v8df) __A,
9529              (__v8df) __W);
9530}
9531
9532static __inline__ __m512d __DEFAULT_FN_ATTRS
9533_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
9534{
9535  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
9536              (__v8df) __A,
9537              (__v8df) _mm512_setzero_pd ());
9538}
9539
9540static __inline__ __m512 __DEFAULT_FN_ATTRS
9541_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
9542{
9543  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9544             (__v16sf) __A,
9545             (__v16sf) __W);
9546}
9547
9548static __inline__ __m512 __DEFAULT_FN_ATTRS
9549_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
9550{
9551  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9552             (__v16sf) __A,
9553             (__v16sf) _mm512_setzero_ps ());
9554}
9555
9556static __inline__ void __DEFAULT_FN_ATTRS
9557_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
9558{
9559  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9560            (__mmask8) __U);
9561}
9562
9563static __inline__ void __DEFAULT_FN_ATTRS
9564_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9565{
9566  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9567            (__mmask8) __U);
9568}
9569
9570static __inline__ void __DEFAULT_FN_ATTRS
9571_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9572{
9573  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9574            (__mmask16) __U);
9575}
9576
9577static __inline__ void __DEFAULT_FN_ATTRS
9578_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9579{
9580  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9581            (__mmask16) __U);
9582}
9583
/* Scalar double-to-float conversion with explicit rounding control R.
 * Passes A, B, an undefined pass-through and an all-ones mask to
 * __builtin_ia32_cvtsd2ss_round_mask.
 */
#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R)); })
9589
/* Merge-masked form of _mm_cvt_roundsd_ss: W is the pass-through operand and
 * U the mask handed to __builtin_ia32_cvtsd2ss_round_mask.
 */
#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R)); })
9595
/* Zero-masked form of _mm_cvt_roundsd_ss: the pass-through operand is an
 * all-zero vector and U is the mask.
 */
#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R)); })
9601
9602static __inline__ __m128 __DEFAULT_FN_ATTRS
9603_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9604{
9605  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9606                                             (__v2df)(__B),
9607                                             (__v4sf)(__W),
9608                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9609}
9610
9611static __inline__ __m128 __DEFAULT_FN_ATTRS
9612_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9613{
9614  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
9615                                             (__v2df)(__B),
9616                                             (__v4sf)_mm_setzero_ps(),
9617                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9618}
9619
/* Alternate spellings: the *_i32 / *_i64 names are plain aliases for the
 * corresponding *_si32 / *_si64 intrinsics.  The 64-bit aliases exist only
 * when compiling for x86-64.
 */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
9630
9631#ifdef __x86_64__
/* Signed 64-bit integer B to scalar double with rounding control R; A is the
 * vector operand handed to __builtin_ia32_cvtsi2sd64.
 */
#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })
9635
/* Same expansion as _mm_cvt_roundi64_sd under the "si64" spelling. */
#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R)); })
9639#endif
9640
/* Signed 32-bit integer B to scalar float with rounding control R; A is the
 * vector operand handed to __builtin_ia32_cvtsi2ss32.
 */
#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9643
/* Same expansion as _mm_cvt_roundsi32_ss under the "i32" spelling. */
#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); })
9646
9647#ifdef __x86_64__
/* Signed 64-bit integer B to scalar float with rounding control R; A is the
 * vector operand handed to __builtin_ia32_cvtsi2ss64.
 */
#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })
9651
/* Same expansion as _mm_cvt_roundsi64_ss under the "i64" spelling. */
#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R)); })
9655#endif
9656
/* Scalar float-to-double conversion with explicit rounding control R.
 * Passes A, B, an undefined pass-through and an all-ones mask to
 * __builtin_ia32_cvtss2sd_round_mask.
 */
#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R)); })
9662
/* Merge-masked form of _mm_cvt_roundss_sd: W is the pass-through operand and
 * U the mask handed to __builtin_ia32_cvtss2sd_round_mask.
 */
#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R)); })
9668
/* Zero-masked form of _mm_cvt_roundss_sd: the pass-through operand is an
 * all-zero vector and U is the mask.
 */
#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R)); })
9674
9675static __inline__ __m128d __DEFAULT_FN_ATTRS
9676_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9677{
9678  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9679                                              (__v4sf)(__B),
9680                                              (__v2df)(__W),
9681                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9682}
9683
9684static __inline__ __m128d __DEFAULT_FN_ATTRS
9685_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9686{
9687  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
9688                                              (__v4sf)(__B),
9689                                              (__v2df)_mm_setzero_pd(),
9690                                              (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
9691}
9692
9693static __inline__ __m128d __DEFAULT_FN_ATTRS
9694_mm_cvtu32_sd (__m128d __A, unsigned __B)
9695{
9696  return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
9697}
9698
9699#ifdef __x86_64__
/* Unsigned 64-bit integer B to scalar double with rounding control R; A is
 * the vector operand handed to __builtin_ia32_cvtusi2sd64.
 */
#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R)); })
9703
9704static __inline__ __m128d __DEFAULT_FN_ATTRS
9705_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9706{
9707  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
9708                 _MM_FROUND_CUR_DIRECTION);
9709}
9710#endif
9711
/* Unsigned 32-bit integer B to scalar float with rounding control R; A is
 * the vector operand handed to __builtin_ia32_cvtusi2ss32.
 */
#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R)); })
9715
9716static __inline__ __m128 __DEFAULT_FN_ATTRS
9717_mm_cvtu32_ss (__m128 __A, unsigned __B)
9718{
9719  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
9720                _MM_FROUND_CUR_DIRECTION);
9721}
9722
9723#ifdef __x86_64__
/* Unsigned 64-bit integer B to scalar float with rounding control R; A is
 * the vector operand handed to __builtin_ia32_cvtusi2ss64.
 */
#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R)); })
9727
9728static __inline__ __m128 __DEFAULT_FN_ATTRS
9729_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9730{
9731  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
9732                _MM_FROUND_CUR_DIRECTION);
9733}
9734#endif
9735
9736static __inline__ __m512i __DEFAULT_FN_ATTRS
9737_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9738{
9739  return (__m512i) __builtin_ia32_selectd_512(__M,
9740                                              (__v16si) _mm512_set1_epi32(__A),
9741                                              (__v16si) __O);
9742}
9743
9744#ifdef __x86_64__
9745static __inline__ __m512i __DEFAULT_FN_ATTRS
9746_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9747{
9748  return (__m512i) __builtin_ia32_selectq_512(__M,
9749                                              (__v8di) _mm512_set1_epi64(__A),
9750                                              (__v8di) __O);
9751}
9752#endif
9753
9754static  __inline __m512i __DEFAULT_FN_ATTRS
9755_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9756    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9757    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9758    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9759    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9760    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9761    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9762    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9763    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9764    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9765    char __e4, char __e3, char __e2, char __e1, char __e0) {
9766
9767  return __extension__ (__m512i)(__v64qi)
9768    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9769     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9770     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9771     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9772     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9773     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9774     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9775     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9776}
9777
9778static  __inline __m512i __DEFAULT_FN_ATTRS
9779_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9780    short __e27, short __e26, short __e25, short __e24, short __e23,
9781    short __e22, short __e21, short __e20, short __e19, short __e18,
9782    short __e17, short __e16, short __e15, short __e14, short __e13,
9783    short __e12, short __e11, short __e10, short __e9, short __e8,
9784    short __e7, short __e6, short __e5, short __e4, short __e3,
9785    short __e2, short __e1, short __e0) {
9786  return __extension__ (__m512i)(__v32hi)
9787    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9788     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9789     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9790     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9791}
9792
9793static __inline __m512i __DEFAULT_FN_ATTRS
9794_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9795     int __E, int __F, int __G, int __H,
9796     int __I, int __J, int __K, int __L,
9797     int __M, int __N, int __O, int __P)
9798{
9799  return __extension__ (__m512i)(__v16si)
9800  { __P, __O, __N, __M, __L, __K, __J, __I,
9801    __H, __G, __F, __E, __D, __C, __B, __A };
9802}
9803
/* Reversed-argument form of _mm512_set_epi32: e0 is forwarded as the last
 * argument and e15 as the first, so e0 lands in element 0.
 */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
9808
9809static __inline__ __m512i __DEFAULT_FN_ATTRS
9810_mm512_set_epi64 (long long __A, long long __B, long long __C,
9811     long long __D, long long __E, long long __F,
9812     long long __G, long long __H)
9813{
9814  return __extension__ (__m512i) (__v8di)
9815  { __H, __G, __F, __E, __D, __C, __B, __A };
9816}
9817
/* Reversed-argument form of _mm512_set_epi64: e0 lands in element 0. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9820
9821static __inline__ __m512d __DEFAULT_FN_ATTRS
9822_mm512_set_pd (double __A, double __B, double __C, double __D,
9823        double __E, double __F, double __G, double __H)
9824{
9825  return __extension__ (__m512d)
9826  { __H, __G, __F, __E, __D, __C, __B, __A };
9827}
9828
/* Reversed-argument form of _mm512_set_pd: e0 lands in element 0. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9831
9832static __inline__ __m512 __DEFAULT_FN_ATTRS
9833_mm512_set_ps (float __A, float __B, float __C, float __D,
9834        float __E, float __F, float __G, float __H,
9835        float __I, float __J, float __K, float __L,
9836        float __M, float __N, float __O, float __P)
9837{
9838  return __extension__ (__m512)
9839  { __P, __O, __N, __M, __L, __K, __J, __I,
9840    __H, __G, __F, __E, __D, __C, __B, __A };
9841}
9842
/* Reversed-argument form of _mm512_set_ps: e0 lands in element 0. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9846
9847static __inline__ __m512 __DEFAULT_FN_ATTRS
9848_mm512_abs_ps(__m512 __A)
9849{
9850  return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9851}
9852
9853static __inline__ __m512 __DEFAULT_FN_ATTRS
9854_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9855{
9856  return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9857}
9858
9859static __inline__ __m512d __DEFAULT_FN_ATTRS
9860_mm512_abs_pd(__m512d __A)
9861{
9862  return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9863}
9864
9865static __inline__ __m512d __DEFAULT_FN_ATTRS
9866_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9867{
9868  return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9869}
9870
// Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
// outputs. This class of vector operation forms the basis of many scientific
// computations. In vector-reduction arithmetic, the evaluation of the
// reduction is independent of the order of the input elements of the vector V.
9875
// The bisection method is used. At each step, the vector from the previous
// step is split in half and the operation is performed on its two halves.
// This takes log2(n) steps, where n is the number of elements in the vector.
9879
// Vec512 - Vector with size 512.
// Operator - Can be one of the following: +,*,&,|
// T2 - Suffix pasted onto __v8d/__v4d/__v2d: 'i' for int and 'f' for float.
// T1 - Suffix pasted onto __m256/__m128: 'i' for int and 'd' for double.
9884
/* Expands to the complete body of a reduction over eight 64-bit elements.
 *
 * Three halving steps are performed: the low and high halves of the 512-bit
 * vector are combined with Operator into a 256-bit vector, then into a
 * 128-bit vector, and finally element 1 is combined into element 0.  The -1
 * shuffle indices in the last step mark result elements whose value is not
 * used.
 *
 * The statement expression ends with a `return` statement, so this macro can
 * only be used as the body of a function whose return type matches
 * Vec128[0]; it declares the locals Vec256 and Vec128 in that function.
 */
#define _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1)         \
  __extension__({                                                      \
    __m256##T1 Vec256 = __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            0, 1, 2, 3)                                \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v8d##T2)Vec512,                         \
                            (__v8d##T2)Vec512,                         \
                            4, 5, 6, 7);                               \
    __m128##T1 Vec128 = __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            0, 1)                                      \
                        Operator                                       \
                        __builtin_shufflevector(                       \
                            (__v4d##T2)Vec256,                         \
                            (__v4d##T2)Vec256,                         \
                            2, 3);                                     \
    Vec128 = __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 0, -1)         \
             Operator                                                  \
             __builtin_shufflevector((__v2d##T2)Vec128,                \
                                     (__v2d##T2)Vec128, 1, -1);        \
    return Vec128[0];                                                  \
  })
9912
/* Sum of the eight 64-bit integer elements of __W.  The return statement is
 * supplied by the _mm512_reduce_operator_64bit expansion. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, +, i, i);
}
9916
/* Product of the eight 64-bit integer elements of __W.  The return statement
 * is supplied by the _mm512_reduce_operator_64bit expansion. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, *, i, i);
}
9920
/* Bitwise AND of the eight 64-bit integer elements of __W.  The return
 * statement is supplied by the _mm512_reduce_operator_64bit expansion. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, &, i, i);
}
9924
/* Bitwise OR of the eight 64-bit integer elements of __W.  The return
 * statement is supplied by the _mm512_reduce_operator_64bit expansion. */
static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
  _mm512_reduce_operator_64bit(__W, |, i, i);
}
9928
/* Sum of the eight double elements of __W, added pairwise by halves.  The
 * return statement is supplied by the _mm512_reduce_operator_64bit
 * expansion. */
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, +, f, d);
}
9932
/* Product of the eight double elements of __W, multiplied pairwise by
 * halves.  The return statement is supplied by the
 * _mm512_reduce_operator_64bit expansion. */
static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
  _mm512_reduce_operator_64bit(__W, *, f, d);
}
9936
// Vec512 - Vector with size 512.
// Vec512Neutral - All vector elements set to the identity element.
// Identity element: {+,0},{*,1},{&,0xFFFFFFFFFFFFFFFF},{|,0}
// Operator - Can be one of the following: +,*,&,|
// Mask - Intrinsic Mask
// T2 - Can get 'i' for int and 'f' for float.
// T1 - Can get 'i' for int and 'd' for packed double-precision.
// T3 - Can be 'pd' for packed double or 'q' for q-word.
9945
/* Masked variant of _mm512_reduce_operator_64bit.
 *
 * Vec512 is first overwritten with the result of the select builtin applied
 * to Mask, Vec512 and Vec512Neutral (presumably elements whose mask bit is
 * clear are replaced by the neutral element), so Vec512 must be a modifiable
 * lvalue.  The unmasked reduction macro is then expanded, which returns from
 * the enclosing function.
 */
#define _mm512_mask_reduce_operator_64bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = __builtin_ia32_select##T3##_512(                                  \
                 (__mmask8)Mask,                                               \
                 (__v8d##T2)Vec512,                                            \
                 (__v8d##T2)Vec512Neutral);                                    \
    _mm512_reduce_operator_64bit(Vec512, Operator, T2, T1);                    \
  })
9955
/* Masked sum of the 64-bit integer elements of __W under mask __M; the
 * neutral vector passed to the macro is all zeros.  The return statement is
 * supplied by the macro expansion. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), +, __M, i, i, q);
}
9960
/* Masked product of the 64-bit integer elements of __W under mask __M; the
 * neutral vector passed to the macro is all ones (value 1).  The return
 * statement is supplied by the macro expansion. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(1), *, __M, i, i, q);
}
9965
/* Masked bitwise AND of the 64-bit integer elements of __W under mask __M;
 * the neutral vector passed to the macro has every bit set.  The return
 * statement is supplied by the macro expansion. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
                                    &, __M,  i, i, q);
}
9971
/* Masked bitwise OR of the 64-bit integer elements of __W under mask __M;
 * the neutral vector passed to the macro is all zeros.  The return statement
 * is supplied by the macro expansion. */
static __inline__ long long __DEFAULT_FN_ATTRS
_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_epi64(0), |, __M,
                                    i, i, q);
}
9977
/* Masked sum of the double elements of __W under mask __M; the neutral
 * vector passed to the macro is all 0.0.  The return statement is supplied
 * by the macro expansion. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(0), +, __M,
                                    f, d, pd);
}
9983
/* Masked product of the double elements of __W under mask __M; the neutral
 * vector passed to the macro is all 1.0.  The return statement is supplied
 * by the macro expansion. */
static __inline__ double __DEFAULT_FN_ATTRS
_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
  _mm512_mask_reduce_operator_64bit(__W, _mm512_set1_pd(1), *, __M,
                                    f, d, pd);
}
9989
// Vec512 - Vector with size 512.
// Operator - Can be one of the following: +,*,&,|
// T2 - Suffix pasted onto __v16s/__v8s/__v4s: 'i' for int and 'f' for float.
// T1 - Suffix pasted onto __m256/__m128: 'i' for int and ' ' (empty) for
//      packed single.
9994
9995#define _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1) __extension__({ \
9996    __m256##T1 Vec256 =                                                        \
9997            (__m256##T1)(__builtin_shufflevector(                              \
9998                                    (__v16s##T2)Vec512,                        \
9999                                    (__v16s##T2)Vec512,                        \
10000                                    0, 1, 2, 3, 4, 5, 6, 7)                    \
10001                                Operator                                       \
10002                         __builtin_shufflevector(                              \
10003                                    (__v16s##T2)Vec512,                        \
10004                                    (__v16s##T2)Vec512,                        \
10005                                    8, 9, 10, 11, 12, 13, 14, 15));            \
10006    __m128##T1 Vec128 =                                                        \
10007             (__m128##T1)(__builtin_shufflevector(                             \
10008                                    (__v8s##T2)Vec256,                         \
10009                                    (__v8s##T2)Vec256,                         \
10010                                    0, 1, 2, 3)                                \
10011                                Operator                                       \
10012                          __builtin_shufflevector(                             \
10013                                    (__v8s##T2)Vec256,                         \
10014                                    (__v8s##T2)Vec256,                         \
10015                                    4, 5, 6, 7));                              \
10016    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
10017                                    (__v4s##T2)Vec128,                         \
10018                                    (__v4s##T2)Vec128,                         \
10019                                    0, 1, -1, -1)                              \
10020                                Operator                                       \
10021                          __builtin_shufflevector(                             \
10022                                    (__v4s##T2)Vec128,                         \
10023                                    (__v4s##T2)Vec128,                         \
10024                                    2, 3, -1, -1));                            \
10025    Vec128 = (__m128##T1)(__builtin_shufflevector(                             \
10026                                    (__v4s##T2)Vec128,                         \
10027                                    (__v4s##T2)Vec128,                         \
10028                                    0, -1, -1, -1)                             \
10029                                Operator                                       \
10030                          __builtin_shufflevector(                             \
10031                                    (__v4s##T2)Vec128,                         \
10032                                    (__v4s##T2)Vec128,                         \
10033                                    1, -1, -1, -1));                           \
10034    return Vec128[0];                                                          \
10035  })
10036
10037static __inline__ int __DEFAULT_FN_ATTRS
10038_mm512_reduce_add_epi32(__m512i __W) {
10039  _mm512_reduce_operator_32bit(__W, +, i, i);
10040}
10041
10042static __inline__ int __DEFAULT_FN_ATTRS
10043_mm512_reduce_mul_epi32(__m512i __W) {
10044  _mm512_reduce_operator_32bit(__W, *, i, i);
10045}
10046
10047static __inline__ int __DEFAULT_FN_ATTRS
10048_mm512_reduce_and_epi32(__m512i __W) {
10049  _mm512_reduce_operator_32bit(__W, &, i, i);
10050}
10051
10052static __inline__ int __DEFAULT_FN_ATTRS
10053_mm512_reduce_or_epi32(__m512i __W) {
10054  _mm512_reduce_operator_32bit(__W, |, i, i);
10055}
10056
10057static __inline__ float __DEFAULT_FN_ATTRS
10058_mm512_reduce_add_ps(__m512 __W) {
10059  _mm512_reduce_operator_32bit(__W, +, f, );
10060}
10061
10062static __inline__ float __DEFAULT_FN_ATTRS
10063_mm512_reduce_mul_ps(__m512 __W) {
10064  _mm512_reduce_operator_32bit(__W, *, f, );
10065}
10066
// Masked form of _mm512_reduce_operator_32bit. The select builtin is called
// with (Mask, Vec512, Vec512Neutral) and its result is stored back into
// Vec512, which is then reduced; the intent is that lanes not selected by
// Mask hold the identity element and so cannot influence the result.
// Vec512 is assigned to, so it must name a modifiable vector object.
//
// Vec512        - Vector with size 512.
// Vec512Neutral - All vector elements set to the identity element.
//                 Identity element: {+,0},{*,1},{&,0xFFFFFFFF},{|,0}
// Operator      - Can be one of following: +,*,&,|
// Mask          - Intrinsic Mask
// T2            - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
// T1            - Can get 'i' for int and ' ' for packed single. [__m512{i|}]
// T3            - Can get 'd' for d-word and 'ps' for packed single.
//                 [__builtin_ia32_select{d|ps}_512]
//
// Mask and Vec512Neutral are parenthesized so that an expression argument is
// cast as a whole.

#define _mm512_mask_reduce_operator_32bit(Vec512, Vec512Neutral, Operator,     \
                                          Mask, T2, T1, T3)                    \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
        (__mmask16)(Mask), (__v16s##T2)Vec512, (__v16s##T2)(Vec512Neutral));   \
    _mm512_reduce_operator_32bit(Vec512, Operator, T2, T1);                    \
  })
10085
10086static __inline__ int __DEFAULT_FN_ATTRS
10087_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
10088  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), +, __M, i, i, d);
10089}
10090
10091static __inline__ int __DEFAULT_FN_ATTRS
10092_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
10093  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(1), *, __M, i, i, d);
10094}
10095
10096static __inline__ int __DEFAULT_FN_ATTRS
10097_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
10098  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0xFFFFFFFF), &, __M,
10099                                    i, i, d);
10100}
10101
10102static __inline__ int __DEFAULT_FN_ATTRS
10103_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
10104  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_epi32(0), |, __M, i, i, d);
10105}
10106
10107static __inline__ float __DEFAULT_FN_ATTRS
10108_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
10109  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(0), +, __M, f, , ps);
10110}
10111
10112static __inline__ float __DEFAULT_FN_ATTRS
10113_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
10114  _mm512_mask_reduce_operator_32bit(__W, _mm512_set1_ps(1), *, __M, f, , ps);
10115}
10116
// Max/min reduction of the eight 64-bit lanes of a 512-bit vector by
// bisection. Each step shuffles the two halves of the still-relevant lanes
// into the low positions of two vectors and combines them with
// _mm512_<IntrinName>, so log2(8) = 3 steps leave the result in lane 0.
// Shuffle index -1 marks a result lane that later steps do not read.
// Only intrinsics from the AVX512F feature are used.
// Vec512 is overwritten at every step and the expansion ends in a return
// statement, so the macro must form the body of a function whose argument is
// a modifiable vector.
//
// Vec512     - Vector with size of 512.
// IntrinName - Can be one of following: {max|min}_{epi64|epu64|pd}, for
//              example max_epi64 to reduce with _mm512_max_epi64.
// T1         - Can get 'i' for int and 'd' for double. [__m512{i|d}]
// T2         - Can get 'i' for int and 'f' for floating point. [__v8d{i|f}]

#define _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512,                              \
            0, 1, 2, 3, -1, -1, -1, -1),                                       \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512,                              \
            4, 5, 6, 7, -1, -1, -1, -1));                                      \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512,                              \
            0, 1, -1, -1, -1, -1, -1, -1),                                     \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512,                              \
            2, 3, -1, -1, -1, -1, -1, -1));                                    \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512,                              \
            0, -1, -1, -1, -1, -1, -1, -1),                                    \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v8d##T2)Vec512, (__v8d##T2)Vec512,                              \
            1, -1, -1, -1, -1, -1, -1, -1));                                   \
    return Vec512[0];                                                          \
  })
10160
10161static __inline__ long long __DEFAULT_FN_ATTRS
10162_mm512_reduce_max_epi64(__m512i __V) {
10163  _mm512_reduce_maxMin_64bit(__V, max_epi64, i, i);
10164}
10165
10166static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10167_mm512_reduce_max_epu64(__m512i __V) {
10168  _mm512_reduce_maxMin_64bit(__V, max_epu64, i, i);
10169}
10170
10171static __inline__ double __DEFAULT_FN_ATTRS
10172_mm512_reduce_max_pd(__m512d __V) {
10173  _mm512_reduce_maxMin_64bit(__V, max_pd, d, f);
10174}
10175
10176static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_min_epi64
10177(__m512i __V) {
10178  _mm512_reduce_maxMin_64bit(__V, min_epi64, i, i);
10179}
10180
10181static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10182_mm512_reduce_min_epu64(__m512i __V) {
10183  _mm512_reduce_maxMin_64bit(__V, min_epu64, i, i);
10184}
10185
10186static __inline__ double __DEFAULT_FN_ATTRS
10187_mm512_reduce_min_pd(__m512d __V) {
10188  _mm512_reduce_maxMin_64bit(__V, min_pd, d, f);
10189}
10190
// Masked form of _mm512_reduce_maxMin_64bit. The select builtin is called
// with (Mask, Vec512, Vec512Neutral) and its result is stored back into
// Vec512, which is then reduced; the intent is that lanes not selected by
// Mask hold the identity element and so cannot influence the result.
// Vec512 is assigned to, so it must name a modifiable vector object.
//
// Vec512        - Vector with size 512.
// Vec512Neutral - A 512 length vector with elements set to the identity
//                 element.
//                 Identity element: {max_epi,0x8000000000000000}
//                                   {max_epu,0x0000000000000000}
//                                   {max_pd, 0xFFF0000000000000}
//                                   {min_epi,0x7FFFFFFFFFFFFFFF}
//                                   {min_epu,0xFFFFFFFFFFFFFFFF}
//                                   {min_pd, 0x7FF0000000000000}
// IntrinName    - Can be one of following: {max|min}_{epi64|epu64|pd}, for
//                 example max_epi64 to reduce with _mm512_max_epi64.
// T1            - Can get 'i' for int and 'd' for double. [__m512{i|d}]
// T2            - Can get 'i' for int and 'f' for floating point.
//                 [__v8d{i|f}]
// T3            - Can get 'q' q word and 'pd' for packed double.
//                 [__builtin_ia32_select{q|pd}_512]
// Mask          - Intrinsic Mask
//
// Mask and Vec512Neutral are parenthesized so that an expression argument is
// cast as a whole.

#define _mm512_mask_reduce_maxMin_64bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
        (__mmask8)(Mask), (__v8d##T2)Vec512, (__v8d##T2)(Vec512Neutral));      \
    _mm512_reduce_maxMin_64bit(Vec512, IntrinName, T1, T2);                    \
  })
10217
10218static __inline__ long long __DEFAULT_FN_ATTRS
10219_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
10220  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x8000000000000000),
10221                                  max_epi64, i, i, q, __M);
10222}
10223
10224static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10225_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
10226  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x0000000000000000),
10227                                  max_epu64, i, i, q, __M);
10228}
10229
10230static __inline__ double __DEFAULT_FN_ATTRS
10231_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
10232  _mm512_mask_reduce_maxMin_64bit(__V, -_mm512_set1_pd(__builtin_inf()),
10233                                  max_pd, d, f, pd, __M);
10234}
10235
10236static __inline__ long long __DEFAULT_FN_ATTRS
10237_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
10238  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),
10239                                  min_epi64, i, i, q, __M);
10240}
10241
10242static __inline__ unsigned long long __DEFAULT_FN_ATTRS
10243_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
10244  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_epi64(0xFFFFFFFFFFFFFFFF),
10245                                  min_epu64, i, i, q, __M);
10246}
10247
10248static __inline__ double __DEFAULT_FN_ATTRS
10249_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
10250  _mm512_mask_reduce_maxMin_64bit(__V, _mm512_set1_pd(__builtin_inf()),
10251                                  min_pd, d, f, pd, __M);
10252}
10253
// Max/min reduction of the sixteen 32-bit lanes of a 512-bit vector by
// bisection. Each of the four steps shuffles the two halves of the
// still-relevant lanes into the low positions of two vectors and combines
// them with _mm512_<IntrinName>, leaving the result in lane 0. Shuffle index
// -1 marks a result lane that later steps do not read.
// Vec512 is overwritten at every step and the expansion ends in a return
// statement, so the macro must form the body of a function whose argument is
// a modifiable vector.
//
// Vec512     - Vector with size 512.
// IntrinName - Can be one of following: {max|min}_{epi32|epu32|ps}, for
//              example max_epi32 to reduce with _mm512_max_epi32.
// T1         - Can get 'i' for int and ' ' for packed single. [__m512{i|}]
// T2         - Can get 'i' for int and 'f' for float. [__v16s{i|f}]

#define _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2) __extension__({ \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            0, 1, 2, 3, 4, 5, 6, 7,                                            \
            -1, -1, -1, -1, -1, -1, -1, -1),                                   \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            8, 9, 10, 11, 12, 13, 14, 15,                                      \
            -1, -1, -1, -1, -1, -1, -1, -1));                                  \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            0, 1, 2, 3, -1, -1, -1, -1,                                        \
            -1, -1, -1, -1, -1, -1, -1, -1),                                   \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            4, 5, 6, 7, -1, -1, -1, -1,                                        \
            -1, -1, -1, -1, -1, -1, -1, -1));                                  \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            0, 1, -1, -1, -1, -1, -1, -1,                                      \
            -1, -1, -1, -1, -1, -1, -1, -1),                                   \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            2, 3, -1, -1, -1, -1, -1, -1,                                      \
            -1, -1, -1, -1, -1, -1, -1, -1));                                  \
    Vec512 = _mm512_##IntrinName(                                              \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            0, -1, -1, -1, -1, -1, -1, -1,                                     \
            -1, -1, -1, -1, -1, -1, -1, -1),                                   \
        (__m512##T1)__builtin_shufflevector(                                   \
            (__v16s##T2)Vec512, (__v16s##T2)Vec512,                            \
            1, -1, -1, -1, -1, -1, -1, -1,                                     \
            -1, -1, -1, -1, -1, -1, -1, -1));                                  \
    return Vec512[0];                                                          \
  })
10307
10308static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_max_epi32(__m512i a) {
10309  _mm512_reduce_maxMin_32bit(a, max_epi32, i, i);
10310}
10311
10312static __inline__ unsigned int __DEFAULT_FN_ATTRS
10313_mm512_reduce_max_epu32(__m512i a) {
10314  _mm512_reduce_maxMin_32bit(a, max_epu32, i, i);
10315}
10316
10317static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_max_ps(__m512 a) {
10318  _mm512_reduce_maxMin_32bit(a, max_ps, , f);
10319}
10320
10321static __inline__ int __DEFAULT_FN_ATTRS _mm512_reduce_min_epi32(__m512i a) {
10322  _mm512_reduce_maxMin_32bit(a, min_epi32, i, i);
10323}
10324
10325static __inline__ unsigned int __DEFAULT_FN_ATTRS
10326_mm512_reduce_min_epu32(__m512i a) {
10327  _mm512_reduce_maxMin_32bit(a, min_epu32, i, i);
10328}
10329
10330static __inline__ float __DEFAULT_FN_ATTRS _mm512_reduce_min_ps(__m512 a) {
10331  _mm512_reduce_maxMin_32bit(a, min_ps, , f);
10332}
10333
// Masked form of _mm512_reduce_maxMin_32bit. The select builtin is called
// with (Mask, Vec512, Vec512Neutral) and its result is stored back into
// Vec512, which is then reduced; the intent is that lanes not selected by
// Mask hold the identity element and so cannot influence the result.
// Vec512 is assigned to, so it must name a modifiable vector object.
//
// Vec512        - Vector with size 512.
// Vec512Neutral - A 512 length vector with elements set to the identity
//                 element.
//                 Identity element: {max_epi,0x80000000}
//                                   {max_epu,0x00000000}
//                                   {max_ps, 0xFF800000}
//                                   {min_epi,0x7FFFFFFF}
//                                   {min_epu,0xFFFFFFFF}
//                                   {min_ps, 0x7F800000}
// IntrinName    - Can be one of following: {max|min}_{epi32|epu32|ps}, for
//                 example max_epi32 to reduce with _mm512_max_epi32.
// T1            - Can get 'i' for int and ' ' for packed single. [__m512{i|}]
// T2            - Can get 'i' for int and 'f' for float. [__v16s{i|f}]
// T3            - Can get 'd' for d-word and 'ps' for packed single.
//                 [__builtin_ia32_select{d|ps}_512]
// Mask          - Intrinsic Mask
//
// Mask and Vec512Neutral are parenthesized so that an expression argument is
// cast as a whole.

#define _mm512_mask_reduce_maxMin_32bit(Vec512, Vec512Neutral, IntrinName, T1, \
                                        T2, T3, Mask)                          \
  __extension__({                                                              \
    Vec512 = (__m512##T1)__builtin_ia32_select##T3##_512(                      \
        (__mmask16)(Mask), (__v16s##T2)Vec512, (__v16s##T2)(Vec512Neutral));   \
    _mm512_reduce_maxMin_32bit(Vec512, IntrinName, T1, T2);                    \
  })
10360
10361static __inline__ int __DEFAULT_FN_ATTRS
10362_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
10363  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x80000000), max_epi32,
10364                                  i, i, d, __M);
10365}
10366
10367static __inline__ unsigned int __DEFAULT_FN_ATTRS
10368_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
10369  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x00000000), max_epu32,
10370                                  i, i, d, __M);
10371}
10372
10373static __inline__ float __DEFAULT_FN_ATTRS
10374_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
10375  _mm512_mask_reduce_maxMin_32bit(__V,-_mm512_set1_ps(__builtin_inff()), max_ps, , f,
10376                                  ps, __M);
10377}
10378
10379static __inline__ int __DEFAULT_FN_ATTRS
10380_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
10381  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0x7FFFFFFF), min_epi32,
10382                                  i, i, d, __M);
10383}
10384
10385static __inline__ unsigned int __DEFAULT_FN_ATTRS
10386_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
10387  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_epi32(0xFFFFFFFF), min_epu32,
10388                                  i, i, d, __M);
10389}
10390
10391static __inline__ float __DEFAULT_FN_ATTRS
10392_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
10393  _mm512_mask_reduce_maxMin_32bit(__V, _mm512_set1_ps(__builtin_inff()), min_ps, , f,
10394                                  ps, __M);
10395}
10396
10397#undef __DEFAULT_FN_ATTRS
10398
10399#endif // __AVX512FINTRIN_H
10400