1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23#ifndef __IMMINTRIN_H 24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 25#endif 26 27#ifndef __AVX512FINTRIN_H 28#define __AVX512FINTRIN_H 29 30typedef char __v64qi __attribute__((__vector_size__(64))); 31typedef short __v32hi __attribute__((__vector_size__(64))); 32typedef double __v8df __attribute__((__vector_size__(64))); 33typedef float __v16sf __attribute__((__vector_size__(64))); 34typedef long long __v8di __attribute__((__vector_size__(64))); 35typedef int __v16si __attribute__((__vector_size__(64))); 36 37typedef float __m512 __attribute__((__vector_size__(64))); 38typedef double __m512d __attribute__((__vector_size__(64))); 39typedef long long __m512i __attribute__((__vector_size__(64))); 40 41typedef unsigned char __mmask8; 42typedef unsigned short __mmask16; 43 44/* Rounding mode macros. */ 45#define _MM_FROUND_TO_NEAREST_INT 0x00 46#define _MM_FROUND_TO_NEG_INF 0x01 47#define _MM_FROUND_TO_POS_INF 0x02 48#define _MM_FROUND_TO_ZERO 0x03 49#define _MM_FROUND_CUR_DIRECTION 0x04 50 51typedef enum 52{ 53 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, 54 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, 55 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, 56 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, 57 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, 58 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, 59 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, 60 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, 61 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, 62 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, 63 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, 64 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, 65 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, 66 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, 67 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, 68 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, 69 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, 70 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, 71 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, 72 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, 73 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, 74 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, 75 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, 76 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, 77 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, 78 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, 79 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, 80 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, 81 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, 82 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, 83 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, 84 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, 85 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, 86 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, 87 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, 88 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, 89 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, 90 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, 91 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, 92 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, 93 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, 94 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, 95 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, 96 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, 97 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, 98 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, 99 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, 100 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, 101 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, 102 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, 103 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, 104 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, 105 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, 106 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, 107 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, 108 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, 109 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, 110 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, 111 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, 112 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, 113 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, 114 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, 115 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, 116 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, 117 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, 118 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, 119 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, 120 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, 121 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, 122 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, 123 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, 124 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, 125 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, 126 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, 127 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, 128 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, 129 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, 130 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, 131 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, 132 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, 133 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, 134 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, 135 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, 136 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, 137 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, 138 _MM_PERM_DDDD = 0xFF 139} _MM_PERM_ENUM; 140 141typedef enum 142{ 143 _MM_MANT_NORM_1_2, /* interval [1, 2) */ 144 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ 145 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ 146 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ 147} _MM_MANTISSA_NORM_ENUM; 148 149typedef enum 150{ 151 _MM_MANT_SIGN_src, /* sign = sign(SRC) */ 152 _MM_MANT_SIGN_zero, /* sign = 0 */ 153 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ 154} _MM_MANTISSA_SIGN_ENUM; 155 156/* Define the default attributes for the functions in this file. */ 157#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 158 159/* Create vectors with repeated elements */ 160 161static __inline __m512i __DEFAULT_FN_ATTRS 162_mm512_setzero_si512(void) 163{ 164 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 165} 166 167static __inline__ __m512d __DEFAULT_FN_ATTRS 168_mm512_undefined_pd() 169{ 170 return (__m512d)__builtin_ia32_undef512(); 171} 172 173static __inline__ __m512 __DEFAULT_FN_ATTRS 174_mm512_undefined() 175{ 176 return (__m512)__builtin_ia32_undef512(); 177} 178 179static __inline__ __m512 __DEFAULT_FN_ATTRS 180_mm512_undefined_ps() 181{ 182 return (__m512)__builtin_ia32_undef512(); 183} 184 185static __inline__ __m512i __DEFAULT_FN_ATTRS 186_mm512_undefined_epi32() 187{ 188 return (__m512i)__builtin_ia32_undef512(); 189} 190static __inline__ __m512i __DEFAULT_FN_ATTRS 191_mm512_broadcastd_epi32 (__m128i __A) 192{ 193 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 194 (__v16si) 195 _mm512_undefined_epi32 (), 196 (__mmask16) -1); 197} 198 199static __inline__ __m512i __DEFAULT_FN_ATTRS 200_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) 201{ 202 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 203 (__v16si) __O, __M); 204} 205 206static __inline__ __m512i __DEFAULT_FN_ATTRS 207_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) 208{ 209 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 210 (__v16si) 211 _mm512_setzero_si512 (), 212 __M); 213} 214 215static __inline__ __m512i __DEFAULT_FN_ATTRS 216_mm512_broadcastq_epi64 (__m128i __A) 217{ 218 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 219 (__v8di) 220 _mm512_undefined_pd (), 221 (__mmask8) -1); 222} 223 224static __inline__ __m512i __DEFAULT_FN_ATTRS 225_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) 226{ 227 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 228 (__v8di) __O, __M); 229} 230 231static __inline__ __m512i __DEFAULT_FN_ATTRS 232_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 233{ 234 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 235 (__v8di) 236 _mm512_setzero_si512 (), 237 __M); 238} 239 240static __inline __m512i __DEFAULT_FN_ATTRS 241_mm512_maskz_set1_epi32(__mmask16 __M, int __A) 242{ 243 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, 244 (__v16si) 245 _mm512_setzero_si512 (), 246 __M); 247} 248 249static __inline __m512i __DEFAULT_FN_ATTRS 250_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) 251{ 252#ifdef __x86_64__ 253 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, 254 (__v8di) 255 _mm512_setzero_si512 (), 256 __M); 257#else 258 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, 259 (__v8di) 260 _mm512_setzero_si512 (), 261 __M); 262#endif 263} 264 265static __inline __m512 __DEFAULT_FN_ATTRS 266_mm512_setzero_ps(void) 267{ 268 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 269 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 270} 271static __inline __m512d __DEFAULT_FN_ATTRS 272_mm512_setzero_pd(void) 273{ 274 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 275} 276 277static __inline __m512 __DEFAULT_FN_ATTRS 278_mm512_set1_ps(float __w) 279{ 280 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, 281 __w, __w, __w, __w, __w, __w, __w, __w }; 282} 283 284static __inline __m512d __DEFAULT_FN_ATTRS 285_mm512_set1_pd(double __w) 286{ 287 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; 288} 289 290static __inline __m512i __DEFAULT_FN_ATTRS 291_mm512_set1_epi8(char __w) 292{ 293 return (__m512i)(__v64qi){ __w, __w, __w, __w, __w, __w, __w, __w, 294 __w, __w, __w, __w, __w, __w, __w, __w, 295 __w, __w, __w, __w, __w, __w, __w, __w, 296 __w, __w, __w, __w, __w, __w, __w, __w, 297 __w, __w, __w, __w, __w, __w, __w, __w, 298 __w, __w, __w, __w, __w, __w, __w, __w, 299 __w, __w, __w, __w, __w, __w, __w, __w, 300 __w, __w, __w, __w, __w, __w, __w, __w }; 301} 302 303static __inline __m512i __DEFAULT_FN_ATTRS 304_mm512_set1_epi16(short __w) 305{ 306 return (__m512i)(__v32hi){ __w, __w, __w, __w, __w, __w, __w, __w, 307 __w, __w, __w, __w, __w, __w, __w, __w, 308 __w, __w, __w, __w, __w, __w, __w, __w, 309 __w, __w, __w, __w, __w, __w, __w, __w }; 310} 311 312static __inline __m512i __DEFAULT_FN_ATTRS 313_mm512_set1_epi32(int __s) 314{ 315 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, 316 __s, __s, __s, __s, __s, __s, __s, __s }; 317} 318 319static __inline __m512i __DEFAULT_FN_ATTRS 320_mm512_set1_epi64(long long __d) 321{ 322 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; 323} 324 325static __inline__ __m512 __DEFAULT_FN_ATTRS 326_mm512_broadcastss_ps(__m128 __X) 327{ 328 float __f = __X[0]; 329 return (__v16sf){ __f, __f, __f, __f, 330 __f, __f, __f, __f, 331 __f, __f, __f, __f, 332 __f, __f, __f, __f }; 333} 334 335static __inline__ __m512d __DEFAULT_FN_ATTRS 336_mm512_broadcastsd_pd(__m128d __X) 337{ 338 double __d = __X[0]; 339 return (__v8df){ __d, __d, __d, __d, 340 __d, __d, __d, __d }; 341} 342 343/* Cast between vector types */ 344 345static __inline __m512d __DEFAULT_FN_ATTRS 346_mm512_castpd256_pd512(__m256d __a) 347{ 348 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); 349} 350 351static __inline __m512 __DEFAULT_FN_ATTRS 352_mm512_castps256_ps512(__m256 __a) 353{ 354 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 355 -1, -1, -1, -1, -1, -1, -1, -1); 356} 357 358static __inline __m128d __DEFAULT_FN_ATTRS 359_mm512_castpd512_pd128(__m512d __a) 360{ 361 return __builtin_shufflevector(__a, __a, 0, 1); 362} 363 364static __inline __m256d __DEFAULT_FN_ATTRS 365_mm512_castpd512_pd256 (__m512d __A) 366{ 367 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); 368} 369 370static __inline __m128 __DEFAULT_FN_ATTRS 371_mm512_castps512_ps128(__m512 __a) 372{ 373 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); 374} 375 376static __inline __m256 __DEFAULT_FN_ATTRS 377_mm512_castps512_ps256 (__m512 __A) 378{ 379 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); 380} 381 382static __inline __m512 __DEFAULT_FN_ATTRS 383_mm512_castpd_ps (__m512d __A) 384{ 385 return (__m512) (__A); 386} 387 388static __inline __m512i __DEFAULT_FN_ATTRS 389_mm512_castpd_si512 (__m512d __A) 390{ 391 return (__m512i) (__A); 392} 393 394static __inline__ __m512d __DEFAULT_FN_ATTRS 395_mm512_castpd128_pd512 (__m128d __A) 396{ 397 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); 398} 399 400static __inline __m512d __DEFAULT_FN_ATTRS 401_mm512_castps_pd (__m512 __A) 402{ 403 return (__m512d) (__A); 404} 405 406static __inline __m512i __DEFAULT_FN_ATTRS 407_mm512_castps_si512 (__m512 __A) 408{ 409 return (__m512i) (__A); 410} 411 412static __inline__ __m512 __DEFAULT_FN_ATTRS 413_mm512_castps128_ps512 (__m128 __A) 414{ 415 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); 416} 417 418static __inline__ __m512i __DEFAULT_FN_ATTRS 419_mm512_castsi128_si512 (__m128i __A) 420{ 421 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); 422} 423 424static __inline__ __m512i __DEFAULT_FN_ATTRS 425_mm512_castsi256_si512 (__m256i __A) 426{ 427 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1); 428} 429 430static __inline __m512 __DEFAULT_FN_ATTRS 431_mm512_castsi512_ps (__m512i __A) 432{ 433 return (__m512) (__A); 434} 435 436static __inline __m512d __DEFAULT_FN_ATTRS 437_mm512_castsi512_pd (__m512i __A) 438{ 439 return (__m512d) (__A); 440} 441 442static __inline __m128i __DEFAULT_FN_ATTRS 443_mm512_castsi512_si128 (__m512i __A) 444{ 445 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); 446} 447 448static __inline __m256i __DEFAULT_FN_ATTRS 449_mm512_castsi512_si256 (__m512i __A) 450{ 451 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); 452} 453 454/* Bitwise operators */ 455static __inline__ __m512i __DEFAULT_FN_ATTRS 456_mm512_and_epi32(__m512i __a, __m512i __b) 457{ 458 return (__m512i)((__v16si)__a & (__v16si)__b); 459} 460 461static __inline__ __m512i __DEFAULT_FN_ATTRS 462_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 463{ 464 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a, 465 (__v16si) __b, 466 (__v16si) __src, 467 (__mmask16) __k); 468} 469static __inline__ __m512i __DEFAULT_FN_ATTRS 470_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) 471{ 472 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a, 473 (__v16si) __b, 474 (__v16si) 475 _mm512_setzero_si512 (), 476 (__mmask16) __k); 477} 478 479static __inline__ __m512i __DEFAULT_FN_ATTRS 480_mm512_and_epi64(__m512i __a, __m512i __b) 481{ 482 return (__m512i)((__v8di)__a & (__v8di)__b); 483} 484 485static __inline__ __m512i __DEFAULT_FN_ATTRS 486_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 487{ 488 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a, 489 (__v8di) __b, 490 (__v8di) __src, 491 (__mmask8) __k); 492} 493static __inline__ __m512i __DEFAULT_FN_ATTRS 494_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) 495{ 496 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a, 497 (__v8di) __b, 498 (__v8di) 499 _mm512_setzero_si512 (), 500 (__mmask8) __k); 501} 502 503static __inline__ __m512i __DEFAULT_FN_ATTRS 504_mm512_andnot_si512 (__m512i __A, __m512i __B) 505{ 506 return (__m512i)(~(__A) & __B); 507} 508 509static __inline__ __m512i __DEFAULT_FN_ATTRS 510_mm512_andnot_epi32 (__m512i __A, __m512i __B) 511{ 512 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 513 (__v16si) __B, 514 (__v16si) 515 _mm512_setzero_si512 (), 516 (__mmask16) -1); 517} 518 519static __inline__ __m512i __DEFAULT_FN_ATTRS 520_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 521{ 522 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 523 (__v16si) __B, 524 (__v16si) __W, 525 (__mmask16) __U); 526} 527 528static __inline__ __m512i __DEFAULT_FN_ATTRS 529_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 530{ 531 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 532 (__v16si) __B, 533 (__v16si) 534 _mm512_setzero_si512 (), 535 (__mmask16) __U); 536} 537 538static __inline__ __m512i __DEFAULT_FN_ATTRS 539_mm512_andnot_epi64 (__m512i __A, __m512i __B) 540{ 541 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 542 (__v8di) __B, 543 (__v8di) 544 _mm512_setzero_si512 (), 545 (__mmask8) -1); 546} 547 548static __inline__ __m512i __DEFAULT_FN_ATTRS 549_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 550{ 551 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 552 (__v8di) __B, 553 (__v8di) __W, __U); 554} 555 556static __inline__ __m512i __DEFAULT_FN_ATTRS 557_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 558{ 559 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 560 (__v8di) __B, 561 (__v8di) 562 _mm512_setzero_pd (), 563 __U); 564} 565static __inline__ __m512i __DEFAULT_FN_ATTRS 566_mm512_or_epi32(__m512i __a, __m512i __b) 567{ 568 return (__m512i)((__v16si)__a | (__v16si)__b); 569} 570 571static __inline__ __m512i __DEFAULT_FN_ATTRS 572_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 573{ 574 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, 575 (__v16si) __b, 576 (__v16si) __src, 577 (__mmask16) __k); 578} 579static __inline__ __m512i __DEFAULT_FN_ATTRS 580_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) 581{ 582 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, 583 (__v16si) __b, 584 (__v16si) 585 _mm512_setzero_si512 (), 586 (__mmask16) __k); 587} 588 589static __inline__ __m512i __DEFAULT_FN_ATTRS 590_mm512_or_epi64(__m512i __a, __m512i __b) 591{ 592 return (__m512i)((__v8di)__a | (__v8di)__b); 593} 594 595static __inline__ __m512i __DEFAULT_FN_ATTRS 596_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 597{ 598 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, 599 (__v8di) __b, 600 (__v8di) __src, 601 (__mmask8) __k); 602} 603static __inline__ __m512i __DEFAULT_FN_ATTRS 604_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) 605{ 606 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, 607 (__v8di) __b, 608 (__v8di) 609 _mm512_setzero_si512 (), 610 (__mmask8) __k); 611} 612 613static __inline__ __m512i __DEFAULT_FN_ATTRS 614_mm512_xor_epi32(__m512i __a, __m512i __b) 615{ 616 return (__m512i)((__v16si)__a ^ (__v16si)__b); 617} 618 619static __inline__ __m512i __DEFAULT_FN_ATTRS 620_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 621{ 622 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, 623 (__v16si) __b, 624 (__v16si) __src, 625 (__mmask16) __k); 626} 627static __inline__ __m512i __DEFAULT_FN_ATTRS 628_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) 629{ 630 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, 631 (__v16si) __b, 632 (__v16si) 633 _mm512_setzero_si512 (), 634 (__mmask16) __k); 635} 636 637static __inline__ __m512i __DEFAULT_FN_ATTRS 638_mm512_xor_epi64(__m512i __a, __m512i __b) 639{ 640 return (__m512i)((__v8di)__a ^ (__v8di)__b); 641} 642 643static __inline__ __m512i __DEFAULT_FN_ATTRS 644_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 645{ 646 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, 647 (__v8di) __b, 648 (__v8di) __src, 649 (__mmask8) __k); 650} 651static __inline__ __m512i __DEFAULT_FN_ATTRS 652_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) 653{ 654 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, 655 (__v8di) __b, 656 (__v8di) 657 _mm512_setzero_si512 (), 658 (__mmask8) __k); 659} 660 661static __inline__ __m512i __DEFAULT_FN_ATTRS 662_mm512_and_si512(__m512i __a, __m512i __b) 663{ 664 return (__m512i)((__v8di)__a & (__v8di)__b); 665} 666 667static __inline__ __m512i __DEFAULT_FN_ATTRS 668_mm512_or_si512(__m512i __a, __m512i __b) 669{ 670 return (__m512i)((__v8di)__a | (__v8di)__b); 671} 672 673static __inline__ __m512i __DEFAULT_FN_ATTRS 674_mm512_xor_si512(__m512i __a, __m512i __b) 675{ 676 return (__m512i)((__v8di)__a ^ (__v8di)__b); 677} 678/* Arithmetic */ 679 680static __inline __m512d __DEFAULT_FN_ATTRS 681_mm512_add_pd(__m512d __a, __m512d __b) 682{ 683 return (__m512d)((__v8df)__a + (__v8df)__b); 684} 685 686static __inline __m512 __DEFAULT_FN_ATTRS 687_mm512_add_ps(__m512 __a, __m512 __b) 688{ 689 return (__m512)((__v16sf)__a + (__v16sf)__b); 690} 691 692static __inline __m512d __DEFAULT_FN_ATTRS 693_mm512_mul_pd(__m512d __a, __m512d __b) 694{ 695 return (__m512d)((__v8df)__a * (__v8df)__b); 696} 697 698static __inline __m512 __DEFAULT_FN_ATTRS 699_mm512_mul_ps(__m512 __a, __m512 __b) 700{ 701 return (__m512)((__v16sf)__a * (__v16sf)__b); 702} 703 704static __inline __m512d __DEFAULT_FN_ATTRS 705_mm512_sub_pd(__m512d __a, __m512d __b) 706{ 707 return (__m512d)((__v8df)__a - (__v8df)__b); 708} 709 710static __inline __m512 __DEFAULT_FN_ATTRS 711_mm512_sub_ps(__m512 __a, __m512 __b) 712{ 713 return (__m512)((__v16sf)__a - (__v16sf)__b); 714} 715 716static __inline__ __m512i __DEFAULT_FN_ATTRS 717_mm512_add_epi64 (__m512i __A, __m512i __B) 718{ 719 return (__m512i) ((__v8di) __A + (__v8di) __B); 720} 721 722static __inline__ __m512i __DEFAULT_FN_ATTRS 723_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 724{ 725 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 726 (__v8di) __B, 727 (__v8di) __W, 728 (__mmask8) __U); 729} 730 731static __inline__ __m512i __DEFAULT_FN_ATTRS 732_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 733{ 734 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 735 (__v8di) __B, 736 (__v8di) 737 _mm512_setzero_si512 (), 738 (__mmask8) __U); 739} 740 741static __inline__ __m512i __DEFAULT_FN_ATTRS 742_mm512_sub_epi64 (__m512i __A, __m512i __B) 743{ 744 return (__m512i) ((__v8di) __A - (__v8di) __B); 745} 746 747static __inline__ __m512i __DEFAULT_FN_ATTRS 748_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 749{ 750 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 751 (__v8di) __B, 752 (__v8di) __W, 753 (__mmask8) __U); 754} 755 756static __inline__ __m512i __DEFAULT_FN_ATTRS 757_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 758{ 759 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 760 (__v8di) __B, 761 (__v8di) 762 _mm512_setzero_si512 (), 763 (__mmask8) __U); 764} 765 766static __inline__ __m512i __DEFAULT_FN_ATTRS 767_mm512_add_epi32 (__m512i __A, __m512i __B) 768{ 769 return (__m512i) ((__v16si) __A + (__v16si) __B); 770} 771 772static __inline__ __m512i __DEFAULT_FN_ATTRS 773_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 774{ 775 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 776 (__v16si) __B, 777 (__v16si) __W, 778 (__mmask16) __U); 779} 780 781static __inline__ __m512i __DEFAULT_FN_ATTRS 782_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 783{ 784 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 785 (__v16si) __B, 786 (__v16si) 787 _mm512_setzero_si512 (), 788 (__mmask16) __U); 789} 790 791static __inline__ __m512i __DEFAULT_FN_ATTRS 792_mm512_sub_epi32 (__m512i __A, __m512i __B) 793{ 794 return (__m512i) ((__v16si) __A - (__v16si) __B); 795} 796 797static __inline__ __m512i __DEFAULT_FN_ATTRS 798_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 799{ 800 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 801 (__v16si) __B, 802 (__v16si) __W, 803 (__mmask16) __U); 804} 805 806static __inline__ __m512i __DEFAULT_FN_ATTRS 807_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 808{ 809 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 810 (__v16si) __B, 811 (__v16si) 812 _mm512_setzero_si512 (), 813 (__mmask16) __U); 814} 815 816#define _mm512_mask_max_round_pd( __W, __U, __A, __B, __R) __extension__ ({ \ 817__builtin_ia32_maxpd512_mask ((__v8df)( __A),\ 818 (__v8df)( __B),\ 819 (__v8df)( __W),\ 820 (__mmask8)( __U),( __R));\ 821}) 822 823#define _mm512_maskz_max_round_pd( __U, __A, __B, __R) __extension__ ({ \ 824__builtin_ia32_maxpd512_mask ((__v8df)( __A),\ 825 (__v8df)( __B),\ 826 (__v8df)\ 827 _mm512_setzero_pd (),\ 828 (__mmask8)( __U),( __R));\ 829}) 830 831#define _mm512_max_round_pd( __A, __B, __R) __extension__ ({ \ 832__builtin_ia32_maxpd512_mask ((__v8df)( __A),\ 833 (__v8df)( __B),\ 834 (__v8df)\ 835 _mm512_undefined_pd (),\ 836 (__mmask8) -1,( __R));\ 837}) 838 839static __inline__ __m512d __DEFAULT_FN_ATTRS 840_mm512_max_pd(__m512d __A, __m512d __B) 841{ 842 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 843 (__v8df) __B, 844 (__v8df) 845 _mm512_setzero_pd (), 846 (__mmask8) -1, 847 _MM_FROUND_CUR_DIRECTION); 848} 849 850static __inline__ __m512d __DEFAULT_FN_ATTRS 851_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 852{ 853 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 854 (__v8df) __B, 855 (__v8df) __W, 856 (__mmask8) __U, 857 _MM_FROUND_CUR_DIRECTION); 858} 859 860static __inline__ __m512d __DEFAULT_FN_ATTRS 861_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 862{ 863 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 864 (__v8df) __B, 865 (__v8df) 866 _mm512_setzero_pd (), 867 (__mmask8) __U, 868 _MM_FROUND_CUR_DIRECTION); 869} 870 871#define _mm512_mask_max_round_ps( __W, __U, __A, __B, __R) __extension__ ({ \ 872__builtin_ia32_maxps512_mask ((__v16sf)( __A),\ 873 (__v16sf)( __B),\ 874 (__v16sf)( __W),\ 875 (__mmask16)( __U),( __R));\ 876}) 877 878#define _mm512_maskz_max_round_ps( __U, __A, __B, __R) __extension__ ({ \ 879__builtin_ia32_maxps512_mask ((__v16sf)( __A),\ 880 (__v16sf)( __B),\ 881 (__v16sf)\ 882 _mm512_setzero_ps (),\ 883 (__mmask16)( __U),( __R));\ 884}) 885 886#define _mm512_max_round_ps( __A, __B, __R) __extension__ ({ \ 887__builtin_ia32_maxps512_mask ((__v16sf)( __A),\ 888 (__v16sf)( __B),\ 889 (__v16sf)\ 890 _mm512_undefined_ps (),\ 891 (__mmask16) -1,( __R));\ 892}) 893 894static __inline__ __m512 __DEFAULT_FN_ATTRS 895_mm512_max_ps(__m512 __A, __m512 __B) 896{ 897 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 898 (__v16sf) __B, 899 (__v16sf) 900 _mm512_setzero_ps (), 901 (__mmask16) -1, 902 _MM_FROUND_CUR_DIRECTION); 903} 904 905static __inline__ __m512 __DEFAULT_FN_ATTRS 906_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 907{ 908 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 909 (__v16sf) __B, 910 (__v16sf) __W, 911 (__mmask16) __U, 912 _MM_FROUND_CUR_DIRECTION); 913} 914 915static __inline__ __m512 __DEFAULT_FN_ATTRS 916_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 917{ 918 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 919 (__v16sf) __B, 920 (__v16sf) 921 _mm512_setzero_ps (), 922 (__mmask16) __U, 923 _MM_FROUND_CUR_DIRECTION); 924} 925 926static __inline__ __m128 __DEFAULT_FN_ATTRS 927_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 928 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 929 (__v4sf) __B, 930 (__v4sf) __W, 931 (__mmask8) __U, 932 _MM_FROUND_CUR_DIRECTION); 933} 934 935static __inline__ __m128 __DEFAULT_FN_ATTRS 936_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { 937 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, 938 (__v4sf) __B, 939 (__v4sf) _mm_setzero_ps (), 940 (__mmask8) __U, 941 _MM_FROUND_CUR_DIRECTION); 942} 943 944#define _mm_max_round_ss(A, B, R) __extension__ ({ \ 945 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 946 (__v4sf)(__m128)(B), \ 947 (__v4sf)_mm_setzero_ps(), \ 948 (__mmask8)-1, (int)(R)); }) 949 950#define _mm_mask_max_round_ss(W, U, A, B, R) __extension__ ({ \ 951 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 952 (__v4sf)(__m128)(B), \ 953 (__v4sf)(__m128)(W), (__mmask8)(U), \ 954 (int)(R)); }) 955 956#define _mm_maskz_max_round_ss(U, A, B, R) __extension__ ({ \ 957 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 958 (__v4sf)(__m128)(B), \ 959 (__v4sf)_mm_setzero_ps(), \ 960 (__mmask8)(U), (int)(R)); }) 961 962static __inline__ __m128d __DEFAULT_FN_ATTRS 963_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 964 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 965 (__v2df) __B, 966 (__v2df) __W, 967 (__mmask8) __U, 968 _MM_FROUND_CUR_DIRECTION); 969} 970 971static __inline__ __m128d __DEFAULT_FN_ATTRS 972_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { 973 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, 974 (__v2df) __B, 975 (__v2df) _mm_setzero_pd (), 976 (__mmask8) __U, 977 _MM_FROUND_CUR_DIRECTION); 978} 979 980#define _mm_max_round_sd(A, B, R) __extension__ ({ \ 981 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 982 (__v2df)(__m128d)(B), \ 983 (__v2df)_mm_setzero_pd(), \ 984 (__mmask8)-1, (int)(R)); }) 985 986#define _mm_mask_max_round_sd(W, U, A, B, R) __extension__ ({ \ 987 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 988 (__v2df)(__m128d)(B), \ 989 (__v2df)(__m128d)(W), \ 990 (__mmask8)(U), (int)(R)); }) 991 992#define _mm_maskz_max_round_sd(U, A, B, R) __extension__ ({ \ 993 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 994 (__v2df)(__m128d)(B), \ 995 (__v2df)_mm_setzero_pd(), \ 996 (__mmask8)(U), (int)(R)); }) 997 998static __inline __m512i 999__DEFAULT_FN_ATTRS 1000_mm512_max_epi32(__m512i __A, __m512i __B) 1001{ 1002 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1003 (__v16si) __B, 1004 (__v16si) 1005 _mm512_setzero_si512 (), 1006 (__mmask16) -1); 1007} 1008 1009static __inline__ __m512i __DEFAULT_FN_ATTRS 1010_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1011{ 1012 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1013 (__v16si) __B, 1014 (__v16si) __W, __M); 1015} 1016 1017static __inline__ __m512i __DEFAULT_FN_ATTRS 1018_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1019{ 1020 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 1021 (__v16si) __B, 1022 (__v16si) 1023 _mm512_setzero_si512 (), 1024 __M); 1025} 1026 1027static __inline __m512i __DEFAULT_FN_ATTRS 1028_mm512_max_epu32(__m512i __A, __m512i __B) 1029{ 1030 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1031 (__v16si) __B, 1032 (__v16si) 1033 _mm512_setzero_si512 (), 1034 (__mmask16) -1); 1035} 1036 1037static __inline__ __m512i __DEFAULT_FN_ATTRS 1038_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1039{ 1040 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1041 (__v16si) __B, 1042 (__v16si) __W, __M); 1043} 1044 1045static __inline__ __m512i __DEFAULT_FN_ATTRS 1046_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 1047{ 1048 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 1049 (__v16si) __B, 1050 (__v16si) 1051 _mm512_setzero_si512 (), 1052 __M); 1053} 1054 1055static __inline __m512i __DEFAULT_FN_ATTRS 1056_mm512_max_epi64(__m512i __A, __m512i __B) 1057{ 1058 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1059 (__v8di) __B, 1060 (__v8di) 1061 _mm512_setzero_si512 (), 1062 (__mmask8) -1); 1063} 1064 1065static __inline__ __m512i __DEFAULT_FN_ATTRS 1066_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1067{ 1068 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1069 (__v8di) __B, 1070 (__v8di) __W, __M); 1071} 1072 1073static __inline__ __m512i __DEFAULT_FN_ATTRS 1074_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 1075{ 1076 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 1077 (__v8di) __B, 1078 (__v8di) 1079 _mm512_setzero_si512 (), 1080 __M); 1081} 1082 1083static __inline __m512i __DEFAULT_FN_ATTRS 1084_mm512_max_epu64(__m512i __A, __m512i __B) 1085{ 1086 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1087 (__v8di) __B, 1088 (__v8di) 1089 _mm512_setzero_si512 (), 1090 (__mmask8) -1); 1091} 1092 1093static __inline__ __m512i __DEFAULT_FN_ATTRS 1094_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1095{ 1096 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1097 (__v8di) __B, 1098 (__v8di) __W, __M); 1099} 1100 1101static __inline__ __m512i __DEFAULT_FN_ATTRS 1102_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 1103{ 1104 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 1105 (__v8di) __B, 1106 (__v8di) 1107 _mm512_setzero_si512 (), 1108 __M); 1109} 1110 1111#define _mm512_mask_min_round_pd( __W, __U, __A, __B, __R) __extension__ ({ \ 1112__builtin_ia32_minpd512_mask ((__v8df)( __A),\ 1113 (__v8df)( __B),\ 1114 (__v8df)( __W),\ 1115 (__mmask8)( __U),( __R));\ 1116}) 1117 1118#define _mm512_maskz_min_round_pd( __U, __A, __B, __R) __extension__ ({ \ 1119__builtin_ia32_minpd512_mask ((__v8df)( __A),\ 1120 (__v8df)( __B),\ 1121 (__v8df)\ 1122 _mm512_setzero_pd (),\ 1123 (__mmask8)( __U),( __R));\ 1124}) 1125 1126#define _mm512_min_round_pd( __A, __B, __R) __extension__ ({ \ 1127__builtin_ia32_minpd512_mask ((__v8df)( __A),\ 1128 (__v8df)( __B),\ 1129 (__v8df)\ 1130 _mm512_undefined_pd (),\ 1131 (__mmask8) -1,( __R));\ 1132}) 1133 1134static __inline__ __m512d __DEFAULT_FN_ATTRS 1135_mm512_min_pd(__m512d __A, __m512d __B) 1136{ 1137 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1138 (__v8df) __B, 1139 (__v8df) 1140 _mm512_setzero_pd (), 1141 (__mmask8) -1, 1142 _MM_FROUND_CUR_DIRECTION); 1143} 1144 1145static __inline__ __m512d __DEFAULT_FN_ATTRS 1146_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 1147{ 1148 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1149 (__v8df) __B, 1150 (__v8df) __W, 1151 (__mmask8) __U, 1152 _MM_FROUND_CUR_DIRECTION); 1153} 1154 1155#define _mm512_mask_min_round_ps( __W, __U, __A, __B, __R) __extension__ ({ \ 1156__builtin_ia32_minps512_mask ((__v16sf)( __A),\ 1157 (__v16sf)( __B),\ 1158 (__v16sf)( __W),\ 1159 (__mmask16)( __U),( __R));\ 1160}) 1161 1162#define _mm512_maskz_min_round_ps( __U, __A, __B, __R) __extension__ ({ \ 1163__builtin_ia32_minps512_mask ((__v16sf)( __A),\ 1164 (__v16sf)( __B),\ 1165 (__v16sf)\ 1166 _mm512_setzero_ps (),\ 1167 (__mmask16)( __U),( __R));\ 1168}) 1169 1170#define _mm512_min_round_ps( __A, __B, __R) __extension__ ({ \ 1171__builtin_ia32_minps512_mask ((__v16sf)( __A),\ 1172 (__v16sf)( __B),\ 1173 (__v16sf)\ 1174 _mm512_undefined_ps (),\ 1175 (__mmask16) -1,( __R));\ 1176}) 1177 1178static __inline__ __m512d __DEFAULT_FN_ATTRS 1179_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 1180{ 1181 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 1182 (__v8df) __B, 1183 (__v8df) 1184 _mm512_setzero_pd (), 1185 (__mmask8) __U, 1186 _MM_FROUND_CUR_DIRECTION); 1187} 1188 1189static __inline__ __m512 __DEFAULT_FN_ATTRS 1190_mm512_min_ps(__m512 __A, __m512 __B) 1191{ 1192 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1193 (__v16sf) __B, 1194 (__v16sf) 1195 _mm512_setzero_ps (), 1196 (__mmask16) -1, 1197 _MM_FROUND_CUR_DIRECTION); 1198} 1199 1200static __inline__ __m512 __DEFAULT_FN_ATTRS 1201_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 1202{ 1203 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1204 (__v16sf) __B, 1205 (__v16sf) __W, 1206 (__mmask16) __U, 1207 _MM_FROUND_CUR_DIRECTION); 1208} 1209 1210static __inline__ __m512 __DEFAULT_FN_ATTRS 1211_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) 1212{ 1213 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 1214 (__v16sf) __B, 1215 (__v16sf) 1216 _mm512_setzero_ps (), 1217 (__mmask16) __U, 1218 _MM_FROUND_CUR_DIRECTION); 1219} 1220 1221static __inline__ __m128 __DEFAULT_FN_ATTRS 1222_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1223 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 1224 (__v4sf) __B, 1225 (__v4sf) __W, 1226 (__mmask8) __U, 1227 _MM_FROUND_CUR_DIRECTION); 1228} 1229 1230static __inline__ __m128 __DEFAULT_FN_ATTRS 1231_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1232 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, 1233 (__v4sf) __B, 1234 (__v4sf) _mm_setzero_ps (), 1235 (__mmask8) __U, 1236 _MM_FROUND_CUR_DIRECTION); 1237} 1238 1239#define _mm_min_round_ss(A, B, R) __extension__ ({ \ 1240 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1241 (__v4sf)(__m128)(B), \ 1242 (__v4sf)_mm_setzero_ps(), \ 1243 (__mmask8)-1, (int)(R)); }) 1244 1245#define _mm_mask_min_round_ss(W, U, A, B, R) __extension__ ({ \ 1246 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1247 (__v4sf)(__m128)(B), \ 1248 (__v4sf)(__m128)(W), (__mmask8)(U), \ 1249 (int)(R)); }) 1250 1251#define _mm_maskz_min_round_ss(U, A, B, R) __extension__ ({ \ 1252 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1253 (__v4sf)(__m128)(B), \ 1254 (__v4sf)_mm_setzero_ps(), \ 1255 (__mmask8)(U), (int)(R)); }) 1256 1257static __inline__ __m128d __DEFAULT_FN_ATTRS 1258_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1259 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 1260 (__v2df) __B, 1261 (__v2df) __W, 1262 (__mmask8) __U, 1263 _MM_FROUND_CUR_DIRECTION); 1264} 1265 1266static __inline__ __m128d __DEFAULT_FN_ATTRS 1267_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1268 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, 1269 (__v2df) __B, 1270 (__v2df) _mm_setzero_pd (), 1271 (__mmask8) __U, 1272 _MM_FROUND_CUR_DIRECTION); 1273} 1274 1275#define _mm_min_round_sd(A, B, R) __extension__ ({ \ 1276 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1277 (__v2df)(__m128d)(B), \ 1278 (__v2df)_mm_setzero_pd(), \ 1279 (__mmask8)-1, (int)(R)); }) 1280 1281#define _mm_mask_min_round_sd(W, U, A, B, R) __extension__ ({ \ 1282 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1283 (__v2df)(__m128d)(B), \ 1284 (__v2df)(__m128d)(W), \ 1285 (__mmask8)(U), (int)(R)); }) 1286 1287#define _mm_maskz_min_round_sd(U, A, B, R) __extension__ ({ \ 1288 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1289 (__v2df)(__m128d)(B), \ 1290 (__v2df)_mm_setzero_pd(), \ 1291 (__mmask8)(U), (int)(R)); }) 1292 1293static __inline __m512i 1294__DEFAULT_FN_ATTRS 1295_mm512_min_epi32(__m512i __A, __m512i __B) 1296{ 1297 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1298 (__v16si) __B, 1299 (__v16si) 1300 _mm512_setzero_si512 (), 1301 (__mmask16) -1); 1302} 1303 1304static __inline__ __m512i __DEFAULT_FN_ATTRS 1305_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1306{ 1307 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1308 (__v16si) __B, 1309 (__v16si) __W, __M); 1310} 1311 1312static __inline__ __m512i __DEFAULT_FN_ATTRS 1313_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1314{ 1315 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 1316 (__v16si) __B, 1317 (__v16si) 1318 _mm512_setzero_si512 (), 1319 __M); 1320} 1321 1322static __inline __m512i __DEFAULT_FN_ATTRS 1323_mm512_min_epu32(__m512i __A, __m512i __B) 1324{ 1325 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 1326 (__v16si) __B, 1327 (__v16si) 1328 _mm512_setzero_si512 (), 1329 (__mmask16) -1); 1330} 1331 1332static __inline__ __m512i __DEFAULT_FN_ATTRS 1333_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1334{ 1335 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 1336 (__v16si) __B, 1337 (__v16si) __W, __M); 1338} 1339 1340static __inline__ __m512i __DEFAULT_FN_ATTRS 1341_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 1342{ 1343 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 1344 (__v16si) __B, 1345 (__v16si) 1346 _mm512_setzero_si512 (), 1347 __M); 1348} 1349 1350static __inline __m512i __DEFAULT_FN_ATTRS 1351_mm512_min_epi64(__m512i __A, __m512i __B) 1352{ 1353 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 1354 (__v8di) __B, 1355 (__v8di) 1356 _mm512_setzero_si512 (), 1357 (__mmask8) -1); 1358} 1359 1360static __inline__ __m512i __DEFAULT_FN_ATTRS 1361_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1362{ 1363 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 1364 (__v8di) __B, 1365 (__v8di) __W, __M); 1366} 1367 1368static __inline__ __m512i __DEFAULT_FN_ATTRS 1369_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 1370{ 1371 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 1372 (__v8di) __B, 1373 (__v8di) 1374 _mm512_setzero_si512 (), 1375 __M); 1376} 1377 1378static __inline __m512i __DEFAULT_FN_ATTRS 1379_mm512_min_epu64(__m512i __A, __m512i __B) 1380{ 1381 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1382 (__v8di) __B, 1383 (__v8di) 1384 _mm512_setzero_si512 (), 1385 (__mmask8) -1); 1386} 1387 1388static __inline__ __m512i __DEFAULT_FN_ATTRS 1389_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 1390{ 1391 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1392 (__v8di) __B, 1393 (__v8di) __W, __M); 1394} 1395 1396static __inline__ __m512i __DEFAULT_FN_ATTRS 1397_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 1398{ 1399 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 1400 (__v8di) __B, 1401 (__v8di) 1402 _mm512_setzero_si512 (), 1403 __M); 1404} 1405 1406static __inline __m512i __DEFAULT_FN_ATTRS 1407_mm512_mul_epi32(__m512i __X, __m512i __Y) 1408{ 1409 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 1410 (__v16si) __Y, 1411 (__v8di) 1412 _mm512_setzero_si512 (), 1413 (__mmask8) -1); 1414} 1415 1416static __inline __m512i __DEFAULT_FN_ATTRS 1417_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 1418{ 1419 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 1420 (__v16si) __Y, 1421 (__v8di) __W, __M); 1422} 1423 1424static __inline __m512i __DEFAULT_FN_ATTRS 1425_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) 1426{ 1427 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 1428 (__v16si) __Y, 1429 (__v8di) 1430 _mm512_setzero_si512 (), 1431 __M); 1432} 1433 1434static __inline __m512i __DEFAULT_FN_ATTRS 1435_mm512_mul_epu32(__m512i __X, __m512i __Y) 1436{ 1437 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 1438 (__v16si) __Y, 1439 (__v8di) 1440 _mm512_setzero_si512 (), 1441 (__mmask8) -1); 1442} 1443 1444static __inline __m512i __DEFAULT_FN_ATTRS 1445_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 1446{ 1447 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 1448 (__v16si) __Y, 1449 (__v8di) __W, __M); 1450} 1451 1452static __inline __m512i __DEFAULT_FN_ATTRS 1453_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) 1454{ 1455 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 1456 (__v16si) __Y, 1457 (__v8di) 1458 _mm512_setzero_si512 (), 1459 __M); 1460} 1461 1462static __inline __m512i __DEFAULT_FN_ATTRS 1463_mm512_mullo_epi32 (__m512i __A, __m512i __B) 1464{ 1465 return (__m512i) ((__v16si) __A * (__v16si) __B); 1466} 1467 1468static __inline __m512i __DEFAULT_FN_ATTRS 1469_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 1470{ 1471 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 1472 (__v16si) __B, 1473 (__v16si) 1474 _mm512_setzero_si512 (), 1475 __M); 1476} 1477 1478static __inline __m512i __DEFAULT_FN_ATTRS 1479_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 1480{ 1481 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 1482 (__v16si) __B, 1483 (__v16si) __W, __M); 1484} 1485 1486#define _mm512_mask_sqrt_round_pd( __W, __U, __A, __R) __extension__ ({ \ 1487__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\ 1488 (__v8df)( __W),\ 1489 (__mmask8)( __U),( __R));\ 1490}) 1491 1492#define _mm512_maskz_sqrt_round_pd( __U, __A, __R) __extension__ ({ \ 1493__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\ 1494 (__v8df)\ 1495 _mm512_setzero_pd (),\ 1496 (__mmask8)( __U),( __R));\ 1497}) 1498 1499#define _mm512_sqrt_round_pd( __A, __R) __extension__ ({ \ 1500__builtin_ia32_sqrtpd512_mask ((__v8df)( __A),\ 1501 (__v8df)\ 1502 _mm512_undefined_pd (),\ 1503 (__mmask8) -1,( __R));\ 1504}) 1505 1506static __inline__ __m512d __DEFAULT_FN_ATTRS 1507_mm512_sqrt_pd(__m512d __a) 1508{ 1509 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a, 1510 (__v8df) _mm512_setzero_pd (), 1511 (__mmask8) -1, 1512 _MM_FROUND_CUR_DIRECTION); 1513} 1514 1515static __inline__ __m512d __DEFAULT_FN_ATTRS 1516_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 1517{ 1518 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1519 (__v8df) __W, 1520 (__mmask8) __U, 1521 _MM_FROUND_CUR_DIRECTION); 1522} 1523 1524static __inline__ __m512d __DEFAULT_FN_ATTRS 1525_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 1526{ 1527 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1528 (__v8df) 1529 _mm512_setzero_pd (), 1530 (__mmask8) __U, 1531 _MM_FROUND_CUR_DIRECTION); 1532} 1533 1534#define _mm512_mask_sqrt_round_ps( __W, __U, __A, __R) __extension__ ({ \ 1535__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\ 1536 (__v16sf)( __W),\ 1537 (__mmask16)( __U),( __R));\ 1538}) 1539 1540#define _mm512_maskz_sqrt_round_ps( __U, __A, __R) __extension__ ({ \ 1541__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\ 1542 (__v16sf)\ 1543 _mm512_setzero_ps (),\ 1544 (__mmask16)( __U),( __R));\ 1545}) 1546 1547#define _mm512_sqrt_round_ps( __A, __R) __extension__ ({ \ 1548__builtin_ia32_sqrtps512_mask ((__v16sf)( __A),\ 1549 (__v16sf)\ 1550 _mm512_undefined_ps (),\ 1551 (__mmask16) -1,( __R));\ 1552}) 1553 1554static __inline__ __m512 __DEFAULT_FN_ATTRS 1555_mm512_sqrt_ps(__m512 __a) 1556{ 1557 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a, 1558 (__v16sf) _mm512_setzero_ps (), 1559 (__mmask16) -1, 1560 _MM_FROUND_CUR_DIRECTION); 1561} 1562 1563static __inline__ __m512 __DEFAULT_FN_ATTRS 1564_mm512_mask_sqrt_ps(__m512 __W, __mmask8 __U, __m512 __A) 1565{ 1566 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, 1567 (__v16sf) __W, 1568 (__mmask16) __U, 1569 _MM_FROUND_CUR_DIRECTION); 1570} 1571 1572static __inline__ __m512 __DEFAULT_FN_ATTRS 1573_mm512_maskz_sqrt_ps( __mmask8 __U, __m512 __A) 1574{ 1575 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, 1576 (__v16sf) _mm512_setzero_ps (), 1577 (__mmask16) __U, 1578 _MM_FROUND_CUR_DIRECTION); 1579} 1580 1581static __inline__ __m512d __DEFAULT_FN_ATTRS 1582_mm512_rsqrt14_pd(__m512d __A) 1583{ 1584 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1585 (__v8df) 1586 _mm512_setzero_pd (), 1587 (__mmask8) -1);} 1588 1589static __inline__ __m512d __DEFAULT_FN_ATTRS 1590_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1591{ 1592 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1593 (__v8df) __W, 1594 (__mmask8) __U); 1595} 1596 1597static __inline__ __m512d __DEFAULT_FN_ATTRS 1598_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) 1599{ 1600 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1601 (__v8df) 1602 _mm512_setzero_pd (), 1603 (__mmask8) __U); 1604} 1605 1606static __inline__ __m512 __DEFAULT_FN_ATTRS 1607_mm512_rsqrt14_ps(__m512 __A) 1608{ 1609 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1610 (__v16sf) 1611 _mm512_setzero_ps (), 1612 (__mmask16) -1); 1613} 1614 1615static __inline__ __m512 __DEFAULT_FN_ATTRS 1616_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1617{ 1618 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1619 (__v16sf) __W, 1620 (__mmask16) __U); 1621} 1622 1623static __inline__ __m512 __DEFAULT_FN_ATTRS 1624_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) 1625{ 1626 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1627 (__v16sf) 1628 _mm512_setzero_ps (), 1629 (__mmask16) __U); 1630} 1631 1632static __inline__ __m128 __DEFAULT_FN_ATTRS 1633_mm_rsqrt14_ss(__m128 __A, __m128 __B) 1634{ 1635 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1636 (__v4sf) __B, 1637 (__v4sf) 1638 _mm_setzero_ps (), 1639 (__mmask8) -1); 1640} 1641 1642static __inline__ __m128 __DEFAULT_FN_ATTRS 1643_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1644{ 1645 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1646 (__v4sf) __B, 1647 (__v4sf) __W, 1648 (__mmask8) __U); 1649} 1650 1651static __inline__ __m128 __DEFAULT_FN_ATTRS 1652_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1653{ 1654 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, 1655 (__v4sf) __B, 1656 (__v4sf) _mm_setzero_ps (), 1657 (__mmask8) __U); 1658} 1659 1660static __inline__ __m128d __DEFAULT_FN_ATTRS 1661_mm_rsqrt14_sd(__m128d __A, __m128d __B) 1662{ 1663 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, 1664 (__v2df) __B, 1665 (__v2df) 1666 _mm_setzero_pd (), 1667 (__mmask8) -1); 1668} 1669 1670static __inline__ __m128d __DEFAULT_FN_ATTRS 1671_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1672{ 1673 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 1674 (__v2df) __B, 1675 (__v2df) __W, 1676 (__mmask8) __U); 1677} 1678 1679static __inline__ __m128d __DEFAULT_FN_ATTRS 1680_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1681{ 1682 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, 1683 (__v2df) __B, 1684 (__v2df) _mm_setzero_pd (), 1685 (__mmask8) __U); 1686} 1687 1688static __inline__ __m512d __DEFAULT_FN_ATTRS 1689_mm512_rcp14_pd(__m512d __A) 1690{ 1691 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1692 (__v8df) 1693 _mm512_setzero_pd (), 1694 (__mmask8) -1); 1695} 1696 1697static __inline__ __m512d __DEFAULT_FN_ATTRS 1698_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1699{ 1700 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1701 (__v8df) __W, 1702 (__mmask8) __U); 1703} 1704 1705static __inline__ __m512d __DEFAULT_FN_ATTRS 1706_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) 1707{ 1708 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1709 (__v8df) 1710 _mm512_setzero_pd (), 1711 (__mmask8) __U); 1712} 1713 1714static __inline__ __m512 __DEFAULT_FN_ATTRS 1715_mm512_rcp14_ps(__m512 __A) 1716{ 1717 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1718 (__v16sf) 1719 _mm512_setzero_ps (), 1720 (__mmask16) -1); 1721} 1722 1723static __inline__ __m512 __DEFAULT_FN_ATTRS 1724_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1725{ 1726 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1727 (__v16sf) __W, 1728 (__mmask16) __U); 1729} 1730 1731static __inline__ __m512 __DEFAULT_FN_ATTRS 1732_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) 1733{ 1734 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1735 (__v16sf) 1736 _mm512_setzero_ps (), 1737 (__mmask16) __U); 1738} 1739 1740static __inline__ __m128 __DEFAULT_FN_ATTRS 1741_mm_rcp14_ss(__m128 __A, __m128 __B) 1742{ 1743 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1744 (__v4sf) __B, 1745 (__v4sf) 1746 _mm_setzero_ps (), 1747 (__mmask8) -1); 1748} 1749 1750static __inline__ __m128 __DEFAULT_FN_ATTRS 1751_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1752{ 1753 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1754 (__v4sf) __B, 1755 (__v4sf) __W, 1756 (__mmask8) __U); 1757} 1758 1759static __inline__ __m128 __DEFAULT_FN_ATTRS 1760_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) 1761{ 1762 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, 1763 (__v4sf) __B, 1764 (__v4sf) _mm_setzero_ps (), 1765 (__mmask8) __U); 1766} 1767 1768static __inline__ __m128d __DEFAULT_FN_ATTRS 1769_mm_rcp14_sd(__m128d __A, __m128d __B) 1770{ 1771 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, 1772 (__v2df) __B, 1773 (__v2df) 1774 _mm_setzero_pd (), 1775 (__mmask8) -1); 1776} 1777 1778static __inline__ __m128d __DEFAULT_FN_ATTRS 1779_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1780{ 1781 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 1782 (__v2df) __B, 1783 (__v2df) __W, 1784 (__mmask8) __U); 1785} 1786 1787static __inline__ __m128d __DEFAULT_FN_ATTRS 1788_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) 1789{ 1790 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, 1791 (__v2df) __B, 1792 (__v2df) _mm_setzero_pd (), 1793 (__mmask8) __U); 1794} 1795 1796static __inline __m512 __DEFAULT_FN_ATTRS 1797_mm512_floor_ps(__m512 __A) 1798{ 1799 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1800 _MM_FROUND_FLOOR, 1801 (__v16sf) __A, -1, 1802 _MM_FROUND_CUR_DIRECTION); 1803} 1804 1805static __inline__ __m512 __DEFAULT_FN_ATTRS 1806_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) 1807{ 1808 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1809 _MM_FROUND_FLOOR, 1810 (__v16sf) __W, __U, 1811 _MM_FROUND_CUR_DIRECTION); 1812} 1813 1814static __inline __m512d __DEFAULT_FN_ATTRS 1815_mm512_floor_pd(__m512d __A) 1816{ 1817 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1818 _MM_FROUND_FLOOR, 1819 (__v8df) __A, -1, 1820 _MM_FROUND_CUR_DIRECTION); 1821} 1822 1823static __inline__ __m512d __DEFAULT_FN_ATTRS 1824_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) 1825{ 1826 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1827 _MM_FROUND_FLOOR, 1828 (__v8df) __W, __U, 1829 _MM_FROUND_CUR_DIRECTION); 1830} 1831 1832static __inline__ __m512 __DEFAULT_FN_ATTRS 1833_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) 1834{ 1835 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1836 _MM_FROUND_CEIL, 1837 (__v16sf) __W, __U, 1838 _MM_FROUND_CUR_DIRECTION); 1839} 1840 1841static __inline __m512 __DEFAULT_FN_ATTRS 1842_mm512_ceil_ps(__m512 __A) 1843{ 1844 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 1845 _MM_FROUND_CEIL, 1846 (__v16sf) __A, -1, 1847 _MM_FROUND_CUR_DIRECTION); 1848} 1849 1850static __inline __m512d __DEFAULT_FN_ATTRS 1851_mm512_ceil_pd(__m512d __A) 1852{ 1853 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1854 _MM_FROUND_CEIL, 1855 (__v8df) __A, -1, 1856 _MM_FROUND_CUR_DIRECTION); 1857} 1858 1859static __inline__ __m512d __DEFAULT_FN_ATTRS 1860_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) 1861{ 1862 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 1863 _MM_FROUND_CEIL, 1864 (__v8df) __W, __U, 1865 _MM_FROUND_CUR_DIRECTION); 1866} 1867 1868static __inline __m512i __DEFAULT_FN_ATTRS 1869_mm512_abs_epi64(__m512i __A) 1870{ 1871 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1872 (__v8di) 1873 _mm512_setzero_si512 (), 1874 (__mmask8) -1); 1875} 1876 1877static __inline__ __m512i __DEFAULT_FN_ATTRS 1878_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 1879{ 1880 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1881 (__v8di) __W, 1882 (__mmask8) __U); 1883} 1884 1885static __inline__ __m512i __DEFAULT_FN_ATTRS 1886_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 1887{ 1888 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1889 (__v8di) 1890 _mm512_setzero_si512 (), 1891 (__mmask8) __U); 1892} 1893 1894static __inline __m512i __DEFAULT_FN_ATTRS 1895_mm512_abs_epi32(__m512i __A) 1896{ 1897 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 1898 (__v16si) 1899 _mm512_setzero_si512 (), 1900 (__mmask16) -1); 1901} 1902 1903static __inline__ __m512i __DEFAULT_FN_ATTRS 1904_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 1905{ 1906 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 1907 (__v16si) __W, 1908 (__mmask16) __U); 1909} 1910 1911static __inline__ __m512i __DEFAULT_FN_ATTRS 1912_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) 1913{ 1914 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 1915 (__v16si) 1916 _mm512_setzero_si512 (), 1917 (__mmask16) __U); 1918} 1919 1920static __inline__ __m128 __DEFAULT_FN_ATTRS 1921_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1922 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, 1923 (__v4sf) __B, 1924 (__v4sf) __W, 1925 (__mmask8) __U, 1926 _MM_FROUND_CUR_DIRECTION); 1927} 1928 1929static __inline__ __m128 __DEFAULT_FN_ATTRS 1930_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1931 return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, 1932 (__v4sf) __B, 1933 (__v4sf) _mm_setzero_ps (), 1934 (__mmask8) __U, 1935 _MM_FROUND_CUR_DIRECTION); 1936} 1937 1938#define _mm_add_round_ss(A, B, R) __extension__ ({ \ 1939 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1940 (__v4sf)(__m128)(B), \ 1941 (__v4sf)_mm_setzero_ps(), \ 1942 (__mmask8)-1, (int)(R)); }) 1943 1944#define _mm_mask_add_round_ss(W, U, A, B, R) __extension__ ({ \ 1945 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1946 (__v4sf)(__m128)(B), \ 1947 (__v4sf)(__m128)(W), (__mmask8)(U), \ 1948 (int)(R)); }) 1949 1950#define _mm_maskz_add_round_ss(U, A, B, R) __extension__ ({ \ 1951 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1952 (__v4sf)(__m128)(B), \ 1953 (__v4sf)_mm_setzero_ps(), \ 1954 (__mmask8)(U), (int)(R)); }) 1955 1956static __inline__ __m128d __DEFAULT_FN_ATTRS 1957_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1958 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, 1959 (__v2df) __B, 1960 (__v2df) __W, 1961 (__mmask8) __U, 1962 _MM_FROUND_CUR_DIRECTION); 1963} 1964 1965static __inline__ __m128d __DEFAULT_FN_ATTRS 1966_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1967 return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, 1968 (__v2df) __B, 1969 (__v2df) _mm_setzero_pd (), 1970 (__mmask8) __U, 1971 _MM_FROUND_CUR_DIRECTION); 1972} 1973#define _mm_add_round_sd(A, B, R) __extension__ ({ \ 1974 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1975 (__v2df)(__m128d)(B), \ 1976 (__v2df)_mm_setzero_pd(), \ 1977 (__mmask8)-1, (int)(R)); }) 1978 1979#define _mm_mask_add_round_sd(W, U, A, B, R) __extension__ ({ \ 1980 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1981 (__v2df)(__m128d)(B), \ 1982 (__v2df)(__m128d)(W), \ 1983 (__mmask8)(U), (int)(R)); }) 1984 1985#define _mm_maskz_add_round_sd(U, A, B, R) __extension__ ({ \ 1986 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1987 (__v2df)(__m128d)(B), \ 1988 (__v2df)_mm_setzero_pd(), \ 1989 (__mmask8)(U), (int)(R)); }) 1990 1991static __inline__ __m512d __DEFAULT_FN_ATTRS 1992_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1993 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 1994 (__v8df) __B, 1995 (__v8df) __W, 1996 (__mmask8) __U, 1997 _MM_FROUND_CUR_DIRECTION); 1998} 1999 2000static __inline__ __m512d __DEFAULT_FN_ATTRS 2001_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2002 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2003 (__v8df) __B, 2004 (__v8df) _mm512_setzero_pd (), 2005 (__mmask8) __U, 2006 _MM_FROUND_CUR_DIRECTION); 2007} 2008 2009static __inline__ __m512 __DEFAULT_FN_ATTRS 2010_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2011 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2012 (__v16sf) __B, 2013 (__v16sf) __W, 2014 (__mmask16) __U, 2015 _MM_FROUND_CUR_DIRECTION); 2016} 2017 2018static __inline__ __m512 __DEFAULT_FN_ATTRS 2019_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2020 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2021 (__v16sf) __B, 2022 (__v16sf) _mm512_setzero_ps (), 2023 (__mmask16) __U, 2024 _MM_FROUND_CUR_DIRECTION); 2025} 2026 2027#define _mm512_add_round_pd(A, B, R) __extension__ ({ \ 2028 (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2029 (__v8df)(__m512d)(B), \ 2030 (__v8df)_mm512_setzero_pd(), \ 2031 (__mmask8)-1, (int)(R)); }) 2032 2033#define _mm512_mask_add_round_pd(W, U, A, B, R) __extension__ ({ \ 2034 (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2035 (__v8df)(__m512d)(B), \ 2036 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2037 (int)(R)); }) 2038 2039#define _mm512_maskz_add_round_pd(U, A, B, R) __extension__ ({ \ 2040 (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \ 2041 (__v8df)(__m512d)(B), \ 2042 (__v8df)_mm512_setzero_pd(), \ 2043 (__mmask8)(U), (int)(R)); }) 2044 2045#define _mm512_add_round_ps(A, B, R) __extension__ ({ \ 2046 (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2047 (__v16sf)(__m512)(B), \ 2048 (__v16sf)_mm512_setzero_ps(), \ 2049 (__mmask16)-1, (int)(R)); }) 2050 2051#define _mm512_mask_add_round_ps(W, U, A, B, R) __extension__ ({ \ 2052 (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2053 (__v16sf)(__m512)(B), \ 2054 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2055 (int)(R)); }) 2056 2057#define _mm512_maskz_add_round_ps(U, A, B, R) __extension__ ({ \ 2058 (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \ 2059 (__v16sf)(__m512)(B), \ 2060 (__v16sf)_mm512_setzero_ps(), \ 2061 (__mmask16)(U), (int)(R)); }) 2062 2063static __inline__ __m128 __DEFAULT_FN_ATTRS 2064_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2065 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, 2066 (__v4sf) __B, 2067 (__v4sf) __W, 2068 (__mmask8) __U, 2069 _MM_FROUND_CUR_DIRECTION); 2070} 2071 2072static __inline__ __m128 __DEFAULT_FN_ATTRS 2073_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2074 return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, 2075 (__v4sf) __B, 2076 (__v4sf) _mm_setzero_ps (), 2077 (__mmask8) __U, 2078 _MM_FROUND_CUR_DIRECTION); 2079} 2080#define _mm_sub_round_ss(A, B, R) __extension__ ({ \ 2081 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2082 (__v4sf)(__m128)(B), \ 2083 (__v4sf)_mm_setzero_ps(), \ 2084 (__mmask8)-1, (int)(R)); }) 2085 2086#define _mm_mask_sub_round_ss(W, U, A, B, R) __extension__ ({ \ 2087 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2088 (__v4sf)(__m128)(B), \ 2089 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2090 (int)(R)); }) 2091 2092#define _mm_maskz_sub_round_ss(U, A, B, R) __extension__ ({ \ 2093 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2094 (__v4sf)(__m128)(B), \ 2095 (__v4sf)_mm_setzero_ps(), \ 2096 (__mmask8)(U), (int)(R)); }) 2097 2098static __inline__ __m128d __DEFAULT_FN_ATTRS 2099_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2100 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, 2101 (__v2df) __B, 2102 (__v2df) __W, 2103 (__mmask8) __U, 2104 _MM_FROUND_CUR_DIRECTION); 2105} 2106 2107static __inline__ __m128d __DEFAULT_FN_ATTRS 2108_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2109 return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, 2110 (__v2df) __B, 2111 (__v2df) _mm_setzero_pd (), 2112 (__mmask8) __U, 2113 _MM_FROUND_CUR_DIRECTION); 2114} 2115 2116#define _mm_sub_round_sd(A, B, R) __extension__ ({ \ 2117 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2118 (__v2df)(__m128d)(B), \ 2119 (__v2df)_mm_setzero_pd(), \ 2120 (__mmask8)-1, (int)(R)); }) 2121 2122#define _mm_mask_sub_round_sd(W, U, A, B, R) __extension__ ({ \ 2123 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2124 (__v2df)(__m128d)(B), \ 2125 (__v2df)(__m128d)(W), \ 2126 (__mmask8)(U), (int)(R)); }) 2127 2128#define _mm_maskz_sub_round_sd(U, A, B, R) __extension__ ({ \ 2129 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2130 (__v2df)(__m128d)(B), \ 2131 (__v2df)_mm_setzero_pd(), \ 2132 (__mmask8)(U), (int)(R)); }) 2133 2134static __inline__ __m512d __DEFAULT_FN_ATTRS 2135_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2136 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2137 (__v8df) __B, 2138 (__v8df) __W, 2139 (__mmask8) __U, 2140 _MM_FROUND_CUR_DIRECTION); 2141} 2142 2143static __inline__ __m512d __DEFAULT_FN_ATTRS 2144_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2145 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2146 (__v8df) __B, 2147 (__v8df) 2148 _mm512_setzero_pd (), 2149 (__mmask8) __U, 2150 _MM_FROUND_CUR_DIRECTION); 2151} 2152 2153static __inline__ __m512 __DEFAULT_FN_ATTRS 2154_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2155 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2156 (__v16sf) __B, 2157 (__v16sf) __W, 2158 (__mmask16) __U, 2159 _MM_FROUND_CUR_DIRECTION); 2160} 2161 2162static __inline__ __m512 __DEFAULT_FN_ATTRS 2163_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2164 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2165 (__v16sf) __B, 2166 (__v16sf) 2167 _mm512_setzero_ps (), 2168 (__mmask16) __U, 2169 _MM_FROUND_CUR_DIRECTION); 2170} 2171 2172#define _mm512_sub_round_pd(A, B, R) __extension__ ({ \ 2173 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2174 (__v8df)(__m512d)(B), \ 2175 (__v8df)_mm512_setzero_pd(), \ 2176 (__mmask8)-1, (int)(R)); }) 2177 2178#define _mm512_mask_sub_round_pd(W, U, A, B, R) __extension__ ({ \ 2179 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2180 (__v8df)(__m512d)(B), \ 2181 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2182 (int)(R)); }) 2183 2184#define _mm512_maskz_sub_round_pd(U, A, B, R) __extension__ ({ \ 2185 (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \ 2186 (__v8df)(__m512d)(B), \ 2187 (__v8df)_mm512_setzero_pd(), \ 2188 (__mmask8)(U), (int)(R)); }) 2189 2190#define _mm512_sub_round_ps(A, B, R) __extension__ ({ \ 2191 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2192 (__v16sf)(__m512)(B), \ 2193 (__v16sf)_mm512_setzero_ps(), \ 2194 (__mmask16)-1, (int)(R)); }) 2195 2196#define _mm512_mask_sub_round_ps(W, U, A, B, R) __extension__ ({ \ 2197 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2198 (__v16sf)(__m512)(B), \ 2199 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2200 (int)(R)); }); 2201 2202#define _mm512_maskz_sub_round_ps(U, A, B, R) __extension__ ({ \ 2203 (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \ 2204 (__v16sf)(__m512)(B), \ 2205 (__v16sf)_mm512_setzero_ps(), \ 2206 (__mmask16)(U), (int)(R)); }); 2207 2208static __inline__ __m128 __DEFAULT_FN_ATTRS 2209_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2210 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, 2211 (__v4sf) __B, 2212 (__v4sf) __W, 2213 (__mmask8) __U, 2214 _MM_FROUND_CUR_DIRECTION); 2215} 2216 2217static __inline__ __m128 __DEFAULT_FN_ATTRS 2218_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2219 return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, 2220 (__v4sf) __B, 2221 (__v4sf) _mm_setzero_ps (), 2222 (__mmask8) __U, 2223 _MM_FROUND_CUR_DIRECTION); 2224} 2225#define _mm_mul_round_ss(A, B, R) __extension__ ({ \ 2226 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2227 (__v4sf)(__m128)(B), \ 2228 (__v4sf)_mm_setzero_ps(), \ 2229 (__mmask8)-1, (int)(R)); }) 2230 2231#define _mm_mask_mul_round_ss(W, U, A, B, R) __extension__ ({ \ 2232 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2233 (__v4sf)(__m128)(B), \ 2234 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2235 (int)(R)); }) 2236 2237#define _mm_maskz_mul_round_ss(U, A, B, R) __extension__ ({ \ 2238 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2239 (__v4sf)(__m128)(B), \ 2240 (__v4sf)_mm_setzero_ps(), \ 2241 (__mmask8)(U), (int)(R)); }) 2242 2243static __inline__ __m128d __DEFAULT_FN_ATTRS 2244_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2245 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, 2246 (__v2df) __B, 2247 (__v2df) __W, 2248 (__mmask8) __U, 2249 _MM_FROUND_CUR_DIRECTION); 2250} 2251 2252static __inline__ __m128d __DEFAULT_FN_ATTRS 2253_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2254 return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, 2255 (__v2df) __B, 2256 (__v2df) _mm_setzero_pd (), 2257 (__mmask8) __U, 2258 _MM_FROUND_CUR_DIRECTION); 2259} 2260 2261#define _mm_mul_round_sd(A, B, R) __extension__ ({ \ 2262 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2263 (__v2df)(__m128d)(B), \ 2264 (__v2df)_mm_setzero_pd(), \ 2265 (__mmask8)-1, (int)(R)); }) 2266 2267#define _mm_mask_mul_round_sd(W, U, A, B, R) __extension__ ({ \ 2268 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2269 (__v2df)(__m128d)(B), \ 2270 (__v2df)(__m128d)(W), \ 2271 (__mmask8)(U), (int)(R)); }) 2272 2273#define _mm_maskz_mul_round_sd(U, A, B, R) __extension__ ({ \ 2274 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2275 (__v2df)(__m128d)(B), \ 2276 (__v2df)_mm_setzero_pd(), \ 2277 (__mmask8)(U), (int)(R)); }) 2278 2279static __inline__ __m512d __DEFAULT_FN_ATTRS 2280_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2281 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2282 (__v8df) __B, 2283 (__v8df) __W, 2284 (__mmask8) __U, 2285 _MM_FROUND_CUR_DIRECTION); 2286} 2287 2288static __inline__ __m512d __DEFAULT_FN_ATTRS 2289_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2290 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2291 (__v8df) __B, 2292 (__v8df) 2293 _mm512_setzero_pd (), 2294 (__mmask8) __U, 2295 _MM_FROUND_CUR_DIRECTION); 2296} 2297 2298static __inline__ __m512 __DEFAULT_FN_ATTRS 2299_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2300 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2301 (__v16sf) __B, 2302 (__v16sf) __W, 2303 (__mmask16) __U, 2304 _MM_FROUND_CUR_DIRECTION); 2305} 2306 2307static __inline__ __m512 __DEFAULT_FN_ATTRS 2308_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2309 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2310 (__v16sf) __B, 2311 (__v16sf) 2312 _mm512_setzero_ps (), 2313 (__mmask16) __U, 2314 _MM_FROUND_CUR_DIRECTION); 2315} 2316 2317#define _mm512_mul_round_pd(A, B, R) __extension__ ({ \ 2318 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2319 (__v8df)(__m512d)(B), \ 2320 (__v8df)_mm512_setzero_pd(), \ 2321 (__mmask8)-1, (int)(R)); }) 2322 2323#define _mm512_mask_mul_round_pd(W, U, A, B, R) __extension__ ({ \ 2324 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2325 (__v8df)(__m512d)(B), \ 2326 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2327 (int)(R)); }) 2328 2329#define _mm512_maskz_mul_round_pd(U, A, B, R) __extension__ ({ \ 2330 (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \ 2331 (__v8df)(__m512d)(B), \ 2332 (__v8df)_mm512_setzero_pd(), \ 2333 (__mmask8)(U), (int)(R)); }) 2334 2335#define _mm512_mul_round_ps(A, B, R) __extension__ ({ \ 2336 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2337 (__v16sf)(__m512)(B), \ 2338 (__v16sf)_mm512_setzero_ps(), \ 2339 (__mmask16)-1, (int)(R)); }) 2340 2341#define _mm512_mask_mul_round_ps(W, U, A, B, R) __extension__ ({ \ 2342 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2343 (__v16sf)(__m512)(B), \ 2344 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2345 (int)(R)); }); 2346 2347#define _mm512_maskz_mul_round_ps(U, A, B, R) __extension__ ({ \ 2348 (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \ 2349 (__v16sf)(__m512)(B), \ 2350 (__v16sf)_mm512_setzero_ps(), \ 2351 (__mmask16)(U), (int)(R)); }); 2352 2353static __inline__ __m128 __DEFAULT_FN_ATTRS 2354_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 2355 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, 2356 (__v4sf) __B, 2357 (__v4sf) __W, 2358 (__mmask8) __U, 2359 _MM_FROUND_CUR_DIRECTION); 2360} 2361 2362static __inline__ __m128 __DEFAULT_FN_ATTRS 2363_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { 2364 return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, 2365 (__v4sf) __B, 2366 (__v4sf) _mm_setzero_ps (), 2367 (__mmask8) __U, 2368 _MM_FROUND_CUR_DIRECTION); 2369} 2370 2371#define _mm_div_round_ss(A, B, R) __extension__ ({ \ 2372 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2373 (__v4sf)(__m128)(B), \ 2374 (__v4sf)_mm_setzero_ps(), \ 2375 (__mmask8)-1, (int)(R)); }) 2376 2377#define _mm_mask_div_round_ss(W, U, A, B, R) __extension__ ({ \ 2378 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2379 (__v4sf)(__m128)(B), \ 2380 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2381 (int)(R)); }) 2382 2383#define _mm_maskz_div_round_ss(U, A, B, R) __extension__ ({ \ 2384 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2385 (__v4sf)(__m128)(B), \ 2386 (__v4sf)_mm_setzero_ps(), \ 2387 (__mmask8)(U), (int)(R)); }) 2388 2389static __inline__ __m128d __DEFAULT_FN_ATTRS 2390_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 2391 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, 2392 (__v2df) __B, 2393 (__v2df) __W, 2394 (__mmask8) __U, 2395 _MM_FROUND_CUR_DIRECTION); 2396} 2397 2398static __inline__ __m128d __DEFAULT_FN_ATTRS 2399_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { 2400 return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, 2401 (__v2df) __B, 2402 (__v2df) _mm_setzero_pd (), 2403 (__mmask8) __U, 2404 _MM_FROUND_CUR_DIRECTION); 2405} 2406 2407#define _mm_div_round_sd(A, B, R) __extension__ ({ \ 2408 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2409 (__v2df)(__m128d)(B), \ 2410 (__v2df)_mm_setzero_pd(), \ 2411 (__mmask8)-1, (int)(R)); }) 2412 2413#define _mm_mask_div_round_sd(W, U, A, B, R) __extension__ ({ \ 2414 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2415 (__v2df)(__m128d)(B), \ 2416 (__v2df)(__m128d)(W), \ 2417 (__mmask8)(U), (int)(R)); }) 2418 2419#define _mm_maskz_div_round_sd(U, A, B, R) __extension__ ({ \ 2420 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2421 (__v2df)(__m128d)(B), \ 2422 (__v2df)_mm_setzero_pd(), \ 2423 (__mmask8)(U), (int)(R)); }) 2424 2425static __inline__ __m512d __DEFAULT_FN_ATTRS 2426_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 2427 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, 2428 (__v8df) __B, 2429 (__v8df) __W, 2430 (__mmask8) __U, 2431 _MM_FROUND_CUR_DIRECTION); 2432} 2433 2434static __inline__ __m512d __DEFAULT_FN_ATTRS 2435_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { 2436 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, 2437 (__v8df) __B, 2438 (__v8df) 2439 _mm512_setzero_pd (), 2440 (__mmask8) __U, 2441 _MM_FROUND_CUR_DIRECTION); 2442} 2443 2444static __inline__ __m512 __DEFAULT_FN_ATTRS 2445_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 2446 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2447 (__v16sf) __B, 2448 (__v16sf) __W, 2449 (__mmask16) __U, 2450 _MM_FROUND_CUR_DIRECTION); 2451} 2452 2453static __inline__ __m512 __DEFAULT_FN_ATTRS 2454_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { 2455 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2456 (__v16sf) __B, 2457 (__v16sf) 2458 _mm512_setzero_ps (), 2459 (__mmask16) __U, 2460 _MM_FROUND_CUR_DIRECTION); 2461} 2462 2463#define _mm512_div_round_pd(A, B, R) __extension__ ({ \ 2464 (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2465 (__v8df)(__m512d)(B), \ 2466 (__v8df)_mm512_setzero_pd(), \ 2467 (__mmask8)-1, (int)(R)); }) 2468 2469#define _mm512_mask_div_round_pd(W, U, A, B, R) __extension__ ({ \ 2470 (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2471 (__v8df)(__m512d)(B), \ 2472 (__v8df)(__m512d)(W), (__mmask8)(U), \ 2473 (int)(R)); }) 2474 2475#define _mm512_maskz_div_round_pd(U, A, B, R) __extension__ ({ \ 2476 (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \ 2477 (__v8df)(__m512d)(B), \ 2478 (__v8df)_mm512_setzero_pd(), \ 2479 (__mmask8)(U), (int)(R)); }) 2480 2481#define _mm512_div_round_ps(A, B, R) __extension__ ({ \ 2482 (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2483 (__v16sf)(__m512)(B), \ 2484 (__v16sf)_mm512_setzero_ps(), \ 2485 (__mmask16)-1, (int)(R)); }) 2486 2487#define _mm512_mask_div_round_ps(W, U, A, B, R) __extension__ ({ \ 2488 (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2489 (__v16sf)(__m512)(B), \ 2490 (__v16sf)(__m512)(W), (__mmask16)(U), \ 2491 (int)(R)); }); 2492 2493#define _mm512_maskz_div_round_ps(U, A, B, R) __extension__ ({ \ 2494 (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \ 2495 (__v16sf)(__m512)(B), \ 2496 (__v16sf)_mm512_setzero_ps(), \ 2497 (__mmask16)(U), (int)(R)); }); 2498 2499#define _mm512_roundscale_ps(A, B) __extension__ ({ \ 2500 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ 2501 (__v16sf)(__m512)(A), (__mmask16)-1, \ 2502 _MM_FROUND_CUR_DIRECTION); }) 2503 2504#define _mm512_mask_roundscale_ps(A, B, C, imm) __extension__ ({\ 2505 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2506 (__v16sf)(__m512)(A), (__mmask16)(B), \ 2507 _MM_FROUND_CUR_DIRECTION); }) 2508 2509#define _mm512_maskz_roundscale_ps(A, B, imm) __extension__ ({\ 2510 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2511 (__v16sf)_mm512_setzero_ps(), \ 2512 (__mmask16)(A), \ 2513 _MM_FROUND_CUR_DIRECTION); }) 2514 2515#define _mm512_mask_roundscale_round_ps( __A, __B, __C, __imm, __R) __extension__ ({ \ 2516 (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __C), (int)__imm,\ 2517 (__v16sf)( __A),\ 2518 (__mmask16)( __B),(int) __R);\ 2519}) 2520 2521#define _mm512_maskz_roundscale_round_ps( __A, __B, __imm,__R) __extension__ ({ \ 2522 (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __B), (int)__imm,\ 2523 (__v16sf)_mm512_setzero_ps (),\ 2524 (__mmask16)( __A),(int) __R);\ 2525}) 2526 2527#define _mm512_roundscale_round_ps( __A, __imm, __R) __extension__ ({ \ 2528 (__m512)__builtin_ia32_rndscaleps_mask ((__v16sf)( __A),(int) __imm,\ 2529 (__v16sf) _mm512_undefined_ps (),\ 2530 (__mmask16) -1,(int) __R);\ 2531}) 2532 2533#define _mm512_roundscale_pd(A, B) __extension__ ({ \ 2534 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ 2535 (__v8df)(__m512d)(A), (__mmask8)-1, \ 2536 _MM_FROUND_CUR_DIRECTION); }) 2537 2538#define _mm512_mask_roundscale_pd(A, B, C, imm) __extension__ ({\ 2539 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2540 (__v8df)(__m512d)(A), (__mmask8)(B), \ 2541 _MM_FROUND_CUR_DIRECTION); }) 2542 2543#define _mm512_maskz_roundscale_pd(A, B, imm) __extension__ ({\ 2544 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2545 (__v8df)_mm512_setzero_pd(), \ 2546 (__mmask8)(A), \ 2547 _MM_FROUND_CUR_DIRECTION); }) 2548 2549#define _mm512_mask_roundscale_round_pd( __A, __B, __C, __imm ,__R) __extension__ ({ \ 2550 (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __C),(int)__imm,\ 2551 (__v8df)( __A),\ 2552 (__mmask8)( __B),(int)__R);\ 2553}) 2554 2555#define _mm512_maskz_roundscale_round_pd( __A, __B, __imm, __R) __extension__ ({ \ 2556 (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __B),(int)__imm,\ 2557 (__v8df)_mm512_setzero_pd (),\ 2558 (__mmask8)( __A),(int) __R);\ 2559}) 2560 2561#define _mm512_roundscale_round_pd( __A, __imm , __R) __extension__ ({ \ 2562 (__m512d)__builtin_ia32_rndscalepd_mask ((__v8df)( __A),(int) __imm,\ 2563 (__v8df)_mm512_undefined_pd (),\ 2564 (__mmask8) -1,(int) __R);\ 2565}) 2566 2567#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ 2568 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2569 (__v8df)(__m512d)(B), \ 2570 (__v8df)(__m512d)(C), (__mmask8)-1, \ 2571 (int)(R)); }) 2572 2573 2574#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 2575 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2576 (__v8df)(__m512d)(B), \ 2577 (__v8df)(__m512d)(C), \ 2578 (__mmask8)(U), (int)(R)); }) 2579 2580 2581#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2582 (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ 2583 (__v8df)(__m512d)(B), \ 2584 (__v8df)(__m512d)(C), \ 2585 (__mmask8)(U), (int)(R)); }) 2586 2587 2588#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2589 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2590 (__v8df)(__m512d)(B), \ 2591 (__v8df)(__m512d)(C), \ 2592 (__mmask8)(U), (int)(R)); }) 2593 2594 2595#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ 2596 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2597 (__v8df)(__m512d)(B), \ 2598 -(__v8df)(__m512d)(C), \ 2599 (__mmask8)-1, (int)(R)); }) 2600 2601 2602#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 2603 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2604 (__v8df)(__m512d)(B), \ 2605 -(__v8df)(__m512d)(C), \ 2606 (__mmask8)(U), (int)(R)); }) 2607 2608 2609#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2610 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2611 (__v8df)(__m512d)(B), \ 2612 -(__v8df)(__m512d)(C), \ 2613 (__mmask8)(U), (int)(R)); }) 2614 2615 2616#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ 2617 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2618 (__v8df)(__m512d)(B), \ 2619 (__v8df)(__m512d)(C), (__mmask8)-1, \ 2620 (int)(R)); }) 2621 2622 2623#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2624 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ 2625 (__v8df)(__m512d)(B), \ 2626 (__v8df)(__m512d)(C), \ 2627 (__mmask8)(U), (int)(R)); }) 2628 2629 2630#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 2631 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2632 (__v8df)(__m512d)(B), \ 2633 (__v8df)(__m512d)(C), \ 2634 (__mmask8)(U), (int)(R)); }) 2635 2636 2637#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ 2638 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2639 (__v8df)(__m512d)(B), \ 2640 -(__v8df)(__m512d)(C), \ 2641 (__mmask8)-1, (int)(R)); }) 2642 2643 2644#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2645 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2646 (__v8df)(__m512d)(B), \ 2647 -(__v8df)(__m512d)(C), \ 2648 (__mmask8)(U), (int)(R)); }) 2649 2650 2651static __inline__ __m512d __DEFAULT_FN_ATTRS 2652_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) 2653{ 2654 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2655 (__v8df) __B, 2656 (__v8df) __C, 2657 (__mmask8) -1, 2658 _MM_FROUND_CUR_DIRECTION); 2659} 2660 2661static __inline__ __m512d __DEFAULT_FN_ATTRS 2662_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2663{ 2664 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2665 (__v8df) __B, 2666 (__v8df) __C, 2667 (__mmask8) __U, 2668 _MM_FROUND_CUR_DIRECTION); 2669} 2670 2671static __inline__ __m512d __DEFAULT_FN_ATTRS 2672_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2673{ 2674 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 2675 (__v8df) __B, 2676 (__v8df) __C, 2677 (__mmask8) __U, 2678 _MM_FROUND_CUR_DIRECTION); 2679} 2680 2681static __inline__ __m512d __DEFAULT_FN_ATTRS 2682_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2683{ 2684 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2685 (__v8df) __B, 2686 (__v8df) __C, 2687 (__mmask8) __U, 2688 _MM_FROUND_CUR_DIRECTION); 2689} 2690 2691static __inline__ __m512d __DEFAULT_FN_ATTRS 2692_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) 2693{ 2694 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2695 (__v8df) __B, 2696 -(__v8df) __C, 2697 (__mmask8) -1, 2698 _MM_FROUND_CUR_DIRECTION); 2699} 2700 2701static __inline__ __m512d __DEFAULT_FN_ATTRS 2702_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2703{ 2704 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2705 (__v8df) __B, 2706 -(__v8df) __C, 2707 (__mmask8) __U, 2708 _MM_FROUND_CUR_DIRECTION); 2709} 2710 2711static __inline__ __m512d __DEFAULT_FN_ATTRS 2712_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2713{ 2714 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2715 (__v8df) __B, 2716 -(__v8df) __C, 2717 (__mmask8) __U, 2718 _MM_FROUND_CUR_DIRECTION); 2719} 2720 2721static __inline__ __m512d __DEFAULT_FN_ATTRS 2722_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) 2723{ 2724 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 2725 (__v8df) __B, 2726 (__v8df) __C, 2727 (__mmask8) -1, 2728 _MM_FROUND_CUR_DIRECTION); 2729} 2730 2731static __inline__ __m512d __DEFAULT_FN_ATTRS 2732_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2733{ 2734 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 2735 (__v8df) __B, 2736 (__v8df) __C, 2737 (__mmask8) __U, 2738 _MM_FROUND_CUR_DIRECTION); 2739} 2740 2741static __inline__ __m512d __DEFAULT_FN_ATTRS 2742_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2743{ 2744 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 2745 (__v8df) __B, 2746 (__v8df) __C, 2747 (__mmask8) __U, 2748 _MM_FROUND_CUR_DIRECTION); 2749} 2750 2751static __inline__ __m512d __DEFAULT_FN_ATTRS 2752_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) 2753{ 2754 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 2755 (__v8df) __B, 2756 -(__v8df) __C, 2757 (__mmask8) -1, 2758 _MM_FROUND_CUR_DIRECTION); 2759} 2760 2761static __inline__ __m512d __DEFAULT_FN_ATTRS 2762_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 2763{ 2764 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 2765 (__v8df) __B, 2766 -(__v8df) __C, 2767 (__mmask8) __U, 2768 _MM_FROUND_CUR_DIRECTION); 2769} 2770 2771#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ 2772 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2773 (__v16sf)(__m512)(B), \ 2774 (__v16sf)(__m512)(C), (__mmask16)-1, \ 2775 (int)(R)); }) 2776 2777 2778#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 2779 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2780 (__v16sf)(__m512)(B), \ 2781 (__v16sf)(__m512)(C), \ 2782 (__mmask16)(U), (int)(R)); }) 2783 2784 2785#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2786 (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \ 2787 (__v16sf)(__m512)(B), \ 2788 (__v16sf)(__m512)(C), \ 2789 (__mmask16)(U), (int)(R)); }) 2790 2791 2792#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2793 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2794 (__v16sf)(__m512)(B), \ 2795 (__v16sf)(__m512)(C), \ 2796 (__mmask16)(U), (int)(R)); }) 2797 2798 2799#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ 2800 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2801 (__v16sf)(__m512)(B), \ 2802 -(__v16sf)(__m512)(C), \ 2803 (__mmask16)-1, (int)(R)); }) 2804 2805 2806#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 2807 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2808 (__v16sf)(__m512)(B), \ 2809 -(__v16sf)(__m512)(C), \ 2810 (__mmask16)(U), (int)(R)); }) 2811 2812 2813#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2814 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \ 2815 (__v16sf)(__m512)(B), \ 2816 -(__v16sf)(__m512)(C), \ 2817 (__mmask16)(U), (int)(R)); }) 2818 2819 2820#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \ 2821 (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ 2822 (__v16sf)(__m512)(B), \ 2823 (__v16sf)(__m512)(C), (__mmask16)-1, \ 2824 (int)(R)); }) 2825 2826 2827#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2828 (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \ 2829 (__v16sf)(__m512)(B), \ 2830 (__v16sf)(__m512)(C), \ 2831 (__mmask16)(U), (int)(R)); }) 2832 2833 2834#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2835 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2836 (__v16sf)(__m512)(B), \ 2837 (__v16sf)(__m512)(C), \ 2838 (__mmask16)(U), (int)(R)); }) 2839 2840 2841#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ 2842 (__m512)__builtin_ia32_vfmaddps512_mask(-(__v16sf)(__m512)(A), \ 2843 (__v16sf)(__m512)(B), \ 2844 -(__v16sf)(__m512)(C), \ 2845 (__mmask16)-1, (int)(R)); }) 2846 2847 2848#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2849 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2850 (__v16sf)(__m512)(B), \ 2851 -(__v16sf)(__m512)(C), \ 2852 (__mmask16)(U), (int)(R)); }) 2853 2854 2855static __inline__ __m512 __DEFAULT_FN_ATTRS 2856_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) 2857{ 2858 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2859 (__v16sf) __B, 2860 (__v16sf) __C, 2861 (__mmask16) -1, 2862 _MM_FROUND_CUR_DIRECTION); 2863} 2864 2865static __inline__ __m512 __DEFAULT_FN_ATTRS 2866_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2867{ 2868 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2869 (__v16sf) __B, 2870 (__v16sf) __C, 2871 (__mmask16) __U, 2872 _MM_FROUND_CUR_DIRECTION); 2873} 2874 2875static __inline__ __m512 __DEFAULT_FN_ATTRS 2876_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2877{ 2878 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 2879 (__v16sf) __B, 2880 (__v16sf) __C, 2881 (__mmask16) __U, 2882 _MM_FROUND_CUR_DIRECTION); 2883} 2884 2885static __inline__ __m512 __DEFAULT_FN_ATTRS 2886_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2887{ 2888 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2889 (__v16sf) __B, 2890 (__v16sf) __C, 2891 (__mmask16) __U, 2892 _MM_FROUND_CUR_DIRECTION); 2893} 2894 2895static __inline__ __m512 __DEFAULT_FN_ATTRS 2896_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) 2897{ 2898 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2899 (__v16sf) __B, 2900 -(__v16sf) __C, 2901 (__mmask16) -1, 2902 _MM_FROUND_CUR_DIRECTION); 2903} 2904 2905static __inline__ __m512 __DEFAULT_FN_ATTRS 2906_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2907{ 2908 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2909 (__v16sf) __B, 2910 -(__v16sf) __C, 2911 (__mmask16) __U, 2912 _MM_FROUND_CUR_DIRECTION); 2913} 2914 2915static __inline__ __m512 __DEFAULT_FN_ATTRS 2916_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2917{ 2918 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2919 (__v16sf) __B, 2920 -(__v16sf) __C, 2921 (__mmask16) __U, 2922 _MM_FROUND_CUR_DIRECTION); 2923} 2924 2925static __inline__ __m512 __DEFAULT_FN_ATTRS 2926_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) 2927{ 2928 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 2929 (__v16sf) __B, 2930 (__v16sf) __C, 2931 (__mmask16) -1, 2932 _MM_FROUND_CUR_DIRECTION); 2933} 2934 2935static __inline__ __m512 __DEFAULT_FN_ATTRS 2936_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2937{ 2938 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 2939 (__v16sf) __B, 2940 (__v16sf) __C, 2941 (__mmask16) __U, 2942 _MM_FROUND_CUR_DIRECTION); 2943} 2944 2945static __inline__ __m512 __DEFAULT_FN_ATTRS 2946_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2947{ 2948 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 2949 (__v16sf) __B, 2950 (__v16sf) __C, 2951 (__mmask16) __U, 2952 _MM_FROUND_CUR_DIRECTION); 2953} 2954 2955static __inline__ __m512 __DEFAULT_FN_ATTRS 2956_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) 2957{ 2958 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 2959 (__v16sf) __B, 2960 -(__v16sf) __C, 2961 (__mmask16) -1, 2962 _MM_FROUND_CUR_DIRECTION); 2963} 2964 2965static __inline__ __m512 __DEFAULT_FN_ATTRS 2966_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2967{ 2968 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 2969 (__v16sf) __B, 2970 -(__v16sf) __C, 2971 (__mmask16) __U, 2972 _MM_FROUND_CUR_DIRECTION); 2973} 2974 2975#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ 2976 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2977 (__v8df)(__m512d)(B), \ 2978 (__v8df)(__m512d)(C), \ 2979 (__mmask8)-1, (int)(R)); }) 2980 2981 2982#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ 2983 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2984 (__v8df)(__m512d)(B), \ 2985 (__v8df)(__m512d)(C), \ 2986 (__mmask8)(U), (int)(R)); }) 2987 2988 2989#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ 2990 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ 2991 (__v8df)(__m512d)(B), \ 2992 (__v8df)(__m512d)(C), \ 2993 (__mmask8)(U), (int)(R)); }) 2994 2995 2996#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ 2997 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 2998 (__v8df)(__m512d)(B), \ 2999 (__v8df)(__m512d)(C), \ 3000 (__mmask8)(U), (int)(R)); }) 3001 3002 3003#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ 3004 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3005 (__v8df)(__m512d)(B), \ 3006 -(__v8df)(__m512d)(C), \ 3007 (__mmask8)-1, (int)(R)); }) 3008 3009 3010#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ 3011 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 3012 (__v8df)(__m512d)(B), \ 3013 -(__v8df)(__m512d)(C), \ 3014 (__mmask8)(U), (int)(R)); }) 3015 3016 3017#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ 3018 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 3019 (__v8df)(__m512d)(B), \ 3020 -(__v8df)(__m512d)(C), \ 3021 (__mmask8)(U), (int)(R)); }) 3022 3023 3024static __inline__ __m512d __DEFAULT_FN_ATTRS 3025_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) 3026{ 3027 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3028 (__v8df) __B, 3029 (__v8df) __C, 3030 (__mmask8) -1, 3031 _MM_FROUND_CUR_DIRECTION); 3032} 3033 3034static __inline__ __m512d __DEFAULT_FN_ATTRS 3035_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3036{ 3037 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3038 (__v8df) __B, 3039 (__v8df) __C, 3040 (__mmask8) __U, 3041 _MM_FROUND_CUR_DIRECTION); 3042} 3043 3044static __inline__ __m512d __DEFAULT_FN_ATTRS 3045_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3046{ 3047 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 3048 (__v8df) __B, 3049 (__v8df) __C, 3050 (__mmask8) __U, 3051 _MM_FROUND_CUR_DIRECTION); 3052} 3053 3054static __inline__ __m512d __DEFAULT_FN_ATTRS 3055_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 3056{ 3057 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3058 (__v8df) __B, 3059 (__v8df) __C, 3060 (__mmask8) __U, 3061 _MM_FROUND_CUR_DIRECTION); 3062} 3063 3064static __inline__ __m512d __DEFAULT_FN_ATTRS 3065_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) 3066{ 3067 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3068 (__v8df) __B, 3069 -(__v8df) __C, 3070 (__mmask8) -1, 3071 _MM_FROUND_CUR_DIRECTION); 3072} 3073 3074static __inline__ __m512d __DEFAULT_FN_ATTRS 3075_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3076{ 3077 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3078 (__v8df) __B, 3079 -(__v8df) __C, 3080 (__mmask8) __U, 3081 _MM_FROUND_CUR_DIRECTION); 3082} 3083 3084static __inline__ __m512d __DEFAULT_FN_ATTRS 3085_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 3086{ 3087 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3088 (__v8df) __B, 3089 -(__v8df) __C, 3090 (__mmask8) __U, 3091 _MM_FROUND_CUR_DIRECTION); 3092} 3093 3094#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ 3095 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3096 (__v16sf)(__m512)(B), \ 3097 (__v16sf)(__m512)(C), \ 3098 (__mmask16)-1, (int)(R)); }) 3099 3100 3101#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ 3102 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3103 (__v16sf)(__m512)(B), \ 3104 (__v16sf)(__m512)(C), \ 3105 (__mmask16)(U), (int)(R)); }) 3106 3107 3108#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3109 (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ 3110 (__v16sf)(__m512)(B), \ 3111 (__v16sf)(__m512)(C), \ 3112 (__mmask16)(U), (int)(R)); }) 3113 3114 3115#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ 3116 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3117 (__v16sf)(__m512)(B), \ 3118 (__v16sf)(__m512)(C), \ 3119 (__mmask16)(U), (int)(R)); }) 3120 3121 3122#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ 3123 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3124 (__v16sf)(__m512)(B), \ 3125 -(__v16sf)(__m512)(C), \ 3126 (__mmask16)-1, (int)(R)); }) 3127 3128 3129#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ 3130 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3131 (__v16sf)(__m512)(B), \ 3132 -(__v16sf)(__m512)(C), \ 3133 (__mmask16)(U), (int)(R)); }) 3134 3135 3136#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ 3137 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3138 (__v16sf)(__m512)(B), \ 3139 -(__v16sf)(__m512)(C), \ 3140 (__mmask16)(U), (int)(R)); }) 3141 3142 3143static __inline__ __m512 __DEFAULT_FN_ATTRS 3144_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) 3145{ 3146 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3147 (__v16sf) __B, 3148 (__v16sf) __C, 3149 (__mmask16) -1, 3150 _MM_FROUND_CUR_DIRECTION); 3151} 3152 3153static __inline__ __m512 __DEFAULT_FN_ATTRS 3154_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3155{ 3156 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3157 (__v16sf) __B, 3158 (__v16sf) __C, 3159 (__mmask16) __U, 3160 _MM_FROUND_CUR_DIRECTION); 3161} 3162 3163static __inline__ __m512 __DEFAULT_FN_ATTRS 3164_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3165{ 3166 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 3167 (__v16sf) __B, 3168 (__v16sf) __C, 3169 (__mmask16) __U, 3170 _MM_FROUND_CUR_DIRECTION); 3171} 3172 3173static __inline__ __m512 __DEFAULT_FN_ATTRS 3174_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3175{ 3176 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3177 (__v16sf) __B, 3178 (__v16sf) __C, 3179 (__mmask16) __U, 3180 _MM_FROUND_CUR_DIRECTION); 3181} 3182 3183static __inline__ __m512 __DEFAULT_FN_ATTRS 3184_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) 3185{ 3186 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3187 (__v16sf) __B, 3188 -(__v16sf) __C, 3189 (__mmask16) -1, 3190 _MM_FROUND_CUR_DIRECTION); 3191} 3192 3193static __inline__ __m512 __DEFAULT_FN_ATTRS 3194_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3195{ 3196 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3197 (__v16sf) __B, 3198 -(__v16sf) __C, 3199 (__mmask16) __U, 3200 _MM_FROUND_CUR_DIRECTION); 3201} 3202 3203static __inline__ __m512 __DEFAULT_FN_ATTRS 3204_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 3205{ 3206 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3207 (__v16sf) __B, 3208 -(__v16sf) __C, 3209 (__mmask16) __U, 3210 _MM_FROUND_CUR_DIRECTION); 3211} 3212 3213#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3214 (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ 3215 (__v8df)(__m512d)(B), \ 3216 (__v8df)(__m512d)(C), \ 3217 (__mmask8)(U), (int)(R)); }) 3218 3219 3220static __inline__ __m512d __DEFAULT_FN_ATTRS 3221_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3222{ 3223 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 3224 (__v8df) __B, 3225 (__v8df) __C, 3226 (__mmask8) __U, 3227 _MM_FROUND_CUR_DIRECTION); 3228} 3229 3230#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3231 (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ 3232 (__v16sf)(__m512)(B), \ 3233 (__v16sf)(__m512)(C), \ 3234 (__mmask16)(U), (int)(R)); }) 3235 3236 3237static __inline__ __m512 __DEFAULT_FN_ATTRS 3238_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3239{ 3240 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 3241 (__v16sf) __B, 3242 (__v16sf) __C, 3243 (__mmask16) __U, 3244 _MM_FROUND_CUR_DIRECTION); 3245} 3246 3247#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ 3248 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ 3249 (__v8df)(__m512d)(B), \ 3250 (__v8df)(__m512d)(C), \ 3251 (__mmask8)(U), (int)(R)); }) 3252 3253 3254static __inline__ __m512d __DEFAULT_FN_ATTRS 3255_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3256{ 3257 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 3258 (__v8df) __B, 3259 (__v8df) __C, 3260 (__mmask8) __U, 3261 _MM_FROUND_CUR_DIRECTION); 3262} 3263 3264#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ 3265 (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ 3266 (__v16sf)(__m512)(B), \ 3267 (__v16sf)(__m512)(C), \ 3268 (__mmask16)(U), (int)(R)); }) 3269 3270 3271static __inline__ __m512 __DEFAULT_FN_ATTRS 3272_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3273{ 3274 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 3275 (__v16sf) __B, 3276 (__v16sf) __C, 3277 (__mmask16) __U, 3278 _MM_FROUND_CUR_DIRECTION); 3279} 3280 3281#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 3282 (__m512d)__builtin_ia32_vfnmaddpd512_mask((__v8df)(__m512d)(A), \ 3283 (__v8df)(__m512d)(B), \ 3284 (__v8df)(__m512d)(C), \ 3285 (__mmask8)(U), (int)(R)); }) 3286 3287 3288static __inline__ __m512d __DEFAULT_FN_ATTRS 3289_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3290{ 3291 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 3292 (__v8df) __B, 3293 (__v8df) __C, 3294 (__mmask8) __U, 3295 _MM_FROUND_CUR_DIRECTION); 3296} 3297 3298#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 3299 (__m512)__builtin_ia32_vfnmaddps512_mask((__v16sf)(__m512)(A), \ 3300 (__v16sf)(__m512)(B), \ 3301 (__v16sf)(__m512)(C), \ 3302 (__mmask16)(U), (int)(R)); }) 3303 3304 3305static __inline__ __m512 __DEFAULT_FN_ATTRS 3306_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3307{ 3308 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 3309 (__v16sf) __B, 3310 (__v16sf) __C, 3311 (__mmask16) __U, 3312 _MM_FROUND_CUR_DIRECTION); 3313} 3314 3315#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 3316 (__m512d)__builtin_ia32_vfnmsubpd512_mask((__v8df)(__m512d)(A), \ 3317 (__v8df)(__m512d)(B), \ 3318 (__v8df)(__m512d)(C), \ 3319 (__mmask8)(U), (int)(R)); }) 3320 3321 3322#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 3323 (__m512d)__builtin_ia32_vfnmsubpd512_mask3((__v8df)(__m512d)(A), \ 3324 (__v8df)(__m512d)(B), \ 3325 (__v8df)(__m512d)(C), \ 3326 (__mmask8)(U), (int)(R)); }) 3327 3328 3329static __inline__ __m512d __DEFAULT_FN_ATTRS 3330_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 3331{ 3332 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 3333 (__v8df) __B, 3334 (__v8df) __C, 3335 (__mmask8) __U, 3336 _MM_FROUND_CUR_DIRECTION); 3337} 3338 3339static __inline__ __m512d __DEFAULT_FN_ATTRS 3340_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 3341{ 3342 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 3343 (__v8df) __B, 3344 (__v8df) __C, 3345 (__mmask8) __U, 3346 _MM_FROUND_CUR_DIRECTION); 3347} 3348 3349#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 3350 (__m512)__builtin_ia32_vfnmsubps512_mask((__v16sf)(__m512)(A), \ 3351 (__v16sf)(__m512)(B), \ 3352 (__v16sf)(__m512)(C), \ 3353 (__mmask16)(U), (int)(R)); }) 3354 3355 3356#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 3357 (__m512)__builtin_ia32_vfnmsubps512_mask3((__v16sf)(__m512)(A), \ 3358 (__v16sf)(__m512)(B), \ 3359 (__v16sf)(__m512)(C), \ 3360 (__mmask16)(U), (int)(R)); }) 3361 3362 3363static __inline__ __m512 __DEFAULT_FN_ATTRS 3364_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 3365{ 3366 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 3367 (__v16sf) __B, 3368 (__v16sf) __C, 3369 (__mmask16) __U, 3370 _MM_FROUND_CUR_DIRECTION); 3371} 3372 3373static __inline__ __m512 __DEFAULT_FN_ATTRS 3374_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 3375{ 3376 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 3377 (__v16sf) __B, 3378 (__v16sf) __C, 3379 (__mmask16) __U, 3380 _MM_FROUND_CUR_DIRECTION); 3381} 3382 3383 3384 3385/* Vector permutations */ 3386 3387static __inline __m512i __DEFAULT_FN_ATTRS 3388_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) 3389{ 3390 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 3391 /* idx */ , 3392 (__v16si) __A, 3393 (__v16si) __B, 3394 (__mmask16) -1); 3395} 3396 3397static __inline__ __m512i __DEFAULT_FN_ATTRS 3398_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, 3399 __m512i __I, __m512i __B) 3400{ 3401 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 3402 /* idx */ , 3403 (__v16si) __A, 3404 (__v16si) __B, 3405 (__mmask16) __U); 3406} 3407 3408static __inline__ __m512i __DEFAULT_FN_ATTRS 3409_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A, 3410 __m512i __I, __m512i __B) 3411{ 3412 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I 3413 /* idx */ , 3414 (__v16si) __A, 3415 (__v16si) __B, 3416 (__mmask16) __U); 3417} 3418 3419static __inline __m512i __DEFAULT_FN_ATTRS 3420_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) 3421{ 3422 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 3423 /* idx */ , 3424 (__v8di) __A, 3425 (__v8di) __B, 3426 (__mmask8) -1); 3427} 3428 3429static __inline__ __m512i __DEFAULT_FN_ATTRS 3430_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, 3431 __m512i __B) 3432{ 3433 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 3434 /* idx */ , 3435 (__v8di) __A, 3436 (__v8di) __B, 3437 (__mmask8) __U); 3438} 3439 3440 3441static __inline__ __m512i __DEFAULT_FN_ATTRS 3442_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, 3443 __m512i __I, __m512i __B) 3444{ 3445 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I 3446 /* idx */ , 3447 (__v8di) __A, 3448 (__v8di) __B, 3449 (__mmask8) __U); 3450} 3451 3452#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ 3453 (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \ 3454 (__v8di)(__m512i)(B), (int)(I), \ 3455 (__v8di)_mm512_setzero_si512(), \ 3456 (__mmask8)-1); }) 3457 3458#define _mm512_mask_alignr_epi64(W, U, A, B, imm) __extension__({\ 3459 (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \ 3460 (__v8di)(__m512i)(B), (int)(imm), \ 3461 (__v8di)(__m512i)(W), \ 3462 (__mmask8)(U)); }) 3463 3464#define _mm512_maskz_alignr_epi64(U, A, B, imm) __extension__({\ 3465 (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \ 3466 (__v8di)(__m512i)(B), (int)(imm), \ 3467 (__v8di)_mm512_setzero_si512(), \ 3468 (__mmask8)(U)); }) 3469 3470#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ 3471 (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \ 3472 (__v16si)(__m512i)(B), (int)(I), \ 3473 (__v16si)_mm512_setzero_si512(), \ 3474 (__mmask16)-1); }) 3475 3476#define _mm512_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({\ 3477 (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \ 3478 (__v16si)(__m512i)(B), (int)(imm), \ 3479 (__v16si)(__m512i)(W), \ 3480 (__mmask16)(U)); }) 3481 3482#define _mm512_maskz_alignr_epi32(U, A, B, imm) __extension__({\ 3483 (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \ 3484 (__v16si)(__m512i)(B), (int)(imm), \ 3485 (__v16si)_mm512_setzero_si512(), \ 3486 (__mmask16)(U)); }) 3487/* Vector Extract */ 3488 3489#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ 3490 (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ 3491 (__v4df)_mm256_setzero_si256(), \ 3492 (__mmask8)-1); }) 3493 3494#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\ 3495 (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ 3496 (__v4df)(__m256d)(W), \ 3497 (__mmask8)(U)); }) 3498 3499#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\ 3500 (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ 3501 (__v4df)_mm256_setzero_pd(), \ 3502 (__mmask8)(U)); }) 3503 3504#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ 3505 (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ 3506 (__v4sf)_mm_setzero_ps(), \ 3507 (__mmask8)-1); }) 3508 3509#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\ 3510 (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ 3511 (__v4sf)(__m128)(W), \ 3512 (__mmask8)(U)); }) 3513 3514#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\ 3515 (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ 3516 (__v4sf)_mm_setzero_ps(), \ 3517 (__mmask8)(U)); }) 3518/* Vector Blend */ 3519 3520static __inline __m512d __DEFAULT_FN_ATTRS 3521_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) 3522{ 3523 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, 3524 (__v8df) __W, 3525 (__mmask8) __U); 3526} 3527 3528static __inline __m512 __DEFAULT_FN_ATTRS 3529_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) 3530{ 3531 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, 3532 (__v16sf) __W, 3533 (__mmask16) __U); 3534} 3535 3536static __inline __m512i __DEFAULT_FN_ATTRS 3537_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) 3538{ 3539 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, 3540 (__v8di) __W, 3541 (__mmask8) __U); 3542} 3543 3544static __inline __m512i __DEFAULT_FN_ATTRS 3545_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) 3546{ 3547 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, 3548 (__v16si) __W, 3549 (__mmask16) __U); 3550} 3551 3552/* Compare */ 3553 3554#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ 3555 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3556 (__v16sf)(__m512)(B), (int)(P), \ 3557 (__mmask16)-1, (int)(R)); }) 3558 3559#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ 3560 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3561 (__v16sf)(__m512)(B), (int)(P), \ 3562 (__mmask16)(U), (int)(R)); }) 3563 3564#define _mm512_cmp_ps_mask(A, B, P) \ 3565 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3566 3567#define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 3568 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3569 3570#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ 3571 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3572 (__v8df)(__m512d)(B), (int)(P), \ 3573 (__mmask8)-1, (int)(R)); }) 3574 3575#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \ 3576 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3577 (__v8df)(__m512d)(B), (int)(P), \ 3578 (__mmask8)(U), (int)(R)); }) 3579 3580#define _mm512_cmp_pd_mask(A, B, P) \ 3581 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3582 3583#define _mm512_mask_cmp_pd_mask(U, A, B, P) \ 3584 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3585 3586/* Conversion */ 3587 3588static __inline __m512i __DEFAULT_FN_ATTRS 3589_mm512_cvttps_epu32(__m512 __A) 3590{ 3591 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3592 (__v16si) 3593 _mm512_setzero_si512 (), 3594 (__mmask16) -1, 3595 _MM_FROUND_CUR_DIRECTION); 3596} 3597 3598static __inline__ __m512i __DEFAULT_FN_ATTRS 3599_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 3600{ 3601 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3602 (__v16si) __W, 3603 (__mmask16) __U, 3604 _MM_FROUND_CUR_DIRECTION); 3605} 3606 3607static __inline__ __m512i __DEFAULT_FN_ATTRS 3608_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) 3609{ 3610 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 3611 (__v16si) _mm512_setzero_si512 (), 3612 (__mmask16) __U, 3613 _MM_FROUND_CUR_DIRECTION); 3614} 3615 3616#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ 3617 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3618 (__v16sf)_mm512_setzero_ps(), \ 3619 (__mmask16)-1, (int)(R)); }) 3620 3621#define _mm512_mask_cvt_roundepi32_ps( __W, __U, __A, __R) __extension__ ({ \ 3622__builtin_ia32_cvtdq2ps512_mask ((__v16si)( __A),\ 3623 (__v16sf)( __W),\ 3624 (__mmask16)( __U),( __R));\ 3625}) 3626 3627#define _mm512_maskz_cvt_roundepi32_ps( __U, __A, __R) __extension__ ({ \ 3628__builtin_ia32_cvtdq2ps512_mask ((__v16si)( __A),\ 3629 (__v16sf)\ 3630 _mm512_setzero_ps (),\ 3631 (__mmask16)( __U),( __R));\ 3632}) 3633 3634#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ 3635 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3636 (__v16sf)_mm512_setzero_ps(), \ 3637 (__mmask16)-1, (int)(R)); }) 3638 3639#define _mm512_mask_cvt_roundepu32_ps( __W, __U, __A, __R) __extension__ ({ \ 3640__builtin_ia32_cvtudq2ps512_mask ((__v16si)( __A),\ 3641 (__v16sf)( __W),\ 3642 (__mmask16)( __U),( __R));\ 3643}) 3644 3645#define _mm512_maskz_cvt_roundepu32_ps( __U, __A, __R) __extension__ ({ \ 3646__builtin_ia32_cvtudq2ps512_mask ((__v16si)( __A),\ 3647 (__v16sf)\ 3648 _mm512_setzero_ps (),\ 3649 (__mmask16)( __U),( __R));\ 3650}) 3651 3652static __inline__ __m512 __DEFAULT_FN_ATTRS 3653_mm512_cvtepu32_ps (__m512i __A) 3654{ 3655 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3656 (__v16sf) _mm512_undefined_ps (), 3657 (__mmask16) -1, 3658 _MM_FROUND_CUR_DIRECTION); 3659} 3660 3661static __inline__ __m512 __DEFAULT_FN_ATTRS 3662_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) 3663{ 3664 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3665 (__v16sf) __W, 3666 (__mmask16) __U, 3667 _MM_FROUND_CUR_DIRECTION); 3668} 3669 3670static __inline__ __m512 __DEFAULT_FN_ATTRS 3671_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) 3672{ 3673 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 3674 (__v16sf) _mm512_setzero_ps (), 3675 (__mmask16) __U, 3676 _MM_FROUND_CUR_DIRECTION); 3677} 3678 3679static __inline __m512d __DEFAULT_FN_ATTRS 3680_mm512_cvtepi32_pd(__m256i __A) 3681{ 3682 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 3683 (__v8df) 3684 _mm512_setzero_pd (), 3685 (__mmask8) -1); 3686} 3687 3688static __inline__ __m512d __DEFAULT_FN_ATTRS 3689_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) 3690{ 3691 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 3692 (__v8df) __W, 3693 (__mmask8) __U); 3694} 3695 3696static __inline__ __m512d __DEFAULT_FN_ATTRS 3697_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) 3698{ 3699 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 3700 (__v8df) _mm512_setzero_pd (), 3701 (__mmask8) __U); 3702} 3703 3704static __inline__ __m512 __DEFAULT_FN_ATTRS 3705_mm512_cvtepi32_ps (__m512i __A) 3706{ 3707 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3708 (__v16sf) _mm512_undefined_ps (), 3709 (__mmask16) -1, 3710 _MM_FROUND_CUR_DIRECTION); 3711} 3712 3713static __inline__ __m512 __DEFAULT_FN_ATTRS 3714_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) 3715{ 3716 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3717 (__v16sf) __W, 3718 (__mmask16) __U, 3719 _MM_FROUND_CUR_DIRECTION); 3720} 3721 3722static __inline__ __m512 __DEFAULT_FN_ATTRS 3723_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) 3724{ 3725 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 3726 (__v16sf) _mm512_setzero_ps (), 3727 (__mmask16) __U, 3728 _MM_FROUND_CUR_DIRECTION); 3729} 3730 3731static __inline __m512d __DEFAULT_FN_ATTRS 3732_mm512_cvtepu32_pd(__m256i __A) 3733{ 3734 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 3735 (__v8df) 3736 _mm512_setzero_pd (), 3737 (__mmask8) -1); 3738} 3739 3740static __inline__ __m512d __DEFAULT_FN_ATTRS 3741_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) 3742{ 3743 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 3744 (__v8df) __W, 3745 (__mmask8) __U); 3746} 3747 3748static __inline__ __m512d __DEFAULT_FN_ATTRS 3749_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) 3750{ 3751 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 3752 (__v8df) _mm512_setzero_pd (), 3753 (__mmask8) __U); 3754} 3755 3756#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ 3757 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3758 (__v8sf)_mm256_setzero_ps(), \ 3759 (__mmask8)-1, (int)(R)); }) 3760 3761#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) __extension__ ({ \ 3762 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3763 (__v8sf)(W), \ 3764 (__mmask8)(U), (int)(R)); }) 3765 3766#define _mm512_maskz_cvt_roundpd_ps(U, A, R) __extension__ ({ \ 3767 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3768 (__v8sf)_mm256_setzero_ps(), \ 3769 (__mmask8)(U), (int)(R)); }) 3770 3771static __inline__ __m256 __DEFAULT_FN_ATTRS 3772_mm512_cvtpd_ps (__m512d __A) 3773{ 3774 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3775 (__v8sf) _mm256_undefined_ps (), 3776 (__mmask8) -1, 3777 _MM_FROUND_CUR_DIRECTION); 3778} 3779 3780static __inline__ __m256 __DEFAULT_FN_ATTRS 3781_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 3782{ 3783 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3784 (__v8sf) __W, 3785 (__mmask8) __U, 3786 _MM_FROUND_CUR_DIRECTION); 3787} 3788 3789static __inline__ __m256 __DEFAULT_FN_ATTRS 3790_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) 3791{ 3792 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 3793 (__v8sf) _mm256_setzero_ps (), 3794 (__mmask8) __U, 3795 _MM_FROUND_CUR_DIRECTION); 3796} 3797 3798#define _mm512_cvtps_ph(A, I) __extension__ ({ \ 3799 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3800 (__v16hi)_mm256_setzero_si256(), \ 3801 (__mmask16)-1); }) 3802 3803#define _mm512_mask_cvtps_ph(U, W, A, I) __extension__ ({ \ 3804 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3805 (__v16hi)(__m256i)(U), \ 3806 (__mmask16)(W)); }) 3807 3808#define _mm512_maskz_cvtps_ph(W, A, I) __extension__ ({\ 3809 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3810 (__v16hi)_mm256_setzero_si256(), \ 3811 (__mmask16)(W)); }) 3812 3813static __inline __m512 __DEFAULT_FN_ATTRS 3814_mm512_cvtph_ps(__m256i __A) 3815{ 3816 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3817 (__v16sf) 3818 _mm512_setzero_ps (), 3819 (__mmask16) -1, 3820 _MM_FROUND_CUR_DIRECTION); 3821} 3822 3823static __inline__ __m512 __DEFAULT_FN_ATTRS 3824_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 3825{ 3826 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3827 (__v16sf) __W, 3828 (__mmask16) __U, 3829 _MM_FROUND_CUR_DIRECTION); 3830} 3831 3832static __inline__ __m512 __DEFAULT_FN_ATTRS 3833_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 3834{ 3835 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 3836 (__v16sf) _mm512_setzero_ps (), 3837 (__mmask16) __U, 3838 _MM_FROUND_CUR_DIRECTION); 3839} 3840 3841#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ 3842 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3843 (__v8si)_mm256_setzero_si256(), \ 3844 (__mmask8)-1, (int)(R)); }) 3845 3846#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) __extension__ ({ \ 3847 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3848 (__v8si)(W), \ 3849 (__mmask8)(U), (int)(R)); }) 3850 3851#define _mm512_maskz_cvtt_roundpd_epi32( U, A, R) __extension__ ({ \ 3852 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3853 (__v8si)_mm256_setzero_si256(), \ 3854 (__mmask8)(U), (int)(R)); }) 3855 3856static __inline __m256i __DEFAULT_FN_ATTRS 3857_mm512_cvttpd_epi32(__m512d __a) 3858{ 3859 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, 3860 (__v8si)_mm256_setzero_si256(), 3861 (__mmask8) -1, 3862 _MM_FROUND_CUR_DIRECTION); 3863} 3864 3865static __inline__ __m256i __DEFAULT_FN_ATTRS 3866_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 3867{ 3868 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 3869 (__v8si) __W, 3870 (__mmask8) __U, 3871 _MM_FROUND_CUR_DIRECTION); 3872} 3873 3874static __inline__ __m256i __DEFAULT_FN_ATTRS 3875_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) 3876{ 3877 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 3878 (__v8si) _mm256_setzero_si256 (), 3879 (__mmask8) __U, 3880 _MM_FROUND_CUR_DIRECTION); 3881} 3882 3883#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ 3884 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3885 (__v16si)_mm512_setzero_si512(), \ 3886 (__mmask16)-1, (int)(R)); }) 3887 3888#define _mm512_mask_cvtt_roundps_epi32( W, U, A, R) __extension__ ({ \ 3889 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3890 (__v16si)(W), \ 3891 (__mmask16)(U), (int)(R)); }) 3892 3893#define _mm512_maskz_cvtt_roundps_epi32( U, A, R) __extension__ ({ \ 3894 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3895 (__v16si)_mm512_setzero_si512(), \ 3896 (__mmask16)(U), (int)(R)); }) 3897 3898static __inline __m512i __DEFAULT_FN_ATTRS 3899_mm512_cvttps_epi32(__m512 __a) 3900{ 3901 return (__m512i) 3902 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, 3903 (__v16si) _mm512_setzero_si512 (), 3904 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); 3905} 3906 3907static __inline__ __m512i __DEFAULT_FN_ATTRS 3908_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 3909{ 3910 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 3911 (__v16si) __W, 3912 (__mmask16) __U, 3913 _MM_FROUND_CUR_DIRECTION); 3914} 3915 3916static __inline__ __m512i __DEFAULT_FN_ATTRS 3917_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) 3918{ 3919 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 3920 (__v16si) _mm512_setzero_si512 (), 3921 (__mmask16) __U, 3922 _MM_FROUND_CUR_DIRECTION); 3923} 3924 3925#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ 3926 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 3927 (__v16si)_mm512_setzero_si512(), \ 3928 (__mmask16)-1, (int)(R)); }) 3929 3930#define _mm512_mask_cvt_roundps_epi32( __W, __U, __A, __R) __extension__ ({ \ 3931 (__m512i)__builtin_ia32_cvtps2dq512_mask ((__v16sf)( __A),\ 3932 (__v16si)( __W),\ 3933 (__mmask16)( __U),( __R));\ 3934}) 3935 3936#define _mm512_maskz_cvt_roundps_epi32( __U, __A, __R) __extension__ ({ \ 3937 (__m512i)__builtin_ia32_cvtps2dq512_mask ((__v16sf)( __A),\ 3938 (__v16si)\ 3939 _mm512_setzero_si512 (),\ 3940 (__mmask16)( __U),( __R));\ 3941}) 3942 3943static __inline__ __m512i __DEFAULT_FN_ATTRS 3944_mm512_cvtps_epi32 (__m512 __A) 3945{ 3946 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 3947 (__v16si) _mm512_undefined_epi32 (), 3948 (__mmask16) -1, 3949 _MM_FROUND_CUR_DIRECTION); 3950} 3951 3952static __inline__ __m512i __DEFAULT_FN_ATTRS 3953_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 3954{ 3955 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 3956 (__v16si) __W, 3957 (__mmask16) __U, 3958 _MM_FROUND_CUR_DIRECTION); 3959} 3960 3961static __inline__ __m512i __DEFAULT_FN_ATTRS 3962_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 3963{ 3964 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 3965 (__v16si) 3966 _mm512_setzero_si512 (), 3967 (__mmask16) __U, 3968 _MM_FROUND_CUR_DIRECTION); 3969} 3970 3971#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ 3972 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3973 (__v8si)_mm256_setzero_si256(), \ 3974 (__mmask8)-1, (int)(R)); }) 3975 3976#define _mm512_mask_cvt_roundpd_epi32( W, U, A, R) __extension__ ({ \ 3977 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3978 (__v8si)(W), \ 3979 (__mmask8)(U), (int)(R)); }) 3980 3981#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) __extension__ ({ \ 3982 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3983 (__v8si)_mm256_setzero_si256(), \ 3984 (__mmask8)(U), (int)(R)); }) 3985 3986static __inline__ __m256i __DEFAULT_FN_ATTRS 3987_mm512_cvtpd_epi32 (__m512d __A) 3988{ 3989 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 3990 (__v8si) 3991 _mm256_undefined_si256 (), 3992 (__mmask8) -1, 3993 _MM_FROUND_CUR_DIRECTION); 3994} 3995 3996static __inline__ __m256i __DEFAULT_FN_ATTRS 3997_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 3998{ 3999 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4000 (__v8si) __W, 4001 (__mmask8) __U, 4002 _MM_FROUND_CUR_DIRECTION); 4003} 4004 4005static __inline__ __m256i __DEFAULT_FN_ATTRS 4006_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 4007{ 4008 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4009 (__v8si) 4010 _mm256_setzero_si256 (), 4011 (__mmask8) __U, 4012 _MM_FROUND_CUR_DIRECTION); 4013} 4014 4015#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ 4016 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4017 (__v16si)_mm512_setzero_si512(), \ 4018 (__mmask16)-1, (int)(R)); }) 4019 4020#define _mm512_mask_cvt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \ 4021 (__m512i)__builtin_ia32_cvtps2udq512_mask ((__v16sf)( __A),\ 4022 (__v16si)( __W),\ 4023 (__mmask16)( __U),( __R));\ 4024}) 4025 4026#define _mm512_maskz_cvt_roundps_epu32( __U, __A, __R) __extension__ ({ \ 4027 (__m512i)__builtin_ia32_cvtps2udq512_mask ((__v16sf)( __A),\ 4028 (__v16si)\ 4029 _mm512_setzero_si512 (),\ 4030 (__mmask16)( __U),( __R));\ 4031}) 4032 4033static __inline__ __m512i __DEFAULT_FN_ATTRS 4034_mm512_cvtps_epu32 ( __m512 __A) 4035{ 4036 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ 4037 (__v16si)\ 4038 _mm512_undefined_epi32 (),\ 4039 (__mmask16) -1,\ 4040 _MM_FROUND_CUR_DIRECTION);\ 4041} 4042 4043static __inline__ __m512i __DEFAULT_FN_ATTRS 4044_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 4045{ 4046 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4047 (__v16si) __W, 4048 (__mmask16) __U, 4049 _MM_FROUND_CUR_DIRECTION); 4050} 4051 4052static __inline__ __m512i __DEFAULT_FN_ATTRS 4053_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) 4054{ 4055 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4056 (__v16si) 4057 _mm512_setzero_si512 (), 4058 (__mmask16) __U , 4059 _MM_FROUND_CUR_DIRECTION); 4060} 4061 4062#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ 4063 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4064 (__v8si)_mm256_setzero_si256(), \ 4065 (__mmask8)-1, (int)(R)); }) 4066 4067#define _mm512_mask_cvt_roundpd_epu32( W, U, A, R) __extension__ ({ \ 4068 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4069 (__v8si)(W), \ 4070 (__mmask8) (U), (int)(R)); }) 4071 4072#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) __extension__ ({ \ 4073 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4074 (__v8si)_mm256_setzero_si256(), \ 4075 (__mmask8)(U), (int)(R)); }) 4076 4077static __inline__ __m256i __DEFAULT_FN_ATTRS 4078_mm512_cvtpd_epu32 (__m512d __A) 4079{ 4080 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4081 (__v8si) 4082 _mm256_undefined_si256 (), 4083 (__mmask8) -1, 4084 _MM_FROUND_CUR_DIRECTION); 4085} 4086 4087static __inline__ __m256i __DEFAULT_FN_ATTRS 4088_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 4089{ 4090 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4091 (__v8si) __W, 4092 (__mmask8) __U, 4093 _MM_FROUND_CUR_DIRECTION); 4094} 4095 4096static __inline__ __m256i __DEFAULT_FN_ATTRS 4097_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) 4098{ 4099 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4100 (__v8si) 4101 _mm256_setzero_si256 (), 4102 (__mmask8) __U, 4103 _MM_FROUND_CUR_DIRECTION); 4104} 4105/* Unpack and Interleave */ 4106static __inline __m512d __DEFAULT_FN_ATTRS 4107_mm512_unpackhi_pd(__m512d __a, __m512d __b) 4108{ 4109 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 4110} 4111 4112static __inline __m512d __DEFAULT_FN_ATTRS 4113_mm512_unpacklo_pd(__m512d __a, __m512d __b) 4114{ 4115 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 4116} 4117 4118static __inline __m512 __DEFAULT_FN_ATTRS 4119_mm512_unpackhi_ps(__m512 __a, __m512 __b) 4120{ 4121 return __builtin_shufflevector(__a, __b, 4122 2, 18, 3, 19, 4123 2+4, 18+4, 3+4, 19+4, 4124 2+8, 18+8, 3+8, 19+8, 4125 2+12, 18+12, 3+12, 19+12); 4126} 4127 4128static __inline __m512 __DEFAULT_FN_ATTRS 4129_mm512_unpacklo_ps(__m512 __a, __m512 __b) 4130{ 4131 return __builtin_shufflevector(__a, __b, 4132 0, 16, 1, 17, 4133 0+4, 16+4, 1+4, 17+4, 4134 0+8, 16+8, 1+8, 17+8, 4135 0+12, 16+12, 1+12, 17+12); 4136} 4137 4138/* Bit Test */ 4139 4140static __inline __mmask16 __DEFAULT_FN_ATTRS 4141_mm512_test_epi32_mask(__m512i __A, __m512i __B) 4142{ 4143 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 4144 (__v16si) __B, 4145 (__mmask16) -1); 4146} 4147 4148static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4149_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 4150{ 4151 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 4152 (__v16si) __B, __U); 4153} 4154 4155static __inline __mmask8 __DEFAULT_FN_ATTRS 4156_mm512_test_epi64_mask(__m512i __A, __m512i __B) 4157{ 4158 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 4159 (__v8di) __B, 4160 (__mmask8) -1); 4161} 4162 4163static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4164_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 4165{ 4166 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); 4167} 4168 4169 4170/* SIMD load ops */ 4171 4172static __inline __m512i __DEFAULT_FN_ATTRS 4173_mm512_loadu_si512 (void const *__P) 4174{ 4175 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 4176 (__v16si) 4177 _mm512_setzero_si512 (), 4178 (__mmask16) -1); 4179} 4180 4181static __inline __m512i __DEFAULT_FN_ATTRS 4182_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 4183{ 4184 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, 4185 (__v16si) __W, 4186 (__mmask16) __U); 4187} 4188 4189 4190static __inline __m512i __DEFAULT_FN_ATTRS 4191_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) 4192{ 4193 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P, 4194 (__v16si) 4195 _mm512_setzero_si512 (), 4196 (__mmask16) __U); 4197} 4198 4199static __inline __m512i __DEFAULT_FN_ATTRS 4200_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 4201{ 4202 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, 4203 (__v8di) __W, 4204 (__mmask8) __U); 4205} 4206 4207static __inline __m512i __DEFAULT_FN_ATTRS 4208_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) 4209{ 4210 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, 4211 (__v8di) 4212 _mm512_setzero_si512 (), 4213 (__mmask8) __U); 4214} 4215 4216static __inline __m512 __DEFAULT_FN_ATTRS 4217_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) 4218{ 4219 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, 4220 (__v16sf) __W, 4221 (__mmask16) __U); 4222} 4223 4224static __inline __m512 __DEFAULT_FN_ATTRS 4225_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) 4226{ 4227 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, 4228 (__v16sf) 4229 _mm512_setzero_ps (), 4230 (__mmask16) __U); 4231} 4232 4233static __inline __m512d __DEFAULT_FN_ATTRS 4234_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) 4235{ 4236 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, 4237 (__v8df) __W, 4238 (__mmask8) __U); 4239} 4240 4241static __inline __m512d __DEFAULT_FN_ATTRS 4242_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) 4243{ 4244 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, 4245 (__v8df) 4246 _mm512_setzero_pd (), 4247 (__mmask8) __U); 4248} 4249 4250static __inline __m512d __DEFAULT_FN_ATTRS 4251_mm512_loadu_pd(double const *__p) 4252{ 4253 struct __loadu_pd { 4254 __m512d __v; 4255 } __attribute__((__packed__, __may_alias__)); 4256 return ((struct __loadu_pd*)__p)->__v; 4257} 4258 4259static __inline __m512 __DEFAULT_FN_ATTRS 4260_mm512_loadu_ps(float const *__p) 4261{ 4262 struct __loadu_ps { 4263 __m512 __v; 4264 } __attribute__((__packed__, __may_alias__)); 4265 return ((struct __loadu_ps*)__p)->__v; 4266} 4267 4268static __inline __m512 __DEFAULT_FN_ATTRS 4269_mm512_load_ps(float const *__p) 4270{ 4271 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, 4272 (__v16sf) 4273 _mm512_setzero_ps (), 4274 (__mmask16) -1); 4275} 4276 4277static __inline __m512 __DEFAULT_FN_ATTRS 4278_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 4279{ 4280 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 4281 (__v16sf) __W, 4282 (__mmask16) __U); 4283} 4284 4285static __inline __m512 __DEFAULT_FN_ATTRS 4286_mm512_maskz_load_ps(__mmask16 __U, void const *__P) 4287{ 4288 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, 4289 (__v16sf) 4290 _mm512_setzero_ps (), 4291 (__mmask16) __U); 4292} 4293 4294static __inline __m512d __DEFAULT_FN_ATTRS 4295_mm512_load_pd(double const *__p) 4296{ 4297 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, 4298 (__v8df) 4299 _mm512_setzero_pd (), 4300 (__mmask8) -1); 4301} 4302 4303static __inline __m512d __DEFAULT_FN_ATTRS 4304_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 4305{ 4306 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 4307 (__v8df) __W, 4308 (__mmask8) __U); 4309} 4310 4311static __inline __m512d __DEFAULT_FN_ATTRS 4312_mm512_maskz_load_pd(__mmask8 __U, void const *__P) 4313{ 4314 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, 4315 (__v8df) 4316 _mm512_setzero_pd (), 4317 (__mmask8) __U); 4318} 4319 4320static __inline __m512i __DEFAULT_FN_ATTRS 4321_mm512_load_si512 (void const *__P) 4322{ 4323 return *(__m512i *) __P; 4324} 4325 4326static __inline __m512i __DEFAULT_FN_ATTRS 4327_mm512_load_epi32 (void const *__P) 4328{ 4329 return *(__m512i *) __P; 4330} 4331 4332static __inline __m512i __DEFAULT_FN_ATTRS 4333_mm512_load_epi64 (void const *__P) 4334{ 4335 return *(__m512i *) __P; 4336} 4337 4338/* SIMD store ops */ 4339 4340static __inline void __DEFAULT_FN_ATTRS 4341_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) 4342{ 4343 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A, 4344 (__mmask8) __U); 4345} 4346 4347static __inline void __DEFAULT_FN_ATTRS 4348_mm512_storeu_si512 (void *__P, __m512i __A) 4349{ 4350 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A, 4351 (__mmask16) -1); 4352} 4353 4354static __inline void __DEFAULT_FN_ATTRS 4355_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) 4356{ 4357 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, 4358 (__mmask16) __U); 4359} 4360 4361static __inline void __DEFAULT_FN_ATTRS 4362_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) 4363{ 4364 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); 4365} 4366 4367static __inline void __DEFAULT_FN_ATTRS 4368_mm512_storeu_pd(void *__P, __m512d __A) 4369{ 4370 __builtin_ia32_storeupd512_mask((double *)__P, (__v8df)__A, (__mmask8)-1); 4371} 4372 4373static __inline void __DEFAULT_FN_ATTRS 4374_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) 4375{ 4376 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, 4377 (__mmask16) __U); 4378} 4379 4380static __inline void __DEFAULT_FN_ATTRS 4381_mm512_storeu_ps(void *__P, __m512 __A) 4382{ 4383 __builtin_ia32_storeups512_mask((float *)__P, (__v16sf)__A, (__mmask16)-1); 4384} 4385 4386static __inline void __DEFAULT_FN_ATTRS 4387_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) 4388{ 4389 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 4390} 4391 4392static __inline void __DEFAULT_FN_ATTRS 4393_mm512_store_pd(void *__P, __m512d __A) 4394{ 4395 *(__m512d*)__P = __A; 4396} 4397 4398static __inline void __DEFAULT_FN_ATTRS 4399_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) 4400{ 4401 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, 4402 (__mmask16) __U); 4403} 4404 4405static __inline void __DEFAULT_FN_ATTRS 4406_mm512_store_ps(void *__P, __m512 __A) 4407{ 4408 *(__m512*)__P = __A; 4409} 4410 4411static __inline void __DEFAULT_FN_ATTRS 4412_mm512_store_si512 (void *__P, __m512i __A) 4413{ 4414 *(__m512i *) __P = __A; 4415} 4416 4417static __inline void __DEFAULT_FN_ATTRS 4418_mm512_store_epi32 (void *__P, __m512i __A) 4419{ 4420 *(__m512i *) __P = __A; 4421} 4422 4423static __inline void __DEFAULT_FN_ATTRS 4424_mm512_store_epi64 (void *__P, __m512i __A) 4425{ 4426 *(__m512i *) __P = __A; 4427} 4428 4429/* Mask ops */ 4430 4431static __inline __mmask16 __DEFAULT_FN_ATTRS 4432_mm512_knot(__mmask16 __M) 4433{ 4434 return __builtin_ia32_knothi(__M); 4435} 4436 4437/* Integer compare */ 4438 4439static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4440_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { 4441 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 4442 (__mmask16)-1); 4443} 4444 4445static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4446_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4447 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 4448 __u); 4449} 4450 4451static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4452_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { 4453 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 4454 (__mmask16)-1); 4455} 4456 4457static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4458_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4459 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 4460 __u); 4461} 4462 4463static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4464_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4465 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 4466 __u); 4467} 4468 4469static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4470_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { 4471 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 4472 (__mmask8)-1); 4473} 4474 4475static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4476_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { 4477 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 4478 (__mmask8)-1); 4479} 4480 4481static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4482_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4483 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 4484 __u); 4485} 4486 4487static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4488_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { 4489 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4490 (__mmask16)-1); 4491} 4492 4493static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4494_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4495 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4496 __u); 4497} 4498 4499static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4500_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { 4501 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4502 (__mmask16)-1); 4503} 4504 4505static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4506_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4507 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 4508 __u); 4509} 4510 4511static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4512_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { 4513 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4514 (__mmask8)-1); 4515} 4516 4517static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4518_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4519 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4520 __u); 4521} 4522 4523static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4524_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { 4525 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4526 (__mmask8)-1); 4527} 4528 4529static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4530_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4531 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 4532 __u); 4533} 4534 4535static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4536_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { 4537 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 4538 (__mmask16)-1); 4539} 4540 4541static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4542_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4543 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 4544 __u); 4545} 4546 4547static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4548_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { 4549 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 4550 (__mmask16)-1); 4551} 4552 4553static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4554_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4555 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 4556 __u); 4557} 4558 4559static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4560_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4561 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 4562 __u); 4563} 4564 4565static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4566_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { 4567 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 4568 (__mmask8)-1); 4569} 4570 4571static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4572_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { 4573 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 4574 (__mmask8)-1); 4575} 4576 4577static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4578_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4579 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 4580 __u); 4581} 4582 4583static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4584_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { 4585 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 4586 (__mmask16)-1); 4587} 4588 4589static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4590_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4591 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 4592 __u); 4593} 4594 4595static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4596_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { 4597 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 4598 (__mmask16)-1); 4599} 4600 4601static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4602_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4603 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 4604 __u); 4605} 4606 4607static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4608_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { 4609 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 4610 (__mmask8)-1); 4611} 4612 4613static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4614_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4615 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 4616 __u); 4617} 4618 4619static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4620_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { 4621 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 4622 (__mmask8)-1); 4623} 4624 4625static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4626_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4627 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 4628 __u); 4629} 4630 4631static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4632_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { 4633 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 4634 (__mmask16)-1); 4635} 4636 4637static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4638_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4639 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 4640 __u); 4641} 4642 4643static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4644_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { 4645 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 4646 (__mmask16)-1); 4647} 4648 4649static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4650_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4651 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 4652 __u); 4653} 4654 4655static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4656_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { 4657 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 4658 (__mmask8)-1); 4659} 4660 4661static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4662_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4663 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 4664 __u); 4665} 4666 4667static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4668_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { 4669 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 4670 (__mmask8)-1); 4671} 4672 4673static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4674_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4675 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 4676 __u); 4677} 4678 4679static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4680_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { 4681 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 4682 (__mmask16)-1); 4683} 4684 4685static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4686_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4687 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 4688 __u); 4689} 4690 4691static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4692_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { 4693 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 4694 (__mmask16)-1); 4695} 4696 4697static __inline__ __mmask16 __DEFAULT_FN_ATTRS 4698_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 4699 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 4700 __u); 4701} 4702 4703static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4704_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { 4705 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 4706 (__mmask8)-1); 4707} 4708 4709static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4710_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4711 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 4712 __u); 4713} 4714 4715static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4716_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { 4717 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 4718 (__mmask8)-1); 4719} 4720 4721static __inline__ __mmask8 __DEFAULT_FN_ATTRS 4722_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 4723 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 4724 __u); 4725} 4726 4727static __inline__ __m512i __DEFAULT_FN_ATTRS 4728_mm512_cvtepi8_epi32 (__m128i __A) 4729{ 4730 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 4731 (__v16si) 4732 _mm512_setzero_si512 (), 4733 (__mmask16) -1); 4734} 4735 4736static __inline__ __m512i __DEFAULT_FN_ATTRS 4737_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) 4738{ 4739 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 4740 (__v16si) __W, 4741 (__mmask16) __U); 4742} 4743 4744static __inline__ __m512i __DEFAULT_FN_ATTRS 4745_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A) 4746{ 4747 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 4748 (__v16si) 4749 _mm512_setzero_si512 (), 4750 (__mmask16) __U); 4751} 4752 4753static __inline__ __m512i __DEFAULT_FN_ATTRS 4754_mm512_cvtepi8_epi64 (__m128i __A) 4755{ 4756 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 4757 (__v8di) 4758 _mm512_setzero_si512 (), 4759 (__mmask8) -1); 4760} 4761 4762static __inline__ __m512i __DEFAULT_FN_ATTRS 4763_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 4764{ 4765 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 4766 (__v8di) __W, 4767 (__mmask8) __U); 4768} 4769 4770static __inline__ __m512i __DEFAULT_FN_ATTRS 4771_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) 4772{ 4773 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 4774 (__v8di) 4775 _mm512_setzero_si512 (), 4776 (__mmask8) __U); 4777} 4778 4779static __inline__ __m512i __DEFAULT_FN_ATTRS 4780_mm512_cvtepi32_epi64 (__m256i __X) 4781{ 4782 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 4783 (__v8di) 4784 _mm512_setzero_si512 (), 4785 (__mmask8) -1); 4786} 4787 4788static __inline__ __m512i __DEFAULT_FN_ATTRS 4789_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) 4790{ 4791 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 4792 (__v8di) __W, 4793 (__mmask8) __U); 4794} 4795 4796static __inline__ __m512i __DEFAULT_FN_ATTRS 4797_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X) 4798{ 4799 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 4800 (__v8di) 4801 _mm512_setzero_si512 (), 4802 (__mmask8) __U); 4803} 4804 4805static __inline__ __m512i __DEFAULT_FN_ATTRS 4806_mm512_cvtepi16_epi32 (__m256i __A) 4807{ 4808 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 4809 (__v16si) 4810 _mm512_setzero_si512 (), 4811 (__mmask16) -1); 4812} 4813 4814static __inline__ __m512i __DEFAULT_FN_ATTRS 4815_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) 4816{ 4817 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 4818 (__v16si) __W, 4819 (__mmask16) __U); 4820} 4821 4822static __inline__ __m512i __DEFAULT_FN_ATTRS 4823_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A) 4824{ 4825 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 4826 (__v16si) 4827 _mm512_setzero_si512 (), 4828 (__mmask16) __U); 4829} 4830 4831static __inline__ __m512i __DEFAULT_FN_ATTRS 4832_mm512_cvtepi16_epi64 (__m128i __A) 4833{ 4834 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 4835 (__v8di) 4836 _mm512_setzero_si512 (), 4837 (__mmask8) -1); 4838} 4839 4840static __inline__ __m512i __DEFAULT_FN_ATTRS 4841_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 4842{ 4843 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 4844 (__v8di) __W, 4845 (__mmask8) __U); 4846} 4847 4848static __inline__ __m512i __DEFAULT_FN_ATTRS 4849_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) 4850{ 4851 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 4852 (__v8di) 4853 _mm512_setzero_si512 (), 4854 (__mmask8) __U); 4855} 4856 4857static __inline__ __m512i __DEFAULT_FN_ATTRS 4858_mm512_cvtepu8_epi32 (__m128i __A) 4859{ 4860 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 4861 (__v16si) 4862 _mm512_setzero_si512 (), 4863 (__mmask16) -1); 4864} 4865 4866static __inline__ __m512i __DEFAULT_FN_ATTRS 4867_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) 4868{ 4869 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 4870 (__v16si) __W, 4871 (__mmask16) __U); 4872} 4873 4874static __inline__ __m512i __DEFAULT_FN_ATTRS 4875_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A) 4876{ 4877 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 4878 (__v16si) 4879 _mm512_setzero_si512 (), 4880 (__mmask16) __U); 4881} 4882 4883static __inline__ __m512i __DEFAULT_FN_ATTRS 4884_mm512_cvtepu8_epi64 (__m128i __A) 4885{ 4886 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 4887 (__v8di) 4888 _mm512_setzero_si512 (), 4889 (__mmask8) -1); 4890} 4891 4892static __inline__ __m512i __DEFAULT_FN_ATTRS 4893_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 4894{ 4895 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 4896 (__v8di) __W, 4897 (__mmask8) __U); 4898} 4899 4900static __inline__ __m512i __DEFAULT_FN_ATTRS 4901_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4902{ 4903 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 4904 (__v8di) 4905 _mm512_setzero_si512 (), 4906 (__mmask8) __U); 4907} 4908 4909static __inline__ __m512i __DEFAULT_FN_ATTRS 4910_mm512_cvtepu32_epi64 (__m256i __X) 4911{ 4912 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 4913 (__v8di) 4914 _mm512_setzero_si512 (), 4915 (__mmask8) -1); 4916} 4917 4918static __inline__ __m512i __DEFAULT_FN_ATTRS 4919_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) 4920{ 4921 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 4922 (__v8di) __W, 4923 (__mmask8) __U); 4924} 4925 4926static __inline__ __m512i __DEFAULT_FN_ATTRS 4927_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X) 4928{ 4929 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 4930 (__v8di) 4931 _mm512_setzero_si512 (), 4932 (__mmask8) __U); 4933} 4934 4935static __inline__ __m512i __DEFAULT_FN_ATTRS 4936_mm512_cvtepu16_epi32 (__m256i __A) 4937{ 4938 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 4939 (__v16si) 4940 _mm512_setzero_si512 (), 4941 (__mmask16) -1); 4942} 4943 4944static __inline__ __m512i __DEFAULT_FN_ATTRS 4945_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) 4946{ 4947 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 4948 (__v16si) __W, 4949 (__mmask16) __U); 4950} 4951 4952static __inline__ __m512i __DEFAULT_FN_ATTRS 4953_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A) 4954{ 4955 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 4956 (__v16si) 4957 _mm512_setzero_si512 (), 4958 (__mmask16) __U); 4959} 4960 4961static __inline__ __m512i __DEFAULT_FN_ATTRS 4962_mm512_cvtepu16_epi64 (__m128i __A) 4963{ 4964 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 4965 (__v8di) 4966 _mm512_setzero_si512 (), 4967 (__mmask8) -1); 4968} 4969 4970static __inline__ __m512i __DEFAULT_FN_ATTRS 4971_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 4972{ 4973 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 4974 (__v8di) __W, 4975 (__mmask8) __U); 4976} 4977 4978static __inline__ __m512i __DEFAULT_FN_ATTRS 4979_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) 4980{ 4981 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 4982 (__v8di) 4983 _mm512_setzero_si512 (), 4984 (__mmask8) __U); 4985} 4986 4987static __inline__ __m512i __DEFAULT_FN_ATTRS 4988_mm512_rorv_epi32 (__m512i __A, __m512i __B) 4989{ 4990 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 4991 (__v16si) __B, 4992 (__v16si) 4993 _mm512_setzero_si512 (), 4994 (__mmask16) -1); 4995} 4996 4997static __inline__ __m512i __DEFAULT_FN_ATTRS 4998_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4999{ 5000 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 5001 (__v16si) __B, 5002 (__v16si) __W, 5003 (__mmask16) __U); 5004} 5005 5006static __inline__ __m512i __DEFAULT_FN_ATTRS 5007_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 5008{ 5009 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 5010 (__v16si) __B, 5011 (__v16si) 5012 _mm512_setzero_si512 (), 5013 (__mmask16) __U); 5014} 5015 5016static __inline__ __m512i __DEFAULT_FN_ATTRS 5017_mm512_rorv_epi64 (__m512i __A, __m512i __B) 5018{ 5019 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5020 (__v8di) __B, 5021 (__v8di) 5022 _mm512_setzero_si512 (), 5023 (__mmask8) -1); 5024} 5025 5026static __inline__ __m512i __DEFAULT_FN_ATTRS 5027_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 5028{ 5029 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5030 (__v8di) __B, 5031 (__v8di) __W, 5032 (__mmask8) __U); 5033} 5034 5035static __inline__ __m512i __DEFAULT_FN_ATTRS 5036_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 5037{ 5038 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 5039 (__v8di) __B, 5040 (__v8di) 5041 _mm512_setzero_si512 (), 5042 (__mmask8) __U); 5043} 5044 5045 5046 5047#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ 5048 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5049 (__v16si)(__m512i)(b), (int)(p), \ 5050 (__mmask16)-1); }) 5051 5052#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ 5053 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5054 (__v16si)(__m512i)(b), (int)(p), \ 5055 (__mmask16)-1); }) 5056 5057#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ 5058 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5059 (__v8di)(__m512i)(b), (int)(p), \ 5060 (__mmask8)-1); }) 5061 5062#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ 5063 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5064 (__v8di)(__m512i)(b), (int)(p), \ 5065 (__mmask8)-1); }) 5066 5067#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 5068 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5069 (__v16si)(__m512i)(b), (int)(p), \ 5070 (__mmask16)(m)); }) 5071 5072#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 5073 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5074 (__v16si)(__m512i)(b), (int)(p), \ 5075 (__mmask16)(m)); }) 5076 5077#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 5078 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5079 (__v8di)(__m512i)(b), (int)(p), \ 5080 (__mmask8)(m)); }) 5081 5082#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 5083 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5084 (__v8di)(__m512i)(b), (int)(p), \ 5085 (__mmask8)(m)); }) 5086 5087#define _mm512_rol_epi32(a, b) __extension__ ({ \ 5088 (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5089 (__v16si)_mm512_setzero_si512(), \ 5090 (__mmask16)-1); }) 5091 5092#define _mm512_mask_rol_epi32(W, U, a, b) __extension__ ({ \ 5093 (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5094 (__v16si)(__m512i)(W), \ 5095 (__mmask16)(U)); }) 5096 5097#define _mm512_maskz_rol_epi32(U, a, b) __extension__ ({ \ 5098 (__m512i)__builtin_ia32_prold512_mask((__v16si)(__m512i)(a), (int)(b), \ 5099 (__v16si)_mm512_setzero_si512(), \ 5100 (__mmask16)(U)); }) 5101 5102#define _mm512_rol_epi64(a, b) __extension__ ({ \ 5103 (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5104 (__v8di)_mm512_setzero_si512(), \ 5105 (__mmask8)-1); }) 5106 5107#define _mm512_mask_rol_epi64(W, U, a, b) __extension__ ({ \ 5108 (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5109 (__v8di)(__m512i)(W), (__mmask8)(U)); }) 5110 5111#define _mm512_maskz_rol_epi64(U, a, b) __extension__ ({ \ 5112 (__m512i)__builtin_ia32_prolq512_mask((__v8di)(__m512i)(a), (int)(b), \ 5113 (__v8di)_mm512_setzero_si512(), \ 5114 (__mmask8)(U)); }) 5115static __inline__ __m512i __DEFAULT_FN_ATTRS 5116_mm512_rolv_epi32 (__m512i __A, __m512i __B) 5117{ 5118 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5119 (__v16si) __B, 5120 (__v16si) 5121 _mm512_setzero_si512 (), 5122 (__mmask16) -1); 5123} 5124 5125static __inline__ __m512i __DEFAULT_FN_ATTRS 5126_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 5127{ 5128 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5129 (__v16si) __B, 5130 (__v16si) __W, 5131 (__mmask16) __U); 5132} 5133 5134static __inline__ __m512i __DEFAULT_FN_ATTRS 5135_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 5136{ 5137 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 5138 (__v16si) __B, 5139 (__v16si) 5140 _mm512_setzero_si512 (), 5141 (__mmask16) __U); 5142} 5143 5144static __inline__ __m512i __DEFAULT_FN_ATTRS 5145_mm512_rolv_epi64 (__m512i __A, __m512i __B) 5146{ 5147 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5148 (__v8di) __B, 5149 (__v8di) 5150 _mm512_setzero_si512 (), 5151 (__mmask8) -1); 5152} 5153 5154static __inline__ __m512i __DEFAULT_FN_ATTRS 5155_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 5156{ 5157 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5158 (__v8di) __B, 5159 (__v8di) __W, 5160 (__mmask8) __U); 5161} 5162 5163static __inline__ __m512i __DEFAULT_FN_ATTRS 5164_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 5165{ 5166 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 5167 (__v8di) __B, 5168 (__v8di) 5169 _mm512_setzero_si512 (), 5170 (__mmask8) __U); 5171} 5172 5173#define _mm512_ror_epi32(A, B) __extension__ ({ \ 5174 (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5175 (__v16si)_mm512_setzero_si512(), \ 5176 (__mmask16)-1); }) 5177 5178#define _mm512_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 5179 (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5180 (__v16si)(__m512i)(W), \ 5181 (__mmask16)(U)); }) 5182 5183#define _mm512_maskz_ror_epi32(U, A, B) __extension__ ({ \ 5184 (__m512i)__builtin_ia32_prord512_mask((__v16si)(__m512i)(A), (int)(B), \ 5185 (__v16si)_mm512_setzero_si512(), \ 5186 (__mmask16)(U)); }) 5187 5188#define _mm512_ror_epi64(A, B) __extension__ ({ \ 5189 (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5190 (__v8di)_mm512_setzero_si512(), \ 5191 (__mmask8)-1); }) 5192 5193#define _mm512_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 5194 (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5195 (__v8di)(__m512i)(W), (__mmask8)(U)); }) 5196 5197#define _mm512_maskz_ror_epi64(U, A, B) __extension__ ({ \ 5198 (__m512i)__builtin_ia32_prorq512_mask((__v8di)(__m512i)(A), (int)(B), \ 5199 (__v8di)_mm512_setzero_si512(), \ 5200 (__mmask8)(U)); }) 5201 5202#define _mm512_slli_epi32(A, B) __extension__ ({ \ 5203 (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \ 5204 (__v16si)_mm512_setzero_si512(), \ 5205 (__mmask16)-1); }) 5206 5207#define _mm512_mask_slli_epi32(W, U, A, B) __extension__ ({ \ 5208 (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \ 5209 (__v16si)(__m512i)(W), \ 5210 (__mmask16)(U)); }) 5211 5212#define _mm512_maskz_slli_epi32(U, A, B) __extension__ ({ \ 5213 (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \ 5214 (__v16si)_mm512_setzero_si512(), \ 5215 (__mmask16)(U)); }) 5216 5217#define _mm512_slli_epi64(A, B) __extension__ ({ \ 5218 (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 5219 (__v8di)_mm512_setzero_si512(), \ 5220 (__mmask8)-1); }) 5221 5222#define _mm512_mask_slli_epi64(W, U, A, B) __extension__ ({ \ 5223 (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 5224 (__v8di)(__m512i)(W), \ 5225 (__mmask8)(U)); }) 5226 5227#define _mm512_maskz_slli_epi64(U, A, B) __extension__ ({ \ 5228 (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 5229 (__v8di)_mm512_setzero_si512(), \ 5230 (__mmask8)(U)); }) 5231 5232 5233 5234#define _mm512_srli_epi32(A, B) __extension__ ({ \ 5235 (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \ 5236 (__v16si)_mm512_setzero_si512(), \ 5237 (__mmask16)-1); }) 5238 5239#define _mm512_mask_srli_epi32(W, U, A, B) __extension__ ({ \ 5240 (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \ 5241 (__v16si)(__m512i)(W), \ 5242 (__mmask16)(U)); }) 5243 5244#define _mm512_maskz_srli_epi32(U, A, B) __extension__ ({ \ 5245 (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \ 5246 (__v16si)_mm512_setzero_si512(), \ 5247 (__mmask16)(U)); }) 5248 5249#define _mm512_srli_epi64(A, B) __extension__ ({ \ 5250 (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 5251 (__v8di)_mm512_setzero_si512(), \ 5252 (__mmask8)-1); }) 5253 5254#define _mm512_mask_srli_epi64(W, U, A, B) __extension__ ({ \ 5255 (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 5256 (__v8di)(__m512i)(W), \ 5257 (__mmask8)(U)); }) 5258 5259#define _mm512_maskz_srli_epi64(U, A, B) __extension__ ({ \ 5260 (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 5261 (__v8di)_mm512_setzero_si512(), \ 5262 (__mmask8)(U)); }) 5263 5264static __inline__ __m512i __DEFAULT_FN_ATTRS 5265_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 5266{ 5267 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 5268 (__v16si) __W, 5269 (__mmask16) __U); 5270} 5271 5272static __inline__ __m512i __DEFAULT_FN_ATTRS 5273_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 5274{ 5275 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 5276 (__v16si) 5277 _mm512_setzero_si512 (), 5278 (__mmask16) __U); 5279} 5280 5281static __inline__ void __DEFAULT_FN_ATTRS 5282_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 5283{ 5284 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 5285 (__mmask16) __U); 5286} 5287 5288static __inline__ __m512i __DEFAULT_FN_ATTRS 5289_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 5290{ 5291 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 5292 (__v16si) __W, 5293 (__mmask16) __U); 5294} 5295 5296static __inline__ __m512i __DEFAULT_FN_ATTRS 5297_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 5298{ 5299 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 5300 (__v16si) 5301 _mm512_setzero_si512 (), 5302 (__mmask16) __U); 5303} 5304 5305static __inline__ __m512i __DEFAULT_FN_ATTRS 5306_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 5307{ 5308 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 5309 (__v8di) __W, 5310 (__mmask8) __U); 5311} 5312 5313static __inline__ __m512i __DEFAULT_FN_ATTRS 5314_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 5315{ 5316 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 5317 (__v8di) 5318 _mm512_setzero_si512 (), 5319 (__mmask8) __U); 5320} 5321 5322static __inline__ __m512i __DEFAULT_FN_ATTRS 5323_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 5324{ 5325 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 5326 (__v8di) __W, 5327 (__mmask8) __U); 5328} 5329 5330static __inline__ __m512i __DEFAULT_FN_ATTRS 5331_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 5332{ 5333 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 5334 (__v8di) 5335 _mm512_setzero_si512 (), 5336 (__mmask8) __U); 5337} 5338 5339static __inline__ void __DEFAULT_FN_ATTRS 5340_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 5341{ 5342 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 5343 (__mmask8) __U); 5344} 5345 5346 5347 5348static __inline__ __m512d __DEFAULT_FN_ATTRS 5349_mm512_movedup_pd (__m512d __A) 5350{ 5351 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 5352 (__v8df) 5353 _mm512_undefined_pd (), 5354 (__mmask8) -1); 5355} 5356 5357static __inline__ __m512d __DEFAULT_FN_ATTRS 5358_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) 5359{ 5360 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 5361 (__v8df) __W, 5362 (__mmask8) __U); 5363} 5364 5365static __inline__ __m512d __DEFAULT_FN_ATTRS 5366_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) 5367{ 5368 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 5369 (__v8df) 5370 _mm512_setzero_pd (), 5371 (__mmask8) __U); 5372} 5373 5374#define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \ 5375 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5376 (__v8df)(__m512d)(B), \ 5377 (__v8di)(__m512i)(C), (int)(imm), \ 5378 (__mmask8)-1, (int)(R)); }) 5379 5380#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) __extension__ ({ \ 5381 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5382 (__v8df)(__m512d)(B), \ 5383 (__v8di)(__m512i)(C), (int)(imm), \ 5384 (__mmask8)(U), (int)(R)); }) 5385 5386#define _mm512_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5387 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5388 (__v8df)(__m512d)(B), \ 5389 (__v8di)(__m512i)(C), (int)(imm), \ 5390 (__mmask8)-1, \ 5391 _MM_FROUND_CUR_DIRECTION); }) 5392 5393#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5394 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5395 (__v8df)(__m512d)(B), \ 5396 (__v8di)(__m512i)(C), (int)(imm), \ 5397 (__mmask8)(U), \ 5398 _MM_FROUND_CUR_DIRECTION); }) 5399 5400#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) __extension__ ({ \ 5401 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5402 (__v8df)(__m512d)(B), \ 5403 (__v8di)(__m512i)(C), \ 5404 (int)(imm), (__mmask8)(U), \ 5405 (int)(R)); }) 5406 5407#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5408 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5409 (__v8df)(__m512d)(B), \ 5410 (__v8di)(__m512i)(C), \ 5411 (int)(imm), (__mmask8)(U), \ 5412 _MM_FROUND_CUR_DIRECTION); }) 5413 5414#define _mm512_fixupimm_round_ps(A, B, C, imm, R) __extension__ ({ \ 5415 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5416 (__v16sf)(__m512)(B), \ 5417 (__v16si)(__m512i)(C), (int)(imm), \ 5418 (__mmask16)-1, (int)(R)); }) 5419 5420#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) __extension__ ({ \ 5421 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5422 (__v16sf)(__m512)(B), \ 5423 (__v16si)(__m512i)(C), (int)(imm), \ 5424 (__mmask16)(U), (int)(R)); }) 5425 5426#define _mm512_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 5427 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5428 (__v16sf)(__m512)(B), \ 5429 (__v16si)(__m512i)(C), (int)(imm), \ 5430 (__mmask16)-1, \ 5431 _MM_FROUND_CUR_DIRECTION); }) 5432 5433#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 5434 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5435 (__v16sf)(__m512)(B), \ 5436 (__v16si)(__m512i)(C), (int)(imm), \ 5437 (__mmask16)(U), \ 5438 _MM_FROUND_CUR_DIRECTION); }) 5439 5440#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) __extension__ ({ \ 5441 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5442 (__v16sf)(__m512)(B), \ 5443 (__v16si)(__m512i)(C), \ 5444 (int)(imm), (__mmask16)(U), \ 5445 (int)(R)); }) 5446 5447#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 5448 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5449 (__v16sf)(__m512)(B), \ 5450 (__v16si)(__m512i)(C), \ 5451 (int)(imm), (__mmask16)(U), \ 5452 _MM_FROUND_CUR_DIRECTION); }) 5453 5454#define _mm_fixupimm_round_sd(A, B, C, imm, R) __extension__ ({ \ 5455 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5456 (__v2df)(__m128d)(B), \ 5457 (__v2di)(__m128i)(C), (int)(imm), \ 5458 (__mmask8)-1, (int)(R)); }) 5459 5460#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) __extension__ ({ \ 5461 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5462 (__v2df)(__m128d)(B), \ 5463 (__v2di)(__m128i)(C), (int)(imm), \ 5464 (__mmask8)(U), (int)(R)); }) 5465 5466#define _mm_fixupimm_sd(A, B, C, imm) __extension__ ({ \ 5467 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5468 (__v2df)(__m128d)(B), \ 5469 (__v2di)(__m128i)(C), (int)(imm), \ 5470 (__mmask8)-1, \ 5471 _MM_FROUND_CUR_DIRECTION); }) 5472 5473#define _mm_mask_fixupimm_sd(A, U, B, C, imm) __extension__ ({ \ 5474 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5475 (__v2df)(__m128d)(B), \ 5476 (__v2di)(__m128i)(C), (int)(imm), \ 5477 (__mmask8)(U), \ 5478 _MM_FROUND_CUR_DIRECTION); }) 5479 5480#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) __extension__ ({ \ 5481 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5482 (__v2df)(__m128d)(B), \ 5483 (__v2di)(__m128i)(C), (int)(imm), \ 5484 (__mmask8)(U), (int)(R)); }) 5485 5486#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) __extension__ ({ \ 5487 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5488 (__v2df)(__m128d)(B), \ 5489 (__v2di)(__m128i)(C), (int)(imm), \ 5490 (__mmask8)(U), \ 5491 _MM_FROUND_CUR_DIRECTION); }) 5492 5493#define _mm_fixupimm_round_ss(A, B, C, imm, R) __extension__ ({ \ 5494 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5495 (__v4sf)(__m128)(B), \ 5496 (__v4si)(__m128i)(C), (int)(imm), \ 5497 (__mmask8)-1, (int)(R)); }) 5498 5499#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) __extension__ ({ \ 5500 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5501 (__v4sf)(__m128)(B), \ 5502 (__v4si)(__m128i)(C), (int)(imm), \ 5503 (__mmask8)(U), (int)(R)); }) 5504 5505#define _mm_fixupimm_ss(A, B, C, imm) __extension__ ({ \ 5506 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5507 (__v4sf)(__m128)(B), \ 5508 (__v4si)(__m128i)(C), (int)(imm), \ 5509 (__mmask8)-1, \ 5510 _MM_FROUND_CUR_DIRECTION); }) 5511 5512#define _mm_mask_fixupimm_ss(A, U, B, C, imm) __extension__ ({ \ 5513 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5514 (__v4sf)(__m128)(B), \ 5515 (__v4si)(__m128i)(C), (int)(imm), \ 5516 (__mmask8)(U), \ 5517 _MM_FROUND_CUR_DIRECTION); }) 5518 5519#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) __extension__ ({ \ 5520 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5521 (__v4sf)(__m128)(B), \ 5522 (__v4si)(__m128i)(C), (int)(imm), \ 5523 (__mmask8)(U), (int)(R)); }) 5524 5525#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) __extension__ ({ \ 5526 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5527 (__v4sf)(__m128)(B), \ 5528 (__v4si)(__m128i)(C), (int)(imm), \ 5529 (__mmask8)(U), \ 5530 _MM_FROUND_CUR_DIRECTION); }) 5531 5532#define _mm_getexp_round_sd(A, B, R) __extension__ ({ \ 5533 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5534 (__v2df)(__m128d)(B), \ 5535 (__v2df)_mm_setzero_pd(), \ 5536 (__mmask8)-1, (int)(R)); }) 5537 5538 5539static __inline__ __m128d __DEFAULT_FN_ATTRS 5540_mm_getexp_sd (__m128d __A, __m128d __B) 5541{ 5542 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, 5543 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5544} 5545 5546static __inline__ __m128d __DEFAULT_FN_ATTRS 5547_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5548{ 5549 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 5550 (__v2df) __B, 5551 (__v2df) __W, 5552 (__mmask8) __U, 5553 _MM_FROUND_CUR_DIRECTION); 5554} 5555 5556#define _mm_mask_getexp_round_sd(W, U, A, B, R) __extension__ ({\ 5557 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5558 (__v2df)(__m128d)(B), \ 5559 (__v2df)(__m128d)(W), \ 5560 (__mmask8)(U), (int)(R)); }) 5561 5562static __inline__ __m128d __DEFAULT_FN_ATTRS 5563_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) 5564{ 5565 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, 5566 (__v2df) __B, 5567 (__v2df) _mm_setzero_pd (), 5568 (__mmask8) __U, 5569 _MM_FROUND_CUR_DIRECTION); 5570} 5571 5572#define _mm_maskz_getexp_round_sd(U, A, B, R) __extension__ ({\ 5573 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5574 (__v2df)(__m128d)(B), \ 5575 (__v2df)_mm_setzero_pd(), \ 5576 (__mmask8)(U), (int)(R)); }) 5577 5578#define _mm_getexp_round_ss(A, B, R) __extension__ ({ \ 5579 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5580 (__v4sf)(__m128)(B), \ 5581 (__v4sf)_mm_setzero_ps(), \ 5582 (__mmask8)-1, (int)(R)); }) 5583 5584static __inline__ __m128 __DEFAULT_FN_ATTRS 5585_mm_getexp_ss (__m128 __A, __m128 __B) 5586{ 5587 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5588 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); 5589} 5590 5591static __inline__ __m128 __DEFAULT_FN_ATTRS 5592_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5593{ 5594 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5595 (__v4sf) __B, 5596 (__v4sf) __W, 5597 (__mmask8) __U, 5598 _MM_FROUND_CUR_DIRECTION); 5599} 5600 5601#define _mm_mask_getexp_round_ss(W, U, A, B, R) __extension__ ({\ 5602 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5603 (__v4sf)(__m128)(B), \ 5604 (__v4sf)(__m128)(W), \ 5605 (__mmask8)(U), (int)(R)); }) 5606 5607static __inline__ __m128 __DEFAULT_FN_ATTRS 5608_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) 5609{ 5610 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, 5611 (__v4sf) __B, 5612 (__v4sf) _mm_setzero_pd (), 5613 (__mmask8) __U, 5614 _MM_FROUND_CUR_DIRECTION); 5615} 5616 5617#define _mm_maskz_getexp_round_ss(U, A, B, R) __extension__ ({\ 5618 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5619 (__v4sf)(__m128)(B), \ 5620 (__v4sf)_mm_setzero_ps(), \ 5621 (__mmask8)(U), (int)(R)); }) 5622 5623#define _mm_getmant_round_sd(A, B, C, D, R) __extension__ ({ \ 5624 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5625 (__v2df)(__m128d)(B), \ 5626 (int)(((D)<<2) | (C)), \ 5627 (__v2df)_mm_setzero_pd(), \ 5628 (__mmask8)-1, (int)(R)); }) 5629 5630#define _mm_getmant_sd(A, B, C, D) __extension__ ({ \ 5631 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5632 (__v2df)(__m128d)(B), \ 5633 (int)(((D)<<2) | (C)), \ 5634 (__v2df)_mm_setzero_pd(), \ 5635 (__mmask8)-1, \ 5636 _MM_FROUND_CUR_DIRECTION); }) 5637 5638#define _mm_mask_getmant_sd(W, U, A, B, C, D) __extension__ ({\ 5639 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5640 (__v2df)(__m128d)(B), \ 5641 (int)(((D)<<2) | (C)), \ 5642 (__v2df)(__m128d)(W), \ 5643 (__mmask8)(U), \ 5644 _MM_FROUND_CUR_DIRECTION); }) 5645 5646#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R)({\ 5647 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5648 (__v2df)(__m128d)(B), \ 5649 (int)(((D)<<2) | (C)), \ 5650 (__v2df)(__m128d)(W), \ 5651 (__mmask8)(U), (int)(R)); }) 5652 5653#define _mm_maskz_getmant_sd(U, A, B, C, D) __extension__ ({\ 5654 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5655 (__v2df)(__m128d)(B), \ 5656 (int)(((D)<<2) | (C)), \ 5657 (__v2df)_mm_setzero_pd(), \ 5658 (__mmask8)(U), \ 5659 _MM_FROUND_CUR_DIRECTION); }) 5660 5661#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) __extension__ ({\ 5662 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5663 (__v2df)(__m128d)(B), \ 5664 (int)(((D)<<2) | (C)), \ 5665 (__v2df)_mm_setzero_pd(), \ 5666 (__mmask8)(U), (int)(R)); }) 5667 5668#define _mm_getmant_round_ss(A, B, C, D, R) __extension__ ({ \ 5669 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5670 (__v4sf)(__m128)(B), \ 5671 (int)(((D)<<2) | (C)), \ 5672 (__v4sf)_mm_setzero_ps(), \ 5673 (__mmask8)-1, (int)(R)); }) 5674 5675#define _mm_getmant_ss(A, B, C, D) __extension__ ({ \ 5676 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5677 (__v4sf)(__m128)(B), \ 5678 (int)(((D)<<2) | (C)), \ 5679 (__v4sf)_mm_setzero_ps(), \ 5680 (__mmask8)-1, \ 5681 _MM_FROUND_CUR_DIRECTION); }) 5682 5683#define _mm_mask_getmant_ss(W, U, A, B, C, D) __extension__ ({\ 5684 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5685 (__v4sf)(__m128)(B), \ 5686 (int)(((D)<<2) | (C)), \ 5687 (__v4sf)(__m128)(W), \ 5688 (__mmask8)(U), \ 5689 _MM_FROUND_CUR_DIRECTION); }) 5690 5691#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R)({\ 5692 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5693 (__v4sf)(__m128)(B), \ 5694 (int)(((D)<<2) | (C)), \ 5695 (__v4sf)(__m128)(W), \ 5696 (__mmask8)(U), (int)(R)); }) 5697 5698#define _mm_maskz_getmant_ss(U, A, B, C, D) __extension__ ({\ 5699 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5700 (__v4sf)(__m128)(B), \ 5701 (int)(((D)<<2) | (C)), \ 5702 (__v4sf)_mm_setzero_pd(), \ 5703 (__mmask8)(U), \ 5704 _MM_FROUND_CUR_DIRECTION); }) 5705 5706#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) __extension__ ({\ 5707 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5708 (__v4sf)(__m128)(B), \ 5709 (int)(((D)<<2) | (C)), \ 5710 (__v4sf)_mm_setzero_ps(), \ 5711 (__mmask8)(U), (int)(R)); }) 5712 5713static __inline__ __mmask16 __DEFAULT_FN_ATTRS 5714_mm512_kmov (__mmask16 __A) 5715{ 5716 return __A; 5717} 5718 5719#define _mm_comi_round_sd(A, B, P, R) __extension__ ({\ 5720 (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 5721 (int)(P), (int)(R)); }) 5722 5723#define _mm_comi_round_ss(A, B, P, R) __extension__ ({\ 5724 (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 5725 (int)(P), (int)(R)); }) 5726 5727static __inline__ __m512d __DEFAULT_FN_ATTRS 5728_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 5729{ 5730 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 5731 (__v8df) __B, 5732 (__v8df) __W, 5733 (__mmask8) __U); 5734} 5735#define _mm_cvt_roundsd_si64(A, R) __extension__ ({ \ 5736 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 5737static __inline__ __m512i __DEFAULT_FN_ATTRS 5738_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, 5739 __mmask16 __U, __m512i __B) 5740{ 5741 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, 5742 (__v16si) __I 5743 /* idx */ , 5744 (__v16si) __B, 5745 (__mmask16) __U); 5746} 5747static __inline__ __m512i __DEFAULT_FN_ATTRS 5748_mm512_unpackhi_epi32 (__m512i __A, __m512i __B) 5749{ 5750 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 5751 (__v16si) __B, 5752 (__v16si) 5753 _mm512_setzero_si512 (), 5754 (__mmask16) -1); 5755} 5756 5757static __inline__ __m512i __DEFAULT_FN_ATTRS 5758_mm512_sll_epi32 (__m512i __A, __m128i __B) 5759{ 5760 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 5761 (__v4si) __B, 5762 (__v16si) 5763 _mm512_setzero_si512 (), 5764 (__mmask16) -1); 5765} 5766 5767static __inline__ __m512i __DEFAULT_FN_ATTRS 5768_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 5769{ 5770 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 5771 (__v4si) __B, 5772 (__v16si) __W, 5773 (__mmask16) __U); 5774} 5775 5776static __inline__ __m512i __DEFAULT_FN_ATTRS 5777_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 5778{ 5779 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 5780 (__v4si) __B, 5781 (__v16si) 5782 _mm512_setzero_si512 (), 5783 (__mmask16) __U); 5784} 5785 5786static __inline__ __m512i __DEFAULT_FN_ATTRS 5787_mm512_sll_epi64 (__m512i __A, __m128i __B) 5788{ 5789 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 5790 (__v2di) __B, 5791 (__v8di) 5792 _mm512_setzero_si512 (), 5793 (__mmask8) -1); 5794} 5795 5796static __inline__ __m512i __DEFAULT_FN_ATTRS 5797_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 5798{ 5799 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 5800 (__v2di) __B, 5801 (__v8di) __W, 5802 (__mmask8) __U); 5803} 5804 5805static __inline__ __m512i __DEFAULT_FN_ATTRS 5806_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 5807{ 5808 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 5809 (__v2di) __B, 5810 (__v8di) 5811 _mm512_setzero_si512 (), 5812 (__mmask8) __U); 5813} 5814 5815static __inline__ __m512i __DEFAULT_FN_ATTRS 5816_mm512_sllv_epi32 (__m512i __X, __m512i __Y) 5817{ 5818 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 5819 (__v16si) __Y, 5820 (__v16si) 5821 _mm512_setzero_si512 (), 5822 (__mmask16) -1); 5823} 5824 5825static __inline__ __m512i __DEFAULT_FN_ATTRS 5826_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 5827{ 5828 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 5829 (__v16si) __Y, 5830 (__v16si) __W, 5831 (__mmask16) __U); 5832} 5833 5834static __inline__ __m512i __DEFAULT_FN_ATTRS 5835_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 5836{ 5837 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 5838 (__v16si) __Y, 5839 (__v16si) 5840 _mm512_setzero_si512 (), 5841 (__mmask16) __U); 5842} 5843 5844static __inline__ __m512i __DEFAULT_FN_ATTRS 5845_mm512_sllv_epi64 (__m512i __X, __m512i __Y) 5846{ 5847 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 5848 (__v8di) __Y, 5849 (__v8di) 5850 _mm512_undefined_pd (), 5851 (__mmask8) -1); 5852} 5853 5854static __inline__ __m512i __DEFAULT_FN_ATTRS 5855_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 5856{ 5857 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 5858 (__v8di) __Y, 5859 (__v8di) __W, 5860 (__mmask8) __U); 5861} 5862 5863static __inline__ __m512i __DEFAULT_FN_ATTRS 5864_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 5865{ 5866 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 5867 (__v8di) __Y, 5868 (__v8di) 5869 _mm512_setzero_si512 (), 5870 (__mmask8) __U); 5871} 5872 5873static __inline__ __m512i __DEFAULT_FN_ATTRS 5874_mm512_sra_epi32 (__m512i __A, __m128i __B) 5875{ 5876 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 5877 (__v4si) __B, 5878 (__v16si) 5879 _mm512_setzero_si512 (), 5880 (__mmask16) -1); 5881} 5882 5883static __inline__ __m512i __DEFAULT_FN_ATTRS 5884_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 5885{ 5886 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 5887 (__v4si) __B, 5888 (__v16si) __W, 5889 (__mmask16) __U); 5890} 5891 5892static __inline__ __m512i __DEFAULT_FN_ATTRS 5893_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 5894{ 5895 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 5896 (__v4si) __B, 5897 (__v16si) 5898 _mm512_setzero_si512 (), 5899 (__mmask16) __U); 5900} 5901 5902static __inline__ __m512i __DEFAULT_FN_ATTRS 5903_mm512_sra_epi64 (__m512i __A, __m128i __B) 5904{ 5905 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 5906 (__v2di) __B, 5907 (__v8di) 5908 _mm512_setzero_si512 (), 5909 (__mmask8) -1); 5910} 5911 5912static __inline__ __m512i __DEFAULT_FN_ATTRS 5913_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 5914{ 5915 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 5916 (__v2di) __B, 5917 (__v8di) __W, 5918 (__mmask8) __U); 5919} 5920 5921static __inline__ __m512i __DEFAULT_FN_ATTRS 5922_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 5923{ 5924 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 5925 (__v2di) __B, 5926 (__v8di) 5927 _mm512_setzero_si512 (), 5928 (__mmask8) __U); 5929} 5930 5931static __inline__ __m512i __DEFAULT_FN_ATTRS 5932_mm512_srav_epi32 (__m512i __X, __m512i __Y) 5933{ 5934 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 5935 (__v16si) __Y, 5936 (__v16si) 5937 _mm512_setzero_si512 (), 5938 (__mmask16) -1); 5939} 5940 5941static __inline__ __m512i __DEFAULT_FN_ATTRS 5942_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 5943{ 5944 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 5945 (__v16si) __Y, 5946 (__v16si) __W, 5947 (__mmask16) __U); 5948} 5949 5950static __inline__ __m512i __DEFAULT_FN_ATTRS 5951_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 5952{ 5953 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 5954 (__v16si) __Y, 5955 (__v16si) 5956 _mm512_setzero_si512 (), 5957 (__mmask16) __U); 5958} 5959 5960static __inline__ __m512i __DEFAULT_FN_ATTRS 5961_mm512_srav_epi64 (__m512i __X, __m512i __Y) 5962{ 5963 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 5964 (__v8di) __Y, 5965 (__v8di) 5966 _mm512_setzero_si512 (), 5967 (__mmask8) -1); 5968} 5969 5970static __inline__ __m512i __DEFAULT_FN_ATTRS 5971_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 5972{ 5973 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 5974 (__v8di) __Y, 5975 (__v8di) __W, 5976 (__mmask8) __U); 5977} 5978 5979static __inline__ __m512i __DEFAULT_FN_ATTRS 5980_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 5981{ 5982 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 5983 (__v8di) __Y, 5984 (__v8di) 5985 _mm512_setzero_si512 (), 5986 (__mmask8) __U); 5987} 5988 5989static __inline__ __m512i __DEFAULT_FN_ATTRS 5990_mm512_srl_epi32 (__m512i __A, __m128i __B) 5991{ 5992 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 5993 (__v4si) __B, 5994 (__v16si) 5995 _mm512_setzero_si512 (), 5996 (__mmask16) -1); 5997} 5998 5999static __inline__ __m512i __DEFAULT_FN_ATTRS 6000_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 6001{ 6002 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 6003 (__v4si) __B, 6004 (__v16si) __W, 6005 (__mmask16) __U); 6006} 6007 6008static __inline__ __m512i __DEFAULT_FN_ATTRS 6009_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 6010{ 6011 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 6012 (__v4si) __B, 6013 (__v16si) 6014 _mm512_setzero_si512 (), 6015 (__mmask16) __U); 6016} 6017 6018static __inline__ __m512i __DEFAULT_FN_ATTRS 6019_mm512_srl_epi64 (__m512i __A, __m128i __B) 6020{ 6021 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 6022 (__v2di) __B, 6023 (__v8di) 6024 _mm512_setzero_si512 (), 6025 (__mmask8) -1); 6026} 6027 6028static __inline__ __m512i __DEFAULT_FN_ATTRS 6029_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 6030{ 6031 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 6032 (__v2di) __B, 6033 (__v8di) __W, 6034 (__mmask8) __U); 6035} 6036 6037static __inline__ __m512i __DEFAULT_FN_ATTRS 6038_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 6039{ 6040 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 6041 (__v2di) __B, 6042 (__v8di) 6043 _mm512_setzero_si512 (), 6044 (__mmask8) __U); 6045} 6046 6047static __inline__ __m512i __DEFAULT_FN_ATTRS 6048_mm512_srlv_epi32 (__m512i __X, __m512i __Y) 6049{ 6050 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 6051 (__v16si) __Y, 6052 (__v16si) 6053 _mm512_setzero_si512 (), 6054 (__mmask16) -1); 6055} 6056 6057static __inline__ __m512i __DEFAULT_FN_ATTRS 6058_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 6059{ 6060 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 6061 (__v16si) __Y, 6062 (__v16si) __W, 6063 (__mmask16) __U); 6064} 6065 6066static __inline__ __m512i __DEFAULT_FN_ATTRS 6067_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 6068{ 6069 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 6070 (__v16si) __Y, 6071 (__v16si) 6072 _mm512_setzero_si512 (), 6073 (__mmask16) __U); 6074} 6075 6076static __inline__ __m512i __DEFAULT_FN_ATTRS 6077_mm512_srlv_epi64 (__m512i __X, __m512i __Y) 6078{ 6079 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 6080 (__v8di) __Y, 6081 (__v8di) 6082 _mm512_setzero_si512 (), 6083 (__mmask8) -1); 6084} 6085 6086static __inline__ __m512i __DEFAULT_FN_ATTRS 6087_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 6088{ 6089 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 6090 (__v8di) __Y, 6091 (__v8di) __W, 6092 (__mmask8) __U); 6093} 6094 6095static __inline__ __m512i __DEFAULT_FN_ATTRS 6096_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 6097{ 6098 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 6099 (__v8di) __Y, 6100 (__v8di) 6101 _mm512_setzero_si512 (), 6102 (__mmask8) __U); 6103} 6104 6105#define _mm512_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6106 (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 6107 (__v16si)(__m512i)(B), \ 6108 (__v16si)(__m512i)(C), (int)(imm), \ 6109 (__mmask16)-1); }) 6110 6111#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6112 (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 6113 (__v16si)(__m512i)(B), \ 6114 (__v16si)(__m512i)(C), (int)(imm), \ 6115 (__mmask16)(U)); }) 6116 6117#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6118 (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ 6119 (__v16si)(__m512i)(B), \ 6120 (__v16si)(__m512i)(C), \ 6121 (int)(imm), (__mmask16)(U)); }) 6122 6123#define _mm512_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6124 (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 6125 (__v8di)(__m512i)(B), \ 6126 (__v8di)(__m512i)(C), (int)(imm), \ 6127 (__mmask8)-1); }) 6128 6129#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6130 (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 6131 (__v8di)(__m512i)(B), \ 6132 (__v8di)(__m512i)(C), (int)(imm), \ 6133 (__mmask8)(U)); }) 6134 6135#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6136 (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ 6137 (__v8di)(__m512i)(B), \ 6138 (__v8di)(__m512i)(C), (int)(imm), \ 6139 (__mmask8)(U)); }) 6140 6141static __inline__ __m512d __DEFAULT_FN_ATTRS 6142_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B) 6143{ 6144 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 6145 (__v8df) __B, 6146 (__v8df) 6147 _mm512_setzero_pd (), 6148 (__mmask8) __U); 6149} 6150 6151static __inline__ __m512 __DEFAULT_FN_ATTRS 6152_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 6153{ 6154 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 6155 (__v16sf) __B, 6156 (__v16sf) __W, 6157 (__mmask16) __U); 6158} 6159 6160static __inline__ __m512 __DEFAULT_FN_ATTRS 6161_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) 6162{ 6163 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 6164 (__v16sf) __B, 6165 (__v16sf) 6166 _mm512_setzero_ps (), 6167 (__mmask16) __U); 6168} 6169 6170static __inline__ __m512d __DEFAULT_FN_ATTRS 6171_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 6172{ 6173 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 6174 (__v8df) __B, 6175 (__v8df) __W, 6176 (__mmask8) __U); 6177} 6178 6179static __inline__ __m512d __DEFAULT_FN_ATTRS 6180_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) 6181{ 6182 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 6183 (__v8df) __B, 6184 (__v8df) 6185 _mm512_setzero_pd (), 6186 (__mmask8) __U); 6187} 6188 6189static __inline__ __m512 __DEFAULT_FN_ATTRS 6190_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 6191{ 6192 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, 6193 (__v16sf) __B, 6194 (__v16sf) __W, 6195 (__mmask16) __U); 6196} 6197 6198static __inline__ __m512 __DEFAULT_FN_ATTRS 6199_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) 6200{ 6201 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, 6202 (__v16sf) __B, 6203 (__v16sf) 6204 _mm512_setzero_ps (), 6205 (__mmask16) __U); 6206} 6207 6208#define _mm_cvt_roundsd_i64(A, R) __extension__ ({ \ 6209 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6210 6211#define _mm_cvt_roundsd_si32(A, R) __extension__ ({ \ 6212 (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6213 6214#define _mm_cvt_roundsd_i32(A, R) __extension__ ({ \ 6215 (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6216 6217#define _mm_cvt_roundsd_u32(A, R) __extension__ ({ \ 6218 (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) 6219 6220static __inline__ unsigned __DEFAULT_FN_ATTRS 6221_mm_cvtsd_u32 (__m128d __A) 6222{ 6223 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 6224 _MM_FROUND_CUR_DIRECTION); 6225} 6226 6227#define _mm_cvt_roundsd_u64(A, R) __extension__ ({ \ 6228 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ 6229 (int)(R)); }) 6230 6231static __inline__ unsigned long long __DEFAULT_FN_ATTRS 6232_mm_cvtsd_u64 (__m128d __A) 6233{ 6234 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 6235 __A, 6236 _MM_FROUND_CUR_DIRECTION); 6237} 6238 6239#define _mm_cvt_roundss_si32(A, R) __extension__ ({ \ 6240 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6241 6242#define _mm_cvt_roundss_i32(A, R) __extension__ ({ \ 6243 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6244 6245#define _mm_cvt_roundss_si64(A, R) __extension__ ({ \ 6246 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6247 6248#define _mm_cvt_roundss_i64(A, R) __extension__ ({ \ 6249 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6250 6251#define _mm_cvt_roundss_u32(A, R) __extension__ ({ \ 6252 (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)); }) 6253 6254static __inline__ unsigned __DEFAULT_FN_ATTRS 6255_mm_cvtss_u32 (__m128 __A) 6256{ 6257 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 6258 _MM_FROUND_CUR_DIRECTION); 6259} 6260 6261#define _mm_cvt_roundss_u64(A, R) __extension__ ({ \ 6262 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ 6263 (int)(R)); }) 6264 6265static __inline__ unsigned long long __DEFAULT_FN_ATTRS 6266_mm_cvtss_u64 (__m128 __A) 6267{ 6268 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 6269 __A, 6270 _MM_FROUND_CUR_DIRECTION); 6271} 6272 6273#define _mm_cvtt_roundsd_i32(A, R) __extension__ ({ \ 6274 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6275 6276#define _mm_cvtt_roundsd_si32(A, R) __extension__ ({ \ 6277 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)); }) 6278 6279static __inline__ int __DEFAULT_FN_ATTRS 6280_mm_cvttsd_i32 (__m128d __A) 6281{ 6282 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, 6283 _MM_FROUND_CUR_DIRECTION); 6284} 6285 6286#define _mm_cvtt_roundsd_si64(A, R) __extension__ ({ \ 6287 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6288 6289#define _mm_cvtt_roundsd_i64(A, R) __extension__ ({ \ 6290 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)); }) 6291 6292static __inline__ long long __DEFAULT_FN_ATTRS 6293_mm_cvttsd_i64 (__m128d __A) 6294{ 6295 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 6296 _MM_FROUND_CUR_DIRECTION); 6297} 6298 6299#define _mm_cvtt_roundsd_u32(A, R) __extension__ ({ \ 6300 (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)); }) 6301 6302static __inline__ unsigned __DEFAULT_FN_ATTRS 6303_mm_cvttsd_u32 (__m128d __A) 6304{ 6305 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 6306 _MM_FROUND_CUR_DIRECTION); 6307} 6308 6309#define _mm_cvtt_roundsd_u64(A, R) __extension__ ({ \ 6310 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ 6311 (int)(R)); }) 6312 6313static __inline__ unsigned long long __DEFAULT_FN_ATTRS 6314_mm_cvttsd_u64 (__m128d __A) 6315{ 6316 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 6317 __A, 6318 _MM_FROUND_CUR_DIRECTION); 6319} 6320 6321#define _mm_cvtt_roundss_i32(A, R) __extension__ ({ \ 6322 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6323 6324#define _mm_cvtt_roundss_si32(A, R) __extension__ ({ \ 6325 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)); }) 6326 6327static __inline__ int __DEFAULT_FN_ATTRS 6328_mm_cvttss_i32 (__m128 __A) 6329{ 6330 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 6331 _MM_FROUND_CUR_DIRECTION); 6332} 6333 6334#define _mm_cvtt_roundss_i64(A, R) __extension__ ({ \ 6335 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6336 6337#define _mm_cvtt_roundss_si64(A, R) __extension__ ({ \ 6338 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)); }) 6339 6340static __inline__ long long __DEFAULT_FN_ATTRS 6341_mm_cvttss_i64 (__m128 __A) 6342{ 6343 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 6344 _MM_FROUND_CUR_DIRECTION); 6345} 6346 6347#define _mm_cvtt_roundss_u32(A, R) __extension__ ({ \ 6348 (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)); }) 6349 6350static __inline__ unsigned __DEFAULT_FN_ATTRS 6351_mm_cvttss_u32 (__m128 __A) 6352{ 6353 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 6354 _MM_FROUND_CUR_DIRECTION); 6355} 6356 6357#define _mm_cvtt_roundss_u64(A, R) __extension__ ({ \ 6358 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ 6359 (int)(R)); }) 6360 6361static __inline__ unsigned long long __DEFAULT_FN_ATTRS 6362_mm_cvttss_u64 (__m128 __A) 6363{ 6364 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) 6365 __A, 6366 _MM_FROUND_CUR_DIRECTION); 6367} 6368 6369static __inline__ __m512d __DEFAULT_FN_ATTRS 6370_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U, 6371 __m512d __B) 6372{ 6373 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, 6374 (__v8di) __I 6375 /* idx */ , 6376 (__v8df) __B, 6377 (__mmask8) __U); 6378} 6379 6380static __inline__ __m512 __DEFAULT_FN_ATTRS 6381_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, 6382 __m512 __B) 6383{ 6384 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, 6385 (__v16si) __I 6386 /* idx */ , 6387 (__v16sf) __B, 6388 (__mmask16) __U); 6389} 6390 6391static __inline__ __m512i __DEFAULT_FN_ATTRS 6392_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, 6393 __mmask8 __U, __m512i __B) 6394{ 6395 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A, 6396 (__v8di) __I 6397 /* idx */ , 6398 (__v8di) __B, 6399 (__mmask8) __U); 6400} 6401 6402#define _mm512_permute_pd(X, C) __extension__ ({ \ 6403 (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ 6404 (__v8df)_mm512_undefined_pd(), \ 6405 (__mmask8)-1); }) 6406 6407#define _mm512_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6408 (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ 6409 (__v8df)(__m512d)(W), \ 6410 (__mmask8)(U)); }) 6411 6412#define _mm512_maskz_permute_pd(U, X, C) __extension__ ({ \ 6413 (__m512d)__builtin_ia32_vpermilpd512_mask((__v8df)(__m512d)(X), (int)(C), \ 6414 (__v8df)_mm512_setzero_pd(), \ 6415 (__mmask8)(U)); }) 6416 6417#define _mm512_permute_ps(X, C) __extension__ ({ \ 6418 (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ 6419 (__v16sf)_mm512_undefined_ps(), \ 6420 (__mmask16)-1); }) 6421 6422#define _mm512_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6423 (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ 6424 (__v16sf)(__m512)(W), \ 6425 (__mmask16)(U)); }) 6426 6427#define _mm512_maskz_permute_ps(U, X, C) __extension__ ({ \ 6428 (__m512)__builtin_ia32_vpermilps512_mask((__v16sf)(__m512)(X), (int)(C), \ 6429 (__v16sf)_mm512_setzero_ps(), \ 6430 (__mmask16)(U)); }) 6431 6432static __inline__ __m512d __DEFAULT_FN_ATTRS 6433_mm512_permutevar_pd (__m512d __A, __m512i __C) 6434{ 6435 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 6436 (__v8di) __C, 6437 (__v8df) 6438 _mm512_undefined_pd (), 6439 (__mmask8) -1); 6440} 6441 6442static __inline__ __m512d __DEFAULT_FN_ATTRS 6443_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 6444{ 6445 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 6446 (__v8di) __C, 6447 (__v8df) __W, 6448 (__mmask8) __U); 6449} 6450 6451static __inline__ __m512d __DEFAULT_FN_ATTRS 6452_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C) 6453{ 6454 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 6455 (__v8di) __C, 6456 (__v8df) 6457 _mm512_setzero_pd (), 6458 (__mmask8) __U); 6459} 6460 6461static __inline__ __m512 __DEFAULT_FN_ATTRS 6462_mm512_permutevar_ps (__m512 __A, __m512i __C) 6463{ 6464 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 6465 (__v16si) __C, 6466 (__v16sf) 6467 _mm512_undefined_ps (), 6468 (__mmask16) -1); 6469} 6470 6471static __inline__ __m512 __DEFAULT_FN_ATTRS 6472_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) 6473{ 6474 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 6475 (__v16si) __C, 6476 (__v16sf) __W, 6477 (__mmask16) __U); 6478} 6479 6480static __inline__ __m512 __DEFAULT_FN_ATTRS 6481_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C) 6482{ 6483 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 6484 (__v16si) __C, 6485 (__v16sf) 6486 _mm512_setzero_ps (), 6487 (__mmask16) __U); 6488} 6489 6490static __inline __m512d __DEFAULT_FN_ATTRS 6491_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) 6492{ 6493 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6494 /* idx */ , 6495 (__v8df) __A, 6496 (__v8df) __B, 6497 (__mmask8) -1); 6498} 6499 6500static __inline__ __m512d __DEFAULT_FN_ATTRS 6501_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) 6502{ 6503 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6504 /* idx */ , 6505 (__v8df) __A, 6506 (__v8df) __B, 6507 (__mmask8) __U); 6508} 6509 6510static __inline__ __m512d __DEFAULT_FN_ATTRS 6511_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, 6512 __m512d __B) 6513{ 6514 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I 6515 /* idx */ , 6516 (__v8df) __A, 6517 (__v8df) __B, 6518 (__mmask8) __U); 6519} 6520 6521static __inline __m512 __DEFAULT_FN_ATTRS 6522_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) 6523{ 6524 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6525 /* idx */ , 6526 (__v16sf) __A, 6527 (__v16sf) __B, 6528 (__mmask16) -1); 6529} 6530 6531static __inline__ __m512 __DEFAULT_FN_ATTRS 6532_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 6533{ 6534 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6535 /* idx */ , 6536 (__v16sf) __A, 6537 (__v16sf) __B, 6538 (__mmask16) __U); 6539} 6540 6541static __inline__ __m512 __DEFAULT_FN_ATTRS 6542_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, 6543 __m512 __B) 6544{ 6545 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I 6546 /* idx */ , 6547 (__v16sf) __A, 6548 (__v16sf) __B, 6549 (__mmask16) __U); 6550} 6551 6552static __inline__ __mmask16 __DEFAULT_FN_ATTRS 6553_mm512_testn_epi32_mask (__m512i __A, __m512i __B) 6554{ 6555 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 6556 (__v16si) __B, 6557 (__mmask16) -1); 6558} 6559 6560static __inline__ __mmask16 __DEFAULT_FN_ATTRS 6561_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 6562{ 6563 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 6564 (__v16si) __B, __U); 6565} 6566 6567static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6568_mm512_testn_epi64_mask (__m512i __A, __m512i __B) 6569{ 6570 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 6571 (__v8di) __B, 6572 (__mmask8) -1); 6573} 6574 6575static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6576_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 6577{ 6578 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 6579 (__v8di) __B, __U); 6580} 6581 6582#define _mm512_cvtt_roundpd_epu32(A, R) __extension__ ({ \ 6583 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6584 (__v8si)_mm256_undefined_si256(), \ 6585 (__mmask8)-1, (int)(R)); }) 6586 6587#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) __extension__ ({ \ 6588 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6589 (__v8si)(__m256i)(W), \ 6590 (__mmask8)(U), (int)(R)); }) 6591 6592#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) __extension__ ({ \ 6593 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6594 (__v8si)_mm256_setzero_si256(), \ 6595 (__mmask8)(U), (int)(R)); }) 6596 6597static __inline__ __m256i __DEFAULT_FN_ATTRS 6598_mm512_cvttpd_epu32 (__m512d __A) 6599{ 6600 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6601 (__v8si) 6602 _mm256_undefined_si256 (), 6603 (__mmask8) -1, 6604 _MM_FROUND_CUR_DIRECTION); 6605} 6606 6607static __inline__ __m256i __DEFAULT_FN_ATTRS 6608_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 6609{ 6610 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6611 (__v8si) __W, 6612 (__mmask8) __U, 6613 _MM_FROUND_CUR_DIRECTION); 6614} 6615 6616static __inline__ __m256i __DEFAULT_FN_ATTRS 6617_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 6618{ 6619 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 6620 (__v8si) 6621 _mm256_setzero_si256 (), 6622 (__mmask8) __U, 6623 _MM_FROUND_CUR_DIRECTION); 6624} 6625 6626static __inline__ __m512i __DEFAULT_FN_ATTRS 6627_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 6628 __m512i __B) 6629{ 6630 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 6631 (__v16si) __B, 6632 (__v16si) __W, 6633 (__mmask16) __U); 6634} 6635 6636static __inline__ __m512i __DEFAULT_FN_ATTRS 6637_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 6638{ 6639 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 6640 (__v16si) __B, 6641 (__v16si) 6642 _mm512_setzero_si512 (), 6643 (__mmask16) __U); 6644} 6645 6646static __inline__ __m512i __DEFAULT_FN_ATTRS 6647_mm512_unpackhi_epi64 (__m512i __A, __m512i __B) 6648{ 6649 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 6650 (__v8di) __B, 6651 (__v8di) 6652 _mm512_setzero_si512 (), 6653 (__mmask8) -1); 6654} 6655 6656static __inline__ __m512i __DEFAULT_FN_ATTRS 6657_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 6658{ 6659 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 6660 (__v8di) __B, 6661 (__v8di) __W, 6662 (__mmask8) __U); 6663} 6664 6665static __inline__ __m512i __DEFAULT_FN_ATTRS 6666_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 6667{ 6668 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 6669 (__v8di) __B, 6670 (__v8di) 6671 _mm512_setzero_si512 (), 6672 (__mmask8) __U); 6673} 6674 6675static __inline__ __m512i __DEFAULT_FN_ATTRS 6676_mm512_unpacklo_epi32 (__m512i __A, __m512i __B) 6677{ 6678 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 6679 (__v16si) __B, 6680 (__v16si) 6681 _mm512_setzero_si512 (), 6682 (__mmask16) -1); 6683} 6684 6685static __inline__ __m512i __DEFAULT_FN_ATTRS 6686_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 6687 __m512i __B) 6688{ 6689 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 6690 (__v16si) __B, 6691 (__v16si) __W, 6692 (__mmask16) __U); 6693} 6694 6695static __inline__ __m512i __DEFAULT_FN_ATTRS 6696_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 6697{ 6698 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 6699 (__v16si) __B, 6700 (__v16si) 6701 _mm512_setzero_si512 (), 6702 (__mmask16) __U); 6703} 6704 6705static __inline__ __m512i __DEFAULT_FN_ATTRS 6706_mm512_unpacklo_epi64 (__m512i __A, __m512i __B) 6707{ 6708 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 6709 (__v8di) __B, 6710 (__v8di) 6711 _mm512_setzero_si512 (), 6712 (__mmask8) -1); 6713} 6714 6715static __inline__ __m512i __DEFAULT_FN_ATTRS 6716_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 6717{ 6718 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 6719 (__v8di) __B, 6720 (__v8di) __W, 6721 (__mmask8) __U); 6722} 6723 6724static __inline__ __m512i __DEFAULT_FN_ATTRS 6725_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 6726{ 6727 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 6728 (__v8di) __B, 6729 (__v8di) 6730 _mm512_setzero_si512 (), 6731 (__mmask8) __U); 6732} 6733 6734#define _mm_roundscale_round_sd(A, B, imm, R) __extension__ ({ \ 6735 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6736 (__v2df)(__m128d)(B), \ 6737 (__v2df)_mm_setzero_pd(), \ 6738 (__mmask8)-1, (int)(imm), \ 6739 (int)(R)); }) 6740 6741#define _mm_roundscale_sd(A, B, imm) __extension__ ({ \ 6742 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6743 (__v2df)(__m128d)(B), \ 6744 (__v2df)_mm_setzero_pd(), \ 6745 (__mmask8)-1, (int)(imm), \ 6746 _MM_FROUND_CUR_DIRECTION); }) 6747 6748#define _mm_mask_roundscale_sd(W, U, A, B, imm) __extension__ ({ \ 6749 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6750 (__v2df)(__m128d)(B), \ 6751 (__v2df)(__m128d)(W), \ 6752 (__mmask8)(U), (int)(imm), \ 6753 _MM_FROUND_CUR_DIRECTION); }) 6754 6755#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) __extension__ ({ \ 6756 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6757 (__v2df)(__m128d)(B), \ 6758 (__v2df)(__m128d)(W), \ 6759 (__mmask8)(U), (int)(I), \ 6760 (int)(R)); }) 6761 6762#define _mm_maskz_roundscale_sd(U, A, B, I) __extension__ ({ \ 6763 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6764 (__v2df)(__m128d)(B), \ 6765 (__v2df)_mm_setzero_pd(), \ 6766 (__mmask8)(U), (int)(I), \ 6767 _MM_FROUND_CUR_DIRECTION); }) 6768 6769#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) __extension__ ({ \ 6770 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6771 (__v2df)(__m128d)(B), \ 6772 (__v2df)_mm_setzero_pd(), \ 6773 (__mmask8)(U), (int)(I), \ 6774 (int)(R)); }) 6775 6776#define _mm_roundscale_round_ss(A, B, imm, R) __extension__ ({ \ 6777 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6778 (__v4sf)(__m128)(B), \ 6779 (__v4sf)_mm_setzero_ps(), \ 6780 (__mmask8)-1, (int)(imm), \ 6781 (int)(R)); }) 6782 6783#define _mm_roundscale_ss(A, B, imm) __extension__ ({ \ 6784 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6785 (__v4sf)(__m128)(B), \ 6786 (__v4sf)_mm_setzero_ps(), \ 6787 (__mmask8)-1, (int)(imm), \ 6788 _MM_FROUND_CUR_DIRECTION); }) 6789 6790#define _mm_mask_roundscale_ss(W, U, A, B, I) __extension__ ({ \ 6791 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6792 (__v4sf)(__m128)(B), \ 6793 (__v4sf)(__m128)(W), \ 6794 (__mmask8)(U), (int)(I), \ 6795 _MM_FROUND_CUR_DIRECTION); }) 6796 6797#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) __extension__ ({ \ 6798 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6799 (__v4sf)(__m128)(B), \ 6800 (__v4sf)(__m128)(W), \ 6801 (__mmask8)(U), (int)(I), \ 6802 (int)(R)); }) 6803 6804#define _mm_maskz_roundscale_ss(U, A, B, I) __extension__ ({ \ 6805 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6806 (__v4sf)(__m128)(B), \ 6807 (__v4sf)_mm_setzero_ps(), \ 6808 (__mmask8)(U), (int)(I), \ 6809 _MM_FROUND_CUR_DIRECTION); }) 6810 6811#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) __extension__ ({ \ 6812 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6813 (__v4sf)(__m128)(B), \ 6814 (__v4sf)_mm_setzero_ps(), \ 6815 (__mmask8)(U), (int)(I), \ 6816 (int)(R)); }) 6817 6818#define _mm512_scalef_round_pd(A, B, R) __extension__ ({ \ 6819 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6820 (__v8df)(__m512d)(B), \ 6821 (__v8df)_mm512_undefined_pd(), \ 6822 (__mmask8)-1, (int)(R)); }) 6823 6824#define _mm512_mask_scalef_round_pd(W, U, A, B, R) __extension__ ({ \ 6825 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6826 (__v8df)(__m512d)(B), \ 6827 (__v8df)(__m512d)(W), \ 6828 (__mmask8)(U), (int)(R)); }) 6829 6830#define _mm512_maskz_scalef_round_pd(U, A, B, R) __extension__ ({ \ 6831 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6832 (__v8df)(__m512d)(B), \ 6833 (__v8df)_mm512_setzero_pd(), \ 6834 (__mmask8)(U), (int)(R)); }) 6835 6836static __inline__ __m512d __DEFAULT_FN_ATTRS 6837_mm512_scalef_pd (__m512d __A, __m512d __B) 6838{ 6839 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6840 (__v8df) __B, 6841 (__v8df) 6842 _mm512_undefined_pd (), 6843 (__mmask8) -1, 6844 _MM_FROUND_CUR_DIRECTION); 6845} 6846 6847static __inline__ __m512d __DEFAULT_FN_ATTRS 6848_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 6849{ 6850 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6851 (__v8df) __B, 6852 (__v8df) __W, 6853 (__mmask8) __U, 6854 _MM_FROUND_CUR_DIRECTION); 6855} 6856 6857static __inline__ __m512d __DEFAULT_FN_ATTRS 6858_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 6859{ 6860 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 6861 (__v8df) __B, 6862 (__v8df) 6863 _mm512_setzero_pd (), 6864 (__mmask8) __U, 6865 _MM_FROUND_CUR_DIRECTION); 6866} 6867 6868#define _mm512_scalef_round_ps(A, B, R) __extension__ ({ \ 6869 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6870 (__v16sf)(__m512)(B), \ 6871 (__v16sf)_mm512_undefined_ps(), \ 6872 (__mmask16)-1, (int)(R)); }) 6873 6874#define _mm512_mask_scalef_round_ps(W, U, A, B, R) __extension__ ({ \ 6875 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6876 (__v16sf)(__m512)(B), \ 6877 (__v16sf)(__m512)(W), \ 6878 (__mmask16)(U), (int)(R)); }) 6879 6880#define _mm512_maskz_scalef_round_ps(U, A, B, R) __extension__ ({ \ 6881 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6882 (__v16sf)(__m512)(B), \ 6883 (__v16sf)_mm512_setzero_ps(), \ 6884 (__mmask16)(U), (int)(R)); }) 6885 6886static __inline__ __m512 __DEFAULT_FN_ATTRS 6887_mm512_scalef_ps (__m512 __A, __m512 __B) 6888{ 6889 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6890 (__v16sf) __B, 6891 (__v16sf) 6892 _mm512_undefined_ps (), 6893 (__mmask16) -1, 6894 _MM_FROUND_CUR_DIRECTION); 6895} 6896 6897static __inline__ __m512 __DEFAULT_FN_ATTRS 6898_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 6899{ 6900 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6901 (__v16sf) __B, 6902 (__v16sf) __W, 6903 (__mmask16) __U, 6904 _MM_FROUND_CUR_DIRECTION); 6905} 6906 6907static __inline__ __m512 __DEFAULT_FN_ATTRS 6908_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 6909{ 6910 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 6911 (__v16sf) __B, 6912 (__v16sf) 6913 _mm512_setzero_ps (), 6914 (__mmask16) __U, 6915 _MM_FROUND_CUR_DIRECTION); 6916} 6917 6918#define _mm_scalef_round_sd(A, B, R) __extension__ ({ \ 6919 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6920 (__v2df)(__m128d)(B), \ 6921 (__v2df)_mm_setzero_pd(), \ 6922 (__mmask8)-1, (int)(R)); }) 6923 6924static __inline__ __m128d __DEFAULT_FN_ATTRS 6925_mm_scalef_sd (__m128d __A, __m128d __B) 6926{ 6927 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, 6928 (__v2df)( __B), (__v2df) _mm_setzero_pd(), 6929 (__mmask8) -1, 6930 _MM_FROUND_CUR_DIRECTION); 6931} 6932 6933static __inline__ __m128d __DEFAULT_FN_ATTRS 6934_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6935{ 6936 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 6937 (__v2df) __B, 6938 (__v2df) __W, 6939 (__mmask8) __U, 6940 _MM_FROUND_CUR_DIRECTION); 6941} 6942 6943#define _mm_mask_scalef_round_sd(W, U, A, B, R) __extension__ ({ \ 6944 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6945 (__v2df)(__m128d)(B), \ 6946 (__v2df)(__m128d)(W), \ 6947 (__mmask8)(U), (int)(R)); }) 6948 6949static __inline__ __m128d __DEFAULT_FN_ATTRS 6950_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) 6951{ 6952 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, 6953 (__v2df) __B, 6954 (__v2df) _mm_setzero_pd (), 6955 (__mmask8) __U, 6956 _MM_FROUND_CUR_DIRECTION); 6957} 6958 6959#define _mm_maskz_scalef_round_sd(U, A, B, R) __extension__ ({ \ 6960 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6961 (__v2df)(__m128d)(B), \ 6962 (__v2df)_mm_setzero_pd(), \ 6963 (__mmask8)(U), (int)(R)); }) 6964 6965#define _mm_scalef_round_ss(A, B, R) __extension__ ({ \ 6966 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6967 (__v4sf)(__m128)(B), \ 6968 (__v4sf)_mm_setzero_ps(), \ 6969 (__mmask8)-1, (int)(R)); }) 6970 6971static __inline__ __m128 __DEFAULT_FN_ATTRS 6972_mm_scalef_ss (__m128 __A, __m128 __B) 6973{ 6974 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, 6975 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), 6976 (__mmask8) -1, 6977 _MM_FROUND_CUR_DIRECTION); 6978} 6979 6980static __inline__ __m128 __DEFAULT_FN_ATTRS 6981_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6982{ 6983 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 6984 (__v4sf) __B, 6985 (__v4sf) __W, 6986 (__mmask8) __U, 6987 _MM_FROUND_CUR_DIRECTION); 6988} 6989 6990#define _mm_mask_scalef_round_ss(W, U, A, B, R) __extension__ ({ \ 6991 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6992 (__v4sf)(__m128)(B), \ 6993 (__v4sf)(__m128)(W), \ 6994 (__mmask8)(U), (int)(R)); }) 6995 6996static __inline__ __m128 __DEFAULT_FN_ATTRS 6997_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) 6998{ 6999 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, 7000 (__v4sf) __B, 7001 (__v4sf) _mm_setzero_ps (), 7002 (__mmask8) __U, 7003 _MM_FROUND_CUR_DIRECTION); 7004} 7005 7006#define _mm_maskz_scalef_round_ss(U, A, B, R) __extension__ ({ \ 7007 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 7008 (__v4sf)(__m128)(B), \ 7009 (__v4sf)_mm_setzero_ps(), \ 7010 (__mmask8)(U), \ 7011 _MM_FROUND_CUR_DIRECTION); }) 7012 7013#define _mm512_srai_epi32(A, B) __extension__ ({ \ 7014 (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \ 7015 (__v16si)_mm512_setzero_si512(), \ 7016 (__mmask16)-1); }) 7017 7018#define _mm512_mask_srai_epi32(W, U, A, B) __extension__ ({ \ 7019 (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \ 7020 (__v16si)(__m512i)(W), \ 7021 (__mmask16)(U)); }) 7022 7023#define _mm512_maskz_srai_epi32(U, A, B) __extension__ ({ \ 7024 (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \ 7025 (__v16si)_mm512_setzero_si512(), \ 7026 (__mmask16)(U)); }) 7027 7028#define _mm512_srai_epi64(A, B) __extension__ ({ \ 7029 (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 7030 (__v8di)_mm512_setzero_si512(), \ 7031 (__mmask8)-1); }) 7032 7033#define _mm512_mask_srai_epi64(W, U, A, B) __extension__ ({ \ 7034 (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 7035 (__v8di)(__m512i)(W), \ 7036 (__mmask8)(U)); }) 7037 7038#define _mm512_maskz_srai_epi64(U, A, B) __extension__ ({ \ 7039 (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \ 7040 (__v8di)_mm512_setzero_si512(), \ 7041 (__mmask8)(U)); }) 7042 7043#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \ 7044 (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7045 (__v16sf)(__m512)(B), (int)(imm), \ 7046 (__v16sf)_mm512_undefined_ps(), \ 7047 (__mmask16)-1); }) 7048 7049#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 7050 (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7051 (__v16sf)(__m512)(B), (int)(imm), \ 7052 (__v16sf)(__m512)(W), \ 7053 (__mmask16)(U)); }) 7054 7055#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 7056 (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ 7057 (__v16sf)(__m512)(B), (int)(imm), \ 7058 (__v16sf)_mm512_setzero_ps(), \ 7059 (__mmask16)(U)); }) 7060 7061#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \ 7062 (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7063 (__v8df)(__m512d)(B), (int)(imm), \ 7064 (__v8df)_mm512_undefined_pd(), \ 7065 (__mmask8)-1); }) 7066 7067#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 7068 (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7069 (__v8df)(__m512d)(B), (int)(imm), \ 7070 (__v8df)(__m512d)(W), \ 7071 (__mmask8)(U)); }) 7072 7073#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 7074 (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \ 7075 (__v8df)(__m512d)(B), (int)(imm), \ 7076 (__v8df)_mm512_setzero_pd(), \ 7077 (__mmask8)(U)); }) 7078 7079#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7080 (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7081 (__v16si)(__m512i)(B), (int)(imm), \ 7082 (__v16si)_mm512_setzero_si512(), \ 7083 (__mmask16)-1); }) 7084 7085#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7086 (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7087 (__v16si)(__m512i)(B), (int)(imm), \ 7088 (__v16si)(__m512i)(W), \ 7089 (__mmask16)(U)); }) 7090 7091#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7092 (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \ 7093 (__v16si)(__m512i)(B), (int)(imm), \ 7094 (__v16si)_mm512_setzero_si512(), \ 7095 (__mmask16)(U)); }) 7096 7097#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7098 (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7099 (__v8di)(__m512i)(B), (int)(imm), \ 7100 (__v8di)_mm512_setzero_si512(), \ 7101 (__mmask8)-1); }) 7102 7103#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7104 (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7105 (__v8di)(__m512i)(B), (int)(imm), \ 7106 (__v8di)(__m512i)(W), \ 7107 (__mmask8)(U)); }) 7108 7109#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7110 (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \ 7111 (__v8di)(__m512i)(B), (int)(imm), \ 7112 (__v8di)_mm512_setzero_si512(), \ 7113 (__mmask8)(U)); }) 7114 7115#define _mm512_shuffle_pd(M, V, imm) __extension__ ({ \ 7116 (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ 7117 (__v8df)(__m512d)(V), (int)(imm), \ 7118 (__v8df)_mm512_undefined_pd(), \ 7119 (__mmask8)-1); }) 7120 7121#define _mm512_mask_shuffle_pd(W, U, M, V, imm) __extension__ ({ \ 7122 (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ 7123 (__v8df)(__m512d)(V), (int)(imm), \ 7124 (__v8df)(__m512d)(W), \ 7125 (__mmask8)(U)); }) 7126 7127#define _mm512_maskz_shuffle_pd(U, M, V, imm) __extension__ ({ \ 7128 (__m512d)__builtin_ia32_shufpd512_mask((__v8df)(__m512d)(M), \ 7129 (__v8df)(__m512d)(V), (int)(imm), \ 7130 (__v8df)_mm512_setzero_pd(), \ 7131 (__mmask8)(U)); }) 7132 7133#define _mm512_shuffle_ps(M, V, imm) __extension__ ({ \ 7134 (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \ 7135 (__v16sf)(__m512)(V), (int)(imm), \ 7136 (__v16sf)_mm512_undefined_ps(), \ 7137 (__mmask16)-1); }) 7138 7139#define _mm512_mask_shuffle_ps(W, U, M, V, imm) __extension__ ({ \ 7140 (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \ 7141 (__v16sf)(__m512)(V), (int)(imm), \ 7142 (__v16sf)(__m512)(W), \ 7143 (__mmask16)(U)); }) 7144 7145#define _mm512_maskz_shuffle_ps(U, M, V, imm) __extension__ ({ \ 7146 (__m512)__builtin_ia32_shufps512_mask((__v16sf)(__m512)(M), \ 7147 (__v16sf)(__m512)(V), (int)(imm), \ 7148 (__v16sf)_mm512_setzero_ps(), \ 7149 (__mmask16)(U)); }) 7150 7151#define _mm_sqrt_round_sd(A, B, R) __extension__ ({ \ 7152 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(B), \ 7153 (__v2df)(__m128d)(A), \ 7154 (__v2df)_mm_setzero_pd(), \ 7155 (__mmask8)-1, (int)(R)); }) 7156 7157static __inline__ __m128d __DEFAULT_FN_ATTRS 7158_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 7159{ 7160 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B, 7161 (__v2df) __A, 7162 (__v2df) __W, 7163 (__mmask8) __U, 7164 _MM_FROUND_CUR_DIRECTION); 7165} 7166 7167#define _mm_mask_sqrt_round_sd(W, U, A, B, R) __extension__ ({ \ 7168 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(B), \ 7169 (__v2df)(__m128d)(A), \ 7170 (__v2df)(__m128d)(W), \ 7171 (__mmask8)(U), (int)(R)); }) 7172 7173static __inline__ __m128d __DEFAULT_FN_ATTRS 7174_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) 7175{ 7176 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __B, 7177 (__v2df) __A, 7178 (__v2df) _mm_setzero_pd (), 7179 (__mmask8) __U, 7180 _MM_FROUND_CUR_DIRECTION); 7181} 7182 7183#define _mm_maskz_sqrt_round_sd(U, A, B, R) __extension__ ({ \ 7184 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(B), \ 7185 (__v2df)(__m128d)(A), \ 7186 (__v2df)_mm_setzero_pd(), \ 7187 (__mmask8)(U), (int)(R)); }) 7188 7189#define _mm_sqrt_round_ss(A, B, R) __extension__ ({ \ 7190 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(B), \ 7191 (__v4sf)(__m128)(A), \ 7192 (__v4sf)_mm_setzero_ps(), \ 7193 (__mmask8)-1, (int)(R)); }) 7194 7195static __inline__ __m128 __DEFAULT_FN_ATTRS 7196_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 7197{ 7198 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __B, 7199 (__v4sf) __A, 7200 (__v4sf) __W, 7201 (__mmask8) __U, 7202 _MM_FROUND_CUR_DIRECTION); 7203} 7204 7205#define _mm_mask_sqrt_round_ss(W, U, A, B, R) __extension__ ({ \ 7206 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(B), \ 7207 (__v4sf)(__m128)(A), \ 7208 (__v4sf)(__m128)(W), (__mmask8)(U), \ 7209 (int)(R)); }) 7210 7211static __inline__ __m128 __DEFAULT_FN_ATTRS 7212_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) 7213{ 7214 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, 7215 (__v4sf) __B, 7216 (__v4sf) _mm_setzero_ps (), 7217 (__mmask8) __U, 7218 _MM_FROUND_CUR_DIRECTION); 7219} 7220 7221#define _mm_maskz_sqrt_round_ss(U, A, B, R) __extension__ ({ \ 7222 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(B), \ 7223 (__v4sf)(__m128)(A), \ 7224 (__v4sf)_mm_setzero_ps(), \ 7225 (__mmask8)(U), (int)(R)); }) 7226 7227static __inline__ __m512 __DEFAULT_FN_ATTRS 7228_mm512_broadcast_f32x4 (__m128 __A) 7229{ 7230 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 7231 (__v16sf) 7232 _mm512_undefined_ps (), 7233 (__mmask16) -1); 7234} 7235 7236static __inline__ __m512 __DEFAULT_FN_ATTRS 7237_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A) 7238{ 7239 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 7240 (__v16sf) __O, 7241 __M); 7242} 7243 7244static __inline__ __m512 __DEFAULT_FN_ATTRS 7245_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A) 7246{ 7247 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 7248 (__v16sf) 7249 _mm512_setzero_ps (), 7250 __M); 7251} 7252 7253static __inline__ __m512d __DEFAULT_FN_ATTRS 7254_mm512_broadcast_f64x4 (__m256d __A) 7255{ 7256 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 7257 (__v8df) 7258 _mm512_undefined_pd (), 7259 (__mmask8) -1); 7260} 7261 7262static __inline__ __m512d __DEFAULT_FN_ATTRS 7263_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A) 7264{ 7265 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 7266 (__v8df) __O, 7267 __M); 7268} 7269 7270static __inline__ __m512d __DEFAULT_FN_ATTRS 7271_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A) 7272{ 7273 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 7274 (__v8df) 7275 _mm512_setzero_pd (), 7276 __M); 7277} 7278 7279static __inline__ __m512i __DEFAULT_FN_ATTRS 7280_mm512_broadcast_i32x4 (__m128i __A) 7281{ 7282 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 7283 (__v16si) 7284 _mm512_undefined_epi32 (), 7285 (__mmask16) -1); 7286} 7287 7288static __inline__ __m512i __DEFAULT_FN_ATTRS 7289_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A) 7290{ 7291 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 7292 (__v16si) __O, 7293 __M); 7294} 7295 7296static __inline__ __m512i __DEFAULT_FN_ATTRS 7297_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A) 7298{ 7299 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 7300 (__v16si) 7301 _mm512_setzero_si512 (), 7302 __M); 7303} 7304 7305static __inline__ __m512i __DEFAULT_FN_ATTRS 7306_mm512_broadcast_i64x4 (__m256i __A) 7307{ 7308 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 7309 (__v8di) 7310 _mm512_undefined_epi32 (), 7311 (__mmask8) -1); 7312} 7313 7314static __inline__ __m512i __DEFAULT_FN_ATTRS 7315_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A) 7316{ 7317 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 7318 (__v8di) __O, 7319 __M); 7320} 7321 7322static __inline__ __m512i __DEFAULT_FN_ATTRS 7323_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) 7324{ 7325 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 7326 (__v8di) 7327 _mm512_setzero_si512 (), 7328 __M); 7329} 7330 7331static __inline__ __m512d __DEFAULT_FN_ATTRS 7332_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) 7333{ 7334 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 7335 (__v8df) __O, __M); 7336} 7337 7338static __inline__ __m512d __DEFAULT_FN_ATTRS 7339_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 7340{ 7341 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 7342 (__v8df) 7343 _mm512_setzero_pd (), 7344 __M); 7345} 7346 7347static __inline__ __m512 __DEFAULT_FN_ATTRS 7348_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 7349{ 7350 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 7351 (__v16sf) __O, __M); 7352} 7353 7354static __inline__ __m512 __DEFAULT_FN_ATTRS 7355_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 7356{ 7357 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 7358 (__v16sf) 7359 _mm512_setzero_ps (), 7360 __M); 7361} 7362 7363static __inline__ __m128i __DEFAULT_FN_ATTRS 7364_mm512_cvtsepi32_epi8 (__m512i __A) 7365{ 7366 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7367 (__v16qi) _mm_undefined_si128 (), 7368 (__mmask16) -1); 7369} 7370 7371static __inline__ __m128i __DEFAULT_FN_ATTRS 7372_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7373{ 7374 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7375 (__v16qi) __O, __M); 7376} 7377 7378static __inline__ __m128i __DEFAULT_FN_ATTRS 7379_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) 7380{ 7381 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 7382 (__v16qi) _mm_setzero_si128 (), 7383 __M); 7384} 7385 7386static __inline__ void __DEFAULT_FN_ATTRS 7387_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7388{ 7389 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7390} 7391 7392static __inline__ __m256i __DEFAULT_FN_ATTRS 7393_mm512_cvtsepi32_epi16 (__m512i __A) 7394{ 7395 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7396 (__v16hi) _mm256_undefined_si256 (), 7397 (__mmask16) -1); 7398} 7399 7400static __inline__ __m256i __DEFAULT_FN_ATTRS 7401_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7402{ 7403 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7404 (__v16hi) __O, __M); 7405} 7406 7407static __inline__ __m256i __DEFAULT_FN_ATTRS 7408_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 7409{ 7410 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 7411 (__v16hi) _mm256_setzero_si256 (), 7412 __M); 7413} 7414 7415static __inline__ void __DEFAULT_FN_ATTRS 7416_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 7417{ 7418 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7419} 7420 7421static __inline__ __m128i __DEFAULT_FN_ATTRS 7422_mm512_cvtsepi64_epi8 (__m512i __A) 7423{ 7424 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7425 (__v16qi) _mm_undefined_si128 (), 7426 (__mmask8) -1); 7427} 7428 7429static __inline__ __m128i __DEFAULT_FN_ATTRS 7430_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7431{ 7432 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7433 (__v16qi) __O, __M); 7434} 7435 7436static __inline__ __m128i __DEFAULT_FN_ATTRS 7437_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) 7438{ 7439 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 7440 (__v16qi) _mm_setzero_si128 (), 7441 __M); 7442} 7443 7444static __inline__ void __DEFAULT_FN_ATTRS 7445_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7446{ 7447 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7448} 7449 7450static __inline__ __m256i __DEFAULT_FN_ATTRS 7451_mm512_cvtsepi64_epi32 (__m512i __A) 7452{ 7453 __v8si __O; 7454 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7455 (__v8si) _mm256_undefined_si256 (), 7456 (__mmask8) -1); 7457} 7458 7459static __inline__ __m256i __DEFAULT_FN_ATTRS 7460_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7461{ 7462 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7463 (__v8si) __O, __M); 7464} 7465 7466static __inline__ __m256i __DEFAULT_FN_ATTRS 7467_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 7468{ 7469 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 7470 (__v8si) _mm256_setzero_si256 (), 7471 __M); 7472} 7473 7474static __inline__ void __DEFAULT_FN_ATTRS 7475_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 7476{ 7477 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7478} 7479 7480static __inline__ __m128i __DEFAULT_FN_ATTRS 7481_mm512_cvtsepi64_epi16 (__m512i __A) 7482{ 7483 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7484 (__v8hi) _mm_undefined_si128 (), 7485 (__mmask8) -1); 7486} 7487 7488static __inline__ __m128i __DEFAULT_FN_ATTRS 7489_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7490{ 7491 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7492 (__v8hi) __O, __M); 7493} 7494 7495static __inline__ __m128i __DEFAULT_FN_ATTRS 7496_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) 7497{ 7498 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 7499 (__v8hi) _mm_setzero_si128 (), 7500 __M); 7501} 7502 7503static __inline__ void __DEFAULT_FN_ATTRS 7504_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) 7505{ 7506 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7507} 7508 7509static __inline__ __m128i __DEFAULT_FN_ATTRS 7510_mm512_cvtusepi32_epi8 (__m512i __A) 7511{ 7512 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7513 (__v16qi) _mm_undefined_si128 (), 7514 (__mmask16) -1); 7515} 7516 7517static __inline__ __m128i __DEFAULT_FN_ATTRS 7518_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7519{ 7520 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7521 (__v16qi) __O, 7522 __M); 7523} 7524 7525static __inline__ __m128i __DEFAULT_FN_ATTRS 7526_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 7527{ 7528 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 7529 (__v16qi) _mm_setzero_si128 (), 7530 __M); 7531} 7532 7533static __inline__ void __DEFAULT_FN_ATTRS 7534_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7535{ 7536 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7537} 7538 7539static __inline__ __m256i __DEFAULT_FN_ATTRS 7540_mm512_cvtusepi32_epi16 (__m512i __A) 7541{ 7542 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7543 (__v16hi) _mm256_undefined_si256 (), 7544 (__mmask16) -1); 7545} 7546 7547static __inline__ __m256i __DEFAULT_FN_ATTRS 7548_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7549{ 7550 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7551 (__v16hi) __O, 7552 __M); 7553} 7554 7555static __inline__ __m256i __DEFAULT_FN_ATTRS 7556_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 7557{ 7558 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 7559 (__v16hi) _mm256_setzero_si256 (), 7560 __M); 7561} 7562 7563static __inline__ void __DEFAULT_FN_ATTRS 7564_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 7565{ 7566 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 7567} 7568 7569static __inline__ __m128i __DEFAULT_FN_ATTRS 7570_mm512_cvtusepi64_epi8 (__m512i __A) 7571{ 7572 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7573 (__v16qi) _mm_undefined_si128 (), 7574 (__mmask8) -1); 7575} 7576 7577static __inline__ __m128i __DEFAULT_FN_ATTRS 7578_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7579{ 7580 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7581 (__v16qi) __O, 7582 __M); 7583} 7584 7585static __inline__ __m128i __DEFAULT_FN_ATTRS 7586_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) 7587{ 7588 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 7589 (__v16qi) _mm_setzero_si128 (), 7590 __M); 7591} 7592 7593static __inline__ void __DEFAULT_FN_ATTRS 7594_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7595{ 7596 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7597} 7598 7599static __inline__ __m256i __DEFAULT_FN_ATTRS 7600_mm512_cvtusepi64_epi32 (__m512i __A) 7601{ 7602 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7603 (__v8si) _mm256_undefined_si256 (), 7604 (__mmask8) -1); 7605} 7606 7607static __inline__ __m256i __DEFAULT_FN_ATTRS 7608_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7609{ 7610 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7611 (__v8si) __O, __M); 7612} 7613 7614static __inline__ __m256i __DEFAULT_FN_ATTRS 7615_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 7616{ 7617 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 7618 (__v8si) _mm256_setzero_si256 (), 7619 __M); 7620} 7621 7622static __inline__ void __DEFAULT_FN_ATTRS 7623_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 7624{ 7625 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 7626} 7627 7628static __inline__ __m128i __DEFAULT_FN_ATTRS 7629_mm512_cvtusepi64_epi16 (__m512i __A) 7630{ 7631 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7632 (__v8hi) _mm_undefined_si128 (), 7633 (__mmask8) -1); 7634} 7635 7636static __inline__ __m128i __DEFAULT_FN_ATTRS 7637_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7638{ 7639 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7640 (__v8hi) __O, __M); 7641} 7642 7643static __inline__ __m128i __DEFAULT_FN_ATTRS 7644_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) 7645{ 7646 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 7647 (__v8hi) _mm_setzero_si128 (), 7648 __M); 7649} 7650 7651static __inline__ void __DEFAULT_FN_ATTRS 7652_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7653{ 7654 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); 7655} 7656 7657static __inline__ __m128i __DEFAULT_FN_ATTRS 7658_mm512_cvtepi32_epi8 (__m512i __A) 7659{ 7660 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7661 (__v16qi) _mm_undefined_si128 (), 7662 (__mmask16) -1); 7663} 7664 7665static __inline__ __m128i __DEFAULT_FN_ATTRS 7666_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 7667{ 7668 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7669 (__v16qi) __O, __M); 7670} 7671 7672static __inline__ __m128i __DEFAULT_FN_ATTRS 7673_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 7674{ 7675 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 7676 (__v16qi) _mm_setzero_si128 (), 7677 __M); 7678} 7679 7680static __inline__ void __DEFAULT_FN_ATTRS 7681_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 7682{ 7683 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 7684} 7685 7686static __inline__ __m256i __DEFAULT_FN_ATTRS 7687_mm512_cvtepi32_epi16 (__m512i __A) 7688{ 7689 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7690 (__v16hi) _mm256_undefined_si256 (), 7691 (__mmask16) -1); 7692} 7693 7694static __inline__ __m256i __DEFAULT_FN_ATTRS 7695_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 7696{ 7697 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7698 (__v16hi) __O, __M); 7699} 7700 7701static __inline__ __m256i __DEFAULT_FN_ATTRS 7702_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 7703{ 7704 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 7705 (__v16hi) _mm256_setzero_si256 (), 7706 __M); 7707} 7708 7709static __inline__ void __DEFAULT_FN_ATTRS 7710_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 7711{ 7712 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 7713} 7714 7715static __inline__ __m128i __DEFAULT_FN_ATTRS 7716_mm512_cvtepi64_epi8 (__m512i __A) 7717{ 7718 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7719 (__v16qi) _mm_undefined_si128 (), 7720 (__mmask8) -1); 7721} 7722 7723static __inline__ __m128i __DEFAULT_FN_ATTRS 7724_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 7725{ 7726 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7727 (__v16qi) __O, __M); 7728} 7729 7730static __inline__ __m128i __DEFAULT_FN_ATTRS 7731_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) 7732{ 7733 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 7734 (__v16qi) _mm_setzero_si128 (), 7735 __M); 7736} 7737 7738static __inline__ void __DEFAULT_FN_ATTRS 7739_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 7740{ 7741 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 7742} 7743 7744static __inline__ __m256i __DEFAULT_FN_ATTRS 7745_mm512_cvtepi64_epi32 (__m512i __A) 7746{ 7747 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7748 (__v8si) _mm256_undefined_si256 (), 7749 (__mmask8) -1); 7750} 7751 7752static __inline__ __m256i __DEFAULT_FN_ATTRS 7753_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 7754{ 7755 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7756 (__v8si) __O, __M); 7757} 7758 7759static __inline__ __m256i __DEFAULT_FN_ATTRS 7760_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 7761{ 7762 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 7763 (__v8si) _mm256_setzero_si256 (), 7764 __M); 7765} 7766 7767static __inline__ void __DEFAULT_FN_ATTRS 7768_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 7769{ 7770 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 7771} 7772 7773static __inline__ __m128i __DEFAULT_FN_ATTRS 7774_mm512_cvtepi64_epi16 (__m512i __A) 7775{ 7776 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7777 (__v8hi) _mm_undefined_si128 (), 7778 (__mmask8) -1); 7779} 7780 7781static __inline__ __m128i __DEFAULT_FN_ATTRS 7782_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 7783{ 7784 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7785 (__v8hi) __O, __M); 7786} 7787 7788static __inline__ __m128i __DEFAULT_FN_ATTRS 7789_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) 7790{ 7791 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 7792 (__v8hi) _mm_setzero_si128 (), 7793 __M); 7794} 7795 7796static __inline__ void __DEFAULT_FN_ATTRS 7797_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 7798{ 7799 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 7800} 7801 7802#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \ 7803 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7804 (__v4si)_mm_undefined_si128(), \ 7805 (__mmask8)-1); }) 7806 7807#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 7808 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7809 (__v4si)(__m128i)(W), \ 7810 (__mmask8)(U)); }) 7811 7812#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 7813 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7814 (__v4si)_mm_setzero_si128(), \ 7815 (__mmask8)(U)); }) 7816 7817#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ 7818 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7819 (__v4di)_mm256_undefined_si256(), \ 7820 (__mmask8)-1); }) 7821 7822#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \ 7823 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7824 (__v4di)(__m256i)(W), \ 7825 (__mmask8)(U)); }) 7826 7827#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \ 7828 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7829 (__v4di)_mm256_setzero_si256(), \ 7830 (__mmask8)(U)); }) 7831 7832#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \ 7833 (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \ 7834 (__v4df)(__m256d)(B), (int)(imm), \ 7835 (__v8df)_mm512_undefined_pd(), \ 7836 (__mmask8)-1); }) 7837 7838#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \ 7839 (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \ 7840 (__v4df)(__m256d)(B), (int)(imm), \ 7841 (__v8df)(__m512d)(W), \ 7842 (__mmask8)(U)); }) 7843 7844#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \ 7845 (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \ 7846 (__v4df)(__m256d)(B), (int)(imm), \ 7847 (__v8df)_mm512_setzero_pd(), \ 7848 (__mmask8)(U)); }) 7849 7850#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \ 7851 (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \ 7852 (__v4di)(__m256i)(B), (int)(imm), \ 7853 (__v8di)_mm512_setzero_si512(), \ 7854 (__mmask8)-1); }) 7855 7856#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \ 7857 (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \ 7858 (__v4di)(__m256i)(B), (int)(imm), \ 7859 (__v8di)(__m512i)(W), \ 7860 (__mmask8)(U)); }) 7861 7862#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \ 7863 (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \ 7864 (__v4di)(__m256i)(B), (int)(imm), \ 7865 (__v8di)_mm512_setzero_si512(), \ 7866 (__mmask8)(U)); }) 7867 7868#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \ 7869 (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \ 7870 (__v4sf)(__m128)(B), (int)(imm), \ 7871 (__v16sf)_mm512_undefined_ps(), \ 7872 (__mmask16)-1); }) 7873 7874#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ 7875 (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \ 7876 (__v4sf)(__m128)(B), (int)(imm), \ 7877 (__v16sf)(__m512)(W), \ 7878 (__mmask16)(U)); }) 7879 7880#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ 7881 (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \ 7882 (__v4sf)(__m128)(B), (int)(imm), \ 7883 (__v16sf)_mm512_setzero_ps(), \ 7884 (__mmask16)(U)); }) 7885 7886#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \ 7887 (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \ 7888 (__v4si)(__m128i)(B), (int)(imm), \ 7889 (__v16si)_mm512_setzero_si512(), \ 7890 (__mmask16)-1); }) 7891 7892#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ 7893 (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \ 7894 (__v4si)(__m128i)(B), (int)(imm), \ 7895 (__v16si)(__m512i)(W), \ 7896 (__mmask16)(U)); }) 7897 7898#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ 7899 (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \ 7900 (__v4si)(__m128i)(B), (int)(imm), \ 7901 (__v16si)_mm512_setzero_si512(), \ 7902 (__mmask16)(U)); }) 7903 7904#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \ 7905 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7906 (int)(((C)<<2) | (B)), \ 7907 (__v8df)_mm512_undefined_pd(), \ 7908 (__mmask8)-1, (int)(R)); }) 7909 7910#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) __extension__ ({ \ 7911 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7912 (int)(((C)<<2) | (B)), \ 7913 (__v8df)(__m512d)(W), \ 7914 (__mmask8)(U), (int)(R)); }) 7915 7916#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) __extension__ ({ \ 7917 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7918 (int)(((C)<<2) | (B)), \ 7919 (__v8df)_mm512_setzero_pd(), \ 7920 (__mmask8)(U), (int)(R)); }) 7921 7922#define _mm512_getmant_pd(A, B, C) __extension__ ({ \ 7923 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7924 (int)(((C)<<2) | (B)), \ 7925 (__v8df)_mm512_setzero_pd(), \ 7926 (__mmask8)-1, \ 7927 _MM_FROUND_CUR_DIRECTION); }) 7928 7929#define _mm512_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ 7930 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7931 (int)(((C)<<2) | (B)), \ 7932 (__v8df)(__m512d)(W), \ 7933 (__mmask8)(U), \ 7934 _MM_FROUND_CUR_DIRECTION); }) 7935 7936#define _mm512_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ 7937 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7938 (int)(((C)<<2) | (B)), \ 7939 (__v8df)_mm512_setzero_pd(), \ 7940 (__mmask8)(U), \ 7941 _MM_FROUND_CUR_DIRECTION); }) 7942 7943#define _mm512_getmant_round_ps(A, B, C, R) __extension__ ({ \ 7944 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7945 (int)(((C)<<2) | (B)), \ 7946 (__v16sf)_mm512_undefined_ps(), \ 7947 (__mmask16)-1, (int)(R)); }) 7948 7949#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) __extension__ ({ \ 7950 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7951 (int)(((C)<<2) | (B)), \ 7952 (__v16sf)(__m512)(W), \ 7953 (__mmask16)(U), (int)(R)); }) 7954 7955#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) __extension__ ({ \ 7956 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7957 (int)(((C)<<2) | (B)), \ 7958 (__v16sf)_mm512_setzero_ps(), \ 7959 (__mmask16)(U), (int)(R)); }) 7960 7961#define _mm512_getmant_ps(A, B, C) __extension__ ({ \ 7962 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7963 (int)(((C)<<2)|(B)), \ 7964 (__v16sf)_mm512_undefined_ps(), \ 7965 (__mmask16)-1, \ 7966 _MM_FROUND_CUR_DIRECTION); }) 7967 7968#define _mm512_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 7969 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7970 (int)(((C)<<2)|(B)), \ 7971 (__v16sf)(__m512)(W), \ 7972 (__mmask16)(U), \ 7973 _MM_FROUND_CUR_DIRECTION); }) 7974 7975#define _mm512_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 7976 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7977 (int)(((C)<<2)|(B)), \ 7978 (__v16sf)_mm512_setzero_ps(), \ 7979 (__mmask16)(U), \ 7980 _MM_FROUND_CUR_DIRECTION); }) 7981 7982#define _mm512_getexp_round_pd(A, R) __extension__ ({ \ 7983 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7984 (__v8df)_mm512_undefined_pd(), \ 7985 (__mmask8)-1, (int)(R)); }) 7986 7987#define _mm512_mask_getexp_round_pd(W, U, A, R) __extension__ ({ \ 7988 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7989 (__v8df)(__m512d)(W), \ 7990 (__mmask8)(U), (int)(R)); }) 7991 7992#define _mm512_maskz_getexp_round_pd(U, A, R) __extension__ ({ \ 7993 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7994 (__v8df)_mm512_setzero_pd(), \ 7995 (__mmask8)(U), (int)(R)); }) 7996 7997static __inline__ __m512d __DEFAULT_FN_ATTRS 7998_mm512_getexp_pd (__m512d __A) 7999{ 8000 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8001 (__v8df) _mm512_undefined_pd (), 8002 (__mmask8) -1, 8003 _MM_FROUND_CUR_DIRECTION); 8004} 8005 8006static __inline__ __m512d __DEFAULT_FN_ATTRS 8007_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) 8008{ 8009 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8010 (__v8df) __W, 8011 (__mmask8) __U, 8012 _MM_FROUND_CUR_DIRECTION); 8013} 8014 8015static __inline__ __m512d __DEFAULT_FN_ATTRS 8016_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) 8017{ 8018 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8019 (__v8df) _mm512_setzero_pd (), 8020 (__mmask8) __U, 8021 _MM_FROUND_CUR_DIRECTION); 8022} 8023 8024#define _mm512_getexp_round_ps(A, R) __extension__ ({ \ 8025 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8026 (__v16sf)_mm512_undefined_ps(), \ 8027 (__mmask16)-1, (int)(R)); }) 8028 8029#define _mm512_mask_getexp_round_ps(W, U, A, R) __extension__ ({ \ 8030 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8031 (__v16sf)(__m512)(W), \ 8032 (__mmask16)(U), (int)(R)); }) 8033 8034#define _mm512_maskz_getexp_round_ps(U, A, R) __extension__ ({ \ 8035 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8036 (__v16sf)_mm512_setzero_ps(), \ 8037 (__mmask16)(U), (int)(R)); }) 8038 8039static __inline__ __m512 __DEFAULT_FN_ATTRS 8040_mm512_getexp_ps (__m512 __A) 8041{ 8042 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8043 (__v16sf) _mm512_undefined_ps (), 8044 (__mmask16) -1, 8045 _MM_FROUND_CUR_DIRECTION); 8046} 8047 8048static __inline__ __m512 __DEFAULT_FN_ATTRS 8049_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 8050{ 8051 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8052 (__v16sf) __W, 8053 (__mmask16) __U, 8054 _MM_FROUND_CUR_DIRECTION); 8055} 8056 8057static __inline__ __m512 __DEFAULT_FN_ATTRS 8058_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 8059{ 8060 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8061 (__v16sf) _mm512_setzero_ps (), 8062 (__mmask16) __U, 8063 _MM_FROUND_CUR_DIRECTION); 8064} 8065 8066#define _mm512_i64gather_ps(index, addr, scale) __extension__ ({ \ 8067 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 8068 (float const *)(addr), \ 8069 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8070 (int)(scale)); }) 8071 8072#define _mm512_mask_i64gather_ps( __v1_old, __mask, __index,\ 8073 __addr, __scale) __extension__({\ 8074__builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,\ 8075 __addr,(__v8di) __index, __mask, __scale);\ 8076}) 8077 8078#define _mm512_i64gather_epi32(index, addr, scale) __extension__ ({\ 8079 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \ 8080 (int const *)(addr), \ 8081 (__v8di)(__m512i)(index), \ 8082 (__mmask8)-1, (int)(scale)); }) 8083 8084#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8085 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 8086 (int const *)(addr), \ 8087 (__v8di)(__m512i)(index), \ 8088 (__mmask8)(mask), (int)(scale)); }) 8089 8090#define _mm512_i64gather_pd(index, addr, scale) __extension__ ({\ 8091 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 8092 (double const *)(addr), \ 8093 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8094 (int)(scale)); }) 8095 8096#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8097 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 8098 (double const *)(addr), \ 8099 (__v8di)(__m512i)(index), \ 8100 (__mmask8)(mask), (int)(scale)); }) 8101 8102#define _mm512_i64gather_epi64(index, addr, scale) __extension__ ({\ 8103 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \ 8104 (long long const *)(addr), \ 8105 (__v8di)(__m512i)(index), (__mmask8)-1, \ 8106 (int)(scale)); }) 8107 8108#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8109 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 8110 (long long const *)(addr), \ 8111 (__v8di)(__m512i)(index), \ 8112 (__mmask8)(mask), (int)(scale)); }) 8113 8114#define _mm512_i32gather_ps(index, addr, scale) __extension__ ({\ 8115 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 8116 (float const *)(addr), \ 8117 (__v16sf)(__m512)(index), \ 8118 (__mmask16)-1, (int)(scale)); }) 8119 8120#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8121 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 8122 (float const *)(addr), \ 8123 (__v16sf)(__m512)(index), \ 8124 (__mmask16)(mask), (int)(scale)); }) 8125 8126#define _mm512_i32gather_epi32(index, addr, scale) __extension__ ({\ 8127 (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 8128 (int const *)(addr), \ 8129 (__v16si)(__m512i)(index), \ 8130 (__mmask16)-1, (int)(scale)); }) 8131 8132#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8133 (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 8134 (int const *)(addr), \ 8135 (__v16si)(__m512i)(index), \ 8136 (__mmask16)(mask), (int)(scale)); }) 8137 8138#define _mm512_i32gather_pd(index, addr, scale) __extension__ ({\ 8139 (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 8140 (double const *)(addr), \ 8141 (__v8si)(__m256i)(index), (__mmask8)-1, \ 8142 (int)(scale)); }) 8143 8144#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8145 (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 8146 (double const *)(addr), \ 8147 (__v8si)(__m256i)(index), \ 8148 (__mmask8)(mask), (int)(scale)); }) 8149 8150#define _mm512_i32gather_epi64(index, addr, scale) __extension__ ({\ 8151 (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 8152 (long long const *)(addr), \ 8153 (__v8si)(__m256i)(index), (__mmask8)-1, \ 8154 (int)(scale)); }) 8155 8156#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8157 (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 8158 (long long const *)(addr), \ 8159 (__v8si)(__m256i)(index), \ 8160 (__mmask8)(mask), (int)(scale)); }) 8161 8162#define _mm512_i64scatter_ps(addr, index, v1, scale) __extension__ ({\ 8163 __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \ 8164 (__v8di)(__m512i)(index), \ 8165 (__v8sf)(__m256)(v1), (int)(scale)); }) 8166 8167#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ 8168 __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \ 8169 (__v8di)(__m512i)(index), \ 8170 (__v8sf)(__m256)(v1), (int)(scale)); }) 8171 8172#define _mm512_i64scatter_epi32(addr, index, v1, scale) __extension__ ({\ 8173 __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \ 8174 (__v8di)(__m512i)(index), \ 8175 (__v8si)(__m256i)(v1), (int)(scale)); }) 8176 8177#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ 8178 __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \ 8179 (__v8di)(__m512i)(index), \ 8180 (__v8si)(__m256i)(v1), (int)(scale)); }) 8181 8182#define _mm512_i64scatter_pd(addr, index, v1, scale) __extension__ ({\ 8183 __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \ 8184 (__v8di)(__m512i)(index), \ 8185 (__v8df)(__m512d)(v1), (int)(scale)); }) 8186 8187#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ 8188 __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \ 8189 (__v8di)(__m512i)(index), \ 8190 (__v8df)(__m512d)(v1), (int)(scale)); }) 8191 8192#define _mm512_i64scatter_epi64(addr, index, v1, scale) __extension__ ({\ 8193 __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \ 8194 (__v8di)(__m512i)(index), \ 8195 (__v8di)(__m512i)(v1), (int)(scale)); }) 8196 8197#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ 8198 __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \ 8199 (__v8di)(__m512i)(index), \ 8200 (__v8di)(__m512i)(v1), (int)(scale)); }) 8201 8202#define _mm512_i32scatter_ps(addr, index, v1, scale) __extension__ ({\ 8203 __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \ 8204 (__v16si)(__m512i)(index), \ 8205 (__v16sf)(__m512)(v1), (int)(scale)); }) 8206 8207#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({\ 8208 __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \ 8209 (__v16si)(__m512i)(index), \ 8210 (__v16sf)(__m512)(v1), (int)(scale)); }) 8211 8212#define _mm512_i32scatter_epi32(addr, index, v1, scale) __extension__ ({\ 8213 __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \ 8214 (__v16si)(__m512i)(index), \ 8215 (__v16si)(__m512i)(v1), (int)(scale)); }) 8216 8217#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({\ 8218 __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \ 8219 (__v16si)(__m512i)(index), \ 8220 (__v16si)(__m512i)(v1), (int)(scale)); }) 8221 8222#define _mm512_i32scatter_pd(addr, index, v1, scale) __extension__ ({\ 8223 __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \ 8224 (__v8si)(__m256i)(index), \ 8225 (__v8df)(__m512d)(v1), (int)(scale)); }) 8226 8227#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({\ 8228 __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \ 8229 (__v8si)(__m256i)(index), \ 8230 (__v8df)(__m512d)(v1), (int)(scale)); }) 8231 8232#define _mm512_i32scatter_epi64(addr, index, v1, scale) __extension__ ({\ 8233 __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \ 8234 (__v8si)(__m256i)(index), \ 8235 (__v8di)(__m512i)(v1), (int)(scale)); }) 8236 8237#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({\ 8238 __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \ 8239 (__v8si)(__m256i)(index), \ 8240 (__v8di)(__m512i)(v1), (int)(scale)); }) 8241 8242static __inline__ __m128 __DEFAULT_FN_ATTRS 8243_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8244{ 8245 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A, 8246 (__v4sf) __B, 8247 (__v4sf) __W, 8248 (__mmask8) __U, 8249 _MM_FROUND_CUR_DIRECTION); 8250} 8251 8252#define _mm_mask_fmadd_round_ss(W, U, A, B, R) __extension__({\ 8253 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 8254 (__v4sf)(__m128)(B), \ 8255 (__v4sf)(__m128)(W), (__mmask8)(U), \ 8256 (int)(R)); }) 8257 8258static __inline__ __m128 __DEFAULT_FN_ATTRS 8259_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8260{ 8261 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, 8262 (__v4sf) __B, 8263 (__v4sf) __C, 8264 (__mmask8) __U, 8265 _MM_FROUND_CUR_DIRECTION); 8266} 8267 8268#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8269 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8270 (__v4sf)(__m128)(B), \ 8271 (__v4sf)(__m128)(C), (__mmask8)(U), \ 8272 _MM_FROUND_CUR_DIRECTION); }) 8273 8274static __inline__ __m128 __DEFAULT_FN_ATTRS 8275_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8276{ 8277 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 8278 (__v4sf) __X, 8279 (__v4sf) __Y, 8280 (__mmask8) __U, 8281 _MM_FROUND_CUR_DIRECTION); 8282} 8283 8284#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) __extension__ ({\ 8285 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 8286 (__v4sf)(__m128)(X), \ 8287 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8288 (int)(R)); }) 8289 8290static __inline__ __m128 __DEFAULT_FN_ATTRS 8291_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8292{ 8293 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __A, 8294 -(__v4sf) __B, 8295 (__v4sf) __W, 8296 (__mmask8) __U, 8297 _MM_FROUND_CUR_DIRECTION); 8298} 8299 8300#define _mm_mask_fmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8301 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 8302 -(__v4sf)(__m128)(B), \ 8303 (__v4sf)(__m128)(W), (__mmask8)(U), \ 8304 (int)(R)); }) 8305 8306static __inline__ __m128 __DEFAULT_FN_ATTRS 8307_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8308{ 8309 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __A, 8310 (__v4sf) __B, 8311 -(__v4sf) __C, 8312 (__mmask8) __U, 8313 _MM_FROUND_CUR_DIRECTION); 8314} 8315 8316#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8317 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8318 (__v4sf)(__m128)(B), \ 8319 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8320 (int)(R)); }) 8321 8322static __inline__ __m128 __DEFAULT_FN_ATTRS 8323_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8324{ 8325 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W, 8326 (__v4sf) __X, 8327 -(__v4sf) __Y, 8328 (__mmask8) __U, 8329 _MM_FROUND_CUR_DIRECTION); 8330} 8331 8332#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) __extension__ ({\ 8333 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 8334 (__v4sf)(__m128)(X), \ 8335 -(__v4sf)(__m128)(Y), (__mmask8)(U), \ 8336 (int)(R)); }) 8337 8338static __inline__ __m128 __DEFAULT_FN_ATTRS 8339_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8340{ 8341 return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A, 8342 (__v4sf) __B, 8343 (__v4sf) __W, 8344 (__mmask8) __U, 8345 _MM_FROUND_CUR_DIRECTION); 8346} 8347 8348#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) __extension__ ({\ 8349 (__m128)__builtin_ia32_vfmaddss3_mask(-(__v4sf)(__m128)(A), \ 8350 (__v4sf)(__m128)(B), \ 8351 (__v4sf)(__m128)(W), (__mmask8)(U), \ 8352 (int)(R)); }) 8353 8354static __inline__ __m128 __DEFAULT_FN_ATTRS 8355_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8356{ 8357 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, 8358 (__v4sf) __B, 8359 (__v4sf) __C, 8360 (__mmask8) __U, 8361 _MM_FROUND_CUR_DIRECTION); 8362} 8363 8364#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) __extension__ ({\ 8365 (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8366 (__v4sf)(__m128)(B), \ 8367 (__v4sf)(__m128)(C), (__mmask8)(U), \ 8368 (int)(R)); }) 8369 8370static __inline__ __m128 __DEFAULT_FN_ATTRS 8371_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8372{ 8373 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, 8374 (__v4sf) __X, 8375 (__v4sf) __Y, 8376 (__mmask8) __U, 8377 _MM_FROUND_CUR_DIRECTION); 8378} 8379 8380#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) __extension__({\ 8381 (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \ 8382 (__v4sf)(__m128)(X), \ 8383 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8384 (int)(R)); }) 8385 8386static __inline__ __m128 __DEFAULT_FN_ATTRS 8387_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 8388{ 8389 return (__m128) __builtin_ia32_vfmaddss3_mask (-(__v4sf) __A, 8390 -(__v4sf) __B, 8391 (__v4sf) __W, 8392 (__mmask8) __U, 8393 _MM_FROUND_CUR_DIRECTION); 8394} 8395 8396#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) __extension__ ({\ 8397 (__m128)__builtin_ia32_vfmaddss3_mask(-(__v4sf)(__m128)(A), \ 8398 -(__v4sf)(__m128)(B), \ 8399 (__v4sf)(__m128)(W), (__mmask8)(U), \ 8400 (int)(R)); }) 8401 8402static __inline__ __m128 __DEFAULT_FN_ATTRS 8403_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 8404{ 8405 return (__m128) __builtin_ia32_vfmaddss3_maskz (-(__v4sf) __A, 8406 (__v4sf) __B, 8407 -(__v4sf) __C, 8408 (__mmask8) __U, 8409 _MM_FROUND_CUR_DIRECTION); 8410} 8411 8412#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) __extension__ ({\ 8413 (__m128)__builtin_ia32_vfmaddss3_maskz(-(__v4sf)(__m128)(A), \ 8414 (__v4sf)(__m128)(B), \ 8415 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8416 _MM_FROUND_CUR_DIRECTION); }) 8417 8418static __inline__ __m128 __DEFAULT_FN_ATTRS 8419_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) 8420{ 8421 return (__m128) __builtin_ia32_vfmaddss3_mask3 (-(__v4sf) __W, 8422 (__v4sf) __X, 8423 -(__v4sf) __Y, 8424 (__mmask8) __U, 8425 _MM_FROUND_CUR_DIRECTION); 8426} 8427 8428#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) __extension__({\ 8429 (__m128)__builtin_ia32_vfmaddss3_mask3(-(__v4sf)(__m128)(W), \ 8430 (__v4sf)(__m128)(X), \ 8431 -(__v4sf)(__m128)(Y), (__mmask8)(U), \ 8432 (int)(R)); }) 8433 8434static __inline__ __m128d __DEFAULT_FN_ATTRS 8435_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8436{ 8437 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A, 8438 (__v2df) __B, 8439 (__v2df) __W, 8440 (__mmask8) __U, 8441 _MM_FROUND_CUR_DIRECTION); 8442} 8443 8444#define _mm_mask_fmadd_round_sd(W, U, A, B, R) __extension__({\ 8445 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8446 (__v2df)(__m128d)(B), \ 8447 (__v2df)(__m128d)(W), (__mmask8)(U), \ 8448 (int)(R)); }) 8449 8450static __inline__ __m128d __DEFAULT_FN_ATTRS 8451_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8452{ 8453 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, 8454 (__v2df) __B, 8455 (__v2df) __C, 8456 (__mmask8) __U, 8457 _MM_FROUND_CUR_DIRECTION); 8458} 8459 8460#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8461 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8462 (__v2df)(__m128d)(B), \ 8463 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8464 _MM_FROUND_CUR_DIRECTION); }) 8465 8466static __inline__ __m128d __DEFAULT_FN_ATTRS 8467_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8468{ 8469 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 8470 (__v2df) __X, 8471 (__v2df) __Y, 8472 (__mmask8) __U, 8473 _MM_FROUND_CUR_DIRECTION); 8474} 8475 8476#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) __extension__ ({\ 8477 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8478 (__v2df)(__m128d)(X), \ 8479 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8480 (int)(R)); }) 8481 8482static __inline__ __m128d __DEFAULT_FN_ATTRS 8483_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8484{ 8485 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( (__v2df) __A, 8486 -(__v2df) __B, 8487 (__v2df) __W, 8488 (__mmask8) __U, 8489 _MM_FROUND_CUR_DIRECTION); 8490} 8491 8492#define _mm_mask_fmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8493 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8494 -(__v2df)(__m128d)(B), \ 8495 (__v2df)(__m128d)(W), (__mmask8)(U), \ 8496 (int)(R)); }) 8497 8498static __inline__ __m128d __DEFAULT_FN_ATTRS 8499_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8500{ 8501 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( (__v2df) __A, 8502 (__v2df) __B, 8503 -(__v2df) __C, 8504 (__mmask8) __U, 8505 _MM_FROUND_CUR_DIRECTION); 8506} 8507 8508#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8509 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8510 (__v2df)(__m128d)(B), \ 8511 -(__v2df)(__m128d)(C), \ 8512 (__mmask8)(U), (int)(R)); }) 8513 8514static __inline__ __m128d __DEFAULT_FN_ATTRS 8515_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8516{ 8517 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W, 8518 (__v2df) __X, 8519 -(__v2df) __Y, 8520 (__mmask8) __U, 8521 _MM_FROUND_CUR_DIRECTION); 8522} 8523 8524#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) __extension__ ({\ 8525 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8526 (__v2df)(__m128d)(X), \ 8527 -(__v2df)(__m128d)(Y), \ 8528 (__mmask8)(U), (int)(R)); }) 8529 8530static __inline__ __m128d __DEFAULT_FN_ATTRS 8531_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8532{ 8533 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A, 8534 (__v2df) __B, 8535 (__v2df) __W, 8536 (__mmask8) __U, 8537 _MM_FROUND_CUR_DIRECTION); 8538} 8539 8540#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) __extension__ ({\ 8541 (__m128d)__builtin_ia32_vfmaddsd3_mask(-(__v2df)(__m128d)(A), \ 8542 (__v2df)(__m128d)(B), \ 8543 (__v2df)(__m128d)(W), (__mmask8)(U), \ 8544 (int)(R)); }) 8545 8546static __inline__ __m128d __DEFAULT_FN_ATTRS 8547_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8548{ 8549 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, 8550 (__v2df) __B, 8551 (__v2df) __C, 8552 (__mmask8) __U, 8553 _MM_FROUND_CUR_DIRECTION); 8554} 8555 8556#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) __extension__ ({\ 8557 (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8558 (__v2df)(__m128d)(B), \ 8559 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8560 (int)(R)); }) 8561 8562static __inline__ __m128d __DEFAULT_FN_ATTRS 8563_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8564{ 8565 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) __W, 8566 (__v2df) __X, 8567 (__v2df) __Y, 8568 (__mmask8) __U, 8569 _MM_FROUND_CUR_DIRECTION); 8570} 8571 8572#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) __extension__({\ 8573 (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \ 8574 (__v2df)(__m128d)(X), \ 8575 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8576 (int)(R)); }) 8577 8578static __inline__ __m128d __DEFAULT_FN_ATTRS 8579_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 8580{ 8581 return (__m128d) __builtin_ia32_vfmaddsd3_mask ( -(__v2df) __A, 8582 -(__v2df) __B, 8583 (__v2df) __W, 8584 (__mmask8) __U, 8585 _MM_FROUND_CUR_DIRECTION); 8586} 8587 8588#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) __extension__ ({\ 8589 (__m128d)__builtin_ia32_vfmaddsd3_mask(-(__v2df)(__m128d)(A), \ 8590 -(__v2df)(__m128d)(B), \ 8591 (__v2df)(__m128d)(W), (__mmask8)(U), \ 8592 (int)(R)); }) 8593 8594static __inline__ __m128d __DEFAULT_FN_ATTRS 8595_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 8596{ 8597 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ( -(__v2df) __A, 8598 (__v2df) __B, 8599 -(__v2df) __C, 8600 (__mmask8) __U, 8601 _MM_FROUND_CUR_DIRECTION); 8602} 8603 8604#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) __extension__ ({\ 8605 (__m128d)__builtin_ia32_vfmaddsd3_maskz(-(__v2df)(__m128d)(A), \ 8606 (__v2df)(__m128d)(B), \ 8607 -(__v2df)(__m128d)(C), \ 8608 (__mmask8)(U), \ 8609 _MM_FROUND_CUR_DIRECTION); }) 8610 8611static __inline__ __m128d __DEFAULT_FN_ATTRS 8612_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) 8613{ 8614 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 (-(__v2df) (__W), 8615 (__v2df) __X, 8616 -(__v2df) (__Y), 8617 (__mmask8) __U, 8618 _MM_FROUND_CUR_DIRECTION); 8619} 8620 8621#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) __extension__({\ 8622 (__m128d)__builtin_ia32_vfmaddsd3_mask3(-(__v2df)(__m128d)(W), \ 8623 (__v2df)(__m128d)(X), \ 8624 -(__v2df)(__m128d)(Y), \ 8625 (__mmask8)(U), (int)(R)); }) 8626 8627#define _mm512_permutex_pd(X, M) __extension__ ({ \ 8628 (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ 8629 (__v8df)_mm512_undefined_pd(), \ 8630 (__mmask8)-1); }) 8631 8632#define _mm512_mask_permutex_pd(W, U, X, M) __extension__ ({ \ 8633 (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ 8634 (__v8df)(__m512d)(W), \ 8635 (__mmask8)(U)); }) 8636 8637#define _mm512_maskz_permutex_pd(U, X, M) __extension__ ({ \ 8638 (__m512d)__builtin_ia32_permdf512_mask((__v8df)(__m512d)(X), (int)(M), \ 8639 (__v8df)_mm512_setzero_pd(), \ 8640 (__mmask8)(U)); }) 8641 8642#define _mm512_permutex_epi64(X, I) __extension__ ({ \ 8643 (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ 8644 (__v8di)_mm512_undefined_epi32(), \ 8645 (__mmask8)-1); }) 8646 8647#define _mm512_mask_permutex_epi64(W, M, X, I) __extension__ ({ \ 8648 (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ 8649 (__v8di)(__m512i)(W), \ 8650 (__mmask8)(M)); }) 8651 8652#define _mm512_maskz_permutex_epi64(M, X, I) __extension__ ({ \ 8653 (__m512i)__builtin_ia32_permdi512_mask((__v8di)(__m512i)(X), (int)(I), \ 8654 (__v8di)_mm512_setzero_si512(), \ 8655 (__mmask8)(M)); }) 8656 8657static __inline__ __m512d __DEFAULT_FN_ATTRS 8658_mm512_permutexvar_pd (__m512i __X, __m512d __Y) 8659{ 8660 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8661 (__v8di) __X, 8662 (__v8df) _mm512_undefined_pd (), 8663 (__mmask8) -1); 8664} 8665 8666static __inline__ __m512d __DEFAULT_FN_ATTRS 8667_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) 8668{ 8669 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8670 (__v8di) __X, 8671 (__v8df) __W, 8672 (__mmask8) __U); 8673} 8674 8675static __inline__ __m512d __DEFAULT_FN_ATTRS 8676_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) 8677{ 8678 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 8679 (__v8di) __X, 8680 (__v8df) _mm512_setzero_pd (), 8681 (__mmask8) __U); 8682} 8683 8684static __inline__ __m512i __DEFAULT_FN_ATTRS 8685_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) 8686{ 8687 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8688 (__v8di) __X, 8689 (__v8di) _mm512_setzero_si512 (), 8690 __M); 8691} 8692 8693static __inline__ __m512i __DEFAULT_FN_ATTRS 8694_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) 8695{ 8696 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8697 (__v8di) __X, 8698 (__v8di) _mm512_undefined_epi32 (), 8699 (__mmask8) -1); 8700} 8701 8702static __inline__ __m512i __DEFAULT_FN_ATTRS 8703_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, 8704 __m512i __Y) 8705{ 8706 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 8707 (__v8di) __X, 8708 (__v8di) __W, 8709 __M); 8710} 8711 8712static __inline__ __m512 __DEFAULT_FN_ATTRS 8713_mm512_permutexvar_ps (__m512i __X, __m512 __Y) 8714{ 8715 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8716 (__v16si) __X, 8717 (__v16sf) _mm512_undefined_ps (), 8718 (__mmask16) -1); 8719} 8720 8721static __inline__ __m512 __DEFAULT_FN_ATTRS 8722_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) 8723{ 8724 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8725 (__v16si) __X, 8726 (__v16sf) __W, 8727 (__mmask16) __U); 8728} 8729 8730static __inline__ __m512 __DEFAULT_FN_ATTRS 8731_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) 8732{ 8733 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 8734 (__v16si) __X, 8735 (__v16sf) _mm512_setzero_ps (), 8736 (__mmask16) __U); 8737} 8738 8739static __inline__ __m512i __DEFAULT_FN_ATTRS 8740_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) 8741{ 8742 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8743 (__v16si) __X, 8744 (__v16si) _mm512_setzero_si512 (), 8745 __M); 8746} 8747 8748static __inline__ __m512i __DEFAULT_FN_ATTRS 8749_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) 8750{ 8751 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8752 (__v16si) __X, 8753 (__v16si) _mm512_undefined_epi32 (), 8754 (__mmask16) -1); 8755} 8756 8757static __inline__ __m512i __DEFAULT_FN_ATTRS 8758_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, 8759 __m512i __Y) 8760{ 8761 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 8762 (__v16si) __X, 8763 (__v16si) __W, 8764 __M); 8765} 8766 8767static __inline__ __mmask16 __DEFAULT_FN_ATTRS 8768_mm512_kand (__mmask16 __A, __mmask16 __B) 8769{ 8770 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); 8771} 8772 8773static __inline__ __mmask16 __DEFAULT_FN_ATTRS 8774_mm512_kandn (__mmask16 __A, __mmask16 __B) 8775{ 8776 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); 8777} 8778 8779static __inline__ __mmask16 __DEFAULT_FN_ATTRS 8780_mm512_kor (__mmask16 __A, __mmask16 __B) 8781{ 8782 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); 8783} 8784 8785static __inline__ int __DEFAULT_FN_ATTRS 8786_mm512_kortestc (__mmask16 __A, __mmask16 __B) 8787{ 8788 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); 8789} 8790 8791static __inline__ int __DEFAULT_FN_ATTRS 8792_mm512_kortestz (__mmask16 __A, __mmask16 __B) 8793{ 8794 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); 8795} 8796 8797static __inline__ __mmask16 __DEFAULT_FN_ATTRS 8798_mm512_kunpackb (__mmask16 __A, __mmask16 __B) 8799{ 8800 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); 8801} 8802 8803static __inline__ __mmask16 __DEFAULT_FN_ATTRS 8804_mm512_kxnor (__mmask16 __A, __mmask16 __B) 8805{ 8806 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); 8807} 8808 8809static __inline__ __mmask16 __DEFAULT_FN_ATTRS 8810_mm512_kxor (__mmask16 __A, __mmask16 __B) 8811{ 8812 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); 8813} 8814 8815static __inline__ void __DEFAULT_FN_ATTRS 8816_mm512_stream_si512 (__m512i * __P, __m512i __A) 8817{ 8818 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A); 8819} 8820 8821static __inline__ __m512i __DEFAULT_FN_ATTRS 8822_mm512_stream_load_si512 (void *__P) 8823{ 8824 return __builtin_ia32_movntdqa512 ((__v8di *)__P); 8825} 8826 8827static __inline__ void __DEFAULT_FN_ATTRS 8828_mm512_stream_pd (double *__P, __m512d __A) 8829{ 8830 __builtin_ia32_movntpd512 (__P, (__v8df) __A); 8831} 8832 8833static __inline__ void __DEFAULT_FN_ATTRS 8834_mm512_stream_ps (float *__P, __m512 __A) 8835{ 8836 __builtin_ia32_movntps512 (__P, (__v16sf) __A); 8837} 8838 8839static __inline__ __m512d __DEFAULT_FN_ATTRS 8840_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 8841{ 8842 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 8843 (__v8df) __W, 8844 (__mmask8) __U); 8845} 8846 8847static __inline__ __m512d __DEFAULT_FN_ATTRS 8848_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 8849{ 8850 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 8851 (__v8df) 8852 _mm512_setzero_pd (), 8853 (__mmask8) __U); 8854} 8855 8856static __inline__ __m512i __DEFAULT_FN_ATTRS 8857_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 8858{ 8859 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 8860 (__v8di) __W, 8861 (__mmask8) __U); 8862} 8863 8864static __inline__ __m512i __DEFAULT_FN_ATTRS 8865_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 8866{ 8867 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 8868 (__v8di) 8869 _mm512_setzero_si512 (), 8870 (__mmask8) __U); 8871} 8872 8873static __inline__ __m512 __DEFAULT_FN_ATTRS 8874_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 8875{ 8876 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 8877 (__v16sf) __W, 8878 (__mmask16) __U); 8879} 8880 8881static __inline__ __m512 __DEFAULT_FN_ATTRS 8882_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 8883{ 8884 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 8885 (__v16sf) 8886 _mm512_setzero_ps (), 8887 (__mmask16) __U); 8888} 8889 8890static __inline__ __m512i __DEFAULT_FN_ATTRS 8891_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 8892{ 8893 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 8894 (__v16si) __W, 8895 (__mmask16) __U); 8896} 8897 8898static __inline__ __m512i __DEFAULT_FN_ATTRS 8899_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 8900{ 8901 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 8902 (__v16si) 8903 _mm512_setzero_si512 (), 8904 (__mmask16) __U); 8905} 8906 8907#define _mm_cmp_round_ss_mask(X, Y, P, R) __extension__ ({ \ 8908 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8909 (__v4sf)(__m128)(Y), (int)(P), \ 8910 (__mmask8)-1, (int)(R)); }) 8911 8912#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) __extension__ ({ \ 8913 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8914 (__v4sf)(__m128)(Y), (int)(P), \ 8915 (__mmask8)(M), (int)(R)); }) 8916 8917#define _mm_cmp_ss_mask(X, Y, P) __extension__ ({ \ 8918 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8919 (__v4sf)(__m128)(Y), (int)(P), \ 8920 (__mmask8)-1, \ 8921 _MM_FROUND_CUR_DIRECTION); }) 8922 8923#define _mm_mask_cmp_ss_mask(M, X, Y, P) __extension__ ({ \ 8924 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8925 (__v4sf)(__m128)(Y), (int)(P), \ 8926 (__mmask8)(M), \ 8927 _MM_FROUND_CUR_DIRECTION); }) 8928 8929#define _mm_cmp_round_sd_mask(X, Y, P, R) __extension__ ({ \ 8930 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8931 (__v2df)(__m128d)(Y), (int)(P), \ 8932 (__mmask8)-1, (int)(R)); }) 8933 8934#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) __extension__ ({ \ 8935 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8936 (__v2df)(__m128d)(Y), (int)(P), \ 8937 (__mmask8)(M), (int)(R)); }) 8938 8939#define _mm_cmp_sd_mask(X, Y, P) __extension__ ({ \ 8940 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8941 (__v2df)(__m128d)(Y), (int)(P), \ 8942 (__mmask8)-1, \ 8943 _MM_FROUND_CUR_DIRECTION); }) 8944 8945#define _mm_mask_cmp_sd_mask(M, X, Y, P) __extension__ ({ \ 8946 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8947 (__v2df)(__m128d)(Y), (int)(P), \ 8948 (__mmask8)(M), \ 8949 _MM_FROUND_CUR_DIRECTION); }) 8950 8951static __inline__ __m512 __DEFAULT_FN_ATTRS 8952_mm512_movehdup_ps (__m512 __A) 8953{ 8954 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 8955 (__v16sf) 8956 _mm512_undefined_ps (), 8957 (__mmask16) -1); 8958} 8959 8960static __inline__ __m512 __DEFAULT_FN_ATTRS 8961_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 8962{ 8963 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 8964 (__v16sf) __W, 8965 (__mmask16) __U); 8966} 8967 8968static __inline__ __m512 __DEFAULT_FN_ATTRS 8969_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 8970{ 8971 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 8972 (__v16sf) 8973 _mm512_setzero_ps (), 8974 (__mmask16) __U); 8975} 8976 8977static __inline__ __m512 __DEFAULT_FN_ATTRS 8978_mm512_moveldup_ps (__m512 __A) 8979{ 8980 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 8981 (__v16sf) 8982 _mm512_undefined_ps (), 8983 (__mmask16) -1); 8984} 8985 8986static __inline__ __m512 __DEFAULT_FN_ATTRS 8987_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 8988{ 8989 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 8990 (__v16sf) __W, 8991 (__mmask16) __U); 8992} 8993 8994static __inline__ __m512 __DEFAULT_FN_ATTRS 8995_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 8996{ 8997 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 8998 (__v16sf) 8999 _mm512_setzero_ps (), 9000 (__mmask16) __U); 9001} 9002 9003#define _mm512_shuffle_epi32(A, I) __extension__ ({ \ 9004 (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ 9005 (__v16si)_mm512_undefined_epi32(), \ 9006 (__mmask16)-1); }) 9007 9008#define _mm512_mask_shuffle_epi32(W, U, A, I) __extension__ ({ \ 9009 (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ 9010 (__v16si)(__m512i)(W), \ 9011 (__mmask16)(U)); }) 9012 9013#define _mm512_maskz_shuffle_epi32(U, A, I) __extension__ ({ \ 9014 (__m512i)__builtin_ia32_pshufd512_mask((__v16si)(__m512i)(A), (int)(I), \ 9015 (__v16si)_mm512_setzero_si512(), \ 9016 (__mmask16)(U)); }) 9017 9018static __inline__ __m512d __DEFAULT_FN_ATTRS 9019_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 9020{ 9021 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9022 (__v8df) __W, 9023 (__mmask8) __U); 9024} 9025 9026static __inline__ __m512d __DEFAULT_FN_ATTRS 9027_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 9028{ 9029 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9030 (__v8df) _mm512_setzero_pd (), 9031 (__mmask8) __U); 9032} 9033 9034static __inline__ __m512i __DEFAULT_FN_ATTRS 9035_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9036{ 9037 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9038 (__v8di) __W, 9039 (__mmask8) __U); 9040} 9041 9042static __inline__ __m512i __DEFAULT_FN_ATTRS 9043_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) 9044{ 9045 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9046 (__v8di) _mm512_setzero_pd (), 9047 (__mmask8) __U); 9048} 9049 9050static __inline__ __m512d __DEFAULT_FN_ATTRS 9051_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) 9052{ 9053 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 9054 (__v8df) __W, 9055 (__mmask8) __U); 9056} 9057 9058static __inline__ __m512d __DEFAULT_FN_ATTRS 9059_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) 9060{ 9061 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, 9062 (__v8df) _mm512_setzero_pd(), 9063 (__mmask8) __U); 9064} 9065 9066static __inline__ __m512i __DEFAULT_FN_ATTRS 9067_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) 9068{ 9069 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 9070 (__v8di) __W, 9071 (__mmask8) __U); 9072} 9073 9074static __inline__ __m512i __DEFAULT_FN_ATTRS 9075_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) 9076{ 9077 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, 9078 (__v8di) _mm512_setzero_pd(), 9079 (__mmask8) __U); 9080} 9081 9082static __inline__ __m512 __DEFAULT_FN_ATTRS 9083_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) 9084{ 9085 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 9086 (__v16sf) __W, 9087 (__mmask16) __U); 9088} 9089 9090static __inline__ __m512 __DEFAULT_FN_ATTRS 9091_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) 9092{ 9093 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, 9094 (__v16sf) _mm512_setzero_ps(), 9095 (__mmask16) __U); 9096} 9097 9098static __inline__ __m512i __DEFAULT_FN_ATTRS 9099_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) 9100{ 9101 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 9102 (__v16si) __W, 9103 (__mmask16) __U); 9104} 9105 9106static __inline__ __m512i __DEFAULT_FN_ATTRS 9107_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) 9108{ 9109 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, 9110 (__v16si) _mm512_setzero_ps(), 9111 (__mmask16) __U); 9112} 9113 9114static __inline__ __m512 __DEFAULT_FN_ATTRS 9115_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 9116{ 9117 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9118 (__v16sf) __W, 9119 (__mmask16) __U); 9120} 9121 9122static __inline__ __m512 __DEFAULT_FN_ATTRS 9123_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 9124{ 9125 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9126 (__v16sf) _mm512_setzero_ps(), 9127 (__mmask16) __U); 9128} 9129 9130static __inline__ __m512i __DEFAULT_FN_ATTRS 9131_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9132{ 9133 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9134 (__v16si) __W, 9135 (__mmask16) __U); 9136} 9137 9138static __inline__ __m512i __DEFAULT_FN_ATTRS 9139_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 9140{ 9141 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9142 (__v16si) _mm512_setzero_ps(), 9143 (__mmask16) __U); 9144} 9145 9146#define _mm512_cvt_roundps_pd(A, R) __extension__ ({ \ 9147 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9148 (__v8df)_mm512_undefined_pd(), \ 9149 (__mmask8)-1, (int)(R)); }) 9150 9151#define _mm512_mask_cvt_roundps_pd(W, U, A, R) __extension__ ({ \ 9152 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9153 (__v8df)(__m512d)(W), \ 9154 (__mmask8)(U), (int)(R)); }) 9155 9156#define _mm512_maskz_cvt_roundps_pd(U, A, R) __extension__ ({ \ 9157 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 9158 (__v8df)_mm512_setzero_pd(), \ 9159 (__mmask8)(U), (int)(R)); }) 9160 9161static __inline__ __m512d __DEFAULT_FN_ATTRS 9162_mm512_cvtps_pd (__m256 __A) 9163{ 9164 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9165 (__v8df) 9166 _mm512_undefined_pd (), 9167 (__mmask8) -1, 9168 _MM_FROUND_CUR_DIRECTION); 9169} 9170 9171static __inline__ __m512d __DEFAULT_FN_ATTRS 9172_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 9173{ 9174 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9175 (__v8df) __W, 9176 (__mmask8) __U, 9177 _MM_FROUND_CUR_DIRECTION); 9178} 9179 9180static __inline__ __m512d __DEFAULT_FN_ATTRS 9181_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 9182{ 9183 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 9184 (__v8df) 9185 _mm512_setzero_pd (), 9186 (__mmask8) __U, 9187 _MM_FROUND_CUR_DIRECTION); 9188} 9189 9190static __inline__ __m512d __DEFAULT_FN_ATTRS 9191_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 9192{ 9193 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 9194 (__v8df) __W, 9195 (__mmask8) __U); 9196} 9197 9198static __inline__ __m512d __DEFAULT_FN_ATTRS 9199_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 9200{ 9201 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 9202 (__v8df) 9203 _mm512_setzero_pd (), 9204 (__mmask8) __U); 9205} 9206 9207static __inline__ __m512 __DEFAULT_FN_ATTRS 9208_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 9209{ 9210 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 9211 (__v16sf) __W, 9212 (__mmask16) __U); 9213} 9214 9215static __inline__ __m512 __DEFAULT_FN_ATTRS 9216_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) 9217{ 9218 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 9219 (__v16sf) 9220 _mm512_setzero_ps (), 9221 (__mmask16) __U); 9222} 9223 9224static __inline__ void __DEFAULT_FN_ATTRS 9225_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 9226{ 9227 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, 9228 (__mmask8) __U); 9229} 9230 9231static __inline__ void __DEFAULT_FN_ATTRS 9232_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 9233{ 9234 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 9235 (__mmask8) __U); 9236} 9237 9238static __inline__ void __DEFAULT_FN_ATTRS 9239_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 9240{ 9241 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 9242 (__mmask16) __U); 9243} 9244 9245static __inline__ void __DEFAULT_FN_ATTRS 9246_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 9247{ 9248 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 9249 (__mmask16) __U); 9250} 9251 9252#define _mm_cvt_roundsd_ss(A, B, R) __extension__ ({ \ 9253 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9254 (__v2df)(__m128d)(B), \ 9255 (__v4sf)_mm_undefined_ps(), \ 9256 (__mmask8)-1, (int)(R)); }) 9257 9258#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) __extension__ ({ \ 9259 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9260 (__v2df)(__m128d)(B), \ 9261 (__v4sf)(__m128)(W), \ 9262 (__mmask8)(U), (int)(R)); }) 9263 9264#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) __extension__ ({ \ 9265 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9266 (__v2df)(__m128d)(B), \ 9267 (__v4sf)_mm_setzero_ps(), \ 9268 (__mmask8)(U), (int)(R)); }) 9269 9270#define _mm_cvt_roundi64_sd(A, B, R) __extension__ ({ \ 9271 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9272 (int)(R)); }) 9273 9274#define _mm_cvt_roundsi64_sd(A, B, R) __extension__ ({ \ 9275 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9276 (int)(R)); }) 9277 9278#define _mm_cvt_roundsi32_ss(A, B, R) __extension__ ({ \ 9279 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) 9280 9281#define _mm_cvt_roundi32_ss(A, B, R) __extension__ ({ \ 9282 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)); }) 9283 9284#define _mm_cvt_roundsi64_ss(A, B, R) __extension__ ({ \ 9285 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9286 (int)(R)); }) 9287 9288#define _mm_cvt_roundi64_ss(A, B, R) __extension__ ({ \ 9289 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9290 (int)(R)); }) 9291 9292#define _mm_cvt_roundss_sd(A, B, R) __extension__ ({ \ 9293 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9294 (__v4sf)(__m128)(B), \ 9295 (__v2df)_mm_undefined_pd(), \ 9296 (__mmask8)-1, (int)(R)); }) 9297 9298#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) __extension__ ({ \ 9299 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9300 (__v4sf)(__m128)(B), \ 9301 (__v2df)(__m128d)(W), \ 9302 (__mmask8)(U), (int)(R)); }) 9303 9304#define _mm_maskz_cvt_roundss_sd(U, A, B, R) __extension__ ({ \ 9305 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9306 (__v4sf)(__m128)(B), \ 9307 (__v2df)_mm_setzero_pd(), \ 9308 (__mmask8)(U), (int)(R)); }) 9309 9310static __inline__ __m128d __DEFAULT_FN_ATTRS 9311_mm_cvtu32_sd (__m128d __A, unsigned __B) 9312{ 9313 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); 9314} 9315 9316#define _mm_cvt_roundu64_sd(A, B, R) __extension__ ({ \ 9317 (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ 9318 (unsigned long long)(B), (int)(R)); }) 9319 9320static __inline__ __m128d __DEFAULT_FN_ATTRS 9321_mm_cvtu64_sd (__m128d __A, unsigned long long __B) 9322{ 9323 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, 9324 _MM_FROUND_CUR_DIRECTION); 9325} 9326 9327#define _mm_cvt_roundu32_ss(A, B, R) __extension__ ({ \ 9328 (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ 9329 (int)(R)); }) 9330 9331static __inline__ __m128 __DEFAULT_FN_ATTRS 9332_mm_cvtu32_ss (__m128 __A, unsigned __B) 9333{ 9334 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, 9335 _MM_FROUND_CUR_DIRECTION); 9336} 9337 9338#define _mm_cvt_roundu64_ss(A, B, R) __extension__ ({ \ 9339 (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ 9340 (unsigned long long)(B), (int)(R)); }) 9341 9342static __inline__ __m128 __DEFAULT_FN_ATTRS 9343_mm_cvtu64_ss (__m128 __A, unsigned long long __B) 9344{ 9345 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, 9346 _MM_FROUND_CUR_DIRECTION); 9347} 9348 9349static __inline__ __m512i __DEFAULT_FN_ATTRS 9350_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) 9351{ 9352 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O, 9353 __M); 9354} 9355 9356static __inline__ __m512i __DEFAULT_FN_ATTRS 9357_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) 9358{ 9359 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, 9360 __M); 9361} 9362 9363static __inline __m512i __DEFAULT_FN_ATTRS 9364_mm512_set_epi32 (int __A, int __B, int __C, int __D, 9365 int __E, int __F, int __G, int __H, 9366 int __I, int __J, int __K, int __L, 9367 int __M, int __N, int __O, int __P) 9368{ 9369 return __extension__ (__m512i)(__v16si) 9370 { __P, __O, __N, __M, __L, __K, __J, __I, 9371 __H, __G, __F, __E, __D, __C, __B, __A }; 9372} 9373 9374#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 9375 e8,e9,e10,e11,e12,e13,e14,e15) \ 9376 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 9377 9378static __inline__ __m512i __DEFAULT_FN_ATTRS 9379_mm512_set_epi64 (long long __A, long long __B, long long __C, 9380 long long __D, long long __E, long long __F, 9381 long long __G, long long __H) 9382{ 9383 return __extension__ (__m512i) (__v8di) 9384 { __H, __G, __F, __E, __D, __C, __B, __A }; 9385} 9386 9387#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 9388 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0) 9389 9390static __inline__ __m512d __DEFAULT_FN_ATTRS 9391_mm512_set_pd (double __A, double __B, double __C, double __D, 9392 double __E, double __F, double __G, double __H) 9393{ 9394 return __extension__ (__m512d) 9395 { __H, __G, __F, __E, __D, __C, __B, __A }; 9396} 9397 9398#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 9399 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0) 9400 9401static __inline__ __m512 __DEFAULT_FN_ATTRS 9402_mm512_set_ps (float __A, float __B, float __C, float __D, 9403 float __E, float __F, float __G, float __H, 9404 float __I, float __J, float __K, float __L, 9405 float __M, float __N, float __O, float __P) 9406{ 9407 return __extension__ (__m512) 9408 { __P, __O, __N, __M, __L, __K, __J, __I, 9409 __H, __G, __F, __E, __D, __C, __B, __A }; 9410} 9411 9412#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 9413 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 9414 9415#undef __DEFAULT_FN_ATTRS 9416 9417#endif // __AVX512FINTRIN_H 9418