1/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512DQINTRIN_H 29#define __AVX512DQINTRIN_H 30 31/* Define the default attributes for the functions in this file. */ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) 33 34static __inline__ __m512i __DEFAULT_FN_ATTRS 35_mm512_mullo_epi64 (__m512i __A, __m512i __B) { 36 return (__m512i) ((__v8du) __A * (__v8du) __B); 37} 38 39static __inline__ __m512i __DEFAULT_FN_ATTRS 40_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { 41 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 42 (__v8di)_mm512_mullo_epi64(__A, __B), 43 (__v8di)__W); 44} 45 46static __inline__ __m512i __DEFAULT_FN_ATTRS 47_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) { 48 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, 49 (__v8di)_mm512_mullo_epi64(__A, __B), 50 (__v8di)_mm512_setzero_si512()); 51} 52 53static __inline__ __m512d __DEFAULT_FN_ATTRS 54_mm512_xor_pd(__m512d __A, __m512d __B) { 55 return (__m512d)((__v8du)__A ^ (__v8du)__B); 56} 57 58static __inline__ __m512d __DEFAULT_FN_ATTRS 59_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 60 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 61 (__v8df)_mm512_xor_pd(__A, __B), 62 (__v8df)__W); 63} 64 65static __inline__ __m512d __DEFAULT_FN_ATTRS 66_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { 67 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 68 (__v8df)_mm512_xor_pd(__A, __B), 69 (__v8df)_mm512_setzero_pd()); 70} 71 72static __inline__ __m512 __DEFAULT_FN_ATTRS 73_mm512_xor_ps (__m512 __A, __m512 __B) { 74 return (__m512)((__v16su)__A ^ (__v16su)__B); 75} 76 77static __inline__ __m512 __DEFAULT_FN_ATTRS 78_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 79 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 80 (__v16sf)_mm512_xor_ps(__A, __B), 81 (__v16sf)__W); 82} 83 84static __inline__ __m512 __DEFAULT_FN_ATTRS 85_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { 86 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 87 (__v16sf)_mm512_xor_ps(__A, __B), 88 (__v16sf)_mm512_setzero_ps()); 89} 90 91static __inline__ __m512d __DEFAULT_FN_ATTRS 92_mm512_or_pd(__m512d __A, __m512d __B) { 93 return (__m512d)((__v8du)__A | (__v8du)__B); 94} 95 96static __inline__ __m512d __DEFAULT_FN_ATTRS 97_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 98 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 99 (__v8df)_mm512_or_pd(__A, __B), 100 (__v8df)__W); 101} 102 103static __inline__ __m512d __DEFAULT_FN_ATTRS 104_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { 105 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 106 (__v8df)_mm512_or_pd(__A, __B), 107 (__v8df)_mm512_setzero_pd()); 108} 109 110static __inline__ __m512 __DEFAULT_FN_ATTRS 111_mm512_or_ps(__m512 __A, __m512 __B) { 112 return (__m512)((__v16su)__A | (__v16su)__B); 113} 114 115static __inline__ __m512 __DEFAULT_FN_ATTRS 116_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 117 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 118 (__v16sf)_mm512_or_ps(__A, __B), 119 (__v16sf)__W); 120} 121 122static __inline__ __m512 __DEFAULT_FN_ATTRS 123_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { 124 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 125 (__v16sf)_mm512_or_ps(__A, __B), 126 (__v16sf)_mm512_setzero_ps()); 127} 128 129static __inline__ __m512d __DEFAULT_FN_ATTRS 130_mm512_and_pd(__m512d __A, __m512d __B) { 131 return (__m512d)((__v8du)__A & (__v8du)__B); 132} 133 134static __inline__ __m512d __DEFAULT_FN_ATTRS 135_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 136 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 137 (__v8df)_mm512_and_pd(__A, __B), 138 (__v8df)__W); 139} 140 141static __inline__ __m512d __DEFAULT_FN_ATTRS 142_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { 143 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 144 (__v8df)_mm512_and_pd(__A, __B), 145 (__v8df)_mm512_setzero_pd()); 146} 147 148static __inline__ __m512 __DEFAULT_FN_ATTRS 149_mm512_and_ps(__m512 __A, __m512 __B) { 150 return (__m512)((__v16su)__A & (__v16su)__B); 151} 152 153static __inline__ __m512 __DEFAULT_FN_ATTRS 154_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 155 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 156 (__v16sf)_mm512_and_ps(__A, __B), 157 (__v16sf)__W); 158} 159 160static __inline__ __m512 __DEFAULT_FN_ATTRS 161_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { 162 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 163 (__v16sf)_mm512_and_ps(__A, __B), 164 (__v16sf)_mm512_setzero_ps()); 165} 166 167static __inline__ __m512d __DEFAULT_FN_ATTRS 168_mm512_andnot_pd(__m512d __A, __m512d __B) { 169 return (__m512d)(~(__v8du)__A & (__v8du)__B); 170} 171 172static __inline__ __m512d __DEFAULT_FN_ATTRS 173_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 174 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 175 (__v8df)_mm512_andnot_pd(__A, __B), 176 (__v8df)__W); 177} 178 179static __inline__ __m512d __DEFAULT_FN_ATTRS 180_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { 181 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, 182 (__v8df)_mm512_andnot_pd(__A, __B), 183 (__v8df)_mm512_setzero_pd()); 184} 185 186static __inline__ __m512 __DEFAULT_FN_ATTRS 187_mm512_andnot_ps(__m512 __A, __m512 __B) { 188 return (__m512)(~(__v16su)__A & (__v16su)__B); 189} 190 191static __inline__ __m512 __DEFAULT_FN_ATTRS 192_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 193 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 194 (__v16sf)_mm512_andnot_ps(__A, __B), 195 (__v16sf)__W); 196} 197 198static __inline__ __m512 __DEFAULT_FN_ATTRS 199_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) { 200 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, 201 (__v16sf)_mm512_andnot_ps(__A, __B), 202 (__v16sf)_mm512_setzero_ps()); 203} 204 205static __inline__ __m512i __DEFAULT_FN_ATTRS 206_mm512_cvtpd_epi64 (__m512d __A) { 207 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 208 (__v8di) _mm512_setzero_si512(), 209 (__mmask8) -1, 210 _MM_FROUND_CUR_DIRECTION); 211} 212 213static __inline__ __m512i __DEFAULT_FN_ATTRS 214_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { 215 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 216 (__v8di) __W, 217 (__mmask8) __U, 218 _MM_FROUND_CUR_DIRECTION); 219} 220 221static __inline__ __m512i __DEFAULT_FN_ATTRS 222_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) { 223 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A, 224 (__v8di) _mm512_setzero_si512(), 225 (__mmask8) __U, 226 _MM_FROUND_CUR_DIRECTION); 227} 228 229#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \ 230 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 231 (__v8di)_mm512_setzero_si512(), \ 232 (__mmask8)-1, (int)(R)); }) 233 234#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \ 235 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 236 (__v8di)(__m512i)(W), \ 237 (__mmask8)(U), (int)(R)); }) 238 239#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \ 240 (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \ 241 (__v8di)_mm512_setzero_si512(), \ 242 (__mmask8)(U), (int)(R)); }) 243 244static __inline__ __m512i __DEFAULT_FN_ATTRS 245_mm512_cvtpd_epu64 (__m512d __A) { 246 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 247 (__v8di) _mm512_setzero_si512(), 248 (__mmask8) -1, 249 _MM_FROUND_CUR_DIRECTION); 250} 251 252static __inline__ __m512i __DEFAULT_FN_ATTRS 253_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { 254 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 255 (__v8di) __W, 256 (__mmask8) __U, 257 _MM_FROUND_CUR_DIRECTION); 258} 259 260static __inline__ __m512i __DEFAULT_FN_ATTRS 261_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) { 262 return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A, 263 (__v8di) _mm512_setzero_si512(), 264 (__mmask8) __U, 265 _MM_FROUND_CUR_DIRECTION); 266} 267 268#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \ 269 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 270 (__v8di)_mm512_setzero_si512(), \ 271 (__mmask8)-1, (int)(R)); }) 272 273#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \ 274 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 275 (__v8di)(__m512i)(W), \ 276 (__mmask8)(U), (int)(R)); }) 277 278#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \ 279 (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \ 280 (__v8di)_mm512_setzero_si512(), \ 281 (__mmask8)(U), (int)(R)); }) 282 283static __inline__ __m512i __DEFAULT_FN_ATTRS 284_mm512_cvtps_epi64 (__m256 __A) { 285 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 286 (__v8di) _mm512_setzero_si512(), 287 (__mmask8) -1, 288 _MM_FROUND_CUR_DIRECTION); 289} 290 291static __inline__ __m512i __DEFAULT_FN_ATTRS 292_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { 293 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 294 (__v8di) __W, 295 (__mmask8) __U, 296 _MM_FROUND_CUR_DIRECTION); 297} 298 299static __inline__ __m512i __DEFAULT_FN_ATTRS 300_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) { 301 return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A, 302 (__v8di) _mm512_setzero_si512(), 303 (__mmask8) __U, 304 _MM_FROUND_CUR_DIRECTION); 305} 306 307#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \ 308 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 309 (__v8di)_mm512_setzero_si512(), \ 310 (__mmask8)-1, (int)(R)); }) 311 312#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \ 313 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 314 (__v8di)(__m512i)(W), \ 315 (__mmask8)(U), (int)(R)); }) 316 317#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \ 318 (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \ 319 (__v8di)_mm512_setzero_si512(), \ 320 (__mmask8)(U), (int)(R)); }) 321 322static __inline__ __m512i __DEFAULT_FN_ATTRS 323_mm512_cvtps_epu64 (__m256 __A) { 324 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 325 (__v8di) _mm512_setzero_si512(), 326 (__mmask8) -1, 327 _MM_FROUND_CUR_DIRECTION); 328} 329 330static __inline__ __m512i __DEFAULT_FN_ATTRS 331_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { 332 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 333 (__v8di) __W, 334 (__mmask8) __U, 335 _MM_FROUND_CUR_DIRECTION); 336} 337 338static __inline__ __m512i __DEFAULT_FN_ATTRS 339_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) { 340 return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A, 341 (__v8di) _mm512_setzero_si512(), 342 (__mmask8) __U, 343 _MM_FROUND_CUR_DIRECTION); 344} 345 346#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \ 347 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 348 (__v8di)_mm512_setzero_si512(), \ 349 (__mmask8)-1, (int)(R)); }) 350 351#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \ 352 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 353 (__v8di)(__m512i)(W), \ 354 (__mmask8)(U), (int)(R)); }) 355 356#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \ 357 (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \ 358 (__v8di)_mm512_setzero_si512(), \ 359 (__mmask8)(U), (int)(R)); }) 360 361 362static __inline__ __m512d __DEFAULT_FN_ATTRS 363_mm512_cvtepi64_pd (__m512i __A) { 364 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 365 (__v8df) _mm512_setzero_pd(), 366 (__mmask8) -1, 367 _MM_FROUND_CUR_DIRECTION); 368} 369 370static __inline__ __m512d __DEFAULT_FN_ATTRS 371_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) { 372 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 373 (__v8df) __W, 374 (__mmask8) __U, 375 _MM_FROUND_CUR_DIRECTION); 376} 377 378static __inline__ __m512d __DEFAULT_FN_ATTRS 379_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) { 380 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A, 381 (__v8df) _mm512_setzero_pd(), 382 (__mmask8) __U, 383 _MM_FROUND_CUR_DIRECTION); 384} 385 386#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \ 387 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 388 (__v8df)_mm512_setzero_pd(), \ 389 (__mmask8)-1, (int)(R)); }) 390 391#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \ 392 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 393 (__v8df)(__m512d)(W), \ 394 (__mmask8)(U), (int)(R)); }) 395 396#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \ 397 (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \ 398 (__v8df)_mm512_setzero_pd(), \ 399 (__mmask8)(U), (int)(R)); }) 400 401static __inline__ __m256 __DEFAULT_FN_ATTRS 402_mm512_cvtepi64_ps (__m512i __A) { 403 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 404 (__v8sf) _mm256_setzero_ps(), 405 (__mmask8) -1, 406 _MM_FROUND_CUR_DIRECTION); 407} 408 409static __inline__ __m256 __DEFAULT_FN_ATTRS 410_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) { 411 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 412 (__v8sf) __W, 413 (__mmask8) __U, 414 _MM_FROUND_CUR_DIRECTION); 415} 416 417static __inline__ __m256 __DEFAULT_FN_ATTRS 418_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) { 419 return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A, 420 (__v8sf) _mm256_setzero_ps(), 421 (__mmask8) __U, 422 _MM_FROUND_CUR_DIRECTION); 423} 424 425#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \ 426 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 427 (__v8sf)_mm256_setzero_ps(), \ 428 (__mmask8)-1, (int)(R)); }) 429 430#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \ 431 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 432 (__v8sf)(__m256)(W), (__mmask8)(U), \ 433 (int)(R)); }) 434 435#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \ 436 (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \ 437 (__v8sf)_mm256_setzero_ps(), \ 438 (__mmask8)(U), (int)(R)); }) 439 440 441static __inline__ __m512i __DEFAULT_FN_ATTRS 442_mm512_cvttpd_epi64 (__m512d __A) { 443 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 444 (__v8di) _mm512_setzero_si512(), 445 (__mmask8) -1, 446 _MM_FROUND_CUR_DIRECTION); 447} 448 449static __inline__ __m512i __DEFAULT_FN_ATTRS 450_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) { 451 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 452 (__v8di) __W, 453 (__mmask8) __U, 454 _MM_FROUND_CUR_DIRECTION); 455} 456 457static __inline__ __m512i __DEFAULT_FN_ATTRS 458_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) { 459 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A, 460 (__v8di) _mm512_setzero_si512(), 461 (__mmask8) __U, 462 _MM_FROUND_CUR_DIRECTION); 463} 464 465#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \ 466 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 467 (__v8di)_mm512_setzero_si512(), \ 468 (__mmask8)-1, (int)(R)); }) 469 470#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \ 471 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 472 (__v8di)(__m512i)(W), \ 473 (__mmask8)(U), (int)(R)); }) 474 475#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \ 476 (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \ 477 (__v8di)_mm512_setzero_si512(), \ 478 (__mmask8)(U), (int)(R)); }) 479 480static __inline__ __m512i __DEFAULT_FN_ATTRS 481_mm512_cvttpd_epu64 (__m512d __A) { 482 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 483 (__v8di) _mm512_setzero_si512(), 484 (__mmask8) -1, 485 _MM_FROUND_CUR_DIRECTION); 486} 487 488static __inline__ __m512i __DEFAULT_FN_ATTRS 489_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) { 490 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 491 (__v8di) __W, 492 (__mmask8) __U, 493 _MM_FROUND_CUR_DIRECTION); 494} 495 496static __inline__ __m512i __DEFAULT_FN_ATTRS 497_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) { 498 return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A, 499 (__v8di) _mm512_setzero_si512(), 500 (__mmask8) __U, 501 _MM_FROUND_CUR_DIRECTION); 502} 503 504#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \ 505 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 506 (__v8di)_mm512_setzero_si512(), \ 507 (__mmask8)-1, (int)(R)); }) 508 509#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \ 510 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 511 (__v8di)(__m512i)(W), \ 512 (__mmask8)(U), (int)(R)); }) 513 514#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \ 515 (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \ 516 (__v8di)_mm512_setzero_si512(), \ 517 (__mmask8)(U), (int)(R)); }) 518 519static __inline__ __m512i __DEFAULT_FN_ATTRS 520_mm512_cvttps_epi64 (__m256 __A) { 521 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 522 (__v8di) _mm512_setzero_si512(), 523 (__mmask8) -1, 524 _MM_FROUND_CUR_DIRECTION); 525} 526 527static __inline__ __m512i __DEFAULT_FN_ATTRS 528_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) { 529 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 530 (__v8di) __W, 531 (__mmask8) __U, 532 _MM_FROUND_CUR_DIRECTION); 533} 534 535static __inline__ __m512i __DEFAULT_FN_ATTRS 536_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) { 537 return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A, 538 (__v8di) _mm512_setzero_si512(), 539 (__mmask8) __U, 540 _MM_FROUND_CUR_DIRECTION); 541} 542 543#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \ 544 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 545 (__v8di)_mm512_setzero_si512(), \ 546 (__mmask8)-1, (int)(R)); }) 547 548#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \ 549 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 550 (__v8di)(__m512i)(W), \ 551 (__mmask8)(U), (int)(R)); }) 552 553#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \ 554 (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \ 555 (__v8di)_mm512_setzero_si512(), \ 556 (__mmask8)(U), (int)(R)); }) 557 558static __inline__ __m512i __DEFAULT_FN_ATTRS 559_mm512_cvttps_epu64 (__m256 __A) { 560 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 561 (__v8di) _mm512_setzero_si512(), 562 (__mmask8) -1, 563 _MM_FROUND_CUR_DIRECTION); 564} 565 566static __inline__ __m512i __DEFAULT_FN_ATTRS 567_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) { 568 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 569 (__v8di) __W, 570 (__mmask8) __U, 571 _MM_FROUND_CUR_DIRECTION); 572} 573 574static __inline__ __m512i __DEFAULT_FN_ATTRS 575_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) { 576 return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A, 577 (__v8di) _mm512_setzero_si512(), 578 (__mmask8) __U, 579 _MM_FROUND_CUR_DIRECTION); 580} 581 582#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \ 583 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 584 (__v8di)_mm512_setzero_si512(), \ 585 (__mmask8)-1, (int)(R)); }) 586 587#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \ 588 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 589 (__v8di)(__m512i)(W), \ 590 (__mmask8)(U), (int)(R)); }) 591 592#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \ 593 (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \ 594 (__v8di)_mm512_setzero_si512(), \ 595 (__mmask8)(U), (int)(R)); }) 596 597static __inline__ __m512d __DEFAULT_FN_ATTRS 598_mm512_cvtepu64_pd (__m512i __A) { 599 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 600 (__v8df) _mm512_setzero_pd(), 601 (__mmask8) -1, 602 _MM_FROUND_CUR_DIRECTION); 603} 604 605static __inline__ __m512d __DEFAULT_FN_ATTRS 606_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) { 607 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 608 (__v8df) __W, 609 (__mmask8) __U, 610 _MM_FROUND_CUR_DIRECTION); 611} 612 613static __inline__ __m512d __DEFAULT_FN_ATTRS 614_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) { 615 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A, 616 (__v8df) _mm512_setzero_pd(), 617 (__mmask8) __U, 618 _MM_FROUND_CUR_DIRECTION); 619} 620 621#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \ 622 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 623 (__v8df)_mm512_setzero_pd(), \ 624 (__mmask8)-1, (int)(R)); }) 625 626#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \ 627 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 628 (__v8df)(__m512d)(W), \ 629 (__mmask8)(U), (int)(R)); }) 630 631 632#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \ 633 (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \ 634 (__v8df)_mm512_setzero_pd(), \ 635 (__mmask8)(U), (int)(R)); }) 636 637 638static __inline__ __m256 __DEFAULT_FN_ATTRS 639_mm512_cvtepu64_ps (__m512i __A) { 640 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 641 (__v8sf) _mm256_setzero_ps(), 642 (__mmask8) -1, 643 _MM_FROUND_CUR_DIRECTION); 644} 645 646static __inline__ __m256 __DEFAULT_FN_ATTRS 647_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) { 648 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 649 (__v8sf) __W, 650 (__mmask8) __U, 651 _MM_FROUND_CUR_DIRECTION); 652} 653 654static __inline__ __m256 __DEFAULT_FN_ATTRS 655_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) { 656 return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A, 657 (__v8sf) _mm256_setzero_ps(), 658 (__mmask8) __U, 659 _MM_FROUND_CUR_DIRECTION); 660} 661 662#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \ 663 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 664 (__v8sf)_mm256_setzero_ps(), \ 665 (__mmask8)-1, (int)(R)); }) 666 667#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \ 668 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 669 (__v8sf)(__m256)(W), (__mmask8)(U), \ 670 (int)(R)); }) 671 672#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \ 673 (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \ 674 (__v8sf)_mm256_setzero_ps(), \ 675 (__mmask8)(U), (int)(R)); }) 676 677#define _mm512_range_pd(A, B, C) __extension__ ({ \ 678 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 679 (__v8df)(__m512d)(B), (int)(C), \ 680 (__v8df)_mm512_setzero_pd(), \ 681 (__mmask8)-1, \ 682 _MM_FROUND_CUR_DIRECTION); }) 683 684#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 685 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 686 (__v8df)(__m512d)(B), (int)(C), \ 687 (__v8df)(__m512d)(W), (__mmask8)(U), \ 688 _MM_FROUND_CUR_DIRECTION); }) 689 690#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \ 691 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 692 (__v8df)(__m512d)(B), (int)(C), \ 693 (__v8df)_mm512_setzero_pd(), \ 694 (__mmask8)(U), \ 695 _MM_FROUND_CUR_DIRECTION); }) 696 697#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \ 698 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 699 (__v8df)(__m512d)(B), (int)(C), \ 700 (__v8df)_mm512_setzero_pd(), \ 701 (__mmask8)-1, (int)(R)); }) 702 703#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \ 704 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 705 (__v8df)(__m512d)(B), (int)(C), \ 706 (__v8df)(__m512d)(W), (__mmask8)(U), \ 707 (int)(R)); }) 708 709#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \ 710 (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \ 711 (__v8df)(__m512d)(B), (int)(C), \ 712 (__v8df)_mm512_setzero_pd(), \ 713 (__mmask8)(U), (int)(R)); }) 714 715#define _mm512_range_ps(A, B, C) __extension__ ({ \ 716 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 717 (__v16sf)(__m512)(B), (int)(C), \ 718 (__v16sf)_mm512_setzero_ps(), \ 719 (__mmask16)-1, \ 720 _MM_FROUND_CUR_DIRECTION); }) 721 722#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 723 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 724 (__v16sf)(__m512)(B), (int)(C), \ 725 (__v16sf)(__m512)(W), (__mmask16)(U), \ 726 _MM_FROUND_CUR_DIRECTION); }) 727 728#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \ 729 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 730 (__v16sf)(__m512)(B), (int)(C), \ 731 (__v16sf)_mm512_setzero_ps(), \ 732 (__mmask16)(U), \ 733 _MM_FROUND_CUR_DIRECTION); }) 734 735#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \ 736 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 737 (__v16sf)(__m512)(B), (int)(C), \ 738 (__v16sf)_mm512_setzero_ps(), \ 739 (__mmask16)-1, (int)(R)); }) 740 741#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \ 742 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 743 (__v16sf)(__m512)(B), (int)(C), \ 744 (__v16sf)(__m512)(W), (__mmask16)(U), \ 745 (int)(R)); }) 746 747#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \ 748 (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \ 749 (__v16sf)(__m512)(B), (int)(C), \ 750 (__v16sf)_mm512_setzero_ps(), \ 751 (__mmask16)(U), (int)(R)); }) 752 753#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \ 754 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 755 (__v4sf)(__m128)(B), \ 756 (__v4sf)_mm_setzero_ps(), \ 757 (__mmask8) -1, (int)(C),\ 758 (int)(R)); }) 759 760#define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION) 761 762#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \ 763 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 764 (__v4sf)(__m128)(B), \ 765 (__v4sf)(__m128)(W),\ 766 (__mmask8)(U), (int)(C),\ 767 (int)(R)); }) 768 769#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION) 770 771#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \ 772 (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \ 773 (__v4sf)(__m128)(B), \ 774 (__v4sf)_mm_setzero_ps(), \ 775 (__mmask8)(U), (int)(C),\ 776 (int)(R)); }) 777 778#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 779 780#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \ 781 (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 782 (__v2df)(__m128d)(B), \ 783 (__v2df)_mm_setzero_pd(), \ 784 (__mmask8) -1, (int)(C),\ 785 (int)(R)); }) 786 787#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION) 788 789#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \ 790 (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 791 (__v2df)(__m128d)(B), \ 792 (__v2df)(__m128d)(W),\ 793 (__mmask8)(U), (int)(C),\ 794 (int)(R)); }) 795 796#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 797 798#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \ 799 (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \ 800 (__v2df)(__m128d)(B), \ 801 (__v2df)_mm_setzero_pd(), \ 802 (__mmask8)(U), (int)(C),\ 803 (int)(R)); }) 804 805#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION) 806 807#define _mm512_reduce_pd(A, B) __extension__ ({ \ 808 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 809 (__v8df)_mm512_setzero_pd(), \ 810 (__mmask8)-1, \ 811 _MM_FROUND_CUR_DIRECTION); }) 812 813#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 814 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 815 (__v8df)(__m512d)(W), \ 816 (__mmask8)(U), \ 817 _MM_FROUND_CUR_DIRECTION); }) 818 819#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \ 820 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 821 (__v8df)_mm512_setzero_pd(), \ 822 (__mmask8)(U), \ 823 _MM_FROUND_CUR_DIRECTION); }) 824 825#define _mm512_reduce_ps(A, B) __extension__ ({ \ 826 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 827 (__v16sf)_mm512_setzero_ps(), \ 828 (__mmask16)-1, \ 829 _MM_FROUND_CUR_DIRECTION); }) 830 831#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 832 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 833 (__v16sf)(__m512)(W), \ 834 (__mmask16)(U), \ 835 _MM_FROUND_CUR_DIRECTION); }) 836 837#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \ 838 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 839 (__v16sf)_mm512_setzero_ps(), \ 840 (__mmask16)(U), \ 841 _MM_FROUND_CUR_DIRECTION); }) 842 843#define _mm512_reduce_round_pd(A, B, R) __extension__ ({\ 844 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 845 (__v8df)_mm512_setzero_pd(), \ 846 (__mmask8)-1, (int)(R)); }) 847 848#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\ 849 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 850 (__v8df)(__m512d)(W), \ 851 (__mmask8)(U), (int)(R)); }) 852 853#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\ 854 (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \ 855 (__v8df)_mm512_setzero_pd(), \ 856 (__mmask8)(U), (int)(R)); }) 857 858#define _mm512_reduce_round_ps(A, B, R) __extension__ ({\ 859 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 860 (__v16sf)_mm512_setzero_ps(), \ 861 (__mmask16)-1, (int)(R)); }) 862 863#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\ 864 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 865 (__v16sf)(__m512)(W), \ 866 (__mmask16)(U), (int)(R)); }) 867 868#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\ 869 (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \ 870 (__v16sf)_mm512_setzero_ps(), \ 871 (__mmask16)(U), (int)(R)); }) 872 873#define _mm_reduce_ss(A, B, C) __extension__ ({ \ 874 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 875 (__v4sf)(__m128)(B), \ 876 (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ 877 (int)(C), _MM_FROUND_CUR_DIRECTION); }) 878 879#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \ 880 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 881 (__v4sf)(__m128)(B), \ 882 (__v4sf)(__m128)(W), (__mmask8)(U), \ 883 (int)(C), _MM_FROUND_CUR_DIRECTION); }) 884 885#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \ 886 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 887 (__v4sf)(__m128)(B), \ 888 (__v4sf)_mm_setzero_ps(), \ 889 (__mmask8)(U), (int)(C), \ 890 _MM_FROUND_CUR_DIRECTION); }) 891 892#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \ 893 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 894 (__v4sf)(__m128)(B), \ 895 (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \ 896 (int)(C), (int)(R)); }) 897 898#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \ 899 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 900 (__v4sf)(__m128)(B), \ 901 (__v4sf)(__m128)(W), (__mmask8)(U), \ 902 (int)(C), (int)(R)); }) 903 904#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \ 905 (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \ 906 (__v4sf)(__m128)(B), \ 907 (__v4sf)_mm_setzero_ps(), \ 908 (__mmask8)(U), (int)(C), (int)(R)); }) 909 910#define _mm_reduce_sd(A, B, C) __extension__ ({ \ 911 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 912 (__v2df)(__m128d)(B), \ 913 (__v2df)_mm_setzero_pd(), \ 914 (__mmask8)-1, (int)(C), \ 915 _MM_FROUND_CUR_DIRECTION); }) 916 917#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \ 918 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 919 (__v2df)(__m128d)(B), \ 920 (__v2df)(__m128d)(W), (__mmask8)(U), \ 921 (int)(C), _MM_FROUND_CUR_DIRECTION); }) 922 923#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \ 924 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 925 (__v2df)(__m128d)(B), \ 926 (__v2df)_mm_setzero_pd(), \ 927 (__mmask8)(U), (int)(C), \ 928 _MM_FROUND_CUR_DIRECTION); }) 929 930#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \ 931 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 932 (__v2df)(__m128d)(B), \ 933 (__v2df)_mm_setzero_pd(), \ 934 (__mmask8)-1, (int)(C), (int)(R)); }) 935 936#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \ 937 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 938 (__v2df)(__m128d)(B), \ 939 (__v2df)(__m128d)(W), (__mmask8)(U), \ 940 (int)(C), (int)(R)); }) 941 942#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \ 943 (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \ 944 (__v2df)(__m128d)(B), \ 945 (__v2df)_mm_setzero_pd(), \ 946 (__mmask8)(U), (int)(C), (int)(R)); }) 947 948static __inline__ __mmask16 __DEFAULT_FN_ATTRS 949_mm512_movepi32_mask (__m512i __A) 950{ 951 return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A); 952} 953 954static __inline__ __m512i __DEFAULT_FN_ATTRS 955_mm512_movm_epi32 (__mmask16 __A) 956{ 957 return (__m512i) __builtin_ia32_cvtmask2d512 (__A); 958} 959 960static __inline__ __m512i __DEFAULT_FN_ATTRS 961_mm512_movm_epi64 (__mmask8 __A) 962{ 963 return (__m512i) __builtin_ia32_cvtmask2q512 (__A); 964} 965 966static __inline__ __mmask8 __DEFAULT_FN_ATTRS 967_mm512_movepi64_mask (__m512i __A) 968{ 969 return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A); 970} 971 972 973static __inline__ __m512 __DEFAULT_FN_ATTRS 974_mm512_broadcast_f32x2 (__m128 __A) 975{ 976 return (__m512)__builtin_shufflevector((__v4sf)__A, 977 (__v4sf)_mm_undefined_ps(), 978 0, 1, 0, 1, 0, 1, 0, 1, 979 0, 1, 0, 1, 0, 1, 0, 1); 980} 981 982static __inline__ __m512 __DEFAULT_FN_ATTRS 983_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) 984{ 985 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 986 (__v16sf)_mm512_broadcast_f32x2(__A), 987 (__v16sf)__O); 988} 989 990static __inline__ __m512 __DEFAULT_FN_ATTRS 991_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) 992{ 993 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, 994 (__v16sf)_mm512_broadcast_f32x2(__A), 995 (__v16sf)_mm512_setzero_ps()); 996} 997 998static __inline__ __m512 __DEFAULT_FN_ATTRS 999_mm512_broadcast_f32x8(__m256 __A) 1000{ 1001 return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A, 1002 0, 1, 2, 3, 4, 5, 6, 7, 1003 0, 1, 2, 3, 4, 5, 6, 7); 1004} 1005 1006static __inline__ __m512 __DEFAULT_FN_ATTRS 1007_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) 1008{ 1009 return (__m512)__builtin_ia32_selectps_512((__mmask8)__M, 1010 (__v16sf)_mm512_broadcast_f32x8(__A), 1011 (__v16sf)__O); 1012} 1013 1014static __inline__ __m512 __DEFAULT_FN_ATTRS 1015_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) 1016{ 1017 return (__m512)__builtin_ia32_selectps_512((__mmask8)__M, 1018 (__v16sf)_mm512_broadcast_f32x8(__A), 1019 (__v16sf)_mm512_setzero_ps()); 1020} 1021 1022static __inline__ __m512d __DEFAULT_FN_ATTRS 1023_mm512_broadcast_f64x2(__m128d __A) 1024{ 1025 return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 1026 0, 1, 0, 1, 0, 1, 0, 1); 1027} 1028 1029static __inline__ __m512d __DEFAULT_FN_ATTRS 1030_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) 1031{ 1032 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 1033 (__v8df)_mm512_broadcast_f64x2(__A), 1034 (__v8df)__O); 1035} 1036 1037static __inline__ __m512d __DEFAULT_FN_ATTRS 1038_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) 1039{ 1040 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, 1041 (__v8df)_mm512_broadcast_f64x2(__A), 1042 (__v8df)_mm512_setzero_pd()); 1043} 1044 1045static __inline__ __m512i __DEFAULT_FN_ATTRS 1046_mm512_broadcast_i32x2 (__m128i __A) 1047{ 1048 return (__m512i)__builtin_shufflevector((__v4si)__A, 1049 (__v4si)_mm_undefined_si128(), 1050 0, 1, 0, 1, 0, 1, 0, 1, 1051 0, 1, 0, 1, 0, 1, 0, 1); 1052} 1053 1054static __inline__ __m512i __DEFAULT_FN_ATTRS 1055_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) 1056{ 1057 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1058 (__v16si)_mm512_broadcast_i32x2(__A), 1059 (__v16si)__O); 1060} 1061 1062static __inline__ __m512i __DEFAULT_FN_ATTRS 1063_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) 1064{ 1065 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, 1066 (__v16si)_mm512_broadcast_i32x2(__A), 1067 (__v16si)_mm512_setzero_si512()); 1068} 1069 1070static __inline__ __m512i __DEFAULT_FN_ATTRS 1071_mm512_broadcast_i32x8(__m256i __A) 1072{ 1073 return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A, 1074 0, 1, 2, 3, 4, 5, 6, 7, 1075 0, 1, 2, 3, 4, 5, 6, 7); 1076} 1077 1078static __inline__ __m512i __DEFAULT_FN_ATTRS 1079_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) 1080{ 1081 return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M, 1082 (__v16si)_mm512_broadcast_i32x8(__A), 1083 (__v16si)__O); 1084} 1085 1086static __inline__ __m512i __DEFAULT_FN_ATTRS 1087_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) 1088{ 1089 return (__m512i)__builtin_ia32_selectd_512((__mmask8)__M, 1090 (__v16si)_mm512_broadcast_i32x8(__A), 1091 (__v16si)_mm512_setzero_si512()); 1092} 1093 1094static __inline__ __m512i __DEFAULT_FN_ATTRS 1095_mm512_broadcast_i64x2(__m128i __A) 1096{ 1097 return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 1098 0, 1, 0, 1, 0, 1, 0, 1); 1099} 1100 1101static __inline__ __m512i __DEFAULT_FN_ATTRS 1102_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) 1103{ 1104 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1105 (__v8di)_mm512_broadcast_i64x2(__A), 1106 (__v8di)__O); 1107} 1108 1109static __inline__ __m512i __DEFAULT_FN_ATTRS 1110_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) 1111{ 1112 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, 1113 (__v8di)_mm512_broadcast_i64x2(__A), 1114 (__v8di)_mm512_setzero_si512()); 1115} 1116 1117#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \ 1118 (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \ 1119 (__v16sf)_mm512_undefined_ps(), \ 1120 ((imm) & 1) ? 8 : 0, \ 1121 ((imm) & 1) ? 9 : 1, \ 1122 ((imm) & 1) ? 10 : 2, \ 1123 ((imm) & 1) ? 11 : 3, \ 1124 ((imm) & 1) ? 12 : 4, \ 1125 ((imm) & 1) ? 13 : 5, \ 1126 ((imm) & 1) ? 14 : 6, \ 1127 ((imm) & 1) ? 15 : 7); }) 1128 1129#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \ 1130 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 1131 (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ 1132 (__v8sf)(W)); }) 1133 1134#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \ 1135 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 1136 (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ 1137 (__v8sf)_mm256_setzero_ps()); }) 1138 1139#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \ 1140 (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 1141 (__v8df)_mm512_undefined_pd(), \ 1142 0 + ((imm) & 0x3) * 2, \ 1143 1 + ((imm) & 0x3) * 2); }) 1144 1145#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ 1146 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1147 (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ 1148 (__v2df)(W)); }) 1149 1150#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ 1151 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1152 (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ 1153 (__v2df)_mm_setzero_pd()); }) 1154 1155#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \ 1156 (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 1157 (__v16si)_mm512_undefined_epi32(), \ 1158 ((imm) & 1) ? 8 : 0, \ 1159 ((imm) & 1) ? 9 : 1, \ 1160 ((imm) & 1) ? 10 : 2, \ 1161 ((imm) & 1) ? 11 : 3, \ 1162 ((imm) & 1) ? 12 : 4, \ 1163 ((imm) & 1) ? 13 : 5, \ 1164 ((imm) & 1) ? 14 : 6, \ 1165 ((imm) & 1) ? 15 : 7); }) 1166 1167#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \ 1168 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 1169 (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ 1170 (__v8si)(W)); }) 1171 1172#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \ 1173 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 1174 (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ 1175 (__v8si)_mm256_setzero_si256()); }) 1176 1177#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \ 1178 (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 1179 (__v8di)_mm512_undefined_epi32(), \ 1180 0 + ((imm) & 0x3) * 2, \ 1181 1 + ((imm) & 0x3) * 2); }) 1182 1183#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ 1184 (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ 1185 (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ 1186 (__v2di)(W)); }) 1187 1188#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ 1189 (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ 1190 (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ 1191 (__v2di)_mm_setzero_di()); }) 1192 1193#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \ 1194 (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ 1195 (__v16sf)_mm512_castps256_ps512((__m256)(B)),\ 1196 ((imm) & 0x1) ? 0 : 16, \ 1197 ((imm) & 0x1) ? 1 : 17, \ 1198 ((imm) & 0x1) ? 2 : 18, \ 1199 ((imm) & 0x1) ? 3 : 19, \ 1200 ((imm) & 0x1) ? 4 : 20, \ 1201 ((imm) & 0x1) ? 5 : 21, \ 1202 ((imm) & 0x1) ? 6 : 22, \ 1203 ((imm) & 0x1) ? 7 : 23, \ 1204 ((imm) & 0x1) ? 16 : 8, \ 1205 ((imm) & 0x1) ? 17 : 9, \ 1206 ((imm) & 0x1) ? 18 : 10, \ 1207 ((imm) & 0x1) ? 19 : 11, \ 1208 ((imm) & 0x1) ? 20 : 12, \ 1209 ((imm) & 0x1) ? 21 : 13, \ 1210 ((imm) & 0x1) ? 22 : 14, \ 1211 ((imm) & 0x1) ? 23 : 15); }) 1212 1213#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \ 1214 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1215 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 1216 (__v16sf)(W)); }) 1217 1218#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \ 1219 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1220 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \ 1221 (__v16sf)_mm512_setzero_ps()); }) 1222 1223#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \ 1224 (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \ 1225 (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\ 1226 (((imm) & 0x3) == 0) ? 8 : 0, \ 1227 (((imm) & 0x3) == 0) ? 9 : 1, \ 1228 (((imm) & 0x3) == 1) ? 8 : 2, \ 1229 (((imm) & 0x3) == 1) ? 9 : 3, \ 1230 (((imm) & 0x3) == 2) ? 8 : 4, \ 1231 (((imm) & 0x3) == 2) ? 9 : 5, \ 1232 (((imm) & 0x3) == 3) ? 8 : 6, \ 1233 (((imm) & 0x3) == 3) ? 9 : 7); }) 1234 1235#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ 1236 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1237 (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 1238 (__v8df)(W)); }) 1239 1240#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ 1241 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1242 (__v8df)_mm512_insertf64x2((A), (B), (imm)), \ 1243 (__v8df)_mm512_setzero_pd()); }) 1244 1245#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \ 1246 (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \ 1247 (__v16si)_mm512_castsi256_si512((__m256i)(B)),\ 1248 ((imm) & 0x1) ? 0 : 16, \ 1249 ((imm) & 0x1) ? 1 : 17, \ 1250 ((imm) & 0x1) ? 2 : 18, \ 1251 ((imm) & 0x1) ? 3 : 19, \ 1252 ((imm) & 0x1) ? 4 : 20, \ 1253 ((imm) & 0x1) ? 5 : 21, \ 1254 ((imm) & 0x1) ? 6 : 22, \ 1255 ((imm) & 0x1) ? 7 : 23, \ 1256 ((imm) & 0x1) ? 16 : 8, \ 1257 ((imm) & 0x1) ? 17 : 9, \ 1258 ((imm) & 0x1) ? 18 : 10, \ 1259 ((imm) & 0x1) ? 19 : 11, \ 1260 ((imm) & 0x1) ? 20 : 12, \ 1261 ((imm) & 0x1) ? 21 : 13, \ 1262 ((imm) & 0x1) ? 22 : 14, \ 1263 ((imm) & 0x1) ? 23 : 15); }) 1264 1265#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \ 1266 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 1267 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 1268 (__v16si)(W)); }) 1269 1270#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \ 1271 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 1272 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \ 1273 (__v16si)_mm512_setzero_si512()); }) 1274 1275#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \ 1276 (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \ 1277 (__v8di)_mm512_castsi128_si512((__m128i)(B)),\ 1278 (((imm) & 0x3) == 0) ? 8 : 0, \ 1279 (((imm) & 0x3) == 0) ? 9 : 1, \ 1280 (((imm) & 0x3) == 1) ? 8 : 2, \ 1281 (((imm) & 0x3) == 1) ? 9 : 3, \ 1282 (((imm) & 0x3) == 2) ? 8 : 4, \ 1283 (((imm) & 0x3) == 2) ? 9 : 5, \ 1284 (((imm) & 0x3) == 3) ? 8 : 6, \ 1285 (((imm) & 0x3) == 3) ? 9 : 7); }) 1286 1287#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ 1288 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 1289 (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 1290 (__v8di)(W)); }) 1291 1292#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ 1293 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 1294 (__v8di)_mm512_inserti64x2((A), (B), (imm)), \ 1295 (__v8di)_mm512_setzero_si512()); }) 1296 1297#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1298 (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1299 (int)(imm), (__mmask16)(U)); }) 1300 1301#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \ 1302 (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \ 1303 (int)(imm), (__mmask16)-1); }) 1304 1305#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1306 (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1307 (__mmask8)(U)); }) 1308 1309#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \ 1310 (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \ 1311 (__mmask8)-1); }) 1312 1313#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \ 1314 (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1315 (__mmask8)-1); }) 1316 1317#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \ 1318 (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \ 1319 (__mmask8)(U)); }) 1320 1321#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \ 1322 (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1323 (__mmask8)-1); }) 1324 1325#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \ 1326 (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \ 1327 (__mmask8)(U)); }) 1328 1329#undef __DEFAULT_FN_ATTRS 1330 1331#endif 1332