1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512VLDQINTRIN_H 29#define __AVX512VLDQINTRIN_H 30 31/* Define the default attributes for the functions in this file. */ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) 33 34static __inline__ __m256i __DEFAULT_FN_ATTRS 35_mm256_mullo_epi64 (__m256i __A, __m256i __B) { 36 return (__m256i) ((__v4du) __A * (__v4du) __B); 37} 38 39static __inline__ __m256i __DEFAULT_FN_ATTRS 40_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 41 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 42 (__v4di) __B, 43 (__v4di) __W, 44 (__mmask8) __U); 45} 46 47static __inline__ __m256i __DEFAULT_FN_ATTRS 48_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { 49 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 50 (__v4di) __B, 51 (__v4di) 52 _mm256_setzero_si256 (), 53 (__mmask8) __U); 54} 55 56static __inline__ __m128i __DEFAULT_FN_ATTRS 57_mm_mullo_epi64 (__m128i __A, __m128i __B) { 58 return (__m128i) ((__v2du) __A * (__v2du) __B); 59} 60 61static __inline__ __m128i __DEFAULT_FN_ATTRS 62_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 63 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 64 (__v2di) __B, 65 (__v2di) __W, 66 (__mmask8) __U); 67} 68 69static __inline__ __m128i __DEFAULT_FN_ATTRS 70_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { 71 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 72 (__v2di) __B, 73 (__v2di) 74 _mm_setzero_si128 (), 75 (__mmask8) __U); 76} 77 78static __inline__ __m256d __DEFAULT_FN_ATTRS 79_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 80 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 81 (__v4df) __B, 82 (__v4df) __W, 83 (__mmask8) __U); 84} 85 86static __inline__ __m256d __DEFAULT_FN_ATTRS 87_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { 88 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 89 (__v4df) __B, 90 (__v4df) 91 _mm256_setzero_pd (), 92 (__mmask8) __U); 93} 94 95static __inline__ __m128d __DEFAULT_FN_ATTRS 96_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 97 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 98 (__v2df) __B, 99 (__v2df) __W, 100 (__mmask8) __U); 101} 102 103static __inline__ __m128d __DEFAULT_FN_ATTRS 104_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { 105 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 106 (__v2df) __B, 107 (__v2df) 108 _mm_setzero_pd (), 109 (__mmask8) __U); 110} 111 112static __inline__ __m256 __DEFAULT_FN_ATTRS 113_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 114 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 115 (__v8sf) __B, 116 (__v8sf) __W, 117 (__mmask8) __U); 118} 119 120static __inline__ __m256 __DEFAULT_FN_ATTRS 121_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { 122 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 123 (__v8sf) __B, 124 (__v8sf) 125 _mm256_setzero_ps (), 126 (__mmask8) __U); 127} 128 129static __inline__ __m128 __DEFAULT_FN_ATTRS 130_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 131 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 132 (__v4sf) __B, 133 (__v4sf) __W, 134 (__mmask8) __U); 135} 136 137static __inline__ __m128 __DEFAULT_FN_ATTRS 138_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { 139 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 140 (__v4sf) __B, 141 (__v4sf) 142 _mm_setzero_ps (), 143 (__mmask8) __U); 144} 145 146static __inline__ __m256d __DEFAULT_FN_ATTRS 147_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 148 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 149 (__v4df) __B, 150 (__v4df) __W, 151 (__mmask8) __U); 152} 153 154static __inline__ __m256d __DEFAULT_FN_ATTRS 155_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { 156 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 157 (__v4df) __B, 158 (__v4df) 159 _mm256_setzero_pd (), 160 (__mmask8) __U); 161} 162 163static __inline__ __m128d __DEFAULT_FN_ATTRS 164_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 165 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 166 (__v2df) __B, 167 (__v2df) __W, 168 (__mmask8) __U); 169} 170 171static __inline__ __m128d __DEFAULT_FN_ATTRS 172_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { 173 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 174 (__v2df) __B, 175 (__v2df) 176 _mm_setzero_pd (), 177 (__mmask8) __U); 178} 179 180static __inline__ __m256 __DEFAULT_FN_ATTRS 181_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 182 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 183 (__v8sf) __B, 184 (__v8sf) __W, 185 (__mmask8) __U); 186} 187 188static __inline__ __m256 __DEFAULT_FN_ATTRS 189_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { 190 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 191 (__v8sf) __B, 192 (__v8sf) 193 _mm256_setzero_ps (), 194 (__mmask8) __U); 195} 196 197static __inline__ __m128 __DEFAULT_FN_ATTRS 198_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 199 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 200 (__v4sf) __B, 201 (__v4sf) __W, 202 (__mmask8) __U); 203} 204 205static __inline__ __m128 __DEFAULT_FN_ATTRS 206_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { 207 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 208 (__v4sf) __B, 209 (__v4sf) 210 _mm_setzero_ps (), 211 (__mmask8) __U); 212} 213 214static __inline__ __m256d __DEFAULT_FN_ATTRS 215_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 216 __m256d __B) { 217 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 218 (__v4df) __B, 219 (__v4df) __W, 220 (__mmask8) __U); 221} 222 223static __inline__ __m256d __DEFAULT_FN_ATTRS 224_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { 225 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 226 (__v4df) __B, 227 (__v4df) 228 _mm256_setzero_pd (), 229 (__mmask8) __U); 230} 231 232static __inline__ __m128d __DEFAULT_FN_ATTRS 233_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 234 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 235 (__v2df) __B, 236 (__v2df) __W, 237 (__mmask8) __U); 238} 239 240static __inline__ __m128d __DEFAULT_FN_ATTRS 241_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 242 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 243 (__v2df) __B, 244 (__v2df) 245 _mm_setzero_pd (), 246 (__mmask8) __U); 247} 248 249static __inline__ __m256 __DEFAULT_FN_ATTRS 250_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 251 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 252 (__v8sf) __B, 253 (__v8sf) __W, 254 (__mmask8) __U); 255} 256 257static __inline__ __m256 __DEFAULT_FN_ATTRS 258_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { 259 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 260 (__v8sf) __B, 261 (__v8sf) 262 _mm256_setzero_ps (), 263 (__mmask8) __U); 264} 265 266static __inline__ __m128 __DEFAULT_FN_ATTRS 267_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 268 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 269 (__v4sf) __B, 270 (__v4sf) __W, 271 (__mmask8) __U); 272} 273 274static __inline__ __m128 __DEFAULT_FN_ATTRS 275_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { 276 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 277 (__v4sf) __B, 278 (__v4sf) 279 _mm_setzero_ps (), 280 (__mmask8) __U); 281} 282 283static __inline__ __m256d __DEFAULT_FN_ATTRS 284_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 285 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 286 (__v4df) __B, 287 (__v4df) __W, 288 (__mmask8) __U); 289} 290 291static __inline__ __m256d __DEFAULT_FN_ATTRS 292_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { 293 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 294 (__v4df) __B, 295 (__v4df) 296 _mm256_setzero_pd (), 297 (__mmask8) __U); 298} 299 300static __inline__ __m128d __DEFAULT_FN_ATTRS 301_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 302 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 303 (__v2df) __B, 304 (__v2df) __W, 305 (__mmask8) __U); 306} 307 308static __inline__ __m128d __DEFAULT_FN_ATTRS 309_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { 310 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 311 (__v2df) __B, 312 (__v2df) 313 _mm_setzero_pd (), 314 (__mmask8) __U); 315} 316 317static __inline__ __m256 __DEFAULT_FN_ATTRS 318_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 319 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 320 (__v8sf) __B, 321 (__v8sf) __W, 322 (__mmask8) __U); 323} 324 325static __inline__ __m256 __DEFAULT_FN_ATTRS 326_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { 327 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 328 (__v8sf) __B, 329 (__v8sf) 330 _mm256_setzero_ps (), 331 (__mmask8) __U); 332} 333 334static __inline__ __m128 __DEFAULT_FN_ATTRS 335_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 336 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 337 (__v4sf) __B, 338 (__v4sf) __W, 339 (__mmask8) __U); 340} 341 342static __inline__ __m128 __DEFAULT_FN_ATTRS 343_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { 344 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 345 (__v4sf) __B, 346 (__v4sf) 347 _mm_setzero_ps (), 348 (__mmask8) __U); 349} 350 351static __inline__ __m128i __DEFAULT_FN_ATTRS 352_mm_cvtpd_epi64 (__m128d __A) { 353 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 354 (__v2di) _mm_setzero_si128(), 355 (__mmask8) -1); 356} 357 358static __inline__ __m128i __DEFAULT_FN_ATTRS 359_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 360 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 361 (__v2di) __W, 362 (__mmask8) __U); 363} 364 365static __inline__ __m128i __DEFAULT_FN_ATTRS 366_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { 367 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 368 (__v2di) _mm_setzero_si128(), 369 (__mmask8) __U); 370} 371 372static __inline__ __m256i __DEFAULT_FN_ATTRS 373_mm256_cvtpd_epi64 (__m256d __A) { 374 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 375 (__v4di) _mm256_setzero_si256(), 376 (__mmask8) -1); 377} 378 379static __inline__ __m256i __DEFAULT_FN_ATTRS 380_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 381 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 382 (__v4di) __W, 383 (__mmask8) __U); 384} 385 386static __inline__ __m256i __DEFAULT_FN_ATTRS 387_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { 388 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 389 (__v4di) _mm256_setzero_si256(), 390 (__mmask8) __U); 391} 392 393static __inline__ __m128i __DEFAULT_FN_ATTRS 394_mm_cvtpd_epu64 (__m128d __A) { 395 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 396 (__v2di) _mm_setzero_si128(), 397 (__mmask8) -1); 398} 399 400static __inline__ __m128i __DEFAULT_FN_ATTRS 401_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 402 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 403 (__v2di) __W, 404 (__mmask8) __U); 405} 406 407static __inline__ __m128i __DEFAULT_FN_ATTRS 408_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { 409 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 410 (__v2di) _mm_setzero_si128(), 411 (__mmask8) __U); 412} 413 414static __inline__ __m256i __DEFAULT_FN_ATTRS 415_mm256_cvtpd_epu64 (__m256d __A) { 416 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 417 (__v4di) _mm256_setzero_si256(), 418 (__mmask8) -1); 419} 420 421static __inline__ __m256i __DEFAULT_FN_ATTRS 422_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 423 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 424 (__v4di) __W, 425 (__mmask8) __U); 426} 427 428static __inline__ __m256i __DEFAULT_FN_ATTRS 429_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { 430 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 431 (__v4di) _mm256_setzero_si256(), 432 (__mmask8) __U); 433} 434 435static __inline__ __m128i __DEFAULT_FN_ATTRS 436_mm_cvtps_epi64 (__m128 __A) { 437 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 438 (__v2di) _mm_setzero_si128(), 439 (__mmask8) -1); 440} 441 442static __inline__ __m128i __DEFAULT_FN_ATTRS 443_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 444 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 445 (__v2di) __W, 446 (__mmask8) __U); 447} 448 449static __inline__ __m128i __DEFAULT_FN_ATTRS 450_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 451 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 452 (__v2di) _mm_setzero_si128(), 453 (__mmask8) __U); 454} 455 456static __inline__ __m256i __DEFAULT_FN_ATTRS 457_mm256_cvtps_epi64 (__m128 __A) { 458 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 459 (__v4di) _mm256_setzero_si256(), 460 (__mmask8) -1); 461} 462 463static __inline__ __m256i __DEFAULT_FN_ATTRS 464_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 465 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 466 (__v4di) __W, 467 (__mmask8) __U); 468} 469 470static __inline__ __m256i __DEFAULT_FN_ATTRS 471_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 472 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 473 (__v4di) _mm256_setzero_si256(), 474 (__mmask8) __U); 475} 476 477static __inline__ __m128i __DEFAULT_FN_ATTRS 478_mm_cvtps_epu64 (__m128 __A) { 479 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 480 (__v2di) _mm_setzero_si128(), 481 (__mmask8) -1); 482} 483 484static __inline__ __m128i __DEFAULT_FN_ATTRS 485_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 486 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 487 (__v2di) __W, 488 (__mmask8) __U); 489} 490 491static __inline__ __m128i __DEFAULT_FN_ATTRS 492_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 493 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 494 (__v2di) _mm_setzero_si128(), 495 (__mmask8) __U); 496} 497 498static __inline__ __m256i __DEFAULT_FN_ATTRS 499_mm256_cvtps_epu64 (__m128 __A) { 500 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 501 (__v4di) _mm256_setzero_si256(), 502 (__mmask8) -1); 503} 504 505static __inline__ __m256i __DEFAULT_FN_ATTRS 506_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 507 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 508 (__v4di) __W, 509 (__mmask8) __U); 510} 511 512static __inline__ __m256i __DEFAULT_FN_ATTRS 513_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 514 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 515 (__v4di) _mm256_setzero_si256(), 516 (__mmask8) __U); 517} 518 519static __inline__ __m128d __DEFAULT_FN_ATTRS 520_mm_cvtepi64_pd (__m128i __A) { 521 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 522 (__v2df) _mm_setzero_pd(), 523 (__mmask8) -1); 524} 525 526static __inline__ __m128d __DEFAULT_FN_ATTRS 527_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 528 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 529 (__v2df) __W, 530 (__mmask8) __U); 531} 532 533static __inline__ __m128d __DEFAULT_FN_ATTRS 534_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { 535 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 536 (__v2df) _mm_setzero_pd(), 537 (__mmask8) __U); 538} 539 540static __inline__ __m256d __DEFAULT_FN_ATTRS 541_mm256_cvtepi64_pd (__m256i __A) { 542 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 543 (__v4df) _mm256_setzero_pd(), 544 (__mmask8) -1); 545} 546 547static __inline__ __m256d __DEFAULT_FN_ATTRS 548_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 549 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 550 (__v4df) __W, 551 (__mmask8) __U); 552} 553 554static __inline__ __m256d __DEFAULT_FN_ATTRS 555_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { 556 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 557 (__v4df) _mm256_setzero_pd(), 558 (__mmask8) __U); 559} 560 561static __inline__ __m128 __DEFAULT_FN_ATTRS 562_mm_cvtepi64_ps (__m128i __A) { 563 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 564 (__v4sf) _mm_setzero_ps(), 565 (__mmask8) -1); 566} 567 568static __inline__ __m128 __DEFAULT_FN_ATTRS 569_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 570 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 571 (__v4sf) __W, 572 (__mmask8) __U); 573} 574 575static __inline__ __m128 __DEFAULT_FN_ATTRS 576_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { 577 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 578 (__v4sf) _mm_setzero_ps(), 579 (__mmask8) __U); 580} 581 582static __inline__ __m128 __DEFAULT_FN_ATTRS 583_mm256_cvtepi64_ps (__m256i __A) { 584 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 585 (__v4sf) _mm_setzero_ps(), 586 (__mmask8) -1); 587} 588 589static __inline__ __m128 __DEFAULT_FN_ATTRS 590_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 591 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 592 (__v4sf) __W, 593 (__mmask8) __U); 594} 595 596static __inline__ __m128 __DEFAULT_FN_ATTRS 597_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { 598 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 599 (__v4sf) _mm_setzero_ps(), 600 (__mmask8) __U); 601} 602 603static __inline__ __m128i __DEFAULT_FN_ATTRS 604_mm_cvttpd_epi64 (__m128d __A) { 605 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 606 (__v2di) _mm_setzero_si128(), 607 (__mmask8) -1); 608} 609 610static __inline__ __m128i __DEFAULT_FN_ATTRS 611_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 612 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 613 (__v2di) __W, 614 (__mmask8) __U); 615} 616 617static __inline__ __m128i __DEFAULT_FN_ATTRS 618_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { 619 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 620 (__v2di) _mm_setzero_si128(), 621 (__mmask8) __U); 622} 623 624static __inline__ __m256i __DEFAULT_FN_ATTRS 625_mm256_cvttpd_epi64 (__m256d __A) { 626 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 627 (__v4di) _mm256_setzero_si256(), 628 (__mmask8) -1); 629} 630 631static __inline__ __m256i __DEFAULT_FN_ATTRS 632_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 633 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 634 (__v4di) __W, 635 (__mmask8) __U); 636} 637 638static __inline__ __m256i __DEFAULT_FN_ATTRS 639_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { 640 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 641 (__v4di) _mm256_setzero_si256(), 642 (__mmask8) __U); 643} 644 645static __inline__ __m128i __DEFAULT_FN_ATTRS 646_mm_cvttpd_epu64 (__m128d __A) { 647 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 648 (__v2di) _mm_setzero_si128(), 649 (__mmask8) -1); 650} 651 652static __inline__ __m128i __DEFAULT_FN_ATTRS 653_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 654 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 655 (__v2di) __W, 656 (__mmask8) __U); 657} 658 659static __inline__ __m128i __DEFAULT_FN_ATTRS 660_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { 661 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 662 (__v2di) _mm_setzero_si128(), 663 (__mmask8) __U); 664} 665 666static __inline__ __m256i __DEFAULT_FN_ATTRS 667_mm256_cvttpd_epu64 (__m256d __A) { 668 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 669 (__v4di) _mm256_setzero_si256(), 670 (__mmask8) -1); 671} 672 673static __inline__ __m256i __DEFAULT_FN_ATTRS 674_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 675 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 676 (__v4di) __W, 677 (__mmask8) __U); 678} 679 680static __inline__ __m256i __DEFAULT_FN_ATTRS 681_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { 682 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 683 (__v4di) _mm256_setzero_si256(), 684 (__mmask8) __U); 685} 686 687static __inline__ __m128i __DEFAULT_FN_ATTRS 688_mm_cvttps_epi64 (__m128 __A) { 689 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 690 (__v2di) _mm_setzero_si128(), 691 (__mmask8) -1); 692} 693 694static __inline__ __m128i __DEFAULT_FN_ATTRS 695_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 696 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 697 (__v2di) __W, 698 (__mmask8) __U); 699} 700 701static __inline__ __m128i __DEFAULT_FN_ATTRS 702_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 703 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 704 (__v2di) _mm_setzero_si128(), 705 (__mmask8) __U); 706} 707 708static __inline__ __m256i __DEFAULT_FN_ATTRS 709_mm256_cvttps_epi64 (__m128 __A) { 710 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 711 (__v4di) _mm256_setzero_si256(), 712 (__mmask8) -1); 713} 714 715static __inline__ __m256i __DEFAULT_FN_ATTRS 716_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 717 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 718 (__v4di) __W, 719 (__mmask8) __U); 720} 721 722static __inline__ __m256i __DEFAULT_FN_ATTRS 723_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 724 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 725 (__v4di) _mm256_setzero_si256(), 726 (__mmask8) __U); 727} 728 729static __inline__ __m128i __DEFAULT_FN_ATTRS 730_mm_cvttps_epu64 (__m128 __A) { 731 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 732 (__v2di) _mm_setzero_si128(), 733 (__mmask8) -1); 734} 735 736static __inline__ __m128i __DEFAULT_FN_ATTRS 737_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 738 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 739 (__v2di) __W, 740 (__mmask8) __U); 741} 742 743static __inline__ __m128i __DEFAULT_FN_ATTRS 744_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 745 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 746 (__v2di) _mm_setzero_si128(), 747 (__mmask8) __U); 748} 749 750static __inline__ __m256i __DEFAULT_FN_ATTRS 751_mm256_cvttps_epu64 (__m128 __A) { 752 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 753 (__v4di) _mm256_setzero_si256(), 754 (__mmask8) -1); 755} 756 757static __inline__ __m256i __DEFAULT_FN_ATTRS 758_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 759 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 760 (__v4di) __W, 761 (__mmask8) __U); 762} 763 764static __inline__ __m256i __DEFAULT_FN_ATTRS 765_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 766 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 767 (__v4di) _mm256_setzero_si256(), 768 (__mmask8) __U); 769} 770 771static __inline__ __m128d __DEFAULT_FN_ATTRS 772_mm_cvtepu64_pd (__m128i __A) { 773 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 774 (__v2df) _mm_setzero_pd(), 775 (__mmask8) -1); 776} 777 778static __inline__ __m128d __DEFAULT_FN_ATTRS 779_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 780 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 781 (__v2df) __W, 782 (__mmask8) __U); 783} 784 785static __inline__ __m128d __DEFAULT_FN_ATTRS 786_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { 787 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 788 (__v2df) _mm_setzero_pd(), 789 (__mmask8) __U); 790} 791 792static __inline__ __m256d __DEFAULT_FN_ATTRS 793_mm256_cvtepu64_pd (__m256i __A) { 794 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 795 (__v4df) _mm256_setzero_pd(), 796 (__mmask8) -1); 797} 798 799static __inline__ __m256d __DEFAULT_FN_ATTRS 800_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 801 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 802 (__v4df) __W, 803 (__mmask8) __U); 804} 805 806static __inline__ __m256d __DEFAULT_FN_ATTRS 807_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { 808 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 809 (__v4df) _mm256_setzero_pd(), 810 (__mmask8) __U); 811} 812 813static __inline__ __m128 __DEFAULT_FN_ATTRS 814_mm_cvtepu64_ps (__m128i __A) { 815 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 816 (__v4sf) _mm_setzero_ps(), 817 (__mmask8) -1); 818} 819 820static __inline__ __m128 __DEFAULT_FN_ATTRS 821_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 822 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 823 (__v4sf) __W, 824 (__mmask8) __U); 825} 826 827static __inline__ __m128 __DEFAULT_FN_ATTRS 828_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { 829 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 830 (__v4sf) _mm_setzero_ps(), 831 (__mmask8) __U); 832} 833 834static __inline__ __m128 __DEFAULT_FN_ATTRS 835_mm256_cvtepu64_ps (__m256i __A) { 836 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 837 (__v4sf) _mm_setzero_ps(), 838 (__mmask8) -1); 839} 840 841static __inline__ __m128 __DEFAULT_FN_ATTRS 842_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 843 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 844 (__v4sf) __W, 845 (__mmask8) __U); 846} 847 848static __inline__ __m128 __DEFAULT_FN_ATTRS 849_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { 850 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 851 (__v4sf) _mm_setzero_ps(), 852 (__mmask8) __U); 853} 854 855#define _mm_range_pd(A, B, C) __extension__ ({ \ 856 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 857 (__v2df)(__m128d)(B), (int)(C), \ 858 (__v2df)_mm_setzero_pd(), \ 859 (__mmask8)-1); }) 860 861#define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 862 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 863 (__v2df)(__m128d)(B), (int)(C), \ 864 (__v2df)(__m128d)(W), \ 865 (__mmask8)(U)); }) 866 867#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \ 868 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 869 (__v2df)(__m128d)(B), (int)(C), \ 870 (__v2df)_mm_setzero_pd(), \ 871 (__mmask8)(U)); }) 872 873#define _mm256_range_pd(A, B, C) __extension__ ({ \ 874 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 875 (__v4df)(__m256d)(B), (int)(C), \ 876 (__v4df)_mm256_setzero_pd(), \ 877 (__mmask8)-1); }) 878 879#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 880 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 881 (__v4df)(__m256d)(B), (int)(C), \ 882 (__v4df)(__m256d)(W), \ 883 (__mmask8)(U)); }) 884 885#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \ 886 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 887 (__v4df)(__m256d)(B), (int)(C), \ 888 (__v4df)_mm256_setzero_pd(), \ 889 (__mmask8)(U)); }) 890 891#define _mm_range_ps(A, B, C) __extension__ ({ \ 892 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 893 (__v4sf)(__m128)(B), (int)(C), \ 894 (__v4sf)_mm_setzero_ps(), \ 895 (__mmask8)-1); }) 896 897#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 898 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 899 (__v4sf)(__m128)(B), (int)(C), \ 900 (__v4sf)(__m128)(W), (__mmask8)(U)); }) 901 902#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \ 903 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 904 (__v4sf)(__m128)(B), (int)(C), \ 905 (__v4sf)_mm_setzero_ps(), \ 906 (__mmask8)(U)); }) 907 908#define _mm256_range_ps(A, B, C) __extension__ ({ \ 909 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 910 (__v8sf)(__m256)(B), (int)(C), \ 911 (__v8sf)_mm256_setzero_ps(), \ 912 (__mmask8)-1); }) 913 914#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 915 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 916 (__v8sf)(__m256)(B), (int)(C), \ 917 (__v8sf)(__m256)(W), (__mmask8)(U)); }) 918 919#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \ 920 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 921 (__v8sf)(__m256)(B), (int)(C), \ 922 (__v8sf)_mm256_setzero_ps(), \ 923 (__mmask8)(U)); }) 924 925#define _mm_reduce_pd(A, B) __extension__ ({ \ 926 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 927 (__v2df)_mm_setzero_pd(), \ 928 (__mmask8)-1); }) 929 930#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 931 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 932 (__v2df)(__m128d)(W), \ 933 (__mmask8)(U)); }) 934 935#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \ 936 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 937 (__v2df)_mm_setzero_pd(), \ 938 (__mmask8)(U)); }) 939 940#define _mm256_reduce_pd(A, B) __extension__ ({ \ 941 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 942 (__v4df)_mm256_setzero_pd(), \ 943 (__mmask8)-1); }) 944 945#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 946 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 947 (__v4df)(__m256d)(W), \ 948 (__mmask8)(U)); }) 949 950#define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \ 951 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 952 (__v4df)_mm256_setzero_pd(), \ 953 (__mmask8)(U)); }) 954 955#define _mm_reduce_ps(A, B) __extension__ ({ \ 956 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 957 (__v4sf)_mm_setzero_ps(), \ 958 (__mmask8)-1); }) 959 960#define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 961 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 962 (__v4sf)(__m128)(W), \ 963 (__mmask8)(U)); }) 964 965#define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \ 966 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 967 (__v4sf)_mm_setzero_ps(), \ 968 (__mmask8)(U)); }) 969 970#define _mm256_reduce_ps(A, B) __extension__ ({ \ 971 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 972 (__v8sf)_mm256_setzero_ps(), \ 973 (__mmask8)-1); }) 974 975#define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 976 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 977 (__v8sf)(__m256)(W), \ 978 (__mmask8)(U)); }) 979 980#define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \ 981 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 982 (__v8sf)_mm256_setzero_ps(), \ 983 (__mmask8)(U)); }) 984 985static __inline__ __mmask8 __DEFAULT_FN_ATTRS 986_mm_movepi32_mask (__m128i __A) 987{ 988 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); 989} 990 991static __inline__ __mmask8 __DEFAULT_FN_ATTRS 992_mm256_movepi32_mask (__m256i __A) 993{ 994 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); 995} 996 997static __inline__ __m128i __DEFAULT_FN_ATTRS 998_mm_movm_epi32 (__mmask8 __A) 999{ 1000 return (__m128i) __builtin_ia32_cvtmask2d128 (__A); 1001} 1002 1003static __inline__ __m256i __DEFAULT_FN_ATTRS 1004_mm256_movm_epi32 (__mmask8 __A) 1005{ 1006 return (__m256i) __builtin_ia32_cvtmask2d256 (__A); 1007} 1008 1009static __inline__ __m128i __DEFAULT_FN_ATTRS 1010_mm_movm_epi64 (__mmask8 __A) 1011{ 1012 return (__m128i) __builtin_ia32_cvtmask2q128 (__A); 1013} 1014 1015static __inline__ __m256i __DEFAULT_FN_ATTRS 1016_mm256_movm_epi64 (__mmask8 __A) 1017{ 1018 return (__m256i) __builtin_ia32_cvtmask2q256 (__A); 1019} 1020 1021static __inline__ __mmask8 __DEFAULT_FN_ATTRS 1022_mm_movepi64_mask (__m128i __A) 1023{ 1024 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); 1025} 1026 1027static __inline__ __mmask8 __DEFAULT_FN_ATTRS 1028_mm256_movepi64_mask (__m256i __A) 1029{ 1030 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); 1031} 1032 1033static __inline__ __m256 __DEFAULT_FN_ATTRS 1034_mm256_broadcast_f32x2 (__m128 __A) 1035{ 1036 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 1037 (__v8sf)_mm256_undefined_ps(), 1038 (__mmask8) -1); 1039} 1040 1041static __inline__ __m256 __DEFAULT_FN_ATTRS 1042_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 1043{ 1044 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 1045 (__v8sf) __O, 1046 __M); 1047} 1048 1049static __inline__ __m256 __DEFAULT_FN_ATTRS 1050_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 1051{ 1052 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 1053 (__v8sf) _mm256_setzero_ps (), 1054 __M); 1055} 1056 1057static __inline__ __m256d __DEFAULT_FN_ATTRS 1058_mm256_broadcast_f64x2 (__m128d __A) 1059{ 1060 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, 1061 (__v4df)_mm256_undefined_pd(), 1062 (__mmask8) -1); 1063} 1064 1065static __inline__ __m256d __DEFAULT_FN_ATTRS 1066_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A) 1067{ 1068 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, 1069 (__v4df) __O, 1070 __M); 1071} 1072 1073static __inline__ __m256d __DEFAULT_FN_ATTRS 1074_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 1075{ 1076 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A, 1077 (__v4df) _mm256_setzero_ps (), 1078 __M); 1079} 1080 1081static __inline__ __m128i __DEFAULT_FN_ATTRS 1082_mm_broadcast_i32x2 (__m128i __A) 1083{ 1084 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1085 (__v4si)_mm_undefined_si128(), 1086 (__mmask8) -1); 1087} 1088 1089static __inline__ __m128i __DEFAULT_FN_ATTRS 1090_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 1091{ 1092 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1093 (__v4si) __O, 1094 __M); 1095} 1096 1097static __inline__ __m128i __DEFAULT_FN_ATTRS 1098_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1099{ 1100 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1101 (__v4si) _mm_setzero_si128 (), 1102 __M); 1103} 1104 1105static __inline__ __m256i __DEFAULT_FN_ATTRS 1106_mm256_broadcast_i32x2 (__m128i __A) 1107{ 1108 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1109 (__v8si)_mm256_undefined_si256(), 1110 (__mmask8) -1); 1111} 1112 1113static __inline__ __m256i __DEFAULT_FN_ATTRS 1114_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 1115{ 1116 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1117 (__v8si) __O, 1118 __M); 1119} 1120 1121static __inline__ __m256i __DEFAULT_FN_ATTRS 1122_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1123{ 1124 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1125 (__v8si) _mm256_setzero_si256 (), 1126 __M); 1127} 1128 1129static __inline__ __m256i __DEFAULT_FN_ATTRS 1130_mm256_broadcast_i64x2 (__m128i __A) 1131{ 1132 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, 1133 (__v4di)_mm256_undefined_si256(), 1134 (__mmask8) -1); 1135} 1136 1137static __inline__ __m256i __DEFAULT_FN_ATTRS 1138_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A) 1139{ 1140 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, 1141 (__v4di) __O, 1142 __M); 1143} 1144 1145static __inline__ __m256i __DEFAULT_FN_ATTRS 1146_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 1147{ 1148 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) __A, 1149 (__v4di) _mm256_setzero_si256 (), 1150 __M); 1151} 1152 1153#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \ 1154 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1155 (int)(imm), \ 1156 (__v2df)_mm_setzero_pd(), \ 1157 (__mmask8)-1); }) 1158 1159#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ 1160 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1161 (int)(imm), \ 1162 (__v2df)(__m128d)(W), \ 1163 (__mmask8)(U)); }) 1164 1165#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ 1166 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1167 (int)(imm), \ 1168 (__v2df)_mm_setzero_pd(), \ 1169 (__mmask8)(U)); }) 1170 1171#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \ 1172 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1173 (int)(imm), \ 1174 (__v2di)_mm_setzero_di(), \ 1175 (__mmask8)-1); }) 1176 1177#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ 1178 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1179 (int)(imm), \ 1180 (__v2di)(__m128i)(W), \ 1181 (__mmask8)(U)); }) 1182 1183#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ 1184 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1185 (int)(imm), \ 1186 (__v2di)_mm_setzero_di(), \ 1187 (__mmask8)(U)); }) 1188 1189#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \ 1190 (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \ 1191 (__v2df)(__m128d)(B), \ 1192 (int)(imm), \ 1193 (__v4df)_mm256_setzero_pd(), \ 1194 (__mmask8)-1); }) 1195 1196#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ 1197 (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \ 1198 (__v2df)(__m128d)(B), \ 1199 (int)(imm), \ 1200 (__v4df)(__m256d)(W), \ 1201 (__mmask8)(U)); }) 1202 1203#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ 1204 (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \ 1205 (__v2df)(__m128d)(B), \ 1206 (int)(imm), \ 1207 (__v4df)_mm256_setzero_pd(), \ 1208 (__mmask8)(U)); }) 1209 1210#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \ 1211 (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \ 1212 (__v2di)(__m128i)(B), \ 1213 (int)(imm), \ 1214 (__v4di)_mm256_setzero_si256(), \ 1215 (__mmask8)-1); }) 1216 1217#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ 1218 (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \ 1219 (__v2di)(__m128i)(B), \ 1220 (int)(imm), \ 1221 (__v4di)(__m256i)(W), \ 1222 (__mmask8)(U)); }) 1223 1224#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ 1225 (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \ 1226 (__v2di)(__m128i)(B), \ 1227 (int)(imm), \ 1228 (__v4di)_mm256_setzero_si256(), \ 1229 (__mmask8)(U)); }) 1230 1231#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1232 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1233 (__mmask8)(U)); }) 1234 1235#define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \ 1236 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1237 (__mmask8)-1); }) 1238 1239#define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1240 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1241 (__mmask8)(U)); }) 1242 1243#define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \ 1244 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1245 (__mmask8)-1); }) 1246 1247#define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1248 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1249 (__mmask8)(U)); }) 1250 1251#define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \ 1252 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1253 (__mmask8)-1); }) 1254 1255#define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1256 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1257 (__mmask8)(U)); }) 1258 1259#define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \ 1260 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1261 (__mmask8)-1); }) 1262 1263#undef __DEFAULT_FN_ATTRS 1264 1265#endif 1266