1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512VLDQINTRIN_H 29#define __AVX512VLDQINTRIN_H 30 31/* Define the default attributes for the functions in this file. */ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) 33 34static __inline__ __m256i __DEFAULT_FN_ATTRS 35_mm256_mullo_epi64 (__m256i __A, __m256i __B) { 36 return (__m256i) ((__v4du) __A * (__v4du) __B); 37} 38 39static __inline__ __m256i __DEFAULT_FN_ATTRS 40_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 41 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 42 (__v4di)_mm256_mullo_epi64(__A, __B), 43 (__v4di)__W); 44} 45 46static __inline__ __m256i __DEFAULT_FN_ATTRS 47_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { 48 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 49 (__v4di)_mm256_mullo_epi64(__A, __B), 50 (__v4di)_mm256_setzero_si256()); 51} 52 53static __inline__ __m128i __DEFAULT_FN_ATTRS 54_mm_mullo_epi64 (__m128i __A, __m128i __B) { 55 return (__m128i) ((__v2du) __A * (__v2du) __B); 56} 57 58static __inline__ __m128i __DEFAULT_FN_ATTRS 59_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 60 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 61 (__v2di)_mm_mullo_epi64(__A, __B), 62 (__v2di)__W); 63} 64 65static __inline__ __m128i __DEFAULT_FN_ATTRS 66_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { 67 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 68 (__v2di)_mm_mullo_epi64(__A, __B), 69 (__v2di)_mm_setzero_si128()); 70} 71 72static __inline__ __m256d __DEFAULT_FN_ATTRS 73_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 74 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 75 (__v4df)_mm256_andnot_pd(__A, __B), 76 (__v4df)__W); 77} 78 79static __inline__ __m256d __DEFAULT_FN_ATTRS 80_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { 81 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 82 (__v4df)_mm256_andnot_pd(__A, __B), 83 (__v4df)_mm256_setzero_pd()); 84} 85 86static __inline__ __m128d __DEFAULT_FN_ATTRS 87_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 88 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 89 (__v2df)_mm_andnot_pd(__A, __B), 90 (__v2df)__W); 91} 92 93static __inline__ __m128d __DEFAULT_FN_ATTRS 94_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { 95 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 96 (__v2df)_mm_andnot_pd(__A, __B), 97 (__v2df)_mm_setzero_pd()); 98} 99 100static __inline__ __m256 __DEFAULT_FN_ATTRS 101_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 102 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 103 (__v8sf)_mm256_andnot_ps(__A, __B), 104 (__v8sf)__W); 105} 106 107static __inline__ __m256 __DEFAULT_FN_ATTRS 108_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { 109 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 110 (__v8sf)_mm256_andnot_ps(__A, __B), 111 (__v8sf)_mm256_setzero_ps()); 112} 113 114static __inline__ __m128 __DEFAULT_FN_ATTRS 115_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 116 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 117 (__v4sf)_mm_andnot_ps(__A, __B), 118 (__v4sf)__W); 119} 120 121static __inline__ __m128 __DEFAULT_FN_ATTRS 122_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { 123 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 124 (__v4sf)_mm_andnot_ps(__A, __B), 125 (__v4sf)_mm_setzero_ps()); 126} 127 128static __inline__ __m256d __DEFAULT_FN_ATTRS 129_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 130 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 131 (__v4df)_mm256_and_pd(__A, __B), 132 (__v4df)__W); 133} 134 135static __inline__ __m256d __DEFAULT_FN_ATTRS 136_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { 137 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 138 (__v4df)_mm256_and_pd(__A, __B), 139 (__v4df)_mm256_setzero_pd()); 140} 141 142static __inline__ __m128d __DEFAULT_FN_ATTRS 143_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 144 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 145 (__v2df)_mm_and_pd(__A, __B), 146 (__v2df)__W); 147} 148 149static __inline__ __m128d __DEFAULT_FN_ATTRS 150_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { 151 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 152 (__v2df)_mm_and_pd(__A, __B), 153 (__v2df)_mm_setzero_pd()); 154} 155 156static __inline__ __m256 __DEFAULT_FN_ATTRS 157_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 158 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 159 (__v8sf)_mm256_and_ps(__A, __B), 160 (__v8sf)__W); 161} 162 163static __inline__ __m256 __DEFAULT_FN_ATTRS 164_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { 165 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 166 (__v8sf)_mm256_and_ps(__A, __B), 167 (__v8sf)_mm256_setzero_ps()); 168} 169 170static __inline__ __m128 __DEFAULT_FN_ATTRS 171_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 172 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 173 (__v4sf)_mm_and_ps(__A, __B), 174 (__v4sf)__W); 175} 176 177static __inline__ __m128 __DEFAULT_FN_ATTRS 178_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { 179 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 180 (__v4sf)_mm_and_ps(__A, __B), 181 (__v4sf)_mm_setzero_ps()); 182} 183 184static __inline__ __m256d __DEFAULT_FN_ATTRS 185_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 186 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 187 (__v4df)_mm256_xor_pd(__A, __B), 188 (__v4df)__W); 189} 190 191static __inline__ __m256d __DEFAULT_FN_ATTRS 192_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { 193 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 194 (__v4df)_mm256_xor_pd(__A, __B), 195 (__v4df)_mm256_setzero_pd()); 196} 197 198static __inline__ __m128d __DEFAULT_FN_ATTRS 199_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 200 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 201 (__v2df)_mm_xor_pd(__A, __B), 202 (__v2df)__W); 203} 204 205static __inline__ __m128d __DEFAULT_FN_ATTRS 206_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 207 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 208 (__v2df)_mm_xor_pd(__A, __B), 209 (__v2df)_mm_setzero_pd()); 210} 211 212static __inline__ __m256 __DEFAULT_FN_ATTRS 213_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 214 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 215 (__v8sf)_mm256_xor_ps(__A, __B), 216 (__v8sf)__W); 217} 218 219static __inline__ __m256 __DEFAULT_FN_ATTRS 220_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { 221 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 222 (__v8sf)_mm256_xor_ps(__A, __B), 223 (__v8sf)_mm256_setzero_ps()); 224} 225 226static __inline__ __m128 __DEFAULT_FN_ATTRS 227_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 228 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 229 (__v4sf)_mm_xor_ps(__A, __B), 230 (__v4sf)__W); 231} 232 233static __inline__ __m128 __DEFAULT_FN_ATTRS 234_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { 235 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 236 (__v4sf)_mm_xor_ps(__A, __B), 237 (__v4sf)_mm_setzero_ps()); 238} 239 240static __inline__ __m256d __DEFAULT_FN_ATTRS 241_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 242 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 243 (__v4df)_mm256_or_pd(__A, __B), 244 (__v4df)__W); 245} 246 247static __inline__ __m256d __DEFAULT_FN_ATTRS 248_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { 249 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 250 (__v4df)_mm256_or_pd(__A, __B), 251 (__v4df)_mm256_setzero_pd()); 252} 253 254static __inline__ __m128d __DEFAULT_FN_ATTRS 255_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 256 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 257 (__v2df)_mm_or_pd(__A, __B), 258 (__v2df)__W); 259} 260 261static __inline__ __m128d __DEFAULT_FN_ATTRS 262_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { 263 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 264 (__v2df)_mm_or_pd(__A, __B), 265 (__v2df)_mm_setzero_pd()); 266} 267 268static __inline__ __m256 __DEFAULT_FN_ATTRS 269_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 270 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 271 (__v8sf)_mm256_or_ps(__A, __B), 272 (__v8sf)__W); 273} 274 275static __inline__ __m256 __DEFAULT_FN_ATTRS 276_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { 277 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 278 (__v8sf)_mm256_or_ps(__A, __B), 279 (__v8sf)_mm256_setzero_ps()); 280} 281 282static __inline__ __m128 __DEFAULT_FN_ATTRS 283_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 284 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 285 (__v4sf)_mm_or_ps(__A, __B), 286 (__v4sf)__W); 287} 288 289static __inline__ __m128 __DEFAULT_FN_ATTRS 290_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { 291 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 292 (__v4sf)_mm_or_ps(__A, __B), 293 (__v4sf)_mm_setzero_ps()); 294} 295 296static __inline__ __m128i __DEFAULT_FN_ATTRS 297_mm_cvtpd_epi64 (__m128d __A) { 298 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 299 (__v2di) _mm_setzero_si128(), 300 (__mmask8) -1); 301} 302 303static __inline__ __m128i __DEFAULT_FN_ATTRS 304_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 305 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 306 (__v2di) __W, 307 (__mmask8) __U); 308} 309 310static __inline__ __m128i __DEFAULT_FN_ATTRS 311_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { 312 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 313 (__v2di) _mm_setzero_si128(), 314 (__mmask8) __U); 315} 316 317static __inline__ __m256i __DEFAULT_FN_ATTRS 318_mm256_cvtpd_epi64 (__m256d __A) { 319 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 320 (__v4di) _mm256_setzero_si256(), 321 (__mmask8) -1); 322} 323 324static __inline__ __m256i __DEFAULT_FN_ATTRS 325_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 326 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 327 (__v4di) __W, 328 (__mmask8) __U); 329} 330 331static __inline__ __m256i __DEFAULT_FN_ATTRS 332_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { 333 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 334 (__v4di) _mm256_setzero_si256(), 335 (__mmask8) __U); 336} 337 338static __inline__ __m128i __DEFAULT_FN_ATTRS 339_mm_cvtpd_epu64 (__m128d __A) { 340 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 341 (__v2di) _mm_setzero_si128(), 342 (__mmask8) -1); 343} 344 345static __inline__ __m128i __DEFAULT_FN_ATTRS 346_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 347 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 348 (__v2di) __W, 349 (__mmask8) __U); 350} 351 352static __inline__ __m128i __DEFAULT_FN_ATTRS 353_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { 354 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 355 (__v2di) _mm_setzero_si128(), 356 (__mmask8) __U); 357} 358 359static __inline__ __m256i __DEFAULT_FN_ATTRS 360_mm256_cvtpd_epu64 (__m256d __A) { 361 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 362 (__v4di) _mm256_setzero_si256(), 363 (__mmask8) -1); 364} 365 366static __inline__ __m256i __DEFAULT_FN_ATTRS 367_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 368 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 369 (__v4di) __W, 370 (__mmask8) __U); 371} 372 373static __inline__ __m256i __DEFAULT_FN_ATTRS 374_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { 375 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 376 (__v4di) _mm256_setzero_si256(), 377 (__mmask8) __U); 378} 379 380static __inline__ __m128i __DEFAULT_FN_ATTRS 381_mm_cvtps_epi64 (__m128 __A) { 382 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 383 (__v2di) _mm_setzero_si128(), 384 (__mmask8) -1); 385} 386 387static __inline__ __m128i __DEFAULT_FN_ATTRS 388_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 389 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 390 (__v2di) __W, 391 (__mmask8) __U); 392} 393 394static __inline__ __m128i __DEFAULT_FN_ATTRS 395_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 396 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 397 (__v2di) _mm_setzero_si128(), 398 (__mmask8) __U); 399} 400 401static __inline__ __m256i __DEFAULT_FN_ATTRS 402_mm256_cvtps_epi64 (__m128 __A) { 403 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 404 (__v4di) _mm256_setzero_si256(), 405 (__mmask8) -1); 406} 407 408static __inline__ __m256i __DEFAULT_FN_ATTRS 409_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 410 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 411 (__v4di) __W, 412 (__mmask8) __U); 413} 414 415static __inline__ __m256i __DEFAULT_FN_ATTRS 416_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 417 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 418 (__v4di) _mm256_setzero_si256(), 419 (__mmask8) __U); 420} 421 422static __inline__ __m128i __DEFAULT_FN_ATTRS 423_mm_cvtps_epu64 (__m128 __A) { 424 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 425 (__v2di) _mm_setzero_si128(), 426 (__mmask8) -1); 427} 428 429static __inline__ __m128i __DEFAULT_FN_ATTRS 430_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 431 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 432 (__v2di) __W, 433 (__mmask8) __U); 434} 435 436static __inline__ __m128i __DEFAULT_FN_ATTRS 437_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 438 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 439 (__v2di) _mm_setzero_si128(), 440 (__mmask8) __U); 441} 442 443static __inline__ __m256i __DEFAULT_FN_ATTRS 444_mm256_cvtps_epu64 (__m128 __A) { 445 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 446 (__v4di) _mm256_setzero_si256(), 447 (__mmask8) -1); 448} 449 450static __inline__ __m256i __DEFAULT_FN_ATTRS 451_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 452 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 453 (__v4di) __W, 454 (__mmask8) __U); 455} 456 457static __inline__ __m256i __DEFAULT_FN_ATTRS 458_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 459 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 460 (__v4di) _mm256_setzero_si256(), 461 (__mmask8) __U); 462} 463 464static __inline__ __m128d __DEFAULT_FN_ATTRS 465_mm_cvtepi64_pd (__m128i __A) { 466 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 467 (__v2df) _mm_setzero_pd(), 468 (__mmask8) -1); 469} 470 471static __inline__ __m128d __DEFAULT_FN_ATTRS 472_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 473 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 474 (__v2df) __W, 475 (__mmask8) __U); 476} 477 478static __inline__ __m128d __DEFAULT_FN_ATTRS 479_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { 480 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 481 (__v2df) _mm_setzero_pd(), 482 (__mmask8) __U); 483} 484 485static __inline__ __m256d __DEFAULT_FN_ATTRS 486_mm256_cvtepi64_pd (__m256i __A) { 487 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 488 (__v4df) _mm256_setzero_pd(), 489 (__mmask8) -1); 490} 491 492static __inline__ __m256d __DEFAULT_FN_ATTRS 493_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 494 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 495 (__v4df) __W, 496 (__mmask8) __U); 497} 498 499static __inline__ __m256d __DEFAULT_FN_ATTRS 500_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { 501 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 502 (__v4df) _mm256_setzero_pd(), 503 (__mmask8) __U); 504} 505 506static __inline__ __m128 __DEFAULT_FN_ATTRS 507_mm_cvtepi64_ps (__m128i __A) { 508 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 509 (__v4sf) _mm_setzero_ps(), 510 (__mmask8) -1); 511} 512 513static __inline__ __m128 __DEFAULT_FN_ATTRS 514_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 515 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 516 (__v4sf) __W, 517 (__mmask8) __U); 518} 519 520static __inline__ __m128 __DEFAULT_FN_ATTRS 521_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { 522 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 523 (__v4sf) _mm_setzero_ps(), 524 (__mmask8) __U); 525} 526 527static __inline__ __m128 __DEFAULT_FN_ATTRS 528_mm256_cvtepi64_ps (__m256i __A) { 529 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 530 (__v4sf) _mm_setzero_ps(), 531 (__mmask8) -1); 532} 533 534static __inline__ __m128 __DEFAULT_FN_ATTRS 535_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 536 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 537 (__v4sf) __W, 538 (__mmask8) __U); 539} 540 541static __inline__ __m128 __DEFAULT_FN_ATTRS 542_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { 543 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 544 (__v4sf) _mm_setzero_ps(), 545 (__mmask8) __U); 546} 547 548static __inline__ __m128i __DEFAULT_FN_ATTRS 549_mm_cvttpd_epi64 (__m128d __A) { 550 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 551 (__v2di) _mm_setzero_si128(), 552 (__mmask8) -1); 553} 554 555static __inline__ __m128i __DEFAULT_FN_ATTRS 556_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 557 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 558 (__v2di) __W, 559 (__mmask8) __U); 560} 561 562static __inline__ __m128i __DEFAULT_FN_ATTRS 563_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { 564 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 565 (__v2di) _mm_setzero_si128(), 566 (__mmask8) __U); 567} 568 569static __inline__ __m256i __DEFAULT_FN_ATTRS 570_mm256_cvttpd_epi64 (__m256d __A) { 571 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 572 (__v4di) _mm256_setzero_si256(), 573 (__mmask8) -1); 574} 575 576static __inline__ __m256i __DEFAULT_FN_ATTRS 577_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 578 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 579 (__v4di) __W, 580 (__mmask8) __U); 581} 582 583static __inline__ __m256i __DEFAULT_FN_ATTRS 584_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { 585 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 586 (__v4di) _mm256_setzero_si256(), 587 (__mmask8) __U); 588} 589 590static __inline__ __m128i __DEFAULT_FN_ATTRS 591_mm_cvttpd_epu64 (__m128d __A) { 592 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 593 (__v2di) _mm_setzero_si128(), 594 (__mmask8) -1); 595} 596 597static __inline__ __m128i __DEFAULT_FN_ATTRS 598_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 599 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 600 (__v2di) __W, 601 (__mmask8) __U); 602} 603 604static __inline__ __m128i __DEFAULT_FN_ATTRS 605_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { 606 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 607 (__v2di) _mm_setzero_si128(), 608 (__mmask8) __U); 609} 610 611static __inline__ __m256i __DEFAULT_FN_ATTRS 612_mm256_cvttpd_epu64 (__m256d __A) { 613 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 614 (__v4di) _mm256_setzero_si256(), 615 (__mmask8) -1); 616} 617 618static __inline__ __m256i __DEFAULT_FN_ATTRS 619_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 620 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 621 (__v4di) __W, 622 (__mmask8) __U); 623} 624 625static __inline__ __m256i __DEFAULT_FN_ATTRS 626_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { 627 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 628 (__v4di) _mm256_setzero_si256(), 629 (__mmask8) __U); 630} 631 632static __inline__ __m128i __DEFAULT_FN_ATTRS 633_mm_cvttps_epi64 (__m128 __A) { 634 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 635 (__v2di) _mm_setzero_si128(), 636 (__mmask8) -1); 637} 638 639static __inline__ __m128i __DEFAULT_FN_ATTRS 640_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 641 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 642 (__v2di) __W, 643 (__mmask8) __U); 644} 645 646static __inline__ __m128i __DEFAULT_FN_ATTRS 647_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 648 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 649 (__v2di) _mm_setzero_si128(), 650 (__mmask8) __U); 651} 652 653static __inline__ __m256i __DEFAULT_FN_ATTRS 654_mm256_cvttps_epi64 (__m128 __A) { 655 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 656 (__v4di) _mm256_setzero_si256(), 657 (__mmask8) -1); 658} 659 660static __inline__ __m256i __DEFAULT_FN_ATTRS 661_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 662 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 663 (__v4di) __W, 664 (__mmask8) __U); 665} 666 667static __inline__ __m256i __DEFAULT_FN_ATTRS 668_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 669 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 670 (__v4di) _mm256_setzero_si256(), 671 (__mmask8) __U); 672} 673 674static __inline__ __m128i __DEFAULT_FN_ATTRS 675_mm_cvttps_epu64 (__m128 __A) { 676 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 677 (__v2di) _mm_setzero_si128(), 678 (__mmask8) -1); 679} 680 681static __inline__ __m128i __DEFAULT_FN_ATTRS 682_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 683 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 684 (__v2di) __W, 685 (__mmask8) __U); 686} 687 688static __inline__ __m128i __DEFAULT_FN_ATTRS 689_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 690 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 691 (__v2di) _mm_setzero_si128(), 692 (__mmask8) __U); 693} 694 695static __inline__ __m256i __DEFAULT_FN_ATTRS 696_mm256_cvttps_epu64 (__m128 __A) { 697 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 698 (__v4di) _mm256_setzero_si256(), 699 (__mmask8) -1); 700} 701 702static __inline__ __m256i __DEFAULT_FN_ATTRS 703_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 704 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 705 (__v4di) __W, 706 (__mmask8) __U); 707} 708 709static __inline__ __m256i __DEFAULT_FN_ATTRS 710_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 711 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 712 (__v4di) _mm256_setzero_si256(), 713 (__mmask8) __U); 714} 715 716static __inline__ __m128d __DEFAULT_FN_ATTRS 717_mm_cvtepu64_pd (__m128i __A) { 718 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 719 (__v2df) _mm_setzero_pd(), 720 (__mmask8) -1); 721} 722 723static __inline__ __m128d __DEFAULT_FN_ATTRS 724_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 725 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 726 (__v2df) __W, 727 (__mmask8) __U); 728} 729 730static __inline__ __m128d __DEFAULT_FN_ATTRS 731_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { 732 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 733 (__v2df) _mm_setzero_pd(), 734 (__mmask8) __U); 735} 736 737static __inline__ __m256d __DEFAULT_FN_ATTRS 738_mm256_cvtepu64_pd (__m256i __A) { 739 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 740 (__v4df) _mm256_setzero_pd(), 741 (__mmask8) -1); 742} 743 744static __inline__ __m256d __DEFAULT_FN_ATTRS 745_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 746 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 747 (__v4df) __W, 748 (__mmask8) __U); 749} 750 751static __inline__ __m256d __DEFAULT_FN_ATTRS 752_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { 753 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 754 (__v4df) _mm256_setzero_pd(), 755 (__mmask8) __U); 756} 757 758static __inline__ __m128 __DEFAULT_FN_ATTRS 759_mm_cvtepu64_ps (__m128i __A) { 760 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 761 (__v4sf) _mm_setzero_ps(), 762 (__mmask8) -1); 763} 764 765static __inline__ __m128 __DEFAULT_FN_ATTRS 766_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 767 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 768 (__v4sf) __W, 769 (__mmask8) __U); 770} 771 772static __inline__ __m128 __DEFAULT_FN_ATTRS 773_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { 774 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 775 (__v4sf) _mm_setzero_ps(), 776 (__mmask8) __U); 777} 778 779static __inline__ __m128 __DEFAULT_FN_ATTRS 780_mm256_cvtepu64_ps (__m256i __A) { 781 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 782 (__v4sf) _mm_setzero_ps(), 783 (__mmask8) -1); 784} 785 786static __inline__ __m128 __DEFAULT_FN_ATTRS 787_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 788 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 789 (__v4sf) __W, 790 (__mmask8) __U); 791} 792 793static __inline__ __m128 __DEFAULT_FN_ATTRS 794_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { 795 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 796 (__v4sf) _mm_setzero_ps(), 797 (__mmask8) __U); 798} 799 800#define _mm_range_pd(A, B, C) __extension__ ({ \ 801 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 802 (__v2df)(__m128d)(B), (int)(C), \ 803 (__v2df)_mm_setzero_pd(), \ 804 (__mmask8)-1); }) 805 806#define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 807 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 808 (__v2df)(__m128d)(B), (int)(C), \ 809 (__v2df)(__m128d)(W), \ 810 (__mmask8)(U)); }) 811 812#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \ 813 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 814 (__v2df)(__m128d)(B), (int)(C), \ 815 (__v2df)_mm_setzero_pd(), \ 816 (__mmask8)(U)); }) 817 818#define _mm256_range_pd(A, B, C) __extension__ ({ \ 819 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 820 (__v4df)(__m256d)(B), (int)(C), \ 821 (__v4df)_mm256_setzero_pd(), \ 822 (__mmask8)-1); }) 823 824#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \ 825 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 826 (__v4df)(__m256d)(B), (int)(C), \ 827 (__v4df)(__m256d)(W), \ 828 (__mmask8)(U)); }) 829 830#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \ 831 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 832 (__v4df)(__m256d)(B), (int)(C), \ 833 (__v4df)_mm256_setzero_pd(), \ 834 (__mmask8)(U)); }) 835 836#define _mm_range_ps(A, B, C) __extension__ ({ \ 837 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 838 (__v4sf)(__m128)(B), (int)(C), \ 839 (__v4sf)_mm_setzero_ps(), \ 840 (__mmask8)-1); }) 841 842#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 843 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 844 (__v4sf)(__m128)(B), (int)(C), \ 845 (__v4sf)(__m128)(W), (__mmask8)(U)); }) 846 847#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \ 848 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 849 (__v4sf)(__m128)(B), (int)(C), \ 850 (__v4sf)_mm_setzero_ps(), \ 851 (__mmask8)(U)); }) 852 853#define _mm256_range_ps(A, B, C) __extension__ ({ \ 854 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 855 (__v8sf)(__m256)(B), (int)(C), \ 856 (__v8sf)_mm256_setzero_ps(), \ 857 (__mmask8)-1); }) 858 859#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \ 860 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 861 (__v8sf)(__m256)(B), (int)(C), \ 862 (__v8sf)(__m256)(W), (__mmask8)(U)); }) 863 864#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \ 865 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 866 (__v8sf)(__m256)(B), (int)(C), \ 867 (__v8sf)_mm256_setzero_ps(), \ 868 (__mmask8)(U)); }) 869 870#define _mm_reduce_pd(A, B) __extension__ ({ \ 871 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 872 (__v2df)_mm_setzero_pd(), \ 873 (__mmask8)-1); }) 874 875#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 876 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 877 (__v2df)(__m128d)(W), \ 878 (__mmask8)(U)); }) 879 880#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \ 881 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 882 (__v2df)_mm_setzero_pd(), \ 883 (__mmask8)(U)); }) 884 885#define _mm256_reduce_pd(A, B) __extension__ ({ \ 886 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 887 (__v4df)_mm256_setzero_pd(), \ 888 (__mmask8)-1); }) 889 890#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \ 891 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 892 (__v4df)(__m256d)(W), \ 893 (__mmask8)(U)); }) 894 895#define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \ 896 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 897 (__v4df)_mm256_setzero_pd(), \ 898 (__mmask8)(U)); }) 899 900#define _mm_reduce_ps(A, B) __extension__ ({ \ 901 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 902 (__v4sf)_mm_setzero_ps(), \ 903 (__mmask8)-1); }) 904 905#define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 906 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 907 (__v4sf)(__m128)(W), \ 908 (__mmask8)(U)); }) 909 910#define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \ 911 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 912 (__v4sf)_mm_setzero_ps(), \ 913 (__mmask8)(U)); }) 914 915#define _mm256_reduce_ps(A, B) __extension__ ({ \ 916 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 917 (__v8sf)_mm256_setzero_ps(), \ 918 (__mmask8)-1); }) 919 920#define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \ 921 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 922 (__v8sf)(__m256)(W), \ 923 (__mmask8)(U)); }) 924 925#define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \ 926 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 927 (__v8sf)_mm256_setzero_ps(), \ 928 (__mmask8)(U)); }) 929 930static __inline__ __mmask8 __DEFAULT_FN_ATTRS 931_mm_movepi32_mask (__m128i __A) 932{ 933 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); 934} 935 936static __inline__ __mmask8 __DEFAULT_FN_ATTRS 937_mm256_movepi32_mask (__m256i __A) 938{ 939 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); 940} 941 942static __inline__ __m128i __DEFAULT_FN_ATTRS 943_mm_movm_epi32 (__mmask8 __A) 944{ 945 return (__m128i) __builtin_ia32_cvtmask2d128 (__A); 946} 947 948static __inline__ __m256i __DEFAULT_FN_ATTRS 949_mm256_movm_epi32 (__mmask8 __A) 950{ 951 return (__m256i) __builtin_ia32_cvtmask2d256 (__A); 952} 953 954static __inline__ __m128i __DEFAULT_FN_ATTRS 955_mm_movm_epi64 (__mmask8 __A) 956{ 957 return (__m128i) __builtin_ia32_cvtmask2q128 (__A); 958} 959 960static __inline__ __m256i __DEFAULT_FN_ATTRS 961_mm256_movm_epi64 (__mmask8 __A) 962{ 963 return (__m256i) __builtin_ia32_cvtmask2q256 (__A); 964} 965 966static __inline__ __mmask8 __DEFAULT_FN_ATTRS 967_mm_movepi64_mask (__m128i __A) 968{ 969 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); 970} 971 972static __inline__ __mmask8 __DEFAULT_FN_ATTRS 973_mm256_movepi64_mask (__m256i __A) 974{ 975 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); 976} 977 978static __inline__ __m256 __DEFAULT_FN_ATTRS 979_mm256_broadcast_f32x2 (__m128 __A) 980{ 981 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 982 (__v8sf)_mm256_undefined_ps(), 983 (__mmask8) -1); 984} 985 986static __inline__ __m256 __DEFAULT_FN_ATTRS 987_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 988{ 989 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 990 (__v8sf) __O, 991 __M); 992} 993 994static __inline__ __m256 __DEFAULT_FN_ATTRS 995_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 996{ 997 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 998 (__v8sf) _mm256_setzero_ps (), 999 __M); 1000} 1001 1002static __inline__ __m256d __DEFAULT_FN_ATTRS 1003_mm256_broadcast_f64x2(__m128d __A) 1004{ 1005 return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 1006 0, 1, 0, 1); 1007} 1008 1009static __inline__ __m256d __DEFAULT_FN_ATTRS 1010_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) 1011{ 1012 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, 1013 (__v4df)_mm256_broadcast_f64x2(__A), 1014 (__v4df)__O); 1015} 1016 1017static __inline__ __m256d __DEFAULT_FN_ATTRS 1018_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 1019{ 1020 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, 1021 (__v4df)_mm256_broadcast_f64x2(__A), 1022 (__v4df)_mm256_setzero_pd()); 1023} 1024 1025static __inline__ __m128i __DEFAULT_FN_ATTRS 1026_mm_broadcast_i32x2 (__m128i __A) 1027{ 1028 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1029 (__v4si)_mm_undefined_si128(), 1030 (__mmask8) -1); 1031} 1032 1033static __inline__ __m128i __DEFAULT_FN_ATTRS 1034_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 1035{ 1036 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1037 (__v4si) __O, 1038 __M); 1039} 1040 1041static __inline__ __m128i __DEFAULT_FN_ATTRS 1042_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1043{ 1044 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, 1045 (__v4si) _mm_setzero_si128 (), 1046 __M); 1047} 1048 1049static __inline__ __m256i __DEFAULT_FN_ATTRS 1050_mm256_broadcast_i32x2 (__m128i __A) 1051{ 1052 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1053 (__v8si)_mm256_undefined_si256(), 1054 (__mmask8) -1); 1055} 1056 1057static __inline__ __m256i __DEFAULT_FN_ATTRS 1058_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 1059{ 1060 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1061 (__v8si) __O, 1062 __M); 1063} 1064 1065static __inline__ __m256i __DEFAULT_FN_ATTRS 1066_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1067{ 1068 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, 1069 (__v8si) _mm256_setzero_si256 (), 1070 __M); 1071} 1072 1073static __inline__ __m256i __DEFAULT_FN_ATTRS 1074_mm256_broadcast_i64x2(__m128i __A) 1075{ 1076 return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 1077 0, 1, 0, 1); 1078} 1079 1080static __inline__ __m256i __DEFAULT_FN_ATTRS 1081_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) 1082{ 1083 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 1084 (__v4di)_mm256_broadcast_i64x2(__A), 1085 (__v4di)__O); 1086} 1087 1088static __inline__ __m256i __DEFAULT_FN_ATTRS 1089_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 1090{ 1091 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 1092 (__v4di)_mm256_broadcast_i64x2(__A), 1093 (__v4di)_mm256_setzero_si256()); 1094} 1095 1096#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \ 1097 (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \ 1098 (__v4df)_mm256_undefined_pd(), \ 1099 ((imm) & 1) ? 2 : 0, \ 1100 ((imm) & 1) ? 3 : 1); }) 1101 1102#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ 1103 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1104 (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ 1105 (__v2df)(W)); }) 1106 1107#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ 1108 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 1109 (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ 1110 (__v2df)_mm_setzero_pd()); }) 1111 1112#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \ 1113 (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \ 1114 (__v4di)_mm256_undefined_si256(), \ 1115 ((imm) & 1) ? 2 : 0, \ 1116 ((imm) & 1) ? 3 : 1); }) 1117 1118#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ 1119 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 1120 (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ 1121 (__v2di)(W)); }) 1122 1123#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ 1124 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 1125 (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ 1126 (__v2di)_mm_setzero_di()); }) 1127 1128#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \ 1129 (__m256d)__builtin_shufflevector((__v4df)(A), \ 1130 (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \ 1131 ((imm) & 0x1) ? 0 : 4, \ 1132 ((imm) & 0x1) ? 1 : 5, \ 1133 ((imm) & 0x1) ? 4 : 2, \ 1134 ((imm) & 0x1) ? 5 : 3); }) 1135 1136#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \ 1137 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 1138 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ 1139 (__v4df)(W)); }) 1140 1141#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \ 1142 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 1143 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ 1144 (__v4df)_mm256_setzero_pd()); }) 1145 1146#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \ 1147 (__m256i)__builtin_shufflevector((__v4di)(A), \ 1148 (__v4di)_mm256_castsi128_si256((__m128i)(B)), \ 1149 ((imm) & 0x1) ? 0 : 4, \ 1150 ((imm) & 0x1) ? 1 : 5, \ 1151 ((imm) & 0x1) ? 4 : 2, \ 1152 ((imm) & 0x1) ? 5 : 3); }) 1153 1154#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \ 1155 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 1156 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ 1157 (__v4di)(W)); }) 1158 1159#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \ 1160 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 1161 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ 1162 (__v4di)_mm256_setzero_si256()); }) 1163 1164#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1165 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1166 (__mmask8)(U)); }) 1167 1168#define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \ 1169 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1170 (__mmask8)-1); }) 1171 1172#define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \ 1173 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1174 (__mmask8)(U)); }) 1175 1176#define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \ 1177 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1178 (__mmask8)-1); }) 1179 1180#define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1181 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1182 (__mmask8)(U)); }) 1183 1184#define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \ 1185 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1186 (__mmask8)-1); }) 1187 1188#define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \ 1189 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1190 (__mmask8)(U)); }) 1191 1192#define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \ 1193 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1194 (__mmask8)-1); }) 1195 1196#undef __DEFAULT_FN_ATTRS 1197 1198#endif 1199