1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512VLINTRIN_H 29#define __AVX512VLINTRIN_H 30 31#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) 32 33/* Doesn't require avx512vl, used in avx512dqintrin.h */ 34static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 35_mm_setzero_di(void) { 36 return (__m128i)(__v2di){ 0LL, 0LL}; 37} 38 39/* Integer compare */ 40 41static __inline__ __mmask8 __DEFAULT_FN_ATTRS 42_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) { 43 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, 44 (__mmask8)-1); 45} 46 47static __inline__ __mmask8 __DEFAULT_FN_ATTRS 48_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 49 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, 50 __u); 51} 52 53static __inline__ __mmask8 __DEFAULT_FN_ATTRS 54_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { 55 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, 56 (__mmask8)-1); 57} 58 59static __inline__ __mmask8 __DEFAULT_FN_ATTRS 60_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 61 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, 62 __u); 63} 64 65static __inline__ __mmask8 __DEFAULT_FN_ATTRS 66_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { 67 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, 68 (__mmask8)-1); 69} 70 71static __inline__ __mmask8 __DEFAULT_FN_ATTRS 72_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 73 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, 74 __u); 75} 76 77static __inline__ __mmask8 __DEFAULT_FN_ATTRS 78_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) { 79 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, 80 (__mmask8)-1); 81} 82 83static __inline__ __mmask8 __DEFAULT_FN_ATTRS 84_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 85 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, 86 __u); 87} 88 89static __inline__ __mmask8 __DEFAULT_FN_ATTRS 90_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { 91 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, 92 (__mmask8)-1); 93} 94 95static __inline__ __mmask8 __DEFAULT_FN_ATTRS 96_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 97 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, 98 __u); 99} 100 101static __inline__ __mmask8 __DEFAULT_FN_ATTRS 102_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { 103 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, 104 (__mmask8)-1); 105} 106 107static __inline__ __mmask8 __DEFAULT_FN_ATTRS 108_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 109 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, 110 __u); 111} 112 113static __inline__ __mmask8 __DEFAULT_FN_ATTRS 114_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { 115 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, 116 (__mmask8)-1); 117} 118 119static __inline__ __mmask8 __DEFAULT_FN_ATTRS 120_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 121 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, 122 __u); 123} 124 125static __inline__ __mmask8 __DEFAULT_FN_ATTRS 126_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) { 127 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, 128 (__mmask8)-1); 129} 130 131static __inline__ __mmask8 __DEFAULT_FN_ATTRS 132_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 133 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, 134 __u); 135} 136 137 138static __inline__ __mmask8 __DEFAULT_FN_ATTRS 139_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) { 140 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, 141 (__mmask8)-1); 142} 143 144static __inline__ __mmask8 __DEFAULT_FN_ATTRS 145_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 146 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, 147 __u); 148} 149 150static __inline__ __mmask8 __DEFAULT_FN_ATTRS 151_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { 152 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, 153 (__mmask8)-1); 154} 155 156static __inline__ __mmask8 __DEFAULT_FN_ATTRS 157_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 158 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, 159 __u); 160} 161 162static __inline__ __mmask8 __DEFAULT_FN_ATTRS 163_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { 164 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, 165 (__mmask8)-1); 166} 167 168static __inline__ __mmask8 __DEFAULT_FN_ATTRS 169_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 170 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, 171 __u); 172} 173 174static __inline__ __mmask8 __DEFAULT_FN_ATTRS 175_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { 176 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, 177 (__mmask8)-1); 178} 179 180static __inline__ __mmask8 __DEFAULT_FN_ATTRS 181_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 182 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, 183 __u); 184} 185 186static __inline__ __mmask8 __DEFAULT_FN_ATTRS 187_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { 188 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, 189 (__mmask8)-1); 190} 191 192static __inline__ __mmask8 __DEFAULT_FN_ATTRS 193_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 194 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, 195 __u); 196} 197 198static __inline__ __mmask8 __DEFAULT_FN_ATTRS 199_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { 200 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, 201 (__mmask8)-1); 202} 203 204static __inline__ __mmask8 __DEFAULT_FN_ATTRS 205_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 206 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, 207 __u); 208} 209 210static __inline__ __mmask8 __DEFAULT_FN_ATTRS 211_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { 212 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, 213 (__mmask8)-1); 214} 215 216static __inline__ __mmask8 __DEFAULT_FN_ATTRS 217_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 218 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, 219 __u); 220} 221 222static __inline__ __mmask8 __DEFAULT_FN_ATTRS 223_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { 224 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, 225 (__mmask8)-1); 226} 227 228static __inline__ __mmask8 __DEFAULT_FN_ATTRS 229_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 230 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, 231 __u); 232} 233 234static __inline__ __mmask8 __DEFAULT_FN_ATTRS 235_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { 236 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, 237 (__mmask8)-1); 238} 239 240static __inline__ __mmask8 __DEFAULT_FN_ATTRS 241_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 242 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, 243 __u); 244} 245 246static __inline__ __mmask8 __DEFAULT_FN_ATTRS 247_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { 248 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, 249 (__mmask8)-1); 250} 251 252static __inline__ __mmask8 __DEFAULT_FN_ATTRS 253_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 254 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, 255 __u); 256} 257 258static __inline__ __mmask8 __DEFAULT_FN_ATTRS 259_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { 260 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, 261 (__mmask8)-1); 262} 263 264static __inline__ __mmask8 __DEFAULT_FN_ATTRS 265_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 266 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, 267 __u); 268} 269 270static __inline__ __mmask8 __DEFAULT_FN_ATTRS 271_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { 272 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, 273 (__mmask8)-1); 274} 275 276static __inline__ __mmask8 __DEFAULT_FN_ATTRS 277_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 278 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, 279 __u); 280} 281 282static __inline__ __mmask8 __DEFAULT_FN_ATTRS 283_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { 284 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, 285 (__mmask8)-1); 286} 287 288static __inline__ __mmask8 __DEFAULT_FN_ATTRS 289_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 290 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, 291 __u); 292} 293 294static __inline__ __mmask8 __DEFAULT_FN_ATTRS 295_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { 296 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, 297 (__mmask8)-1); 298} 299 300static __inline__ __mmask8 __DEFAULT_FN_ATTRS 301_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 302 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, 303 __u); 304} 305 306static __inline__ __mmask8 __DEFAULT_FN_ATTRS 307_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { 308 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, 309 (__mmask8)-1); 310} 311 312static __inline__ __mmask8 __DEFAULT_FN_ATTRS 313_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 314 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, 315 __u); 316} 317 318static __inline__ __mmask8 __DEFAULT_FN_ATTRS 319_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { 320 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, 321 (__mmask8)-1); 322} 323 324static __inline__ __mmask8 __DEFAULT_FN_ATTRS 325_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 326 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, 327 __u); 328} 329 330static __inline__ __mmask8 __DEFAULT_FN_ATTRS 331_mm_cmple_epi32_mask(__m128i __a, __m128i __b) { 332 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, 333 (__mmask8)-1); 334} 335 336static __inline__ __mmask8 __DEFAULT_FN_ATTRS 337_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 338 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, 339 __u); 340} 341 342static __inline__ __mmask8 __DEFAULT_FN_ATTRS 343_mm_cmple_epu32_mask(__m128i __a, __m128i __b) { 344 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, 345 (__mmask8)-1); 346} 347 348static __inline__ __mmask8 __DEFAULT_FN_ATTRS 349_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 350 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, 351 __u); 352} 353 354static __inline__ __mmask8 __DEFAULT_FN_ATTRS 355_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { 356 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, 357 (__mmask8)-1); 358} 359 360static __inline__ __mmask8 __DEFAULT_FN_ATTRS 361_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 362 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, 363 __u); 364} 365 366static __inline__ __mmask8 __DEFAULT_FN_ATTRS 367_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { 368 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, 369 (__mmask8)-1); 370} 371 372static __inline__ __mmask8 __DEFAULT_FN_ATTRS 373_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 374 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, 375 __u); 376} 377 378static __inline__ __mmask8 __DEFAULT_FN_ATTRS 379_mm_cmple_epi64_mask(__m128i __a, __m128i __b) { 380 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, 381 (__mmask8)-1); 382} 383 384static __inline__ __mmask8 __DEFAULT_FN_ATTRS 385_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 386 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, 387 __u); 388} 389 390static __inline__ __mmask8 __DEFAULT_FN_ATTRS 391_mm_cmple_epu64_mask(__m128i __a, __m128i __b) { 392 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, 393 (__mmask8)-1); 394} 395 396static __inline__ __mmask8 __DEFAULT_FN_ATTRS 397_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 398 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, 399 __u); 400} 401 402static __inline__ __mmask8 __DEFAULT_FN_ATTRS 403_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) { 404 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, 405 (__mmask8)-1); 406} 407 408static __inline__ __mmask8 __DEFAULT_FN_ATTRS 409_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 410 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, 411 __u); 412} 413 414static __inline__ __mmask8 __DEFAULT_FN_ATTRS 415_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { 416 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, 417 (__mmask8)-1); 418} 419 420static __inline__ __mmask8 __DEFAULT_FN_ATTRS 421_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 422 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, 423 __u); 424} 425 426static __inline__ __mmask8 __DEFAULT_FN_ATTRS 427_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { 428 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, 429 (__mmask8)-1); 430} 431 432static __inline__ __mmask8 __DEFAULT_FN_ATTRS 433_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 434 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, 435 __u); 436} 437 438static __inline__ __mmask8 __DEFAULT_FN_ATTRS 439_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { 440 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, 441 (__mmask8)-1); 442} 443 444static __inline__ __mmask8 __DEFAULT_FN_ATTRS 445_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 446 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, 447 __u); 448} 449 450static __inline__ __mmask8 __DEFAULT_FN_ATTRS 451_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { 452 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, 453 (__mmask8)-1); 454} 455 456static __inline__ __mmask8 __DEFAULT_FN_ATTRS 457_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 458 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, 459 __u); 460} 461 462static __inline__ __mmask8 __DEFAULT_FN_ATTRS 463_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { 464 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, 465 (__mmask8)-1); 466} 467 468static __inline__ __mmask8 __DEFAULT_FN_ATTRS 469_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 470 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, 471 __u); 472} 473 474static __inline__ __mmask8 __DEFAULT_FN_ATTRS 475_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { 476 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, 477 (__mmask8)-1); 478} 479 480static __inline__ __mmask8 __DEFAULT_FN_ATTRS 481_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 482 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, 483 __u); 484} 485 486static __inline__ __mmask8 __DEFAULT_FN_ATTRS 487_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { 488 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, 489 (__mmask8)-1); 490} 491 492static __inline__ __mmask8 __DEFAULT_FN_ATTRS 493_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 494 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, 495 __u); 496} 497 498static __inline__ __mmask8 __DEFAULT_FN_ATTRS 499_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { 500 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, 501 (__mmask8)-1); 502} 503 504static __inline__ __mmask8 __DEFAULT_FN_ATTRS 505_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 506 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, 507 __u); 508} 509 510static __inline__ __mmask8 __DEFAULT_FN_ATTRS 511_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { 512 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, 513 (__mmask8)-1); 514} 515 516static __inline__ __mmask8 __DEFAULT_FN_ATTRS 517_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 518 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, 519 __u); 520} 521 522static __inline__ __mmask8 __DEFAULT_FN_ATTRS 523_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { 524 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, 525 (__mmask8)-1); 526} 527 528static __inline__ __mmask8 __DEFAULT_FN_ATTRS 529_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 530 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, 531 __u); 532} 533 534static __inline__ __mmask8 __DEFAULT_FN_ATTRS 535_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { 536 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, 537 (__mmask8)-1); 538} 539 540static __inline__ __mmask8 __DEFAULT_FN_ATTRS 541_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 542 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, 543 __u); 544} 545 546static __inline__ __mmask8 __DEFAULT_FN_ATTRS 547_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { 548 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, 549 (__mmask8)-1); 550} 551 552static __inline__ __mmask8 __DEFAULT_FN_ATTRS 553_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 554 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, 555 __u); 556} 557 558static __inline__ __mmask8 __DEFAULT_FN_ATTRS 559_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { 560 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, 561 (__mmask8)-1); 562} 563 564static __inline__ __mmask8 __DEFAULT_FN_ATTRS 565_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 566 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, 567 __u); 568} 569 570static __inline__ __mmask8 __DEFAULT_FN_ATTRS 571_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { 572 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, 573 (__mmask8)-1); 574} 575 576static __inline__ __mmask8 __DEFAULT_FN_ATTRS 577_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 578 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, 579 __u); 580} 581 582static __inline__ __mmask8 __DEFAULT_FN_ATTRS 583_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { 584 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, 585 (__mmask8)-1); 586} 587 588static __inline__ __mmask8 __DEFAULT_FN_ATTRS 589_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 590 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, 591 __u); 592} 593 594static __inline__ __mmask8 __DEFAULT_FN_ATTRS 595_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { 596 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, 597 (__mmask8)-1); 598} 599 600static __inline__ __mmask8 __DEFAULT_FN_ATTRS 601_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 602 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, 603 __u); 604} 605 606static __inline__ __mmask8 __DEFAULT_FN_ATTRS 607_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { 608 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, 609 (__mmask8)-1); 610} 611 612static __inline__ __mmask8 __DEFAULT_FN_ATTRS 613_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 614 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, 615 __u); 616} 617 618static __inline__ __m256i __DEFAULT_FN_ATTRS 619_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 620{ 621 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 622 (__v8si)_mm256_add_epi32(__A, __B), 623 (__v8si)__W); 624} 625 626static __inline__ __m256i __DEFAULT_FN_ATTRS 627_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 628{ 629 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 630 (__v8si)_mm256_add_epi32(__A, __B), 631 (__v8si)_mm256_setzero_si256()); 632} 633 634static __inline__ __m256i __DEFAULT_FN_ATTRS 635_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 636{ 637 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 638 (__v4di)_mm256_add_epi64(__A, __B), 639 (__v4di)__W); 640} 641 642static __inline__ __m256i __DEFAULT_FN_ATTRS 643_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 644{ 645 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 646 (__v4di)_mm256_add_epi64(__A, __B), 647 (__v4di)_mm256_setzero_si256()); 648} 649 650static __inline__ __m256i __DEFAULT_FN_ATTRS 651_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 652{ 653 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 654 (__v8si)_mm256_sub_epi32(__A, __B), 655 (__v8si)__W); 656} 657 658static __inline__ __m256i __DEFAULT_FN_ATTRS 659_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 660{ 661 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 662 (__v8si)_mm256_sub_epi32(__A, __B), 663 (__v8si)_mm256_setzero_si256()); 664} 665 666static __inline__ __m256i __DEFAULT_FN_ATTRS 667_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 668{ 669 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 670 (__v4di)_mm256_sub_epi64(__A, __B), 671 (__v4di)__W); 672} 673 674static __inline__ __m256i __DEFAULT_FN_ATTRS 675_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 676{ 677 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 678 (__v4di)_mm256_sub_epi64(__A, __B), 679 (__v4di)_mm256_setzero_si256()); 680} 681 682static __inline__ __m128i __DEFAULT_FN_ATTRS 683_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 684{ 685 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 686 (__v4si)_mm_add_epi32(__A, __B), 687 (__v4si)__W); 688} 689 690static __inline__ __m128i __DEFAULT_FN_ATTRS 691_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 692{ 693 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 694 (__v4si)_mm_add_epi32(__A, __B), 695 (__v4si)_mm_setzero_si128()); 696} 697 698static __inline__ __m128i __DEFAULT_FN_ATTRS 699_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 700{ 701 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 702 (__v2di)_mm_add_epi64(__A, __B), 703 (__v2di)__W); 704} 705 706static __inline__ __m128i __DEFAULT_FN_ATTRS 707_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 708{ 709 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 710 (__v2di)_mm_add_epi64(__A, __B), 711 (__v2di)_mm_setzero_si128()); 712} 713 714static __inline__ __m128i __DEFAULT_FN_ATTRS 715_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 716{ 717 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 718 (__v4si)_mm_sub_epi32(__A, __B), 719 (__v4si)__W); 720} 721 722static __inline__ __m128i __DEFAULT_FN_ATTRS 723_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 724{ 725 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 726 (__v4si)_mm_sub_epi32(__A, __B), 727 (__v4si)_mm_setzero_si128()); 728} 729 730static __inline__ __m128i __DEFAULT_FN_ATTRS 731_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 732{ 733 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 734 (__v2di)_mm_sub_epi64(__A, __B), 735 (__v2di)__W); 736} 737 738static __inline__ __m128i __DEFAULT_FN_ATTRS 739_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 740{ 741 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 742 (__v2di)_mm_sub_epi64(__A, __B), 743 (__v2di)_mm_setzero_si128()); 744} 745 746static __inline__ __m256i __DEFAULT_FN_ATTRS 747_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 748{ 749 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 750 (__v4di)_mm256_mul_epi32(__X, __Y), 751 (__v4di)__W); 752} 753 754static __inline__ __m256i __DEFAULT_FN_ATTRS 755_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 756{ 757 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 758 (__v4di)_mm256_mul_epi32(__X, __Y), 759 (__v4di)_mm256_setzero_si256()); 760} 761 762static __inline__ __m128i __DEFAULT_FN_ATTRS 763_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 764{ 765 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 766 (__v2di)_mm_mul_epi32(__X, __Y), 767 (__v2di)__W); 768} 769 770static __inline__ __m128i __DEFAULT_FN_ATTRS 771_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 772{ 773 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 774 (__v2di)_mm_mul_epi32(__X, __Y), 775 (__v2di)_mm_setzero_si128()); 776} 777 778static __inline__ __m256i __DEFAULT_FN_ATTRS 779_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 780{ 781 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 782 (__v4di)_mm256_mul_epu32(__X, __Y), 783 (__v4di)__W); 784} 785 786static __inline__ __m256i __DEFAULT_FN_ATTRS 787_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 788{ 789 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 790 (__v4di)_mm256_mul_epu32(__X, __Y), 791 (__v4di)_mm256_setzero_si256()); 792} 793 794static __inline__ __m128i __DEFAULT_FN_ATTRS 795_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 796{ 797 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 798 (__v2di)_mm_mul_epu32(__X, __Y), 799 (__v2di)__W); 800} 801 802static __inline__ __m128i __DEFAULT_FN_ATTRS 803_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 804{ 805 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 806 (__v2di)_mm_mul_epu32(__X, __Y), 807 (__v2di)_mm_setzero_si128()); 808} 809 810static __inline__ __m256i __DEFAULT_FN_ATTRS 811_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 812{ 813 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 814 (__v8si)_mm256_mullo_epi32(__A, __B), 815 (__v8si)_mm256_setzero_si256()); 816} 817 818static __inline__ __m256i __DEFAULT_FN_ATTRS 819_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 820{ 821 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 822 (__v8si)_mm256_mullo_epi32(__A, __B), 823 (__v8si)__W); 824} 825 826static __inline__ __m128i __DEFAULT_FN_ATTRS 827_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 828{ 829 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 830 (__v4si)_mm_mullo_epi32(__A, __B), 831 (__v4si)_mm_setzero_si128()); 832} 833 834static __inline__ __m128i __DEFAULT_FN_ATTRS 835_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 836{ 837 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 838 (__v4si)_mm_mullo_epi32(__A, __B), 839 (__v4si)__W); 840} 841 842static __inline__ __m256i __DEFAULT_FN_ATTRS 843_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 844{ 845 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 846 (__v8si)_mm256_and_si256(__A, __B), 847 (__v8si)__W); 848} 849 850static __inline__ __m256i __DEFAULT_FN_ATTRS 851_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 852{ 853 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 854} 855 856static __inline__ __m128i __DEFAULT_FN_ATTRS 857_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 858{ 859 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 860 (__v4si)_mm_and_si128(__A, __B), 861 (__v4si)__W); 862} 863 864static __inline__ __m128i __DEFAULT_FN_ATTRS 865_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 866{ 867 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 868} 869 870static __inline__ __m256i __DEFAULT_FN_ATTRS 871_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 872{ 873 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 874 (__v8si)_mm256_andnot_si256(__A, __B), 875 (__v8si)__W); 876} 877 878static __inline__ __m256i __DEFAULT_FN_ATTRS 879_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 880{ 881 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 882 __U, __A, __B); 883} 884 885static __inline__ __m128i __DEFAULT_FN_ATTRS 886_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 887{ 888 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 889 (__v4si)_mm_andnot_si128(__A, __B), 890 (__v4si)__W); 891} 892 893static __inline__ __m128i __DEFAULT_FN_ATTRS 894_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 895{ 896 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 897} 898 899static __inline__ __m256i __DEFAULT_FN_ATTRS 900_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 901{ 902 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 903 (__v8si)_mm256_or_si256(__A, __B), 904 (__v8si)__W); 905} 906 907static __inline__ __m256i __DEFAULT_FN_ATTRS 908_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 909{ 910 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 911} 912 913static __inline__ __m128i __DEFAULT_FN_ATTRS 914_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 915{ 916 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 917 (__v4si)_mm_or_si128(__A, __B), 918 (__v4si)__W); 919} 920 921static __inline__ __m128i __DEFAULT_FN_ATTRS 922_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 923{ 924 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 925} 926 927static __inline__ __m256i __DEFAULT_FN_ATTRS 928_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 929{ 930 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 931 (__v8si)_mm256_xor_si256(__A, __B), 932 (__v8si)__W); 933} 934 935static __inline__ __m256i __DEFAULT_FN_ATTRS 936_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 937{ 938 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 939} 940 941static __inline__ __m128i __DEFAULT_FN_ATTRS 942_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, 943 __m128i __B) 944{ 945 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 946 (__v4si)_mm_xor_si128(__A, __B), 947 (__v4si)__W); 948} 949 950static __inline__ __m128i __DEFAULT_FN_ATTRS 951_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 952{ 953 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 954} 955 956static __inline__ __m256i __DEFAULT_FN_ATTRS 957_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 958{ 959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 960 (__v4di)_mm256_and_si256(__A, __B), 961 (__v4di)__W); 962} 963 964static __inline__ __m256i __DEFAULT_FN_ATTRS 965_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) 966{ 967 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 968} 969 970static __inline__ __m128i __DEFAULT_FN_ATTRS 971_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 972{ 973 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 974 (__v2di)_mm_and_si128(__A, __B), 975 (__v2di)__W); 976} 977 978static __inline__ __m128i __DEFAULT_FN_ATTRS 979_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 980{ 981 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 982} 983 984static __inline__ __m256i __DEFAULT_FN_ATTRS 985_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 986{ 987 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 988 (__v4di)_mm256_andnot_si256(__A, __B), 989 (__v4di)__W); 990} 991 992static __inline__ __m256i __DEFAULT_FN_ATTRS 993_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 994{ 995 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 996 __U, __A, __B); 997} 998 999static __inline__ __m128i __DEFAULT_FN_ATTRS 1000_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1001{ 1002 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1003 (__v2di)_mm_andnot_si128(__A, __B), 1004 (__v2di)__W); 1005} 1006 1007static __inline__ __m128i __DEFAULT_FN_ATTRS 1008_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1009{ 1010 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 1011} 1012 1013static __inline__ __m256i __DEFAULT_FN_ATTRS 1014_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1015{ 1016 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1017 (__v4di)_mm256_or_si256(__A, __B), 1018 (__v4di)__W); 1019} 1020 1021static __inline__ __m256i __DEFAULT_FN_ATTRS 1022_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1023{ 1024 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 1025} 1026 1027static __inline__ __m128i __DEFAULT_FN_ATTRS 1028_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1029{ 1030 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1031 (__v2di)_mm_or_si128(__A, __B), 1032 (__v2di)__W); 1033} 1034 1035static __inline__ __m128i __DEFAULT_FN_ATTRS 1036_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1037{ 1038 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 1039} 1040 1041static __inline__ __m256i __DEFAULT_FN_ATTRS 1042_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1043{ 1044 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1045 (__v4di)_mm256_xor_si256(__A, __B), 1046 (__v4di)__W); 1047} 1048 1049static __inline__ __m256i __DEFAULT_FN_ATTRS 1050_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1051{ 1052 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 1053} 1054 1055static __inline__ __m128i __DEFAULT_FN_ATTRS 1056_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 1057 __m128i __B) 1058{ 1059 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1060 (__v2di)_mm_xor_si128(__A, __B), 1061 (__v2di)__W); 1062} 1063 1064static __inline__ __m128i __DEFAULT_FN_ATTRS 1065_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1066{ 1067 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 1068} 1069 1070#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \ 1071 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 1072 (__v4si)(__m128i)(b), (int)(p), \ 1073 (__mmask8)-1); }) 1074 1075#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 1076 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 1077 (__v4si)(__m128i)(b), (int)(p), \ 1078 (__mmask8)(m)); }) 1079 1080#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \ 1081 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 1082 (__v4si)(__m128i)(b), (int)(p), \ 1083 (__mmask8)-1); }) 1084 1085#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 1086 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 1087 (__v4si)(__m128i)(b), (int)(p), \ 1088 (__mmask8)(m)); }) 1089 1090#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \ 1091 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 1092 (__v8si)(__m256i)(b), (int)(p), \ 1093 (__mmask8)-1); }) 1094 1095#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 1096 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 1097 (__v8si)(__m256i)(b), (int)(p), \ 1098 (__mmask8)(m)); }) 1099 1100#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \ 1101 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 1102 (__v8si)(__m256i)(b), (int)(p), \ 1103 (__mmask8)-1); }) 1104 1105#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 1106 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 1107 (__v8si)(__m256i)(b), (int)(p), \ 1108 (__mmask8)(m)); }) 1109 1110#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \ 1111 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 1112 (__v2di)(__m128i)(b), (int)(p), \ 1113 (__mmask8)-1); }) 1114 1115#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 1116 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 1117 (__v2di)(__m128i)(b), (int)(p), \ 1118 (__mmask8)(m)); }) 1119 1120#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \ 1121 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 1122 (__v2di)(__m128i)(b), (int)(p), \ 1123 (__mmask8)-1); }) 1124 1125#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 1126 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 1127 (__v2di)(__m128i)(b), (int)(p), \ 1128 (__mmask8)(m)); }) 1129 1130#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \ 1131 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 1132 (__v4di)(__m256i)(b), (int)(p), \ 1133 (__mmask8)-1); }) 1134 1135#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 1136 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 1137 (__v4di)(__m256i)(b), (int)(p), \ 1138 (__mmask8)(m)); }) 1139 1140#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \ 1141 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 1142 (__v4di)(__m256i)(b), (int)(p), \ 1143 (__mmask8)-1); }) 1144 1145#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 1146 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 1147 (__v4di)(__m256i)(b), (int)(p), \ 1148 (__mmask8)(m)); }) 1149 1150#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \ 1151 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 1152 (__v8sf)(__m256)(b), (int)(p), \ 1153 (__mmask8)-1); }) 1154 1155#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ 1156 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 1157 (__v8sf)(__m256)(b), (int)(p), \ 1158 (__mmask8)(m)); }) 1159 1160#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \ 1161 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 1162 (__v4df)(__m256d)(b), (int)(p), \ 1163 (__mmask8)-1); }) 1164 1165#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ 1166 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 1167 (__v4df)(__m256d)(b), (int)(p), \ 1168 (__mmask8)(m)); }) 1169 1170#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \ 1171 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 1172 (__v4sf)(__m128)(b), (int)(p), \ 1173 (__mmask8)-1); }) 1174 1175#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ 1176 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 1177 (__v4sf)(__m128)(b), (int)(p), \ 1178 (__mmask8)(m)); }) 1179 1180#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \ 1181 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 1182 (__v2df)(__m128d)(b), (int)(p), \ 1183 (__mmask8)-1); }) 1184 1185#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ 1186 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 1187 (__v2df)(__m128d)(b), (int)(p), \ 1188 (__mmask8)(m)); }) 1189 1190static __inline__ __m128d __DEFAULT_FN_ATTRS 1191_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1192{ 1193 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, 1194 (__v2df) __B, 1195 (__v2df) __C, 1196 (__mmask8) __U); 1197} 1198 1199static __inline__ __m128d __DEFAULT_FN_ATTRS 1200_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1201{ 1202 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A, 1203 (__v2df) __B, 1204 (__v2df) __C, 1205 (__mmask8) __U); 1206} 1207 1208static __inline__ __m128d __DEFAULT_FN_ATTRS 1209_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1210{ 1211 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, 1212 (__v2df) __B, 1213 (__v2df) __C, 1214 (__mmask8) __U); 1215} 1216 1217static __inline__ __m128d __DEFAULT_FN_ATTRS 1218_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1219{ 1220 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, 1221 (__v2df) __B, 1222 -(__v2df) __C, 1223 (__mmask8) __U); 1224} 1225 1226static __inline__ __m128d __DEFAULT_FN_ATTRS 1227_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1228{ 1229 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, 1230 (__v2df) __B, 1231 -(__v2df) __C, 1232 (__mmask8) __U); 1233} 1234 1235static __inline__ __m128d __DEFAULT_FN_ATTRS 1236_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1237{ 1238 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A, 1239 (__v2df) __B, 1240 (__v2df) __C, 1241 (__mmask8) __U); 1242} 1243 1244static __inline__ __m128d __DEFAULT_FN_ATTRS 1245_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1246{ 1247 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, 1248 (__v2df) __B, 1249 (__v2df) __C, 1250 (__mmask8) __U); 1251} 1252 1253static __inline__ __m128d __DEFAULT_FN_ATTRS 1254_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1255{ 1256 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, 1257 (__v2df) __B, 1258 -(__v2df) __C, 1259 (__mmask8) __U); 1260} 1261 1262static __inline__ __m256d __DEFAULT_FN_ATTRS 1263_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1264{ 1265 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, 1266 (__v4df) __B, 1267 (__v4df) __C, 1268 (__mmask8) __U); 1269} 1270 1271static __inline__ __m256d __DEFAULT_FN_ATTRS 1272_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1273{ 1274 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A, 1275 (__v4df) __B, 1276 (__v4df) __C, 1277 (__mmask8) __U); 1278} 1279 1280static __inline__ __m256d __DEFAULT_FN_ATTRS 1281_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1282{ 1283 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, 1284 (__v4df) __B, 1285 (__v4df) __C, 1286 (__mmask8) __U); 1287} 1288 1289static __inline__ __m256d __DEFAULT_FN_ATTRS 1290_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1291{ 1292 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, 1293 (__v4df) __B, 1294 -(__v4df) __C, 1295 (__mmask8) __U); 1296} 1297 1298static __inline__ __m256d __DEFAULT_FN_ATTRS 1299_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1300{ 1301 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, 1302 (__v4df) __B, 1303 -(__v4df) __C, 1304 (__mmask8) __U); 1305} 1306 1307static __inline__ __m256d __DEFAULT_FN_ATTRS 1308_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1309{ 1310 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A, 1311 (__v4df) __B, 1312 (__v4df) __C, 1313 (__mmask8) __U); 1314} 1315 1316static __inline__ __m256d __DEFAULT_FN_ATTRS 1317_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1318{ 1319 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, 1320 (__v4df) __B, 1321 (__v4df) __C, 1322 (__mmask8) __U); 1323} 1324 1325static __inline__ __m256d __DEFAULT_FN_ATTRS 1326_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1327{ 1328 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, 1329 (__v4df) __B, 1330 -(__v4df) __C, 1331 (__mmask8) __U); 1332} 1333 1334static __inline__ __m128 __DEFAULT_FN_ATTRS 1335_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1336{ 1337 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, 1338 (__v4sf) __B, 1339 (__v4sf) __C, 1340 (__mmask8) __U); 1341} 1342 1343static __inline__ __m128 __DEFAULT_FN_ATTRS 1344_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1345{ 1346 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A, 1347 (__v4sf) __B, 1348 (__v4sf) __C, 1349 (__mmask8) __U); 1350} 1351 1352static __inline__ __m128 __DEFAULT_FN_ATTRS 1353_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1354{ 1355 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, 1356 (__v4sf) __B, 1357 (__v4sf) __C, 1358 (__mmask8) __U); 1359} 1360 1361static __inline__ __m128 __DEFAULT_FN_ATTRS 1362_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1363{ 1364 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, 1365 (__v4sf) __B, 1366 -(__v4sf) __C, 1367 (__mmask8) __U); 1368} 1369 1370static __inline__ __m128 __DEFAULT_FN_ATTRS 1371_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1372{ 1373 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, 1374 (__v4sf) __B, 1375 -(__v4sf) __C, 1376 (__mmask8) __U); 1377} 1378 1379static __inline__ __m128 __DEFAULT_FN_ATTRS 1380_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1381{ 1382 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A, 1383 (__v4sf) __B, 1384 (__v4sf) __C, 1385 (__mmask8) __U); 1386} 1387 1388static __inline__ __m128 __DEFAULT_FN_ATTRS 1389_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1390{ 1391 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, 1392 (__v4sf) __B, 1393 (__v4sf) __C, 1394 (__mmask8) __U); 1395} 1396 1397static __inline__ __m128 __DEFAULT_FN_ATTRS 1398_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1399{ 1400 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, 1401 (__v4sf) __B, 1402 -(__v4sf) __C, 1403 (__mmask8) __U); 1404} 1405 1406static __inline__ __m256 __DEFAULT_FN_ATTRS 1407_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1408{ 1409 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, 1410 (__v8sf) __B, 1411 (__v8sf) __C, 1412 (__mmask8) __U); 1413} 1414 1415static __inline__ __m256 __DEFAULT_FN_ATTRS 1416_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1417{ 1418 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A, 1419 (__v8sf) __B, 1420 (__v8sf) __C, 1421 (__mmask8) __U); 1422} 1423 1424static __inline__ __m256 __DEFAULT_FN_ATTRS 1425_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1426{ 1427 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, 1428 (__v8sf) __B, 1429 (__v8sf) __C, 1430 (__mmask8) __U); 1431} 1432 1433static __inline__ __m256 __DEFAULT_FN_ATTRS 1434_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1435{ 1436 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, 1437 (__v8sf) __B, 1438 -(__v8sf) __C, 1439 (__mmask8) __U); 1440} 1441 1442static __inline__ __m256 __DEFAULT_FN_ATTRS 1443_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1444{ 1445 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, 1446 (__v8sf) __B, 1447 -(__v8sf) __C, 1448 (__mmask8) __U); 1449} 1450 1451static __inline__ __m256 __DEFAULT_FN_ATTRS 1452_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1453{ 1454 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A, 1455 (__v8sf) __B, 1456 (__v8sf) __C, 1457 (__mmask8) __U); 1458} 1459 1460static __inline__ __m256 __DEFAULT_FN_ATTRS 1461_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1462{ 1463 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, 1464 (__v8sf) __B, 1465 (__v8sf) __C, 1466 (__mmask8) __U); 1467} 1468 1469static __inline__ __m256 __DEFAULT_FN_ATTRS 1470_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1471{ 1472 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, 1473 (__v8sf) __B, 1474 -(__v8sf) __C, 1475 (__mmask8) __U); 1476} 1477 1478static __inline__ __m128d __DEFAULT_FN_ATTRS 1479_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1480{ 1481 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, 1482 (__v2df) __B, 1483 (__v2df) __C, 1484 (__mmask8) __U); 1485} 1486 1487static __inline__ __m128d __DEFAULT_FN_ATTRS 1488_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1489{ 1490 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A, 1491 (__v2df) __B, 1492 (__v2df) __C, 1493 (__mmask8) 1494 __U); 1495} 1496 1497static __inline__ __m128d __DEFAULT_FN_ATTRS 1498_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1499{ 1500 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, 1501 (__v2df) __B, 1502 (__v2df) __C, 1503 (__mmask8) 1504 __U); 1505} 1506 1507static __inline__ __m128d __DEFAULT_FN_ATTRS 1508_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1509{ 1510 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, 1511 (__v2df) __B, 1512 -(__v2df) __C, 1513 (__mmask8) __U); 1514} 1515 1516static __inline__ __m128d __DEFAULT_FN_ATTRS 1517_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1518{ 1519 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, 1520 (__v2df) __B, 1521 -(__v2df) __C, 1522 (__mmask8) 1523 __U); 1524} 1525 1526static __inline__ __m256d __DEFAULT_FN_ATTRS 1527_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1528{ 1529 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, 1530 (__v4df) __B, 1531 (__v4df) __C, 1532 (__mmask8) __U); 1533} 1534 1535static __inline__ __m256d __DEFAULT_FN_ATTRS 1536_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1537{ 1538 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A, 1539 (__v4df) __B, 1540 (__v4df) __C, 1541 (__mmask8) 1542 __U); 1543} 1544 1545static __inline__ __m256d __DEFAULT_FN_ATTRS 1546_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1547{ 1548 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, 1549 (__v4df) __B, 1550 (__v4df) __C, 1551 (__mmask8) 1552 __U); 1553} 1554 1555static __inline__ __m256d __DEFAULT_FN_ATTRS 1556_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1557{ 1558 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, 1559 (__v4df) __B, 1560 -(__v4df) __C, 1561 (__mmask8) __U); 1562} 1563 1564static __inline__ __m256d __DEFAULT_FN_ATTRS 1565_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1566{ 1567 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, 1568 (__v4df) __B, 1569 -(__v4df) __C, 1570 (__mmask8) 1571 __U); 1572} 1573 1574static __inline__ __m128 __DEFAULT_FN_ATTRS 1575_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1576{ 1577 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, 1578 (__v4sf) __B, 1579 (__v4sf) __C, 1580 (__mmask8) __U); 1581} 1582 1583static __inline__ __m128 __DEFAULT_FN_ATTRS 1584_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1585{ 1586 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A, 1587 (__v4sf) __B, 1588 (__v4sf) __C, 1589 (__mmask8) __U); 1590} 1591 1592static __inline__ __m128 __DEFAULT_FN_ATTRS 1593_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1594{ 1595 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, 1596 (__v4sf) __B, 1597 (__v4sf) __C, 1598 (__mmask8) __U); 1599} 1600 1601static __inline__ __m128 __DEFAULT_FN_ATTRS 1602_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1603{ 1604 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, 1605 (__v4sf) __B, 1606 -(__v4sf) __C, 1607 (__mmask8) __U); 1608} 1609 1610static __inline__ __m128 __DEFAULT_FN_ATTRS 1611_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1612{ 1613 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, 1614 (__v4sf) __B, 1615 -(__v4sf) __C, 1616 (__mmask8) __U); 1617} 1618 1619static __inline__ __m256 __DEFAULT_FN_ATTRS 1620_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 1621 __m256 __C) 1622{ 1623 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, 1624 (__v8sf) __B, 1625 (__v8sf) __C, 1626 (__mmask8) __U); 1627} 1628 1629static __inline__ __m256 __DEFAULT_FN_ATTRS 1630_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1631{ 1632 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A, 1633 (__v8sf) __B, 1634 (__v8sf) __C, 1635 (__mmask8) __U); 1636} 1637 1638static __inline__ __m256 __DEFAULT_FN_ATTRS 1639_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1640{ 1641 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, 1642 (__v8sf) __B, 1643 (__v8sf) __C, 1644 (__mmask8) __U); 1645} 1646 1647static __inline__ __m256 __DEFAULT_FN_ATTRS 1648_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1649{ 1650 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, 1651 (__v8sf) __B, 1652 -(__v8sf) __C, 1653 (__mmask8) __U); 1654} 1655 1656static __inline__ __m256 __DEFAULT_FN_ATTRS 1657_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1658{ 1659 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, 1660 (__v8sf) __B, 1661 -(__v8sf) __C, 1662 (__mmask8) __U); 1663} 1664 1665static __inline__ __m128d __DEFAULT_FN_ATTRS 1666_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1667{ 1668 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A, 1669 (__v2df) __B, 1670 (__v2df) __C, 1671 (__mmask8) __U); 1672} 1673 1674static __inline__ __m256d __DEFAULT_FN_ATTRS 1675_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1676{ 1677 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A, 1678 (__v4df) __B, 1679 (__v4df) __C, 1680 (__mmask8) __U); 1681} 1682 1683static __inline__ __m128 __DEFAULT_FN_ATTRS 1684_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1685{ 1686 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A, 1687 (__v4sf) __B, 1688 (__v4sf) __C, 1689 (__mmask8) __U); 1690} 1691 1692static __inline__ __m256 __DEFAULT_FN_ATTRS 1693_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1694{ 1695 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A, 1696 (__v8sf) __B, 1697 (__v8sf) __C, 1698 (__mmask8) __U); 1699} 1700 1701static __inline__ __m128d __DEFAULT_FN_ATTRS 1702_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1703{ 1704 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A, 1705 (__v2df) __B, 1706 (__v2df) __C, 1707 (__mmask8) 1708 __U); 1709} 1710 1711static __inline__ __m256d __DEFAULT_FN_ATTRS 1712_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1713{ 1714 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A, 1715 (__v4df) __B, 1716 (__v4df) __C, 1717 (__mmask8) 1718 __U); 1719} 1720 1721static __inline__ __m128 __DEFAULT_FN_ATTRS 1722_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1723{ 1724 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A, 1725 (__v4sf) __B, 1726 (__v4sf) __C, 1727 (__mmask8) __U); 1728} 1729 1730static __inline__ __m256 __DEFAULT_FN_ATTRS 1731_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1732{ 1733 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A, 1734 (__v8sf) __B, 1735 (__v8sf) __C, 1736 (__mmask8) __U); 1737} 1738 1739static __inline__ __m128d __DEFAULT_FN_ATTRS 1740_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1741{ 1742 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A, 1743 (__v2df) __B, 1744 (__v2df) __C, 1745 (__mmask8) __U); 1746} 1747 1748static __inline__ __m256d __DEFAULT_FN_ATTRS 1749_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1750{ 1751 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A, 1752 (__v4df) __B, 1753 (__v4df) __C, 1754 (__mmask8) __U); 1755} 1756 1757static __inline__ __m128 __DEFAULT_FN_ATTRS 1758_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1759{ 1760 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A, 1761 (__v4sf) __B, 1762 (__v4sf) __C, 1763 (__mmask8) __U); 1764} 1765 1766static __inline__ __m256 __DEFAULT_FN_ATTRS 1767_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1768{ 1769 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A, 1770 (__v8sf) __B, 1771 (__v8sf) __C, 1772 (__mmask8) __U); 1773} 1774 1775static __inline__ __m128d __DEFAULT_FN_ATTRS 1776_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1777{ 1778 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A, 1779 (__v2df) __B, 1780 (__v2df) __C, 1781 (__mmask8) __U); 1782} 1783 1784static __inline__ __m128d __DEFAULT_FN_ATTRS 1785_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1786{ 1787 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A, 1788 (__v2df) __B, 1789 (__v2df) __C, 1790 (__mmask8) __U); 1791} 1792 1793static __inline__ __m256d __DEFAULT_FN_ATTRS 1794_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1795{ 1796 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A, 1797 (__v4df) __B, 1798 (__v4df) __C, 1799 (__mmask8) __U); 1800} 1801 1802static __inline__ __m256d __DEFAULT_FN_ATTRS 1803_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1804{ 1805 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A, 1806 (__v4df) __B, 1807 (__v4df) __C, 1808 (__mmask8) __U); 1809} 1810 1811static __inline__ __m128 __DEFAULT_FN_ATTRS 1812_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1813{ 1814 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A, 1815 (__v4sf) __B, 1816 (__v4sf) __C, 1817 (__mmask8) __U); 1818} 1819 1820static __inline__ __m128 __DEFAULT_FN_ATTRS 1821_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1822{ 1823 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A, 1824 (__v4sf) __B, 1825 (__v4sf) __C, 1826 (__mmask8) __U); 1827} 1828 1829static __inline__ __m256 __DEFAULT_FN_ATTRS 1830_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1831{ 1832 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A, 1833 (__v8sf) __B, 1834 (__v8sf) __C, 1835 (__mmask8) __U); 1836} 1837 1838static __inline__ __m256 __DEFAULT_FN_ATTRS 1839_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1840{ 1841 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A, 1842 (__v8sf) __B, 1843 (__v8sf) __C, 1844 (__mmask8) __U); 1845} 1846 1847static __inline__ __m128d __DEFAULT_FN_ATTRS 1848_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1849 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1850 (__v2df)_mm_add_pd(__A, __B), 1851 (__v2df)__W); 1852} 1853 1854static __inline__ __m128d __DEFAULT_FN_ATTRS 1855_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { 1856 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1857 (__v2df)_mm_add_pd(__A, __B), 1858 (__v2df)_mm_setzero_pd()); 1859} 1860 1861static __inline__ __m256d __DEFAULT_FN_ATTRS 1862_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1863 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1864 (__v4df)_mm256_add_pd(__A, __B), 1865 (__v4df)__W); 1866} 1867 1868static __inline__ __m256d __DEFAULT_FN_ATTRS 1869_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1870 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1871 (__v4df)_mm256_add_pd(__A, __B), 1872 (__v4df)_mm256_setzero_pd()); 1873} 1874 1875static __inline__ __m128 __DEFAULT_FN_ATTRS 1876_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1877 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1878 (__v4sf)_mm_add_ps(__A, __B), 1879 (__v4sf)__W); 1880} 1881 1882static __inline__ __m128 __DEFAULT_FN_ATTRS 1883_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1884 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1885 (__v4sf)_mm_add_ps(__A, __B), 1886 (__v4sf)_mm_setzero_ps()); 1887} 1888 1889static __inline__ __m256 __DEFAULT_FN_ATTRS 1890_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 1891 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1892 (__v8sf)_mm256_add_ps(__A, __B), 1893 (__v8sf)__W); 1894} 1895 1896static __inline__ __m256 __DEFAULT_FN_ATTRS 1897_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1898 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1899 (__v8sf)_mm256_add_ps(__A, __B), 1900 (__v8sf)_mm256_setzero_ps()); 1901} 1902 1903static __inline__ __m128i __DEFAULT_FN_ATTRS 1904_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 1905 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 1906 (__v4si) __W, 1907 (__v4si) __A); 1908} 1909 1910static __inline__ __m256i __DEFAULT_FN_ATTRS 1911_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 1912 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 1913 (__v8si) __W, 1914 (__v8si) __A); 1915} 1916 1917static __inline__ __m128d __DEFAULT_FN_ATTRS 1918_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1919 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1920 (__v2df) __W, 1921 (__v2df) __A); 1922} 1923 1924static __inline__ __m256d __DEFAULT_FN_ATTRS 1925_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1926 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1927 (__v4df) __W, 1928 (__v4df) __A); 1929} 1930 1931static __inline__ __m128 __DEFAULT_FN_ATTRS 1932_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1933 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1934 (__v4sf) __W, 1935 (__v4sf) __A); 1936} 1937 1938static __inline__ __m256 __DEFAULT_FN_ATTRS 1939_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 1940 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 1941 (__v8sf) __W, 1942 (__v8sf) __A); 1943} 1944 1945static __inline__ __m128i __DEFAULT_FN_ATTRS 1946_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 1947 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 1948 (__v2di) __W, 1949 (__v2di) __A); 1950} 1951 1952static __inline__ __m256i __DEFAULT_FN_ATTRS 1953_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 1954 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 1955 (__v4di) __W, 1956 (__v4di) __A); 1957} 1958 1959static __inline__ __m128d __DEFAULT_FN_ATTRS 1960_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 1961 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1962 (__v2df) __W, 1963 (__mmask8) __U); 1964} 1965 1966static __inline__ __m128d __DEFAULT_FN_ATTRS 1967_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 1968 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1969 (__v2df) 1970 _mm_setzero_pd (), 1971 (__mmask8) __U); 1972} 1973 1974static __inline__ __m256d __DEFAULT_FN_ATTRS 1975_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 1976 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1977 (__v4df) __W, 1978 (__mmask8) __U); 1979} 1980 1981static __inline__ __m256d __DEFAULT_FN_ATTRS 1982_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 1983 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1984 (__v4df) 1985 _mm256_setzero_pd (), 1986 (__mmask8) __U); 1987} 1988 1989static __inline__ __m128i __DEFAULT_FN_ATTRS 1990_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 1991 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1992 (__v2di) __W, 1993 (__mmask8) __U); 1994} 1995 1996static __inline__ __m128i __DEFAULT_FN_ATTRS 1997_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 1998 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1999 (__v2di) 2000 _mm_setzero_si128 (), 2001 (__mmask8) __U); 2002} 2003 2004static __inline__ __m256i __DEFAULT_FN_ATTRS 2005_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2006 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 2007 (__v4di) __W, 2008 (__mmask8) __U); 2009} 2010 2011static __inline__ __m256i __DEFAULT_FN_ATTRS 2012_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 2013 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 2014 (__v4di) 2015 _mm256_setzero_si256 (), 2016 (__mmask8) __U); 2017} 2018 2019static __inline__ __m128 __DEFAULT_FN_ATTRS 2020_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2021 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 2022 (__v4sf) __W, 2023 (__mmask8) __U); 2024} 2025 2026static __inline__ __m128 __DEFAULT_FN_ATTRS 2027_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 2028 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 2029 (__v4sf) 2030 _mm_setzero_ps (), 2031 (__mmask8) __U); 2032} 2033 2034static __inline__ __m256 __DEFAULT_FN_ATTRS 2035_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2036 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 2037 (__v8sf) __W, 2038 (__mmask8) __U); 2039} 2040 2041static __inline__ __m256 __DEFAULT_FN_ATTRS 2042_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 2043 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 2044 (__v8sf) 2045 _mm256_setzero_ps (), 2046 (__mmask8) __U); 2047} 2048 2049static __inline__ __m128i __DEFAULT_FN_ATTRS 2050_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2051 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 2052 (__v4si) __W, 2053 (__mmask8) __U); 2054} 2055 2056static __inline__ __m128i __DEFAULT_FN_ATTRS 2057_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 2058 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 2059 (__v4si) 2060 _mm_setzero_si128 (), 2061 (__mmask8) __U); 2062} 2063 2064static __inline__ __m256i __DEFAULT_FN_ATTRS 2065_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2066 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 2067 (__v8si) __W, 2068 (__mmask8) __U); 2069} 2070 2071static __inline__ __m256i __DEFAULT_FN_ATTRS 2072_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 2073 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 2074 (__v8si) 2075 _mm256_setzero_si256 (), 2076 (__mmask8) __U); 2077} 2078 2079static __inline__ void __DEFAULT_FN_ATTRS 2080_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 2081 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 2082 (__v2df) __A, 2083 (__mmask8) __U); 2084} 2085 2086static __inline__ void __DEFAULT_FN_ATTRS 2087_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 2088 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 2089 (__v4df) __A, 2090 (__mmask8) __U); 2091} 2092 2093static __inline__ void __DEFAULT_FN_ATTRS 2094_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 2095 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 2096 (__v2di) __A, 2097 (__mmask8) __U); 2098} 2099 2100static __inline__ void __DEFAULT_FN_ATTRS 2101_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 2102 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 2103 (__v4di) __A, 2104 (__mmask8) __U); 2105} 2106 2107static __inline__ void __DEFAULT_FN_ATTRS 2108_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 2109 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 2110 (__v4sf) __A, 2111 (__mmask8) __U); 2112} 2113 2114static __inline__ void __DEFAULT_FN_ATTRS 2115_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 2116 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 2117 (__v8sf) __A, 2118 (__mmask8) __U); 2119} 2120 2121static __inline__ void __DEFAULT_FN_ATTRS 2122_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 2123 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 2124 (__v4si) __A, 2125 (__mmask8) __U); 2126} 2127 2128static __inline__ void __DEFAULT_FN_ATTRS 2129_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 2130 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 2131 (__v8si) __A, 2132 (__mmask8) __U); 2133} 2134 2135static __inline__ __m128d __DEFAULT_FN_ATTRS 2136_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2137 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A, 2138 (__v2df) __W, 2139 (__mmask8) __U); 2140} 2141 2142static __inline__ __m128d __DEFAULT_FN_ATTRS 2143_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 2144 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A, 2145 (__v2df) 2146 _mm_setzero_pd (), 2147 (__mmask8) __U); 2148} 2149 2150static __inline__ __m256d __DEFAULT_FN_ATTRS 2151_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2152 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A, 2153 (__v4df) __W, 2154 (__mmask8) __U); 2155} 2156 2157static __inline__ __m256d __DEFAULT_FN_ATTRS 2158_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 2159 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A, 2160 (__v4df) 2161 _mm256_setzero_pd (), 2162 (__mmask8) __U); 2163} 2164 2165static __inline__ __m128 __DEFAULT_FN_ATTRS 2166_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2167 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 2168 (__v4sf) __W, 2169 (__mmask8) __U); 2170} 2171 2172static __inline__ __m128 __DEFAULT_FN_ATTRS 2173_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) { 2174 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 2175 (__v4sf) 2176 _mm_setzero_ps (), 2177 (__mmask8) __U); 2178} 2179 2180static __inline__ __m256 __DEFAULT_FN_ATTRS 2181_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2182 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 2183 (__v8sf) __W, 2184 (__mmask8) __U); 2185} 2186 2187static __inline__ __m256 __DEFAULT_FN_ATTRS 2188_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) { 2189 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 2190 (__v8sf) 2191 _mm256_setzero_ps (), 2192 (__mmask8) __U); 2193} 2194 2195static __inline__ __m128i __DEFAULT_FN_ATTRS 2196_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2197 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 2198 (__v4si) __W, 2199 (__mmask8) __U); 2200} 2201 2202static __inline__ __m128i __DEFAULT_FN_ATTRS 2203_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 2204 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 2205 (__v4si) 2206 _mm_setzero_si128 (), 2207 (__mmask8) __U); 2208} 2209 2210static __inline__ __m128i __DEFAULT_FN_ATTRS 2211_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2212 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 2213 (__v4si) __W, 2214 (__mmask8) __U); 2215} 2216 2217static __inline__ __m128i __DEFAULT_FN_ATTRS 2218_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 2219 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 2220 (__v4si) 2221 _mm_setzero_si128 (), 2222 (__mmask8) __U); 2223} 2224 2225static __inline__ __m128 __DEFAULT_FN_ATTRS 2226_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 2227 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2228 (__v4sf) __W, 2229 (__mmask8) __U); 2230} 2231 2232static __inline__ __m128 __DEFAULT_FN_ATTRS 2233_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 2234 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2235 (__v4sf) 2236 _mm_setzero_ps (), 2237 (__mmask8) __U); 2238} 2239 2240static __inline__ __m128 __DEFAULT_FN_ATTRS 2241_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2242 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 2243 (__v4sf) __W, 2244 (__mmask8) __U); 2245} 2246 2247static __inline__ __m128 __DEFAULT_FN_ATTRS 2248_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2249 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 2250 (__v4sf) 2251 _mm_setzero_ps (), 2252 (__mmask8) __U); 2253} 2254 2255static __inline__ __m128i __DEFAULT_FN_ATTRS 2256_mm_cvtpd_epu32 (__m128d __A) { 2257 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2258 (__v4si) 2259 _mm_setzero_si128 (), 2260 (__mmask8) -1); 2261} 2262 2263static __inline__ __m128i __DEFAULT_FN_ATTRS 2264_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2265 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2266 (__v4si) __W, 2267 (__mmask8) __U); 2268} 2269 2270static __inline__ __m128i __DEFAULT_FN_ATTRS 2271_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2272 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2273 (__v4si) 2274 _mm_setzero_si128 (), 2275 (__mmask8) __U); 2276} 2277 2278static __inline__ __m128i __DEFAULT_FN_ATTRS 2279_mm256_cvtpd_epu32 (__m256d __A) { 2280 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2281 (__v4si) 2282 _mm_setzero_si128 (), 2283 (__mmask8) -1); 2284} 2285 2286static __inline__ __m128i __DEFAULT_FN_ATTRS 2287_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2288 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2289 (__v4si) __W, 2290 (__mmask8) __U); 2291} 2292 2293static __inline__ __m128i __DEFAULT_FN_ATTRS 2294_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2295 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2296 (__v4si) 2297 _mm_setzero_si128 (), 2298 (__mmask8) __U); 2299} 2300 2301static __inline__ __m128i __DEFAULT_FN_ATTRS 2302_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2303 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 2304 (__v4si) __W, 2305 (__mmask8) __U); 2306} 2307 2308static __inline__ __m128i __DEFAULT_FN_ATTRS 2309_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2310 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 2311 (__v4si) 2312 _mm_setzero_si128 (), 2313 (__mmask8) __U); 2314} 2315 2316static __inline__ __m256i __DEFAULT_FN_ATTRS 2317_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2318 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 2319 (__v8si) __W, 2320 (__mmask8) __U); 2321} 2322 2323static __inline__ __m256i __DEFAULT_FN_ATTRS 2324_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2325 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 2326 (__v8si) 2327 _mm256_setzero_si256 (), 2328 (__mmask8) __U); 2329} 2330 2331static __inline__ __m128d __DEFAULT_FN_ATTRS 2332_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2333 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 2334 (__v2df) __W, 2335 (__mmask8) __U); 2336} 2337 2338static __inline__ __m128d __DEFAULT_FN_ATTRS 2339_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2340 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 2341 (__v2df) 2342 _mm_setzero_pd (), 2343 (__mmask8) __U); 2344} 2345 2346static __inline__ __m256d __DEFAULT_FN_ATTRS 2347_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2348 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 2349 (__v4df) __W, 2350 (__mmask8) __U); 2351} 2352 2353static __inline__ __m256d __DEFAULT_FN_ATTRS 2354_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2355 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 2356 (__v4df) 2357 _mm256_setzero_pd (), 2358 (__mmask8) __U); 2359} 2360 2361static __inline__ __m128i __DEFAULT_FN_ATTRS 2362_mm_cvtps_epu32 (__m128 __A) { 2363 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2364 (__v4si) 2365 _mm_setzero_si128 (), 2366 (__mmask8) -1); 2367} 2368 2369static __inline__ __m128i __DEFAULT_FN_ATTRS 2370_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2371 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2372 (__v4si) __W, 2373 (__mmask8) __U); 2374} 2375 2376static __inline__ __m128i __DEFAULT_FN_ATTRS 2377_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2378 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2379 (__v4si) 2380 _mm_setzero_si128 (), 2381 (__mmask8) __U); 2382} 2383 2384static __inline__ __m256i __DEFAULT_FN_ATTRS 2385_mm256_cvtps_epu32 (__m256 __A) { 2386 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2387 (__v8si) 2388 _mm256_setzero_si256 (), 2389 (__mmask8) -1); 2390} 2391 2392static __inline__ __m256i __DEFAULT_FN_ATTRS 2393_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2394 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2395 (__v8si) __W, 2396 (__mmask8) __U); 2397} 2398 2399static __inline__ __m256i __DEFAULT_FN_ATTRS 2400_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2401 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2402 (__v8si) 2403 _mm256_setzero_si256 (), 2404 (__mmask8) __U); 2405} 2406 2407static __inline__ __m128i __DEFAULT_FN_ATTRS 2408_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2409 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2410 (__v4si) __W, 2411 (__mmask8) __U); 2412} 2413 2414static __inline__ __m128i __DEFAULT_FN_ATTRS 2415_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2416 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2417 (__v4si) 2418 _mm_setzero_si128 (), 2419 (__mmask8) __U); 2420} 2421 2422static __inline__ __m128i __DEFAULT_FN_ATTRS 2423_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2424 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 2425 (__v4si) __W, 2426 (__mmask8) __U); 2427} 2428 2429static __inline__ __m128i __DEFAULT_FN_ATTRS 2430_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2431 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 2432 (__v4si) 2433 _mm_setzero_si128 (), 2434 (__mmask8) __U); 2435} 2436 2437static __inline__ __m128i __DEFAULT_FN_ATTRS 2438_mm_cvttpd_epu32 (__m128d __A) { 2439 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2440 (__v4si) 2441 _mm_setzero_si128 (), 2442 (__mmask8) -1); 2443} 2444 2445static __inline__ __m128i __DEFAULT_FN_ATTRS 2446_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2447 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2448 (__v4si) __W, 2449 (__mmask8) __U); 2450} 2451 2452static __inline__ __m128i __DEFAULT_FN_ATTRS 2453_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2454 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2455 (__v4si) 2456 _mm_setzero_si128 (), 2457 (__mmask8) __U); 2458} 2459 2460static __inline__ __m128i __DEFAULT_FN_ATTRS 2461_mm256_cvttpd_epu32 (__m256d __A) { 2462 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2463 (__v4si) 2464 _mm_setzero_si128 (), 2465 (__mmask8) -1); 2466} 2467 2468static __inline__ __m128i __DEFAULT_FN_ATTRS 2469_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2470 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2471 (__v4si) __W, 2472 (__mmask8) __U); 2473} 2474 2475static __inline__ __m128i __DEFAULT_FN_ATTRS 2476_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2477 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2478 (__v4si) 2479 _mm_setzero_si128 (), 2480 (__mmask8) __U); 2481} 2482 2483static __inline__ __m128i __DEFAULT_FN_ATTRS 2484_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2485 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 2486 (__v4si) __W, 2487 (__mmask8) __U); 2488} 2489 2490static __inline__ __m128i __DEFAULT_FN_ATTRS 2491_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2492 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 2493 (__v4si) 2494 _mm_setzero_si128 (), 2495 (__mmask8) __U); 2496} 2497 2498static __inline__ __m256i __DEFAULT_FN_ATTRS 2499_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2500 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 2501 (__v8si) __W, 2502 (__mmask8) __U); 2503} 2504 2505static __inline__ __m256i __DEFAULT_FN_ATTRS 2506_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2507 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 2508 (__v8si) 2509 _mm256_setzero_si256 (), 2510 (__mmask8) __U); 2511} 2512 2513static __inline__ __m128i __DEFAULT_FN_ATTRS 2514_mm_cvttps_epu32 (__m128 __A) { 2515 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2516 (__v4si) 2517 _mm_setzero_si128 (), 2518 (__mmask8) -1); 2519} 2520 2521static __inline__ __m128i __DEFAULT_FN_ATTRS 2522_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2523 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2524 (__v4si) __W, 2525 (__mmask8) __U); 2526} 2527 2528static __inline__ __m128i __DEFAULT_FN_ATTRS 2529_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2530 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2531 (__v4si) 2532 _mm_setzero_si128 (), 2533 (__mmask8) __U); 2534} 2535 2536static __inline__ __m256i __DEFAULT_FN_ATTRS 2537_mm256_cvttps_epu32 (__m256 __A) { 2538 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2539 (__v8si) 2540 _mm256_setzero_si256 (), 2541 (__mmask8) -1); 2542} 2543 2544static __inline__ __m256i __DEFAULT_FN_ATTRS 2545_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2546 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2547 (__v8si) __W, 2548 (__mmask8) __U); 2549} 2550 2551static __inline__ __m256i __DEFAULT_FN_ATTRS 2552_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2553 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2554 (__v8si) 2555 _mm256_setzero_si256 (), 2556 (__mmask8) __U); 2557} 2558 2559static __inline__ __m128d __DEFAULT_FN_ATTRS 2560_mm_cvtepu32_pd (__m128i __A) { 2561 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 2562 (__v2df) 2563 _mm_setzero_pd (), 2564 (__mmask8) -1); 2565} 2566 2567static __inline__ __m128d __DEFAULT_FN_ATTRS 2568_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2569 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 2570 (__v2df) __W, 2571 (__mmask8) __U); 2572} 2573 2574static __inline__ __m128d __DEFAULT_FN_ATTRS 2575_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2576 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 2577 (__v2df) 2578 _mm_setzero_pd (), 2579 (__mmask8) __U); 2580} 2581 2582static __inline__ __m256d __DEFAULT_FN_ATTRS 2583_mm256_cvtepu32_pd (__m128i __A) { 2584 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 2585 (__v4df) 2586 _mm256_setzero_pd (), 2587 (__mmask8) -1); 2588} 2589 2590static __inline__ __m256d __DEFAULT_FN_ATTRS 2591_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2592 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 2593 (__v4df) __W, 2594 (__mmask8) __U); 2595} 2596 2597static __inline__ __m256d __DEFAULT_FN_ATTRS 2598_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2599 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 2600 (__v4df) 2601 _mm256_setzero_pd (), 2602 (__mmask8) __U); 2603} 2604 2605static __inline__ __m128 __DEFAULT_FN_ATTRS 2606_mm_cvtepu32_ps (__m128i __A) { 2607 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2608 (__v4sf) 2609 _mm_setzero_ps (), 2610 (__mmask8) -1); 2611} 2612 2613static __inline__ __m128 __DEFAULT_FN_ATTRS 2614_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2615 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2616 (__v4sf) __W, 2617 (__mmask8) __U); 2618} 2619 2620static __inline__ __m128 __DEFAULT_FN_ATTRS 2621_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2622 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2623 (__v4sf) 2624 _mm_setzero_ps (), 2625 (__mmask8) __U); 2626} 2627 2628static __inline__ __m256 __DEFAULT_FN_ATTRS 2629_mm256_cvtepu32_ps (__m256i __A) { 2630 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2631 (__v8sf) 2632 _mm256_setzero_ps (), 2633 (__mmask8) -1); 2634} 2635 2636static __inline__ __m256 __DEFAULT_FN_ATTRS 2637_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2638 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2639 (__v8sf) __W, 2640 (__mmask8) __U); 2641} 2642 2643static __inline__ __m256 __DEFAULT_FN_ATTRS 2644_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2645 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2646 (__v8sf) 2647 _mm256_setzero_ps (), 2648 (__mmask8) __U); 2649} 2650 2651static __inline__ __m128d __DEFAULT_FN_ATTRS 2652_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2653 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2654 (__v2df)_mm_div_pd(__A, __B), 2655 (__v2df)__W); 2656} 2657 2658static __inline__ __m128d __DEFAULT_FN_ATTRS 2659_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2660 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2661 (__v2df)_mm_div_pd(__A, __B), 2662 (__v2df)_mm_setzero_pd()); 2663} 2664 2665static __inline__ __m256d __DEFAULT_FN_ATTRS 2666_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2667 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2668 (__v4df)_mm256_div_pd(__A, __B), 2669 (__v4df)__W); 2670} 2671 2672static __inline__ __m256d __DEFAULT_FN_ATTRS 2673_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2674 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2675 (__v4df)_mm256_div_pd(__A, __B), 2676 (__v4df)_mm256_setzero_pd()); 2677} 2678 2679static __inline__ __m128 __DEFAULT_FN_ATTRS 2680_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2681 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2682 (__v4sf)_mm_div_ps(__A, __B), 2683 (__v4sf)__W); 2684} 2685 2686static __inline__ __m128 __DEFAULT_FN_ATTRS 2687_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2688 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2689 (__v4sf)_mm_div_ps(__A, __B), 2690 (__v4sf)_mm_setzero_ps()); 2691} 2692 2693static __inline__ __m256 __DEFAULT_FN_ATTRS 2694_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2695 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2696 (__v8sf)_mm256_div_ps(__A, __B), 2697 (__v8sf)__W); 2698} 2699 2700static __inline__ __m256 __DEFAULT_FN_ATTRS 2701_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2702 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2703 (__v8sf)_mm256_div_ps(__A, __B), 2704 (__v8sf)_mm256_setzero_ps()); 2705} 2706 2707static __inline__ __m128d __DEFAULT_FN_ATTRS 2708_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2709 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2710 (__v2df) __W, 2711 (__mmask8) __U); 2712} 2713 2714static __inline__ __m128d __DEFAULT_FN_ATTRS 2715_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2716 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2717 (__v2df) 2718 _mm_setzero_pd (), 2719 (__mmask8) __U); 2720} 2721 2722static __inline__ __m256d __DEFAULT_FN_ATTRS 2723_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2724 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2725 (__v4df) __W, 2726 (__mmask8) __U); 2727} 2728 2729static __inline__ __m256d __DEFAULT_FN_ATTRS 2730_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2731 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2732 (__v4df) 2733 _mm256_setzero_pd (), 2734 (__mmask8) __U); 2735} 2736 2737static __inline__ __m128i __DEFAULT_FN_ATTRS 2738_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2739 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2740 (__v2di) __W, 2741 (__mmask8) __U); 2742} 2743 2744static __inline__ __m128i __DEFAULT_FN_ATTRS 2745_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2746 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2747 (__v2di) 2748 _mm_setzero_si128 (), 2749 (__mmask8) __U); 2750} 2751 2752static __inline__ __m256i __DEFAULT_FN_ATTRS 2753_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2754 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2755 (__v4di) __W, 2756 (__mmask8) __U); 2757} 2758 2759static __inline__ __m256i __DEFAULT_FN_ATTRS 2760_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2761 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2762 (__v4di) 2763 _mm256_setzero_si256 (), 2764 (__mmask8) __U); 2765} 2766 2767static __inline__ __m128d __DEFAULT_FN_ATTRS 2768_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2769 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2770 (__v2df) __W, 2771 (__mmask8) 2772 __U); 2773} 2774 2775static __inline__ __m128d __DEFAULT_FN_ATTRS 2776_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2777 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2778 (__v2df) 2779 _mm_setzero_pd (), 2780 (__mmask8) 2781 __U); 2782} 2783 2784static __inline__ __m256d __DEFAULT_FN_ATTRS 2785_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2786 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2787 (__v4df) __W, 2788 (__mmask8) 2789 __U); 2790} 2791 2792static __inline__ __m256d __DEFAULT_FN_ATTRS 2793_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2794 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2795 (__v4df) 2796 _mm256_setzero_pd (), 2797 (__mmask8) 2798 __U); 2799} 2800 2801static __inline__ __m128i __DEFAULT_FN_ATTRS 2802_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2803 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2804 (__v2di) __W, 2805 (__mmask8) 2806 __U); 2807} 2808 2809static __inline__ __m128i __DEFAULT_FN_ATTRS 2810_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2811 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2812 (__v2di) 2813 _mm_setzero_si128 (), 2814 (__mmask8) 2815 __U); 2816} 2817 2818static __inline__ __m256i __DEFAULT_FN_ATTRS 2819_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 2820 void const *__P) { 2821 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2822 (__v4di) __W, 2823 (__mmask8) 2824 __U); 2825} 2826 2827static __inline__ __m256i __DEFAULT_FN_ATTRS 2828_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2829 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2830 (__v4di) 2831 _mm256_setzero_si256 (), 2832 (__mmask8) 2833 __U); 2834} 2835 2836static __inline__ __m128 __DEFAULT_FN_ATTRS 2837_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2838 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2839 (__v4sf) __W, 2840 (__mmask8) __U); 2841} 2842 2843static __inline__ __m128 __DEFAULT_FN_ATTRS 2844_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2845 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2846 (__v4sf) 2847 _mm_setzero_ps (), 2848 (__mmask8) 2849 __U); 2850} 2851 2852static __inline__ __m256 __DEFAULT_FN_ATTRS 2853_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2854 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2855 (__v8sf) __W, 2856 (__mmask8) __U); 2857} 2858 2859static __inline__ __m256 __DEFAULT_FN_ATTRS 2860_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2861 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2862 (__v8sf) 2863 _mm256_setzero_ps (), 2864 (__mmask8) 2865 __U); 2866} 2867 2868static __inline__ __m128i __DEFAULT_FN_ATTRS 2869_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2870 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2871 (__v4si) __W, 2872 (__mmask8) 2873 __U); 2874} 2875 2876static __inline__ __m128i __DEFAULT_FN_ATTRS 2877_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2878 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2879 (__v4si) 2880 _mm_setzero_si128 (), 2881 (__mmask8) __U); 2882} 2883 2884static __inline__ __m256i __DEFAULT_FN_ATTRS 2885_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2886 void const *__P) { 2887 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2888 (__v8si) __W, 2889 (__mmask8) 2890 __U); 2891} 2892 2893static __inline__ __m256i __DEFAULT_FN_ATTRS 2894_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2895 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2896 (__v8si) 2897 _mm256_setzero_si256 (), 2898 (__mmask8) 2899 __U); 2900} 2901 2902static __inline__ __m128 __DEFAULT_FN_ATTRS 2903_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2904 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2905 (__v4sf) __W, 2906 (__mmask8) __U); 2907} 2908 2909static __inline__ __m128 __DEFAULT_FN_ATTRS 2910_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2911 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2912 (__v4sf) 2913 _mm_setzero_ps (), 2914 (__mmask8) __U); 2915} 2916 2917static __inline__ __m256 __DEFAULT_FN_ATTRS 2918_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2919 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2920 (__v8sf) __W, 2921 (__mmask8) __U); 2922} 2923 2924static __inline__ __m256 __DEFAULT_FN_ATTRS 2925_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 2926 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2927 (__v8sf) 2928 _mm256_setzero_ps (), 2929 (__mmask8) __U); 2930} 2931 2932static __inline__ __m128i __DEFAULT_FN_ATTRS 2933_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2934 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2935 (__v4si) __W, 2936 (__mmask8) __U); 2937} 2938 2939static __inline__ __m128i __DEFAULT_FN_ATTRS 2940_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 2941 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2942 (__v4si) 2943 _mm_setzero_si128 (), 2944 (__mmask8) __U); 2945} 2946 2947static __inline__ __m256i __DEFAULT_FN_ATTRS 2948_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2949 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2950 (__v8si) __W, 2951 (__mmask8) __U); 2952} 2953 2954static __inline__ __m256i __DEFAULT_FN_ATTRS 2955_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 2956 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2957 (__v8si) 2958 _mm256_setzero_si256 (), 2959 (__mmask8) __U); 2960} 2961 2962static __inline__ __m128d __DEFAULT_FN_ATTRS 2963_mm_getexp_pd (__m128d __A) { 2964 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2965 (__v2df) 2966 _mm_setzero_pd (), 2967 (__mmask8) -1); 2968} 2969 2970static __inline__ __m128d __DEFAULT_FN_ATTRS 2971_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2972 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2973 (__v2df) __W, 2974 (__mmask8) __U); 2975} 2976 2977static __inline__ __m128d __DEFAULT_FN_ATTRS 2978_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 2979 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2980 (__v2df) 2981 _mm_setzero_pd (), 2982 (__mmask8) __U); 2983} 2984 2985static __inline__ __m256d __DEFAULT_FN_ATTRS 2986_mm256_getexp_pd (__m256d __A) { 2987 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2988 (__v4df) 2989 _mm256_setzero_pd (), 2990 (__mmask8) -1); 2991} 2992 2993static __inline__ __m256d __DEFAULT_FN_ATTRS 2994_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2995 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2996 (__v4df) __W, 2997 (__mmask8) __U); 2998} 2999 3000static __inline__ __m256d __DEFAULT_FN_ATTRS 3001_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 3002 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 3003 (__v4df) 3004 _mm256_setzero_pd (), 3005 (__mmask8) __U); 3006} 3007 3008static __inline__ __m128 __DEFAULT_FN_ATTRS 3009_mm_getexp_ps (__m128 __A) { 3010 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3011 (__v4sf) 3012 _mm_setzero_ps (), 3013 (__mmask8) -1); 3014} 3015 3016static __inline__ __m128 __DEFAULT_FN_ATTRS 3017_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 3018 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3019 (__v4sf) __W, 3020 (__mmask8) __U); 3021} 3022 3023static __inline__ __m128 __DEFAULT_FN_ATTRS 3024_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 3025 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3026 (__v4sf) 3027 _mm_setzero_ps (), 3028 (__mmask8) __U); 3029} 3030 3031static __inline__ __m256 __DEFAULT_FN_ATTRS 3032_mm256_getexp_ps (__m256 __A) { 3033 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3034 (__v8sf) 3035 _mm256_setzero_ps (), 3036 (__mmask8) -1); 3037} 3038 3039static __inline__ __m256 __DEFAULT_FN_ATTRS 3040_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 3041 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3042 (__v8sf) __W, 3043 (__mmask8) __U); 3044} 3045 3046static __inline__ __m256 __DEFAULT_FN_ATTRS 3047_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 3048 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3049 (__v8sf) 3050 _mm256_setzero_ps (), 3051 (__mmask8) __U); 3052} 3053 3054static __inline__ __m128d __DEFAULT_FN_ATTRS 3055_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3056 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3057 (__v2df)_mm_max_pd(__A, __B), 3058 (__v2df)__W); 3059} 3060 3061static __inline__ __m128d __DEFAULT_FN_ATTRS 3062_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3063 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3064 (__v2df)_mm_max_pd(__A, __B), 3065 (__v2df)_mm_setzero_pd()); 3066} 3067 3068static __inline__ __m256d __DEFAULT_FN_ATTRS 3069_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3070 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3071 (__v4df)_mm256_max_pd(__A, __B), 3072 (__v4df)__W); 3073} 3074 3075static __inline__ __m256d __DEFAULT_FN_ATTRS 3076_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3077 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3078 (__v4df)_mm256_max_pd(__A, __B), 3079 (__v4df)_mm256_setzero_pd()); 3080} 3081 3082static __inline__ __m128 __DEFAULT_FN_ATTRS 3083_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3084 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3085 (__v4sf)_mm_max_ps(__A, __B), 3086 (__v4sf)__W); 3087} 3088 3089static __inline__ __m128 __DEFAULT_FN_ATTRS 3090_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3091 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3092 (__v4sf)_mm_max_ps(__A, __B), 3093 (__v4sf)_mm_setzero_ps()); 3094} 3095 3096static __inline__ __m256 __DEFAULT_FN_ATTRS 3097_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3098 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3099 (__v8sf)_mm256_max_ps(__A, __B), 3100 (__v8sf)__W); 3101} 3102 3103static __inline__ __m256 __DEFAULT_FN_ATTRS 3104_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3105 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3106 (__v8sf)_mm256_max_ps(__A, __B), 3107 (__v8sf)_mm256_setzero_ps()); 3108} 3109 3110static __inline__ __m128d __DEFAULT_FN_ATTRS 3111_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3112 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3113 (__v2df)_mm_min_pd(__A, __B), 3114 (__v2df)__W); 3115} 3116 3117static __inline__ __m128d __DEFAULT_FN_ATTRS 3118_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3119 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3120 (__v2df)_mm_min_pd(__A, __B), 3121 (__v2df)_mm_setzero_pd()); 3122} 3123 3124static __inline__ __m256d __DEFAULT_FN_ATTRS 3125_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3126 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3127 (__v4df)_mm256_min_pd(__A, __B), 3128 (__v4df)__W); 3129} 3130 3131static __inline__ __m256d __DEFAULT_FN_ATTRS 3132_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3133 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3134 (__v4df)_mm256_min_pd(__A, __B), 3135 (__v4df)_mm256_setzero_pd()); 3136} 3137 3138static __inline__ __m128 __DEFAULT_FN_ATTRS 3139_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3140 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3141 (__v4sf)_mm_min_ps(__A, __B), 3142 (__v4sf)__W); 3143} 3144 3145static __inline__ __m128 __DEFAULT_FN_ATTRS 3146_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3147 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3148 (__v4sf)_mm_min_ps(__A, __B), 3149 (__v4sf)_mm_setzero_ps()); 3150} 3151 3152static __inline__ __m256 __DEFAULT_FN_ATTRS 3153_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3154 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3155 (__v8sf)_mm256_min_ps(__A, __B), 3156 (__v8sf)__W); 3157} 3158 3159static __inline__ __m256 __DEFAULT_FN_ATTRS 3160_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3161 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3162 (__v8sf)_mm256_min_ps(__A, __B), 3163 (__v8sf)_mm256_setzero_ps()); 3164} 3165 3166static __inline__ __m128d __DEFAULT_FN_ATTRS 3167_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3168 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3169 (__v2df)_mm_mul_pd(__A, __B), 3170 (__v2df)__W); 3171} 3172 3173static __inline__ __m128d __DEFAULT_FN_ATTRS 3174_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3175 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3176 (__v2df)_mm_mul_pd(__A, __B), 3177 (__v2df)_mm_setzero_pd()); 3178} 3179 3180static __inline__ __m256d __DEFAULT_FN_ATTRS 3181_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3182 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3183 (__v4df)_mm256_mul_pd(__A, __B), 3184 (__v4df)__W); 3185} 3186 3187static __inline__ __m256d __DEFAULT_FN_ATTRS 3188_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3189 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3190 (__v4df)_mm256_mul_pd(__A, __B), 3191 (__v4df)_mm256_setzero_pd()); 3192} 3193 3194static __inline__ __m128 __DEFAULT_FN_ATTRS 3195_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3196 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3197 (__v4sf)_mm_mul_ps(__A, __B), 3198 (__v4sf)__W); 3199} 3200 3201static __inline__ __m128 __DEFAULT_FN_ATTRS 3202_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3203 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3204 (__v4sf)_mm_mul_ps(__A, __B), 3205 (__v4sf)_mm_setzero_ps()); 3206} 3207 3208static __inline__ __m256 __DEFAULT_FN_ATTRS 3209_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3210 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3211 (__v8sf)_mm256_mul_ps(__A, __B), 3212 (__v8sf)__W); 3213} 3214 3215static __inline__ __m256 __DEFAULT_FN_ATTRS 3216_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3217 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3218 (__v8sf)_mm256_mul_ps(__A, __B), 3219 (__v8sf)_mm256_setzero_ps()); 3220} 3221 3222static __inline__ __m128i __DEFAULT_FN_ATTRS 3223_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 3224 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3225 (__v4si)_mm_abs_epi32(__A), 3226 (__v4si)__W); 3227} 3228 3229static __inline__ __m128i __DEFAULT_FN_ATTRS 3230_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 3231 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3232 (__v4si)_mm_abs_epi32(__A), 3233 (__v4si)_mm_setzero_si128()); 3234} 3235 3236static __inline__ __m256i __DEFAULT_FN_ATTRS 3237_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 3238 return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U, 3239 (__v8si)_mm256_abs_epi32(__A), 3240 (__v8si)__W); 3241} 3242 3243static __inline__ __m256i __DEFAULT_FN_ATTRS 3244_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 3245 return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U, 3246 (__v8si)_mm256_abs_epi32(__A), 3247 (__v8si)_mm256_setzero_si256()); 3248} 3249 3250static __inline__ __m128i __DEFAULT_FN_ATTRS 3251_mm_abs_epi64 (__m128i __A) { 3252 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3253 (__v2di) 3254 _mm_setzero_si128 (), 3255 (__mmask8) -1); 3256} 3257 3258static __inline__ __m128i __DEFAULT_FN_ATTRS 3259_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 3260 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3261 (__v2di) __W, 3262 (__mmask8) __U); 3263} 3264 3265static __inline__ __m128i __DEFAULT_FN_ATTRS 3266_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3267 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3268 (__v2di) 3269 _mm_setzero_si128 (), 3270 (__mmask8) __U); 3271} 3272 3273static __inline__ __m256i __DEFAULT_FN_ATTRS 3274_mm256_abs_epi64 (__m256i __A) { 3275 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3276 (__v4di) 3277 _mm256_setzero_si256 (), 3278 (__mmask8) -1); 3279} 3280 3281static __inline__ __m256i __DEFAULT_FN_ATTRS 3282_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3283 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3284 (__v4di) __W, 3285 (__mmask8) __U); 3286} 3287 3288static __inline__ __m256i __DEFAULT_FN_ATTRS 3289_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3290 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3291 (__v4di) 3292 _mm256_setzero_si256 (), 3293 (__mmask8) __U); 3294} 3295 3296static __inline__ __m128i __DEFAULT_FN_ATTRS 3297_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3298 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3299 (__v4si)_mm_max_epi32(__A, __B), 3300 (__v4si)_mm_setzero_si128()); 3301} 3302 3303static __inline__ __m128i __DEFAULT_FN_ATTRS 3304_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3305 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3306 (__v4si)_mm_max_epi32(__A, __B), 3307 (__v4si)__W); 3308} 3309 3310static __inline__ __m256i __DEFAULT_FN_ATTRS 3311_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3312 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3313 (__v8si)_mm256_max_epi32(__A, __B), 3314 (__v8si)_mm256_setzero_si256()); 3315} 3316 3317static __inline__ __m256i __DEFAULT_FN_ATTRS 3318_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3319 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3320 (__v8si)_mm256_max_epi32(__A, __B), 3321 (__v8si)__W); 3322} 3323 3324static __inline__ __m128i __DEFAULT_FN_ATTRS 3325_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3326 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3327 (__v2di) __B, 3328 (__v2di) 3329 _mm_setzero_si128 (), 3330 __M); 3331} 3332 3333static __inline__ __m128i __DEFAULT_FN_ATTRS 3334_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 3335 __m128i __B) { 3336 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3337 (__v2di) __B, 3338 (__v2di) __W, __M); 3339} 3340 3341static __inline__ __m128i __DEFAULT_FN_ATTRS 3342_mm_max_epi64 (__m128i __A, __m128i __B) { 3343 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3344 (__v2di) __B, 3345 (__v2di) 3346 _mm_setzero_si128 (), 3347 (__mmask8) -1); 3348} 3349 3350static __inline__ __m256i __DEFAULT_FN_ATTRS 3351_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3352 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3353 (__v4di) __B, 3354 (__v4di) 3355 _mm256_setzero_si256 (), 3356 __M); 3357} 3358 3359static __inline__ __m256i __DEFAULT_FN_ATTRS 3360_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 3361 __m256i __B) { 3362 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3363 (__v4di) __B, 3364 (__v4di) __W, __M); 3365} 3366 3367static __inline__ __m256i __DEFAULT_FN_ATTRS 3368_mm256_max_epi64 (__m256i __A, __m256i __B) { 3369 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3370 (__v4di) __B, 3371 (__v4di) 3372 _mm256_setzero_si256 (), 3373 (__mmask8) -1); 3374} 3375 3376static __inline__ __m128i __DEFAULT_FN_ATTRS 3377_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3378 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3379 (__v4si)_mm_max_epu32(__A, __B), 3380 (__v4si)_mm_setzero_si128()); 3381} 3382 3383static __inline__ __m128i __DEFAULT_FN_ATTRS 3384_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3385 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3386 (__v4si)_mm_max_epu32(__A, __B), 3387 (__v4si)__W); 3388} 3389 3390static __inline__ __m256i __DEFAULT_FN_ATTRS 3391_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3392 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3393 (__v8si)_mm256_max_epu32(__A, __B), 3394 (__v8si)_mm256_setzero_si256()); 3395} 3396 3397static __inline__ __m256i __DEFAULT_FN_ATTRS 3398_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3399 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3400 (__v8si)_mm256_max_epu32(__A, __B), 3401 (__v8si)__W); 3402} 3403 3404static __inline__ __m128i __DEFAULT_FN_ATTRS 3405_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3406 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3407 (__v2di) __B, 3408 (__v2di) 3409 _mm_setzero_si128 (), 3410 __M); 3411} 3412 3413static __inline__ __m128i __DEFAULT_FN_ATTRS 3414_mm_max_epu64 (__m128i __A, __m128i __B) { 3415 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3416 (__v2di) __B, 3417 (__v2di) 3418 _mm_setzero_si128 (), 3419 (__mmask8) -1); 3420} 3421 3422static __inline__ __m128i __DEFAULT_FN_ATTRS 3423_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 3424 __m128i __B) { 3425 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3426 (__v2di) __B, 3427 (__v2di) __W, __M); 3428} 3429 3430static __inline__ __m256i __DEFAULT_FN_ATTRS 3431_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3432 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3433 (__v4di) __B, 3434 (__v4di) 3435 _mm256_setzero_si256 (), 3436 __M); 3437} 3438 3439static __inline__ __m256i __DEFAULT_FN_ATTRS 3440_mm256_max_epu64 (__m256i __A, __m256i __B) { 3441 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3442 (__v4di) __B, 3443 (__v4di) 3444 _mm256_setzero_si256 (), 3445 (__mmask8) -1); 3446} 3447 3448static __inline__ __m256i __DEFAULT_FN_ATTRS 3449_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 3450 __m256i __B) { 3451 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3452 (__v4di) __B, 3453 (__v4di) __W, __M); 3454} 3455 3456static __inline__ __m128i __DEFAULT_FN_ATTRS 3457_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3458 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3459 (__v4si)_mm_min_epi32(__A, __B), 3460 (__v4si)_mm_setzero_si128()); 3461} 3462 3463static __inline__ __m128i __DEFAULT_FN_ATTRS 3464_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3465 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3466 (__v4si)_mm_min_epi32(__A, __B), 3467 (__v4si)__W); 3468} 3469 3470static __inline__ __m256i __DEFAULT_FN_ATTRS 3471_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3472 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3473 (__v8si)_mm256_min_epi32(__A, __B), 3474 (__v8si)_mm256_setzero_si256()); 3475} 3476 3477static __inline__ __m256i __DEFAULT_FN_ATTRS 3478_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3479 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3480 (__v8si)_mm256_min_epi32(__A, __B), 3481 (__v8si)__W); 3482} 3483 3484static __inline__ __m128i __DEFAULT_FN_ATTRS 3485_mm_min_epi64 (__m128i __A, __m128i __B) { 3486 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3487 (__v2di) __B, 3488 (__v2di) 3489 _mm_setzero_si128 (), 3490 (__mmask8) -1); 3491} 3492 3493static __inline__ __m128i __DEFAULT_FN_ATTRS 3494_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 3495 __m128i __B) { 3496 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3497 (__v2di) __B, 3498 (__v2di) __W, __M); 3499} 3500 3501static __inline__ __m128i __DEFAULT_FN_ATTRS 3502_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3503 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3504 (__v2di) __B, 3505 (__v2di) 3506 _mm_setzero_si128 (), 3507 __M); 3508} 3509 3510static __inline__ __m256i __DEFAULT_FN_ATTRS 3511_mm256_min_epi64 (__m256i __A, __m256i __B) { 3512 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3513 (__v4di) __B, 3514 (__v4di) 3515 _mm256_setzero_si256 (), 3516 (__mmask8) -1); 3517} 3518 3519static __inline__ __m256i __DEFAULT_FN_ATTRS 3520_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 3521 __m256i __B) { 3522 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3523 (__v4di) __B, 3524 (__v4di) __W, __M); 3525} 3526 3527static __inline__ __m256i __DEFAULT_FN_ATTRS 3528_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3529 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3530 (__v4di) __B, 3531 (__v4di) 3532 _mm256_setzero_si256 (), 3533 __M); 3534} 3535 3536static __inline__ __m128i __DEFAULT_FN_ATTRS 3537_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3538 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3539 (__v4si)_mm_min_epu32(__A, __B), 3540 (__v4si)_mm_setzero_si128()); 3541} 3542 3543static __inline__ __m128i __DEFAULT_FN_ATTRS 3544_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3545 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3546 (__v4si)_mm_min_epu32(__A, __B), 3547 (__v4si)__W); 3548} 3549 3550static __inline__ __m256i __DEFAULT_FN_ATTRS 3551_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3552 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3553 (__v8si)_mm256_min_epu32(__A, __B), 3554 (__v8si)_mm256_setzero_si256()); 3555} 3556 3557static __inline__ __m256i __DEFAULT_FN_ATTRS 3558_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3559 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3560 (__v8si)_mm256_min_epu32(__A, __B), 3561 (__v8si)__W); 3562} 3563 3564static __inline__ __m128i __DEFAULT_FN_ATTRS 3565_mm_min_epu64 (__m128i __A, __m128i __B) { 3566 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3567 (__v2di) __B, 3568 (__v2di) 3569 _mm_setzero_si128 (), 3570 (__mmask8) -1); 3571} 3572 3573static __inline__ __m128i __DEFAULT_FN_ATTRS 3574_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 3575 __m128i __B) { 3576 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3577 (__v2di) __B, 3578 (__v2di) __W, __M); 3579} 3580 3581static __inline__ __m128i __DEFAULT_FN_ATTRS 3582_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3583 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3584 (__v2di) __B, 3585 (__v2di) 3586 _mm_setzero_si128 (), 3587 __M); 3588} 3589 3590static __inline__ __m256i __DEFAULT_FN_ATTRS 3591_mm256_min_epu64 (__m256i __A, __m256i __B) { 3592 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3593 (__v4di) __B, 3594 (__v4di) 3595 _mm256_setzero_si256 (), 3596 (__mmask8) -1); 3597} 3598 3599static __inline__ __m256i __DEFAULT_FN_ATTRS 3600_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 3601 __m256i __B) { 3602 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3603 (__v4di) __B, 3604 (__v4di) __W, __M); 3605} 3606 3607static __inline__ __m256i __DEFAULT_FN_ATTRS 3608_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3609 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3610 (__v4di) __B, 3611 (__v4di) 3612 _mm256_setzero_si256 (), 3613 __M); 3614} 3615 3616#define _mm_roundscale_pd(A, imm) __extension__ ({ \ 3617 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3618 (int)(imm), \ 3619 (__v2df)_mm_setzero_pd(), \ 3620 (__mmask8)-1); }) 3621 3622 3623#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ 3624 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3625 (int)(imm), \ 3626 (__v2df)(__m128d)(W), \ 3627 (__mmask8)(U)); }) 3628 3629 3630#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ 3631 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3632 (int)(imm), \ 3633 (__v2df)_mm_setzero_pd(), \ 3634 (__mmask8)(U)); }) 3635 3636 3637#define _mm256_roundscale_pd(A, imm) __extension__ ({ \ 3638 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3639 (int)(imm), \ 3640 (__v4df)_mm256_setzero_pd(), \ 3641 (__mmask8)-1); }) 3642 3643 3644#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ 3645 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3646 (int)(imm), \ 3647 (__v4df)(__m256d)(W), \ 3648 (__mmask8)(U)); }) 3649 3650 3651#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ 3652 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3653 (int)(imm), \ 3654 (__v4df)_mm256_setzero_pd(), \ 3655 (__mmask8)(U)); }) 3656 3657#define _mm_roundscale_ps(A, imm) __extension__ ({ \ 3658 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3659 (__v4sf)_mm_setzero_ps(), \ 3660 (__mmask8)-1); }) 3661 3662 3663#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ 3664 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3665 (__v4sf)(__m128)(W), \ 3666 (__mmask8)(U)); }) 3667 3668 3669#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ 3670 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3671 (__v4sf)_mm_setzero_ps(), \ 3672 (__mmask8)(U)); }) 3673 3674#define _mm256_roundscale_ps(A, imm) __extension__ ({ \ 3675 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3676 (__v8sf)_mm256_setzero_ps(), \ 3677 (__mmask8)-1); }) 3678 3679#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ 3680 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3681 (__v8sf)(__m256)(W), \ 3682 (__mmask8)(U)); }) 3683 3684 3685#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ 3686 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3687 (__v8sf)_mm256_setzero_ps(), \ 3688 (__mmask8)(U)); }) 3689 3690static __inline__ __m128d __DEFAULT_FN_ATTRS 3691_mm_scalef_pd (__m128d __A, __m128d __B) { 3692 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3693 (__v2df) __B, 3694 (__v2df) 3695 _mm_setzero_pd (), 3696 (__mmask8) -1); 3697} 3698 3699static __inline__ __m128d __DEFAULT_FN_ATTRS 3700_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3701 __m128d __B) { 3702 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3703 (__v2df) __B, 3704 (__v2df) __W, 3705 (__mmask8) __U); 3706} 3707 3708static __inline__ __m128d __DEFAULT_FN_ATTRS 3709_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3710 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3711 (__v2df) __B, 3712 (__v2df) 3713 _mm_setzero_pd (), 3714 (__mmask8) __U); 3715} 3716 3717static __inline__ __m256d __DEFAULT_FN_ATTRS 3718_mm256_scalef_pd (__m256d __A, __m256d __B) { 3719 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3720 (__v4df) __B, 3721 (__v4df) 3722 _mm256_setzero_pd (), 3723 (__mmask8) -1); 3724} 3725 3726static __inline__ __m256d __DEFAULT_FN_ATTRS 3727_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3728 __m256d __B) { 3729 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3730 (__v4df) __B, 3731 (__v4df) __W, 3732 (__mmask8) __U); 3733} 3734 3735static __inline__ __m256d __DEFAULT_FN_ATTRS 3736_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3737 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3738 (__v4df) __B, 3739 (__v4df) 3740 _mm256_setzero_pd (), 3741 (__mmask8) __U); 3742} 3743 3744static __inline__ __m128 __DEFAULT_FN_ATTRS 3745_mm_scalef_ps (__m128 __A, __m128 __B) { 3746 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3747 (__v4sf) __B, 3748 (__v4sf) 3749 _mm_setzero_ps (), 3750 (__mmask8) -1); 3751} 3752 3753static __inline__ __m128 __DEFAULT_FN_ATTRS 3754_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3755 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3756 (__v4sf) __B, 3757 (__v4sf) __W, 3758 (__mmask8) __U); 3759} 3760 3761static __inline__ __m128 __DEFAULT_FN_ATTRS 3762_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3763 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3764 (__v4sf) __B, 3765 (__v4sf) 3766 _mm_setzero_ps (), 3767 (__mmask8) __U); 3768} 3769 3770static __inline__ __m256 __DEFAULT_FN_ATTRS 3771_mm256_scalef_ps (__m256 __A, __m256 __B) { 3772 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3773 (__v8sf) __B, 3774 (__v8sf) 3775 _mm256_setzero_ps (), 3776 (__mmask8) -1); 3777} 3778 3779static __inline__ __m256 __DEFAULT_FN_ATTRS 3780_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3781 __m256 __B) { 3782 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3783 (__v8sf) __B, 3784 (__v8sf) __W, 3785 (__mmask8) __U); 3786} 3787 3788static __inline__ __m256 __DEFAULT_FN_ATTRS 3789_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3790 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3791 (__v8sf) __B, 3792 (__v8sf) 3793 _mm256_setzero_ps (), 3794 (__mmask8) __U); 3795} 3796 3797#define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3798 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \ 3799 (__v2di)(__m128i)(index), \ 3800 (__v2df)(__m128d)(v1), (int)(scale)); }) 3801 3802#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3803 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \ 3804 (__v2di)(__m128i)(index), \ 3805 (__v2df)(__m128d)(v1), (int)(scale)); }) 3806 3807#define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3808 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \ 3809 (__v2di)(__m128i)(index), \ 3810 (__v2di)(__m128i)(v1), (int)(scale)); }) 3811 3812#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3813 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \ 3814 (__v2di)(__m128i)(index), \ 3815 (__v2di)(__m128i)(v1), (int)(scale)); }) 3816 3817#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3818 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \ 3819 (__v4di)(__m256i)(index), \ 3820 (__v4df)(__m256d)(v1), (int)(scale)); }) 3821 3822#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3823 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \ 3824 (__v4di)(__m256i)(index), \ 3825 (__v4df)(__m256d)(v1), (int)(scale)); }) 3826 3827#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3828 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \ 3829 (__v4di)(__m256i)(index), \ 3830 (__v4di)(__m256i)(v1), (int)(scale)); }) 3831 3832#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3833 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \ 3834 (__v4di)(__m256i)(index), \ 3835 (__v4di)(__m256i)(v1), (int)(scale)); }) 3836 3837#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3838 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \ 3839 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3840 (int)(scale)); }) 3841 3842#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3843 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \ 3844 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3845 (int)(scale)); }) 3846 3847#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3848 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \ 3849 (__v2di)(__m128i)(index), \ 3850 (__v4si)(__m128i)(v1), (int)(scale)); }) 3851 3852#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3853 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \ 3854 (__v2di)(__m128i)(index), \ 3855 (__v4si)(__m128i)(v1), (int)(scale)); }) 3856 3857#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3858 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \ 3859 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3860 (int)(scale)); }) 3861 3862#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3863 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \ 3864 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3865 (int)(scale)); }) 3866 3867#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3868 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \ 3869 (__v4di)(__m256i)(index), \ 3870 (__v4si)(__m128i)(v1), (int)(scale)); }) 3871 3872#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3873 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \ 3874 (__v4di)(__m256i)(index), \ 3875 (__v4si)(__m128i)(v1), (int)(scale)); }) 3876 3877#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3878 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \ 3879 (__v4si)(__m128i)(index), \ 3880 (__v2df)(__m128d)(v1), (int)(scale)); }) 3881 3882#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3883 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \ 3884 (__v4si)(__m128i)(index), \ 3885 (__v2df)(__m128d)(v1), (int)(scale)); }) 3886 3887#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3888 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \ 3889 (__v4si)(__m128i)(index), \ 3890 (__v2di)(__m128i)(v1), (int)(scale)); }) 3891 3892#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3893 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \ 3894 (__v4si)(__m128i)(index), \ 3895 (__v2di)(__m128i)(v1), (int)(scale)); }) 3896 3897#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3898 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \ 3899 (__v4si)(__m128i)(index), \ 3900 (__v4df)(__m256d)(v1), (int)(scale)); }) 3901 3902#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3903 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \ 3904 (__v4si)(__m128i)(index), \ 3905 (__v4df)(__m256d)(v1), (int)(scale)); }) 3906 3907#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3908 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \ 3909 (__v4si)(__m128i)(index), \ 3910 (__v4di)(__m256i)(v1), (int)(scale)); }) 3911 3912#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3913 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \ 3914 (__v4si)(__m128i)(index), \ 3915 (__v4di)(__m256i)(v1), (int)(scale)); }) 3916 3917#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3918 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \ 3919 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3920 (int)(scale)); }) 3921 3922#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3923 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \ 3924 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3925 (int)(scale)); }) 3926 3927#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3928 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \ 3929 (__v4si)(__m128i)(index), \ 3930 (__v4si)(__m128i)(v1), (int)(scale)); }) 3931 3932#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3933 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \ 3934 (__v4si)(__m128i)(index), \ 3935 (__v4si)(__m128i)(v1), (int)(scale)); }) 3936 3937#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3938 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \ 3939 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3940 (int)(scale)); }) 3941 3942#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3943 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \ 3944 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3945 (int)(scale)); }) 3946 3947#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3948 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \ 3949 (__v8si)(__m256i)(index), \ 3950 (__v8si)(__m256i)(v1), (int)(scale)); }) 3951 3952#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3953 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \ 3954 (__v8si)(__m256i)(index), \ 3955 (__v8si)(__m256i)(v1), (int)(scale)); }) 3956 3957static __inline__ __m128d __DEFAULT_FN_ATTRS 3958_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 3959 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3960 (__v2df)_mm_sqrt_pd(__A), 3961 (__v2df)__W); 3962} 3963 3964static __inline__ __m128d __DEFAULT_FN_ATTRS 3965_mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 3966 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3967 (__v2df)_mm_sqrt_pd(__A), 3968 (__v2df)_mm_setzero_pd()); 3969} 3970 3971static __inline__ __m256d __DEFAULT_FN_ATTRS 3972_mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 3973 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3974 (__v4df)_mm256_sqrt_pd(__A), 3975 (__v4df)__W); 3976} 3977 3978static __inline__ __m256d __DEFAULT_FN_ATTRS 3979_mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 3980 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3981 (__v4df)_mm256_sqrt_pd(__A), 3982 (__v4df)_mm256_setzero_pd()); 3983} 3984 3985static __inline__ __m128 __DEFAULT_FN_ATTRS 3986_mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 3987 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3988 (__v4sf)_mm_sqrt_ps(__A), 3989 (__v4sf)__W); 3990} 3991 3992static __inline__ __m128 __DEFAULT_FN_ATTRS 3993_mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 3994 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3995 (__v4sf)_mm_sqrt_ps(__A), 3996 (__v4sf)_mm_setzero_pd()); 3997} 3998 3999static __inline__ __m256 __DEFAULT_FN_ATTRS 4000_mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 4001 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4002 (__v8sf)_mm256_sqrt_ps(__A), 4003 (__v8sf)__W); 4004} 4005 4006static __inline__ __m256 __DEFAULT_FN_ATTRS 4007_mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 4008 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4009 (__v8sf)_mm256_sqrt_ps(__A), 4010 (__v8sf)_mm256_setzero_ps()); 4011} 4012 4013static __inline__ __m128d __DEFAULT_FN_ATTRS 4014_mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 4015 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 4016 (__v2df)_mm_sub_pd(__A, __B), 4017 (__v2df)__W); 4018} 4019 4020static __inline__ __m128d __DEFAULT_FN_ATTRS 4021_mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 4022 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 4023 (__v2df)_mm_sub_pd(__A, __B), 4024 (__v2df)_mm_setzero_pd()); 4025} 4026 4027static __inline__ __m256d __DEFAULT_FN_ATTRS 4028_mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 4029 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 4030 (__v4df)_mm256_sub_pd(__A, __B), 4031 (__v4df)__W); 4032} 4033 4034static __inline__ __m256d __DEFAULT_FN_ATTRS 4035_mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 4036 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 4037 (__v4df)_mm256_sub_pd(__A, __B), 4038 (__v4df)_mm256_setzero_pd()); 4039} 4040 4041static __inline__ __m128 __DEFAULT_FN_ATTRS 4042_mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 4043 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 4044 (__v4sf)_mm_sub_ps(__A, __B), 4045 (__v4sf)__W); 4046} 4047 4048static __inline__ __m128 __DEFAULT_FN_ATTRS 4049_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 4050 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 4051 (__v4sf)_mm_sub_ps(__A, __B), 4052 (__v4sf)_mm_setzero_ps()); 4053} 4054 4055static __inline__ __m256 __DEFAULT_FN_ATTRS 4056_mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 4057 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4058 (__v8sf)_mm256_sub_ps(__A, __B), 4059 (__v8sf)__W); 4060} 4061 4062static __inline__ __m256 __DEFAULT_FN_ATTRS 4063_mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 4064 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 4065 (__v8sf)_mm256_sub_ps(__A, __B), 4066 (__v8sf)_mm256_setzero_ps()); 4067} 4068 4069static __inline__ __m128i __DEFAULT_FN_ATTRS 4070_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U, 4071 __m128i __B) { 4072 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A, 4073 (__v4si) __I 4074 /* idx */ , 4075 (__v4si) __B, 4076 (__mmask8) __U); 4077} 4078 4079static __inline__ __m256i __DEFAULT_FN_ATTRS 4080_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I, 4081 __mmask8 __U, __m256i __B) { 4082 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A, 4083 (__v8si) __I 4084 /* idx */ , 4085 (__v8si) __B, 4086 (__mmask8) __U); 4087} 4088 4089static __inline__ __m128d __DEFAULT_FN_ATTRS 4090_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U, 4091 __m128d __B) { 4092 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A, 4093 (__v2di) __I 4094 /* idx */ , 4095 (__v2df) __B, 4096 (__mmask8) 4097 __U); 4098} 4099 4100static __inline__ __m256d __DEFAULT_FN_ATTRS 4101_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U, 4102 __m256d __B) { 4103 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A, 4104 (__v4di) __I 4105 /* idx */ , 4106 (__v4df) __B, 4107 (__mmask8) 4108 __U); 4109} 4110 4111static __inline__ __m128 __DEFAULT_FN_ATTRS 4112_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U, 4113 __m128 __B) { 4114 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A, 4115 (__v4si) __I 4116 /* idx */ , 4117 (__v4sf) __B, 4118 (__mmask8) __U); 4119} 4120 4121static __inline__ __m256 __DEFAULT_FN_ATTRS 4122_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U, 4123 __m256 __B) { 4124 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A, 4125 (__v8si) __I 4126 /* idx */ , 4127 (__v8sf) __B, 4128 (__mmask8) __U); 4129} 4130 4131static __inline__ __m128i __DEFAULT_FN_ATTRS 4132_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U, 4133 __m128i __B) { 4134 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A, 4135 (__v2di) __I 4136 /* idx */ , 4137 (__v2di) __B, 4138 (__mmask8) __U); 4139} 4140 4141static __inline__ __m256i __DEFAULT_FN_ATTRS 4142_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I, 4143 __mmask8 __U, __m256i __B) { 4144 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A, 4145 (__v4di) __I 4146 /* idx */ , 4147 (__v4di) __B, 4148 (__mmask8) __U); 4149} 4150 4151static __inline__ __m128i __DEFAULT_FN_ATTRS 4152_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) { 4153 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 4154 /* idx */ , 4155 (__v4si) __A, 4156 (__v4si) __B, 4157 (__mmask8) -1); 4158} 4159 4160static __inline__ __m128i __DEFAULT_FN_ATTRS 4161_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I, 4162 __m128i __B) { 4163 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 4164 /* idx */ , 4165 (__v4si) __A, 4166 (__v4si) __B, 4167 (__mmask8) __U); 4168} 4169 4170static __inline__ __m128i __DEFAULT_FN_ATTRS 4171_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I, 4172 __m128i __B) { 4173 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I 4174 /* idx */ , 4175 (__v4si) __A, 4176 (__v4si) __B, 4177 (__mmask8) 4178 __U); 4179} 4180 4181static __inline__ __m256i __DEFAULT_FN_ATTRS 4182_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) { 4183 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 4184 /* idx */ , 4185 (__v8si) __A, 4186 (__v8si) __B, 4187 (__mmask8) -1); 4188} 4189 4190static __inline__ __m256i __DEFAULT_FN_ATTRS 4191_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I, 4192 __m256i __B) { 4193 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 4194 /* idx */ , 4195 (__v8si) __A, 4196 (__v8si) __B, 4197 (__mmask8) __U); 4198} 4199 4200static __inline__ __m256i __DEFAULT_FN_ATTRS 4201_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A, 4202 __m256i __I, __m256i __B) { 4203 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I 4204 /* idx */ , 4205 (__v8si) __A, 4206 (__v8si) __B, 4207 (__mmask8) 4208 __U); 4209} 4210 4211static __inline__ __m128d __DEFAULT_FN_ATTRS 4212_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) { 4213 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 4214 /* idx */ , 4215 (__v2df) __A, 4216 (__v2df) __B, 4217 (__mmask8) - 4218 1); 4219} 4220 4221static __inline__ __m128d __DEFAULT_FN_ATTRS 4222_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I, 4223 __m128d __B) { 4224 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 4225 /* idx */ , 4226 (__v2df) __A, 4227 (__v2df) __B, 4228 (__mmask8) 4229 __U); 4230} 4231 4232static __inline__ __m128d __DEFAULT_FN_ATTRS 4233_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I, 4234 __m128d __B) { 4235 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I 4236 /* idx */ , 4237 (__v2df) __A, 4238 (__v2df) __B, 4239 (__mmask8) 4240 __U); 4241} 4242 4243static __inline__ __m256d __DEFAULT_FN_ATTRS 4244_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) { 4245 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 4246 /* idx */ , 4247 (__v4df) __A, 4248 (__v4df) __B, 4249 (__mmask8) - 4250 1); 4251} 4252 4253static __inline__ __m256d __DEFAULT_FN_ATTRS 4254_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I, 4255 __m256d __B) { 4256 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 4257 /* idx */ , 4258 (__v4df) __A, 4259 (__v4df) __B, 4260 (__mmask8) 4261 __U); 4262} 4263 4264static __inline__ __m256d __DEFAULT_FN_ATTRS 4265_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I, 4266 __m256d __B) { 4267 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I 4268 /* idx */ , 4269 (__v4df) __A, 4270 (__v4df) __B, 4271 (__mmask8) 4272 __U); 4273} 4274 4275static __inline__ __m128 __DEFAULT_FN_ATTRS 4276_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) { 4277 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 4278 /* idx */ , 4279 (__v4sf) __A, 4280 (__v4sf) __B, 4281 (__mmask8) -1); 4282} 4283 4284static __inline__ __m128 __DEFAULT_FN_ATTRS 4285_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I, 4286 __m128 __B) { 4287 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 4288 /* idx */ , 4289 (__v4sf) __A, 4290 (__v4sf) __B, 4291 (__mmask8) __U); 4292} 4293 4294static __inline__ __m128 __DEFAULT_FN_ATTRS 4295_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I, 4296 __m128 __B) { 4297 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I 4298 /* idx */ , 4299 (__v4sf) __A, 4300 (__v4sf) __B, 4301 (__mmask8) 4302 __U); 4303} 4304 4305static __inline__ __m256 __DEFAULT_FN_ATTRS 4306_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) { 4307 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 4308 /* idx */ , 4309 (__v8sf) __A, 4310 (__v8sf) __B, 4311 (__mmask8) -1); 4312} 4313 4314static __inline__ __m256 __DEFAULT_FN_ATTRS 4315_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I, 4316 __m256 __B) { 4317 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 4318 /* idx */ , 4319 (__v8sf) __A, 4320 (__v8sf) __B, 4321 (__mmask8) __U); 4322} 4323 4324static __inline__ __m256 __DEFAULT_FN_ATTRS 4325_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I, 4326 __m256 __B) { 4327 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I 4328 /* idx */ , 4329 (__v8sf) __A, 4330 (__v8sf) __B, 4331 (__mmask8) 4332 __U); 4333} 4334 4335static __inline__ __m128i __DEFAULT_FN_ATTRS 4336_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) { 4337 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 4338 /* idx */ , 4339 (__v2di) __A, 4340 (__v2di) __B, 4341 (__mmask8) -1); 4342} 4343 4344static __inline__ __m128i __DEFAULT_FN_ATTRS 4345_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I, 4346 __m128i __B) { 4347 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 4348 /* idx */ , 4349 (__v2di) __A, 4350 (__v2di) __B, 4351 (__mmask8) __U); 4352} 4353 4354static __inline__ __m128i __DEFAULT_FN_ATTRS 4355_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I, 4356 __m128i __B) { 4357 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I 4358 /* idx */ , 4359 (__v2di) __A, 4360 (__v2di) __B, 4361 (__mmask8) 4362 __U); 4363} 4364 4365 4366static __inline__ __m256i __DEFAULT_FN_ATTRS 4367_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) { 4368 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 4369 /* idx */ , 4370 (__v4di) __A, 4371 (__v4di) __B, 4372 (__mmask8) -1); 4373} 4374 4375static __inline__ __m256i __DEFAULT_FN_ATTRS 4376_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I, 4377 __m256i __B) { 4378 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 4379 /* idx */ , 4380 (__v4di) __A, 4381 (__v4di) __B, 4382 (__mmask8) __U); 4383} 4384 4385static __inline__ __m256i __DEFAULT_FN_ATTRS 4386_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A, 4387 __m256i __I, __m256i __B) { 4388 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I 4389 /* idx */ , 4390 (__v4di) __A, 4391 (__v4di) __B, 4392 (__mmask8) 4393 __U); 4394} 4395 4396static __inline__ __m128i __DEFAULT_FN_ATTRS 4397_mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4398{ 4399 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4400 (__v4si)_mm_cvtepi8_epi32(__A), 4401 (__v4si)__W); 4402} 4403 4404static __inline__ __m128i __DEFAULT_FN_ATTRS 4405_mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 4406{ 4407 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4408 (__v4si)_mm_cvtepi8_epi32(__A), 4409 (__v4si)_mm_setzero_si128()); 4410} 4411 4412static __inline__ __m256i __DEFAULT_FN_ATTRS 4413_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 4414{ 4415 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4416 (__v8si)_mm256_cvtepi8_epi32(__A), 4417 (__v8si)__W); 4418} 4419 4420static __inline__ __m256i __DEFAULT_FN_ATTRS 4421_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4422{ 4423 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4424 (__v8si)_mm256_cvtepi8_epi32(__A), 4425 (__v8si)_mm256_setzero_si256()); 4426} 4427 4428static __inline__ __m128i __DEFAULT_FN_ATTRS 4429_mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4430{ 4431 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4432 (__v2di)_mm_cvtepi8_epi64(__A), 4433 (__v2di)__W); 4434} 4435 4436static __inline__ __m128i __DEFAULT_FN_ATTRS 4437_mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4438{ 4439 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4440 (__v2di)_mm_cvtepi8_epi64(__A), 4441 (__v2di)_mm_setzero_si128()); 4442} 4443 4444static __inline__ __m256i __DEFAULT_FN_ATTRS 4445_mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4446{ 4447 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4448 (__v4di)_mm256_cvtepi8_epi64(__A), 4449 (__v4di)__W); 4450} 4451 4452static __inline__ __m256i __DEFAULT_FN_ATTRS 4453_mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4454{ 4455 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4456 (__v4di)_mm256_cvtepi8_epi64(__A), 4457 (__v4di)_mm256_setzero_si256()); 4458} 4459 4460static __inline__ __m128i __DEFAULT_FN_ATTRS 4461_mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4462{ 4463 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4464 (__v2di)_mm_cvtepi32_epi64(__X), 4465 (__v2di)__W); 4466} 4467 4468static __inline__ __m128i __DEFAULT_FN_ATTRS 4469_mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4470{ 4471 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4472 (__v2di)_mm_cvtepi32_epi64(__X), 4473 (__v2di)_mm_setzero_si128()); 4474} 4475 4476static __inline__ __m256i __DEFAULT_FN_ATTRS 4477_mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4478{ 4479 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4480 (__v4di)_mm256_cvtepi32_epi64(__X), 4481 (__v4di)__W); 4482} 4483 4484static __inline__ __m256i __DEFAULT_FN_ATTRS 4485_mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4486{ 4487 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4488 (__v4di)_mm256_cvtepi32_epi64(__X), 4489 (__v4di)_mm256_setzero_si256()); 4490} 4491 4492static __inline__ __m128i __DEFAULT_FN_ATTRS 4493_mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4494{ 4495 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4496 (__v4si)_mm_cvtepi16_epi32(__A), 4497 (__v4si)__W); 4498} 4499 4500static __inline__ __m128i __DEFAULT_FN_ATTRS 4501_mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 4502{ 4503 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4504 (__v4si)_mm_cvtepi16_epi32(__A), 4505 (__v4si)_mm_setzero_si128()); 4506} 4507 4508static __inline__ __m256i __DEFAULT_FN_ATTRS 4509_mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4510{ 4511 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4512 (__v8si)_mm256_cvtepi16_epi32(__A), 4513 (__v8si)__W); 4514} 4515 4516static __inline__ __m256i __DEFAULT_FN_ATTRS 4517_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4518{ 4519 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4520 (__v8si)_mm256_cvtepi16_epi32(__A), 4521 (__v8si)_mm256_setzero_si256()); 4522} 4523 4524static __inline__ __m128i __DEFAULT_FN_ATTRS 4525_mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4526{ 4527 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4528 (__v2di)_mm_cvtepi16_epi64(__A), 4529 (__v2di)__W); 4530} 4531 4532static __inline__ __m128i __DEFAULT_FN_ATTRS 4533_mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4534{ 4535 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4536 (__v2di)_mm_cvtepi16_epi64(__A), 4537 (__v2di)_mm_setzero_si128()); 4538} 4539 4540static __inline__ __m256i __DEFAULT_FN_ATTRS 4541_mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4542{ 4543 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4544 (__v4di)_mm256_cvtepi16_epi64(__A), 4545 (__v4di)__W); 4546} 4547 4548static __inline__ __m256i __DEFAULT_FN_ATTRS 4549_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4550{ 4551 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4552 (__v4di)_mm256_cvtepi16_epi64(__A), 4553 (__v4di)_mm256_setzero_si256()); 4554} 4555 4556 4557static __inline__ __m128i __DEFAULT_FN_ATTRS 4558_mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4559{ 4560 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4561 (__v4si)_mm_cvtepu8_epi32(__A), 4562 (__v4si)__W); 4563} 4564 4565static __inline__ __m128i __DEFAULT_FN_ATTRS 4566_mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4567{ 4568 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4569 (__v4si)_mm_cvtepu8_epi32(__A), 4570 (__v4si)_mm_setzero_si128()); 4571} 4572 4573static __inline__ __m256i __DEFAULT_FN_ATTRS 4574_mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4575{ 4576 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4577 (__v8si)_mm256_cvtepu8_epi32(__A), 4578 (__v8si)__W); 4579} 4580 4581static __inline__ __m256i __DEFAULT_FN_ATTRS 4582_mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4583{ 4584 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4585 (__v8si)_mm256_cvtepu8_epi32(__A), 4586 (__v8si)_mm256_setzero_si256()); 4587} 4588 4589static __inline__ __m128i __DEFAULT_FN_ATTRS 4590_mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4591{ 4592 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4593 (__v2di)_mm_cvtepu8_epi64(__A), 4594 (__v2di)__W); 4595} 4596 4597static __inline__ __m128i __DEFAULT_FN_ATTRS 4598_mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4599{ 4600 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4601 (__v2di)_mm_cvtepu8_epi64(__A), 4602 (__v2di)_mm_setzero_si128()); 4603} 4604 4605static __inline__ __m256i __DEFAULT_FN_ATTRS 4606_mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4607{ 4608 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4609 (__v4di)_mm256_cvtepu8_epi64(__A), 4610 (__v4di)__W); 4611} 4612 4613static __inline__ __m256i __DEFAULT_FN_ATTRS 4614_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4615{ 4616 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4617 (__v4di)_mm256_cvtepu8_epi64(__A), 4618 (__v4di)_mm256_setzero_si256()); 4619} 4620 4621static __inline__ __m128i __DEFAULT_FN_ATTRS 4622_mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4623{ 4624 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4625 (__v2di)_mm_cvtepu32_epi64(__X), 4626 (__v2di)__W); 4627} 4628 4629static __inline__ __m128i __DEFAULT_FN_ATTRS 4630_mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4631{ 4632 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4633 (__v2di)_mm_cvtepu32_epi64(__X), 4634 (__v2di)_mm_setzero_si128()); 4635} 4636 4637static __inline__ __m256i __DEFAULT_FN_ATTRS 4638_mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4639{ 4640 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4641 (__v4di)_mm256_cvtepu32_epi64(__X), 4642 (__v4di)__W); 4643} 4644 4645static __inline__ __m256i __DEFAULT_FN_ATTRS 4646_mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4647{ 4648 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4649 (__v4di)_mm256_cvtepu32_epi64(__X), 4650 (__v4di)_mm256_setzero_si256()); 4651} 4652 4653static __inline__ __m128i __DEFAULT_FN_ATTRS 4654_mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4655{ 4656 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4657 (__v4si)_mm_cvtepu16_epi32(__A), 4658 (__v4si)__W); 4659} 4660 4661static __inline__ __m128i __DEFAULT_FN_ATTRS 4662_mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4663{ 4664 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4665 (__v4si)_mm_cvtepu16_epi32(__A), 4666 (__v4si)_mm_setzero_si128()); 4667} 4668 4669static __inline__ __m256i __DEFAULT_FN_ATTRS 4670_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4671{ 4672 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4673 (__v8si)_mm256_cvtepu16_epi32(__A), 4674 (__v8si)__W); 4675} 4676 4677static __inline__ __m256i __DEFAULT_FN_ATTRS 4678_mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4679{ 4680 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4681 (__v8si)_mm256_cvtepu16_epi32(__A), 4682 (__v8si)_mm256_setzero_si256()); 4683} 4684 4685static __inline__ __m128i __DEFAULT_FN_ATTRS 4686_mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4687{ 4688 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4689 (__v2di)_mm_cvtepu16_epi64(__A), 4690 (__v2di)__W); 4691} 4692 4693static __inline__ __m128i __DEFAULT_FN_ATTRS 4694_mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4695{ 4696 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4697 (__v2di)_mm_cvtepu16_epi64(__A), 4698 (__v2di)_mm_setzero_si128()); 4699} 4700 4701static __inline__ __m256i __DEFAULT_FN_ATTRS 4702_mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4703{ 4704 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4705 (__v4di)_mm256_cvtepu16_epi64(__A), 4706 (__v4di)__W); 4707} 4708 4709static __inline__ __m256i __DEFAULT_FN_ATTRS 4710_mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4711{ 4712 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4713 (__v4di)_mm256_cvtepu16_epi64(__A), 4714 (__v4di)_mm256_setzero_si256()); 4715} 4716 4717 4718#define _mm_rol_epi32(a, b) __extension__ ({\ 4719 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4720 (__v4si)_mm_setzero_si128(), \ 4721 (__mmask8)-1); }) 4722 4723#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\ 4724 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4725 (__v4si)(__m128i)(w), (__mmask8)(u)); }) 4726 4727#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\ 4728 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4729 (__v4si)_mm_setzero_si128(), \ 4730 (__mmask8)(u)); }) 4731 4732#define _mm256_rol_epi32(a, b) __extension__ ({\ 4733 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4734 (__v8si)_mm256_setzero_si256(), \ 4735 (__mmask8)-1); }) 4736 4737#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\ 4738 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4739 (__v8si)(__m256i)(w), (__mmask8)(u)); }) 4740 4741#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\ 4742 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4743 (__v8si)_mm256_setzero_si256(), \ 4744 (__mmask8)(u)); }) 4745 4746#define _mm_rol_epi64(a, b) __extension__ ({\ 4747 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4748 (__v2di)_mm_setzero_di(), \ 4749 (__mmask8)-1); }) 4750 4751#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\ 4752 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4753 (__v2di)(__m128i)(w), (__mmask8)(u)); }) 4754 4755#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\ 4756 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4757 (__v2di)_mm_setzero_di(), \ 4758 (__mmask8)(u)); }) 4759 4760#define _mm256_rol_epi64(a, b) __extension__ ({\ 4761 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4762 (__v4di)_mm256_setzero_si256(), \ 4763 (__mmask8)-1); }) 4764 4765#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\ 4766 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4767 (__v4di)(__m256i)(w), (__mmask8)(u)); }) 4768 4769#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\ 4770 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4771 (__v4di)_mm256_setzero_si256(), \ 4772 (__mmask8)(u)); }) 4773 4774static __inline__ __m128i __DEFAULT_FN_ATTRS 4775_mm_rolv_epi32 (__m128i __A, __m128i __B) 4776{ 4777 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4778 (__v4si) __B, 4779 (__v4si) 4780 _mm_setzero_si128 (), 4781 (__mmask8) -1); 4782} 4783 4784static __inline__ __m128i __DEFAULT_FN_ATTRS 4785_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 4786 __m128i __B) 4787{ 4788 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4789 (__v4si) __B, 4790 (__v4si) __W, 4791 (__mmask8) __U); 4792} 4793 4794static __inline__ __m128i __DEFAULT_FN_ATTRS 4795_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4796{ 4797 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4798 (__v4si) __B, 4799 (__v4si) 4800 _mm_setzero_si128 (), 4801 (__mmask8) __U); 4802} 4803 4804static __inline__ __m256i __DEFAULT_FN_ATTRS 4805_mm256_rolv_epi32 (__m256i __A, __m256i __B) 4806{ 4807 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4808 (__v8si) __B, 4809 (__v8si) 4810 _mm256_setzero_si256 (), 4811 (__mmask8) -1); 4812} 4813 4814static __inline__ __m256i __DEFAULT_FN_ATTRS 4815_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 4816 __m256i __B) 4817{ 4818 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4819 (__v8si) __B, 4820 (__v8si) __W, 4821 (__mmask8) __U); 4822} 4823 4824static __inline__ __m256i __DEFAULT_FN_ATTRS 4825_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4826{ 4827 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4828 (__v8si) __B, 4829 (__v8si) 4830 _mm256_setzero_si256 (), 4831 (__mmask8) __U); 4832} 4833 4834static __inline__ __m128i __DEFAULT_FN_ATTRS 4835_mm_rolv_epi64 (__m128i __A, __m128i __B) 4836{ 4837 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4838 (__v2di) __B, 4839 (__v2di) 4840 _mm_setzero_di (), 4841 (__mmask8) -1); 4842} 4843 4844static __inline__ __m128i __DEFAULT_FN_ATTRS 4845_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 4846 __m128i __B) 4847{ 4848 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4849 (__v2di) __B, 4850 (__v2di) __W, 4851 (__mmask8) __U); 4852} 4853 4854static __inline__ __m128i __DEFAULT_FN_ATTRS 4855_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4856{ 4857 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 4858 (__v2di) __B, 4859 (__v2di) 4860 _mm_setzero_di (), 4861 (__mmask8) __U); 4862} 4863 4864static __inline__ __m256i __DEFAULT_FN_ATTRS 4865_mm256_rolv_epi64 (__m256i __A, __m256i __B) 4866{ 4867 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4868 (__v4di) __B, 4869 (__v4di) 4870 _mm256_setzero_si256 (), 4871 (__mmask8) -1); 4872} 4873 4874static __inline__ __m256i __DEFAULT_FN_ATTRS 4875_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 4876 __m256i __B) 4877{ 4878 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4879 (__v4di) __B, 4880 (__v4di) __W, 4881 (__mmask8) __U); 4882} 4883 4884static __inline__ __m256i __DEFAULT_FN_ATTRS 4885_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4886{ 4887 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 4888 (__v4di) __B, 4889 (__v4di) 4890 _mm256_setzero_si256 (), 4891 (__mmask8) __U); 4892} 4893 4894#define _mm_ror_epi32(A, B) __extension__ ({ \ 4895 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4896 (__v4si)_mm_setzero_si128(), \ 4897 (__mmask8)-1); }) 4898 4899#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 4900 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4901 (__v4si)(__m128i)(W), (__mmask8)(U)); }) 4902 4903#define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \ 4904 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 4905 (__v4si)_mm_setzero_si128(), \ 4906 (__mmask8)(U)); }) 4907 4908#define _mm256_ror_epi32(A, B) __extension__ ({ \ 4909 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4910 (__v8si)_mm256_setzero_si256(), \ 4911 (__mmask8)-1); }) 4912 4913#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 4914 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4915 (__v8si)(__m256i)(W), (__mmask8)(U)); }) 4916 4917#define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \ 4918 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 4919 (__v8si)_mm256_setzero_si256(), \ 4920 (__mmask8)(U)); }) 4921 4922#define _mm_ror_epi64(A, B) __extension__ ({ \ 4923 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4924 (__v2di)_mm_setzero_di(), \ 4925 (__mmask8)-1); }) 4926 4927#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 4928 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4929 (__v2di)(__m128i)(W), (__mmask8)(U)); }) 4930 4931#define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \ 4932 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 4933 (__v2di)_mm_setzero_di(), \ 4934 (__mmask8)(U)); }) 4935 4936#define _mm256_ror_epi64(A, B) __extension__ ({ \ 4937 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4938 (__v4di)_mm256_setzero_si256(), \ 4939 (__mmask8)-1); }) 4940 4941#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 4942 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4943 (__v4di)(__m256i)(W), (__mmask8)(U)); }) 4944 4945#define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \ 4946 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 4947 (__v4di)_mm256_setzero_si256(), \ 4948 (__mmask8)(U)); }) 4949 4950static __inline__ __m128i __DEFAULT_FN_ATTRS 4951_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4952{ 4953 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4954 (__v4si)_mm_sll_epi32(__A, __B), 4955 (__v4si)__W); 4956} 4957 4958static __inline__ __m128i __DEFAULT_FN_ATTRS 4959_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4960{ 4961 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4962 (__v4si)_mm_sll_epi32(__A, __B), 4963 (__v4si)_mm_setzero_si128()); 4964} 4965 4966static __inline__ __m256i __DEFAULT_FN_ATTRS 4967_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4968{ 4969 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4970 (__v8si)_mm256_sll_epi32(__A, __B), 4971 (__v8si)__W); 4972} 4973 4974static __inline__ __m256i __DEFAULT_FN_ATTRS 4975_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4976{ 4977 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4978 (__v8si)_mm256_sll_epi32(__A, __B), 4979 (__v8si)_mm256_setzero_si256()); 4980} 4981 4982static __inline__ __m128i __DEFAULT_FN_ATTRS 4983_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 4984{ 4985 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4986 (__v4si)_mm_slli_epi32(__A, __B), 4987 (__v4si)__W); 4988} 4989 4990static __inline__ __m128i __DEFAULT_FN_ATTRS 4991_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B) 4992{ 4993 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4994 (__v4si)_mm_slli_epi32(__A, __B), 4995 (__v4si)_mm_setzero_si128()); 4996} 4997 4998static __inline__ __m256i __DEFAULT_FN_ATTRS 4999_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5000{ 5001 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5002 (__v8si)_mm256_slli_epi32(__A, __B), 5003 (__v8si)__W); 5004} 5005 5006static __inline__ __m256i __DEFAULT_FN_ATTRS 5007_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B) 5008{ 5009 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5010 (__v8si)_mm256_slli_epi32(__A, __B), 5011 (__v8si)_mm256_setzero_si256()); 5012} 5013 5014static __inline__ __m128i __DEFAULT_FN_ATTRS 5015_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5016{ 5017 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5018 (__v2di)_mm_sll_epi64(__A, __B), 5019 (__v2di)__W); 5020} 5021 5022static __inline__ __m128i __DEFAULT_FN_ATTRS 5023_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 5024{ 5025 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5026 (__v2di)_mm_sll_epi64(__A, __B), 5027 (__v2di)_mm_setzero_di()); 5028} 5029 5030static __inline__ __m256i __DEFAULT_FN_ATTRS 5031_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5032{ 5033 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5034 (__v4di)_mm256_sll_epi64(__A, __B), 5035 (__v4di)__W); 5036} 5037 5038static __inline__ __m256i __DEFAULT_FN_ATTRS 5039_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 5040{ 5041 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5042 (__v4di)_mm256_sll_epi64(__A, __B), 5043 (__v4di)_mm256_setzero_si256()); 5044} 5045 5046static __inline__ __m128i __DEFAULT_FN_ATTRS 5047_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5048{ 5049 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5050 (__v2di)_mm_slli_epi64(__A, __B), 5051 (__v2di)__W); 5052} 5053 5054static __inline__ __m128i __DEFAULT_FN_ATTRS 5055_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B) 5056{ 5057 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5058 (__v2di)_mm_slli_epi64(__A, __B), 5059 (__v2di)_mm_setzero_di()); 5060} 5061 5062static __inline__ __m256i __DEFAULT_FN_ATTRS 5063_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5064{ 5065 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5066 (__v4di)_mm256_slli_epi64(__A, __B), 5067 (__v4di)__W); 5068} 5069 5070static __inline__ __m256i __DEFAULT_FN_ATTRS 5071_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B) 5072{ 5073 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5074 (__v4di)_mm256_slli_epi64(__A, __B), 5075 (__v4di)_mm256_setzero_si256()); 5076} 5077 5078static __inline__ __m128i __DEFAULT_FN_ATTRS 5079_mm_rorv_epi32 (__m128i __A, __m128i __B) 5080{ 5081 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5082 (__v4si) __B, 5083 (__v4si) 5084 _mm_setzero_si128 (), 5085 (__mmask8) -1); 5086} 5087 5088static __inline__ __m128i __DEFAULT_FN_ATTRS 5089_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 5090 __m128i __B) 5091{ 5092 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5093 (__v4si) __B, 5094 (__v4si) __W, 5095 (__mmask8) __U); 5096} 5097 5098static __inline__ __m128i __DEFAULT_FN_ATTRS 5099_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 5100{ 5101 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5102 (__v4si) __B, 5103 (__v4si) 5104 _mm_setzero_si128 (), 5105 (__mmask8) __U); 5106} 5107 5108static __inline__ __m256i __DEFAULT_FN_ATTRS 5109_mm256_rorv_epi32 (__m256i __A, __m256i __B) 5110{ 5111 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5112 (__v8si) __B, 5113 (__v8si) 5114 _mm256_setzero_si256 (), 5115 (__mmask8) -1); 5116} 5117 5118static __inline__ __m256i __DEFAULT_FN_ATTRS 5119_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 5120 __m256i __B) 5121{ 5122 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5123 (__v8si) __B, 5124 (__v8si) __W, 5125 (__mmask8) __U); 5126} 5127 5128static __inline__ __m256i __DEFAULT_FN_ATTRS 5129_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 5130{ 5131 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5132 (__v8si) __B, 5133 (__v8si) 5134 _mm256_setzero_si256 (), 5135 (__mmask8) __U); 5136} 5137 5138static __inline__ __m128i __DEFAULT_FN_ATTRS 5139_mm_rorv_epi64 (__m128i __A, __m128i __B) 5140{ 5141 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5142 (__v2di) __B, 5143 (__v2di) 5144 _mm_setzero_di (), 5145 (__mmask8) -1); 5146} 5147 5148static __inline__ __m128i __DEFAULT_FN_ATTRS 5149_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5150 __m128i __B) 5151{ 5152 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5153 (__v2di) __B, 5154 (__v2di) __W, 5155 (__mmask8) __U); 5156} 5157 5158static __inline__ __m128i __DEFAULT_FN_ATTRS 5159_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5160{ 5161 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5162 (__v2di) __B, 5163 (__v2di) 5164 _mm_setzero_di (), 5165 (__mmask8) __U); 5166} 5167 5168static __inline__ __m256i __DEFAULT_FN_ATTRS 5169_mm256_rorv_epi64 (__m256i __A, __m256i __B) 5170{ 5171 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5172 (__v4di) __B, 5173 (__v4di) 5174 _mm256_setzero_si256 (), 5175 (__mmask8) -1); 5176} 5177 5178static __inline__ __m256i __DEFAULT_FN_ATTRS 5179_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5180 __m256i __B) 5181{ 5182 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5183 (__v4di) __B, 5184 (__v4di) __W, 5185 (__mmask8) __U); 5186} 5187 5188static __inline__ __m256i __DEFAULT_FN_ATTRS 5189_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 5190{ 5191 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5192 (__v4di) __B, 5193 (__v4di) 5194 _mm256_setzero_si256 (), 5195 (__mmask8) __U); 5196} 5197 5198static __inline__ __m128i __DEFAULT_FN_ATTRS 5199_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5200{ 5201 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5202 (__v2di)_mm_sllv_epi64(__X, __Y), 5203 (__v2di)__W); 5204} 5205 5206static __inline__ __m128i __DEFAULT_FN_ATTRS 5207_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5208{ 5209 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5210 (__v2di)_mm_sllv_epi64(__X, __Y), 5211 (__v2di)_mm_setzero_di()); 5212} 5213 5214static __inline__ __m256i __DEFAULT_FN_ATTRS 5215_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5216{ 5217 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5218 (__v4di)_mm256_sllv_epi64(__X, __Y), 5219 (__v4di)__W); 5220} 5221 5222static __inline__ __m256i __DEFAULT_FN_ATTRS 5223_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 5224{ 5225 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5226 (__v4di)_mm256_sllv_epi64(__X, __Y), 5227 (__v4di)_mm256_setzero_si256()); 5228} 5229 5230static __inline__ __m128i __DEFAULT_FN_ATTRS 5231_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5232{ 5233 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5234 (__v4si)_mm_sllv_epi32(__X, __Y), 5235 (__v4si)__W); 5236} 5237 5238static __inline__ __m128i __DEFAULT_FN_ATTRS 5239_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5240{ 5241 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5242 (__v4si)_mm_sllv_epi32(__X, __Y), 5243 (__v4si)_mm_setzero_si128()); 5244} 5245 5246static __inline__ __m256i __DEFAULT_FN_ATTRS 5247_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5248{ 5249 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5250 (__v8si)_mm256_sllv_epi32(__X, __Y), 5251 (__v8si)__W); 5252} 5253 5254static __inline__ __m256i __DEFAULT_FN_ATTRS 5255_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5256{ 5257 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5258 (__v8si)_mm256_sllv_epi32(__X, __Y), 5259 (__v8si)_mm256_setzero_si256()); 5260} 5261 5262static __inline__ __m128i __DEFAULT_FN_ATTRS 5263_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5264{ 5265 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5266 (__v2di)_mm_srlv_epi64(__X, __Y), 5267 (__v2di)__W); 5268} 5269 5270static __inline__ __m128i __DEFAULT_FN_ATTRS 5271_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5272{ 5273 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5274 (__v2di)_mm_srlv_epi64(__X, __Y), 5275 (__v2di)_mm_setzero_di()); 5276} 5277 5278static __inline__ __m256i __DEFAULT_FN_ATTRS 5279_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5280{ 5281 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5282 (__v4di)_mm256_srlv_epi64(__X, __Y), 5283 (__v4di)__W); 5284} 5285 5286static __inline__ __m256i __DEFAULT_FN_ATTRS 5287_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 5288{ 5289 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5290 (__v4di)_mm256_srlv_epi64(__X, __Y), 5291 (__v4di)_mm256_setzero_si256()); 5292} 5293 5294static __inline__ __m128i __DEFAULT_FN_ATTRS 5295_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5296{ 5297 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5298 (__v4si)_mm_srlv_epi32(__X, __Y), 5299 (__v4si)__W); 5300} 5301 5302static __inline__ __m128i __DEFAULT_FN_ATTRS 5303_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5304{ 5305 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5306 (__v4si)_mm_srlv_epi32(__X, __Y), 5307 (__v4si)_mm_setzero_si128()); 5308} 5309 5310static __inline__ __m256i __DEFAULT_FN_ATTRS 5311_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5312{ 5313 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5314 (__v8si)_mm256_srlv_epi32(__X, __Y), 5315 (__v8si)__W); 5316} 5317 5318static __inline__ __m256i __DEFAULT_FN_ATTRS 5319_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5320{ 5321 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5322 (__v8si)_mm256_srlv_epi32(__X, __Y), 5323 (__v8si)_mm256_setzero_si256()); 5324} 5325 5326static __inline__ __m128i __DEFAULT_FN_ATTRS 5327_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5328{ 5329 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5330 (__v4si)_mm_srl_epi32(__A, __B), 5331 (__v4si)__W); 5332} 5333 5334static __inline__ __m128i __DEFAULT_FN_ATTRS 5335_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 5336{ 5337 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5338 (__v4si)_mm_srl_epi32(__A, __B), 5339 (__v4si)_mm_setzero_si128()); 5340} 5341 5342static __inline__ __m256i __DEFAULT_FN_ATTRS 5343_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5344{ 5345 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5346 (__v8si)_mm256_srl_epi32(__A, __B), 5347 (__v8si)__W); 5348} 5349 5350static __inline__ __m256i __DEFAULT_FN_ATTRS 5351_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 5352{ 5353 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5354 (__v8si)_mm256_srl_epi32(__A, __B), 5355 (__v8si)_mm256_setzero_si256()); 5356} 5357 5358static __inline__ __m128i __DEFAULT_FN_ATTRS 5359_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5360{ 5361 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5362 (__v4si)_mm_srli_epi32(__A, __B), 5363 (__v4si)__W); 5364} 5365 5366static __inline__ __m128i __DEFAULT_FN_ATTRS 5367_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B) 5368{ 5369 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5370 (__v4si)_mm_srli_epi32(__A, __B), 5371 (__v4si)_mm_setzero_si128()); 5372} 5373 5374static __inline__ __m256i __DEFAULT_FN_ATTRS 5375_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5376{ 5377 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5378 (__v8si)_mm256_srli_epi32(__A, __B), 5379 (__v8si)__W); 5380} 5381 5382static __inline__ __m256i __DEFAULT_FN_ATTRS 5383_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B) 5384{ 5385 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5386 (__v8si)_mm256_srli_epi32(__A, __B), 5387 (__v8si)_mm256_setzero_si256()); 5388} 5389 5390static __inline__ __m128i __DEFAULT_FN_ATTRS 5391_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 5392{ 5393 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5394 (__v2di)_mm_srl_epi64(__A, __B), 5395 (__v2di)__W); 5396} 5397 5398static __inline__ __m128i __DEFAULT_FN_ATTRS 5399_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 5400{ 5401 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5402 (__v2di)_mm_srl_epi64(__A, __B), 5403 (__v2di)_mm_setzero_di()); 5404} 5405 5406static __inline__ __m256i __DEFAULT_FN_ATTRS 5407_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 5408{ 5409 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5410 (__v4di)_mm256_srl_epi64(__A, __B), 5411 (__v4di)__W); 5412} 5413 5414static __inline__ __m256i __DEFAULT_FN_ATTRS 5415_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 5416{ 5417 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5418 (__v4di)_mm256_srl_epi64(__A, __B), 5419 (__v4di)_mm256_setzero_si256()); 5420} 5421 5422static __inline__ __m128i __DEFAULT_FN_ATTRS 5423_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) 5424{ 5425 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5426 (__v2di)_mm_srli_epi64(__A, __B), 5427 (__v2di)__W); 5428} 5429 5430static __inline__ __m128i __DEFAULT_FN_ATTRS 5431_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B) 5432{ 5433 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5434 (__v2di)_mm_srli_epi64(__A, __B), 5435 (__v2di)_mm_setzero_di()); 5436} 5437 5438static __inline__ __m256i __DEFAULT_FN_ATTRS 5439_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) 5440{ 5441 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5442 (__v4di)_mm256_srli_epi64(__A, __B), 5443 (__v4di)__W); 5444} 5445 5446static __inline__ __m256i __DEFAULT_FN_ATTRS 5447_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B) 5448{ 5449 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5450 (__v4di)_mm256_srli_epi64(__A, __B), 5451 (__v4di)_mm256_setzero_si256()); 5452} 5453 5454static __inline__ __m128i __DEFAULT_FN_ATTRS 5455_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5456{ 5457 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5458 (__v4si)_mm_srav_epi32(__X, __Y), 5459 (__v4si)__W); 5460} 5461 5462static __inline__ __m128i __DEFAULT_FN_ATTRS 5463_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 5464{ 5465 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 5466 (__v4si)_mm_srav_epi32(__X, __Y), 5467 (__v4si)_mm_setzero_si128()); 5468} 5469 5470static __inline__ __m256i __DEFAULT_FN_ATTRS 5471_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5472{ 5473 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5474 (__v8si)_mm256_srav_epi32(__X, __Y), 5475 (__v8si)__W); 5476} 5477 5478static __inline__ __m256i __DEFAULT_FN_ATTRS 5479_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 5480{ 5481 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 5482 (__v8si)_mm256_srav_epi32(__X, __Y), 5483 (__v8si)_mm256_setzero_si256()); 5484} 5485 5486static __inline__ __m128i __DEFAULT_FN_ATTRS 5487_mm_srav_epi64 (__m128i __X, __m128i __Y) 5488{ 5489 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 5490 (__v2di) __Y, 5491 (__v2di) 5492 _mm_setzero_di (), 5493 (__mmask8) -1); 5494} 5495 5496static __inline__ __m128i __DEFAULT_FN_ATTRS 5497_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X, 5498 __m128i __Y) 5499{ 5500 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 5501 (__v2di) __Y, 5502 (__v2di) __W, 5503 (__mmask8) __U); 5504} 5505 5506static __inline__ __m128i __DEFAULT_FN_ATTRS 5507_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) 5508{ 5509 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 5510 (__v2di) __Y, 5511 (__v2di) 5512 _mm_setzero_di (), 5513 (__mmask8) __U); 5514} 5515 5516static __inline__ __m256i __DEFAULT_FN_ATTRS 5517_mm256_srav_epi64 (__m256i __X, __m256i __Y) 5518{ 5519 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 5520 (__v4di) __Y, 5521 (__v4di) 5522 _mm256_setzero_si256 (), 5523 (__mmask8) -1); 5524} 5525 5526static __inline__ __m256i __DEFAULT_FN_ATTRS 5527_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X, 5528 __m256i __Y) 5529{ 5530 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 5531 (__v4di) __Y, 5532 (__v4di) __W, 5533 (__mmask8) __U); 5534} 5535 5536static __inline__ __m256i __DEFAULT_FN_ATTRS 5537_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5538{ 5539 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 5540 (__v4di) __Y, 5541 (__v4di) 5542 _mm256_setzero_si256 (), 5543 (__mmask8) __U); 5544} 5545 5546static __inline__ __m128i __DEFAULT_FN_ATTRS 5547_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5548{ 5549 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5550 (__v4si) __A, 5551 (__v4si) __W); 5552} 5553 5554static __inline__ __m128i __DEFAULT_FN_ATTRS 5555_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5556{ 5557 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5558 (__v4si) __A, 5559 (__v4si) _mm_setzero_si128 ()); 5560} 5561 5562 5563static __inline__ __m256i __DEFAULT_FN_ATTRS 5564_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 5565{ 5566 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5567 (__v8si) __A, 5568 (__v8si) __W); 5569} 5570 5571static __inline__ __m256i __DEFAULT_FN_ATTRS 5572_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5573{ 5574 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5575 (__v8si) __A, 5576 (__v8si) _mm256_setzero_si256 ()); 5577} 5578 5579static __inline__ __m128i __DEFAULT_FN_ATTRS 5580_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5581{ 5582 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5583 (__v4si) __W, 5584 (__mmask8) 5585 __U); 5586} 5587 5588static __inline__ __m128i __DEFAULT_FN_ATTRS 5589_mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5590{ 5591 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5592 (__v4si) 5593 _mm_setzero_si128 (), 5594 (__mmask8) 5595 __U); 5596} 5597 5598static __inline__ __m256i __DEFAULT_FN_ATTRS 5599_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5600{ 5601 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5602 (__v8si) __W, 5603 (__mmask8) 5604 __U); 5605} 5606 5607static __inline__ __m256i __DEFAULT_FN_ATTRS 5608_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5609{ 5610 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5611 (__v8si) 5612 _mm256_setzero_si256 (), 5613 (__mmask8) 5614 __U); 5615} 5616 5617static __inline__ void __DEFAULT_FN_ATTRS 5618_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5619{ 5620 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5621 (__v4si) __A, 5622 (__mmask8) __U); 5623} 5624 5625static __inline__ void __DEFAULT_FN_ATTRS 5626_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5627{ 5628 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5629 (__v8si) __A, 5630 (__mmask8) __U); 5631} 5632 5633static __inline__ __m128i __DEFAULT_FN_ATTRS 5634_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5635{ 5636 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5637 (__v2di) __A, 5638 (__v2di) __W); 5639} 5640 5641static __inline__ __m128i __DEFAULT_FN_ATTRS 5642_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5643{ 5644 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5645 (__v2di) __A, 5646 (__v2di) _mm_setzero_di ()); 5647} 5648 5649static __inline__ __m256i __DEFAULT_FN_ATTRS 5650_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5651{ 5652 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5653 (__v4di) __A, 5654 (__v4di) __W); 5655} 5656 5657static __inline__ __m256i __DEFAULT_FN_ATTRS 5658_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5659{ 5660 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5661 (__v4di) __A, 5662 (__v4di) _mm256_setzero_si256 ()); 5663} 5664 5665static __inline__ __m128i __DEFAULT_FN_ATTRS 5666_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5667{ 5668 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5669 (__v2di) __W, 5670 (__mmask8) 5671 __U); 5672} 5673 5674static __inline__ __m128i __DEFAULT_FN_ATTRS 5675_mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5676{ 5677 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5678 (__v2di) 5679 _mm_setzero_di (), 5680 (__mmask8) 5681 __U); 5682} 5683 5684static __inline__ __m256i __DEFAULT_FN_ATTRS 5685_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5686{ 5687 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5688 (__v4di) __W, 5689 (__mmask8) 5690 __U); 5691} 5692 5693static __inline__ __m256i __DEFAULT_FN_ATTRS 5694_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5695{ 5696 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5697 (__v4di) 5698 _mm256_setzero_si256 (), 5699 (__mmask8) 5700 __U); 5701} 5702 5703static __inline__ void __DEFAULT_FN_ATTRS 5704_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 5705{ 5706 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5707 (__v2di) __A, 5708 (__mmask8) __U); 5709} 5710 5711static __inline__ void __DEFAULT_FN_ATTRS 5712_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5713{ 5714 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 5715 (__v4di) __A, 5716 (__mmask8) __U); 5717} 5718 5719static __inline__ __m128d __DEFAULT_FN_ATTRS 5720_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5721{ 5722 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5723 (__v2df)_mm_movedup_pd(__A), 5724 (__v2df)__W); 5725} 5726 5727static __inline__ __m128d __DEFAULT_FN_ATTRS 5728_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5729{ 5730 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5731 (__v2df)_mm_movedup_pd(__A), 5732 (__v2df)_mm_setzero_pd()); 5733} 5734 5735static __inline__ __m256d __DEFAULT_FN_ATTRS 5736_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5737{ 5738 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5739 (__v4df)_mm256_movedup_pd(__A), 5740 (__v4df)__W); 5741} 5742 5743static __inline__ __m256d __DEFAULT_FN_ATTRS 5744_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5745{ 5746 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5747 (__v4df)_mm256_movedup_pd(__A), 5748 (__v4df)_mm256_setzero_pd()); 5749} 5750 5751 5752#define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \ 5753 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \ 5754 (__v4si)(__m128i)(O), \ 5755 (__mmask8)(M)); }) 5756 5757#define _mm_maskz_set1_epi32(M, A) __extension__ ({ \ 5758 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \ 5759 (__v4si)_mm_setzero_si128(), \ 5760 (__mmask8)(M)); }) 5761 5762#define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \ 5763 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \ 5764 (__v8si)(__m256i)(O), \ 5765 (__mmask8)(M)); }) 5766 5767#define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \ 5768 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \ 5769 (__v8si)_mm256_setzero_si256(), \ 5770 (__mmask8)(M)); }) 5771 5772#ifdef __x86_64__ 5773static __inline__ __m128i __DEFAULT_FN_ATTRS 5774_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5775{ 5776 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O, 5777 __M); 5778} 5779 5780static __inline__ __m128i __DEFAULT_FN_ATTRS 5781_mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5782{ 5783 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, 5784 (__v2di) 5785 _mm_setzero_si128 (), 5786 __M); 5787} 5788 5789static __inline__ __m256i __DEFAULT_FN_ATTRS 5790_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5791{ 5792 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O, 5793 __M); 5794} 5795 5796static __inline__ __m256i __DEFAULT_FN_ATTRS 5797_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 5798{ 5799 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, 5800 (__v4di) 5801 _mm256_setzero_si256 (), 5802 __M); 5803} 5804#endif 5805 5806#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5807 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5808 (__v2df)(__m128d)(B), \ 5809 (__v2di)(__m128i)(C), (int)(imm), \ 5810 (__mmask8)-1); }) 5811 5812#define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5813 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5814 (__v2df)(__m128d)(B), \ 5815 (__v2di)(__m128i)(C), (int)(imm), \ 5816 (__mmask8)(U)); }) 5817 5818#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5819 (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 5820 (__v2df)(__m128d)(B), \ 5821 (__v2di)(__m128i)(C), \ 5822 (int)(imm), (__mmask8)(U)); }) 5823 5824#define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 5825 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5826 (__v4df)(__m256d)(B), \ 5827 (__v4di)(__m256i)(C), (int)(imm), \ 5828 (__mmask8)-1); }) 5829 5830#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 5831 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5832 (__v4df)(__m256d)(B), \ 5833 (__v4di)(__m256i)(C), (int)(imm), \ 5834 (__mmask8)(U)); }) 5835 5836#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 5837 (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 5838 (__v4df)(__m256d)(B), \ 5839 (__v4di)(__m256i)(C), \ 5840 (int)(imm), (__mmask8)(U)); }) 5841 5842#define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 5843 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5844 (__v4sf)(__m128)(B), \ 5845 (__v4si)(__m128i)(C), (int)(imm), \ 5846 (__mmask8)-1); }) 5847 5848#define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 5849 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5850 (__v4sf)(__m128)(B), \ 5851 (__v4si)(__m128i)(C), (int)(imm), \ 5852 (__mmask8)(U)); }) 5853 5854#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 5855 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 5856 (__v4sf)(__m128)(B), \ 5857 (__v4si)(__m128i)(C), (int)(imm), \ 5858 (__mmask8)(U)); }) 5859 5860#define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 5861 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5862 (__v8sf)(__m256)(B), \ 5863 (__v8si)(__m256i)(C), (int)(imm), \ 5864 (__mmask8)-1); }) 5865 5866#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 5867 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5868 (__v8sf)(__m256)(B), \ 5869 (__v8si)(__m256i)(C), (int)(imm), \ 5870 (__mmask8)(U)); }) 5871 5872#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 5873 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 5874 (__v8sf)(__m256)(B), \ 5875 (__v8si)(__m256i)(C), (int)(imm), \ 5876 (__mmask8)(U)); }) 5877 5878static __inline__ __m128d __DEFAULT_FN_ATTRS 5879_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 5880{ 5881 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 5882 (__v2df) __W, 5883 (__mmask8) __U); 5884} 5885 5886static __inline__ __m128d __DEFAULT_FN_ATTRS 5887_mm_maskz_load_pd (__mmask8 __U, void const *__P) 5888{ 5889 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 5890 (__v2df) 5891 _mm_setzero_pd (), 5892 (__mmask8) __U); 5893} 5894 5895static __inline__ __m256d __DEFAULT_FN_ATTRS 5896_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 5897{ 5898 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 5899 (__v4df) __W, 5900 (__mmask8) __U); 5901} 5902 5903static __inline__ __m256d __DEFAULT_FN_ATTRS 5904_mm256_maskz_load_pd (__mmask8 __U, void const *__P) 5905{ 5906 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 5907 (__v4df) 5908 _mm256_setzero_pd (), 5909 (__mmask8) __U); 5910} 5911 5912static __inline__ __m128 __DEFAULT_FN_ATTRS 5913_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 5914{ 5915 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 5916 (__v4sf) __W, 5917 (__mmask8) __U); 5918} 5919 5920static __inline__ __m128 __DEFAULT_FN_ATTRS 5921_mm_maskz_load_ps (__mmask8 __U, void const *__P) 5922{ 5923 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 5924 (__v4sf) 5925 _mm_setzero_ps (), 5926 (__mmask8) __U); 5927} 5928 5929static __inline__ __m256 __DEFAULT_FN_ATTRS 5930_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 5931{ 5932 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 5933 (__v8sf) __W, 5934 (__mmask8) __U); 5935} 5936 5937static __inline__ __m256 __DEFAULT_FN_ATTRS 5938_mm256_maskz_load_ps (__mmask8 __U, void const *__P) 5939{ 5940 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 5941 (__v8sf) 5942 _mm256_setzero_ps (), 5943 (__mmask8) __U); 5944} 5945 5946static __inline__ __m128i __DEFAULT_FN_ATTRS 5947_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5948{ 5949 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 5950 (__v2di) __W, 5951 (__mmask8) __U); 5952} 5953 5954static __inline__ __m128i __DEFAULT_FN_ATTRS 5955_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5956{ 5957 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 5958 (__v2di) 5959 _mm_setzero_si128 (), 5960 (__mmask8) __U); 5961} 5962 5963static __inline__ __m256i __DEFAULT_FN_ATTRS 5964_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5965{ 5966 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 5967 (__v4di) __W, 5968 (__mmask8) __U); 5969} 5970 5971static __inline__ __m256i __DEFAULT_FN_ATTRS 5972_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5973{ 5974 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 5975 (__v4di) 5976 _mm256_setzero_si256 (), 5977 (__mmask8) __U); 5978} 5979 5980static __inline__ __m128i __DEFAULT_FN_ATTRS 5981_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5982{ 5983 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 5984 (__v4si) __W, 5985 (__mmask8) __U); 5986} 5987 5988static __inline__ __m128i __DEFAULT_FN_ATTRS 5989_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5990{ 5991 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 5992 (__v4si) 5993 _mm_setzero_si128 (), 5994 (__mmask8) __U); 5995} 5996 5997static __inline__ __m256i __DEFAULT_FN_ATTRS 5998_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5999{ 6000 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 6001 (__v8si) __W, 6002 (__mmask8) __U); 6003} 6004 6005static __inline__ __m256i __DEFAULT_FN_ATTRS 6006_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 6007{ 6008 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 6009 (__v8si) 6010 _mm256_setzero_si256 (), 6011 (__mmask8) __U); 6012} 6013 6014static __inline__ __m128d __DEFAULT_FN_ATTRS 6015_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 6016{ 6017 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 6018 (__v2df) __W, 6019 (__mmask8) __U); 6020} 6021 6022static __inline__ __m128d __DEFAULT_FN_ATTRS 6023_mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 6024{ 6025 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 6026 (__v2df) 6027 _mm_setzero_pd (), 6028 (__mmask8) __U); 6029} 6030 6031static __inline__ __m256d __DEFAULT_FN_ATTRS 6032_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 6033{ 6034 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 6035 (__v4df) __W, 6036 (__mmask8) __U); 6037} 6038 6039static __inline__ __m256d __DEFAULT_FN_ATTRS 6040_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 6041{ 6042 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 6043 (__v4df) 6044 _mm256_setzero_pd (), 6045 (__mmask8) __U); 6046} 6047 6048static __inline__ __m128 __DEFAULT_FN_ATTRS 6049_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 6050{ 6051 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 6052 (__v4sf) __W, 6053 (__mmask8) __U); 6054} 6055 6056static __inline__ __m128 __DEFAULT_FN_ATTRS 6057_mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 6058{ 6059 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 6060 (__v4sf) 6061 _mm_setzero_ps (), 6062 (__mmask8) __U); 6063} 6064 6065static __inline__ __m256 __DEFAULT_FN_ATTRS 6066_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 6067{ 6068 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 6069 (__v8sf) __W, 6070 (__mmask8) __U); 6071} 6072 6073static __inline__ __m256 __DEFAULT_FN_ATTRS 6074_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 6075{ 6076 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 6077 (__v8sf) 6078 _mm256_setzero_ps (), 6079 (__mmask8) __U); 6080} 6081 6082static __inline__ void __DEFAULT_FN_ATTRS 6083_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 6084{ 6085 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 6086 (__v2df) __A, 6087 (__mmask8) __U); 6088} 6089 6090static __inline__ void __DEFAULT_FN_ATTRS 6091_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 6092{ 6093 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 6094 (__v4df) __A, 6095 (__mmask8) __U); 6096} 6097 6098static __inline__ void __DEFAULT_FN_ATTRS 6099_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 6100{ 6101 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 6102 (__v4sf) __A, 6103 (__mmask8) __U); 6104} 6105 6106static __inline__ void __DEFAULT_FN_ATTRS 6107_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 6108{ 6109 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 6110 (__v8sf) __A, 6111 (__mmask8) __U); 6112} 6113 6114static __inline__ void __DEFAULT_FN_ATTRS 6115_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 6116{ 6117 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 6118 (__v2di) __A, 6119 (__mmask8) __U); 6120} 6121 6122static __inline__ void __DEFAULT_FN_ATTRS 6123_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 6124{ 6125 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 6126 (__v4di) __A, 6127 (__mmask8) __U); 6128} 6129 6130static __inline__ void __DEFAULT_FN_ATTRS 6131_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 6132{ 6133 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 6134 (__v4si) __A, 6135 (__mmask8) __U); 6136} 6137 6138static __inline__ void __DEFAULT_FN_ATTRS 6139_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 6140{ 6141 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 6142 (__v8si) __A, 6143 (__mmask8) __U); 6144} 6145 6146static __inline__ void __DEFAULT_FN_ATTRS 6147_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 6148{ 6149 __builtin_ia32_storeupd128_mask ((__v2df *) __P, 6150 (__v2df) __A, 6151 (__mmask8) __U); 6152} 6153 6154static __inline__ void __DEFAULT_FN_ATTRS 6155_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 6156{ 6157 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 6158 (__v4df) __A, 6159 (__mmask8) __U); 6160} 6161 6162static __inline__ void __DEFAULT_FN_ATTRS 6163_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 6164{ 6165 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 6166 (__v4sf) __A, 6167 (__mmask8) __U); 6168} 6169 6170static __inline__ void __DEFAULT_FN_ATTRS 6171_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 6172{ 6173 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 6174 (__v8sf) __A, 6175 (__mmask8) __U); 6176} 6177 6178 6179static __inline__ __m128d __DEFAULT_FN_ATTRS 6180_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6181{ 6182 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6183 (__v2df)_mm_unpackhi_pd(__A, __B), 6184 (__v2df)__W); 6185} 6186 6187static __inline__ __m128d __DEFAULT_FN_ATTRS 6188_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 6189{ 6190 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6191 (__v2df)_mm_unpackhi_pd(__A, __B), 6192 (__v2df)_mm_setzero_pd()); 6193} 6194 6195static __inline__ __m256d __DEFAULT_FN_ATTRS 6196_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 6197{ 6198 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6199 (__v4df)_mm256_unpackhi_pd(__A, __B), 6200 (__v4df)__W); 6201} 6202 6203static __inline__ __m256d __DEFAULT_FN_ATTRS 6204_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 6205{ 6206 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6207 (__v4df)_mm256_unpackhi_pd(__A, __B), 6208 (__v4df)_mm256_setzero_pd()); 6209} 6210 6211static __inline__ __m128 __DEFAULT_FN_ATTRS 6212_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6213{ 6214 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6215 (__v4sf)_mm_unpackhi_ps(__A, __B), 6216 (__v4sf)__W); 6217} 6218 6219static __inline__ __m128 __DEFAULT_FN_ATTRS 6220_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 6221{ 6222 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6223 (__v4sf)_mm_unpackhi_ps(__A, __B), 6224 (__v4sf)_mm_setzero_ps()); 6225} 6226 6227static __inline__ __m256 __DEFAULT_FN_ATTRS 6228_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 6229{ 6230 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6231 (__v8sf)_mm256_unpackhi_ps(__A, __B), 6232 (__v8sf)__W); 6233} 6234 6235static __inline__ __m256 __DEFAULT_FN_ATTRS 6236_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 6237{ 6238 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6239 (__v8sf)_mm256_unpackhi_ps(__A, __B), 6240 (__v8sf)_mm256_setzero_ps()); 6241} 6242 6243static __inline__ __m128d __DEFAULT_FN_ATTRS 6244_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6245{ 6246 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6247 (__v2df)_mm_unpacklo_pd(__A, __B), 6248 (__v2df)__W); 6249} 6250 6251static __inline__ __m128d __DEFAULT_FN_ATTRS 6252_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 6253{ 6254 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6255 (__v2df)_mm_unpacklo_pd(__A, __B), 6256 (__v2df)_mm_setzero_pd()); 6257} 6258 6259static __inline__ __m256d __DEFAULT_FN_ATTRS 6260_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 6261{ 6262 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6263 (__v4df)_mm256_unpacklo_pd(__A, __B), 6264 (__v4df)__W); 6265} 6266 6267static __inline__ __m256d __DEFAULT_FN_ATTRS 6268_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 6269{ 6270 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6271 (__v4df)_mm256_unpacklo_pd(__A, __B), 6272 (__v4df)_mm256_setzero_pd()); 6273} 6274 6275static __inline__ __m128 __DEFAULT_FN_ATTRS 6276_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6277{ 6278 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6279 (__v4sf)_mm_unpacklo_ps(__A, __B), 6280 (__v4sf)__W); 6281} 6282 6283static __inline__ __m128 __DEFAULT_FN_ATTRS 6284_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 6285{ 6286 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6287 (__v4sf)_mm_unpacklo_ps(__A, __B), 6288 (__v4sf)_mm_setzero_ps()); 6289} 6290 6291static __inline__ __m256 __DEFAULT_FN_ATTRS 6292_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 6293{ 6294 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6295 (__v8sf)_mm256_unpacklo_ps(__A, __B), 6296 (__v8sf)__W); 6297} 6298 6299static __inline__ __m256 __DEFAULT_FN_ATTRS 6300_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 6301{ 6302 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6303 (__v8sf)_mm256_unpacklo_ps(__A, __B), 6304 (__v8sf)_mm256_setzero_ps()); 6305} 6306 6307static __inline__ __m128d __DEFAULT_FN_ATTRS 6308_mm_rcp14_pd (__m128d __A) 6309{ 6310 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6311 (__v2df) 6312 _mm_setzero_pd (), 6313 (__mmask8) -1); 6314} 6315 6316static __inline__ __m128d __DEFAULT_FN_ATTRS 6317_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6318{ 6319 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6320 (__v2df) __W, 6321 (__mmask8) __U); 6322} 6323 6324static __inline__ __m128d __DEFAULT_FN_ATTRS 6325_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 6326{ 6327 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6328 (__v2df) 6329 _mm_setzero_pd (), 6330 (__mmask8) __U); 6331} 6332 6333static __inline__ __m256d __DEFAULT_FN_ATTRS 6334_mm256_rcp14_pd (__m256d __A) 6335{ 6336 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6337 (__v4df) 6338 _mm256_setzero_pd (), 6339 (__mmask8) -1); 6340} 6341 6342static __inline__ __m256d __DEFAULT_FN_ATTRS 6343_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6344{ 6345 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6346 (__v4df) __W, 6347 (__mmask8) __U); 6348} 6349 6350static __inline__ __m256d __DEFAULT_FN_ATTRS 6351_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 6352{ 6353 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6354 (__v4df) 6355 _mm256_setzero_pd (), 6356 (__mmask8) __U); 6357} 6358 6359static __inline__ __m128 __DEFAULT_FN_ATTRS 6360_mm_rcp14_ps (__m128 __A) 6361{ 6362 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6363 (__v4sf) 6364 _mm_setzero_ps (), 6365 (__mmask8) -1); 6366} 6367 6368static __inline__ __m128 __DEFAULT_FN_ATTRS 6369_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6370{ 6371 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6372 (__v4sf) __W, 6373 (__mmask8) __U); 6374} 6375 6376static __inline__ __m128 __DEFAULT_FN_ATTRS 6377_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6378{ 6379 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6380 (__v4sf) 6381 _mm_setzero_ps (), 6382 (__mmask8) __U); 6383} 6384 6385static __inline__ __m256 __DEFAULT_FN_ATTRS 6386_mm256_rcp14_ps (__m256 __A) 6387{ 6388 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6389 (__v8sf) 6390 _mm256_setzero_ps (), 6391 (__mmask8) -1); 6392} 6393 6394static __inline__ __m256 __DEFAULT_FN_ATTRS 6395_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6396{ 6397 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6398 (__v8sf) __W, 6399 (__mmask8) __U); 6400} 6401 6402static __inline__ __m256 __DEFAULT_FN_ATTRS 6403_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6404{ 6405 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6406 (__v8sf) 6407 _mm256_setzero_ps (), 6408 (__mmask8) __U); 6409} 6410 6411#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6412 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6413 (__v2df)_mm_permute_pd((X), (C)), \ 6414 (__v2df)(__m128d)(W)); }) 6415 6416#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \ 6417 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6418 (__v2df)_mm_permute_pd((X), (C)), \ 6419 (__v2df)_mm_setzero_pd()); }) 6420 6421#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6422 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6423 (__v4df)_mm256_permute_pd((X), (C)), \ 6424 (__v4df)(__m256d)(W)); }) 6425 6426#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \ 6427 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6428 (__v4df)_mm256_permute_pd((X), (C)), \ 6429 (__v4df)_mm256_setzero_pd()); }) 6430 6431#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6432 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6433 (__v4sf)_mm_permute_ps((X), (C)), \ 6434 (__v4sf)(__m128)(W)); }) 6435 6436#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \ 6437 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6438 (__v4sf)_mm_permute_ps((X), (C)), \ 6439 (__v4sf)_mm_setzero_ps()); }) 6440 6441#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6442 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6443 (__v8sf)_mm256_permute_ps((X), (C)), \ 6444 (__v8sf)(__m256)(W)); }) 6445 6446#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \ 6447 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6448 (__v8sf)_mm256_permute_ps((X), (C)), \ 6449 (__v8sf)_mm256_setzero_ps()); }) 6450 6451static __inline__ __m128d __DEFAULT_FN_ATTRS 6452_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A, 6453 __m128i __C) 6454{ 6455 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A, 6456 (__v2di) __C, 6457 (__v2df) __W, 6458 (__mmask8) __U); 6459} 6460 6461static __inline__ __m128d __DEFAULT_FN_ATTRS 6462_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C) 6463{ 6464 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A, 6465 (__v2di) __C, 6466 (__v2df) 6467 _mm_setzero_pd (), 6468 (__mmask8) __U); 6469} 6470 6471static __inline__ __m256d __DEFAULT_FN_ATTRS 6472_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A, 6473 __m256i __C) 6474{ 6475 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A, 6476 (__v4di) __C, 6477 (__v4df) __W, 6478 (__mmask8) 6479 __U); 6480} 6481 6482static __inline__ __m256d __DEFAULT_FN_ATTRS 6483_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C) 6484{ 6485 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A, 6486 (__v4di) __C, 6487 (__v4df) 6488 _mm256_setzero_pd (), 6489 (__mmask8) 6490 __U); 6491} 6492 6493static __inline__ __m128 __DEFAULT_FN_ATTRS 6494_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A, 6495 __m128i __C) 6496{ 6497 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A, 6498 (__v4si) __C, 6499 (__v4sf) __W, 6500 (__mmask8) __U); 6501} 6502 6503static __inline__ __m128 __DEFAULT_FN_ATTRS 6504_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C) 6505{ 6506 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A, 6507 (__v4si) __C, 6508 (__v4sf) 6509 _mm_setzero_ps (), 6510 (__mmask8) __U); 6511} 6512 6513static __inline__ __m256 __DEFAULT_FN_ATTRS 6514_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A, 6515 __m256i __C) 6516{ 6517 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A, 6518 (__v8si) __C, 6519 (__v8sf) __W, 6520 (__mmask8) __U); 6521} 6522 6523static __inline__ __m256 __DEFAULT_FN_ATTRS 6524_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C) 6525{ 6526 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A, 6527 (__v8si) __C, 6528 (__v8sf) 6529 _mm256_setzero_ps (), 6530 (__mmask8) __U); 6531} 6532 6533static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6534_mm_test_epi32_mask (__m128i __A, __m128i __B) 6535{ 6536 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 6537 (__v4si) __B, 6538 (__mmask8) -1); 6539} 6540 6541static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6542_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6543{ 6544 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 6545 (__v4si) __B, __U); 6546} 6547 6548static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6549_mm256_test_epi32_mask (__m256i __A, __m256i __B) 6550{ 6551 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 6552 (__v8si) __B, 6553 (__mmask8) -1); 6554} 6555 6556static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6557_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6558{ 6559 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 6560 (__v8si) __B, __U); 6561} 6562 6563static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6564_mm_test_epi64_mask (__m128i __A, __m128i __B) 6565{ 6566 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 6567 (__v2di) __B, 6568 (__mmask8) -1); 6569} 6570 6571static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6572_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6573{ 6574 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 6575 (__v2di) __B, __U); 6576} 6577 6578static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6579_mm256_test_epi64_mask (__m256i __A, __m256i __B) 6580{ 6581 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 6582 (__v4di) __B, 6583 (__mmask8) -1); 6584} 6585 6586static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6587_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6588{ 6589 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 6590 (__v4di) __B, __U); 6591} 6592 6593static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6594_mm_testn_epi32_mask (__m128i __A, __m128i __B) 6595{ 6596 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 6597 (__v4si) __B, 6598 (__mmask8) -1); 6599} 6600 6601static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6602_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6603{ 6604 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 6605 (__v4si) __B, __U); 6606} 6607 6608static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6609_mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6610{ 6611 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 6612 (__v8si) __B, 6613 (__mmask8) -1); 6614} 6615 6616static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6617_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6618{ 6619 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 6620 (__v8si) __B, __U); 6621} 6622 6623static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6624_mm_testn_epi64_mask (__m128i __A, __m128i __B) 6625{ 6626 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 6627 (__v2di) __B, 6628 (__mmask8) -1); 6629} 6630 6631static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6632_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6633{ 6634 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 6635 (__v2di) __B, __U); 6636} 6637 6638static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6639_mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6640{ 6641 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 6642 (__v4di) __B, 6643 (__mmask8) -1); 6644} 6645 6646static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6647_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6648{ 6649 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 6650 (__v4di) __B, __U); 6651} 6652 6653 6654 6655static __inline__ __m128i __DEFAULT_FN_ATTRS 6656_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6657{ 6658 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6659 (__v4si)_mm_unpackhi_epi32(__A, __B), 6660 (__v4si)__W); 6661} 6662 6663static __inline__ __m128i __DEFAULT_FN_ATTRS 6664_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6665{ 6666 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6667 (__v4si)_mm_unpackhi_epi32(__A, __B), 6668 (__v4si)_mm_setzero_si128()); 6669} 6670 6671static __inline__ __m256i __DEFAULT_FN_ATTRS 6672_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6673{ 6674 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6675 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6676 (__v8si)__W); 6677} 6678 6679static __inline__ __m256i __DEFAULT_FN_ATTRS 6680_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6681{ 6682 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6683 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6684 (__v8si)_mm256_setzero_si256()); 6685} 6686 6687static __inline__ __m128i __DEFAULT_FN_ATTRS 6688_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6689{ 6690 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6691 (__v2di)_mm_unpackhi_epi64(__A, __B), 6692 (__v2di)__W); 6693} 6694 6695static __inline__ __m128i __DEFAULT_FN_ATTRS 6696_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6697{ 6698 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6699 (__v2di)_mm_unpackhi_epi64(__A, __B), 6700 (__v2di)_mm_setzero_di()); 6701} 6702 6703static __inline__ __m256i __DEFAULT_FN_ATTRS 6704_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6705{ 6706 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6707 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6708 (__v4di)__W); 6709} 6710 6711static __inline__ __m256i __DEFAULT_FN_ATTRS 6712_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6713{ 6714 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6715 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6716 (__v4di)_mm256_setzero_si256()); 6717} 6718 6719static __inline__ __m128i __DEFAULT_FN_ATTRS 6720_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6721{ 6722 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6723 (__v4si)_mm_unpacklo_epi32(__A, __B), 6724 (__v4si)__W); 6725} 6726 6727static __inline__ __m128i __DEFAULT_FN_ATTRS 6728_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6729{ 6730 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6731 (__v4si)_mm_unpacklo_epi32(__A, __B), 6732 (__v4si)_mm_setzero_si128()); 6733} 6734 6735static __inline__ __m256i __DEFAULT_FN_ATTRS 6736_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6737{ 6738 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6739 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6740 (__v8si)__W); 6741} 6742 6743static __inline__ __m256i __DEFAULT_FN_ATTRS 6744_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6745{ 6746 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6747 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6748 (__v8si)_mm256_setzero_si256()); 6749} 6750 6751static __inline__ __m128i __DEFAULT_FN_ATTRS 6752_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6753{ 6754 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6755 (__v2di)_mm_unpacklo_epi64(__A, __B), 6756 (__v2di)__W); 6757} 6758 6759static __inline__ __m128i __DEFAULT_FN_ATTRS 6760_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6761{ 6762 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6763 (__v2di)_mm_unpacklo_epi64(__A, __B), 6764 (__v2di)_mm_setzero_di()); 6765} 6766 6767static __inline__ __m256i __DEFAULT_FN_ATTRS 6768_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6769{ 6770 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6771 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6772 (__v4di)__W); 6773} 6774 6775static __inline__ __m256i __DEFAULT_FN_ATTRS 6776_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6777{ 6778 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6779 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6780 (__v4di)_mm256_setzero_si256()); 6781} 6782 6783static __inline__ __m128i __DEFAULT_FN_ATTRS 6784_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6785{ 6786 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6787 (__v4si)_mm_sra_epi32(__A, __B), 6788 (__v4si)__W); 6789} 6790 6791static __inline__ __m128i __DEFAULT_FN_ATTRS 6792_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6793{ 6794 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6795 (__v4si)_mm_sra_epi32(__A, __B), 6796 (__v4si)_mm_setzero_si128()); 6797} 6798 6799static __inline__ __m256i __DEFAULT_FN_ATTRS 6800_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6801{ 6802 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6803 (__v8si)_mm256_sra_epi32(__A, __B), 6804 (__v8si)__W); 6805} 6806 6807static __inline__ __m256i __DEFAULT_FN_ATTRS 6808_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 6809{ 6810 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6811 (__v8si)_mm256_sra_epi32(__A, __B), 6812 (__v8si)_mm256_setzero_si256()); 6813} 6814 6815static __inline__ __m128i __DEFAULT_FN_ATTRS 6816_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 6817{ 6818 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6819 (__v4si)_mm_srai_epi32(__A, __B), 6820 (__v4si)__W); 6821} 6822 6823static __inline__ __m128i __DEFAULT_FN_ATTRS 6824_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) 6825{ 6826 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6827 (__v4si)_mm_srai_epi32(__A, __B), 6828 (__v4si)_mm_setzero_si128()); 6829} 6830 6831static __inline__ __m256i __DEFAULT_FN_ATTRS 6832_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 6833{ 6834 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6835 (__v8si)_mm256_srai_epi32(__A, __B), 6836 (__v8si)__W); 6837} 6838 6839static __inline__ __m256i __DEFAULT_FN_ATTRS 6840_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) 6841{ 6842 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6843 (__v8si)_mm256_srai_epi32(__A, __B), 6844 (__v8si)_mm256_setzero_si256()); 6845} 6846 6847static __inline__ __m128i __DEFAULT_FN_ATTRS 6848_mm_sra_epi64 (__m128i __A, __m128i __B) 6849{ 6850 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 6851 (__v2di) __B, 6852 (__v2di) 6853 _mm_setzero_di (), 6854 (__mmask8) -1); 6855} 6856 6857static __inline__ __m128i __DEFAULT_FN_ATTRS 6858_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 6859 __m128i __B) 6860{ 6861 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 6862 (__v2di) __B, 6863 (__v2di) __W, 6864 (__mmask8) __U); 6865} 6866 6867static __inline__ __m128i __DEFAULT_FN_ATTRS 6868_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 6869{ 6870 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 6871 (__v2di) __B, 6872 (__v2di) 6873 _mm_setzero_di (), 6874 (__mmask8) __U); 6875} 6876 6877static __inline__ __m256i __DEFAULT_FN_ATTRS 6878_mm256_sra_epi64 (__m256i __A, __m128i __B) 6879{ 6880 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 6881 (__v2di) __B, 6882 (__v4di) 6883 _mm256_setzero_si256 (), 6884 (__mmask8) -1); 6885} 6886 6887static __inline__ __m256i __DEFAULT_FN_ATTRS 6888_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 6889 __m128i __B) 6890{ 6891 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 6892 (__v2di) __B, 6893 (__v4di) __W, 6894 (__mmask8) __U); 6895} 6896 6897static __inline__ __m256i __DEFAULT_FN_ATTRS 6898_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B) 6899{ 6900 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 6901 (__v2di) __B, 6902 (__v4di) 6903 _mm256_setzero_si256 (), 6904 (__mmask8) __U); 6905} 6906 6907#define _mm_srai_epi64(A, imm) __extension__ ({ \ 6908 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ 6909 (__v2di)_mm_setzero_di(), \ 6910 (__mmask8)-1); }) 6911 6912#define _mm_mask_srai_epi64(W, U, A, imm) __extension__ ({ \ 6913 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ 6914 (__v2di)(__m128i)(W), \ 6915 (__mmask8)(U)); }) 6916 6917#define _mm_maskz_srai_epi64(U, A, imm) __extension__ ({ \ 6918 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ 6919 (__v2di)_mm_setzero_si128(), \ 6920 (__mmask8)(U)); }) 6921 6922#define _mm256_srai_epi64(A, imm) __extension__ ({ \ 6923 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ 6924 (__v4di)_mm256_setzero_si256(), \ 6925 (__mmask8)-1); }) 6926 6927#define _mm256_mask_srai_epi64(W, U, A, imm) __extension__ ({ \ 6928 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ 6929 (__v4di)(__m256i)(W), \ 6930 (__mmask8)(U)); }) 6931 6932#define _mm256_maskz_srai_epi64(U, A, imm) __extension__ ({ \ 6933 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ 6934 (__v4di)_mm256_setzero_si256(), \ 6935 (__mmask8)(U)); }) 6936 6937#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6938 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6939 (__v4si)(__m128i)(B), \ 6940 (__v4si)(__m128i)(C), (int)(imm), \ 6941 (__mmask8)-1); }) 6942 6943#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6944 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6945 (__v4si)(__m128i)(B), \ 6946 (__v4si)(__m128i)(C), (int)(imm), \ 6947 (__mmask8)(U)); }) 6948 6949#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6950 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ 6951 (__v4si)(__m128i)(B), \ 6952 (__v4si)(__m128i)(C), (int)(imm), \ 6953 (__mmask8)(U)); }) 6954 6955#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 6956 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6957 (__v8si)(__m256i)(B), \ 6958 (__v8si)(__m256i)(C), (int)(imm), \ 6959 (__mmask8)-1); }) 6960 6961#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 6962 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6963 (__v8si)(__m256i)(B), \ 6964 (__v8si)(__m256i)(C), (int)(imm), \ 6965 (__mmask8)(U)); }) 6966 6967#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 6968 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ 6969 (__v8si)(__m256i)(B), \ 6970 (__v8si)(__m256i)(C), (int)(imm), \ 6971 (__mmask8)(U)); }) 6972 6973#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6974 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6975 (__v2di)(__m128i)(B), \ 6976 (__v2di)(__m128i)(C), (int)(imm), \ 6977 (__mmask8)-1); }) 6978 6979#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6980 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6981 (__v2di)(__m128i)(B), \ 6982 (__v2di)(__m128i)(C), (int)(imm), \ 6983 (__mmask8)(U)); }) 6984 6985#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 6986 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ 6987 (__v2di)(__m128i)(B), \ 6988 (__v2di)(__m128i)(C), (int)(imm), \ 6989 (__mmask8)(U)); }) 6990 6991#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 6992 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6993 (__v4di)(__m256i)(B), \ 6994 (__v4di)(__m256i)(C), (int)(imm), \ 6995 (__mmask8)-1); }) 6996 6997#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 6998 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6999 (__v4di)(__m256i)(B), \ 7000 (__v4di)(__m256i)(C), (int)(imm), \ 7001 (__mmask8)(U)); }) 7002 7003#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 7004 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ 7005 (__v4di)(__m256i)(B), \ 7006 (__v4di)(__m256i)(C), (int)(imm), \ 7007 (__mmask8)(U)); }) 7008 7009 7010 7011#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \ 7012 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 7013 (__v8sf)(__m256)(B), (int)(imm), \ 7014 (__v8sf)_mm256_setzero_ps(), \ 7015 (__mmask8)-1); }) 7016 7017#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 7018 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 7019 (__v8sf)(__m256)(B), (int)(imm), \ 7020 (__v8sf)(__m256)(W), \ 7021 (__mmask8)(U)); }) 7022 7023#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 7024 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 7025 (__v8sf)(__m256)(B), (int)(imm), \ 7026 (__v8sf)_mm256_setzero_ps(), \ 7027 (__mmask8)(U)); }) 7028 7029#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \ 7030 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7031 (__v4df)(__m256d)(B), \ 7032 (int)(imm), \ 7033 (__v4df)_mm256_setzero_pd(), \ 7034 (__mmask8)-1); }) 7035 7036#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 7037 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7038 (__v4df)(__m256d)(B), \ 7039 (int)(imm), \ 7040 (__v4df)(__m256d)(W), \ 7041 (__mmask8)(U)); }) 7042 7043#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 7044 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7045 (__v4df)(__m256d)(B), \ 7046 (int)(imm), \ 7047 (__v4df)_mm256_setzero_pd(), \ 7048 (__mmask8)(U)); }) 7049 7050#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7051 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7052 (__v8si)(__m256i)(B), \ 7053 (int)(imm), \ 7054 (__v8si)_mm256_setzero_si256(), \ 7055 (__mmask8)-1); }) 7056 7057#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7058 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7059 (__v8si)(__m256i)(B), \ 7060 (int)(imm), \ 7061 (__v8si)(__m256i)(W), \ 7062 (__mmask8)(U)); }) 7063 7064#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7065 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7066 (__v8si)(__m256i)(B), \ 7067 (int)(imm), \ 7068 (__v8si)_mm256_setzero_si256(), \ 7069 (__mmask8)(U)); }) 7070 7071#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7072 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7073 (__v4di)(__m256i)(B), \ 7074 (int)(imm), \ 7075 (__v4di)_mm256_setzero_si256(), \ 7076 (__mmask8)-1); }) 7077 7078#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7079 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7080 (__v4di)(__m256i)(B), \ 7081 (int)(imm), \ 7082 (__v4di)(__m256i)(W), \ 7083 (__mmask8)(U)); }) 7084 7085#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7086 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7087 (__v4di)(__m256i)(B), \ 7088 (int)(imm), \ 7089 (__v4di)_mm256_setzero_si256(), \ 7090 (__mmask8)(U)); }) 7091 7092#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7093 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 7094 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 7095 (__v2df)(__m128d)(W)); }) 7096 7097#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7098 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 7099 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 7100 (__v2df)_mm_setzero_pd()); }) 7101 7102#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7103 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 7104 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 7105 (__v4df)(__m256d)(W)); }) 7106 7107#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7108 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 7109 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 7110 (__v4df)_mm256_setzero_pd()); }) 7111 7112#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7113 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 7114 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 7115 (__v4sf)(__m128)(W)); }) 7116 7117#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7118 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 7119 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 7120 (__v4sf)_mm_setzero_ps()); }) 7121 7122#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7123 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7124 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 7125 (__v8sf)(__m256)(W)); }) 7126 7127#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7128 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7129 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 7130 (__v8sf)_mm256_setzero_ps()); }) 7131 7132static __inline__ __m128d __DEFAULT_FN_ATTRS 7133_mm_rsqrt14_pd (__m128d __A) 7134{ 7135 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7136 (__v2df) 7137 _mm_setzero_pd (), 7138 (__mmask8) -1); 7139} 7140 7141static __inline__ __m128d __DEFAULT_FN_ATTRS 7142_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 7143{ 7144 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7145 (__v2df) __W, 7146 (__mmask8) __U); 7147} 7148 7149static __inline__ __m128d __DEFAULT_FN_ATTRS 7150_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 7151{ 7152 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7153 (__v2df) 7154 _mm_setzero_pd (), 7155 (__mmask8) __U); 7156} 7157 7158static __inline__ __m256d __DEFAULT_FN_ATTRS 7159_mm256_rsqrt14_pd (__m256d __A) 7160{ 7161 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7162 (__v4df) 7163 _mm256_setzero_pd (), 7164 (__mmask8) -1); 7165} 7166 7167static __inline__ __m256d __DEFAULT_FN_ATTRS 7168_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 7169{ 7170 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7171 (__v4df) __W, 7172 (__mmask8) __U); 7173} 7174 7175static __inline__ __m256d __DEFAULT_FN_ATTRS 7176_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 7177{ 7178 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7179 (__v4df) 7180 _mm256_setzero_pd (), 7181 (__mmask8) __U); 7182} 7183 7184static __inline__ __m128 __DEFAULT_FN_ATTRS 7185_mm_rsqrt14_ps (__m128 __A) 7186{ 7187 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7188 (__v4sf) 7189 _mm_setzero_ps (), 7190 (__mmask8) -1); 7191} 7192 7193static __inline__ __m128 __DEFAULT_FN_ATTRS 7194_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 7195{ 7196 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7197 (__v4sf) __W, 7198 (__mmask8) __U); 7199} 7200 7201static __inline__ __m128 __DEFAULT_FN_ATTRS 7202_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 7203{ 7204 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7205 (__v4sf) 7206 _mm_setzero_ps (), 7207 (__mmask8) __U); 7208} 7209 7210static __inline__ __m256 __DEFAULT_FN_ATTRS 7211_mm256_rsqrt14_ps (__m256 __A) 7212{ 7213 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7214 (__v8sf) 7215 _mm256_setzero_ps (), 7216 (__mmask8) -1); 7217} 7218 7219static __inline__ __m256 __DEFAULT_FN_ATTRS 7220_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 7221{ 7222 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7223 (__v8sf) __W, 7224 (__mmask8) __U); 7225} 7226 7227static __inline__ __m256 __DEFAULT_FN_ATTRS 7228_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 7229{ 7230 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7231 (__v8sf) 7232 _mm256_setzero_ps (), 7233 (__mmask8) __U); 7234} 7235 7236static __inline__ __m256 __DEFAULT_FN_ATTRS 7237_mm256_broadcast_f32x4 (__m128 __A) 7238{ 7239 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 7240 (__v8sf)_mm256_undefined_pd (), 7241 (__mmask8) -1); 7242} 7243 7244static __inline__ __m256 __DEFAULT_FN_ATTRS 7245_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A) 7246{ 7247 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 7248 (__v8sf) __O, 7249 __M); 7250} 7251 7252static __inline__ __m256 __DEFAULT_FN_ATTRS 7253_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 7254{ 7255 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 7256 (__v8sf) _mm256_setzero_ps (), 7257 __M); 7258} 7259 7260static __inline__ __m256i __DEFAULT_FN_ATTRS 7261_mm256_broadcast_i32x4 (__m128i __A) 7262{ 7263 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A, 7264 (__v8si)_mm256_undefined_si256 (), 7265 (__mmask8) -1); 7266} 7267 7268static __inline__ __m256i __DEFAULT_FN_ATTRS 7269_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A) 7270{ 7271 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A, 7272 (__v8si) 7273 __O, __M); 7274} 7275 7276static __inline__ __m256i __DEFAULT_FN_ATTRS 7277_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A) 7278{ 7279 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) 7280 __A, 7281 (__v8si) _mm256_setzero_si256 (), 7282 __M); 7283} 7284 7285static __inline__ __m256d __DEFAULT_FN_ATTRS 7286_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 7287{ 7288 return (__m256d)__builtin_ia32_selectpd_256(__M, 7289 (__v4df) _mm256_broadcastsd_pd(__A), 7290 (__v4df) __O); 7291} 7292 7293static __inline__ __m256d __DEFAULT_FN_ATTRS 7294_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 7295{ 7296 return (__m256d)__builtin_ia32_selectpd_256(__M, 7297 (__v4df) _mm256_broadcastsd_pd(__A), 7298 (__v4df) _mm256_setzero_pd()); 7299} 7300 7301static __inline__ __m128 __DEFAULT_FN_ATTRS 7302_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 7303{ 7304 return (__m128)__builtin_ia32_selectps_128(__M, 7305 (__v4sf) _mm_broadcastss_ps(__A), 7306 (__v4sf) __O); 7307} 7308 7309static __inline__ __m128 __DEFAULT_FN_ATTRS 7310_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 7311{ 7312 return (__m128)__builtin_ia32_selectps_128(__M, 7313 (__v4sf) _mm_broadcastss_ps(__A), 7314 (__v4sf) _mm_setzero_ps()); 7315} 7316 7317static __inline__ __m256 __DEFAULT_FN_ATTRS 7318_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 7319{ 7320 return (__m256)__builtin_ia32_selectps_256(__M, 7321 (__v8sf) _mm256_broadcastss_ps(__A), 7322 (__v8sf) __O); 7323} 7324 7325static __inline__ __m256 __DEFAULT_FN_ATTRS 7326_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 7327{ 7328 return (__m256)__builtin_ia32_selectps_256(__M, 7329 (__v8sf) _mm256_broadcastss_ps(__A), 7330 (__v8sf) _mm256_setzero_ps()); 7331} 7332 7333static __inline__ __m128i __DEFAULT_FN_ATTRS 7334_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7335{ 7336 return (__m128i)__builtin_ia32_selectd_128(__M, 7337 (__v4si) _mm_broadcastd_epi32(__A), 7338 (__v4si) __O); 7339} 7340 7341static __inline__ __m128i __DEFAULT_FN_ATTRS 7342_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 7343{ 7344 return (__m128i)__builtin_ia32_selectd_128(__M, 7345 (__v4si) _mm_broadcastd_epi32(__A), 7346 (__v4si) _mm_setzero_si128()); 7347} 7348 7349static __inline__ __m256i __DEFAULT_FN_ATTRS 7350_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 7351{ 7352 return (__m256i)__builtin_ia32_selectd_256(__M, 7353 (__v8si) _mm256_broadcastd_epi32(__A), 7354 (__v8si) __O); 7355} 7356 7357static __inline__ __m256i __DEFAULT_FN_ATTRS 7358_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 7359{ 7360 return (__m256i)__builtin_ia32_selectd_256(__M, 7361 (__v8si) _mm256_broadcastd_epi32(__A), 7362 (__v8si) _mm256_setzero_si256()); 7363} 7364 7365static __inline__ __m128i __DEFAULT_FN_ATTRS 7366_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 7367{ 7368 return (__m128i)__builtin_ia32_selectq_128(__M, 7369 (__v2di) _mm_broadcastq_epi64(__A), 7370 (__v2di) __O); 7371} 7372 7373static __inline__ __m128i __DEFAULT_FN_ATTRS 7374_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 7375{ 7376 return (__m128i)__builtin_ia32_selectq_128(__M, 7377 (__v2di) _mm_broadcastq_epi64(__A), 7378 (__v2di) _mm_setzero_si128()); 7379} 7380 7381static __inline__ __m256i __DEFAULT_FN_ATTRS 7382_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 7383{ 7384 return (__m256i)__builtin_ia32_selectq_256(__M, 7385 (__v4di) _mm256_broadcastq_epi64(__A), 7386 (__v4di) __O); 7387} 7388 7389static __inline__ __m256i __DEFAULT_FN_ATTRS 7390_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 7391{ 7392 return (__m256i)__builtin_ia32_selectq_256(__M, 7393 (__v4di) _mm256_broadcastq_epi64(__A), 7394 (__v4di) _mm256_setzero_si256()); 7395} 7396 7397static __inline__ __m128i __DEFAULT_FN_ATTRS 7398_mm_cvtsepi32_epi8 (__m128i __A) 7399{ 7400 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7401 (__v16qi)_mm_undefined_si128(), 7402 (__mmask8) -1); 7403} 7404 7405static __inline__ __m128i __DEFAULT_FN_ATTRS 7406_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7407{ 7408 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7409 (__v16qi) __O, __M); 7410} 7411 7412static __inline__ __m128i __DEFAULT_FN_ATTRS 7413_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 7414{ 7415 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7416 (__v16qi) _mm_setzero_si128 (), 7417 __M); 7418} 7419 7420static __inline__ void __DEFAULT_FN_ATTRS 7421_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7422{ 7423 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7424} 7425 7426static __inline__ __m128i __DEFAULT_FN_ATTRS 7427_mm256_cvtsepi32_epi8 (__m256i __A) 7428{ 7429 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7430 (__v16qi)_mm_undefined_si128(), 7431 (__mmask8) -1); 7432} 7433 7434static __inline__ __m128i __DEFAULT_FN_ATTRS 7435_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7436{ 7437 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7438 (__v16qi) __O, __M); 7439} 7440 7441static __inline__ __m128i __DEFAULT_FN_ATTRS 7442_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 7443{ 7444 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7445 (__v16qi) _mm_setzero_si128 (), 7446 __M); 7447} 7448 7449static __inline__ void __DEFAULT_FN_ATTRS 7450_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7451{ 7452 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7453} 7454 7455static __inline__ __m128i __DEFAULT_FN_ATTRS 7456_mm_cvtsepi32_epi16 (__m128i __A) 7457{ 7458 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7459 (__v8hi)_mm_setzero_si128 (), 7460 (__mmask8) -1); 7461} 7462 7463static __inline__ __m128i __DEFAULT_FN_ATTRS 7464_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7465{ 7466 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7467 (__v8hi)__O, 7468 __M); 7469} 7470 7471static __inline__ __m128i __DEFAULT_FN_ATTRS 7472_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 7473{ 7474 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7475 (__v8hi) _mm_setzero_si128 (), 7476 __M); 7477} 7478 7479static __inline__ void __DEFAULT_FN_ATTRS 7480_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7481{ 7482 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7483} 7484 7485static __inline__ __m128i __DEFAULT_FN_ATTRS 7486_mm256_cvtsepi32_epi16 (__m256i __A) 7487{ 7488 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7489 (__v8hi)_mm_undefined_si128(), 7490 (__mmask8) -1); 7491} 7492 7493static __inline__ __m128i __DEFAULT_FN_ATTRS 7494_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7495{ 7496 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7497 (__v8hi) __O, __M); 7498} 7499 7500static __inline__ __m128i __DEFAULT_FN_ATTRS 7501_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7502{ 7503 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7504 (__v8hi) _mm_setzero_si128 (), 7505 __M); 7506} 7507 7508static __inline__ void __DEFAULT_FN_ATTRS 7509_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7510{ 7511 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7512} 7513 7514static __inline__ __m128i __DEFAULT_FN_ATTRS 7515_mm_cvtsepi64_epi8 (__m128i __A) 7516{ 7517 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7518 (__v16qi)_mm_undefined_si128(), 7519 (__mmask8) -1); 7520} 7521 7522static __inline__ __m128i __DEFAULT_FN_ATTRS 7523_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7524{ 7525 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7526 (__v16qi) __O, __M); 7527} 7528 7529static __inline__ __m128i __DEFAULT_FN_ATTRS 7530_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7531{ 7532 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7533 (__v16qi) _mm_setzero_si128 (), 7534 __M); 7535} 7536 7537static __inline__ void __DEFAULT_FN_ATTRS 7538_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7539{ 7540 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7541} 7542 7543static __inline__ __m128i __DEFAULT_FN_ATTRS 7544_mm256_cvtsepi64_epi8 (__m256i __A) 7545{ 7546 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7547 (__v16qi)_mm_undefined_si128(), 7548 (__mmask8) -1); 7549} 7550 7551static __inline__ __m128i __DEFAULT_FN_ATTRS 7552_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7553{ 7554 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7555 (__v16qi) __O, __M); 7556} 7557 7558static __inline__ __m128i __DEFAULT_FN_ATTRS 7559_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7560{ 7561 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7562 (__v16qi) _mm_setzero_si128 (), 7563 __M); 7564} 7565 7566static __inline__ void __DEFAULT_FN_ATTRS 7567_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7568{ 7569 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7570} 7571 7572static __inline__ __m128i __DEFAULT_FN_ATTRS 7573_mm_cvtsepi64_epi32 (__m128i __A) 7574{ 7575 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7576 (__v4si)_mm_undefined_si128(), 7577 (__mmask8) -1); 7578} 7579 7580static __inline__ __m128i __DEFAULT_FN_ATTRS 7581_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7582{ 7583 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7584 (__v4si) __O, __M); 7585} 7586 7587static __inline__ __m128i __DEFAULT_FN_ATTRS 7588_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7589{ 7590 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7591 (__v4si) _mm_setzero_si128 (), 7592 __M); 7593} 7594 7595static __inline__ void __DEFAULT_FN_ATTRS 7596_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7597{ 7598 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7599} 7600 7601static __inline__ __m128i __DEFAULT_FN_ATTRS 7602_mm256_cvtsepi64_epi32 (__m256i __A) 7603{ 7604 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7605 (__v4si)_mm_undefined_si128(), 7606 (__mmask8) -1); 7607} 7608 7609static __inline__ __m128i __DEFAULT_FN_ATTRS 7610_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7611{ 7612 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7613 (__v4si)__O, 7614 __M); 7615} 7616 7617static __inline__ __m128i __DEFAULT_FN_ATTRS 7618_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7619{ 7620 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7621 (__v4si) _mm_setzero_si128 (), 7622 __M); 7623} 7624 7625static __inline__ void __DEFAULT_FN_ATTRS 7626_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7627{ 7628 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7629} 7630 7631static __inline__ __m128i __DEFAULT_FN_ATTRS 7632_mm_cvtsepi64_epi16 (__m128i __A) 7633{ 7634 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7635 (__v8hi)_mm_undefined_si128(), 7636 (__mmask8) -1); 7637} 7638 7639static __inline__ __m128i __DEFAULT_FN_ATTRS 7640_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7641{ 7642 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7643 (__v8hi) __O, __M); 7644} 7645 7646static __inline__ __m128i __DEFAULT_FN_ATTRS 7647_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7648{ 7649 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7650 (__v8hi) _mm_setzero_si128 (), 7651 __M); 7652} 7653 7654static __inline__ void __DEFAULT_FN_ATTRS 7655_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7656{ 7657 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7658} 7659 7660static __inline__ __m128i __DEFAULT_FN_ATTRS 7661_mm256_cvtsepi64_epi16 (__m256i __A) 7662{ 7663 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7664 (__v8hi)_mm_undefined_si128(), 7665 (__mmask8) -1); 7666} 7667 7668static __inline__ __m128i __DEFAULT_FN_ATTRS 7669_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7670{ 7671 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7672 (__v8hi) __O, __M); 7673} 7674 7675static __inline__ __m128i __DEFAULT_FN_ATTRS 7676_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7677{ 7678 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7679 (__v8hi) _mm_setzero_si128 (), 7680 __M); 7681} 7682 7683static __inline__ void __DEFAULT_FN_ATTRS 7684_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7685{ 7686 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7687} 7688 7689static __inline__ __m128i __DEFAULT_FN_ATTRS 7690_mm_cvtusepi32_epi8 (__m128i __A) 7691{ 7692 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7693 (__v16qi)_mm_undefined_si128(), 7694 (__mmask8) -1); 7695} 7696 7697static __inline__ __m128i __DEFAULT_FN_ATTRS 7698_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7699{ 7700 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7701 (__v16qi) __O, 7702 __M); 7703} 7704 7705static __inline__ __m128i __DEFAULT_FN_ATTRS 7706_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7707{ 7708 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7709 (__v16qi) _mm_setzero_si128 (), 7710 __M); 7711} 7712 7713static __inline__ void __DEFAULT_FN_ATTRS 7714_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7715{ 7716 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7717} 7718 7719static __inline__ __m128i __DEFAULT_FN_ATTRS 7720_mm256_cvtusepi32_epi8 (__m256i __A) 7721{ 7722 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7723 (__v16qi)_mm_undefined_si128(), 7724 (__mmask8) -1); 7725} 7726 7727static __inline__ __m128i __DEFAULT_FN_ATTRS 7728_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7729{ 7730 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7731 (__v16qi) __O, 7732 __M); 7733} 7734 7735static __inline__ __m128i __DEFAULT_FN_ATTRS 7736_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7737{ 7738 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7739 (__v16qi) _mm_setzero_si128 (), 7740 __M); 7741} 7742 7743static __inline__ void __DEFAULT_FN_ATTRS 7744_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7745{ 7746 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7747} 7748 7749static __inline__ __m128i __DEFAULT_FN_ATTRS 7750_mm_cvtusepi32_epi16 (__m128i __A) 7751{ 7752 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7753 (__v8hi)_mm_undefined_si128(), 7754 (__mmask8) -1); 7755} 7756 7757static __inline__ __m128i __DEFAULT_FN_ATTRS 7758_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7759{ 7760 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7761 (__v8hi) __O, __M); 7762} 7763 7764static __inline__ __m128i __DEFAULT_FN_ATTRS 7765_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 7766{ 7767 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7768 (__v8hi) _mm_setzero_si128 (), 7769 __M); 7770} 7771 7772static __inline__ void __DEFAULT_FN_ATTRS 7773_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7774{ 7775 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7776} 7777 7778static __inline__ __m128i __DEFAULT_FN_ATTRS 7779_mm256_cvtusepi32_epi16 (__m256i __A) 7780{ 7781 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7782 (__v8hi) _mm_undefined_si128(), 7783 (__mmask8) -1); 7784} 7785 7786static __inline__ __m128i __DEFAULT_FN_ATTRS 7787_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7788{ 7789 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7790 (__v8hi) __O, __M); 7791} 7792 7793static __inline__ __m128i __DEFAULT_FN_ATTRS 7794_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7795{ 7796 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7797 (__v8hi) _mm_setzero_si128 (), 7798 __M); 7799} 7800 7801static __inline__ void __DEFAULT_FN_ATTRS 7802_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7803{ 7804 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7805} 7806 7807static __inline__ __m128i __DEFAULT_FN_ATTRS 7808_mm_cvtusepi64_epi8 (__m128i __A) 7809{ 7810 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7811 (__v16qi)_mm_undefined_si128(), 7812 (__mmask8) -1); 7813} 7814 7815static __inline__ __m128i __DEFAULT_FN_ATTRS 7816_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7817{ 7818 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7819 (__v16qi) __O, 7820 __M); 7821} 7822 7823static __inline__ __m128i __DEFAULT_FN_ATTRS 7824_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 7825{ 7826 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7827 (__v16qi) _mm_setzero_si128 (), 7828 __M); 7829} 7830 7831static __inline__ void __DEFAULT_FN_ATTRS 7832_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7833{ 7834 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7835} 7836 7837static __inline__ __m128i __DEFAULT_FN_ATTRS 7838_mm256_cvtusepi64_epi8 (__m256i __A) 7839{ 7840 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7841 (__v16qi)_mm_undefined_si128(), 7842 (__mmask8) -1); 7843} 7844 7845static __inline__ __m128i __DEFAULT_FN_ATTRS 7846_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7847{ 7848 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7849 (__v16qi) __O, 7850 __M); 7851} 7852 7853static __inline__ __m128i __DEFAULT_FN_ATTRS 7854_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 7855{ 7856 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7857 (__v16qi) _mm_setzero_si128 (), 7858 __M); 7859} 7860 7861static __inline__ void __DEFAULT_FN_ATTRS 7862_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7863{ 7864 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7865} 7866 7867static __inline__ __m128i __DEFAULT_FN_ATTRS 7868_mm_cvtusepi64_epi32 (__m128i __A) 7869{ 7870 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7871 (__v4si)_mm_undefined_si128(), 7872 (__mmask8) -1); 7873} 7874 7875static __inline__ __m128i __DEFAULT_FN_ATTRS 7876_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7877{ 7878 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7879 (__v4si) __O, __M); 7880} 7881 7882static __inline__ __m128i __DEFAULT_FN_ATTRS 7883_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 7884{ 7885 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7886 (__v4si) _mm_setzero_si128 (), 7887 __M); 7888} 7889 7890static __inline__ void __DEFAULT_FN_ATTRS 7891_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7892{ 7893 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7894} 7895 7896static __inline__ __m128i __DEFAULT_FN_ATTRS 7897_mm256_cvtusepi64_epi32 (__m256i __A) 7898{ 7899 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7900 (__v4si)_mm_undefined_si128(), 7901 (__mmask8) -1); 7902} 7903 7904static __inline__ __m128i __DEFAULT_FN_ATTRS 7905_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7906{ 7907 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7908 (__v4si) __O, __M); 7909} 7910 7911static __inline__ __m128i __DEFAULT_FN_ATTRS 7912_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 7913{ 7914 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7915 (__v4si) _mm_setzero_si128 (), 7916 __M); 7917} 7918 7919static __inline__ void __DEFAULT_FN_ATTRS 7920_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7921{ 7922 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7923} 7924 7925static __inline__ __m128i __DEFAULT_FN_ATTRS 7926_mm_cvtusepi64_epi16 (__m128i __A) 7927{ 7928 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7929 (__v8hi)_mm_undefined_si128(), 7930 (__mmask8) -1); 7931} 7932 7933static __inline__ __m128i __DEFAULT_FN_ATTRS 7934_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7935{ 7936 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7937 (__v8hi) __O, __M); 7938} 7939 7940static __inline__ __m128i __DEFAULT_FN_ATTRS 7941_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 7942{ 7943 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7944 (__v8hi) _mm_setzero_si128 (), 7945 __M); 7946} 7947 7948static __inline__ void __DEFAULT_FN_ATTRS 7949_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7950{ 7951 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7952} 7953 7954static __inline__ __m128i __DEFAULT_FN_ATTRS 7955_mm256_cvtusepi64_epi16 (__m256i __A) 7956{ 7957 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7958 (__v8hi)_mm_undefined_si128(), 7959 (__mmask8) -1); 7960} 7961 7962static __inline__ __m128i __DEFAULT_FN_ATTRS 7963_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7964{ 7965 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7966 (__v8hi) __O, __M); 7967} 7968 7969static __inline__ __m128i __DEFAULT_FN_ATTRS 7970_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 7971{ 7972 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7973 (__v8hi) _mm_setzero_si128 (), 7974 __M); 7975} 7976 7977static __inline__ void __DEFAULT_FN_ATTRS 7978_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7979{ 7980 return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7981} 7982 7983static __inline__ __m128i __DEFAULT_FN_ATTRS 7984_mm_cvtepi32_epi8 (__m128i __A) 7985{ 7986 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7987 (__v16qi)_mm_undefined_si128(), 7988 (__mmask8) -1); 7989} 7990 7991static __inline__ __m128i __DEFAULT_FN_ATTRS 7992_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7993{ 7994 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7995 (__v16qi) __O, __M); 7996} 7997 7998static __inline__ __m128i __DEFAULT_FN_ATTRS 7999_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 8000{ 8001 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 8002 (__v16qi) 8003 _mm_setzero_si128 (), 8004 __M); 8005} 8006 8007static __inline__ void __DEFAULT_FN_ATTRS 8008_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 8009{ 8010 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 8011} 8012 8013static __inline__ __m128i __DEFAULT_FN_ATTRS 8014_mm256_cvtepi32_epi8 (__m256i __A) 8015{ 8016 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 8017 (__v16qi)_mm_undefined_si128(), 8018 (__mmask8) -1); 8019} 8020 8021static __inline__ __m128i __DEFAULT_FN_ATTRS 8022_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 8023{ 8024 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 8025 (__v16qi) __O, __M); 8026} 8027 8028static __inline__ __m128i __DEFAULT_FN_ATTRS 8029_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 8030{ 8031 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 8032 (__v16qi) _mm_setzero_si128 (), 8033 __M); 8034} 8035 8036static __inline__ void __DEFAULT_FN_ATTRS 8037_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 8038{ 8039 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 8040} 8041 8042static __inline__ __m128i __DEFAULT_FN_ATTRS 8043_mm_cvtepi32_epi16 (__m128i __A) 8044{ 8045 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8046 (__v8hi) _mm_setzero_si128 (), 8047 (__mmask8) -1); 8048} 8049 8050static __inline__ __m128i __DEFAULT_FN_ATTRS 8051_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8052{ 8053 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8054 (__v8hi) __O, __M); 8055} 8056 8057static __inline__ __m128i __DEFAULT_FN_ATTRS 8058_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 8059{ 8060 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8061 (__v8hi) _mm_setzero_si128 (), 8062 __M); 8063} 8064 8065static __inline__ void __DEFAULT_FN_ATTRS 8066_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8067{ 8068 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 8069} 8070 8071static __inline__ __m128i __DEFAULT_FN_ATTRS 8072_mm256_cvtepi32_epi16 (__m256i __A) 8073{ 8074 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8075 (__v8hi)_mm_setzero_si128 (), 8076 (__mmask8) -1); 8077} 8078 8079static __inline__ __m128i __DEFAULT_FN_ATTRS 8080_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8081{ 8082 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8083 (__v8hi) __O, __M); 8084} 8085 8086static __inline__ __m128i __DEFAULT_FN_ATTRS 8087_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 8088{ 8089 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8090 (__v8hi) _mm_setzero_si128 (), 8091 __M); 8092} 8093 8094static __inline__ void __DEFAULT_FN_ATTRS 8095_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8096{ 8097 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 8098} 8099 8100static __inline__ __m128i __DEFAULT_FN_ATTRS 8101_mm_cvtepi64_epi8 (__m128i __A) 8102{ 8103 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8104 (__v16qi) _mm_undefined_si128(), 8105 (__mmask8) -1); 8106} 8107 8108static __inline__ __m128i __DEFAULT_FN_ATTRS 8109_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 8110{ 8111 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8112 (__v16qi) __O, __M); 8113} 8114 8115static __inline__ __m128i __DEFAULT_FN_ATTRS 8116_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 8117{ 8118 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8119 (__v16qi) _mm_setzero_si128 (), 8120 __M); 8121} 8122 8123static __inline__ void __DEFAULT_FN_ATTRS 8124_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 8125{ 8126 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 8127} 8128 8129static __inline__ __m128i __DEFAULT_FN_ATTRS 8130_mm256_cvtepi64_epi8 (__m256i __A) 8131{ 8132 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8133 (__v16qi) _mm_undefined_si128(), 8134 (__mmask8) -1); 8135} 8136 8137static __inline__ __m128i __DEFAULT_FN_ATTRS 8138_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 8139{ 8140 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8141 (__v16qi) __O, __M); 8142} 8143 8144static __inline__ __m128i __DEFAULT_FN_ATTRS 8145_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 8146{ 8147 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8148 (__v16qi) _mm_setzero_si128 (), 8149 __M); 8150} 8151 8152static __inline__ void __DEFAULT_FN_ATTRS 8153_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 8154{ 8155 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 8156} 8157 8158static __inline__ __m128i __DEFAULT_FN_ATTRS 8159_mm_cvtepi64_epi32 (__m128i __A) 8160{ 8161 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8162 (__v4si)_mm_undefined_si128(), 8163 (__mmask8) -1); 8164} 8165 8166static __inline__ __m128i __DEFAULT_FN_ATTRS 8167_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 8168{ 8169 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8170 (__v4si) __O, __M); 8171} 8172 8173static __inline__ __m128i __DEFAULT_FN_ATTRS 8174_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 8175{ 8176 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8177 (__v4si) _mm_setzero_si128 (), 8178 __M); 8179} 8180 8181static __inline__ void __DEFAULT_FN_ATTRS 8182_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 8183{ 8184 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 8185} 8186 8187static __inline__ __m128i __DEFAULT_FN_ATTRS 8188_mm256_cvtepi64_epi32 (__m256i __A) 8189{ 8190 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8191 (__v4si) _mm_undefined_si128(), 8192 (__mmask8) -1); 8193} 8194 8195static __inline__ __m128i __DEFAULT_FN_ATTRS 8196_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 8197{ 8198 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8199 (__v4si) __O, __M); 8200} 8201 8202static __inline__ __m128i __DEFAULT_FN_ATTRS 8203_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 8204{ 8205 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8206 (__v4si) _mm_setzero_si128 (), 8207 __M); 8208} 8209 8210static __inline__ void __DEFAULT_FN_ATTRS 8211_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 8212{ 8213 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 8214} 8215 8216static __inline__ __m128i __DEFAULT_FN_ATTRS 8217_mm_cvtepi64_epi16 (__m128i __A) 8218{ 8219 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8220 (__v8hi) _mm_undefined_si128(), 8221 (__mmask8) -1); 8222} 8223 8224static __inline__ __m128i __DEFAULT_FN_ATTRS 8225_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8226{ 8227 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8228 (__v8hi)__O, 8229 __M); 8230} 8231 8232static __inline__ __m128i __DEFAULT_FN_ATTRS 8233_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 8234{ 8235 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8236 (__v8hi) _mm_setzero_si128 (), 8237 __M); 8238} 8239 8240static __inline__ void __DEFAULT_FN_ATTRS 8241_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8242{ 8243 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 8244} 8245 8246static __inline__ __m128i __DEFAULT_FN_ATTRS 8247_mm256_cvtepi64_epi16 (__m256i __A) 8248{ 8249 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8250 (__v8hi)_mm_undefined_si128(), 8251 (__mmask8) -1); 8252} 8253 8254static __inline__ __m128i __DEFAULT_FN_ATTRS 8255_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8256{ 8257 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8258 (__v8hi) __O, __M); 8259} 8260 8261static __inline__ __m128i __DEFAULT_FN_ATTRS 8262_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 8263{ 8264 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8265 (__v8hi) _mm_setzero_si128 (), 8266 __M); 8267} 8268 8269static __inline__ void __DEFAULT_FN_ATTRS 8270_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8271{ 8272 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 8273} 8274 8275#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \ 8276 (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \ 8277 (__v8sf)_mm256_undefined_ps(), \ 8278 ((imm) & 1) ? 4 : 0, \ 8279 ((imm) & 1) ? 5 : 1, \ 8280 ((imm) & 1) ? 6 : 2, \ 8281 ((imm) & 1) ? 7 : 3); }) 8282 8283#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \ 8284 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 8285 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ 8286 (__v4sf)(W)); }) 8287 8288#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \ 8289 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 8290 (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ 8291 (__v4sf)_mm_setzero_ps()); }) 8292 8293#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \ 8294 (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \ 8295 (__v8si)_mm256_undefined_si256(), \ 8296 ((imm) & 1) ? 4 : 0, \ 8297 ((imm) & 1) ? 5 : 1, \ 8298 ((imm) & 1) ? 6 : 2, \ 8299 ((imm) & 1) ? 7 : 3); }) 8300 8301#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 8302 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8303 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ 8304 (__v4si)(W)); }) 8305 8306#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 8307 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8308 (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ 8309 (__v4si)_mm_setzero_si128()); }) 8310 8311#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \ 8312 (__m256)__builtin_shufflevector((__v8sf)(A), \ 8313 (__v8sf)_mm256_castps128_ps256((__m128)(B)), \ 8314 ((imm) & 0x1) ? 0 : 8, \ 8315 ((imm) & 0x1) ? 1 : 9, \ 8316 ((imm) & 0x1) ? 2 : 10, \ 8317 ((imm) & 0x1) ? 3 : 11, \ 8318 ((imm) & 0x1) ? 8 : 4, \ 8319 ((imm) & 0x1) ? 9 : 5, \ 8320 ((imm) & 0x1) ? 10 : 6, \ 8321 ((imm) & 0x1) ? 11 : 7); }) 8322 8323#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ 8324 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 8325 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 8326 (__v8sf)(W)); }) 8327 8328#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ 8329 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 8330 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 8331 (__v8sf)_mm256_setzero_ps()); }) 8332 8333#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \ 8334 (__m256i)__builtin_shufflevector((__v8si)(A), \ 8335 (__v8si)_mm256_castsi128_si256((__m128i)(B)), \ 8336 ((imm) & 0x1) ? 0 : 8, \ 8337 ((imm) & 0x1) ? 1 : 9, \ 8338 ((imm) & 0x1) ? 2 : 10, \ 8339 ((imm) & 0x1) ? 3 : 11, \ 8340 ((imm) & 0x1) ? 8 : 4, \ 8341 ((imm) & 0x1) ? 9 : 5, \ 8342 ((imm) & 0x1) ? 10 : 6, \ 8343 ((imm) & 0x1) ? 11 : 7); }) 8344 8345#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ 8346 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8347 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 8348 (__v8si)(W)); }) 8349 8350#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ 8351 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8352 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 8353 (__v8si)_mm256_setzero_si256()); }) 8354 8355#define _mm_getmant_pd(A, B, C) __extension__({\ 8356 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8357 (int)(((C)<<2) | (B)), \ 8358 (__v2df)_mm_setzero_pd(), \ 8359 (__mmask8)-1); }) 8360 8361#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\ 8362 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8363 (int)(((C)<<2) | (B)), \ 8364 (__v2df)(__m128d)(W), \ 8365 (__mmask8)(U)); }) 8366 8367#define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\ 8368 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8369 (int)(((C)<<2) | (B)), \ 8370 (__v2df)_mm_setzero_pd(), \ 8371 (__mmask8)(U)); }) 8372 8373#define _mm256_getmant_pd(A, B, C) __extension__ ({ \ 8374 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8375 (int)(((C)<<2) | (B)), \ 8376 (__v4df)_mm256_setzero_pd(), \ 8377 (__mmask8)-1); }) 8378 8379#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ 8380 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8381 (int)(((C)<<2) | (B)), \ 8382 (__v4df)(__m256d)(W), \ 8383 (__mmask8)(U)); }) 8384 8385#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ 8386 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8387 (int)(((C)<<2) | (B)), \ 8388 (__v4df)_mm256_setzero_pd(), \ 8389 (__mmask8)(U)); }) 8390 8391#define _mm_getmant_ps(A, B, C) __extension__ ({ \ 8392 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8393 (int)(((C)<<2) | (B)), \ 8394 (__v4sf)_mm_setzero_ps(), \ 8395 (__mmask8)-1); }) 8396 8397#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8398 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8399 (int)(((C)<<2) | (B)), \ 8400 (__v4sf)(__m128)(W), \ 8401 (__mmask8)(U)); }) 8402 8403#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8404 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8405 (int)(((C)<<2) | (B)), \ 8406 (__v4sf)_mm_setzero_ps(), \ 8407 (__mmask8)(U)); }) 8408 8409#define _mm256_getmant_ps(A, B, C) __extension__ ({ \ 8410 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8411 (int)(((C)<<2) | (B)), \ 8412 (__v8sf)_mm256_setzero_ps(), \ 8413 (__mmask8)-1); }) 8414 8415#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8416 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8417 (int)(((C)<<2) | (B)), \ 8418 (__v8sf)(__m256)(W), \ 8419 (__mmask8)(U)); }) 8420 8421#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8422 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8423 (int)(((C)<<2) | (B)), \ 8424 (__v8sf)_mm256_setzero_ps(), \ 8425 (__mmask8)(U)); }) 8426 8427#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8428 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 8429 (double const *)(addr), \ 8430 (__v2di)(__m128i)(index), \ 8431 (__mmask8)(mask), (int)(scale)); }) 8432 8433#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8434 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 8435 (long long const *)(addr), \ 8436 (__v2di)(__m128i)(index), \ 8437 (__mmask8)(mask), (int)(scale)); }) 8438 8439#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8440 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 8441 (double const *)(addr), \ 8442 (__v4di)(__m256i)(index), \ 8443 (__mmask8)(mask), (int)(scale)); }) 8444 8445#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8446 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 8447 (long long const *)(addr), \ 8448 (__v4di)(__m256i)(index), \ 8449 (__mmask8)(mask), (int)(scale)); }) 8450 8451#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8452 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 8453 (float const *)(addr), \ 8454 (__v2di)(__m128i)(index), \ 8455 (__mmask8)(mask), (int)(scale)); }) 8456 8457#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8458 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 8459 (int const *)(addr), \ 8460 (__v2di)(__m128i)(index), \ 8461 (__mmask8)(mask), (int)(scale)); }) 8462 8463#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8464 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 8465 (float const *)(addr), \ 8466 (__v4di)(__m256i)(index), \ 8467 (__mmask8)(mask), (int)(scale)); }) 8468 8469#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8470 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8471 (int const *)(addr), \ 8472 (__v4di)(__m256i)(index), \ 8473 (__mmask8)(mask), (int)(scale)); }) 8474 8475#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8476 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8477 (double const *)(addr), \ 8478 (__v4si)(__m128i)(index), \ 8479 (__mmask8)(mask), (int)(scale)); }) 8480 8481#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8482 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8483 (long long const *)(addr), \ 8484 (__v4si)(__m128i)(index), \ 8485 (__mmask8)(mask), (int)(scale)); }) 8486 8487#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8488 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8489 (double const *)(addr), \ 8490 (__v4si)(__m128i)(index), \ 8491 (__mmask8)(mask), (int)(scale)); }) 8492 8493#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8494 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8495 (long long const *)(addr), \ 8496 (__v4si)(__m128i)(index), \ 8497 (__mmask8)(mask), (int)(scale)); }) 8498 8499#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8500 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8501 (float const *)(addr), \ 8502 (__v4si)(__m128i)(index), \ 8503 (__mmask8)(mask), (int)(scale)); }) 8504 8505#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8506 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8507 (int const *)(addr), \ 8508 (__v4si)(__m128i)(index), \ 8509 (__mmask8)(mask), (int)(scale)); }) 8510 8511#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8512 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 8513 (float const *)(addr), \ 8514 (__v8si)(__m256i)(index), \ 8515 (__mmask8)(mask), (int)(scale)); }) 8516 8517#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8518 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8519 (int const *)(addr), \ 8520 (__v8si)(__m256i)(index), \ 8521 (__mmask8)(mask), (int)(scale)); }) 8522 8523#define _mm256_permutex_pd(X, C) __extension__ ({ \ 8524 (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ 8525 (__v4df)_mm256_undefined_pd(), \ 8526 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ 8527 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) 8528 8529#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ 8530 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8531 (__v4df)_mm256_permutex_pd((X), (C)), \ 8532 (__v4df)(__m256d)(W)); }) 8533 8534#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \ 8535 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8536 (__v4df)_mm256_permutex_pd((X), (C)), \ 8537 (__v4df)_mm256_setzero_pd()); }) 8538 8539#define _mm256_permutex_epi64(X, C) __extension__ ({ \ 8540 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ 8541 (__v4di)_mm256_undefined_si256(), \ 8542 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ 8543 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) 8544 8545#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ 8546 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8547 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8548 (__v4di)(__m256i)(W)); }) 8549 8550#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \ 8551 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8552 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8553 (__v4di)_mm256_setzero_si256()); }) 8554 8555static __inline__ __m256d __DEFAULT_FN_ATTRS 8556_mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8557{ 8558 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8559 (__v4di) __X, 8560 (__v4df) _mm256_undefined_si256 (), 8561 (__mmask8) -1); 8562} 8563 8564static __inline__ __m256d __DEFAULT_FN_ATTRS 8565_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8566 __m256d __Y) 8567{ 8568 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8569 (__v4di) __X, 8570 (__v4df) __W, 8571 (__mmask8) __U); 8572} 8573 8574static __inline__ __m256d __DEFAULT_FN_ATTRS 8575_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8576{ 8577 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8578 (__v4di) __X, 8579 (__v4df) _mm256_setzero_pd (), 8580 (__mmask8) __U); 8581} 8582 8583static __inline__ __m256i __DEFAULT_FN_ATTRS 8584_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8585{ 8586 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8587 (__v4di) __X, 8588 (__v4di) _mm256_setzero_si256 (), 8589 (__mmask8) __M); 8590} 8591 8592static __inline__ __m256i __DEFAULT_FN_ATTRS 8593_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8594{ 8595 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8596 (__v4di) __X, 8597 (__v4di) _mm256_undefined_si256 (), 8598 (__mmask8) -1); 8599} 8600 8601static __inline__ __m256i __DEFAULT_FN_ATTRS 8602_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8603 __m256i __Y) 8604{ 8605 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8606 (__v4di) __X, 8607 (__v4di) __W, 8608 __M); 8609} 8610 8611static __inline__ __m256 __DEFAULT_FN_ATTRS 8612_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, 8613 __m256 __Y) 8614{ 8615 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8616 (__v8si) __X, 8617 (__v8sf) __W, 8618 (__mmask8) __U); 8619} 8620 8621static __inline__ __m256 __DEFAULT_FN_ATTRS 8622_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) 8623{ 8624 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8625 (__v8si) __X, 8626 (__v8sf) _mm256_setzero_ps (), 8627 (__mmask8) __U); 8628} 8629 8630static __inline__ __m256 __DEFAULT_FN_ATTRS 8631_mm256_permutexvar_ps (__m256i __X, __m256 __Y) 8632{ 8633 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8634 (__v8si) __X, 8635 (__v8sf) _mm256_undefined_si256 (), 8636 (__mmask8) -1); 8637} 8638 8639static __inline__ __m256i __DEFAULT_FN_ATTRS 8640_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) 8641{ 8642 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8643 (__v8si) __X, 8644 (__v8si) _mm256_setzero_si256 (), 8645 __M); 8646} 8647 8648static __inline__ __m256i __DEFAULT_FN_ATTRS 8649_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, 8650 __m256i __Y) 8651{ 8652 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8653 (__v8si) __X, 8654 (__v8si) __W, 8655 (__mmask8) __M); 8656} 8657 8658static __inline__ __m256i __DEFAULT_FN_ATTRS 8659_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y) 8660{ 8661 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8662 (__v8si) __X, 8663 (__v8si) _mm256_undefined_si256(), 8664 (__mmask8) -1); 8665} 8666 8667#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \ 8668 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \ 8669 (__v4si)(__m128i)(B), (int)(imm), \ 8670 (__v4si)_mm_undefined_si128(), \ 8671 (__mmask8)-1); }) 8672 8673#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ 8674 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \ 8675 (__v4si)(__m128i)(B), (int)(imm), \ 8676 (__v4si)(__m128i)(W), \ 8677 (__mmask8)(U)); }) 8678 8679#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ 8680 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \ 8681 (__v4si)(__m128i)(B), (int)(imm), \ 8682 (__v4si)_mm_setzero_si128(), \ 8683 (__mmask8)(U)); }) 8684 8685#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \ 8686 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \ 8687 (__v8si)(__m256i)(B), (int)(imm), \ 8688 (__v8si)_mm256_undefined_si256(), \ 8689 (__mmask8)-1); }) 8690 8691#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ 8692 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \ 8693 (__v8si)(__m256i)(B), (int)(imm), \ 8694 (__v8si)(__m256i)(W), \ 8695 (__mmask8)(U)); }) 8696 8697#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ 8698 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \ 8699 (__v8si)(__m256i)(B), (int)(imm), \ 8700 (__v8si)_mm256_setzero_si256(), \ 8701 (__mmask8)(U)); }) 8702 8703#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \ 8704 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \ 8705 (__v2di)(__m128i)(B), (int)(imm), \ 8706 (__v2di)_mm_setzero_di(), \ 8707 (__mmask8)-1); }) 8708 8709#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 8710 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \ 8711 (__v2di)(__m128i)(B), (int)(imm), \ 8712 (__v2di)(__m128i)(W), \ 8713 (__mmask8)(U)); }) 8714 8715#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ 8716 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \ 8717 (__v2di)(__m128i)(B), (int)(imm), \ 8718 (__v2di)_mm_setzero_di(), \ 8719 (__mmask8)(U)); }) 8720 8721#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \ 8722 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \ 8723 (__v4di)(__m256i)(B), (int)(imm), \ 8724 (__v4di)_mm256_undefined_pd(), \ 8725 (__mmask8)-1); }) 8726 8727#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 8728 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \ 8729 (__v4di)(__m256i)(B), (int)(imm), \ 8730 (__v4di)(__m256i)(W), \ 8731 (__mmask8)(U)); }) 8732 8733#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ 8734 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \ 8735 (__v4di)(__m256i)(B), (int)(imm), \ 8736 (__v4di)_mm256_setzero_si256(), \ 8737 (__mmask8)(U)); }) 8738 8739static __inline__ __m128 __DEFAULT_FN_ATTRS 8740_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8741{ 8742 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8743 (__v4sf)_mm_movehdup_ps(__A), 8744 (__v4sf)__W); 8745} 8746 8747static __inline__ __m128 __DEFAULT_FN_ATTRS 8748_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8749{ 8750 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8751 (__v4sf)_mm_movehdup_ps(__A), 8752 (__v4sf)_mm_setzero_ps()); 8753} 8754 8755static __inline__ __m256 __DEFAULT_FN_ATTRS 8756_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8757{ 8758 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8759 (__v8sf)_mm256_movehdup_ps(__A), 8760 (__v8sf)__W); 8761} 8762 8763static __inline__ __m256 __DEFAULT_FN_ATTRS 8764_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8765{ 8766 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8767 (__v8sf)_mm256_movehdup_ps(__A), 8768 (__v8sf)_mm256_setzero_ps()); 8769} 8770 8771static __inline__ __m128 __DEFAULT_FN_ATTRS 8772_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8773{ 8774 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8775 (__v4sf)_mm_moveldup_ps(__A), 8776 (__v4sf)__W); 8777} 8778 8779static __inline__ __m128 __DEFAULT_FN_ATTRS 8780_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8781{ 8782 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8783 (__v4sf)_mm_moveldup_ps(__A), 8784 (__v4sf)_mm_setzero_ps()); 8785} 8786 8787static __inline__ __m256 __DEFAULT_FN_ATTRS 8788_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8789{ 8790 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8791 (__v8sf)_mm256_moveldup_ps(__A), 8792 (__v8sf)__W); 8793} 8794 8795static __inline__ __m256 __DEFAULT_FN_ATTRS 8796_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8797{ 8798 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8799 (__v8sf)_mm256_moveldup_ps(__A), 8800 (__v8sf)_mm256_setzero_ps()); 8801} 8802 8803#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ 8804 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8805 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8806 (__v8si)(__m256i)(W)); }) 8807 8808#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ 8809 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8810 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8811 (__v8si)_mm256_setzero_si256()); }) 8812 8813#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ 8814 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8815 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8816 (__v4si)(__m128i)(W)); }) 8817 8818#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ 8819 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8820 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8821 (__v4si)_mm_setzero_si128()); }) 8822 8823static __inline__ __m128d __DEFAULT_FN_ATTRS 8824_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 8825{ 8826 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8827 (__v2df) __A, 8828 (__v2df) __W); 8829} 8830 8831static __inline__ __m128d __DEFAULT_FN_ATTRS 8832_mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 8833{ 8834 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8835 (__v2df) __A, 8836 (__v2df) _mm_setzero_pd ()); 8837} 8838 8839static __inline__ __m256d __DEFAULT_FN_ATTRS 8840_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 8841{ 8842 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8843 (__v4df) __A, 8844 (__v4df) __W); 8845} 8846 8847static __inline__ __m256d __DEFAULT_FN_ATTRS 8848_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 8849{ 8850 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8851 (__v4df) __A, 8852 (__v4df) _mm256_setzero_pd ()); 8853} 8854 8855static __inline__ __m128 __DEFAULT_FN_ATTRS 8856_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 8857{ 8858 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8859 (__v4sf) __A, 8860 (__v4sf) __W); 8861} 8862 8863static __inline__ __m128 __DEFAULT_FN_ATTRS 8864_mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 8865{ 8866 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8867 (__v4sf) __A, 8868 (__v4sf) _mm_setzero_ps ()); 8869} 8870 8871static __inline__ __m256 __DEFAULT_FN_ATTRS 8872_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 8873{ 8874 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8875 (__v8sf) __A, 8876 (__v8sf) __W); 8877} 8878 8879static __inline__ __m256 __DEFAULT_FN_ATTRS 8880_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 8881{ 8882 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8883 (__v8sf) __A, 8884 (__v8sf) _mm256_setzero_ps ()); 8885} 8886 8887static __inline__ __m128 __DEFAULT_FN_ATTRS 8888_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8889{ 8890 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8891 (__v4sf) __W, 8892 (__mmask8) __U); 8893} 8894 8895static __inline__ __m128 __DEFAULT_FN_ATTRS 8896_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8897{ 8898 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8899 (__v4sf) 8900 _mm_setzero_ps (), 8901 (__mmask8) __U); 8902} 8903 8904static __inline__ __m256 __DEFAULT_FN_ATTRS 8905_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8906{ 8907 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8908 (__v8sf) __W, 8909 (__mmask8) __U); 8910} 8911 8912static __inline__ __m256 __DEFAULT_FN_ATTRS 8913_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8914{ 8915 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8916 (__v8sf) 8917 _mm256_setzero_ps (), 8918 (__mmask8) __U); 8919} 8920 8921static __inline __m128i __DEFAULT_FN_ATTRS 8922_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) 8923{ 8924 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, 8925 (__v8hi) __W, 8926 (__mmask8) __U); 8927} 8928 8929static __inline __m128i __DEFAULT_FN_ATTRS 8930_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) 8931{ 8932 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, 8933 (__v8hi) _mm_setzero_si128 (), 8934 (__mmask8) __U); 8935} 8936 8937#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ 8938 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8939 (__v8hi)(__m128i)(W), \ 8940 (__mmask8)(U)); }) 8941 8942#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ 8943 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8944 (__v8hi)_mm_setzero_si128(), \ 8945 (__mmask8)(U)); }) 8946 8947static __inline __m128i __DEFAULT_FN_ATTRS 8948_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) 8949{ 8950 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, 8951 (__v8hi) __W, 8952 (__mmask8) __U); 8953} 8954 8955static __inline __m128i __DEFAULT_FN_ATTRS 8956_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) 8957{ 8958 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, 8959 (__v8hi) _mm_setzero_si128(), 8960 (__mmask8) __U); 8961} 8962#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ 8963 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8964 (__v8hi)(__m128i)(W), \ 8965 (__mmask8)(U)); }) 8966 8967#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ 8968 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8969 (__v8hi)_mm_setzero_si128(), \ 8970 (__mmask8)(U)); }) 8971 8972 8973#undef __DEFAULT_FN_ATTRS 8974 8975#endif /* __AVX512VLINTRIN_H */ 8976