1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512VLINTRIN_H 29#define __AVX512VLINTRIN_H 30 31#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) 32 33/* Doesn't require avx512vl, used in avx512dqintrin.h */ 34static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 35_mm_setzero_di(void) { 36 return (__m128i)(__v2di){ 0LL, 0LL}; 37} 38 39/* Integer compare */ 40 41static __inline__ __mmask8 __DEFAULT_FN_ATTRS 42_mm_cmpeq_epi32_mask(__m128i __a, __m128i __b) { 43 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, 44 (__mmask8)-1); 45} 46 47static __inline__ __mmask8 __DEFAULT_FN_ATTRS 48_mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 49 return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b, 50 __u); 51} 52 53static __inline__ __mmask8 __DEFAULT_FN_ATTRS 54_mm_cmpeq_epu32_mask(__m128i __a, __m128i __b) { 55 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, 56 (__mmask8)-1); 57} 58 59static __inline__ __mmask8 __DEFAULT_FN_ATTRS 60_mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 61 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0, 62 __u); 63} 64 65static __inline__ __mmask8 __DEFAULT_FN_ATTRS 66_mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b) { 67 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, 68 (__mmask8)-1); 69} 70 71static __inline__ __mmask8 __DEFAULT_FN_ATTRS 72_mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 73 return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b, 74 __u); 75} 76 77static __inline__ __mmask8 __DEFAULT_FN_ATTRS 78_mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b) { 79 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, 80 (__mmask8)-1); 81} 82 83static __inline__ __mmask8 __DEFAULT_FN_ATTRS 84_mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 85 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0, 86 __u); 87} 88 89static __inline__ __mmask8 __DEFAULT_FN_ATTRS 90_mm_cmpeq_epi64_mask(__m128i __a, __m128i __b) { 91 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, 92 (__mmask8)-1); 93} 94 95static __inline__ __mmask8 __DEFAULT_FN_ATTRS 96_mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 97 return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b, 98 __u); 99} 100 101static __inline__ __mmask8 __DEFAULT_FN_ATTRS 102_mm_cmpeq_epu64_mask(__m128i __a, __m128i __b) { 103 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, 104 (__mmask8)-1); 105} 106 107static __inline__ __mmask8 __DEFAULT_FN_ATTRS 108_mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 109 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0, 110 __u); 111} 112 113static __inline__ __mmask8 __DEFAULT_FN_ATTRS 114_mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b) { 115 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, 116 (__mmask8)-1); 117} 118 119static __inline__ __mmask8 __DEFAULT_FN_ATTRS 120_mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 121 return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b, 122 __u); 123} 124 125static __inline__ __mmask8 __DEFAULT_FN_ATTRS 126_mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b) { 127 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, 128 (__mmask8)-1); 129} 130 131static __inline__ __mmask8 __DEFAULT_FN_ATTRS 132_mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 133 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0, 134 __u); 135} 136 137 138static __inline__ __mmask8 __DEFAULT_FN_ATTRS 139_mm_cmpge_epi32_mask(__m128i __a, __m128i __b) { 140 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, 141 (__mmask8)-1); 142} 143 144static __inline__ __mmask8 __DEFAULT_FN_ATTRS 145_mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 146 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5, 147 __u); 148} 149 150static __inline__ __mmask8 __DEFAULT_FN_ATTRS 151_mm_cmpge_epu32_mask(__m128i __a, __m128i __b) { 152 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, 153 (__mmask8)-1); 154} 155 156static __inline__ __mmask8 __DEFAULT_FN_ATTRS 157_mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 158 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5, 159 __u); 160} 161 162static __inline__ __mmask8 __DEFAULT_FN_ATTRS 163_mm256_cmpge_epi32_mask(__m256i __a, __m256i __b) { 164 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, 165 (__mmask8)-1); 166} 167 168static __inline__ __mmask8 __DEFAULT_FN_ATTRS 169_mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 170 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5, 171 __u); 172} 173 174static __inline__ __mmask8 __DEFAULT_FN_ATTRS 175_mm256_cmpge_epu32_mask(__m256i __a, __m256i __b) { 176 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, 177 (__mmask8)-1); 178} 179 180static __inline__ __mmask8 __DEFAULT_FN_ATTRS 181_mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 182 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5, 183 __u); 184} 185 186static __inline__ __mmask8 __DEFAULT_FN_ATTRS 187_mm_cmpge_epi64_mask(__m128i __a, __m128i __b) { 188 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, 189 (__mmask8)-1); 190} 191 192static __inline__ __mmask8 __DEFAULT_FN_ATTRS 193_mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 194 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5, 195 __u); 196} 197 198static __inline__ __mmask8 __DEFAULT_FN_ATTRS 199_mm_cmpge_epu64_mask(__m128i __a, __m128i __b) { 200 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, 201 (__mmask8)-1); 202} 203 204static __inline__ __mmask8 __DEFAULT_FN_ATTRS 205_mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 206 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5, 207 __u); 208} 209 210static __inline__ __mmask8 __DEFAULT_FN_ATTRS 211_mm256_cmpge_epi64_mask(__m256i __a, __m256i __b) { 212 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, 213 (__mmask8)-1); 214} 215 216static __inline__ __mmask8 __DEFAULT_FN_ATTRS 217_mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 218 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5, 219 __u); 220} 221 222static __inline__ __mmask8 __DEFAULT_FN_ATTRS 223_mm256_cmpge_epu64_mask(__m256i __a, __m256i __b) { 224 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, 225 (__mmask8)-1); 226} 227 228static __inline__ __mmask8 __DEFAULT_FN_ATTRS 229_mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 230 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5, 231 __u); 232} 233 234static __inline__ __mmask8 __DEFAULT_FN_ATTRS 235_mm_cmpgt_epi32_mask(__m128i __a, __m128i __b) { 236 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, 237 (__mmask8)-1); 238} 239 240static __inline__ __mmask8 __DEFAULT_FN_ATTRS 241_mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 242 return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b, 243 __u); 244} 245 246static __inline__ __mmask8 __DEFAULT_FN_ATTRS 247_mm_cmpgt_epu32_mask(__m128i __a, __m128i __b) { 248 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, 249 (__mmask8)-1); 250} 251 252static __inline__ __mmask8 __DEFAULT_FN_ATTRS 253_mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 254 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6, 255 __u); 256} 257 258static __inline__ __mmask8 __DEFAULT_FN_ATTRS 259_mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b) { 260 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, 261 (__mmask8)-1); 262} 263 264static __inline__ __mmask8 __DEFAULT_FN_ATTRS 265_mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 266 return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b, 267 __u); 268} 269 270static __inline__ __mmask8 __DEFAULT_FN_ATTRS 271_mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b) { 272 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, 273 (__mmask8)-1); 274} 275 276static __inline__ __mmask8 __DEFAULT_FN_ATTRS 277_mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 278 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6, 279 __u); 280} 281 282static __inline__ __mmask8 __DEFAULT_FN_ATTRS 283_mm_cmpgt_epi64_mask(__m128i __a, __m128i __b) { 284 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, 285 (__mmask8)-1); 286} 287 288static __inline__ __mmask8 __DEFAULT_FN_ATTRS 289_mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 290 return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b, 291 __u); 292} 293 294static __inline__ __mmask8 __DEFAULT_FN_ATTRS 295_mm_cmpgt_epu64_mask(__m128i __a, __m128i __b) { 296 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, 297 (__mmask8)-1); 298} 299 300static __inline__ __mmask8 __DEFAULT_FN_ATTRS 301_mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 302 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6, 303 __u); 304} 305 306static __inline__ __mmask8 __DEFAULT_FN_ATTRS 307_mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b) { 308 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, 309 (__mmask8)-1); 310} 311 312static __inline__ __mmask8 __DEFAULT_FN_ATTRS 313_mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 314 return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b, 315 __u); 316} 317 318static __inline__ __mmask8 __DEFAULT_FN_ATTRS 319_mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b) { 320 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, 321 (__mmask8)-1); 322} 323 324static __inline__ __mmask8 __DEFAULT_FN_ATTRS 325_mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 326 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6, 327 __u); 328} 329 330static __inline__ __mmask8 __DEFAULT_FN_ATTRS 331_mm_cmple_epi32_mask(__m128i __a, __m128i __b) { 332 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, 333 (__mmask8)-1); 334} 335 336static __inline__ __mmask8 __DEFAULT_FN_ATTRS 337_mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 338 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2, 339 __u); 340} 341 342static __inline__ __mmask8 __DEFAULT_FN_ATTRS 343_mm_cmple_epu32_mask(__m128i __a, __m128i __b) { 344 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, 345 (__mmask8)-1); 346} 347 348static __inline__ __mmask8 __DEFAULT_FN_ATTRS 349_mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 350 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2, 351 __u); 352} 353 354static __inline__ __mmask8 __DEFAULT_FN_ATTRS 355_mm256_cmple_epi32_mask(__m256i __a, __m256i __b) { 356 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, 357 (__mmask8)-1); 358} 359 360static __inline__ __mmask8 __DEFAULT_FN_ATTRS 361_mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 362 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2, 363 __u); 364} 365 366static __inline__ __mmask8 __DEFAULT_FN_ATTRS 367_mm256_cmple_epu32_mask(__m256i __a, __m256i __b) { 368 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, 369 (__mmask8)-1); 370} 371 372static __inline__ __mmask8 __DEFAULT_FN_ATTRS 373_mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 374 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2, 375 __u); 376} 377 378static __inline__ __mmask8 __DEFAULT_FN_ATTRS 379_mm_cmple_epi64_mask(__m128i __a, __m128i __b) { 380 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, 381 (__mmask8)-1); 382} 383 384static __inline__ __mmask8 __DEFAULT_FN_ATTRS 385_mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 386 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2, 387 __u); 388} 389 390static __inline__ __mmask8 __DEFAULT_FN_ATTRS 391_mm_cmple_epu64_mask(__m128i __a, __m128i __b) { 392 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, 393 (__mmask8)-1); 394} 395 396static __inline__ __mmask8 __DEFAULT_FN_ATTRS 397_mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 398 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2, 399 __u); 400} 401 402static __inline__ __mmask8 __DEFAULT_FN_ATTRS 403_mm256_cmple_epi64_mask(__m256i __a, __m256i __b) { 404 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, 405 (__mmask8)-1); 406} 407 408static __inline__ __mmask8 __DEFAULT_FN_ATTRS 409_mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 410 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2, 411 __u); 412} 413 414static __inline__ __mmask8 __DEFAULT_FN_ATTRS 415_mm256_cmple_epu64_mask(__m256i __a, __m256i __b) { 416 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, 417 (__mmask8)-1); 418} 419 420static __inline__ __mmask8 __DEFAULT_FN_ATTRS 421_mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 422 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2, 423 __u); 424} 425 426static __inline__ __mmask8 __DEFAULT_FN_ATTRS 427_mm_cmplt_epi32_mask(__m128i __a, __m128i __b) { 428 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, 429 (__mmask8)-1); 430} 431 432static __inline__ __mmask8 __DEFAULT_FN_ATTRS 433_mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 434 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1, 435 __u); 436} 437 438static __inline__ __mmask8 __DEFAULT_FN_ATTRS 439_mm_cmplt_epu32_mask(__m128i __a, __m128i __b) { 440 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, 441 (__mmask8)-1); 442} 443 444static __inline__ __mmask8 __DEFAULT_FN_ATTRS 445_mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 446 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1, 447 __u); 448} 449 450static __inline__ __mmask8 __DEFAULT_FN_ATTRS 451_mm256_cmplt_epi32_mask(__m256i __a, __m256i __b) { 452 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, 453 (__mmask8)-1); 454} 455 456static __inline__ __mmask8 __DEFAULT_FN_ATTRS 457_mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 458 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1, 459 __u); 460} 461 462static __inline__ __mmask8 __DEFAULT_FN_ATTRS 463_mm256_cmplt_epu32_mask(__m256i __a, __m256i __b) { 464 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, 465 (__mmask8)-1); 466} 467 468static __inline__ __mmask8 __DEFAULT_FN_ATTRS 469_mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 470 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1, 471 __u); 472} 473 474static __inline__ __mmask8 __DEFAULT_FN_ATTRS 475_mm_cmplt_epi64_mask(__m128i __a, __m128i __b) { 476 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, 477 (__mmask8)-1); 478} 479 480static __inline__ __mmask8 __DEFAULT_FN_ATTRS 481_mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 482 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1, 483 __u); 484} 485 486static __inline__ __mmask8 __DEFAULT_FN_ATTRS 487_mm_cmplt_epu64_mask(__m128i __a, __m128i __b) { 488 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, 489 (__mmask8)-1); 490} 491 492static __inline__ __mmask8 __DEFAULT_FN_ATTRS 493_mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 494 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1, 495 __u); 496} 497 498static __inline__ __mmask8 __DEFAULT_FN_ATTRS 499_mm256_cmplt_epi64_mask(__m256i __a, __m256i __b) { 500 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, 501 (__mmask8)-1); 502} 503 504static __inline__ __mmask8 __DEFAULT_FN_ATTRS 505_mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 506 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1, 507 __u); 508} 509 510static __inline__ __mmask8 __DEFAULT_FN_ATTRS 511_mm256_cmplt_epu64_mask(__m256i __a, __m256i __b) { 512 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, 513 (__mmask8)-1); 514} 515 516static __inline__ __mmask8 __DEFAULT_FN_ATTRS 517_mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 518 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1, 519 __u); 520} 521 522static __inline__ __mmask8 __DEFAULT_FN_ATTRS 523_mm_cmpneq_epi32_mask(__m128i __a, __m128i __b) { 524 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, 525 (__mmask8)-1); 526} 527 528static __inline__ __mmask8 __DEFAULT_FN_ATTRS 529_mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 530 return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4, 531 __u); 532} 533 534static __inline__ __mmask8 __DEFAULT_FN_ATTRS 535_mm_cmpneq_epu32_mask(__m128i __a, __m128i __b) { 536 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, 537 (__mmask8)-1); 538} 539 540static __inline__ __mmask8 __DEFAULT_FN_ATTRS 541_mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b) { 542 return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4, 543 __u); 544} 545 546static __inline__ __mmask8 __DEFAULT_FN_ATTRS 547_mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b) { 548 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, 549 (__mmask8)-1); 550} 551 552static __inline__ __mmask8 __DEFAULT_FN_ATTRS 553_mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 554 return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4, 555 __u); 556} 557 558static __inline__ __mmask8 __DEFAULT_FN_ATTRS 559_mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b) { 560 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, 561 (__mmask8)-1); 562} 563 564static __inline__ __mmask8 __DEFAULT_FN_ATTRS 565_mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b) { 566 return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4, 567 __u); 568} 569 570static __inline__ __mmask8 __DEFAULT_FN_ATTRS 571_mm_cmpneq_epi64_mask(__m128i __a, __m128i __b) { 572 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, 573 (__mmask8)-1); 574} 575 576static __inline__ __mmask8 __DEFAULT_FN_ATTRS 577_mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 578 return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4, 579 __u); 580} 581 582static __inline__ __mmask8 __DEFAULT_FN_ATTRS 583_mm_cmpneq_epu64_mask(__m128i __a, __m128i __b) { 584 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, 585 (__mmask8)-1); 586} 587 588static __inline__ __mmask8 __DEFAULT_FN_ATTRS 589_mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b) { 590 return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4, 591 __u); 592} 593 594static __inline__ __mmask8 __DEFAULT_FN_ATTRS 595_mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b) { 596 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, 597 (__mmask8)-1); 598} 599 600static __inline__ __mmask8 __DEFAULT_FN_ATTRS 601_mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 602 return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4, 603 __u); 604} 605 606static __inline__ __mmask8 __DEFAULT_FN_ATTRS 607_mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b) { 608 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, 609 (__mmask8)-1); 610} 611 612static __inline__ __mmask8 __DEFAULT_FN_ATTRS 613_mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b) { 614 return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4, 615 __u); 616} 617 618static __inline__ __m256i __DEFAULT_FN_ATTRS 619_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 620 __m256i __B) 621{ 622 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A, 623 (__v8si) __B, 624 (__v8si) __W, 625 (__mmask8) __U); 626} 627 628static __inline__ __m256i __DEFAULT_FN_ATTRS 629_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 630{ 631 return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A, 632 (__v8si) __B, 633 (__v8si) 634 _mm256_setzero_si256 (), 635 (__mmask8) __U); 636} 637 638static __inline__ __m256i __DEFAULT_FN_ATTRS 639_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 640 __m256i __B) 641{ 642 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A, 643 (__v4di) __B, 644 (__v4di) __W, 645 (__mmask8) __U); 646} 647 648static __inline__ __m256i __DEFAULT_FN_ATTRS 649_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 650{ 651 return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A, 652 (__v4di) __B, 653 (__v4di) 654 _mm256_setzero_si256 (), 655 (__mmask8) __U); 656} 657 658static __inline__ __m256i __DEFAULT_FN_ATTRS 659_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 660 __m256i __B) 661{ 662 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A, 663 (__v8si) __B, 664 (__v8si) __W, 665 (__mmask8) __U); 666} 667 668static __inline__ __m256i __DEFAULT_FN_ATTRS 669_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 670{ 671 return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A, 672 (__v8si) __B, 673 (__v8si) 674 _mm256_setzero_si256 (), 675 (__mmask8) __U); 676} 677 678static __inline__ __m256i __DEFAULT_FN_ATTRS 679_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 680 __m256i __B) 681{ 682 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A, 683 (__v4di) __B, 684 (__v4di) __W, 685 (__mmask8) __U); 686} 687 688static __inline__ __m256i __DEFAULT_FN_ATTRS 689_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 690{ 691 return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A, 692 (__v4di) __B, 693 (__v4di) 694 _mm256_setzero_si256 (), 695 (__mmask8) __U); 696} 697 698static __inline__ __m128i __DEFAULT_FN_ATTRS 699_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 700 __m128i __B) 701{ 702 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A, 703 (__v4si) __B, 704 (__v4si) __W, 705 (__mmask8) __U); 706} 707 708static __inline__ __m128i __DEFAULT_FN_ATTRS 709_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 710{ 711 return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A, 712 (__v4si) __B, 713 (__v4si) 714 _mm_setzero_si128 (), 715 (__mmask8) __U); 716} 717 718static __inline__ __m128i __DEFAULT_FN_ATTRS 719_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 720 __m128i __B) 721{ 722 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A, 723 (__v2di) __B, 724 (__v2di) __W, 725 (__mmask8) __U); 726} 727 728static __inline__ __m128i __DEFAULT_FN_ATTRS 729_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 730{ 731 return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A, 732 (__v2di) __B, 733 (__v2di) 734 _mm_setzero_si128 (), 735 (__mmask8) __U); 736} 737 738static __inline__ __m128i __DEFAULT_FN_ATTRS 739_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 740 __m128i __B) 741{ 742 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A, 743 (__v4si) __B, 744 (__v4si) __W, 745 (__mmask8) __U); 746} 747 748static __inline__ __m128i __DEFAULT_FN_ATTRS 749_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 750{ 751 return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A, 752 (__v4si) __B, 753 (__v4si) 754 _mm_setzero_si128 (), 755 (__mmask8) __U); 756} 757 758static __inline__ __m128i __DEFAULT_FN_ATTRS 759_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 760 __m128i __B) 761{ 762 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A, 763 (__v2di) __B, 764 (__v2di) __W, 765 (__mmask8) __U); 766} 767 768static __inline__ __m128i __DEFAULT_FN_ATTRS 769_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 770{ 771 return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A, 772 (__v2di) __B, 773 (__v2di) 774 _mm_setzero_si128 (), 775 (__mmask8) __U); 776} 777 778static __inline__ __m256i __DEFAULT_FN_ATTRS 779_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X, 780 __m256i __Y) 781{ 782 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, 783 (__v8si) __Y, 784 (__v4di) __W, __M); 785} 786 787static __inline__ __m256i __DEFAULT_FN_ATTRS 788_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) 789{ 790 return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X, 791 (__v8si) __Y, 792 (__v4di) 793 _mm256_setzero_si256 (), 794 __M); 795} 796 797static __inline__ __m128i __DEFAULT_FN_ATTRS 798_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X, 799 __m128i __Y) 800{ 801 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, 802 (__v4si) __Y, 803 (__v2di) __W, __M); 804} 805 806static __inline__ __m128i __DEFAULT_FN_ATTRS 807_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) 808{ 809 return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X, 810 (__v4si) __Y, 811 (__v2di) 812 _mm_setzero_si128 (), 813 __M); 814} 815 816static __inline__ __m256i __DEFAULT_FN_ATTRS 817_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X, 818 __m256i __Y) 819{ 820 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, 821 (__v8si) __Y, 822 (__v4di) __W, __M); 823} 824 825static __inline__ __m256i __DEFAULT_FN_ATTRS 826_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) 827{ 828 return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X, 829 (__v8si) __Y, 830 (__v4di) 831 _mm256_setzero_si256 (), 832 __M); 833} 834 835static __inline__ __m128i __DEFAULT_FN_ATTRS 836_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X, 837 __m128i __Y) 838{ 839 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, 840 (__v4si) __Y, 841 (__v2di) __W, __M); 842} 843 844static __inline__ __m128i __DEFAULT_FN_ATTRS 845_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) 846{ 847 return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X, 848 (__v4si) __Y, 849 (__v2di) 850 _mm_setzero_si128 (), 851 __M); 852} 853 854static __inline__ __m256i __DEFAULT_FN_ATTRS 855_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B) 856{ 857 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A, 858 (__v8si) __B, 859 (__v8si) 860 _mm256_setzero_si256 (), 861 __M); 862} 863 864static __inline__ __m256i __DEFAULT_FN_ATTRS 865_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A, 866 __m256i __B) 867{ 868 return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A, 869 (__v8si) __B, 870 (__v8si) __W, __M); 871} 872 873static __inline__ __m128i __DEFAULT_FN_ATTRS 874_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B) 875{ 876 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A, 877 (__v4si) __B, 878 (__v4si) 879 _mm_setzero_si128 (), 880 __M); 881} 882 883static __inline__ __m128i __DEFAULT_FN_ATTRS 884_mm_mask_mullo_epi32 (__m128i __W, __mmask16 __M, __m128i __A, 885 __m128i __B) 886{ 887 return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A, 888 (__v4si) __B, 889 (__v4si) __W, __M); 890} 891 892static __inline__ __m256i __DEFAULT_FN_ATTRS 893_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 894{ 895 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 896 (__v8si)_mm256_and_si256(__A, __B), 897 (__v8si)__W); 898} 899 900static __inline__ __m256i __DEFAULT_FN_ATTRS 901_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 902{ 903 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 904} 905 906static __inline__ __m128i __DEFAULT_FN_ATTRS 907_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 908{ 909 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 910 (__v4si)_mm_and_si128(__A, __B), 911 (__v4si)__W); 912} 913 914static __inline__ __m128i __DEFAULT_FN_ATTRS 915_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 916{ 917 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 918} 919 920static __inline__ __m256i __DEFAULT_FN_ATTRS 921_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 922{ 923 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 924 (__v8si)_mm256_andnot_si256(__A, __B), 925 (__v8si)__W); 926} 927 928static __inline__ __m256i __DEFAULT_FN_ATTRS 929_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 930{ 931 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 932 __U, __A, __B); 933} 934 935static __inline__ __m128i __DEFAULT_FN_ATTRS 936_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 937{ 938 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 939 (__v4si)_mm_andnot_si128(__A, __B), 940 (__v4si)__W); 941} 942 943static __inline__ __m128i __DEFAULT_FN_ATTRS 944_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 945{ 946 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 947} 948 949static __inline__ __m256i __DEFAULT_FN_ATTRS 950_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 951{ 952 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 953 (__v8si)_mm256_or_si256(__A, __B), 954 (__v8si)__W); 955} 956 957static __inline__ __m256i __DEFAULT_FN_ATTRS 958_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 959{ 960 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 961} 962 963static __inline__ __m128i __DEFAULT_FN_ATTRS 964_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 965{ 966 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 967 (__v4si)_mm_or_si128(__A, __B), 968 (__v4si)__W); 969} 970 971static __inline__ __m128i __DEFAULT_FN_ATTRS 972_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 973{ 974 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 975} 976 977static __inline__ __m256i __DEFAULT_FN_ATTRS 978_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 979{ 980 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 981 (__v8si)_mm256_xor_si256(__A, __B), 982 (__v8si)__W); 983} 984 985static __inline__ __m256i __DEFAULT_FN_ATTRS 986_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 987{ 988 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 989} 990 991static __inline__ __m128i __DEFAULT_FN_ATTRS 992_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, 993 __m128i __B) 994{ 995 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 996 (__v4si)_mm_xor_si128(__A, __B), 997 (__v4si)__W); 998} 999 1000static __inline__ __m128i __DEFAULT_FN_ATTRS 1001_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 1002{ 1003 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 1004} 1005 1006static __inline__ __m256i __DEFAULT_FN_ATTRS 1007_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1008{ 1009 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1010 (__v4di)_mm256_and_si256(__A, __B), 1011 (__v4di)__W); 1012} 1013 1014static __inline__ __m256i __DEFAULT_FN_ATTRS 1015_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1016{ 1017 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 1018} 1019 1020static __inline__ __m128i __DEFAULT_FN_ATTRS 1021_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1022{ 1023 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1024 (__v2di)_mm_and_si128(__A, __B), 1025 (__v2di)__W); 1026} 1027 1028static __inline__ __m128i __DEFAULT_FN_ATTRS 1029_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1030{ 1031 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 1032} 1033 1034static __inline__ __m256i __DEFAULT_FN_ATTRS 1035_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1036{ 1037 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1038 (__v4di)_mm256_andnot_si256(__A, __B), 1039 (__v4di)__W); 1040} 1041 1042static __inline__ __m256i __DEFAULT_FN_ATTRS 1043_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1044{ 1045 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 1046 __U, __A, __B); 1047} 1048 1049static __inline__ __m128i __DEFAULT_FN_ATTRS 1050_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1051{ 1052 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1053 (__v2di)_mm_andnot_si128(__A, __B), 1054 (__v2di)__W); 1055} 1056 1057static __inline__ __m128i __DEFAULT_FN_ATTRS 1058_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1059{ 1060 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 1061} 1062 1063static __inline__ __m256i __DEFAULT_FN_ATTRS 1064_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1065{ 1066 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1067 (__v4di)_mm256_or_si256(__A, __B), 1068 (__v4di)__W); 1069} 1070 1071static __inline__ __m256i __DEFAULT_FN_ATTRS 1072_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1073{ 1074 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 1075} 1076 1077static __inline__ __m128i __DEFAULT_FN_ATTRS 1078_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1079{ 1080 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1081 (__v2di)_mm_or_si128(__A, __B), 1082 (__v2di)__W); 1083} 1084 1085static __inline__ __m128i __DEFAULT_FN_ATTRS 1086_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1087{ 1088 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 1089} 1090 1091static __inline__ __m256i __DEFAULT_FN_ATTRS 1092_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 1093{ 1094 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 1095 (__v4di)_mm256_xor_si256(__A, __B), 1096 (__v4di)__W); 1097} 1098 1099static __inline__ __m256i __DEFAULT_FN_ATTRS 1100_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 1101{ 1102 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 1103} 1104 1105static __inline__ __m128i __DEFAULT_FN_ATTRS 1106_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 1107 __m128i __B) 1108{ 1109 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 1110 (__v2di)_mm_xor_si128(__A, __B), 1111 (__v2di)__W); 1112} 1113 1114static __inline__ __m128i __DEFAULT_FN_ATTRS 1115_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 1116{ 1117 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 1118} 1119 1120#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \ 1121 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 1122 (__v4si)(__m128i)(b), (int)(p), \ 1123 (__mmask8)-1); }) 1124 1125#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 1126 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 1127 (__v4si)(__m128i)(b), (int)(p), \ 1128 (__mmask8)(m)); }) 1129 1130#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \ 1131 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 1132 (__v4si)(__m128i)(b), (int)(p), \ 1133 (__mmask8)-1); }) 1134 1135#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 1136 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 1137 (__v4si)(__m128i)(b), (int)(p), \ 1138 (__mmask8)(m)); }) 1139 1140#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \ 1141 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 1142 (__v8si)(__m256i)(b), (int)(p), \ 1143 (__mmask8)-1); }) 1144 1145#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 1146 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 1147 (__v8si)(__m256i)(b), (int)(p), \ 1148 (__mmask8)(m)); }) 1149 1150#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \ 1151 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 1152 (__v8si)(__m256i)(b), (int)(p), \ 1153 (__mmask8)-1); }) 1154 1155#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 1156 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 1157 (__v8si)(__m256i)(b), (int)(p), \ 1158 (__mmask8)(m)); }) 1159 1160#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \ 1161 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 1162 (__v2di)(__m128i)(b), (int)(p), \ 1163 (__mmask8)-1); }) 1164 1165#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 1166 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 1167 (__v2di)(__m128i)(b), (int)(p), \ 1168 (__mmask8)(m)); }) 1169 1170#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \ 1171 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 1172 (__v2di)(__m128i)(b), (int)(p), \ 1173 (__mmask8)-1); }) 1174 1175#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 1176 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 1177 (__v2di)(__m128i)(b), (int)(p), \ 1178 (__mmask8)(m)); }) 1179 1180#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \ 1181 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 1182 (__v4di)(__m256i)(b), (int)(p), \ 1183 (__mmask8)-1); }) 1184 1185#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 1186 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 1187 (__v4di)(__m256i)(b), (int)(p), \ 1188 (__mmask8)(m)); }) 1189 1190#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \ 1191 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 1192 (__v4di)(__m256i)(b), (int)(p), \ 1193 (__mmask8)-1); }) 1194 1195#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 1196 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 1197 (__v4di)(__m256i)(b), (int)(p), \ 1198 (__mmask8)(m)); }) 1199 1200#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \ 1201 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 1202 (__v8sf)(__m256)(b), (int)(p), \ 1203 (__mmask8)-1); }) 1204 1205#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ 1206 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 1207 (__v8sf)(__m256)(b), (int)(p), \ 1208 (__mmask8)(m)); }) 1209 1210#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \ 1211 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 1212 (__v4df)(__m256d)(b), (int)(p), \ 1213 (__mmask8)-1); }) 1214 1215#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ 1216 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 1217 (__v4df)(__m256d)(b), (int)(p), \ 1218 (__mmask8)(m)); }) 1219 1220#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \ 1221 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 1222 (__v4sf)(__m128)(b), (int)(p), \ 1223 (__mmask8)-1); }) 1224 1225#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \ 1226 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 1227 (__v4sf)(__m128)(b), (int)(p), \ 1228 (__mmask8)(m)); }) 1229 1230#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \ 1231 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 1232 (__v2df)(__m128d)(b), (int)(p), \ 1233 (__mmask8)-1); }) 1234 1235#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \ 1236 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 1237 (__v2df)(__m128d)(b), (int)(p), \ 1238 (__mmask8)(m)); }) 1239 1240static __inline__ __m128d __DEFAULT_FN_ATTRS 1241_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1242{ 1243 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, 1244 (__v2df) __B, 1245 (__v2df) __C, 1246 (__mmask8) __U); 1247} 1248 1249static __inline__ __m128d __DEFAULT_FN_ATTRS 1250_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1251{ 1252 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A, 1253 (__v2df) __B, 1254 (__v2df) __C, 1255 (__mmask8) __U); 1256} 1257 1258static __inline__ __m128d __DEFAULT_FN_ATTRS 1259_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1260{ 1261 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, 1262 (__v2df) __B, 1263 (__v2df) __C, 1264 (__mmask8) __U); 1265} 1266 1267static __inline__ __m128d __DEFAULT_FN_ATTRS 1268_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1269{ 1270 return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A, 1271 (__v2df) __B, 1272 -(__v2df) __C, 1273 (__mmask8) __U); 1274} 1275 1276static __inline__ __m128d __DEFAULT_FN_ATTRS 1277_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1278{ 1279 return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A, 1280 (__v2df) __B, 1281 -(__v2df) __C, 1282 (__mmask8) __U); 1283} 1284 1285static __inline__ __m128d __DEFAULT_FN_ATTRS 1286_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1287{ 1288 return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A, 1289 (__v2df) __B, 1290 (__v2df) __C, 1291 (__mmask8) __U); 1292} 1293 1294static __inline__ __m128d __DEFAULT_FN_ATTRS 1295_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1296{ 1297 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, 1298 (__v2df) __B, 1299 (__v2df) __C, 1300 (__mmask8) __U); 1301} 1302 1303static __inline__ __m128d __DEFAULT_FN_ATTRS 1304_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1305{ 1306 return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, 1307 (__v2df) __B, 1308 -(__v2df) __C, 1309 (__mmask8) __U); 1310} 1311 1312static __inline__ __m256d __DEFAULT_FN_ATTRS 1313_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1314{ 1315 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, 1316 (__v4df) __B, 1317 (__v4df) __C, 1318 (__mmask8) __U); 1319} 1320 1321static __inline__ __m256d __DEFAULT_FN_ATTRS 1322_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1323{ 1324 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A, 1325 (__v4df) __B, 1326 (__v4df) __C, 1327 (__mmask8) __U); 1328} 1329 1330static __inline__ __m256d __DEFAULT_FN_ATTRS 1331_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1332{ 1333 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, 1334 (__v4df) __B, 1335 (__v4df) __C, 1336 (__mmask8) __U); 1337} 1338 1339static __inline__ __m256d __DEFAULT_FN_ATTRS 1340_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1341{ 1342 return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A, 1343 (__v4df) __B, 1344 -(__v4df) __C, 1345 (__mmask8) __U); 1346} 1347 1348static __inline__ __m256d __DEFAULT_FN_ATTRS 1349_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1350{ 1351 return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A, 1352 (__v4df) __B, 1353 -(__v4df) __C, 1354 (__mmask8) __U); 1355} 1356 1357static __inline__ __m256d __DEFAULT_FN_ATTRS 1358_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1359{ 1360 return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A, 1361 (__v4df) __B, 1362 (__v4df) __C, 1363 (__mmask8) __U); 1364} 1365 1366static __inline__ __m256d __DEFAULT_FN_ATTRS 1367_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1368{ 1369 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, 1370 (__v4df) __B, 1371 (__v4df) __C, 1372 (__mmask8) __U); 1373} 1374 1375static __inline__ __m256d __DEFAULT_FN_ATTRS 1376_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1377{ 1378 return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, 1379 (__v4df) __B, 1380 -(__v4df) __C, 1381 (__mmask8) __U); 1382} 1383 1384static __inline__ __m128 __DEFAULT_FN_ATTRS 1385_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1386{ 1387 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, 1388 (__v4sf) __B, 1389 (__v4sf) __C, 1390 (__mmask8) __U); 1391} 1392 1393static __inline__ __m128 __DEFAULT_FN_ATTRS 1394_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1395{ 1396 return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A, 1397 (__v4sf) __B, 1398 (__v4sf) __C, 1399 (__mmask8) __U); 1400} 1401 1402static __inline__ __m128 __DEFAULT_FN_ATTRS 1403_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1404{ 1405 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, 1406 (__v4sf) __B, 1407 (__v4sf) __C, 1408 (__mmask8) __U); 1409} 1410 1411static __inline__ __m128 __DEFAULT_FN_ATTRS 1412_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1413{ 1414 return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A, 1415 (__v4sf) __B, 1416 -(__v4sf) __C, 1417 (__mmask8) __U); 1418} 1419 1420static __inline__ __m128 __DEFAULT_FN_ATTRS 1421_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1422{ 1423 return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A, 1424 (__v4sf) __B, 1425 -(__v4sf) __C, 1426 (__mmask8) __U); 1427} 1428 1429static __inline__ __m128 __DEFAULT_FN_ATTRS 1430_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1431{ 1432 return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A, 1433 (__v4sf) __B, 1434 (__v4sf) __C, 1435 (__mmask8) __U); 1436} 1437 1438static __inline__ __m128 __DEFAULT_FN_ATTRS 1439_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1440{ 1441 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, 1442 (__v4sf) __B, 1443 (__v4sf) __C, 1444 (__mmask8) __U); 1445} 1446 1447static __inline__ __m128 __DEFAULT_FN_ATTRS 1448_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1449{ 1450 return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, 1451 (__v4sf) __B, 1452 -(__v4sf) __C, 1453 (__mmask8) __U); 1454} 1455 1456static __inline__ __m256 __DEFAULT_FN_ATTRS 1457_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1458{ 1459 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, 1460 (__v8sf) __B, 1461 (__v8sf) __C, 1462 (__mmask8) __U); 1463} 1464 1465static __inline__ __m256 __DEFAULT_FN_ATTRS 1466_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1467{ 1468 return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A, 1469 (__v8sf) __B, 1470 (__v8sf) __C, 1471 (__mmask8) __U); 1472} 1473 1474static __inline__ __m256 __DEFAULT_FN_ATTRS 1475_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1476{ 1477 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, 1478 (__v8sf) __B, 1479 (__v8sf) __C, 1480 (__mmask8) __U); 1481} 1482 1483static __inline__ __m256 __DEFAULT_FN_ATTRS 1484_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1485{ 1486 return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A, 1487 (__v8sf) __B, 1488 -(__v8sf) __C, 1489 (__mmask8) __U); 1490} 1491 1492static __inline__ __m256 __DEFAULT_FN_ATTRS 1493_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1494{ 1495 return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A, 1496 (__v8sf) __B, 1497 -(__v8sf) __C, 1498 (__mmask8) __U); 1499} 1500 1501static __inline__ __m256 __DEFAULT_FN_ATTRS 1502_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1503{ 1504 return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A, 1505 (__v8sf) __B, 1506 (__v8sf) __C, 1507 (__mmask8) __U); 1508} 1509 1510static __inline__ __m256 __DEFAULT_FN_ATTRS 1511_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1512{ 1513 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, 1514 (__v8sf) __B, 1515 (__v8sf) __C, 1516 (__mmask8) __U); 1517} 1518 1519static __inline__ __m256 __DEFAULT_FN_ATTRS 1520_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1521{ 1522 return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, 1523 (__v8sf) __B, 1524 -(__v8sf) __C, 1525 (__mmask8) __U); 1526} 1527 1528static __inline__ __m128d __DEFAULT_FN_ATTRS 1529_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1530{ 1531 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, 1532 (__v2df) __B, 1533 (__v2df) __C, 1534 (__mmask8) __U); 1535} 1536 1537static __inline__ __m128d __DEFAULT_FN_ATTRS 1538_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1539{ 1540 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A, 1541 (__v2df) __B, 1542 (__v2df) __C, 1543 (__mmask8) 1544 __U); 1545} 1546 1547static __inline__ __m128d __DEFAULT_FN_ATTRS 1548_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1549{ 1550 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, 1551 (__v2df) __B, 1552 (__v2df) __C, 1553 (__mmask8) 1554 __U); 1555} 1556 1557static __inline__ __m128d __DEFAULT_FN_ATTRS 1558_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1559{ 1560 return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A, 1561 (__v2df) __B, 1562 -(__v2df) __C, 1563 (__mmask8) __U); 1564} 1565 1566static __inline__ __m128d __DEFAULT_FN_ATTRS 1567_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1568{ 1569 return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A, 1570 (__v2df) __B, 1571 -(__v2df) __C, 1572 (__mmask8) 1573 __U); 1574} 1575 1576static __inline__ __m256d __DEFAULT_FN_ATTRS 1577_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1578{ 1579 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, 1580 (__v4df) __B, 1581 (__v4df) __C, 1582 (__mmask8) __U); 1583} 1584 1585static __inline__ __m256d __DEFAULT_FN_ATTRS 1586_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1587{ 1588 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A, 1589 (__v4df) __B, 1590 (__v4df) __C, 1591 (__mmask8) 1592 __U); 1593} 1594 1595static __inline__ __m256d __DEFAULT_FN_ATTRS 1596_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1597{ 1598 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, 1599 (__v4df) __B, 1600 (__v4df) __C, 1601 (__mmask8) 1602 __U); 1603} 1604 1605static __inline__ __m256d __DEFAULT_FN_ATTRS 1606_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1607{ 1608 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A, 1609 (__v4df) __B, 1610 -(__v4df) __C, 1611 (__mmask8) __U); 1612} 1613 1614static __inline__ __m256d __DEFAULT_FN_ATTRS 1615_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1616{ 1617 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A, 1618 (__v4df) __B, 1619 -(__v4df) __C, 1620 (__mmask8) 1621 __U); 1622} 1623 1624static __inline__ __m128 __DEFAULT_FN_ATTRS 1625_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1626{ 1627 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, 1628 (__v4sf) __B, 1629 (__v4sf) __C, 1630 (__mmask8) __U); 1631} 1632 1633static __inline__ __m128 __DEFAULT_FN_ATTRS 1634_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1635{ 1636 return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A, 1637 (__v4sf) __B, 1638 (__v4sf) __C, 1639 (__mmask8) __U); 1640} 1641 1642static __inline__ __m128 __DEFAULT_FN_ATTRS 1643_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1644{ 1645 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, 1646 (__v4sf) __B, 1647 (__v4sf) __C, 1648 (__mmask8) __U); 1649} 1650 1651static __inline__ __m128 __DEFAULT_FN_ATTRS 1652_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1653{ 1654 return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A, 1655 (__v4sf) __B, 1656 -(__v4sf) __C, 1657 (__mmask8) __U); 1658} 1659 1660static __inline__ __m128 __DEFAULT_FN_ATTRS 1661_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1662{ 1663 return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A, 1664 (__v4sf) __B, 1665 -(__v4sf) __C, 1666 (__mmask8) __U); 1667} 1668 1669static __inline__ __m256 __DEFAULT_FN_ATTRS 1670_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 1671 __m256 __C) 1672{ 1673 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, 1674 (__v8sf) __B, 1675 (__v8sf) __C, 1676 (__mmask8) __U); 1677} 1678 1679static __inline__ __m256 __DEFAULT_FN_ATTRS 1680_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1681{ 1682 return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A, 1683 (__v8sf) __B, 1684 (__v8sf) __C, 1685 (__mmask8) __U); 1686} 1687 1688static __inline__ __m256 __DEFAULT_FN_ATTRS 1689_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1690{ 1691 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, 1692 (__v8sf) __B, 1693 (__v8sf) __C, 1694 (__mmask8) __U); 1695} 1696 1697static __inline__ __m256 __DEFAULT_FN_ATTRS 1698_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1699{ 1700 return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A, 1701 (__v8sf) __B, 1702 -(__v8sf) __C, 1703 (__mmask8) __U); 1704} 1705 1706static __inline__ __m256 __DEFAULT_FN_ATTRS 1707_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1708{ 1709 return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A, 1710 (__v8sf) __B, 1711 -(__v8sf) __C, 1712 (__mmask8) __U); 1713} 1714 1715static __inline__ __m128d __DEFAULT_FN_ATTRS 1716_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1717{ 1718 return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A, 1719 (__v2df) __B, 1720 (__v2df) __C, 1721 (__mmask8) __U); 1722} 1723 1724static __inline__ __m256d __DEFAULT_FN_ATTRS 1725_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1726{ 1727 return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A, 1728 (__v4df) __B, 1729 (__v4df) __C, 1730 (__mmask8) __U); 1731} 1732 1733static __inline__ __m128 __DEFAULT_FN_ATTRS 1734_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1735{ 1736 return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A, 1737 (__v4sf) __B, 1738 (__v4sf) __C, 1739 (__mmask8) __U); 1740} 1741 1742static __inline__ __m256 __DEFAULT_FN_ATTRS 1743_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1744{ 1745 return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A, 1746 (__v8sf) __B, 1747 (__v8sf) __C, 1748 (__mmask8) __U); 1749} 1750 1751static __inline__ __m128d __DEFAULT_FN_ATTRS 1752_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1753{ 1754 return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A, 1755 (__v2df) __B, 1756 (__v2df) __C, 1757 (__mmask8) 1758 __U); 1759} 1760 1761static __inline__ __m256d __DEFAULT_FN_ATTRS 1762_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1763{ 1764 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A, 1765 (__v4df) __B, 1766 (__v4df) __C, 1767 (__mmask8) 1768 __U); 1769} 1770 1771static __inline__ __m128 __DEFAULT_FN_ATTRS 1772_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1773{ 1774 return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A, 1775 (__v4sf) __B, 1776 (__v4sf) __C, 1777 (__mmask8) __U); 1778} 1779 1780static __inline__ __m256 __DEFAULT_FN_ATTRS 1781_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1782{ 1783 return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A, 1784 (__v8sf) __B, 1785 (__v8sf) __C, 1786 (__mmask8) __U); 1787} 1788 1789static __inline__ __m128d __DEFAULT_FN_ATTRS 1790_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1791{ 1792 return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A, 1793 (__v2df) __B, 1794 (__v2df) __C, 1795 (__mmask8) __U); 1796} 1797 1798static __inline__ __m256d __DEFAULT_FN_ATTRS 1799_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1800{ 1801 return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A, 1802 (__v4df) __B, 1803 (__v4df) __C, 1804 (__mmask8) __U); 1805} 1806 1807static __inline__ __m128 __DEFAULT_FN_ATTRS 1808_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1809{ 1810 return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A, 1811 (__v4sf) __B, 1812 (__v4sf) __C, 1813 (__mmask8) __U); 1814} 1815 1816static __inline__ __m256 __DEFAULT_FN_ATTRS 1817_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1818{ 1819 return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A, 1820 (__v8sf) __B, 1821 (__v8sf) __C, 1822 (__mmask8) __U); 1823} 1824 1825static __inline__ __m128d __DEFAULT_FN_ATTRS 1826_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1827{ 1828 return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A, 1829 (__v2df) __B, 1830 (__v2df) __C, 1831 (__mmask8) __U); 1832} 1833 1834static __inline__ __m128d __DEFAULT_FN_ATTRS 1835_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1836{ 1837 return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A, 1838 (__v2df) __B, 1839 (__v2df) __C, 1840 (__mmask8) __U); 1841} 1842 1843static __inline__ __m256d __DEFAULT_FN_ATTRS 1844_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1845{ 1846 return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A, 1847 (__v4df) __B, 1848 (__v4df) __C, 1849 (__mmask8) __U); 1850} 1851 1852static __inline__ __m256d __DEFAULT_FN_ATTRS 1853_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1854{ 1855 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A, 1856 (__v4df) __B, 1857 (__v4df) __C, 1858 (__mmask8) __U); 1859} 1860 1861static __inline__ __m128 __DEFAULT_FN_ATTRS 1862_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1863{ 1864 return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A, 1865 (__v4sf) __B, 1866 (__v4sf) __C, 1867 (__mmask8) __U); 1868} 1869 1870static __inline__ __m128 __DEFAULT_FN_ATTRS 1871_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1872{ 1873 return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A, 1874 (__v4sf) __B, 1875 (__v4sf) __C, 1876 (__mmask8) __U); 1877} 1878 1879static __inline__ __m256 __DEFAULT_FN_ATTRS 1880_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1881{ 1882 return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A, 1883 (__v8sf) __B, 1884 (__v8sf) __C, 1885 (__mmask8) __U); 1886} 1887 1888static __inline__ __m256 __DEFAULT_FN_ATTRS 1889_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1890{ 1891 return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A, 1892 (__v8sf) __B, 1893 (__v8sf) __C, 1894 (__mmask8) __U); 1895} 1896 1897static __inline__ __m128d __DEFAULT_FN_ATTRS 1898_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1899 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A, 1900 (__v2df) __B, 1901 (__v2df) __W, 1902 (__mmask8) __U); 1903} 1904 1905static __inline__ __m128d __DEFAULT_FN_ATTRS 1906_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B) { 1907 return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A, 1908 (__v2df) __B, 1909 (__v2df) 1910 _mm_setzero_pd (), 1911 (__mmask8) __U); 1912} 1913 1914static __inline__ __m256d __DEFAULT_FN_ATTRS 1915_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1916 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A, 1917 (__v4df) __B, 1918 (__v4df) __W, 1919 (__mmask8) __U); 1920} 1921 1922static __inline__ __m256d __DEFAULT_FN_ATTRS 1923_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B) { 1924 return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A, 1925 (__v4df) __B, 1926 (__v4df) 1927 _mm256_setzero_pd (), 1928 (__mmask8) __U); 1929} 1930 1931static __inline__ __m128 __DEFAULT_FN_ATTRS 1932_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B) { 1933 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A, 1934 (__v4sf) __B, 1935 (__v4sf) __W, 1936 (__mmask8) __U); 1937} 1938 1939static __inline__ __m128 __DEFAULT_FN_ATTRS 1940_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B) { 1941 return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A, 1942 (__v4sf) __B, 1943 (__v4sf) 1944 _mm_setzero_ps (), 1945 (__mmask8) __U); 1946} 1947 1948static __inline__ __m256 __DEFAULT_FN_ATTRS 1949_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B) { 1950 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A, 1951 (__v8sf) __B, 1952 (__v8sf) __W, 1953 (__mmask8) __U); 1954} 1955 1956static __inline__ __m256 __DEFAULT_FN_ATTRS 1957_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B) { 1958 return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A, 1959 (__v8sf) __B, 1960 (__v8sf) 1961 _mm256_setzero_ps (), 1962 (__mmask8) __U); 1963} 1964 1965static __inline__ __m128i __DEFAULT_FN_ATTRS 1966_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 1967 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 1968 (__v4si) __W, 1969 (__v4si) __A); 1970} 1971 1972static __inline__ __m256i __DEFAULT_FN_ATTRS 1973_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 1974 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 1975 (__v8si) __W, 1976 (__v8si) __A); 1977} 1978 1979static __inline__ __m128d __DEFAULT_FN_ATTRS 1980_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1981 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1982 (__v2df) __W, 1983 (__v2df) __A); 1984} 1985 1986static __inline__ __m256d __DEFAULT_FN_ATTRS 1987_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1988 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1989 (__v4df) __W, 1990 (__v4df) __A); 1991} 1992 1993static __inline__ __m128 __DEFAULT_FN_ATTRS 1994_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1995 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1996 (__v4sf) __W, 1997 (__v4sf) __A); 1998} 1999 2000static __inline__ __m256 __DEFAULT_FN_ATTRS 2001_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 2002 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 2003 (__v8sf) __W, 2004 (__v8sf) __A); 2005} 2006 2007static __inline__ __m128i __DEFAULT_FN_ATTRS 2008_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 2009 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 2010 (__v2di) __W, 2011 (__v2di) __A); 2012} 2013 2014static __inline__ __m256i __DEFAULT_FN_ATTRS 2015_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 2016 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 2017 (__v4di) __W, 2018 (__v4di) __A); 2019} 2020 2021static __inline__ __m128d __DEFAULT_FN_ATTRS 2022_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2023 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 2024 (__v2df) __W, 2025 (__mmask8) __U); 2026} 2027 2028static __inline__ __m128d __DEFAULT_FN_ATTRS 2029_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 2030 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 2031 (__v2df) 2032 _mm_setzero_pd (), 2033 (__mmask8) __U); 2034} 2035 2036static __inline__ __m256d __DEFAULT_FN_ATTRS 2037_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2038 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 2039 (__v4df) __W, 2040 (__mmask8) __U); 2041} 2042 2043static __inline__ __m256d __DEFAULT_FN_ATTRS 2044_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 2045 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 2046 (__v4df) 2047 _mm256_setzero_pd (), 2048 (__mmask8) __U); 2049} 2050 2051static __inline__ __m128i __DEFAULT_FN_ATTRS 2052_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2053 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 2054 (__v2di) __W, 2055 (__mmask8) __U); 2056} 2057 2058static __inline__ __m128i __DEFAULT_FN_ATTRS 2059_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 2060 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 2061 (__v2di) 2062 _mm_setzero_si128 (), 2063 (__mmask8) __U); 2064} 2065 2066static __inline__ __m256i __DEFAULT_FN_ATTRS 2067_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2068 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 2069 (__v4di) __W, 2070 (__mmask8) __U); 2071} 2072 2073static __inline__ __m256i __DEFAULT_FN_ATTRS 2074_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 2075 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 2076 (__v4di) 2077 _mm256_setzero_si256 (), 2078 (__mmask8) __U); 2079} 2080 2081static __inline__ __m128 __DEFAULT_FN_ATTRS 2082_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2083 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 2084 (__v4sf) __W, 2085 (__mmask8) __U); 2086} 2087 2088static __inline__ __m128 __DEFAULT_FN_ATTRS 2089_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 2090 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 2091 (__v4sf) 2092 _mm_setzero_ps (), 2093 (__mmask8) __U); 2094} 2095 2096static __inline__ __m256 __DEFAULT_FN_ATTRS 2097_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2098 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 2099 (__v8sf) __W, 2100 (__mmask8) __U); 2101} 2102 2103static __inline__ __m256 __DEFAULT_FN_ATTRS 2104_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 2105 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 2106 (__v8sf) 2107 _mm256_setzero_ps (), 2108 (__mmask8) __U); 2109} 2110 2111static __inline__ __m128i __DEFAULT_FN_ATTRS 2112_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2113 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 2114 (__v4si) __W, 2115 (__mmask8) __U); 2116} 2117 2118static __inline__ __m128i __DEFAULT_FN_ATTRS 2119_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 2120 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 2121 (__v4si) 2122 _mm_setzero_si128 (), 2123 (__mmask8) __U); 2124} 2125 2126static __inline__ __m256i __DEFAULT_FN_ATTRS 2127_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2128 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 2129 (__v8si) __W, 2130 (__mmask8) __U); 2131} 2132 2133static __inline__ __m256i __DEFAULT_FN_ATTRS 2134_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 2135 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 2136 (__v8si) 2137 _mm256_setzero_si256 (), 2138 (__mmask8) __U); 2139} 2140 2141static __inline__ void __DEFAULT_FN_ATTRS 2142_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 2143 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 2144 (__v2df) __A, 2145 (__mmask8) __U); 2146} 2147 2148static __inline__ void __DEFAULT_FN_ATTRS 2149_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 2150 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 2151 (__v4df) __A, 2152 (__mmask8) __U); 2153} 2154 2155static __inline__ void __DEFAULT_FN_ATTRS 2156_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 2157 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 2158 (__v2di) __A, 2159 (__mmask8) __U); 2160} 2161 2162static __inline__ void __DEFAULT_FN_ATTRS 2163_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 2164 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 2165 (__v4di) __A, 2166 (__mmask8) __U); 2167} 2168 2169static __inline__ void __DEFAULT_FN_ATTRS 2170_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 2171 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 2172 (__v4sf) __A, 2173 (__mmask8) __U); 2174} 2175 2176static __inline__ void __DEFAULT_FN_ATTRS 2177_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 2178 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 2179 (__v8sf) __A, 2180 (__mmask8) __U); 2181} 2182 2183static __inline__ void __DEFAULT_FN_ATTRS 2184_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 2185 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 2186 (__v4si) __A, 2187 (__mmask8) __U); 2188} 2189 2190static __inline__ void __DEFAULT_FN_ATTRS 2191_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 2192 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 2193 (__v8si) __A, 2194 (__mmask8) __U); 2195} 2196 2197static __inline__ __m128d __DEFAULT_FN_ATTRS 2198_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2199 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A, 2200 (__v2df) __W, 2201 (__mmask8) __U); 2202} 2203 2204static __inline__ __m128d __DEFAULT_FN_ATTRS 2205_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 2206 return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A, 2207 (__v2df) 2208 _mm_setzero_pd (), 2209 (__mmask8) __U); 2210} 2211 2212static __inline__ __m256d __DEFAULT_FN_ATTRS 2213_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2214 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A, 2215 (__v4df) __W, 2216 (__mmask8) __U); 2217} 2218 2219static __inline__ __m256d __DEFAULT_FN_ATTRS 2220_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 2221 return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A, 2222 (__v4df) 2223 _mm256_setzero_pd (), 2224 (__mmask8) __U); 2225} 2226 2227static __inline__ __m128 __DEFAULT_FN_ATTRS 2228_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2229 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 2230 (__v4sf) __W, 2231 (__mmask8) __U); 2232} 2233 2234static __inline__ __m128 __DEFAULT_FN_ATTRS 2235_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) { 2236 return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A, 2237 (__v4sf) 2238 _mm_setzero_ps (), 2239 (__mmask8) __U); 2240} 2241 2242static __inline__ __m256 __DEFAULT_FN_ATTRS 2243_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2244 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 2245 (__v8sf) __W, 2246 (__mmask8) __U); 2247} 2248 2249static __inline__ __m256 __DEFAULT_FN_ATTRS 2250_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) { 2251 return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A, 2252 (__v8sf) 2253 _mm256_setzero_ps (), 2254 (__mmask8) __U); 2255} 2256 2257static __inline__ __m128i __DEFAULT_FN_ATTRS 2258_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2259 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 2260 (__v4si) __W, 2261 (__mmask8) __U); 2262} 2263 2264static __inline__ __m128i __DEFAULT_FN_ATTRS 2265_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 2266 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 2267 (__v4si) 2268 _mm_setzero_si128 (), 2269 (__mmask8) __U); 2270} 2271 2272static __inline__ __m128i __DEFAULT_FN_ATTRS 2273_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2274 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 2275 (__v4si) __W, 2276 (__mmask8) __U); 2277} 2278 2279static __inline__ __m128i __DEFAULT_FN_ATTRS 2280_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 2281 return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A, 2282 (__v4si) 2283 _mm_setzero_si128 (), 2284 (__mmask8) __U); 2285} 2286 2287static __inline__ __m128 __DEFAULT_FN_ATTRS 2288_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 2289 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2290 (__v4sf) __W, 2291 (__mmask8) __U); 2292} 2293 2294static __inline__ __m128 __DEFAULT_FN_ATTRS 2295_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 2296 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2297 (__v4sf) 2298 _mm_setzero_ps (), 2299 (__mmask8) __U); 2300} 2301 2302static __inline__ __m128 __DEFAULT_FN_ATTRS 2303_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2304 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 2305 (__v4sf) __W, 2306 (__mmask8) __U); 2307} 2308 2309static __inline__ __m128 __DEFAULT_FN_ATTRS 2310_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2311 return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A, 2312 (__v4sf) 2313 _mm_setzero_ps (), 2314 (__mmask8) __U); 2315} 2316 2317static __inline__ __m128i __DEFAULT_FN_ATTRS 2318_mm_cvtpd_epu32 (__m128d __A) { 2319 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2320 (__v4si) 2321 _mm_setzero_si128 (), 2322 (__mmask8) -1); 2323} 2324 2325static __inline__ __m128i __DEFAULT_FN_ATTRS 2326_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2327 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2328 (__v4si) __W, 2329 (__mmask8) __U); 2330} 2331 2332static __inline__ __m128i __DEFAULT_FN_ATTRS 2333_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2334 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2335 (__v4si) 2336 _mm_setzero_si128 (), 2337 (__mmask8) __U); 2338} 2339 2340static __inline__ __m128i __DEFAULT_FN_ATTRS 2341_mm256_cvtpd_epu32 (__m256d __A) { 2342 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2343 (__v4si) 2344 _mm_setzero_si128 (), 2345 (__mmask8) -1); 2346} 2347 2348static __inline__ __m128i __DEFAULT_FN_ATTRS 2349_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2350 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2351 (__v4si) __W, 2352 (__mmask8) __U); 2353} 2354 2355static __inline__ __m128i __DEFAULT_FN_ATTRS 2356_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2357 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2358 (__v4si) 2359 _mm_setzero_si128 (), 2360 (__mmask8) __U); 2361} 2362 2363static __inline__ __m128i __DEFAULT_FN_ATTRS 2364_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2365 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 2366 (__v4si) __W, 2367 (__mmask8) __U); 2368} 2369 2370static __inline__ __m128i __DEFAULT_FN_ATTRS 2371_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2372 return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A, 2373 (__v4si) 2374 _mm_setzero_si128 (), 2375 (__mmask8) __U); 2376} 2377 2378static __inline__ __m256i __DEFAULT_FN_ATTRS 2379_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2380 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 2381 (__v8si) __W, 2382 (__mmask8) __U); 2383} 2384 2385static __inline__ __m256i __DEFAULT_FN_ATTRS 2386_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2387 return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A, 2388 (__v8si) 2389 _mm256_setzero_si256 (), 2390 (__mmask8) __U); 2391} 2392 2393static __inline__ __m128d __DEFAULT_FN_ATTRS 2394_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2395 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 2396 (__v2df) __W, 2397 (__mmask8) __U); 2398} 2399 2400static __inline__ __m128d __DEFAULT_FN_ATTRS 2401_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2402 return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A, 2403 (__v2df) 2404 _mm_setzero_pd (), 2405 (__mmask8) __U); 2406} 2407 2408static __inline__ __m256d __DEFAULT_FN_ATTRS 2409_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2410 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 2411 (__v4df) __W, 2412 (__mmask8) __U); 2413} 2414 2415static __inline__ __m256d __DEFAULT_FN_ATTRS 2416_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2417 return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A, 2418 (__v4df) 2419 _mm256_setzero_pd (), 2420 (__mmask8) __U); 2421} 2422 2423static __inline__ __m128i __DEFAULT_FN_ATTRS 2424_mm_cvtps_epu32 (__m128 __A) { 2425 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2426 (__v4si) 2427 _mm_setzero_si128 (), 2428 (__mmask8) -1); 2429} 2430 2431static __inline__ __m128i __DEFAULT_FN_ATTRS 2432_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2433 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2434 (__v4si) __W, 2435 (__mmask8) __U); 2436} 2437 2438static __inline__ __m128i __DEFAULT_FN_ATTRS 2439_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2440 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2441 (__v4si) 2442 _mm_setzero_si128 (), 2443 (__mmask8) __U); 2444} 2445 2446static __inline__ __m256i __DEFAULT_FN_ATTRS 2447_mm256_cvtps_epu32 (__m256 __A) { 2448 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2449 (__v8si) 2450 _mm256_setzero_si256 (), 2451 (__mmask8) -1); 2452} 2453 2454static __inline__ __m256i __DEFAULT_FN_ATTRS 2455_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2456 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2457 (__v8si) __W, 2458 (__mmask8) __U); 2459} 2460 2461static __inline__ __m256i __DEFAULT_FN_ATTRS 2462_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2463 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2464 (__v8si) 2465 _mm256_setzero_si256 (), 2466 (__mmask8) __U); 2467} 2468 2469static __inline__ __m128i __DEFAULT_FN_ATTRS 2470_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2471 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2472 (__v4si) __W, 2473 (__mmask8) __U); 2474} 2475 2476static __inline__ __m128i __DEFAULT_FN_ATTRS 2477_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2478 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2479 (__v4si) 2480 _mm_setzero_si128 (), 2481 (__mmask8) __U); 2482} 2483 2484static __inline__ __m128i __DEFAULT_FN_ATTRS 2485_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2486 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 2487 (__v4si) __W, 2488 (__mmask8) __U); 2489} 2490 2491static __inline__ __m128i __DEFAULT_FN_ATTRS 2492_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2493 return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A, 2494 (__v4si) 2495 _mm_setzero_si128 (), 2496 (__mmask8) __U); 2497} 2498 2499static __inline__ __m128i __DEFAULT_FN_ATTRS 2500_mm_cvttpd_epu32 (__m128d __A) { 2501 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2502 (__v4si) 2503 _mm_setzero_si128 (), 2504 (__mmask8) -1); 2505} 2506 2507static __inline__ __m128i __DEFAULT_FN_ATTRS 2508_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2509 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2510 (__v4si) __W, 2511 (__mmask8) __U); 2512} 2513 2514static __inline__ __m128i __DEFAULT_FN_ATTRS 2515_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2516 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2517 (__v4si) 2518 _mm_setzero_si128 (), 2519 (__mmask8) __U); 2520} 2521 2522static __inline__ __m128i __DEFAULT_FN_ATTRS 2523_mm256_cvttpd_epu32 (__m256d __A) { 2524 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2525 (__v4si) 2526 _mm_setzero_si128 (), 2527 (__mmask8) -1); 2528} 2529 2530static __inline__ __m128i __DEFAULT_FN_ATTRS 2531_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2532 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2533 (__v4si) __W, 2534 (__mmask8) __U); 2535} 2536 2537static __inline__ __m128i __DEFAULT_FN_ATTRS 2538_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2539 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2540 (__v4si) 2541 _mm_setzero_si128 (), 2542 (__mmask8) __U); 2543} 2544 2545static __inline__ __m128i __DEFAULT_FN_ATTRS 2546_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2547 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 2548 (__v4si) __W, 2549 (__mmask8) __U); 2550} 2551 2552static __inline__ __m128i __DEFAULT_FN_ATTRS 2553_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2554 return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A, 2555 (__v4si) 2556 _mm_setzero_si128 (), 2557 (__mmask8) __U); 2558} 2559 2560static __inline__ __m256i __DEFAULT_FN_ATTRS 2561_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2562 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 2563 (__v8si) __W, 2564 (__mmask8) __U); 2565} 2566 2567static __inline__ __m256i __DEFAULT_FN_ATTRS 2568_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2569 return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A, 2570 (__v8si) 2571 _mm256_setzero_si256 (), 2572 (__mmask8) __U); 2573} 2574 2575static __inline__ __m128i __DEFAULT_FN_ATTRS 2576_mm_cvttps_epu32 (__m128 __A) { 2577 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2578 (__v4si) 2579 _mm_setzero_si128 (), 2580 (__mmask8) -1); 2581} 2582 2583static __inline__ __m128i __DEFAULT_FN_ATTRS 2584_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2585 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2586 (__v4si) __W, 2587 (__mmask8) __U); 2588} 2589 2590static __inline__ __m128i __DEFAULT_FN_ATTRS 2591_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2592 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2593 (__v4si) 2594 _mm_setzero_si128 (), 2595 (__mmask8) __U); 2596} 2597 2598static __inline__ __m256i __DEFAULT_FN_ATTRS 2599_mm256_cvttps_epu32 (__m256 __A) { 2600 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2601 (__v8si) 2602 _mm256_setzero_si256 (), 2603 (__mmask8) -1); 2604} 2605 2606static __inline__ __m256i __DEFAULT_FN_ATTRS 2607_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2608 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2609 (__v8si) __W, 2610 (__mmask8) __U); 2611} 2612 2613static __inline__ __m256i __DEFAULT_FN_ATTRS 2614_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2615 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2616 (__v8si) 2617 _mm256_setzero_si256 (), 2618 (__mmask8) __U); 2619} 2620 2621static __inline__ __m128d __DEFAULT_FN_ATTRS 2622_mm_cvtepu32_pd (__m128i __A) { 2623 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 2624 (__v2df) 2625 _mm_setzero_pd (), 2626 (__mmask8) -1); 2627} 2628 2629static __inline__ __m128d __DEFAULT_FN_ATTRS 2630_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2631 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 2632 (__v2df) __W, 2633 (__mmask8) __U); 2634} 2635 2636static __inline__ __m128d __DEFAULT_FN_ATTRS 2637_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2638 return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A, 2639 (__v2df) 2640 _mm_setzero_pd (), 2641 (__mmask8) __U); 2642} 2643 2644static __inline__ __m256d __DEFAULT_FN_ATTRS 2645_mm256_cvtepu32_pd (__m128i __A) { 2646 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 2647 (__v4df) 2648 _mm256_setzero_pd (), 2649 (__mmask8) -1); 2650} 2651 2652static __inline__ __m256d __DEFAULT_FN_ATTRS 2653_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2654 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 2655 (__v4df) __W, 2656 (__mmask8) __U); 2657} 2658 2659static __inline__ __m256d __DEFAULT_FN_ATTRS 2660_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2661 return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A, 2662 (__v4df) 2663 _mm256_setzero_pd (), 2664 (__mmask8) __U); 2665} 2666 2667static __inline__ __m128 __DEFAULT_FN_ATTRS 2668_mm_cvtepu32_ps (__m128i __A) { 2669 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2670 (__v4sf) 2671 _mm_setzero_ps (), 2672 (__mmask8) -1); 2673} 2674 2675static __inline__ __m128 __DEFAULT_FN_ATTRS 2676_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2677 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2678 (__v4sf) __W, 2679 (__mmask8) __U); 2680} 2681 2682static __inline__ __m128 __DEFAULT_FN_ATTRS 2683_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2684 return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A, 2685 (__v4sf) 2686 _mm_setzero_ps (), 2687 (__mmask8) __U); 2688} 2689 2690static __inline__ __m256 __DEFAULT_FN_ATTRS 2691_mm256_cvtepu32_ps (__m256i __A) { 2692 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2693 (__v8sf) 2694 _mm256_setzero_ps (), 2695 (__mmask8) -1); 2696} 2697 2698static __inline__ __m256 __DEFAULT_FN_ATTRS 2699_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2700 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2701 (__v8sf) __W, 2702 (__mmask8) __U); 2703} 2704 2705static __inline__ __m256 __DEFAULT_FN_ATTRS 2706_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2707 return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A, 2708 (__v8sf) 2709 _mm256_setzero_ps (), 2710 (__mmask8) __U); 2711} 2712 2713static __inline__ __m128d __DEFAULT_FN_ATTRS 2714_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2715 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A, 2716 (__v2df) __B, 2717 (__v2df) __W, 2718 (__mmask8) __U); 2719} 2720 2721static __inline__ __m128d __DEFAULT_FN_ATTRS 2722_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B) { 2723 return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A, 2724 (__v2df) __B, 2725 (__v2df) 2726 _mm_setzero_pd (), 2727 (__mmask8) __U); 2728} 2729 2730static __inline__ __m256d __DEFAULT_FN_ATTRS 2731_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A, 2732 __m256d __B) { 2733 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A, 2734 (__v4df) __B, 2735 (__v4df) __W, 2736 (__mmask8) __U); 2737} 2738 2739static __inline__ __m256d __DEFAULT_FN_ATTRS 2740_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B) { 2741 return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A, 2742 (__v4df) __B, 2743 (__v4df) 2744 _mm256_setzero_pd (), 2745 (__mmask8) __U); 2746} 2747 2748static __inline__ __m128 __DEFAULT_FN_ATTRS 2749_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2750 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A, 2751 (__v4sf) __B, 2752 (__v4sf) __W, 2753 (__mmask8) __U); 2754} 2755 2756static __inline__ __m128 __DEFAULT_FN_ATTRS 2757_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B) { 2758 return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A, 2759 (__v4sf) __B, 2760 (__v4sf) 2761 _mm_setzero_ps (), 2762 (__mmask8) __U); 2763} 2764 2765static __inline__ __m256 __DEFAULT_FN_ATTRS 2766_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2767 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A, 2768 (__v8sf) __B, 2769 (__v8sf) __W, 2770 (__mmask8) __U); 2771} 2772 2773static __inline__ __m256 __DEFAULT_FN_ATTRS 2774_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B) { 2775 return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A, 2776 (__v8sf) __B, 2777 (__v8sf) 2778 _mm256_setzero_ps (), 2779 (__mmask8) __U); 2780} 2781 2782static __inline__ __m128d __DEFAULT_FN_ATTRS 2783_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2784 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2785 (__v2df) __W, 2786 (__mmask8) __U); 2787} 2788 2789static __inline__ __m128d __DEFAULT_FN_ATTRS 2790_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2791 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2792 (__v2df) 2793 _mm_setzero_pd (), 2794 (__mmask8) __U); 2795} 2796 2797static __inline__ __m256d __DEFAULT_FN_ATTRS 2798_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2799 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2800 (__v4df) __W, 2801 (__mmask8) __U); 2802} 2803 2804static __inline__ __m256d __DEFAULT_FN_ATTRS 2805_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2806 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2807 (__v4df) 2808 _mm256_setzero_pd (), 2809 (__mmask8) __U); 2810} 2811 2812static __inline__ __m128i __DEFAULT_FN_ATTRS 2813_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2814 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2815 (__v2di) __W, 2816 (__mmask8) __U); 2817} 2818 2819static __inline__ __m128i __DEFAULT_FN_ATTRS 2820_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2821 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2822 (__v2di) 2823 _mm_setzero_si128 (), 2824 (__mmask8) __U); 2825} 2826 2827static __inline__ __m256i __DEFAULT_FN_ATTRS 2828_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2829 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2830 (__v4di) __W, 2831 (__mmask8) __U); 2832} 2833 2834static __inline__ __m256i __DEFAULT_FN_ATTRS 2835_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2836 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2837 (__v4di) 2838 _mm256_setzero_si256 (), 2839 (__mmask8) __U); 2840} 2841 2842static __inline__ __m128d __DEFAULT_FN_ATTRS 2843_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2844 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2845 (__v2df) __W, 2846 (__mmask8) 2847 __U); 2848} 2849 2850static __inline__ __m128d __DEFAULT_FN_ATTRS 2851_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2852 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2853 (__v2df) 2854 _mm_setzero_pd (), 2855 (__mmask8) 2856 __U); 2857} 2858 2859static __inline__ __m256d __DEFAULT_FN_ATTRS 2860_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2861 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2862 (__v4df) __W, 2863 (__mmask8) 2864 __U); 2865} 2866 2867static __inline__ __m256d __DEFAULT_FN_ATTRS 2868_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2869 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2870 (__v4df) 2871 _mm256_setzero_pd (), 2872 (__mmask8) 2873 __U); 2874} 2875 2876static __inline__ __m128i __DEFAULT_FN_ATTRS 2877_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2878 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2879 (__v2di) __W, 2880 (__mmask8) 2881 __U); 2882} 2883 2884static __inline__ __m128i __DEFAULT_FN_ATTRS 2885_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2886 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2887 (__v2di) 2888 _mm_setzero_si128 (), 2889 (__mmask8) 2890 __U); 2891} 2892 2893static __inline__ __m256i __DEFAULT_FN_ATTRS 2894_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 2895 void const *__P) { 2896 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2897 (__v4di) __W, 2898 (__mmask8) 2899 __U); 2900} 2901 2902static __inline__ __m256i __DEFAULT_FN_ATTRS 2903_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2904 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2905 (__v4di) 2906 _mm256_setzero_si256 (), 2907 (__mmask8) 2908 __U); 2909} 2910 2911static __inline__ __m128 __DEFAULT_FN_ATTRS 2912_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2913 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2914 (__v4sf) __W, 2915 (__mmask8) __U); 2916} 2917 2918static __inline__ __m128 __DEFAULT_FN_ATTRS 2919_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2920 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2921 (__v4sf) 2922 _mm_setzero_ps (), 2923 (__mmask8) 2924 __U); 2925} 2926 2927static __inline__ __m256 __DEFAULT_FN_ATTRS 2928_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2929 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2930 (__v8sf) __W, 2931 (__mmask8) __U); 2932} 2933 2934static __inline__ __m256 __DEFAULT_FN_ATTRS 2935_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2936 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2937 (__v8sf) 2938 _mm256_setzero_ps (), 2939 (__mmask8) 2940 __U); 2941} 2942 2943static __inline__ __m128i __DEFAULT_FN_ATTRS 2944_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2945 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2946 (__v4si) __W, 2947 (__mmask8) 2948 __U); 2949} 2950 2951static __inline__ __m128i __DEFAULT_FN_ATTRS 2952_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2953 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2954 (__v4si) 2955 _mm_setzero_si128 (), 2956 (__mmask8) __U); 2957} 2958 2959static __inline__ __m256i __DEFAULT_FN_ATTRS 2960_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2961 void const *__P) { 2962 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2963 (__v8si) __W, 2964 (__mmask8) 2965 __U); 2966} 2967 2968static __inline__ __m256i __DEFAULT_FN_ATTRS 2969_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2970 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2971 (__v8si) 2972 _mm256_setzero_si256 (), 2973 (__mmask8) 2974 __U); 2975} 2976 2977static __inline__ __m128 __DEFAULT_FN_ATTRS 2978_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2979 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2980 (__v4sf) __W, 2981 (__mmask8) __U); 2982} 2983 2984static __inline__ __m128 __DEFAULT_FN_ATTRS 2985_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2986 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2987 (__v4sf) 2988 _mm_setzero_ps (), 2989 (__mmask8) __U); 2990} 2991 2992static __inline__ __m256 __DEFAULT_FN_ATTRS 2993_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2994 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2995 (__v8sf) __W, 2996 (__mmask8) __U); 2997} 2998 2999static __inline__ __m256 __DEFAULT_FN_ATTRS 3000_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 3001 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 3002 (__v8sf) 3003 _mm256_setzero_ps (), 3004 (__mmask8) __U); 3005} 3006 3007static __inline__ __m128i __DEFAULT_FN_ATTRS 3008_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 3009 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 3010 (__v4si) __W, 3011 (__mmask8) __U); 3012} 3013 3014static __inline__ __m128i __DEFAULT_FN_ATTRS 3015_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 3016 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 3017 (__v4si) 3018 _mm_setzero_si128 (), 3019 (__mmask8) __U); 3020} 3021 3022static __inline__ __m256i __DEFAULT_FN_ATTRS 3023_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 3024 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 3025 (__v8si) __W, 3026 (__mmask8) __U); 3027} 3028 3029static __inline__ __m256i __DEFAULT_FN_ATTRS 3030_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 3031 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 3032 (__v8si) 3033 _mm256_setzero_si256 (), 3034 (__mmask8) __U); 3035} 3036 3037static __inline__ __m128d __DEFAULT_FN_ATTRS 3038_mm_getexp_pd (__m128d __A) { 3039 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 3040 (__v2df) 3041 _mm_setzero_pd (), 3042 (__mmask8) -1); 3043} 3044 3045static __inline__ __m128d __DEFAULT_FN_ATTRS 3046_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 3047 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 3048 (__v2df) __W, 3049 (__mmask8) __U); 3050} 3051 3052static __inline__ __m128d __DEFAULT_FN_ATTRS 3053_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 3054 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 3055 (__v2df) 3056 _mm_setzero_pd (), 3057 (__mmask8) __U); 3058} 3059 3060static __inline__ __m256d __DEFAULT_FN_ATTRS 3061_mm256_getexp_pd (__m256d __A) { 3062 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 3063 (__v4df) 3064 _mm256_setzero_pd (), 3065 (__mmask8) -1); 3066} 3067 3068static __inline__ __m256d __DEFAULT_FN_ATTRS 3069_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 3070 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 3071 (__v4df) __W, 3072 (__mmask8) __U); 3073} 3074 3075static __inline__ __m256d __DEFAULT_FN_ATTRS 3076_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 3077 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 3078 (__v4df) 3079 _mm256_setzero_pd (), 3080 (__mmask8) __U); 3081} 3082 3083static __inline__ __m128 __DEFAULT_FN_ATTRS 3084_mm_getexp_ps (__m128 __A) { 3085 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3086 (__v4sf) 3087 _mm_setzero_ps (), 3088 (__mmask8) -1); 3089} 3090 3091static __inline__ __m128 __DEFAULT_FN_ATTRS 3092_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 3093 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3094 (__v4sf) __W, 3095 (__mmask8) __U); 3096} 3097 3098static __inline__ __m128 __DEFAULT_FN_ATTRS 3099_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 3100 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 3101 (__v4sf) 3102 _mm_setzero_ps (), 3103 (__mmask8) __U); 3104} 3105 3106static __inline__ __m256 __DEFAULT_FN_ATTRS 3107_mm256_getexp_ps (__m256 __A) { 3108 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3109 (__v8sf) 3110 _mm256_setzero_ps (), 3111 (__mmask8) -1); 3112} 3113 3114static __inline__ __m256 __DEFAULT_FN_ATTRS 3115_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 3116 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3117 (__v8sf) __W, 3118 (__mmask8) __U); 3119} 3120 3121static __inline__ __m256 __DEFAULT_FN_ATTRS 3122_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 3123 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 3124 (__v8sf) 3125 _mm256_setzero_ps (), 3126 (__mmask8) __U); 3127} 3128 3129static __inline__ __m128d __DEFAULT_FN_ATTRS 3130_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3131 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A, 3132 (__v2df) __B, 3133 (__v2df) __W, 3134 (__mmask8) __U); 3135} 3136 3137static __inline__ __m128d __DEFAULT_FN_ATTRS 3138_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3139 return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A, 3140 (__v2df) __B, 3141 (__v2df) 3142 _mm_setzero_pd (), 3143 (__mmask8) __U); 3144} 3145 3146static __inline__ __m256d __DEFAULT_FN_ATTRS 3147_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A, 3148 __m256d __B) { 3149 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A, 3150 (__v4df) __B, 3151 (__v4df) __W, 3152 (__mmask8) __U); 3153} 3154 3155static __inline__ __m256d __DEFAULT_FN_ATTRS 3156_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3157 return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A, 3158 (__v4df) __B, 3159 (__v4df) 3160 _mm256_setzero_pd (), 3161 (__mmask8) __U); 3162} 3163 3164static __inline__ __m128 __DEFAULT_FN_ATTRS 3165_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3166 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A, 3167 (__v4sf) __B, 3168 (__v4sf) __W, 3169 (__mmask8) __U); 3170} 3171 3172static __inline__ __m128 __DEFAULT_FN_ATTRS 3173_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3174 return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A, 3175 (__v4sf) __B, 3176 (__v4sf) 3177 _mm_setzero_ps (), 3178 (__mmask8) __U); 3179} 3180 3181static __inline__ __m256 __DEFAULT_FN_ATTRS 3182_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3183 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A, 3184 (__v8sf) __B, 3185 (__v8sf) __W, 3186 (__mmask8) __U); 3187} 3188 3189static __inline__ __m256 __DEFAULT_FN_ATTRS 3190_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3191 return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A, 3192 (__v8sf) __B, 3193 (__v8sf) 3194 _mm256_setzero_ps (), 3195 (__mmask8) __U); 3196} 3197 3198static __inline__ __m128d __DEFAULT_FN_ATTRS 3199_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3200 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A, 3201 (__v2df) __B, 3202 (__v2df) __W, 3203 (__mmask8) __U); 3204} 3205 3206static __inline__ __m128d __DEFAULT_FN_ATTRS 3207_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3208 return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A, 3209 (__v2df) __B, 3210 (__v2df) 3211 _mm_setzero_pd (), 3212 (__mmask8) __U); 3213} 3214 3215static __inline__ __m256d __DEFAULT_FN_ATTRS 3216_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A, 3217 __m256d __B) { 3218 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A, 3219 (__v4df) __B, 3220 (__v4df) __W, 3221 (__mmask8) __U); 3222} 3223 3224static __inline__ __m256d __DEFAULT_FN_ATTRS 3225_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3226 return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A, 3227 (__v4df) __B, 3228 (__v4df) 3229 _mm256_setzero_pd (), 3230 (__mmask8) __U); 3231} 3232 3233static __inline__ __m128 __DEFAULT_FN_ATTRS 3234_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3235 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A, 3236 (__v4sf) __B, 3237 (__v4sf) __W, 3238 (__mmask8) __U); 3239} 3240 3241static __inline__ __m128 __DEFAULT_FN_ATTRS 3242_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3243 return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A, 3244 (__v4sf) __B, 3245 (__v4sf) 3246 _mm_setzero_ps (), 3247 (__mmask8) __U); 3248} 3249 3250static __inline__ __m256 __DEFAULT_FN_ATTRS 3251_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3252 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A, 3253 (__v8sf) __B, 3254 (__v8sf) __W, 3255 (__mmask8) __U); 3256} 3257 3258static __inline__ __m256 __DEFAULT_FN_ATTRS 3259_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3260 return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A, 3261 (__v8sf) __B, 3262 (__v8sf) 3263 _mm256_setzero_ps (), 3264 (__mmask8) __U); 3265} 3266 3267static __inline__ __m128d __DEFAULT_FN_ATTRS 3268_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3269 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A, 3270 (__v2df) __B, 3271 (__v2df) __W, 3272 (__mmask8) __U); 3273} 3274 3275static __inline__ __m128d __DEFAULT_FN_ATTRS 3276_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3277 return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A, 3278 (__v2df) __B, 3279 (__v2df) 3280 _mm_setzero_pd (), 3281 (__mmask8) __U); 3282} 3283 3284static __inline__ __m256d __DEFAULT_FN_ATTRS 3285_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A, 3286 __m256d __B) { 3287 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A, 3288 (__v4df) __B, 3289 (__v4df) __W, 3290 (__mmask8) __U); 3291} 3292 3293static __inline__ __m256d __DEFAULT_FN_ATTRS 3294_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3295 return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A, 3296 (__v4df) __B, 3297 (__v4df) 3298 _mm256_setzero_pd (), 3299 (__mmask8) __U); 3300} 3301 3302static __inline__ __m128 __DEFAULT_FN_ATTRS 3303_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3304 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A, 3305 (__v4sf) __B, 3306 (__v4sf) __W, 3307 (__mmask8) __U); 3308} 3309 3310static __inline__ __m128 __DEFAULT_FN_ATTRS 3311_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3312 return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A, 3313 (__v4sf) __B, 3314 (__v4sf) 3315 _mm_setzero_ps (), 3316 (__mmask8) __U); 3317} 3318 3319static __inline__ __m256 __DEFAULT_FN_ATTRS 3320_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3321 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A, 3322 (__v8sf) __B, 3323 (__v8sf) __W, 3324 (__mmask8) __U); 3325} 3326 3327static __inline__ __m256 __DEFAULT_FN_ATTRS 3328_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3329 return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A, 3330 (__v8sf) __B, 3331 (__v8sf) 3332 _mm256_setzero_ps (), 3333 (__mmask8) __U); 3334} 3335 3336static __inline__ __m128i __DEFAULT_FN_ATTRS 3337_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 3338 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A, 3339 (__v4si) __W, 3340 (__mmask8) __U); 3341} 3342 3343static __inline__ __m128i __DEFAULT_FN_ATTRS 3344_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A) { 3345 return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A, 3346 (__v4si) 3347 _mm_setzero_si128 (), 3348 (__mmask8) __U); 3349} 3350 3351static __inline__ __m256i __DEFAULT_FN_ATTRS 3352_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 3353 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A, 3354 (__v8si) __W, 3355 (__mmask8) __U); 3356} 3357 3358static __inline__ __m256i __DEFAULT_FN_ATTRS 3359_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A) { 3360 return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A, 3361 (__v8si) 3362 _mm256_setzero_si256 (), 3363 (__mmask8) __U); 3364} 3365 3366static __inline__ __m128i __DEFAULT_FN_ATTRS 3367_mm_abs_epi64 (__m128i __A) { 3368 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3369 (__v2di) 3370 _mm_setzero_si128 (), 3371 (__mmask8) -1); 3372} 3373 3374static __inline__ __m128i __DEFAULT_FN_ATTRS 3375_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 3376 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3377 (__v2di) __W, 3378 (__mmask8) __U); 3379} 3380 3381static __inline__ __m128i __DEFAULT_FN_ATTRS 3382_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3383 return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A, 3384 (__v2di) 3385 _mm_setzero_si128 (), 3386 (__mmask8) __U); 3387} 3388 3389static __inline__ __m256i __DEFAULT_FN_ATTRS 3390_mm256_abs_epi64 (__m256i __A) { 3391 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3392 (__v4di) 3393 _mm256_setzero_si256 (), 3394 (__mmask8) -1); 3395} 3396 3397static __inline__ __m256i __DEFAULT_FN_ATTRS 3398_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3399 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3400 (__v4di) __W, 3401 (__mmask8) __U); 3402} 3403 3404static __inline__ __m256i __DEFAULT_FN_ATTRS 3405_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3406 return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A, 3407 (__v4di) 3408 _mm256_setzero_si256 (), 3409 (__mmask8) __U); 3410} 3411 3412static __inline__ __m128i __DEFAULT_FN_ATTRS 3413_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B) { 3414 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A, 3415 (__v4si) __B, 3416 (__v4si) 3417 _mm_setzero_si128 (), 3418 __M); 3419} 3420 3421static __inline__ __m128i __DEFAULT_FN_ATTRS 3422_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A, 3423 __m128i __B) { 3424 return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A, 3425 (__v4si) __B, 3426 (__v4si) __W, __M); 3427} 3428 3429static __inline__ __m256i __DEFAULT_FN_ATTRS 3430_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B) { 3431 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A, 3432 (__v8si) __B, 3433 (__v8si) 3434 _mm256_setzero_si256 (), 3435 __M); 3436} 3437 3438static __inline__ __m256i __DEFAULT_FN_ATTRS 3439_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A, 3440 __m256i __B) { 3441 return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A, 3442 (__v8si) __B, 3443 (__v8si) __W, __M); 3444} 3445 3446static __inline__ __m128i __DEFAULT_FN_ATTRS 3447_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3448 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3449 (__v2di) __B, 3450 (__v2di) 3451 _mm_setzero_si128 (), 3452 __M); 3453} 3454 3455static __inline__ __m128i __DEFAULT_FN_ATTRS 3456_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 3457 __m128i __B) { 3458 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3459 (__v2di) __B, 3460 (__v2di) __W, __M); 3461} 3462 3463static __inline__ __m128i __DEFAULT_FN_ATTRS 3464_mm_max_epi64 (__m128i __A, __m128i __B) { 3465 return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A, 3466 (__v2di) __B, 3467 (__v2di) 3468 _mm_setzero_si128 (), 3469 (__mmask8) -1); 3470} 3471 3472static __inline__ __m256i __DEFAULT_FN_ATTRS 3473_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3474 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3475 (__v4di) __B, 3476 (__v4di) 3477 _mm256_setzero_si256 (), 3478 __M); 3479} 3480 3481static __inline__ __m256i __DEFAULT_FN_ATTRS 3482_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 3483 __m256i __B) { 3484 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3485 (__v4di) __B, 3486 (__v4di) __W, __M); 3487} 3488 3489static __inline__ __m256i __DEFAULT_FN_ATTRS 3490_mm256_max_epi64 (__m256i __A, __m256i __B) { 3491 return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A, 3492 (__v4di) __B, 3493 (__v4di) 3494 _mm256_setzero_si256 (), 3495 (__mmask8) -1); 3496} 3497 3498static __inline__ __m128i __DEFAULT_FN_ATTRS 3499_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B) { 3500 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A, 3501 (__v4si) __B, 3502 (__v4si) 3503 _mm_setzero_si128 (), 3504 __M); 3505} 3506 3507static __inline__ __m128i __DEFAULT_FN_ATTRS 3508_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A, 3509 __m128i __B) { 3510 return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A, 3511 (__v4si) __B, 3512 (__v4si) __W, __M); 3513} 3514 3515static __inline__ __m256i __DEFAULT_FN_ATTRS 3516_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B) { 3517 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A, 3518 (__v8si) __B, 3519 (__v8si) 3520 _mm256_setzero_si256 (), 3521 __M); 3522} 3523 3524static __inline__ __m256i __DEFAULT_FN_ATTRS 3525_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A, 3526 __m256i __B) { 3527 return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A, 3528 (__v8si) __B, 3529 (__v8si) __W, __M); 3530} 3531 3532static __inline__ __m128i __DEFAULT_FN_ATTRS 3533_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3534 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3535 (__v2di) __B, 3536 (__v2di) 3537 _mm_setzero_si128 (), 3538 __M); 3539} 3540 3541static __inline__ __m128i __DEFAULT_FN_ATTRS 3542_mm_max_epu64 (__m128i __A, __m128i __B) { 3543 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3544 (__v2di) __B, 3545 (__v2di) 3546 _mm_setzero_si128 (), 3547 (__mmask8) -1); 3548} 3549 3550static __inline__ __m128i __DEFAULT_FN_ATTRS 3551_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 3552 __m128i __B) { 3553 return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A, 3554 (__v2di) __B, 3555 (__v2di) __W, __M); 3556} 3557 3558static __inline__ __m256i __DEFAULT_FN_ATTRS 3559_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3560 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3561 (__v4di) __B, 3562 (__v4di) 3563 _mm256_setzero_si256 (), 3564 __M); 3565} 3566 3567static __inline__ __m256i __DEFAULT_FN_ATTRS 3568_mm256_max_epu64 (__m256i __A, __m256i __B) { 3569 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3570 (__v4di) __B, 3571 (__v4di) 3572 _mm256_setzero_si256 (), 3573 (__mmask8) -1); 3574} 3575 3576static __inline__ __m256i __DEFAULT_FN_ATTRS 3577_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 3578 __m256i __B) { 3579 return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A, 3580 (__v4di) __B, 3581 (__v4di) __W, __M); 3582} 3583 3584static __inline__ __m128i __DEFAULT_FN_ATTRS 3585_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B) { 3586 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A, 3587 (__v4si) __B, 3588 (__v4si) 3589 _mm_setzero_si128 (), 3590 __M); 3591} 3592 3593static __inline__ __m128i __DEFAULT_FN_ATTRS 3594_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A, 3595 __m128i __B) { 3596 return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A, 3597 (__v4si) __B, 3598 (__v4si) __W, __M); 3599} 3600 3601static __inline__ __m256i __DEFAULT_FN_ATTRS 3602_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B) { 3603 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A, 3604 (__v8si) __B, 3605 (__v8si) 3606 _mm256_setzero_si256 (), 3607 __M); 3608} 3609 3610static __inline__ __m256i __DEFAULT_FN_ATTRS 3611_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A, 3612 __m256i __B) { 3613 return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A, 3614 (__v8si) __B, 3615 (__v8si) __W, __M); 3616} 3617 3618static __inline__ __m128i __DEFAULT_FN_ATTRS 3619_mm_min_epi64 (__m128i __A, __m128i __B) { 3620 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3621 (__v2di) __B, 3622 (__v2di) 3623 _mm_setzero_si128 (), 3624 (__mmask8) -1); 3625} 3626 3627static __inline__ __m128i __DEFAULT_FN_ATTRS 3628_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, 3629 __m128i __B) { 3630 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3631 (__v2di) __B, 3632 (__v2di) __W, __M); 3633} 3634 3635static __inline__ __m128i __DEFAULT_FN_ATTRS 3636_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3637 return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A, 3638 (__v2di) __B, 3639 (__v2di) 3640 _mm_setzero_si128 (), 3641 __M); 3642} 3643 3644static __inline__ __m256i __DEFAULT_FN_ATTRS 3645_mm256_min_epi64 (__m256i __A, __m256i __B) { 3646 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3647 (__v4di) __B, 3648 (__v4di) 3649 _mm256_setzero_si256 (), 3650 (__mmask8) -1); 3651} 3652 3653static __inline__ __m256i __DEFAULT_FN_ATTRS 3654_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, 3655 __m256i __B) { 3656 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3657 (__v4di) __B, 3658 (__v4di) __W, __M); 3659} 3660 3661static __inline__ __m256i __DEFAULT_FN_ATTRS 3662_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3663 return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A, 3664 (__v4di) __B, 3665 (__v4di) 3666 _mm256_setzero_si256 (), 3667 __M); 3668} 3669 3670static __inline__ __m128i __DEFAULT_FN_ATTRS 3671_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B) { 3672 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A, 3673 (__v4si) __B, 3674 (__v4si) 3675 _mm_setzero_si128 (), 3676 __M); 3677} 3678 3679static __inline__ __m128i __DEFAULT_FN_ATTRS 3680_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A, 3681 __m128i __B) { 3682 return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A, 3683 (__v4si) __B, 3684 (__v4si) __W, __M); 3685} 3686 3687static __inline__ __m256i __DEFAULT_FN_ATTRS 3688_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B) { 3689 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A, 3690 (__v8si) __B, 3691 (__v8si) 3692 _mm256_setzero_si256 (), 3693 __M); 3694} 3695 3696static __inline__ __m256i __DEFAULT_FN_ATTRS 3697_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A, 3698 __m256i __B) { 3699 return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A, 3700 (__v8si) __B, 3701 (__v8si) __W, __M); 3702} 3703 3704static __inline__ __m128i __DEFAULT_FN_ATTRS 3705_mm_min_epu64 (__m128i __A, __m128i __B) { 3706 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3707 (__v2di) __B, 3708 (__v2di) 3709 _mm_setzero_si128 (), 3710 (__mmask8) -1); 3711} 3712 3713static __inline__ __m128i __DEFAULT_FN_ATTRS 3714_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, 3715 __m128i __B) { 3716 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3717 (__v2di) __B, 3718 (__v2di) __W, __M); 3719} 3720 3721static __inline__ __m128i __DEFAULT_FN_ATTRS 3722_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3723 return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A, 3724 (__v2di) __B, 3725 (__v2di) 3726 _mm_setzero_si128 (), 3727 __M); 3728} 3729 3730static __inline__ __m256i __DEFAULT_FN_ATTRS 3731_mm256_min_epu64 (__m256i __A, __m256i __B) { 3732 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3733 (__v4di) __B, 3734 (__v4di) 3735 _mm256_setzero_si256 (), 3736 (__mmask8) -1); 3737} 3738 3739static __inline__ __m256i __DEFAULT_FN_ATTRS 3740_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, 3741 __m256i __B) { 3742 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3743 (__v4di) __B, 3744 (__v4di) __W, __M); 3745} 3746 3747static __inline__ __m256i __DEFAULT_FN_ATTRS 3748_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3749 return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A, 3750 (__v4di) __B, 3751 (__v4di) 3752 _mm256_setzero_si256 (), 3753 __M); 3754} 3755 3756#define _mm_roundscale_pd(A, imm) __extension__ ({ \ 3757 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3758 (int)(imm), \ 3759 (__v2df)_mm_setzero_pd(), \ 3760 (__mmask8)-1); }) 3761 3762 3763#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ 3764 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3765 (int)(imm), \ 3766 (__v2df)(__m128d)(W), \ 3767 (__mmask8)(U)); }) 3768 3769 3770#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ 3771 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3772 (int)(imm), \ 3773 (__v2df)_mm_setzero_pd(), \ 3774 (__mmask8)(U)); }) 3775 3776 3777#define _mm256_roundscale_pd(A, imm) __extension__ ({ \ 3778 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3779 (int)(imm), \ 3780 (__v4df)_mm256_setzero_pd(), \ 3781 (__mmask8)-1); }) 3782 3783 3784#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \ 3785 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3786 (int)(imm), \ 3787 (__v4df)(__m256d)(W), \ 3788 (__mmask8)(U)); }) 3789 3790 3791#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \ 3792 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3793 (int)(imm), \ 3794 (__v4df)_mm256_setzero_pd(), \ 3795 (__mmask8)(U)); }) 3796 3797#define _mm_roundscale_ps(A, imm) __extension__ ({ \ 3798 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3799 (__v4sf)_mm_setzero_ps(), \ 3800 (__mmask8)-1); }) 3801 3802 3803#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ 3804 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3805 (__v4sf)(__m128)(W), \ 3806 (__mmask8)(U)); }) 3807 3808 3809#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ 3810 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3811 (__v4sf)_mm_setzero_ps(), \ 3812 (__mmask8)(U)); }) 3813 3814#define _mm256_roundscale_ps(A, imm) __extension__ ({ \ 3815 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3816 (__v8sf)_mm256_setzero_ps(), \ 3817 (__mmask8)-1); }) 3818 3819#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \ 3820 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3821 (__v8sf)(__m256)(W), \ 3822 (__mmask8)(U)); }) 3823 3824 3825#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \ 3826 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3827 (__v8sf)_mm256_setzero_ps(), \ 3828 (__mmask8)(U)); }) 3829 3830static __inline__ __m128d __DEFAULT_FN_ATTRS 3831_mm_scalef_pd (__m128d __A, __m128d __B) { 3832 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3833 (__v2df) __B, 3834 (__v2df) 3835 _mm_setzero_pd (), 3836 (__mmask8) -1); 3837} 3838 3839static __inline__ __m128d __DEFAULT_FN_ATTRS 3840_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3841 __m128d __B) { 3842 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3843 (__v2df) __B, 3844 (__v2df) __W, 3845 (__mmask8) __U); 3846} 3847 3848static __inline__ __m128d __DEFAULT_FN_ATTRS 3849_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3850 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3851 (__v2df) __B, 3852 (__v2df) 3853 _mm_setzero_pd (), 3854 (__mmask8) __U); 3855} 3856 3857static __inline__ __m256d __DEFAULT_FN_ATTRS 3858_mm256_scalef_pd (__m256d __A, __m256d __B) { 3859 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3860 (__v4df) __B, 3861 (__v4df) 3862 _mm256_setzero_pd (), 3863 (__mmask8) -1); 3864} 3865 3866static __inline__ __m256d __DEFAULT_FN_ATTRS 3867_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3868 __m256d __B) { 3869 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3870 (__v4df) __B, 3871 (__v4df) __W, 3872 (__mmask8) __U); 3873} 3874 3875static __inline__ __m256d __DEFAULT_FN_ATTRS 3876_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3877 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3878 (__v4df) __B, 3879 (__v4df) 3880 _mm256_setzero_pd (), 3881 (__mmask8) __U); 3882} 3883 3884static __inline__ __m128 __DEFAULT_FN_ATTRS 3885_mm_scalef_ps (__m128 __A, __m128 __B) { 3886 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3887 (__v4sf) __B, 3888 (__v4sf) 3889 _mm_setzero_ps (), 3890 (__mmask8) -1); 3891} 3892 3893static __inline__ __m128 __DEFAULT_FN_ATTRS 3894_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3895 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3896 (__v4sf) __B, 3897 (__v4sf) __W, 3898 (__mmask8) __U); 3899} 3900 3901static __inline__ __m128 __DEFAULT_FN_ATTRS 3902_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3903 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3904 (__v4sf) __B, 3905 (__v4sf) 3906 _mm_setzero_ps (), 3907 (__mmask8) __U); 3908} 3909 3910static __inline__ __m256 __DEFAULT_FN_ATTRS 3911_mm256_scalef_ps (__m256 __A, __m256 __B) { 3912 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3913 (__v8sf) __B, 3914 (__v8sf) 3915 _mm256_setzero_ps (), 3916 (__mmask8) -1); 3917} 3918 3919static __inline__ __m256 __DEFAULT_FN_ATTRS 3920_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3921 __m256 __B) { 3922 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3923 (__v8sf) __B, 3924 (__v8sf) __W, 3925 (__mmask8) __U); 3926} 3927 3928static __inline__ __m256 __DEFAULT_FN_ATTRS 3929_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3930 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3931 (__v8sf) __B, 3932 (__v8sf) 3933 _mm256_setzero_ps (), 3934 (__mmask8) __U); 3935} 3936 3937#define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3938 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \ 3939 (__v2di)(__m128i)(index), \ 3940 (__v2df)(__m128d)(v1), (int)(scale)); }) 3941 3942#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3943 __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \ 3944 (__v2di)(__m128i)(index), \ 3945 (__v2df)(__m128d)(v1), (int)(scale)); }) 3946 3947#define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3948 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \ 3949 (__v2di)(__m128i)(index), \ 3950 (__v2di)(__m128i)(v1), (int)(scale)); }) 3951 3952#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3953 __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \ 3954 (__v2di)(__m128i)(index), \ 3955 (__v2di)(__m128i)(v1), (int)(scale)); }) 3956 3957#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \ 3958 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \ 3959 (__v4di)(__m256i)(index), \ 3960 (__v4df)(__m256d)(v1), (int)(scale)); }) 3961 3962#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 3963 __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \ 3964 (__v4di)(__m256i)(index), \ 3965 (__v4df)(__m256d)(v1), (int)(scale)); }) 3966 3967#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 3968 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \ 3969 (__v4di)(__m256i)(index), \ 3970 (__v4di)(__m256i)(v1), (int)(scale)); }) 3971 3972#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 3973 __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \ 3974 (__v4di)(__m256i)(index), \ 3975 (__v4di)(__m256i)(v1), (int)(scale)); }) 3976 3977#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3978 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \ 3979 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3980 (int)(scale)); }) 3981 3982#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 3983 __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \ 3984 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3985 (int)(scale)); }) 3986 3987#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 3988 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \ 3989 (__v2di)(__m128i)(index), \ 3990 (__v4si)(__m128i)(v1), (int)(scale)); }) 3991 3992#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 3993 __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \ 3994 (__v2di)(__m128i)(index), \ 3995 (__v4si)(__m128i)(v1), (int)(scale)); }) 3996 3997#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \ 3998 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \ 3999 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 4000 (int)(scale)); }) 4001 4002#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 4003 __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \ 4004 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 4005 (int)(scale)); }) 4006 4007#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 4008 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \ 4009 (__v4di)(__m256i)(index), \ 4010 (__v4si)(__m128i)(v1), (int)(scale)); }) 4011 4012#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 4013 __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \ 4014 (__v4di)(__m256i)(index), \ 4015 (__v4si)(__m128i)(v1), (int)(scale)); }) 4016 4017#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ 4018 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \ 4019 (__v4si)(__m128i)(index), \ 4020 (__v2df)(__m128d)(v1), (int)(scale)); }) 4021 4022#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 4023 __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \ 4024 (__v4si)(__m128i)(index), \ 4025 (__v2df)(__m128d)(v1), (int)(scale)); }) 4026 4027#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 4028 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \ 4029 (__v4si)(__m128i)(index), \ 4030 (__v2di)(__m128i)(v1), (int)(scale)); }) 4031 4032#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 4033 __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \ 4034 (__v4si)(__m128i)(index), \ 4035 (__v2di)(__m128i)(v1), (int)(scale)); }) 4036 4037#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \ 4038 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \ 4039 (__v4si)(__m128i)(index), \ 4040 (__v4df)(__m256d)(v1), (int)(scale)); }) 4041 4042#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \ 4043 __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \ 4044 (__v4si)(__m128i)(index), \ 4045 (__v4df)(__m256d)(v1), (int)(scale)); }) 4046 4047#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \ 4048 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \ 4049 (__v4si)(__m128i)(index), \ 4050 (__v4di)(__m256i)(v1), (int)(scale)); }) 4051 4052#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \ 4053 __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \ 4054 (__v4si)(__m128i)(index), \ 4055 (__v4di)(__m256i)(v1), (int)(scale)); }) 4056 4057#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ 4058 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \ 4059 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 4060 (int)(scale)); }) 4061 4062#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 4063 __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \ 4064 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 4065 (int)(scale)); }) 4066 4067#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 4068 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \ 4069 (__v4si)(__m128i)(index), \ 4070 (__v4si)(__m128i)(v1), (int)(scale)); }) 4071 4072#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 4073 __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \ 4074 (__v4si)(__m128i)(index), \ 4075 (__v4si)(__m128i)(v1), (int)(scale)); }) 4076 4077#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \ 4078 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \ 4079 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 4080 (int)(scale)); }) 4081 4082#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \ 4083 __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \ 4084 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 4085 (int)(scale)); }) 4086 4087#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \ 4088 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \ 4089 (__v8si)(__m256i)(index), \ 4090 (__v8si)(__m256i)(v1), (int)(scale)); }) 4091 4092#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \ 4093 __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \ 4094 (__v8si)(__m256i)(index), \ 4095 (__v8si)(__m256i)(v1), (int)(scale)); }) 4096 4097static __inline__ __m128d __DEFAULT_FN_ATTRS 4098_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A) { 4099 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A, 4100 (__v2df) __W, 4101 (__mmask8) __U); 4102} 4103 4104static __inline__ __m128d __DEFAULT_FN_ATTRS 4105_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A) { 4106 return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A, 4107 (__v2df) 4108 _mm_setzero_pd (), 4109 (__mmask8) __U); 4110} 4111 4112static __inline__ __m256d __DEFAULT_FN_ATTRS 4113_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A) { 4114 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A, 4115 (__v4df) __W, 4116 (__mmask8) __U); 4117} 4118 4119static __inline__ __m256d __DEFAULT_FN_ATTRS 4120_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A) { 4121 return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A, 4122 (__v4df) 4123 _mm256_setzero_pd (), 4124 (__mmask8) __U); 4125} 4126 4127static __inline__ __m128 __DEFAULT_FN_ATTRS 4128_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A) { 4129 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A, 4130 (__v4sf) __W, 4131 (__mmask8) __U); 4132} 4133 4134static __inline__ __m128 __DEFAULT_FN_ATTRS 4135_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A) { 4136 return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A, 4137 (__v4sf) 4138 _mm_setzero_ps (), 4139 (__mmask8) __U); 4140} 4141 4142static __inline__ __m256 __DEFAULT_FN_ATTRS 4143_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A) { 4144 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A, 4145 (__v8sf) __W, 4146 (__mmask8) __U); 4147} 4148 4149static __inline__ __m256 __DEFAULT_FN_ATTRS 4150_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A) { 4151 return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A, 4152 (__v8sf) 4153 _mm256_setzero_ps (), 4154 (__mmask8) __U); 4155} 4156 4157static __inline__ __m128d __DEFAULT_FN_ATTRS 4158_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 4159 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A, 4160 (__v2df) __B, 4161 (__v2df) __W, 4162 (__mmask8) __U); 4163} 4164 4165static __inline__ __m128d __DEFAULT_FN_ATTRS 4166_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B) { 4167 return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A, 4168 (__v2df) __B, 4169 (__v2df) 4170 _mm_setzero_pd (), 4171 (__mmask8) __U); 4172} 4173 4174static __inline__ __m256d __DEFAULT_FN_ATTRS 4175_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A, 4176 __m256d __B) { 4177 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A, 4178 (__v4df) __B, 4179 (__v4df) __W, 4180 (__mmask8) __U); 4181} 4182 4183static __inline__ __m256d __DEFAULT_FN_ATTRS 4184_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B) { 4185 return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A, 4186 (__v4df) __B, 4187 (__v4df) 4188 _mm256_setzero_pd (), 4189 (__mmask8) __U); 4190} 4191 4192static __inline__ __m128 __DEFAULT_FN_ATTRS 4193_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B) { 4194 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A, 4195 (__v4sf) __B, 4196 (__v4sf) __W, 4197 (__mmask8) __U); 4198} 4199 4200static __inline__ __m128 __DEFAULT_FN_ATTRS 4201_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B) { 4202 return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A, 4203 (__v4sf) __B, 4204 (__v4sf) 4205 _mm_setzero_ps (), 4206 (__mmask8) __U); 4207} 4208 4209static __inline__ __m256 __DEFAULT_FN_ATTRS 4210_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B) { 4211 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A, 4212 (__v8sf) __B, 4213 (__v8sf) __W, 4214 (__mmask8) __U); 4215} 4216 4217static __inline__ __m256 __DEFAULT_FN_ATTRS 4218_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B) { 4219 return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A, 4220 (__v8sf) __B, 4221 (__v8sf) 4222 _mm256_setzero_ps (), 4223 (__mmask8) __U); 4224} 4225 4226static __inline__ __m128i __DEFAULT_FN_ATTRS 4227_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U, 4228 __m128i __B) { 4229 return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A, 4230 (__v4si) __I 4231 /* idx */ , 4232 (__v4si) __B, 4233 (__mmask8) __U); 4234} 4235 4236static __inline__ __m256i __DEFAULT_FN_ATTRS 4237_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I, 4238 __mmask8 __U, __m256i __B) { 4239 return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A, 4240 (__v8si) __I 4241 /* idx */ , 4242 (__v8si) __B, 4243 (__mmask8) __U); 4244} 4245 4246static __inline__ __m128d __DEFAULT_FN_ATTRS 4247_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U, 4248 __m128d __B) { 4249 return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A, 4250 (__v2di) __I 4251 /* idx */ , 4252 (__v2df) __B, 4253 (__mmask8) 4254 __U); 4255} 4256 4257static __inline__ __m256d __DEFAULT_FN_ATTRS 4258_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U, 4259 __m256d __B) { 4260 return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A, 4261 (__v4di) __I 4262 /* idx */ , 4263 (__v4df) __B, 4264 (__mmask8) 4265 __U); 4266} 4267 4268static __inline__ __m128 __DEFAULT_FN_ATTRS 4269_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U, 4270 __m128 __B) { 4271 return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A, 4272 (__v4si) __I 4273 /* idx */ , 4274 (__v4sf) __B, 4275 (__mmask8) __U); 4276} 4277 4278static __inline__ __m256 __DEFAULT_FN_ATTRS 4279_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U, 4280 __m256 __B) { 4281 return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A, 4282 (__v8si) __I 4283 /* idx */ , 4284 (__v8sf) __B, 4285 (__mmask8) __U); 4286} 4287 4288static __inline__ __m128i __DEFAULT_FN_ATTRS 4289_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U, 4290 __m128i __B) { 4291 return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A, 4292 (__v2di) __I 4293 /* idx */ , 4294 (__v2di) __B, 4295 (__mmask8) __U); 4296} 4297 4298static __inline__ __m256i __DEFAULT_FN_ATTRS 4299_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I, 4300 __mmask8 __U, __m256i __B) { 4301 return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A, 4302 (__v4di) __I 4303 /* idx */ , 4304 (__v4di) __B, 4305 (__mmask8) __U); 4306} 4307 4308static __inline__ __m128i __DEFAULT_FN_ATTRS 4309_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B) { 4310 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 4311 /* idx */ , 4312 (__v4si) __A, 4313 (__v4si) __B, 4314 (__mmask8) -1); 4315} 4316 4317static __inline__ __m128i __DEFAULT_FN_ATTRS 4318_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I, 4319 __m128i __B) { 4320 return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I 4321 /* idx */ , 4322 (__v4si) __A, 4323 (__v4si) __B, 4324 (__mmask8) __U); 4325} 4326 4327static __inline__ __m128i __DEFAULT_FN_ATTRS 4328_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I, 4329 __m128i __B) { 4330 return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I 4331 /* idx */ , 4332 (__v4si) __A, 4333 (__v4si) __B, 4334 (__mmask8) 4335 __U); 4336} 4337 4338static __inline__ __m256i __DEFAULT_FN_ATTRS 4339_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B) { 4340 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 4341 /* idx */ , 4342 (__v8si) __A, 4343 (__v8si) __B, 4344 (__mmask8) -1); 4345} 4346 4347static __inline__ __m256i __DEFAULT_FN_ATTRS 4348_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I, 4349 __m256i __B) { 4350 return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I 4351 /* idx */ , 4352 (__v8si) __A, 4353 (__v8si) __B, 4354 (__mmask8) __U); 4355} 4356 4357static __inline__ __m256i __DEFAULT_FN_ATTRS 4358_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A, 4359 __m256i __I, __m256i __B) { 4360 return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I 4361 /* idx */ , 4362 (__v8si) __A, 4363 (__v8si) __B, 4364 (__mmask8) 4365 __U); 4366} 4367 4368static __inline__ __m128d __DEFAULT_FN_ATTRS 4369_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B) { 4370 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 4371 /* idx */ , 4372 (__v2df) __A, 4373 (__v2df) __B, 4374 (__mmask8) - 4375 1); 4376} 4377 4378static __inline__ __m128d __DEFAULT_FN_ATTRS 4379_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I, 4380 __m128d __B) { 4381 return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I 4382 /* idx */ , 4383 (__v2df) __A, 4384 (__v2df) __B, 4385 (__mmask8) 4386 __U); 4387} 4388 4389static __inline__ __m128d __DEFAULT_FN_ATTRS 4390_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I, 4391 __m128d __B) { 4392 return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I 4393 /* idx */ , 4394 (__v2df) __A, 4395 (__v2df) __B, 4396 (__mmask8) 4397 __U); 4398} 4399 4400static __inline__ __m256d __DEFAULT_FN_ATTRS 4401_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B) { 4402 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 4403 /* idx */ , 4404 (__v4df) __A, 4405 (__v4df) __B, 4406 (__mmask8) - 4407 1); 4408} 4409 4410static __inline__ __m256d __DEFAULT_FN_ATTRS 4411_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I, 4412 __m256d __B) { 4413 return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I 4414 /* idx */ , 4415 (__v4df) __A, 4416 (__v4df) __B, 4417 (__mmask8) 4418 __U); 4419} 4420 4421static __inline__ __m256d __DEFAULT_FN_ATTRS 4422_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I, 4423 __m256d __B) { 4424 return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I 4425 /* idx */ , 4426 (__v4df) __A, 4427 (__v4df) __B, 4428 (__mmask8) 4429 __U); 4430} 4431 4432static __inline__ __m128 __DEFAULT_FN_ATTRS 4433_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B) { 4434 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 4435 /* idx */ , 4436 (__v4sf) __A, 4437 (__v4sf) __B, 4438 (__mmask8) -1); 4439} 4440 4441static __inline__ __m128 __DEFAULT_FN_ATTRS 4442_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I, 4443 __m128 __B) { 4444 return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I 4445 /* idx */ , 4446 (__v4sf) __A, 4447 (__v4sf) __B, 4448 (__mmask8) __U); 4449} 4450 4451static __inline__ __m128 __DEFAULT_FN_ATTRS 4452_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I, 4453 __m128 __B) { 4454 return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I 4455 /* idx */ , 4456 (__v4sf) __A, 4457 (__v4sf) __B, 4458 (__mmask8) 4459 __U); 4460} 4461 4462static __inline__ __m256 __DEFAULT_FN_ATTRS 4463_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B) { 4464 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 4465 /* idx */ , 4466 (__v8sf) __A, 4467 (__v8sf) __B, 4468 (__mmask8) -1); 4469} 4470 4471static __inline__ __m256 __DEFAULT_FN_ATTRS 4472_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I, 4473 __m256 __B) { 4474 return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I 4475 /* idx */ , 4476 (__v8sf) __A, 4477 (__v8sf) __B, 4478 (__mmask8) __U); 4479} 4480 4481static __inline__ __m256 __DEFAULT_FN_ATTRS 4482_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I, 4483 __m256 __B) { 4484 return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I 4485 /* idx */ , 4486 (__v8sf) __A, 4487 (__v8sf) __B, 4488 (__mmask8) 4489 __U); 4490} 4491 4492static __inline__ __m128i __DEFAULT_FN_ATTRS 4493_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B) { 4494 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 4495 /* idx */ , 4496 (__v2di) __A, 4497 (__v2di) __B, 4498 (__mmask8) -1); 4499} 4500 4501static __inline__ __m128i __DEFAULT_FN_ATTRS 4502_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I, 4503 __m128i __B) { 4504 return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I 4505 /* idx */ , 4506 (__v2di) __A, 4507 (__v2di) __B, 4508 (__mmask8) __U); 4509} 4510 4511static __inline__ __m128i __DEFAULT_FN_ATTRS 4512_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I, 4513 __m128i __B) { 4514 return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I 4515 /* idx */ , 4516 (__v2di) __A, 4517 (__v2di) __B, 4518 (__mmask8) 4519 __U); 4520} 4521 4522 4523static __inline__ __m256i __DEFAULT_FN_ATTRS 4524_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B) { 4525 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 4526 /* idx */ , 4527 (__v4di) __A, 4528 (__v4di) __B, 4529 (__mmask8) -1); 4530} 4531 4532static __inline__ __m256i __DEFAULT_FN_ATTRS 4533_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I, 4534 __m256i __B) { 4535 return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I 4536 /* idx */ , 4537 (__v4di) __A, 4538 (__v4di) __B, 4539 (__mmask8) __U); 4540} 4541 4542static __inline__ __m256i __DEFAULT_FN_ATTRS 4543_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A, 4544 __m256i __I, __m256i __B) { 4545 return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I 4546 /* idx */ , 4547 (__v4di) __A, 4548 (__v4di) __B, 4549 (__mmask8) 4550 __U); 4551} 4552 4553static __inline__ __m128i __DEFAULT_FN_ATTRS 4554_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 4555{ 4556 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A, 4557 (__v4si) __W, 4558 (__mmask8) __U); 4559} 4560 4561static __inline__ __m128i __DEFAULT_FN_ATTRS 4562_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4563{ 4564 return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A, 4565 (__v4si) 4566 _mm_setzero_si128 (), 4567 (__mmask8) __U); 4568} 4569 4570static __inline__ __m256i __DEFAULT_FN_ATTRS 4571_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 4572{ 4573 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A, 4574 (__v8si) __W, 4575 (__mmask8) __U); 4576} 4577 4578static __inline__ __m256i __DEFAULT_FN_ATTRS 4579_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4580{ 4581 return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A, 4582 (__v8si) 4583 _mm256_setzero_si256 (), 4584 (__mmask8) __U); 4585} 4586 4587static __inline__ __m128i __DEFAULT_FN_ATTRS 4588_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 4589{ 4590 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A, 4591 (__v2di) __W, 4592 (__mmask8) __U); 4593} 4594 4595static __inline__ __m128i __DEFAULT_FN_ATTRS 4596_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) 4597{ 4598 return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A, 4599 (__v2di) 4600 _mm_setzero_si128 (), 4601 (__mmask8) __U); 4602} 4603 4604static __inline__ __m256i __DEFAULT_FN_ATTRS 4605_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A) 4606{ 4607 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A, 4608 (__v4di) __W, 4609 (__mmask8) __U); 4610} 4611 4612static __inline__ __m256i __DEFAULT_FN_ATTRS 4613_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) 4614{ 4615 return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A, 4616 (__v4di) 4617 _mm256_setzero_si256 (), 4618 (__mmask8) __U); 4619} 4620 4621static __inline__ __m128i __DEFAULT_FN_ATTRS 4622_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X) 4623{ 4624 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X, 4625 (__v2di) __W, 4626 (__mmask8) __U); 4627} 4628 4629static __inline__ __m128i __DEFAULT_FN_ATTRS 4630_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X) 4631{ 4632 return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X, 4633 (__v2di) 4634 _mm_setzero_si128 (), 4635 (__mmask8) __U); 4636} 4637 4638static __inline__ __m256i __DEFAULT_FN_ATTRS 4639_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X) 4640{ 4641 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X, 4642 (__v4di) __W, 4643 (__mmask8) __U); 4644} 4645 4646static __inline__ __m256i __DEFAULT_FN_ATTRS 4647_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X) 4648{ 4649 return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X, 4650 (__v4di) 4651 _mm256_setzero_si256 (), 4652 (__mmask8) __U); 4653} 4654 4655static __inline__ __m128i __DEFAULT_FN_ATTRS 4656_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 4657{ 4658 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A, 4659 (__v4si) __W, 4660 (__mmask8) __U); 4661} 4662 4663static __inline__ __m128i __DEFAULT_FN_ATTRS 4664_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4665{ 4666 return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A, 4667 (__v4si) 4668 _mm_setzero_si128 (), 4669 (__mmask8) __U); 4670} 4671 4672static __inline__ __m256i __DEFAULT_FN_ATTRS 4673_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 4674{ 4675 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A, 4676 (__v8si) __W, 4677 (__mmask8) __U); 4678} 4679 4680static __inline__ __m256i __DEFAULT_FN_ATTRS 4681_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4682{ 4683 return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A, 4684 (__v8si) 4685 _mm256_setzero_si256 (), 4686 (__mmask8) __U); 4687} 4688 4689static __inline__ __m128i __DEFAULT_FN_ATTRS 4690_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 4691{ 4692 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A, 4693 (__v2di) __W, 4694 (__mmask8) __U); 4695} 4696 4697static __inline__ __m128i __DEFAULT_FN_ATTRS 4698_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) 4699{ 4700 return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A, 4701 (__v2di) 4702 _mm_setzero_si128 (), 4703 (__mmask8) __U); 4704} 4705 4706static __inline__ __m256i __DEFAULT_FN_ATTRS 4707_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A) 4708{ 4709 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A, 4710 (__v4di) __W, 4711 (__mmask8) __U); 4712} 4713 4714static __inline__ __m256i __DEFAULT_FN_ATTRS 4715_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) 4716{ 4717 return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A, 4718 (__v4di) 4719 _mm256_setzero_si256 (), 4720 (__mmask8) __U); 4721} 4722 4723 4724static __inline__ __m128i __DEFAULT_FN_ATTRS 4725_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 4726{ 4727 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A, 4728 (__v4si) __W, 4729 (__mmask8) __U); 4730} 4731 4732static __inline__ __m128i __DEFAULT_FN_ATTRS 4733_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A) 4734{ 4735 return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A, 4736 (__v4si) 4737 _mm_setzero_si128 (), 4738 (__mmask8) __U); 4739} 4740 4741static __inline__ __m256i __DEFAULT_FN_ATTRS 4742_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 4743{ 4744 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A, 4745 (__v8si) __W, 4746 (__mmask8) __U); 4747} 4748 4749static __inline__ __m256i __DEFAULT_FN_ATTRS 4750_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A) 4751{ 4752 return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A, 4753 (__v8si) 4754 _mm256_setzero_si256 (), 4755 (__mmask8) __U); 4756} 4757 4758static __inline__ __m128i __DEFAULT_FN_ATTRS 4759_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 4760{ 4761 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A, 4762 (__v2di) __W, 4763 (__mmask8) __U); 4764} 4765 4766static __inline__ __m128i __DEFAULT_FN_ATTRS 4767_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4768{ 4769 return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A, 4770 (__v2di) 4771 _mm_setzero_si128 (), 4772 (__mmask8) __U); 4773} 4774 4775static __inline__ __m256i __DEFAULT_FN_ATTRS 4776_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A) 4777{ 4778 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A, 4779 (__v4di) __W, 4780 (__mmask8) __U); 4781} 4782 4783static __inline__ __m256i __DEFAULT_FN_ATTRS 4784_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4785{ 4786 return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A, 4787 (__v4di) 4788 _mm256_setzero_si256 (), 4789 (__mmask8) __U); 4790} 4791 4792static __inline__ __m128i __DEFAULT_FN_ATTRS 4793_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X) 4794{ 4795 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X, 4796 (__v2di) __W, 4797 (__mmask8) __U); 4798} 4799 4800static __inline__ __m128i __DEFAULT_FN_ATTRS 4801_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X) 4802{ 4803 return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X, 4804 (__v2di) 4805 _mm_setzero_si128 (), 4806 (__mmask8) __U); 4807} 4808 4809static __inline__ __m256i __DEFAULT_FN_ATTRS 4810_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X) 4811{ 4812 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X, 4813 (__v4di) __W, 4814 (__mmask8) __U); 4815} 4816 4817static __inline__ __m256i __DEFAULT_FN_ATTRS 4818_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X) 4819{ 4820 return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X, 4821 (__v4di) 4822 _mm256_setzero_si256 (), 4823 (__mmask8) __U); 4824} 4825 4826static __inline__ __m128i __DEFAULT_FN_ATTRS 4827_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 4828{ 4829 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A, 4830 (__v4si) __W, 4831 (__mmask8) __U); 4832} 4833 4834static __inline__ __m128i __DEFAULT_FN_ATTRS 4835_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A) 4836{ 4837 return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A, 4838 (__v4si) 4839 _mm_setzero_si128 (), 4840 (__mmask8) __U); 4841} 4842 4843static __inline__ __m256i __DEFAULT_FN_ATTRS 4844_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 4845{ 4846 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A, 4847 (__v8si) __W, 4848 (__mmask8) __U); 4849} 4850 4851static __inline__ __m256i __DEFAULT_FN_ATTRS 4852_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A) 4853{ 4854 return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A, 4855 (__v8si) 4856 _mm256_setzero_si256 (), 4857 (__mmask8) __U); 4858} 4859 4860static __inline__ __m128i __DEFAULT_FN_ATTRS 4861_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 4862{ 4863 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A, 4864 (__v2di) __W, 4865 (__mmask8) __U); 4866} 4867 4868static __inline__ __m128i __DEFAULT_FN_ATTRS 4869_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) 4870{ 4871 return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A, 4872 (__v2di) 4873 _mm_setzero_si128 (), 4874 (__mmask8) __U); 4875} 4876 4877static __inline__ __m256i __DEFAULT_FN_ATTRS 4878_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A) 4879{ 4880 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A, 4881 (__v4di) __W, 4882 (__mmask8) __U); 4883} 4884 4885static __inline__ __m256i __DEFAULT_FN_ATTRS 4886_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) 4887{ 4888 return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A, 4889 (__v4di) 4890 _mm256_setzero_si256 (), 4891 (__mmask8) __U); 4892} 4893 4894 4895#define _mm_rol_epi32(a, b) __extension__ ({\ 4896 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4897 (__v4si)_mm_setzero_si128(), \ 4898 (__mmask8)-1); }) 4899 4900#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\ 4901 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4902 (__v4si)(__m128i)(w), (__mmask8)(u)); }) 4903 4904#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\ 4905 (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \ 4906 (__v4si)_mm_setzero_si128(), \ 4907 (__mmask8)(u)); }) 4908 4909#define _mm256_rol_epi32(a, b) __extension__ ({\ 4910 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4911 (__v8si)_mm256_setzero_si256(), \ 4912 (__mmask8)-1); }) 4913 4914#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\ 4915 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4916 (__v8si)(__m256i)(w), (__mmask8)(u)); }) 4917 4918#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\ 4919 (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \ 4920 (__v8si)_mm256_setzero_si256(), \ 4921 (__mmask8)(u)); }) 4922 4923#define _mm_rol_epi64(a, b) __extension__ ({\ 4924 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4925 (__v2di)_mm_setzero_di(), \ 4926 (__mmask8)-1); }) 4927 4928#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\ 4929 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4930 (__v2di)(__m128i)(w), (__mmask8)(u)); }) 4931 4932#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\ 4933 (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \ 4934 (__v2di)_mm_setzero_di(), \ 4935 (__mmask8)(u)); }) 4936 4937#define _mm256_rol_epi64(a, b) __extension__ ({\ 4938 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4939 (__v4di)_mm256_setzero_si256(), \ 4940 (__mmask8)-1); }) 4941 4942#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\ 4943 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4944 (__v4di)(__m256i)(w), (__mmask8)(u)); }) 4945 4946#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\ 4947 (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \ 4948 (__v4di)_mm256_setzero_si256(), \ 4949 (__mmask8)(u)); }) 4950 4951static __inline__ __m128i __DEFAULT_FN_ATTRS 4952_mm_rolv_epi32 (__m128i __A, __m128i __B) 4953{ 4954 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4955 (__v4si) __B, 4956 (__v4si) 4957 _mm_setzero_si128 (), 4958 (__mmask8) -1); 4959} 4960 4961static __inline__ __m128i __DEFAULT_FN_ATTRS 4962_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 4963 __m128i __B) 4964{ 4965 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4966 (__v4si) __B, 4967 (__v4si) __W, 4968 (__mmask8) __U); 4969} 4970 4971static __inline__ __m128i __DEFAULT_FN_ATTRS 4972_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4973{ 4974 return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, 4975 (__v4si) __B, 4976 (__v4si) 4977 _mm_setzero_si128 (), 4978 (__mmask8) __U); 4979} 4980 4981static __inline__ __m256i __DEFAULT_FN_ATTRS 4982_mm256_rolv_epi32 (__m256i __A, __m256i __B) 4983{ 4984 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4985 (__v8si) __B, 4986 (__v8si) 4987 _mm256_setzero_si256 (), 4988 (__mmask8) -1); 4989} 4990 4991static __inline__ __m256i __DEFAULT_FN_ATTRS 4992_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 4993 __m256i __B) 4994{ 4995 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 4996 (__v8si) __B, 4997 (__v8si) __W, 4998 (__mmask8) __U); 4999} 5000 5001static __inline__ __m256i __DEFAULT_FN_ATTRS 5002_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 5003{ 5004 return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, 5005 (__v8si) __B, 5006 (__v8si) 5007 _mm256_setzero_si256 (), 5008 (__mmask8) __U); 5009} 5010 5011static __inline__ __m128i __DEFAULT_FN_ATTRS 5012_mm_rolv_epi64 (__m128i __A, __m128i __B) 5013{ 5014 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 5015 (__v2di) __B, 5016 (__v2di) 5017 _mm_setzero_di (), 5018 (__mmask8) -1); 5019} 5020 5021static __inline__ __m128i __DEFAULT_FN_ATTRS 5022_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5023 __m128i __B) 5024{ 5025 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 5026 (__v2di) __B, 5027 (__v2di) __W, 5028 (__mmask8) __U); 5029} 5030 5031static __inline__ __m128i __DEFAULT_FN_ATTRS 5032_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5033{ 5034 return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, 5035 (__v2di) __B, 5036 (__v2di) 5037 _mm_setzero_di (), 5038 (__mmask8) __U); 5039} 5040 5041static __inline__ __m256i __DEFAULT_FN_ATTRS 5042_mm256_rolv_epi64 (__m256i __A, __m256i __B) 5043{ 5044 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 5045 (__v4di) __B, 5046 (__v4di) 5047 _mm256_setzero_si256 (), 5048 (__mmask8) -1); 5049} 5050 5051static __inline__ __m256i __DEFAULT_FN_ATTRS 5052_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5053 __m256i __B) 5054{ 5055 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 5056 (__v4di) __B, 5057 (__v4di) __W, 5058 (__mmask8) __U); 5059} 5060 5061static __inline__ __m256i __DEFAULT_FN_ATTRS 5062_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 5063{ 5064 return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, 5065 (__v4di) __B, 5066 (__v4di) 5067 _mm256_setzero_si256 (), 5068 (__mmask8) __U); 5069} 5070 5071#define _mm_ror_epi32(A, B) __extension__ ({ \ 5072 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 5073 (__v4si)_mm_setzero_si128(), \ 5074 (__mmask8)-1); }) 5075 5076#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 5077 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 5078 (__v4si)(__m128i)(W), (__mmask8)(U)); }) 5079 5080#define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \ 5081 (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \ 5082 (__v4si)_mm_setzero_si128(), \ 5083 (__mmask8)(U)); }) 5084 5085#define _mm256_ror_epi32(A, B) __extension__ ({ \ 5086 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 5087 (__v8si)_mm256_setzero_si256(), \ 5088 (__mmask8)-1); }) 5089 5090#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \ 5091 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 5092 (__v8si)(__m256i)(W), (__mmask8)(U)); }) 5093 5094#define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \ 5095 (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \ 5096 (__v8si)_mm256_setzero_si256(), \ 5097 (__mmask8)(U)); }) 5098 5099#define _mm_ror_epi64(A, B) __extension__ ({ \ 5100 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 5101 (__v2di)_mm_setzero_di(), \ 5102 (__mmask8)-1); }) 5103 5104#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 5105 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 5106 (__v2di)(__m128i)(W), (__mmask8)(U)); }) 5107 5108#define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \ 5109 (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \ 5110 (__v2di)_mm_setzero_di(), \ 5111 (__mmask8)(U)); }) 5112 5113#define _mm256_ror_epi64(A, B) __extension__ ({ \ 5114 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 5115 (__v4di)_mm256_setzero_si256(), \ 5116 (__mmask8)-1); }) 5117 5118#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \ 5119 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 5120 (__v4di)(__m256i)(W), (__mmask8)(U)); }) 5121 5122#define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \ 5123 (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \ 5124 (__v4di)_mm256_setzero_si256(), \ 5125 (__mmask8)(U)); }) 5126 5127static __inline__ __m128i __DEFAULT_FN_ATTRS 5128_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 5129 __m128i __B) 5130{ 5131 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A, 5132 (__v4si) __B, 5133 (__v4si) __W, 5134 (__mmask8) __U); 5135} 5136 5137static __inline__ __m128i __DEFAULT_FN_ATTRS 5138_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 5139{ 5140 return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A, 5141 (__v4si) __B, 5142 (__v4si) 5143 _mm_setzero_si128 (), 5144 (__mmask8) __U); 5145} 5146 5147static __inline__ __m256i __DEFAULT_FN_ATTRS 5148_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 5149 __m128i __B) 5150{ 5151 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A, 5152 (__v4si) __B, 5153 (__v8si) __W, 5154 (__mmask8) __U); 5155} 5156 5157static __inline__ __m256i __DEFAULT_FN_ATTRS 5158_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B) 5159{ 5160 return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A, 5161 (__v4si) __B, 5162 (__v8si) 5163 _mm256_setzero_si256 (), 5164 (__mmask8) __U); 5165} 5166 5167#define _mm_mask_slli_epi32(W, U, A, B) __extension__ ({ \ 5168 (__m128i)__builtin_ia32_pslldi128_mask((__v4si)(__m128i)(A), (int)(B), \ 5169 (__v4si)(__m128i)(W), \ 5170 (__mmask8)(U)); }) 5171 5172#define _mm_maskz_slli_epi32(U, A, B) __extension__ ({ \ 5173 (__m128i)__builtin_ia32_pslldi128_mask((__v4si)(__m128i)(A), (int)(B), \ 5174 (__v4si)_mm_setzero_si128(), \ 5175 (__mmask8)(U)); }) 5176 5177#define _mm256_mask_slli_epi32(W, U, A, B) __extension__ ({ \ 5178 (__m256i)__builtin_ia32_pslldi256_mask((__v8si)(__m256i)(A), (int)(B), \ 5179 (__v8si)(__m256i)(W), \ 5180 (__mmask8)(U)); }) 5181 5182#define _mm256_maskz_slli_epi32(U, A, B) __extension__ ({ \ 5183 (__m256i)__builtin_ia32_pslldi256_mask((__v8si)(__m256i)(A), (int)(B), \ 5184 (__v8si)_mm256_setzero_si256(), \ 5185 (__mmask8)(U)); }) 5186 5187static __inline__ __m128i __DEFAULT_FN_ATTRS 5188_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5189 __m128i __B) 5190{ 5191 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A, 5192 (__v2di) __B, 5193 (__v2di) __W, 5194 (__mmask8) __U); 5195} 5196 5197static __inline__ __m128i __DEFAULT_FN_ATTRS 5198_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5199{ 5200 return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A, 5201 (__v2di) __B, 5202 (__v2di) 5203 _mm_setzero_di (), 5204 (__mmask8) __U); 5205} 5206 5207static __inline__ __m256i __DEFAULT_FN_ATTRS 5208_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5209 __m128i __B) 5210{ 5211 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A, 5212 (__v2di) __B, 5213 (__v4di) __W, 5214 (__mmask8) __U); 5215} 5216 5217static __inline__ __m256i __DEFAULT_FN_ATTRS 5218_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B) 5219{ 5220 return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A, 5221 (__v2di) __B, 5222 (__v4di) 5223 _mm256_setzero_si256 (), 5224 (__mmask8) __U); 5225} 5226 5227#define _mm_mask_slli_epi64(W, U, A, B) __extension__ ({ \ 5228 (__m128i)__builtin_ia32_psllqi128_mask((__v2di)(__m128i)(A), (int)(B), \ 5229 (__v2di)(__m128i)(W), \ 5230 (__mmask8)(U)); }) 5231 5232#define _mm_maskz_slli_epi64(U, A, B) __extension__ ({ \ 5233 (__m128i)__builtin_ia32_psllqi128_mask((__v2di)(__m128i)(A), (int)(B), \ 5234 (__v2di)_mm_setzero_di(), \ 5235 (__mmask8)(U)); }) 5236 5237#define _mm256_mask_slli_epi64(W, U, A, B) __extension__ ({ \ 5238 (__m256i)__builtin_ia32_psllqi256_mask((__v4di)(__m256i)(A), (int)(B), \ 5239 (__v4di)(__m256i)(W), \ 5240 (__mmask8)(U)); }) 5241 5242#define _mm256_maskz_slli_epi64(U, A, B) __extension__ ({ \ 5243 (__m256i)__builtin_ia32_psllqi256_mask((__v4di)(__m256i)(A), (int)(B), \ 5244 (__v4di)_mm256_setzero_si256(), \ 5245 (__mmask8)(U)); }) 5246 5247 5248static __inline__ __m128i __DEFAULT_FN_ATTRS 5249_mm_rorv_epi32 (__m128i __A, __m128i __B) 5250{ 5251 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5252 (__v4si) __B, 5253 (__v4si) 5254 _mm_setzero_si128 (), 5255 (__mmask8) -1); 5256} 5257 5258static __inline__ __m128i __DEFAULT_FN_ATTRS 5259_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 5260 __m128i __B) 5261{ 5262 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5263 (__v4si) __B, 5264 (__v4si) __W, 5265 (__mmask8) __U); 5266} 5267 5268static __inline__ __m128i __DEFAULT_FN_ATTRS 5269_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 5270{ 5271 return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, 5272 (__v4si) __B, 5273 (__v4si) 5274 _mm_setzero_si128 (), 5275 (__mmask8) __U); 5276} 5277 5278static __inline__ __m256i __DEFAULT_FN_ATTRS 5279_mm256_rorv_epi32 (__m256i __A, __m256i __B) 5280{ 5281 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5282 (__v8si) __B, 5283 (__v8si) 5284 _mm256_setzero_si256 (), 5285 (__mmask8) -1); 5286} 5287 5288static __inline__ __m256i __DEFAULT_FN_ATTRS 5289_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 5290 __m256i __B) 5291{ 5292 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5293 (__v8si) __B, 5294 (__v8si) __W, 5295 (__mmask8) __U); 5296} 5297 5298static __inline__ __m256i __DEFAULT_FN_ATTRS 5299_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 5300{ 5301 return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, 5302 (__v8si) __B, 5303 (__v8si) 5304 _mm256_setzero_si256 (), 5305 (__mmask8) __U); 5306} 5307 5308static __inline__ __m128i __DEFAULT_FN_ATTRS 5309_mm_rorv_epi64 (__m128i __A, __m128i __B) 5310{ 5311 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5312 (__v2di) __B, 5313 (__v2di) 5314 _mm_setzero_di (), 5315 (__mmask8) -1); 5316} 5317 5318static __inline__ __m128i __DEFAULT_FN_ATTRS 5319_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5320 __m128i __B) 5321{ 5322 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5323 (__v2di) __B, 5324 (__v2di) __W, 5325 (__mmask8) __U); 5326} 5327 5328static __inline__ __m128i __DEFAULT_FN_ATTRS 5329_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5330{ 5331 return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, 5332 (__v2di) __B, 5333 (__v2di) 5334 _mm_setzero_di (), 5335 (__mmask8) __U); 5336} 5337 5338static __inline__ __m256i __DEFAULT_FN_ATTRS 5339_mm256_rorv_epi64 (__m256i __A, __m256i __B) 5340{ 5341 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5342 (__v4di) __B, 5343 (__v4di) 5344 _mm256_setzero_si256 (), 5345 (__mmask8) -1); 5346} 5347 5348static __inline__ __m256i __DEFAULT_FN_ATTRS 5349_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5350 __m256i __B) 5351{ 5352 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5353 (__v4di) __B, 5354 (__v4di) __W, 5355 (__mmask8) __U); 5356} 5357 5358static __inline__ __m256i __DEFAULT_FN_ATTRS 5359_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 5360{ 5361 return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, 5362 (__v4di) __B, 5363 (__v4di) 5364 _mm256_setzero_si256 (), 5365 (__mmask8) __U); 5366} 5367 5368static __inline__ __m128i __DEFAULT_FN_ATTRS 5369_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X, 5370 __m128i __Y) 5371{ 5372 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X, 5373 (__v2di) __Y, 5374 (__v2di) __W, 5375 (__mmask8) __U); 5376} 5377 5378static __inline__ __m128i __DEFAULT_FN_ATTRS 5379_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) 5380{ 5381 return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X, 5382 (__v2di) __Y, 5383 (__v2di) 5384 _mm_setzero_di (), 5385 (__mmask8) __U); 5386} 5387 5388static __inline__ __m256i __DEFAULT_FN_ATTRS 5389_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X, 5390 __m256i __Y) 5391{ 5392 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X, 5393 (__v4di) __Y, 5394 (__v4di) __W, 5395 (__mmask8) __U); 5396} 5397 5398static __inline__ __m256i __DEFAULT_FN_ATTRS 5399_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5400{ 5401 return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X, 5402 (__v4di) __Y, 5403 (__v4di) 5404 _mm256_setzero_si256 (), 5405 (__mmask8) __U); 5406} 5407 5408static __inline__ __m128i __DEFAULT_FN_ATTRS 5409_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X, 5410 __m128i __Y) 5411{ 5412 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X, 5413 (__v4si) __Y, 5414 (__v4si) __W, 5415 (__mmask8) __U); 5416} 5417 5418static __inline__ __m128i __DEFAULT_FN_ATTRS 5419_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y) 5420{ 5421 return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X, 5422 (__v4si) __Y, 5423 (__v4si) 5424 _mm_setzero_si128 (), 5425 (__mmask8) __U); 5426} 5427 5428static __inline__ __m256i __DEFAULT_FN_ATTRS 5429_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X, 5430 __m256i __Y) 5431{ 5432 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X, 5433 (__v8si) __Y, 5434 (__v8si) __W, 5435 (__mmask8) __U); 5436} 5437 5438static __inline__ __m256i __DEFAULT_FN_ATTRS 5439_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y) 5440{ 5441 return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X, 5442 (__v8si) __Y, 5443 (__v8si) 5444 _mm256_setzero_si256 (), 5445 (__mmask8) __U); 5446} 5447 5448 5449 5450static __inline__ __m128i __DEFAULT_FN_ATTRS 5451_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X, 5452 __m128i __Y) 5453{ 5454 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X, 5455 (__v2di) __Y, 5456 (__v2di) __W, 5457 (__mmask8) __U); 5458} 5459 5460static __inline__ __m128i __DEFAULT_FN_ATTRS 5461_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) 5462{ 5463 return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X, 5464 (__v2di) __Y, 5465 (__v2di) 5466 _mm_setzero_di (), 5467 (__mmask8) __U); 5468} 5469 5470static __inline__ __m256i __DEFAULT_FN_ATTRS 5471_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X, 5472 __m256i __Y) 5473{ 5474 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X, 5475 (__v4di) __Y, 5476 (__v4di) __W, 5477 (__mmask8) __U); 5478} 5479 5480static __inline__ __m256i __DEFAULT_FN_ATTRS 5481_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5482{ 5483 return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X, 5484 (__v4di) __Y, 5485 (__v4di) 5486 _mm256_setzero_si256 (), 5487 (__mmask8) __U); 5488} 5489 5490static __inline__ __m128i __DEFAULT_FN_ATTRS 5491_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X, 5492 __m128i __Y) 5493{ 5494 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X, 5495 (__v4si) __Y, 5496 (__v4si) __W, 5497 (__mmask8) __U); 5498} 5499 5500static __inline__ __m128i __DEFAULT_FN_ATTRS 5501_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y) 5502{ 5503 return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X, 5504 (__v4si) __Y, 5505 (__v4si) 5506 _mm_setzero_si128 (), 5507 (__mmask8) __U); 5508} 5509 5510static __inline__ __m256i __DEFAULT_FN_ATTRS 5511_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X, 5512 __m256i __Y) 5513{ 5514 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X, 5515 (__v8si) __Y, 5516 (__v8si) __W, 5517 (__mmask8) __U); 5518} 5519 5520static __inline__ __m256i __DEFAULT_FN_ATTRS 5521_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y) 5522{ 5523 return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X, 5524 (__v8si) __Y, 5525 (__v8si) 5526 _mm256_setzero_si256 (), 5527 (__mmask8) __U); 5528} 5529 5530 5531 5532static __inline__ __m128i __DEFAULT_FN_ATTRS 5533_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 5534 __m128i __B) 5535{ 5536 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A, 5537 (__v4si) __B, 5538 (__v4si) __W, 5539 (__mmask8) __U); 5540} 5541 5542static __inline__ __m128i __DEFAULT_FN_ATTRS 5543_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 5544{ 5545 return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A, 5546 (__v4si) __B, 5547 (__v4si) 5548 _mm_setzero_si128 (), 5549 (__mmask8) __U); 5550} 5551 5552static __inline__ __m256i __DEFAULT_FN_ATTRS 5553_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 5554 __m128i __B) 5555{ 5556 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A, 5557 (__v4si) __B, 5558 (__v8si) __W, 5559 (__mmask8) __U); 5560} 5561 5562static __inline__ __m256i __DEFAULT_FN_ATTRS 5563_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B) 5564{ 5565 return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A, 5566 (__v4si) __B, 5567 (__v8si) 5568 _mm256_setzero_si256 (), 5569 (__mmask8) __U); 5570} 5571 5572#define _mm_mask_srli_epi32(W, U, A, imm) __extension__ ({ \ 5573 (__m128i)__builtin_ia32_psrldi128_mask((__v4si)(__m128i)(A), (int)(imm), \ 5574 (__v4si)(__m128i)(W), \ 5575 (__mmask8)(U)); }) 5576 5577#define _mm_maskz_srli_epi32(U, A, imm) __extension__ ({ \ 5578 (__m128i)__builtin_ia32_psrldi128_mask((__v4si)(__m128i)(A), (int)(imm), \ 5579 (__v4si)_mm_setzero_si128(), \ 5580 (__mmask8)(U)); }) 5581 5582#define _mm256_mask_srli_epi32(W, U, A, imm) __extension__ ({ \ 5583 (__m256i)__builtin_ia32_psrldi256_mask((__v8si)(__m256i)(A), (int)(imm), \ 5584 (__v8si)(__m256i)(W), \ 5585 (__mmask8)(U)); }) 5586 5587#define _mm256_maskz_srli_epi32(U, A, imm) __extension__ ({ \ 5588 (__m256i)__builtin_ia32_psrldi256_mask((__v8si)(__m256i)(A), (int)(imm), \ 5589 (__v8si)_mm256_setzero_si256(), \ 5590 (__mmask8)(U)); }) 5591 5592static __inline__ __m128i __DEFAULT_FN_ATTRS 5593_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 5594 __m128i __B) 5595{ 5596 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A, 5597 (__v2di) __B, 5598 (__v2di) __W, 5599 (__mmask8) __U); 5600} 5601 5602static __inline__ __m128i __DEFAULT_FN_ATTRS 5603_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 5604{ 5605 return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A, 5606 (__v2di) __B, 5607 (__v2di) 5608 _mm_setzero_di (), 5609 (__mmask8) __U); 5610} 5611 5612static __inline__ __m256i __DEFAULT_FN_ATTRS 5613_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 5614 __m128i __B) 5615{ 5616 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A, 5617 (__v2di) __B, 5618 (__v4di) __W, 5619 (__mmask8) __U); 5620} 5621 5622static __inline__ __m256i __DEFAULT_FN_ATTRS 5623_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B) 5624{ 5625 return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A, 5626 (__v2di) __B, 5627 (__v4di) 5628 _mm256_setzero_si256 (), 5629 (__mmask8) __U); 5630} 5631 5632#define _mm_mask_srli_epi64(W, U, A, imm) __extension__ ({ \ 5633 (__m128i)__builtin_ia32_psrlqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ 5634 (__v2di)(__m128i)(W), \ 5635 (__mmask8)(U)); }) 5636 5637#define _mm_maskz_srli_epi64(U, A, imm) __extension__ ({ \ 5638 (__m128i)__builtin_ia32_psrlqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ 5639 (__v2di)_mm_setzero_si128(), \ 5640 (__mmask8)(U)); }) 5641 5642#define _mm256_mask_srli_epi64(W, U, A, imm) __extension__ ({ \ 5643 (__m256i)__builtin_ia32_psrlqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ 5644 (__v4di)(__m256i)(W), \ 5645 (__mmask8)(U)); }) 5646 5647#define _mm256_maskz_srli_epi64(U, A, imm) __extension__ ({ \ 5648 (__m256i)__builtin_ia32_psrlqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ 5649 (__v4di)_mm256_setzero_si256(), \ 5650 (__mmask8)(U)); }) 5651 5652static __inline__ __m128i __DEFAULT_FN_ATTRS 5653_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X, 5654 __m128i __Y) 5655{ 5656 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X, 5657 (__v4si) __Y, 5658 (__v4si) __W, 5659 (__mmask8) __U); 5660} 5661 5662static __inline__ __m128i __DEFAULT_FN_ATTRS 5663_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y) 5664{ 5665 return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X, 5666 (__v4si) __Y, 5667 (__v4si) 5668 _mm_setzero_si128 (), 5669 (__mmask8) __U); 5670} 5671 5672static __inline__ __m256i __DEFAULT_FN_ATTRS 5673_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X, 5674 __m256i __Y) 5675{ 5676 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X, 5677 (__v8si) __Y, 5678 (__v8si) __W, 5679 (__mmask8) __U); 5680} 5681 5682static __inline__ __m256i __DEFAULT_FN_ATTRS 5683_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y) 5684{ 5685 return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X, 5686 (__v8si) __Y, 5687 (__v8si) 5688 _mm256_setzero_si256 (), 5689 (__mmask8) __U); 5690} 5691 5692static __inline__ __m128i __DEFAULT_FN_ATTRS 5693_mm_srav_epi64 (__m128i __X, __m128i __Y) 5694{ 5695 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 5696 (__v2di) __Y, 5697 (__v2di) 5698 _mm_setzero_di (), 5699 (__mmask8) -1); 5700} 5701 5702static __inline__ __m128i __DEFAULT_FN_ATTRS 5703_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X, 5704 __m128i __Y) 5705{ 5706 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 5707 (__v2di) __Y, 5708 (__v2di) __W, 5709 (__mmask8) __U); 5710} 5711 5712static __inline__ __m128i __DEFAULT_FN_ATTRS 5713_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y) 5714{ 5715 return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X, 5716 (__v2di) __Y, 5717 (__v2di) 5718 _mm_setzero_di (), 5719 (__mmask8) __U); 5720} 5721 5722static __inline__ __m256i __DEFAULT_FN_ATTRS 5723_mm256_srav_epi64 (__m256i __X, __m256i __Y) 5724{ 5725 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 5726 (__v4di) __Y, 5727 (__v4di) 5728 _mm256_setzero_si256 (), 5729 (__mmask8) -1); 5730} 5731 5732static __inline__ __m256i __DEFAULT_FN_ATTRS 5733_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X, 5734 __m256i __Y) 5735{ 5736 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 5737 (__v4di) __Y, 5738 (__v4di) __W, 5739 (__mmask8) __U); 5740} 5741 5742static __inline__ __m256i __DEFAULT_FN_ATTRS 5743_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5744{ 5745 return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X, 5746 (__v4di) __Y, 5747 (__v4di) 5748 _mm256_setzero_si256 (), 5749 (__mmask8) __U); 5750} 5751 5752static __inline__ __m128i __DEFAULT_FN_ATTRS 5753_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5754{ 5755 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5756 (__v4si) __A, 5757 (__v4si) __W); 5758} 5759 5760static __inline__ __m128i __DEFAULT_FN_ATTRS 5761_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5762{ 5763 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5764 (__v4si) __A, 5765 (__v4si) _mm_setzero_si128 ()); 5766} 5767 5768 5769static __inline__ __m256i __DEFAULT_FN_ATTRS 5770_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 5771{ 5772 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5773 (__v8si) __A, 5774 (__v8si) __W); 5775} 5776 5777static __inline__ __m256i __DEFAULT_FN_ATTRS 5778_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5779{ 5780 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5781 (__v8si) __A, 5782 (__v8si) _mm256_setzero_si256 ()); 5783} 5784 5785static __inline__ __m128i __DEFAULT_FN_ATTRS 5786_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5787{ 5788 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5789 (__v4si) __W, 5790 (__mmask8) 5791 __U); 5792} 5793 5794static __inline__ __m128i __DEFAULT_FN_ATTRS 5795_mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5796{ 5797 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5798 (__v4si) 5799 _mm_setzero_si128 (), 5800 (__mmask8) 5801 __U); 5802} 5803 5804static __inline__ __m256i __DEFAULT_FN_ATTRS 5805_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5806{ 5807 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5808 (__v8si) __W, 5809 (__mmask8) 5810 __U); 5811} 5812 5813static __inline__ __m256i __DEFAULT_FN_ATTRS 5814_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5815{ 5816 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5817 (__v8si) 5818 _mm256_setzero_si256 (), 5819 (__mmask8) 5820 __U); 5821} 5822 5823static __inline__ void __DEFAULT_FN_ATTRS 5824_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5825{ 5826 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5827 (__v4si) __A, 5828 (__mmask8) __U); 5829} 5830 5831static __inline__ void __DEFAULT_FN_ATTRS 5832_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5833{ 5834 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5835 (__v8si) __A, 5836 (__mmask8) __U); 5837} 5838 5839static __inline__ __m128i __DEFAULT_FN_ATTRS 5840_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5841{ 5842 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5843 (__v2di) __A, 5844 (__v2di) __W); 5845} 5846 5847static __inline__ __m128i __DEFAULT_FN_ATTRS 5848_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5849{ 5850 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5851 (__v2di) __A, 5852 (__v2di) _mm_setzero_di ()); 5853} 5854 5855static __inline__ __m256i __DEFAULT_FN_ATTRS 5856_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5857{ 5858 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5859 (__v4di) __A, 5860 (__v4di) __W); 5861} 5862 5863static __inline__ __m256i __DEFAULT_FN_ATTRS 5864_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5865{ 5866 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5867 (__v4di) __A, 5868 (__v4di) _mm256_setzero_si256 ()); 5869} 5870 5871static __inline__ __m128i __DEFAULT_FN_ATTRS 5872_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5873{ 5874 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5875 (__v2di) __W, 5876 (__mmask8) 5877 __U); 5878} 5879 5880static __inline__ __m128i __DEFAULT_FN_ATTRS 5881_mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5882{ 5883 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5884 (__v2di) 5885 _mm_setzero_di (), 5886 (__mmask8) 5887 __U); 5888} 5889 5890static __inline__ __m256i __DEFAULT_FN_ATTRS 5891_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5892{ 5893 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5894 (__v4di) __W, 5895 (__mmask8) 5896 __U); 5897} 5898 5899static __inline__ __m256i __DEFAULT_FN_ATTRS 5900_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5901{ 5902 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5903 (__v4di) 5904 _mm256_setzero_si256 (), 5905 (__mmask8) 5906 __U); 5907} 5908 5909static __inline__ void __DEFAULT_FN_ATTRS 5910_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 5911{ 5912 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5913 (__v2di) __A, 5914 (__mmask8) __U); 5915} 5916 5917static __inline__ void __DEFAULT_FN_ATTRS 5918_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5919{ 5920 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 5921 (__v4di) __A, 5922 (__mmask8) __U); 5923} 5924 5925static __inline__ __m128d __DEFAULT_FN_ATTRS 5926_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5927{ 5928 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5929 (__v2df)_mm_movedup_pd(__A), 5930 (__v2df)__W); 5931} 5932 5933static __inline__ __m128d __DEFAULT_FN_ATTRS 5934_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5935{ 5936 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5937 (__v2df)_mm_movedup_pd(__A), 5938 (__v2df)_mm_setzero_pd()); 5939} 5940 5941static __inline__ __m256d __DEFAULT_FN_ATTRS 5942_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5943{ 5944 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5945 (__v4df)_mm256_movedup_pd(__A), 5946 (__v4df)__W); 5947} 5948 5949static __inline__ __m256d __DEFAULT_FN_ATTRS 5950_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5951{ 5952 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5953 (__v4df)_mm256_movedup_pd(__A), 5954 (__v4df)_mm256_setzero_pd()); 5955} 5956 5957 5958#define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \ 5959 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \ 5960 (__v4si)(__m128i)(O), \ 5961 (__mmask8)(M)); }) 5962 5963#define _mm_maskz_set1_epi32(M, A) __extension__ ({ \ 5964 (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \ 5965 (__v4si)_mm_setzero_si128(), \ 5966 (__mmask8)(M)); }) 5967 5968#define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \ 5969 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \ 5970 (__v8si)(__m256i)(O), \ 5971 (__mmask8)(M)); }) 5972 5973#define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \ 5974 (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \ 5975 (__v8si)_mm256_setzero_si256(), \ 5976 (__mmask8)(M)); }) 5977 5978static __inline__ __m128i __DEFAULT_FN_ATTRS 5979_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5980{ 5981 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O, 5982 __M); 5983} 5984 5985static __inline__ __m128i __DEFAULT_FN_ATTRS 5986_mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5987{ 5988 return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, 5989 (__v2di) 5990 _mm_setzero_si128 (), 5991 __M); 5992} 5993 5994static __inline__ __m256i __DEFAULT_FN_ATTRS 5995_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5996{ 5997 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O, 5998 __M); 5999} 6000 6001static __inline__ __m256i __DEFAULT_FN_ATTRS 6002_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 6003{ 6004 return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, 6005 (__v4di) 6006 _mm256_setzero_si256 (), 6007 __M); 6008} 6009 6010#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 6011 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 6012 (__v2df)(__m128d)(B), \ 6013 (__v2di)(__m128i)(C), (int)(imm), \ 6014 (__mmask8)-1); }) 6015 6016#define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 6017 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 6018 (__v2df)(__m128d)(B), \ 6019 (__v2di)(__m128i)(C), (int)(imm), \ 6020 (__mmask8)(U)); }) 6021 6022#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 6023 (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 6024 (__v2df)(__m128d)(B), \ 6025 (__v2di)(__m128i)(C), \ 6026 (int)(imm), (__mmask8)(U)); }) 6027 6028#define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \ 6029 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 6030 (__v4df)(__m256d)(B), \ 6031 (__v4di)(__m256i)(C), (int)(imm), \ 6032 (__mmask8)-1); }) 6033 6034#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \ 6035 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 6036 (__v4df)(__m256d)(B), \ 6037 (__v4di)(__m256i)(C), (int)(imm), \ 6038 (__mmask8)(U)); }) 6039 6040#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \ 6041 (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 6042 (__v4df)(__m256d)(B), \ 6043 (__v4di)(__m256i)(C), \ 6044 (int)(imm), (__mmask8)(U)); }) 6045 6046#define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 6047 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 6048 (__v4sf)(__m128)(B), \ 6049 (__v4si)(__m128i)(C), (int)(imm), \ 6050 (__mmask8)-1); }) 6051 6052#define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 6053 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 6054 (__v4sf)(__m128)(B), \ 6055 (__v4si)(__m128i)(C), (int)(imm), \ 6056 (__mmask8)(U)); }) 6057 6058#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 6059 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 6060 (__v4sf)(__m128)(B), \ 6061 (__v4si)(__m128i)(C), (int)(imm), \ 6062 (__mmask8)(U)); }) 6063 6064#define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \ 6065 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 6066 (__v8sf)(__m256)(B), \ 6067 (__v8si)(__m256i)(C), (int)(imm), \ 6068 (__mmask8)-1); }) 6069 6070#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \ 6071 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 6072 (__v8sf)(__m256)(B), \ 6073 (__v8si)(__m256i)(C), (int)(imm), \ 6074 (__mmask8)(U)); }) 6075 6076#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \ 6077 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 6078 (__v8sf)(__m256)(B), \ 6079 (__v8si)(__m256i)(C), (int)(imm), \ 6080 (__mmask8)(U)); }) 6081 6082static __inline__ __m128d __DEFAULT_FN_ATTRS 6083_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 6084{ 6085 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 6086 (__v2df) __W, 6087 (__mmask8) __U); 6088} 6089 6090static __inline__ __m128d __DEFAULT_FN_ATTRS 6091_mm_maskz_load_pd (__mmask8 __U, void const *__P) 6092{ 6093 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 6094 (__v2df) 6095 _mm_setzero_pd (), 6096 (__mmask8) __U); 6097} 6098 6099static __inline__ __m256d __DEFAULT_FN_ATTRS 6100_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 6101{ 6102 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 6103 (__v4df) __W, 6104 (__mmask8) __U); 6105} 6106 6107static __inline__ __m256d __DEFAULT_FN_ATTRS 6108_mm256_maskz_load_pd (__mmask8 __U, void const *__P) 6109{ 6110 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 6111 (__v4df) 6112 _mm256_setzero_pd (), 6113 (__mmask8) __U); 6114} 6115 6116static __inline__ __m128 __DEFAULT_FN_ATTRS 6117_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 6118{ 6119 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 6120 (__v4sf) __W, 6121 (__mmask8) __U); 6122} 6123 6124static __inline__ __m128 __DEFAULT_FN_ATTRS 6125_mm_maskz_load_ps (__mmask8 __U, void const *__P) 6126{ 6127 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 6128 (__v4sf) 6129 _mm_setzero_ps (), 6130 (__mmask8) __U); 6131} 6132 6133static __inline__ __m256 __DEFAULT_FN_ATTRS 6134_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 6135{ 6136 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 6137 (__v8sf) __W, 6138 (__mmask8) __U); 6139} 6140 6141static __inline__ __m256 __DEFAULT_FN_ATTRS 6142_mm256_maskz_load_ps (__mmask8 __U, void const *__P) 6143{ 6144 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 6145 (__v8sf) 6146 _mm256_setzero_ps (), 6147 (__mmask8) __U); 6148} 6149 6150static __inline__ __m128i __DEFAULT_FN_ATTRS 6151_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 6152{ 6153 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 6154 (__v2di) __W, 6155 (__mmask8) __U); 6156} 6157 6158static __inline__ __m128i __DEFAULT_FN_ATTRS 6159_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 6160{ 6161 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 6162 (__v2di) 6163 _mm_setzero_si128 (), 6164 (__mmask8) __U); 6165} 6166 6167static __inline__ __m256i __DEFAULT_FN_ATTRS 6168_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 6169{ 6170 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 6171 (__v4di) __W, 6172 (__mmask8) __U); 6173} 6174 6175static __inline__ __m256i __DEFAULT_FN_ATTRS 6176_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 6177{ 6178 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 6179 (__v4di) 6180 _mm256_setzero_si256 (), 6181 (__mmask8) __U); 6182} 6183 6184static __inline__ __m128i __DEFAULT_FN_ATTRS 6185_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 6186{ 6187 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 6188 (__v4si) __W, 6189 (__mmask8) __U); 6190} 6191 6192static __inline__ __m128i __DEFAULT_FN_ATTRS 6193_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 6194{ 6195 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 6196 (__v4si) 6197 _mm_setzero_si128 (), 6198 (__mmask8) __U); 6199} 6200 6201static __inline__ __m256i __DEFAULT_FN_ATTRS 6202_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 6203{ 6204 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 6205 (__v8si) __W, 6206 (__mmask8) __U); 6207} 6208 6209static __inline__ __m256i __DEFAULT_FN_ATTRS 6210_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 6211{ 6212 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 6213 (__v8si) 6214 _mm256_setzero_si256 (), 6215 (__mmask8) __U); 6216} 6217 6218static __inline__ __m128d __DEFAULT_FN_ATTRS 6219_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 6220{ 6221 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 6222 (__v2df) __W, 6223 (__mmask8) __U); 6224} 6225 6226static __inline__ __m128d __DEFAULT_FN_ATTRS 6227_mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 6228{ 6229 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 6230 (__v2df) 6231 _mm_setzero_pd (), 6232 (__mmask8) __U); 6233} 6234 6235static __inline__ __m256d __DEFAULT_FN_ATTRS 6236_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 6237{ 6238 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 6239 (__v4df) __W, 6240 (__mmask8) __U); 6241} 6242 6243static __inline__ __m256d __DEFAULT_FN_ATTRS 6244_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 6245{ 6246 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 6247 (__v4df) 6248 _mm256_setzero_pd (), 6249 (__mmask8) __U); 6250} 6251 6252static __inline__ __m128 __DEFAULT_FN_ATTRS 6253_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 6254{ 6255 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 6256 (__v4sf) __W, 6257 (__mmask8) __U); 6258} 6259 6260static __inline__ __m128 __DEFAULT_FN_ATTRS 6261_mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 6262{ 6263 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 6264 (__v4sf) 6265 _mm_setzero_ps (), 6266 (__mmask8) __U); 6267} 6268 6269static __inline__ __m256 __DEFAULT_FN_ATTRS 6270_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 6271{ 6272 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 6273 (__v8sf) __W, 6274 (__mmask8) __U); 6275} 6276 6277static __inline__ __m256 __DEFAULT_FN_ATTRS 6278_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 6279{ 6280 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 6281 (__v8sf) 6282 _mm256_setzero_ps (), 6283 (__mmask8) __U); 6284} 6285 6286static __inline__ void __DEFAULT_FN_ATTRS 6287_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 6288{ 6289 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 6290 (__v2df) __A, 6291 (__mmask8) __U); 6292} 6293 6294static __inline__ void __DEFAULT_FN_ATTRS 6295_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 6296{ 6297 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 6298 (__v4df) __A, 6299 (__mmask8) __U); 6300} 6301 6302static __inline__ void __DEFAULT_FN_ATTRS 6303_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 6304{ 6305 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 6306 (__v4sf) __A, 6307 (__mmask8) __U); 6308} 6309 6310static __inline__ void __DEFAULT_FN_ATTRS 6311_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 6312{ 6313 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 6314 (__v8sf) __A, 6315 (__mmask8) __U); 6316} 6317 6318static __inline__ void __DEFAULT_FN_ATTRS 6319_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 6320{ 6321 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 6322 (__v2di) __A, 6323 (__mmask8) __U); 6324} 6325 6326static __inline__ void __DEFAULT_FN_ATTRS 6327_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 6328{ 6329 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 6330 (__v4di) __A, 6331 (__mmask8) __U); 6332} 6333 6334static __inline__ void __DEFAULT_FN_ATTRS 6335_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 6336{ 6337 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 6338 (__v4si) __A, 6339 (__mmask8) __U); 6340} 6341 6342static __inline__ void __DEFAULT_FN_ATTRS 6343_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 6344{ 6345 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 6346 (__v8si) __A, 6347 (__mmask8) __U); 6348} 6349 6350static __inline__ void __DEFAULT_FN_ATTRS 6351_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 6352{ 6353 __builtin_ia32_storeupd128_mask ((__v2df *) __P, 6354 (__v2df) __A, 6355 (__mmask8) __U); 6356} 6357 6358static __inline__ void __DEFAULT_FN_ATTRS 6359_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 6360{ 6361 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 6362 (__v4df) __A, 6363 (__mmask8) __U); 6364} 6365 6366static __inline__ void __DEFAULT_FN_ATTRS 6367_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 6368{ 6369 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 6370 (__v4sf) __A, 6371 (__mmask8) __U); 6372} 6373 6374static __inline__ void __DEFAULT_FN_ATTRS 6375_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 6376{ 6377 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 6378 (__v8sf) __A, 6379 (__mmask8) __U); 6380} 6381 6382 6383static __inline__ __m128d __DEFAULT_FN_ATTRS 6384_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6385{ 6386 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6387 (__v2df)_mm_unpackhi_pd(__A, __B), 6388 (__v2df)__W); 6389} 6390 6391static __inline__ __m128d __DEFAULT_FN_ATTRS 6392_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 6393{ 6394 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6395 (__v2df)_mm_unpackhi_pd(__A, __B), 6396 (__v2df)_mm_setzero_pd()); 6397} 6398 6399static __inline__ __m256d __DEFAULT_FN_ATTRS 6400_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 6401{ 6402 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6403 (__v4df)_mm256_unpackhi_pd(__A, __B), 6404 (__v4df)__W); 6405} 6406 6407static __inline__ __m256d __DEFAULT_FN_ATTRS 6408_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 6409{ 6410 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6411 (__v4df)_mm256_unpackhi_pd(__A, __B), 6412 (__v4df)_mm256_setzero_pd()); 6413} 6414 6415static __inline__ __m128 __DEFAULT_FN_ATTRS 6416_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6417{ 6418 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6419 (__v4sf)_mm_unpackhi_ps(__A, __B), 6420 (__v4sf)__W); 6421} 6422 6423static __inline__ __m128 __DEFAULT_FN_ATTRS 6424_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 6425{ 6426 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6427 (__v4sf)_mm_unpackhi_ps(__A, __B), 6428 (__v4sf)_mm_setzero_ps()); 6429} 6430 6431static __inline__ __m256 __DEFAULT_FN_ATTRS 6432_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 6433{ 6434 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6435 (__v8sf)_mm256_unpackhi_ps(__A, __B), 6436 (__v8sf)__W); 6437} 6438 6439static __inline__ __m256 __DEFAULT_FN_ATTRS 6440_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 6441{ 6442 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6443 (__v8sf)_mm256_unpackhi_ps(__A, __B), 6444 (__v8sf)_mm256_setzero_ps()); 6445} 6446 6447static __inline__ __m128d __DEFAULT_FN_ATTRS 6448_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 6449{ 6450 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6451 (__v2df)_mm_unpacklo_pd(__A, __B), 6452 (__v2df)__W); 6453} 6454 6455static __inline__ __m128d __DEFAULT_FN_ATTRS 6456_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 6457{ 6458 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6459 (__v2df)_mm_unpacklo_pd(__A, __B), 6460 (__v2df)_mm_setzero_pd()); 6461} 6462 6463static __inline__ __m256d __DEFAULT_FN_ATTRS 6464_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 6465{ 6466 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6467 (__v4df)_mm256_unpacklo_pd(__A, __B), 6468 (__v4df)__W); 6469} 6470 6471static __inline__ __m256d __DEFAULT_FN_ATTRS 6472_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 6473{ 6474 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6475 (__v4df)_mm256_unpacklo_pd(__A, __B), 6476 (__v4df)_mm256_setzero_pd()); 6477} 6478 6479static __inline__ __m128 __DEFAULT_FN_ATTRS 6480_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 6481{ 6482 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6483 (__v4sf)_mm_unpacklo_ps(__A, __B), 6484 (__v4sf)__W); 6485} 6486 6487static __inline__ __m128 __DEFAULT_FN_ATTRS 6488_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 6489{ 6490 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6491 (__v4sf)_mm_unpacklo_ps(__A, __B), 6492 (__v4sf)_mm_setzero_ps()); 6493} 6494 6495static __inline__ __m256 __DEFAULT_FN_ATTRS 6496_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 6497{ 6498 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6499 (__v8sf)_mm256_unpacklo_ps(__A, __B), 6500 (__v8sf)__W); 6501} 6502 6503static __inline__ __m256 __DEFAULT_FN_ATTRS 6504_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 6505{ 6506 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6507 (__v8sf)_mm256_unpacklo_ps(__A, __B), 6508 (__v8sf)_mm256_setzero_ps()); 6509} 6510 6511static __inline__ __m128d __DEFAULT_FN_ATTRS 6512_mm_rcp14_pd (__m128d __A) 6513{ 6514 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6515 (__v2df) 6516 _mm_setzero_pd (), 6517 (__mmask8) -1); 6518} 6519 6520static __inline__ __m128d __DEFAULT_FN_ATTRS 6521_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6522{ 6523 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6524 (__v2df) __W, 6525 (__mmask8) __U); 6526} 6527 6528static __inline__ __m128d __DEFAULT_FN_ATTRS 6529_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 6530{ 6531 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 6532 (__v2df) 6533 _mm_setzero_pd (), 6534 (__mmask8) __U); 6535} 6536 6537static __inline__ __m256d __DEFAULT_FN_ATTRS 6538_mm256_rcp14_pd (__m256d __A) 6539{ 6540 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6541 (__v4df) 6542 _mm256_setzero_pd (), 6543 (__mmask8) -1); 6544} 6545 6546static __inline__ __m256d __DEFAULT_FN_ATTRS 6547_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6548{ 6549 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6550 (__v4df) __W, 6551 (__mmask8) __U); 6552} 6553 6554static __inline__ __m256d __DEFAULT_FN_ATTRS 6555_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 6556{ 6557 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 6558 (__v4df) 6559 _mm256_setzero_pd (), 6560 (__mmask8) __U); 6561} 6562 6563static __inline__ __m128 __DEFAULT_FN_ATTRS 6564_mm_rcp14_ps (__m128 __A) 6565{ 6566 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6567 (__v4sf) 6568 _mm_setzero_ps (), 6569 (__mmask8) -1); 6570} 6571 6572static __inline__ __m128 __DEFAULT_FN_ATTRS 6573_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6574{ 6575 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6576 (__v4sf) __W, 6577 (__mmask8) __U); 6578} 6579 6580static __inline__ __m128 __DEFAULT_FN_ATTRS 6581_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6582{ 6583 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6584 (__v4sf) 6585 _mm_setzero_ps (), 6586 (__mmask8) __U); 6587} 6588 6589static __inline__ __m256 __DEFAULT_FN_ATTRS 6590_mm256_rcp14_ps (__m256 __A) 6591{ 6592 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6593 (__v8sf) 6594 _mm256_setzero_ps (), 6595 (__mmask8) -1); 6596} 6597 6598static __inline__ __m256 __DEFAULT_FN_ATTRS 6599_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6600{ 6601 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6602 (__v8sf) __W, 6603 (__mmask8) __U); 6604} 6605 6606static __inline__ __m256 __DEFAULT_FN_ATTRS 6607_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6608{ 6609 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6610 (__v8sf) 6611 _mm256_setzero_ps (), 6612 (__mmask8) __U); 6613} 6614 6615#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6616 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6617 (__v2df)_mm_permute_pd((X), (C)), \ 6618 (__v2df)(__m128d)(W)); }) 6619 6620#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \ 6621 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6622 (__v2df)_mm_permute_pd((X), (C)), \ 6623 (__v2df)_mm_setzero_pd()); }) 6624 6625#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \ 6626 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6627 (__v4df)_mm256_permute_pd((X), (C)), \ 6628 (__v4df)(__m256d)(W)); }) 6629 6630#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \ 6631 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6632 (__v4df)_mm256_permute_pd((X), (C)), \ 6633 (__v4df)_mm256_setzero_pd()); }) 6634 6635#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6636 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6637 (__v4sf)_mm_permute_ps((X), (C)), \ 6638 (__v4sf)(__m128)(W)); }) 6639 6640#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \ 6641 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6642 (__v4sf)_mm_permute_ps((X), (C)), \ 6643 (__v4sf)_mm_setzero_ps()); }) 6644 6645#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \ 6646 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6647 (__v8sf)_mm256_permute_ps((X), (C)), \ 6648 (__v8sf)(__m256)(W)); }) 6649 6650#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \ 6651 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6652 (__v8sf)_mm256_permute_ps((X), (C)), \ 6653 (__v8sf)_mm256_setzero_ps()); }) 6654 6655static __inline__ __m128d __DEFAULT_FN_ATTRS 6656_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A, 6657 __m128i __C) 6658{ 6659 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A, 6660 (__v2di) __C, 6661 (__v2df) __W, 6662 (__mmask8) __U); 6663} 6664 6665static __inline__ __m128d __DEFAULT_FN_ATTRS 6666_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C) 6667{ 6668 return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A, 6669 (__v2di) __C, 6670 (__v2df) 6671 _mm_setzero_pd (), 6672 (__mmask8) __U); 6673} 6674 6675static __inline__ __m256d __DEFAULT_FN_ATTRS 6676_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A, 6677 __m256i __C) 6678{ 6679 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A, 6680 (__v4di) __C, 6681 (__v4df) __W, 6682 (__mmask8) 6683 __U); 6684} 6685 6686static __inline__ __m256d __DEFAULT_FN_ATTRS 6687_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C) 6688{ 6689 return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A, 6690 (__v4di) __C, 6691 (__v4df) 6692 _mm256_setzero_pd (), 6693 (__mmask8) 6694 __U); 6695} 6696 6697static __inline__ __m128 __DEFAULT_FN_ATTRS 6698_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A, 6699 __m128i __C) 6700{ 6701 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A, 6702 (__v4si) __C, 6703 (__v4sf) __W, 6704 (__mmask8) __U); 6705} 6706 6707static __inline__ __m128 __DEFAULT_FN_ATTRS 6708_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C) 6709{ 6710 return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A, 6711 (__v4si) __C, 6712 (__v4sf) 6713 _mm_setzero_ps (), 6714 (__mmask8) __U); 6715} 6716 6717static __inline__ __m256 __DEFAULT_FN_ATTRS 6718_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A, 6719 __m256i __C) 6720{ 6721 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A, 6722 (__v8si) __C, 6723 (__v8sf) __W, 6724 (__mmask8) __U); 6725} 6726 6727static __inline__ __m256 __DEFAULT_FN_ATTRS 6728_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C) 6729{ 6730 return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A, 6731 (__v8si) __C, 6732 (__v8sf) 6733 _mm256_setzero_ps (), 6734 (__mmask8) __U); 6735} 6736 6737static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6738_mm_test_epi32_mask (__m128i __A, __m128i __B) 6739{ 6740 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 6741 (__v4si) __B, 6742 (__mmask8) -1); 6743} 6744 6745static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6746_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6747{ 6748 return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A, 6749 (__v4si) __B, __U); 6750} 6751 6752static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6753_mm256_test_epi32_mask (__m256i __A, __m256i __B) 6754{ 6755 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 6756 (__v8si) __B, 6757 (__mmask8) -1); 6758} 6759 6760static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6761_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6762{ 6763 return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A, 6764 (__v8si) __B, __U); 6765} 6766 6767static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6768_mm_test_epi64_mask (__m128i __A, __m128i __B) 6769{ 6770 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 6771 (__v2di) __B, 6772 (__mmask8) -1); 6773} 6774 6775static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6776_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6777{ 6778 return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A, 6779 (__v2di) __B, __U); 6780} 6781 6782static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6783_mm256_test_epi64_mask (__m256i __A, __m256i __B) 6784{ 6785 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 6786 (__v4di) __B, 6787 (__mmask8) -1); 6788} 6789 6790static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6791_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6792{ 6793 return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A, 6794 (__v4di) __B, __U); 6795} 6796 6797static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6798_mm_testn_epi32_mask (__m128i __A, __m128i __B) 6799{ 6800 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 6801 (__v4si) __B, 6802 (__mmask8) -1); 6803} 6804 6805static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6806_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6807{ 6808 return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A, 6809 (__v4si) __B, __U); 6810} 6811 6812static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6813_mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6814{ 6815 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 6816 (__v8si) __B, 6817 (__mmask8) -1); 6818} 6819 6820static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6821_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6822{ 6823 return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A, 6824 (__v8si) __B, __U); 6825} 6826 6827static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6828_mm_testn_epi64_mask (__m128i __A, __m128i __B) 6829{ 6830 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 6831 (__v2di) __B, 6832 (__mmask8) -1); 6833} 6834 6835static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6836_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6837{ 6838 return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A, 6839 (__v2di) __B, __U); 6840} 6841 6842static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6843_mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6844{ 6845 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 6846 (__v4di) __B, 6847 (__mmask8) -1); 6848} 6849 6850static __inline__ __mmask8 __DEFAULT_FN_ATTRS 6851_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6852{ 6853 return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A, 6854 (__v4di) __B, __U); 6855} 6856 6857 6858 6859static __inline__ __m128i __DEFAULT_FN_ATTRS 6860_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6861{ 6862 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6863 (__v4si)_mm_unpackhi_epi32(__A, __B), 6864 (__v4si)__W); 6865} 6866 6867static __inline__ __m128i __DEFAULT_FN_ATTRS 6868_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6869{ 6870 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6871 (__v4si)_mm_unpackhi_epi32(__A, __B), 6872 (__v4si)_mm_setzero_si128()); 6873} 6874 6875static __inline__ __m256i __DEFAULT_FN_ATTRS 6876_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6877{ 6878 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6879 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6880 (__v8si)__W); 6881} 6882 6883static __inline__ __m256i __DEFAULT_FN_ATTRS 6884_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6885{ 6886 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6887 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6888 (__v8si)_mm256_setzero_si256()); 6889} 6890 6891static __inline__ __m128i __DEFAULT_FN_ATTRS 6892_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6893{ 6894 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6895 (__v2di)_mm_unpackhi_epi64(__A, __B), 6896 (__v2di)__W); 6897} 6898 6899static __inline__ __m128i __DEFAULT_FN_ATTRS 6900_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6901{ 6902 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6903 (__v2di)_mm_unpackhi_epi64(__A, __B), 6904 (__v2di)_mm_setzero_di()); 6905} 6906 6907static __inline__ __m256i __DEFAULT_FN_ATTRS 6908_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6909{ 6910 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6911 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6912 (__v4di)__W); 6913} 6914 6915static __inline__ __m256i __DEFAULT_FN_ATTRS 6916_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6917{ 6918 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6919 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6920 (__v4di)_mm256_setzero_si256()); 6921} 6922 6923static __inline__ __m128i __DEFAULT_FN_ATTRS 6924_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6925{ 6926 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6927 (__v4si)_mm_unpacklo_epi32(__A, __B), 6928 (__v4si)__W); 6929} 6930 6931static __inline__ __m128i __DEFAULT_FN_ATTRS 6932_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6933{ 6934 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6935 (__v4si)_mm_unpacklo_epi32(__A, __B), 6936 (__v4si)_mm_setzero_si128()); 6937} 6938 6939static __inline__ __m256i __DEFAULT_FN_ATTRS 6940_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6941{ 6942 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6943 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6944 (__v8si)__W); 6945} 6946 6947static __inline__ __m256i __DEFAULT_FN_ATTRS 6948_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6949{ 6950 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6951 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6952 (__v8si)_mm256_setzero_si256()); 6953} 6954 6955static __inline__ __m128i __DEFAULT_FN_ATTRS 6956_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6957{ 6958 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6959 (__v2di)_mm_unpacklo_epi64(__A, __B), 6960 (__v2di)__W); 6961} 6962 6963static __inline__ __m128i __DEFAULT_FN_ATTRS 6964_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6965{ 6966 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6967 (__v2di)_mm_unpacklo_epi64(__A, __B), 6968 (__v2di)_mm_setzero_di()); 6969} 6970 6971static __inline__ __m256i __DEFAULT_FN_ATTRS 6972_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6973{ 6974 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6975 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6976 (__v4di)__W); 6977} 6978 6979static __inline__ __m256i __DEFAULT_FN_ATTRS 6980_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6981{ 6982 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6983 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6984 (__v4di)_mm256_setzero_si256()); 6985} 6986 6987static __inline__ __m128i __DEFAULT_FN_ATTRS 6988_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A, 6989 __m128i __B) 6990{ 6991 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A, 6992 (__v4si) __B, 6993 (__v4si) __W, 6994 (__mmask8) __U); 6995} 6996 6997static __inline__ __m128i __DEFAULT_FN_ATTRS 6998_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 6999{ 7000 return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A, 7001 (__v4si) __B, 7002 (__v4si) 7003 _mm_setzero_si128 (), 7004 (__mmask8) __U); 7005} 7006 7007static __inline__ __m256i __DEFAULT_FN_ATTRS 7008_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A, 7009 __m128i __B) 7010{ 7011 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A, 7012 (__v4si) __B, 7013 (__v8si) __W, 7014 (__mmask8) __U); 7015} 7016 7017static __inline__ __m256i __DEFAULT_FN_ATTRS 7018_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B) 7019{ 7020 return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A, 7021 (__v4si) __B, 7022 (__v8si) 7023 _mm256_setzero_si256 (), 7024 (__mmask8) __U); 7025} 7026 7027#define _mm_mask_srai_epi32(W, U, A, imm) __extension__ ({ \ 7028 (__m128i)__builtin_ia32_psradi128_mask((__v4si)(__m128i)(A), (int)(imm), \ 7029 (__v4si)(__m128i)(W), \ 7030 (__mmask8)(U)); }) 7031 7032#define _mm_maskz_srai_epi32(U, A, imm) __extension__ ({ \ 7033 (__m128i)__builtin_ia32_psradi128_mask((__v4si)(__m128i)(A), (int)(imm), \ 7034 (__v4si)_mm_setzero_si128(), \ 7035 (__mmask8)(U)); }) 7036 7037#define _mm256_mask_srai_epi32(W, U, A, imm) __extension__ ({ \ 7038 (__m256i)__builtin_ia32_psradi256_mask((__v8si)(__m256i)(A), (int)(imm), \ 7039 (__v8si)(__m256i)(W), \ 7040 (__mmask8)(U)); }) 7041 7042#define _mm256_maskz_srai_epi32(U, A, imm) __extension__ ({ \ 7043 (__m256i)__builtin_ia32_psradi256_mask((__v8si)(__m256i)(A), (int)(imm), \ 7044 (__v8si)_mm256_setzero_si256(), \ 7045 (__mmask8)(U)); }) 7046 7047static __inline__ __m128i __DEFAULT_FN_ATTRS 7048_mm_sra_epi64 (__m128i __A, __m128i __B) 7049{ 7050 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 7051 (__v2di) __B, 7052 (__v2di) 7053 _mm_setzero_di (), 7054 (__mmask8) -1); 7055} 7056 7057static __inline__ __m128i __DEFAULT_FN_ATTRS 7058_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 7059 __m128i __B) 7060{ 7061 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 7062 (__v2di) __B, 7063 (__v2di) __W, 7064 (__mmask8) __U); 7065} 7066 7067static __inline__ __m128i __DEFAULT_FN_ATTRS 7068_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 7069{ 7070 return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, 7071 (__v2di) __B, 7072 (__v2di) 7073 _mm_setzero_di (), 7074 (__mmask8) __U); 7075} 7076 7077static __inline__ __m256i __DEFAULT_FN_ATTRS 7078_mm256_sra_epi64 (__m256i __A, __m128i __B) 7079{ 7080 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 7081 (__v2di) __B, 7082 (__v4di) 7083 _mm256_setzero_si256 (), 7084 (__mmask8) -1); 7085} 7086 7087static __inline__ __m256i __DEFAULT_FN_ATTRS 7088_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 7089 __m128i __B) 7090{ 7091 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 7092 (__v2di) __B, 7093 (__v4di) __W, 7094 (__mmask8) __U); 7095} 7096 7097static __inline__ __m256i __DEFAULT_FN_ATTRS 7098_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B) 7099{ 7100 return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, 7101 (__v2di) __B, 7102 (__v4di) 7103 _mm256_setzero_si256 (), 7104 (__mmask8) __U); 7105} 7106 7107#define _mm_srai_epi64(A, imm) __extension__ ({ \ 7108 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ 7109 (__v2di)_mm_setzero_di(), \ 7110 (__mmask8)-1); }) 7111 7112#define _mm_mask_srai_epi64(W, U, A, imm) __extension__ ({ \ 7113 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ 7114 (__v2di)(__m128i)(W), \ 7115 (__mmask8)(U)); }) 7116 7117#define _mm_maskz_srai_epi64(U, A, imm) __extension__ ({ \ 7118 (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ 7119 (__v2di)_mm_setzero_si128(), \ 7120 (__mmask8)(U)); }) 7121 7122#define _mm256_srai_epi64(A, imm) __extension__ ({ \ 7123 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ 7124 (__v4di)_mm256_setzero_si256(), \ 7125 (__mmask8)-1); }) 7126 7127#define _mm256_mask_srai_epi64(W, U, A, imm) __extension__ ({ \ 7128 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ 7129 (__v4di)(__m256i)(W), \ 7130 (__mmask8)(U)); }) 7131 7132#define _mm256_maskz_srai_epi64(U, A, imm) __extension__ ({ \ 7133 (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ 7134 (__v4di)_mm256_setzero_si256(), \ 7135 (__mmask8)(U)); }) 7136 7137#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 7138 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 7139 (__v4si)(__m128i)(B), \ 7140 (__v4si)(__m128i)(C), (int)(imm), \ 7141 (__mmask8)-1); }) 7142 7143#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 7144 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 7145 (__v4si)(__m128i)(B), \ 7146 (__v4si)(__m128i)(C), (int)(imm), \ 7147 (__mmask8)(U)); }) 7148 7149#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 7150 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ 7151 (__v4si)(__m128i)(B), \ 7152 (__v4si)(__m128i)(C), (int)(imm), \ 7153 (__mmask8)(U)); }) 7154 7155#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ 7156 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 7157 (__v8si)(__m256i)(B), \ 7158 (__v8si)(__m256i)(C), (int)(imm), \ 7159 (__mmask8)-1); }) 7160 7161#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \ 7162 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 7163 (__v8si)(__m256i)(B), \ 7164 (__v8si)(__m256i)(C), (int)(imm), \ 7165 (__mmask8)(U)); }) 7166 7167#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \ 7168 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ 7169 (__v8si)(__m256i)(B), \ 7170 (__v8si)(__m256i)(C), (int)(imm), \ 7171 (__mmask8)(U)); }) 7172 7173#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 7174 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 7175 (__v2di)(__m128i)(B), \ 7176 (__v2di)(__m128i)(C), (int)(imm), \ 7177 (__mmask8)-1); }) 7178 7179#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 7180 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 7181 (__v2di)(__m128i)(B), \ 7182 (__v2di)(__m128i)(C), (int)(imm), \ 7183 (__mmask8)(U)); }) 7184 7185#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 7186 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ 7187 (__v2di)(__m128i)(B), \ 7188 (__v2di)(__m128i)(C), (int)(imm), \ 7189 (__mmask8)(U)); }) 7190 7191#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \ 7192 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 7193 (__v4di)(__m256i)(B), \ 7194 (__v4di)(__m256i)(C), (int)(imm), \ 7195 (__mmask8)-1); }) 7196 7197#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \ 7198 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 7199 (__v4di)(__m256i)(B), \ 7200 (__v4di)(__m256i)(C), (int)(imm), \ 7201 (__mmask8)(U)); }) 7202 7203#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \ 7204 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ 7205 (__v4di)(__m256i)(B), \ 7206 (__v4di)(__m256i)(C), (int)(imm), \ 7207 (__mmask8)(U)); }) 7208 7209 7210 7211#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \ 7212 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 7213 (__v8sf)(__m256)(B), (int)(imm), \ 7214 (__v8sf)_mm256_setzero_ps(), \ 7215 (__mmask8)-1); }) 7216 7217#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \ 7218 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 7219 (__v8sf)(__m256)(B), (int)(imm), \ 7220 (__v8sf)(__m256)(W), \ 7221 (__mmask8)(U)); }) 7222 7223#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \ 7224 (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \ 7225 (__v8sf)(__m256)(B), (int)(imm), \ 7226 (__v8sf)_mm256_setzero_ps(), \ 7227 (__mmask8)(U)); }) 7228 7229#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \ 7230 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7231 (__v4df)(__m256d)(B), \ 7232 (int)(imm), \ 7233 (__v4df)_mm256_setzero_pd(), \ 7234 (__mmask8)-1); }) 7235 7236#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \ 7237 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7238 (__v4df)(__m256d)(B), \ 7239 (int)(imm), \ 7240 (__v4df)(__m256d)(W), \ 7241 (__mmask8)(U)); }) 7242 7243#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \ 7244 (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \ 7245 (__v4df)(__m256d)(B), \ 7246 (int)(imm), \ 7247 (__v4df)_mm256_setzero_pd(), \ 7248 (__mmask8)(U)); }) 7249 7250#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \ 7251 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7252 (__v8si)(__m256i)(B), \ 7253 (int)(imm), \ 7254 (__v8si)_mm256_setzero_si256(), \ 7255 (__mmask8)-1); }) 7256 7257#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \ 7258 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7259 (__v8si)(__m256i)(B), \ 7260 (int)(imm), \ 7261 (__v8si)(__m256i)(W), \ 7262 (__mmask8)(U)); }) 7263 7264#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \ 7265 (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \ 7266 (__v8si)(__m256i)(B), \ 7267 (int)(imm), \ 7268 (__v8si)_mm256_setzero_si256(), \ 7269 (__mmask8)(U)); }) 7270 7271#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \ 7272 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7273 (__v4di)(__m256i)(B), \ 7274 (int)(imm), \ 7275 (__v4di)_mm256_setzero_si256(), \ 7276 (__mmask8)-1); }) 7277 7278#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \ 7279 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7280 (__v4di)(__m256i)(B), \ 7281 (int)(imm), \ 7282 (__v4di)(__m256i)(W), \ 7283 (__mmask8)(U)); }) 7284 7285#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \ 7286 (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \ 7287 (__v4di)(__m256i)(B), \ 7288 (int)(imm), \ 7289 (__v4di)_mm256_setzero_si256(), \ 7290 (__mmask8)(U)); }) 7291 7292#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7293 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 7294 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 7295 (__v2df)(__m128d)(W)); }) 7296 7297#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7298 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 7299 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 7300 (__v2df)_mm_setzero_pd()); }) 7301 7302#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \ 7303 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 7304 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 7305 (__v4df)(__m256d)(W)); }) 7306 7307#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \ 7308 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 7309 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 7310 (__v4df)_mm256_setzero_pd()); }) 7311 7312#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7313 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 7314 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 7315 (__v4sf)(__m128)(W)); }) 7316 7317#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7318 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 7319 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 7320 (__v4sf)_mm_setzero_ps()); }) 7321 7322#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \ 7323 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7324 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 7325 (__v8sf)(__m256)(W)); }) 7326 7327#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \ 7328 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7329 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 7330 (__v8sf)_mm256_setzero_ps()); }) 7331 7332static __inline__ __m128d __DEFAULT_FN_ATTRS 7333_mm_rsqrt14_pd (__m128d __A) 7334{ 7335 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7336 (__v2df) 7337 _mm_setzero_pd (), 7338 (__mmask8) -1); 7339} 7340 7341static __inline__ __m128d __DEFAULT_FN_ATTRS 7342_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 7343{ 7344 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7345 (__v2df) __W, 7346 (__mmask8) __U); 7347} 7348 7349static __inline__ __m128d __DEFAULT_FN_ATTRS 7350_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 7351{ 7352 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 7353 (__v2df) 7354 _mm_setzero_pd (), 7355 (__mmask8) __U); 7356} 7357 7358static __inline__ __m256d __DEFAULT_FN_ATTRS 7359_mm256_rsqrt14_pd (__m256d __A) 7360{ 7361 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7362 (__v4df) 7363 _mm256_setzero_pd (), 7364 (__mmask8) -1); 7365} 7366 7367static __inline__ __m256d __DEFAULT_FN_ATTRS 7368_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 7369{ 7370 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7371 (__v4df) __W, 7372 (__mmask8) __U); 7373} 7374 7375static __inline__ __m256d __DEFAULT_FN_ATTRS 7376_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 7377{ 7378 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 7379 (__v4df) 7380 _mm256_setzero_pd (), 7381 (__mmask8) __U); 7382} 7383 7384static __inline__ __m128 __DEFAULT_FN_ATTRS 7385_mm_rsqrt14_ps (__m128 __A) 7386{ 7387 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7388 (__v4sf) 7389 _mm_setzero_ps (), 7390 (__mmask8) -1); 7391} 7392 7393static __inline__ __m128 __DEFAULT_FN_ATTRS 7394_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 7395{ 7396 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7397 (__v4sf) __W, 7398 (__mmask8) __U); 7399} 7400 7401static __inline__ __m128 __DEFAULT_FN_ATTRS 7402_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 7403{ 7404 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 7405 (__v4sf) 7406 _mm_setzero_ps (), 7407 (__mmask8) __U); 7408} 7409 7410static __inline__ __m256 __DEFAULT_FN_ATTRS 7411_mm256_rsqrt14_ps (__m256 __A) 7412{ 7413 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7414 (__v8sf) 7415 _mm256_setzero_ps (), 7416 (__mmask8) -1); 7417} 7418 7419static __inline__ __m256 __DEFAULT_FN_ATTRS 7420_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 7421{ 7422 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7423 (__v8sf) __W, 7424 (__mmask8) __U); 7425} 7426 7427static __inline__ __m256 __DEFAULT_FN_ATTRS 7428_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 7429{ 7430 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 7431 (__v8sf) 7432 _mm256_setzero_ps (), 7433 (__mmask8) __U); 7434} 7435 7436static __inline__ __m256 __DEFAULT_FN_ATTRS 7437_mm256_broadcast_f32x4 (__m128 __A) 7438{ 7439 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 7440 (__v8sf)_mm256_undefined_pd (), 7441 (__mmask8) -1); 7442} 7443 7444static __inline__ __m256 __DEFAULT_FN_ATTRS 7445_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A) 7446{ 7447 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 7448 (__v8sf) __O, 7449 __M); 7450} 7451 7452static __inline__ __m256 __DEFAULT_FN_ATTRS 7453_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 7454{ 7455 return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A, 7456 (__v8sf) _mm256_setzero_ps (), 7457 __M); 7458} 7459 7460static __inline__ __m256i __DEFAULT_FN_ATTRS 7461_mm256_broadcast_i32x4 (__m128i __A) 7462{ 7463 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A, 7464 (__v8si)_mm256_undefined_si256 (), 7465 (__mmask8) -1); 7466} 7467 7468static __inline__ __m256i __DEFAULT_FN_ATTRS 7469_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A) 7470{ 7471 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A, 7472 (__v8si) 7473 __O, __M); 7474} 7475 7476static __inline__ __m256i __DEFAULT_FN_ATTRS 7477_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A) 7478{ 7479 return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) 7480 __A, 7481 (__v8si) _mm256_setzero_si256 (), 7482 __M); 7483} 7484 7485static __inline__ __m256d __DEFAULT_FN_ATTRS 7486_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 7487{ 7488 return (__m256d)__builtin_ia32_selectpd_256(__M, 7489 (__v4df) _mm256_broadcastsd_pd(__A), 7490 (__v4df) __O); 7491} 7492 7493static __inline__ __m256d __DEFAULT_FN_ATTRS 7494_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 7495{ 7496 return (__m256d)__builtin_ia32_selectpd_256(__M, 7497 (__v4df) _mm256_broadcastsd_pd(__A), 7498 (__v4df) _mm256_setzero_pd()); 7499} 7500 7501static __inline__ __m128 __DEFAULT_FN_ATTRS 7502_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 7503{ 7504 return (__m128)__builtin_ia32_selectps_128(__M, 7505 (__v4sf) _mm_broadcastss_ps(__A), 7506 (__v4sf) __O); 7507} 7508 7509static __inline__ __m128 __DEFAULT_FN_ATTRS 7510_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 7511{ 7512 return (__m128)__builtin_ia32_selectps_128(__M, 7513 (__v4sf) _mm_broadcastss_ps(__A), 7514 (__v4sf) _mm_setzero_ps()); 7515} 7516 7517static __inline__ __m256 __DEFAULT_FN_ATTRS 7518_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 7519{ 7520 return (__m256)__builtin_ia32_selectps_256(__M, 7521 (__v8sf) _mm256_broadcastss_ps(__A), 7522 (__v8sf) __O); 7523} 7524 7525static __inline__ __m256 __DEFAULT_FN_ATTRS 7526_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 7527{ 7528 return (__m256)__builtin_ia32_selectps_256(__M, 7529 (__v8sf) _mm256_broadcastss_ps(__A), 7530 (__v8sf) _mm256_setzero_ps()); 7531} 7532 7533static __inline__ __m128i __DEFAULT_FN_ATTRS 7534_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7535{ 7536 return (__m128i)__builtin_ia32_selectd_128(__M, 7537 (__v4si) _mm_broadcastd_epi32(__A), 7538 (__v4si) __O); 7539} 7540 7541static __inline__ __m128i __DEFAULT_FN_ATTRS 7542_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 7543{ 7544 return (__m128i)__builtin_ia32_selectd_128(__M, 7545 (__v4si) _mm_broadcastd_epi32(__A), 7546 (__v4si) _mm_setzero_si128()); 7547} 7548 7549static __inline__ __m256i __DEFAULT_FN_ATTRS 7550_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 7551{ 7552 return (__m256i)__builtin_ia32_selectd_256(__M, 7553 (__v8si) _mm256_broadcastd_epi32(__A), 7554 (__v8si) __O); 7555} 7556 7557static __inline__ __m256i __DEFAULT_FN_ATTRS 7558_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 7559{ 7560 return (__m256i)__builtin_ia32_selectd_256(__M, 7561 (__v8si) _mm256_broadcastd_epi32(__A), 7562 (__v8si) _mm256_setzero_si256()); 7563} 7564 7565static __inline__ __m128i __DEFAULT_FN_ATTRS 7566_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 7567{ 7568 return (__m128i)__builtin_ia32_selectq_128(__M, 7569 (__v2di) _mm_broadcastq_epi64(__A), 7570 (__v2di) __O); 7571} 7572 7573static __inline__ __m128i __DEFAULT_FN_ATTRS 7574_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 7575{ 7576 return (__m128i)__builtin_ia32_selectq_128(__M, 7577 (__v2di) _mm_broadcastq_epi64(__A), 7578 (__v2di) _mm_setzero_si128()); 7579} 7580 7581static __inline__ __m256i __DEFAULT_FN_ATTRS 7582_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 7583{ 7584 return (__m256i)__builtin_ia32_selectq_256(__M, 7585 (__v4di) _mm256_broadcastq_epi64(__A), 7586 (__v4di) __O); 7587} 7588 7589static __inline__ __m256i __DEFAULT_FN_ATTRS 7590_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 7591{ 7592 return (__m256i)__builtin_ia32_selectq_256(__M, 7593 (__v4di) _mm256_broadcastq_epi64(__A), 7594 (__v4di) _mm256_setzero_si256()); 7595} 7596 7597static __inline__ __m128i __DEFAULT_FN_ATTRS 7598_mm_cvtsepi32_epi8 (__m128i __A) 7599{ 7600 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7601 (__v16qi)_mm_undefined_si128(), 7602 (__mmask8) -1); 7603} 7604 7605static __inline__ __m128i __DEFAULT_FN_ATTRS 7606_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7607{ 7608 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7609 (__v16qi) __O, __M); 7610} 7611 7612static __inline__ __m128i __DEFAULT_FN_ATTRS 7613_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 7614{ 7615 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 7616 (__v16qi) _mm_setzero_si128 (), 7617 __M); 7618} 7619 7620static __inline__ void __DEFAULT_FN_ATTRS 7621_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7622{ 7623 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7624} 7625 7626static __inline__ __m128i __DEFAULT_FN_ATTRS 7627_mm256_cvtsepi32_epi8 (__m256i __A) 7628{ 7629 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7630 (__v16qi)_mm_undefined_si128(), 7631 (__mmask8) -1); 7632} 7633 7634static __inline__ __m128i __DEFAULT_FN_ATTRS 7635_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7636{ 7637 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7638 (__v16qi) __O, __M); 7639} 7640 7641static __inline__ __m128i __DEFAULT_FN_ATTRS 7642_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 7643{ 7644 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7645 (__v16qi) _mm_setzero_si128 (), 7646 __M); 7647} 7648 7649static __inline__ void __DEFAULT_FN_ATTRS 7650_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7651{ 7652 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7653} 7654 7655static __inline__ __m128i __DEFAULT_FN_ATTRS 7656_mm_cvtsepi32_epi16 (__m128i __A) 7657{ 7658 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7659 (__v8hi)_mm_setzero_si128 (), 7660 (__mmask8) -1); 7661} 7662 7663static __inline__ __m128i __DEFAULT_FN_ATTRS 7664_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7665{ 7666 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7667 (__v8hi)__O, 7668 __M); 7669} 7670 7671static __inline__ __m128i __DEFAULT_FN_ATTRS 7672_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 7673{ 7674 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7675 (__v8hi) _mm_setzero_si128 (), 7676 __M); 7677} 7678 7679static __inline__ void __DEFAULT_FN_ATTRS 7680_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7681{ 7682 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7683} 7684 7685static __inline__ __m128i __DEFAULT_FN_ATTRS 7686_mm256_cvtsepi32_epi16 (__m256i __A) 7687{ 7688 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7689 (__v8hi)_mm_undefined_si128(), 7690 (__mmask8) -1); 7691} 7692 7693static __inline__ __m128i __DEFAULT_FN_ATTRS 7694_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7695{ 7696 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7697 (__v8hi) __O, __M); 7698} 7699 7700static __inline__ __m128i __DEFAULT_FN_ATTRS 7701_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7702{ 7703 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7704 (__v8hi) _mm_setzero_si128 (), 7705 __M); 7706} 7707 7708static __inline__ void __DEFAULT_FN_ATTRS 7709_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7710{ 7711 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7712} 7713 7714static __inline__ __m128i __DEFAULT_FN_ATTRS 7715_mm_cvtsepi64_epi8 (__m128i __A) 7716{ 7717 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7718 (__v16qi)_mm_undefined_si128(), 7719 (__mmask8) -1); 7720} 7721 7722static __inline__ __m128i __DEFAULT_FN_ATTRS 7723_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7724{ 7725 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7726 (__v16qi) __O, __M); 7727} 7728 7729static __inline__ __m128i __DEFAULT_FN_ATTRS 7730_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7731{ 7732 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7733 (__v16qi) _mm_setzero_si128 (), 7734 __M); 7735} 7736 7737static __inline__ void __DEFAULT_FN_ATTRS 7738_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7739{ 7740 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7741} 7742 7743static __inline__ __m128i __DEFAULT_FN_ATTRS 7744_mm256_cvtsepi64_epi8 (__m256i __A) 7745{ 7746 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7747 (__v16qi)_mm_undefined_si128(), 7748 (__mmask8) -1); 7749} 7750 7751static __inline__ __m128i __DEFAULT_FN_ATTRS 7752_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7753{ 7754 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7755 (__v16qi) __O, __M); 7756} 7757 7758static __inline__ __m128i __DEFAULT_FN_ATTRS 7759_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7760{ 7761 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7762 (__v16qi) _mm_setzero_si128 (), 7763 __M); 7764} 7765 7766static __inline__ void __DEFAULT_FN_ATTRS 7767_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7768{ 7769 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7770} 7771 7772static __inline__ __m128i __DEFAULT_FN_ATTRS 7773_mm_cvtsepi64_epi32 (__m128i __A) 7774{ 7775 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7776 (__v4si)_mm_undefined_si128(), 7777 (__mmask8) -1); 7778} 7779 7780static __inline__ __m128i __DEFAULT_FN_ATTRS 7781_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7782{ 7783 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7784 (__v4si) __O, __M); 7785} 7786 7787static __inline__ __m128i __DEFAULT_FN_ATTRS 7788_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7789{ 7790 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7791 (__v4si) _mm_setzero_si128 (), 7792 __M); 7793} 7794 7795static __inline__ void __DEFAULT_FN_ATTRS 7796_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7797{ 7798 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7799} 7800 7801static __inline__ __m128i __DEFAULT_FN_ATTRS 7802_mm256_cvtsepi64_epi32 (__m256i __A) 7803{ 7804 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7805 (__v4si)_mm_undefined_si128(), 7806 (__mmask8) -1); 7807} 7808 7809static __inline__ __m128i __DEFAULT_FN_ATTRS 7810_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7811{ 7812 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7813 (__v4si)__O, 7814 __M); 7815} 7816 7817static __inline__ __m128i __DEFAULT_FN_ATTRS 7818_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7819{ 7820 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7821 (__v4si) _mm_setzero_si128 (), 7822 __M); 7823} 7824 7825static __inline__ void __DEFAULT_FN_ATTRS 7826_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7827{ 7828 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7829} 7830 7831static __inline__ __m128i __DEFAULT_FN_ATTRS 7832_mm_cvtsepi64_epi16 (__m128i __A) 7833{ 7834 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7835 (__v8hi)_mm_undefined_si128(), 7836 (__mmask8) -1); 7837} 7838 7839static __inline__ __m128i __DEFAULT_FN_ATTRS 7840_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7841{ 7842 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7843 (__v8hi) __O, __M); 7844} 7845 7846static __inline__ __m128i __DEFAULT_FN_ATTRS 7847_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7848{ 7849 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7850 (__v8hi) _mm_setzero_si128 (), 7851 __M); 7852} 7853 7854static __inline__ void __DEFAULT_FN_ATTRS 7855_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7856{ 7857 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7858} 7859 7860static __inline__ __m128i __DEFAULT_FN_ATTRS 7861_mm256_cvtsepi64_epi16 (__m256i __A) 7862{ 7863 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7864 (__v8hi)_mm_undefined_si128(), 7865 (__mmask8) -1); 7866} 7867 7868static __inline__ __m128i __DEFAULT_FN_ATTRS 7869_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7870{ 7871 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7872 (__v8hi) __O, __M); 7873} 7874 7875static __inline__ __m128i __DEFAULT_FN_ATTRS 7876_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7877{ 7878 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7879 (__v8hi) _mm_setzero_si128 (), 7880 __M); 7881} 7882 7883static __inline__ void __DEFAULT_FN_ATTRS 7884_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7885{ 7886 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7887} 7888 7889static __inline__ __m128i __DEFAULT_FN_ATTRS 7890_mm_cvtusepi32_epi8 (__m128i __A) 7891{ 7892 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7893 (__v16qi)_mm_undefined_si128(), 7894 (__mmask8) -1); 7895} 7896 7897static __inline__ __m128i __DEFAULT_FN_ATTRS 7898_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7899{ 7900 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7901 (__v16qi) __O, 7902 __M); 7903} 7904 7905static __inline__ __m128i __DEFAULT_FN_ATTRS 7906_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7907{ 7908 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7909 (__v16qi) _mm_setzero_si128 (), 7910 __M); 7911} 7912 7913static __inline__ void __DEFAULT_FN_ATTRS 7914_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7915{ 7916 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7917} 7918 7919static __inline__ __m128i __DEFAULT_FN_ATTRS 7920_mm256_cvtusepi32_epi8 (__m256i __A) 7921{ 7922 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7923 (__v16qi)_mm_undefined_si128(), 7924 (__mmask8) -1); 7925} 7926 7927static __inline__ __m128i __DEFAULT_FN_ATTRS 7928_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7929{ 7930 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7931 (__v16qi) __O, 7932 __M); 7933} 7934 7935static __inline__ __m128i __DEFAULT_FN_ATTRS 7936_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7937{ 7938 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7939 (__v16qi) _mm_setzero_si128 (), 7940 __M); 7941} 7942 7943static __inline__ void __DEFAULT_FN_ATTRS 7944_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7945{ 7946 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7947} 7948 7949static __inline__ __m128i __DEFAULT_FN_ATTRS 7950_mm_cvtusepi32_epi16 (__m128i __A) 7951{ 7952 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7953 (__v8hi)_mm_undefined_si128(), 7954 (__mmask8) -1); 7955} 7956 7957static __inline__ __m128i __DEFAULT_FN_ATTRS 7958_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7959{ 7960 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7961 (__v8hi) __O, __M); 7962} 7963 7964static __inline__ __m128i __DEFAULT_FN_ATTRS 7965_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 7966{ 7967 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7968 (__v8hi) _mm_setzero_si128 (), 7969 __M); 7970} 7971 7972static __inline__ void __DEFAULT_FN_ATTRS 7973_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7974{ 7975 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7976} 7977 7978static __inline__ __m128i __DEFAULT_FN_ATTRS 7979_mm256_cvtusepi32_epi16 (__m256i __A) 7980{ 7981 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7982 (__v8hi) _mm_undefined_si128(), 7983 (__mmask8) -1); 7984} 7985 7986static __inline__ __m128i __DEFAULT_FN_ATTRS 7987_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7988{ 7989 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7990 (__v8hi) __O, __M); 7991} 7992 7993static __inline__ __m128i __DEFAULT_FN_ATTRS 7994_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7995{ 7996 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7997 (__v8hi) _mm_setzero_si128 (), 7998 __M); 7999} 8000 8001static __inline__ void __DEFAULT_FN_ATTRS 8002_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8003{ 8004 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 8005} 8006 8007static __inline__ __m128i __DEFAULT_FN_ATTRS 8008_mm_cvtusepi64_epi8 (__m128i __A) 8009{ 8010 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 8011 (__v16qi)_mm_undefined_si128(), 8012 (__mmask8) -1); 8013} 8014 8015static __inline__ __m128i __DEFAULT_FN_ATTRS 8016_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 8017{ 8018 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 8019 (__v16qi) __O, 8020 __M); 8021} 8022 8023static __inline__ __m128i __DEFAULT_FN_ATTRS 8024_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 8025{ 8026 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 8027 (__v16qi) _mm_setzero_si128 (), 8028 __M); 8029} 8030 8031static __inline__ void __DEFAULT_FN_ATTRS 8032_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 8033{ 8034 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 8035} 8036 8037static __inline__ __m128i __DEFAULT_FN_ATTRS 8038_mm256_cvtusepi64_epi8 (__m256i __A) 8039{ 8040 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 8041 (__v16qi)_mm_undefined_si128(), 8042 (__mmask8) -1); 8043} 8044 8045static __inline__ __m128i __DEFAULT_FN_ATTRS 8046_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 8047{ 8048 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 8049 (__v16qi) __O, 8050 __M); 8051} 8052 8053static __inline__ __m128i __DEFAULT_FN_ATTRS 8054_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 8055{ 8056 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 8057 (__v16qi) _mm_setzero_si128 (), 8058 __M); 8059} 8060 8061static __inline__ void __DEFAULT_FN_ATTRS 8062_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 8063{ 8064 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 8065} 8066 8067static __inline__ __m128i __DEFAULT_FN_ATTRS 8068_mm_cvtusepi64_epi32 (__m128i __A) 8069{ 8070 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 8071 (__v4si)_mm_undefined_si128(), 8072 (__mmask8) -1); 8073} 8074 8075static __inline__ __m128i __DEFAULT_FN_ATTRS 8076_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 8077{ 8078 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 8079 (__v4si) __O, __M); 8080} 8081 8082static __inline__ __m128i __DEFAULT_FN_ATTRS 8083_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 8084{ 8085 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 8086 (__v4si) _mm_setzero_si128 (), 8087 __M); 8088} 8089 8090static __inline__ void __DEFAULT_FN_ATTRS 8091_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 8092{ 8093 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 8094} 8095 8096static __inline__ __m128i __DEFAULT_FN_ATTRS 8097_mm256_cvtusepi64_epi32 (__m256i __A) 8098{ 8099 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 8100 (__v4si)_mm_undefined_si128(), 8101 (__mmask8) -1); 8102} 8103 8104static __inline__ __m128i __DEFAULT_FN_ATTRS 8105_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 8106{ 8107 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 8108 (__v4si) __O, __M); 8109} 8110 8111static __inline__ __m128i __DEFAULT_FN_ATTRS 8112_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 8113{ 8114 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 8115 (__v4si) _mm_setzero_si128 (), 8116 __M); 8117} 8118 8119static __inline__ void __DEFAULT_FN_ATTRS 8120_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 8121{ 8122 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 8123} 8124 8125static __inline__ __m128i __DEFAULT_FN_ATTRS 8126_mm_cvtusepi64_epi16 (__m128i __A) 8127{ 8128 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 8129 (__v8hi)_mm_undefined_si128(), 8130 (__mmask8) -1); 8131} 8132 8133static __inline__ __m128i __DEFAULT_FN_ATTRS 8134_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8135{ 8136 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 8137 (__v8hi) __O, __M); 8138} 8139 8140static __inline__ __m128i __DEFAULT_FN_ATTRS 8141_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 8142{ 8143 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 8144 (__v8hi) _mm_setzero_si128 (), 8145 __M); 8146} 8147 8148static __inline__ void __DEFAULT_FN_ATTRS 8149_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8150{ 8151 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 8152} 8153 8154static __inline__ __m128i __DEFAULT_FN_ATTRS 8155_mm256_cvtusepi64_epi16 (__m256i __A) 8156{ 8157 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 8158 (__v8hi)_mm_undefined_si128(), 8159 (__mmask8) -1); 8160} 8161 8162static __inline__ __m128i __DEFAULT_FN_ATTRS 8163_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8164{ 8165 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 8166 (__v8hi) __O, __M); 8167} 8168 8169static __inline__ __m128i __DEFAULT_FN_ATTRS 8170_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 8171{ 8172 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 8173 (__v8hi) _mm_setzero_si128 (), 8174 __M); 8175} 8176 8177static __inline__ void __DEFAULT_FN_ATTRS 8178_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8179{ 8180 return __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 8181} 8182 8183static __inline__ __m128i __DEFAULT_FN_ATTRS 8184_mm_cvtepi32_epi8 (__m128i __A) 8185{ 8186 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 8187 (__v16qi)_mm_undefined_si128(), 8188 (__mmask8) -1); 8189} 8190 8191static __inline__ __m128i __DEFAULT_FN_ATTRS 8192_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 8193{ 8194 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 8195 (__v16qi) __O, __M); 8196} 8197 8198static __inline__ __m128i __DEFAULT_FN_ATTRS 8199_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 8200{ 8201 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 8202 (__v16qi) 8203 _mm_setzero_si128 (), 8204 __M); 8205} 8206 8207static __inline__ void __DEFAULT_FN_ATTRS 8208_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 8209{ 8210 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 8211} 8212 8213static __inline__ __m128i __DEFAULT_FN_ATTRS 8214_mm256_cvtepi32_epi8 (__m256i __A) 8215{ 8216 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 8217 (__v16qi)_mm_undefined_si128(), 8218 (__mmask8) -1); 8219} 8220 8221static __inline__ __m128i __DEFAULT_FN_ATTRS 8222_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 8223{ 8224 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 8225 (__v16qi) __O, __M); 8226} 8227 8228static __inline__ __m128i __DEFAULT_FN_ATTRS 8229_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 8230{ 8231 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 8232 (__v16qi) _mm_setzero_si128 (), 8233 __M); 8234} 8235 8236static __inline__ void __DEFAULT_FN_ATTRS 8237_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 8238{ 8239 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 8240} 8241 8242static __inline__ __m128i __DEFAULT_FN_ATTRS 8243_mm_cvtepi32_epi16 (__m128i __A) 8244{ 8245 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8246 (__v8hi) _mm_setzero_si128 (), 8247 (__mmask8) -1); 8248} 8249 8250static __inline__ __m128i __DEFAULT_FN_ATTRS 8251_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8252{ 8253 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8254 (__v8hi) __O, __M); 8255} 8256 8257static __inline__ __m128i __DEFAULT_FN_ATTRS 8258_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 8259{ 8260 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 8261 (__v8hi) _mm_setzero_si128 (), 8262 __M); 8263} 8264 8265static __inline__ void __DEFAULT_FN_ATTRS 8266_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8267{ 8268 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 8269} 8270 8271static __inline__ __m128i __DEFAULT_FN_ATTRS 8272_mm256_cvtepi32_epi16 (__m256i __A) 8273{ 8274 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8275 (__v8hi)_mm_setzero_si128 (), 8276 (__mmask8) -1); 8277} 8278 8279static __inline__ __m128i __DEFAULT_FN_ATTRS 8280_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8281{ 8282 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8283 (__v8hi) __O, __M); 8284} 8285 8286static __inline__ __m128i __DEFAULT_FN_ATTRS 8287_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 8288{ 8289 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 8290 (__v8hi) _mm_setzero_si128 (), 8291 __M); 8292} 8293 8294static __inline__ void __DEFAULT_FN_ATTRS 8295_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8296{ 8297 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 8298} 8299 8300static __inline__ __m128i __DEFAULT_FN_ATTRS 8301_mm_cvtepi64_epi8 (__m128i __A) 8302{ 8303 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8304 (__v16qi) _mm_undefined_si128(), 8305 (__mmask8) -1); 8306} 8307 8308static __inline__ __m128i __DEFAULT_FN_ATTRS 8309_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 8310{ 8311 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8312 (__v16qi) __O, __M); 8313} 8314 8315static __inline__ __m128i __DEFAULT_FN_ATTRS 8316_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 8317{ 8318 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 8319 (__v16qi) _mm_setzero_si128 (), 8320 __M); 8321} 8322 8323static __inline__ void __DEFAULT_FN_ATTRS 8324_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 8325{ 8326 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 8327} 8328 8329static __inline__ __m128i __DEFAULT_FN_ATTRS 8330_mm256_cvtepi64_epi8 (__m256i __A) 8331{ 8332 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8333 (__v16qi) _mm_undefined_si128(), 8334 (__mmask8) -1); 8335} 8336 8337static __inline__ __m128i __DEFAULT_FN_ATTRS 8338_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 8339{ 8340 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8341 (__v16qi) __O, __M); 8342} 8343 8344static __inline__ __m128i __DEFAULT_FN_ATTRS 8345_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 8346{ 8347 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 8348 (__v16qi) _mm_setzero_si128 (), 8349 __M); 8350} 8351 8352static __inline__ void __DEFAULT_FN_ATTRS 8353_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 8354{ 8355 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 8356} 8357 8358static __inline__ __m128i __DEFAULT_FN_ATTRS 8359_mm_cvtepi64_epi32 (__m128i __A) 8360{ 8361 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8362 (__v4si)_mm_undefined_si128(), 8363 (__mmask8) -1); 8364} 8365 8366static __inline__ __m128i __DEFAULT_FN_ATTRS 8367_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 8368{ 8369 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8370 (__v4si) __O, __M); 8371} 8372 8373static __inline__ __m128i __DEFAULT_FN_ATTRS 8374_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 8375{ 8376 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 8377 (__v4si) _mm_setzero_si128 (), 8378 __M); 8379} 8380 8381static __inline__ void __DEFAULT_FN_ATTRS 8382_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 8383{ 8384 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 8385} 8386 8387static __inline__ __m128i __DEFAULT_FN_ATTRS 8388_mm256_cvtepi64_epi32 (__m256i __A) 8389{ 8390 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8391 (__v4si) _mm_undefined_si128(), 8392 (__mmask8) -1); 8393} 8394 8395static __inline__ __m128i __DEFAULT_FN_ATTRS 8396_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 8397{ 8398 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8399 (__v4si) __O, __M); 8400} 8401 8402static __inline__ __m128i __DEFAULT_FN_ATTRS 8403_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 8404{ 8405 return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, 8406 (__v4si) _mm_setzero_si128 (), 8407 __M); 8408} 8409 8410static __inline__ void __DEFAULT_FN_ATTRS 8411_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 8412{ 8413 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 8414} 8415 8416static __inline__ __m128i __DEFAULT_FN_ATTRS 8417_mm_cvtepi64_epi16 (__m128i __A) 8418{ 8419 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8420 (__v8hi) _mm_undefined_si128(), 8421 (__mmask8) -1); 8422} 8423 8424static __inline__ __m128i __DEFAULT_FN_ATTRS 8425_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 8426{ 8427 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8428 (__v8hi)__O, 8429 __M); 8430} 8431 8432static __inline__ __m128i __DEFAULT_FN_ATTRS 8433_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 8434{ 8435 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 8436 (__v8hi) _mm_setzero_si128 (), 8437 __M); 8438} 8439 8440static __inline__ void __DEFAULT_FN_ATTRS 8441_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 8442{ 8443 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 8444} 8445 8446static __inline__ __m128i __DEFAULT_FN_ATTRS 8447_mm256_cvtepi64_epi16 (__m256i __A) 8448{ 8449 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8450 (__v8hi)_mm_undefined_si128(), 8451 (__mmask8) -1); 8452} 8453 8454static __inline__ __m128i __DEFAULT_FN_ATTRS 8455_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 8456{ 8457 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8458 (__v8hi) __O, __M); 8459} 8460 8461static __inline__ __m128i __DEFAULT_FN_ATTRS 8462_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 8463{ 8464 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 8465 (__v8hi) _mm_setzero_si128 (), 8466 __M); 8467} 8468 8469static __inline__ void __DEFAULT_FN_ATTRS 8470_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 8471{ 8472 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 8473} 8474 8475#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \ 8476 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 8477 (int)(imm), \ 8478 (__v4sf)_mm_setzero_ps(), \ 8479 (__mmask8)-1); }) 8480 8481#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \ 8482 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 8483 (int)(imm), \ 8484 (__v4sf)(__m128)(W), \ 8485 (__mmask8)(U)); }) 8486 8487#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \ 8488 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 8489 (int)(imm), \ 8490 (__v4sf)_mm_setzero_ps(), \ 8491 (__mmask8)(U)); }) 8492 8493#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \ 8494 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 8495 (int)(imm), \ 8496 (__v4si)_mm_setzero_si128(), \ 8497 (__mmask8)-1); }) 8498 8499#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ 8500 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 8501 (int)(imm), \ 8502 (__v4si)(__m128i)(W), \ 8503 (__mmask8)(U)); }) 8504 8505#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ 8506 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 8507 (int)(imm), \ 8508 (__v4si)_mm_setzero_si128(), \ 8509 (__mmask8)(U)); }) 8510 8511#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \ 8512 (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \ 8513 (__v4sf)(__m128)(B), (int)(imm), \ 8514 (__v8sf)_mm256_setzero_ps(), \ 8515 (__mmask8)-1); }) 8516 8517#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \ 8518 (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \ 8519 (__v4sf)(__m128)(B), (int)(imm), \ 8520 (__v8sf)(__m256)(W), \ 8521 (__mmask8)(U)); }) 8522 8523#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \ 8524 (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \ 8525 (__v4sf)(__m128)(B), (int)(imm), \ 8526 (__v8sf)_mm256_setzero_ps(), \ 8527 (__mmask8)(U)); }) 8528 8529#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \ 8530 (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \ 8531 (__v4si)(__m128i)(B), \ 8532 (int)(imm), \ 8533 (__v8si)_mm256_setzero_si256(), \ 8534 (__mmask8)-1); }) 8535 8536#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \ 8537 (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \ 8538 (__v4si)(__m128i)(B), \ 8539 (int)(imm), \ 8540 (__v8si)(__m256i)(W), \ 8541 (__mmask8)(U)); }) 8542 8543#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \ 8544 (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \ 8545 (__v4si)(__m128i)(B), \ 8546 (int)(imm), \ 8547 (__v8si)_mm256_setzero_si256(), \ 8548 (__mmask8)(U)); }) 8549 8550#define _mm_getmant_pd(A, B, C) __extension__({\ 8551 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8552 (int)(((C)<<2) | (B)), \ 8553 (__v2df)_mm_setzero_pd(), \ 8554 (__mmask8)-1); }) 8555 8556#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\ 8557 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8558 (int)(((C)<<2) | (B)), \ 8559 (__v2df)(__m128d)(W), \ 8560 (__mmask8)(U)); }) 8561 8562#define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\ 8563 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 8564 (int)(((C)<<2) | (B)), \ 8565 (__v2df)_mm_setzero_pd(), \ 8566 (__mmask8)(U)); }) 8567 8568#define _mm256_getmant_pd(A, B, C) __extension__ ({ \ 8569 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8570 (int)(((C)<<2) | (B)), \ 8571 (__v4df)_mm256_setzero_pd(), \ 8572 (__mmask8)-1); }) 8573 8574#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \ 8575 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8576 (int)(((C)<<2) | (B)), \ 8577 (__v4df)(__m256d)(W), \ 8578 (__mmask8)(U)); }) 8579 8580#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \ 8581 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 8582 (int)(((C)<<2) | (B)), \ 8583 (__v4df)_mm256_setzero_pd(), \ 8584 (__mmask8)(U)); }) 8585 8586#define _mm_getmant_ps(A, B, C) __extension__ ({ \ 8587 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8588 (int)(((C)<<2) | (B)), \ 8589 (__v4sf)_mm_setzero_ps(), \ 8590 (__mmask8)-1); }) 8591 8592#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8593 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8594 (int)(((C)<<2) | (B)), \ 8595 (__v4sf)(__m128)(W), \ 8596 (__mmask8)(U)); }) 8597 8598#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8599 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 8600 (int)(((C)<<2) | (B)), \ 8601 (__v4sf)_mm_setzero_ps(), \ 8602 (__mmask8)(U)); }) 8603 8604#define _mm256_getmant_ps(A, B, C) __extension__ ({ \ 8605 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8606 (int)(((C)<<2) | (B)), \ 8607 (__v8sf)_mm256_setzero_ps(), \ 8608 (__mmask8)-1); }) 8609 8610#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \ 8611 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8612 (int)(((C)<<2) | (B)), \ 8613 (__v8sf)(__m256)(W), \ 8614 (__mmask8)(U)); }) 8615 8616#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \ 8617 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 8618 (int)(((C)<<2) | (B)), \ 8619 (__v8sf)_mm256_setzero_ps(), \ 8620 (__mmask8)(U)); }) 8621 8622#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8623 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 8624 (double const *)(addr), \ 8625 (__v2di)(__m128i)(index), \ 8626 (__mmask8)(mask), (int)(scale)); }) 8627 8628#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8629 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 8630 (long long const *)(addr), \ 8631 (__v2di)(__m128i)(index), \ 8632 (__mmask8)(mask), (int)(scale)); }) 8633 8634#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8635 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 8636 (double const *)(addr), \ 8637 (__v4di)(__m256i)(index), \ 8638 (__mmask8)(mask), (int)(scale)); }) 8639 8640#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8641 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 8642 (long long const *)(addr), \ 8643 (__v4di)(__m256i)(index), \ 8644 (__mmask8)(mask), (int)(scale)); }) 8645 8646#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8647 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 8648 (float const *)(addr), \ 8649 (__v2di)(__m128i)(index), \ 8650 (__mmask8)(mask), (int)(scale)); }) 8651 8652#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8653 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 8654 (int const *)(addr), \ 8655 (__v2di)(__m128i)(index), \ 8656 (__mmask8)(mask), (int)(scale)); }) 8657 8658#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8659 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 8660 (float const *)(addr), \ 8661 (__v4di)(__m256i)(index), \ 8662 (__mmask8)(mask), (int)(scale)); }) 8663 8664#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8665 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8666 (int const *)(addr), \ 8667 (__v4di)(__m256i)(index), \ 8668 (__mmask8)(mask), (int)(scale)); }) 8669 8670#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8671 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8672 (double const *)(addr), \ 8673 (__v4si)(__m128i)(index), \ 8674 (__mmask8)(mask), (int)(scale)); }) 8675 8676#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8677 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8678 (long long const *)(addr), \ 8679 (__v4si)(__m128i)(index), \ 8680 (__mmask8)(mask), (int)(scale)); }) 8681 8682#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\ 8683 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8684 (double const *)(addr), \ 8685 (__v4si)(__m128i)(index), \ 8686 (__mmask8)(mask), (int)(scale)); }) 8687 8688#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\ 8689 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8690 (long long const *)(addr), \ 8691 (__v4si)(__m128i)(index), \ 8692 (__mmask8)(mask), (int)(scale)); }) 8693 8694#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8695 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8696 (float const *)(addr), \ 8697 (__v4si)(__m128i)(index), \ 8698 (__mmask8)(mask), (int)(scale)); }) 8699 8700#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8701 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8702 (int const *)(addr), \ 8703 (__v4si)(__m128i)(index), \ 8704 (__mmask8)(mask), (int)(scale)); }) 8705 8706#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\ 8707 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 8708 (float const *)(addr), \ 8709 (__v8si)(__m256i)(index), \ 8710 (__mmask8)(mask), (int)(scale)); }) 8711 8712#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\ 8713 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8714 (int const *)(addr), \ 8715 (__v8si)(__m256i)(index), \ 8716 (__mmask8)(mask), (int)(scale)); }) 8717 8718#define _mm256_permutex_pd(X, C) __extension__ ({ \ 8719 (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \ 8720 (__v4df)_mm256_undefined_pd(), \ 8721 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ 8722 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) 8723 8724#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \ 8725 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8726 (__v4df)_mm256_permutex_pd((X), (C)), \ 8727 (__v4df)(__m256d)(W)); }) 8728 8729#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \ 8730 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8731 (__v4df)_mm256_permutex_pd((X), (C)), \ 8732 (__v4df)_mm256_setzero_pd()); }) 8733 8734#define _mm256_permutex_epi64(X, C) __extension__ ({ \ 8735 (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \ 8736 (__v4di)_mm256_undefined_si256(), \ 8737 ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ 8738 ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); }) 8739 8740#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \ 8741 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8742 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8743 (__v4di)(__m256i)(W)); }) 8744 8745#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \ 8746 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8747 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8748 (__v4di)_mm256_setzero_si256()); }) 8749 8750static __inline__ __m256d __DEFAULT_FN_ATTRS 8751_mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8752{ 8753 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8754 (__v4di) __X, 8755 (__v4df) _mm256_undefined_si256 (), 8756 (__mmask8) -1); 8757} 8758 8759static __inline__ __m256d __DEFAULT_FN_ATTRS 8760_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8761 __m256d __Y) 8762{ 8763 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8764 (__v4di) __X, 8765 (__v4df) __W, 8766 (__mmask8) __U); 8767} 8768 8769static __inline__ __m256d __DEFAULT_FN_ATTRS 8770_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8771{ 8772 return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, 8773 (__v4di) __X, 8774 (__v4df) _mm256_setzero_pd (), 8775 (__mmask8) __U); 8776} 8777 8778static __inline__ __m256i __DEFAULT_FN_ATTRS 8779_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8780{ 8781 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8782 (__v4di) __X, 8783 (__v4di) _mm256_setzero_si256 (), 8784 (__mmask8) __M); 8785} 8786 8787static __inline__ __m256i __DEFAULT_FN_ATTRS 8788_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8789{ 8790 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8791 (__v4di) __X, 8792 (__v4di) _mm256_undefined_si256 (), 8793 (__mmask8) -1); 8794} 8795 8796static __inline__ __m256i __DEFAULT_FN_ATTRS 8797_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8798 __m256i __Y) 8799{ 8800 return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, 8801 (__v4di) __X, 8802 (__v4di) __W, 8803 __M); 8804} 8805 8806static __inline__ __m256 __DEFAULT_FN_ATTRS 8807_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, 8808 __m256 __Y) 8809{ 8810 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8811 (__v8si) __X, 8812 (__v8sf) __W, 8813 (__mmask8) __U); 8814} 8815 8816static __inline__ __m256 __DEFAULT_FN_ATTRS 8817_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) 8818{ 8819 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8820 (__v8si) __X, 8821 (__v8sf) _mm256_setzero_ps (), 8822 (__mmask8) __U); 8823} 8824 8825static __inline__ __m256 __DEFAULT_FN_ATTRS 8826_mm256_permutexvar_ps (__m256i __X, __m256 __Y) 8827{ 8828 return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, 8829 (__v8si) __X, 8830 (__v8sf) _mm256_undefined_si256 (), 8831 (__mmask8) -1); 8832} 8833 8834static __inline__ __m256i __DEFAULT_FN_ATTRS 8835_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) 8836{ 8837 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8838 (__v8si) __X, 8839 (__v8si) _mm256_setzero_si256 (), 8840 __M); 8841} 8842 8843static __inline__ __m256i __DEFAULT_FN_ATTRS 8844_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, 8845 __m256i __Y) 8846{ 8847 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8848 (__v8si) __X, 8849 (__v8si) __W, 8850 (__mmask8) __M); 8851} 8852 8853static __inline__ __m256i __DEFAULT_FN_ATTRS 8854_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y) 8855{ 8856 return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, 8857 (__v8si) __X, 8858 (__v8si) _mm256_undefined_si256(), 8859 (__mmask8) -1); 8860} 8861 8862#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \ 8863 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \ 8864 (__v4si)(__m128i)(B), (int)(imm), \ 8865 (__v4si)_mm_undefined_si128(), \ 8866 (__mmask8)-1); }) 8867 8868#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ 8869 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \ 8870 (__v4si)(__m128i)(B), (int)(imm), \ 8871 (__v4si)(__m128i)(W), \ 8872 (__mmask8)(U)); }) 8873 8874#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ 8875 (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__m128i)(A), \ 8876 (__v4si)(__m128i)(B), (int)(imm), \ 8877 (__v4si)_mm_setzero_si128(), \ 8878 (__mmask8)(U)); }) 8879 8880#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \ 8881 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \ 8882 (__v8si)(__m256i)(B), (int)(imm), \ 8883 (__v8si)_mm256_undefined_si256(), \ 8884 (__mmask8)-1); }) 8885 8886#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \ 8887 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \ 8888 (__v8si)(__m256i)(B), (int)(imm), \ 8889 (__v8si)(__m256i)(W), \ 8890 (__mmask8)(U)); }) 8891 8892#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \ 8893 (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__m256i)(A), \ 8894 (__v8si)(__m256i)(B), (int)(imm), \ 8895 (__v8si)_mm256_setzero_si256(), \ 8896 (__mmask8)(U)); }) 8897 8898#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \ 8899 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \ 8900 (__v2di)(__m128i)(B), (int)(imm), \ 8901 (__v2di)_mm_setzero_di(), \ 8902 (__mmask8)-1); }) 8903 8904#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 8905 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \ 8906 (__v2di)(__m128i)(B), (int)(imm), \ 8907 (__v2di)(__m128i)(W), \ 8908 (__mmask8)(U)); }) 8909 8910#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ 8911 (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__m128i)(A), \ 8912 (__v2di)(__m128i)(B), (int)(imm), \ 8913 (__v2di)_mm_setzero_di(), \ 8914 (__mmask8)(U)); }) 8915 8916#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \ 8917 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \ 8918 (__v4di)(__m256i)(B), (int)(imm), \ 8919 (__v4di)_mm256_undefined_pd(), \ 8920 (__mmask8)-1); }) 8921 8922#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \ 8923 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \ 8924 (__v4di)(__m256i)(B), (int)(imm), \ 8925 (__v4di)(__m256i)(W), \ 8926 (__mmask8)(U)); }) 8927 8928#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \ 8929 (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__m256i)(A), \ 8930 (__v4di)(__m256i)(B), (int)(imm), \ 8931 (__v4di)_mm256_setzero_si256(), \ 8932 (__mmask8)(U)); }) 8933 8934static __inline__ __m128 __DEFAULT_FN_ATTRS 8935_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8936{ 8937 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8938 (__v4sf)_mm_movehdup_ps(__A), 8939 (__v4sf)__W); 8940} 8941 8942static __inline__ __m128 __DEFAULT_FN_ATTRS 8943_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8944{ 8945 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8946 (__v4sf)_mm_movehdup_ps(__A), 8947 (__v4sf)_mm_setzero_ps()); 8948} 8949 8950static __inline__ __m256 __DEFAULT_FN_ATTRS 8951_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8952{ 8953 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8954 (__v8sf)_mm256_movehdup_ps(__A), 8955 (__v8sf)__W); 8956} 8957 8958static __inline__ __m256 __DEFAULT_FN_ATTRS 8959_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8960{ 8961 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8962 (__v8sf)_mm256_movehdup_ps(__A), 8963 (__v8sf)_mm256_setzero_ps()); 8964} 8965 8966static __inline__ __m128 __DEFAULT_FN_ATTRS 8967_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8968{ 8969 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8970 (__v4sf)_mm_moveldup_ps(__A), 8971 (__v4sf)__W); 8972} 8973 8974static __inline__ __m128 __DEFAULT_FN_ATTRS 8975_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8976{ 8977 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8978 (__v4sf)_mm_moveldup_ps(__A), 8979 (__v4sf)_mm_setzero_ps()); 8980} 8981 8982static __inline__ __m256 __DEFAULT_FN_ATTRS 8983_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8984{ 8985 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8986 (__v8sf)_mm256_moveldup_ps(__A), 8987 (__v8sf)__W); 8988} 8989 8990static __inline__ __m256 __DEFAULT_FN_ATTRS 8991_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8992{ 8993 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8994 (__v8sf)_mm256_moveldup_ps(__A), 8995 (__v8sf)_mm256_setzero_ps()); 8996} 8997 8998#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\ 8999 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 9000 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 9001 (__v8si)(__m256i)(W)); }) 9002 9003#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\ 9004 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 9005 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 9006 (__v8si)_mm256_setzero_si256()); }) 9007 9008#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\ 9009 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 9010 (__v4si)_mm_shuffle_epi32((A), (I)), \ 9011 (__v4si)(__m128i)(W)); }) 9012 9013#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\ 9014 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 9015 (__v4si)_mm_shuffle_epi32((A), (I)), \ 9016 (__v4si)_mm_setzero_si128()); }) 9017 9018static __inline__ __m128d __DEFAULT_FN_ATTRS 9019_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 9020{ 9021 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 9022 (__v2df) __A, 9023 (__v2df) __W); 9024} 9025 9026static __inline__ __m128d __DEFAULT_FN_ATTRS 9027_mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 9028{ 9029 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 9030 (__v2df) __A, 9031 (__v2df) _mm_setzero_pd ()); 9032} 9033 9034static __inline__ __m256d __DEFAULT_FN_ATTRS 9035_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 9036{ 9037 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 9038 (__v4df) __A, 9039 (__v4df) __W); 9040} 9041 9042static __inline__ __m256d __DEFAULT_FN_ATTRS 9043_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 9044{ 9045 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 9046 (__v4df) __A, 9047 (__v4df) _mm256_setzero_pd ()); 9048} 9049 9050static __inline__ __m128 __DEFAULT_FN_ATTRS 9051_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 9052{ 9053 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 9054 (__v4sf) __A, 9055 (__v4sf) __W); 9056} 9057 9058static __inline__ __m128 __DEFAULT_FN_ATTRS 9059_mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 9060{ 9061 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 9062 (__v4sf) __A, 9063 (__v4sf) _mm_setzero_ps ()); 9064} 9065 9066static __inline__ __m256 __DEFAULT_FN_ATTRS 9067_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 9068{ 9069 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 9070 (__v8sf) __A, 9071 (__v8sf) __W); 9072} 9073 9074static __inline__ __m256 __DEFAULT_FN_ATTRS 9075_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 9076{ 9077 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 9078 (__v8sf) __A, 9079 (__v8sf) _mm256_setzero_ps ()); 9080} 9081 9082static __inline__ __m128 __DEFAULT_FN_ATTRS 9083_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 9084{ 9085 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 9086 (__v4sf) __W, 9087 (__mmask8) __U); 9088} 9089 9090static __inline__ __m128 __DEFAULT_FN_ATTRS 9091_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 9092{ 9093 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 9094 (__v4sf) 9095 _mm_setzero_ps (), 9096 (__mmask8) __U); 9097} 9098 9099static __inline__ __m256 __DEFAULT_FN_ATTRS 9100_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 9101{ 9102 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 9103 (__v8sf) __W, 9104 (__mmask8) __U); 9105} 9106 9107static __inline__ __m256 __DEFAULT_FN_ATTRS 9108_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 9109{ 9110 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 9111 (__v8sf) 9112 _mm256_setzero_ps (), 9113 (__mmask8) __U); 9114} 9115 9116static __inline __m128i __DEFAULT_FN_ATTRS 9117_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) 9118{ 9119 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, 9120 (__v8hi) __W, 9121 (__mmask8) __U); 9122} 9123 9124static __inline __m128i __DEFAULT_FN_ATTRS 9125_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) 9126{ 9127 return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, 9128 (__v8hi) _mm_setzero_si128 (), 9129 (__mmask8) __U); 9130} 9131 9132#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ 9133 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 9134 (__v8hi)(__m128i)(W), \ 9135 (__mmask8)(U)); }) 9136 9137#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ 9138 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 9139 (__v8hi)_mm_setzero_si128(), \ 9140 (__mmask8)(U)); }) 9141 9142static __inline __m128i __DEFAULT_FN_ATTRS 9143_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) 9144{ 9145 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, 9146 (__v8hi) __W, 9147 (__mmask8) __U); 9148} 9149 9150static __inline __m128i __DEFAULT_FN_ATTRS 9151_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) 9152{ 9153 return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, 9154 (__v8hi) _mm_setzero_si128(), 9155 (__mmask8) __U); 9156} 9157#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ 9158 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 9159 (__v8hi)(__m128i)(W), \ 9160 (__mmask8)(U)); }) 9161 9162#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ 9163 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 9164 (__v8hi)_mm_setzero_si128(), \ 9165 (__mmask8)(U)); }) 9166 9167 9168#undef __DEFAULT_FN_ATTRS 9169 9170#endif /* __AVX512VLINTRIN_H */ 9171