avx512bwintrin.h revision b6d6993e6e6d3daf4d9876794254d20a134e37c2
1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512BWINTRIN_H 29#define __AVX512BWINTRIN_H 30 31typedef unsigned int __mmask32; 32typedef unsigned long long __mmask64; 33typedef char __v64qi __attribute__ ((__vector_size__ (64))); 34typedef short __v32hi __attribute__ ((__vector_size__ (64))); 35 36static __inline __v64qi __attribute__ ((__always_inline__, __nodebug__)) 37_mm512_setzero_qi (void) { 38 return (__v64qi){ 0, 0, 0, 0, 0, 0, 0, 0, 39 0, 0, 0, 0, 0, 0, 0, 0, 40 0, 0, 0, 0, 0, 0, 0, 0, 41 0, 0, 0, 0, 0, 0, 0, 0, 42 0, 0, 0, 0, 0, 0, 0, 0, 43 0, 0, 0, 0, 0, 0, 0, 0, 44 0, 0, 0, 0, 0, 0, 0, 0, 45 0, 0, 0, 0, 0, 0, 0, 0 }; 46} 47 48static __inline __v32hi __attribute__ ((__always_inline__, __nodebug__)) 49_mm512_setzero_hi (void) { 50 return (__v32hi){ 0, 0, 0, 0, 0, 0, 0, 0, 51 0, 0, 0, 0, 0, 0, 0, 0, 52 0, 0, 0, 0, 0, 0, 0, 0, 53 0, 0, 0, 0, 0, 0, 0, 0 }; 54} 55 56/* Integer compare */ 57 58static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 59_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { 60 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, 61 (__mmask64)-1); 62} 63 64static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 65_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 66 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, 67 __u); 68} 69 70static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 71_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) { 72 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, 73 (__mmask64)-1); 74} 75 76static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 77_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 78 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, 79 __u); 80} 81 82static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 83_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) { 84 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, 85 (__mmask32)-1); 86} 87 88static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 89_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 90 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, 91 __u); 92} 93 94static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 95_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) { 96 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, 97 (__mmask32)-1); 98} 99 100static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 101_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 102 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, 103 __u); 104} 105 106static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 107_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) { 108 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 109 (__mmask64)-1); 110} 111 112static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 113_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 114 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 115 __u); 116} 117 118static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 119_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) { 120 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 121 (__mmask64)-1); 122} 123 124static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 125_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 126 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 127 __u); 128} 129 130static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 131_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) { 132 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 133 (__mmask32)-1); 134} 135 136static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 137_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 138 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 139 __u); 140} 141 142static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 143_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) { 144 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 145 (__mmask32)-1); 146} 147 148static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 149_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 150 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 151 __u); 152} 153 154static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 155_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) { 156 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, 157 (__mmask64)-1); 158} 159 160static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 161_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 162 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, 163 __u); 164} 165 166static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 167_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) { 168 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, 169 (__mmask64)-1); 170} 171 172static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 173_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 174 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, 175 __u); 176} 177 178static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 179_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) { 180 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, 181 (__mmask32)-1); 182} 183 184static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 185_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 186 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, 187 __u); 188} 189 190static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 191_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) { 192 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, 193 (__mmask32)-1); 194} 195 196static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 197_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 198 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, 199 __u); 200} 201 202static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 203_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) { 204 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 205 (__mmask64)-1); 206} 207 208static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 209_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 210 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 211 __u); 212} 213 214static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 215_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) { 216 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 217 (__mmask64)-1); 218} 219 220static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 221_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 222 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 223 __u); 224} 225 226static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 227_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) { 228 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 229 (__mmask32)-1); 230} 231 232static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 233_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 234 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 235 __u); 236} 237 238static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 239_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) { 240 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 241 (__mmask32)-1); 242} 243 244static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 245_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 246 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 247 __u); 248} 249 250static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 251_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) { 252 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 253 (__mmask64)-1); 254} 255 256static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 257_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 258 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 259 __u); 260} 261 262static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 263_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) { 264 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 265 (__mmask64)-1); 266} 267 268static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 269_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 270 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 271 __u); 272} 273 274static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 275_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) { 276 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 277 (__mmask32)-1); 278} 279 280static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 281_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 282 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 283 __u); 284} 285 286static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 287_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) { 288 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 289 (__mmask32)-1); 290} 291 292static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 293_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 294 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 295 __u); 296} 297 298static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 299_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) { 300 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 301 (__mmask64)-1); 302} 303 304static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 305_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 306 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 307 __u); 308} 309 310static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 311_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) { 312 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 313 (__mmask64)-1); 314} 315 316static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 317_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 318 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 319 __u); 320} 321 322static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 323_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) { 324 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 325 (__mmask32)-1); 326} 327 328static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 329_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 330 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 331 __u); 332} 333 334static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 335_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) { 336 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 337 (__mmask32)-1); 338} 339 340static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 341_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 342 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 343 __u); 344} 345 346static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 347_mm512_add_epi8 (__m512i __A, __m512i __B) { 348 return (__m512i) ((__v64qi) __A + (__v64qi) __B); 349} 350 351static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 352_mm512_mask_add_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 353 return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A, 354 (__v64qi) __B, 355 (__v64qi) __W, 356 (__mmask64) __U); 357} 358 359static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 360_mm512_maskz_add_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { 361 return (__m512i) __builtin_ia32_paddb512_mask ((__v64qi) __A, 362 (__v64qi) __B, 363 (__v64qi) 364 _mm512_setzero_qi (), 365 (__mmask64) __U); 366} 367 368static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 369_mm512_sub_epi8 (__m512i __A, __m512i __B) { 370 return (__m512i) ((__v64qi) __A - (__v64qi) __B); 371} 372 373static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 374_mm512_mask_sub_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 375 return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A, 376 (__v64qi) __B, 377 (__v64qi) __W, 378 (__mmask64) __U); 379} 380 381static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 382_mm512_maskz_sub_epi8 (__mmask64 __U, __m512i __A, __m512i __B) { 383 return (__m512i) __builtin_ia32_psubb512_mask ((__v64qi) __A, 384 (__v64qi) __B, 385 (__v64qi) 386 _mm512_setzero_qi (), 387 (__mmask64) __U); 388} 389 390static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 391_mm512_add_epi16 (__m512i __A, __m512i __B) { 392 return (__m512i) ((__v32hi) __A + (__v32hi) __B); 393} 394 395static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 396_mm512_mask_add_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 397 return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A, 398 (__v32hi) __B, 399 (__v32hi) __W, 400 (__mmask32) __U); 401} 402 403static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 404_mm512_maskz_add_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { 405 return (__m512i) __builtin_ia32_paddw512_mask ((__v32hi) __A, 406 (__v32hi) __B, 407 (__v32hi) 408 _mm512_setzero_hi (), 409 (__mmask32) __U); 410} 411 412static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 413_mm512_sub_epi16 (__m512i __A, __m512i __B) { 414 return (__m512i) ((__v32hi) __A - (__v32hi) __B); 415} 416 417static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 418_mm512_mask_sub_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 419 return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A, 420 (__v32hi) __B, 421 (__v32hi) __W, 422 (__mmask32) __U); 423} 424 425static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 426_mm512_maskz_sub_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { 427 return (__m512i) __builtin_ia32_psubw512_mask ((__v32hi) __A, 428 (__v32hi) __B, 429 (__v32hi) 430 _mm512_setzero_hi (), 431 (__mmask32) __U); 432} 433 434static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 435_mm512_mullo_epi16 (__m512i __A, __m512i __B) { 436 return (__m512i) ((__v32hi) __A * (__v32hi) __B); 437} 438 439static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 440_mm512_mask_mullo_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 441 return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A, 442 (__v32hi) __B, 443 (__v32hi) __W, 444 (__mmask32) __U); 445} 446 447static __inline__ __m512i __attribute__((__always_inline__, __nodebug__)) 448_mm512_maskz_mullo_epi16 (__mmask32 __U, __m512i __A, __m512i __B) { 449 return (__m512i) __builtin_ia32_pmullw512_mask ((__v32hi) __A, 450 (__v32hi) __B, 451 (__v32hi) 452 _mm512_setzero_hi (), 453 (__mmask32) __U); 454} 455 456#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ 457 (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 458 (__v64qi)(__m512i)(b), \ 459 (p), (__mmask64)-1); }) 460 461#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ 462 (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 463 (__v64qi)(__m512i)(b), \ 464 (p), (__mmask64)(m)); }) 465 466#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \ 467 (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 468 (__v64qi)(__m512i)(b), \ 469 (p), (__mmask64)-1); }) 470 471#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ 472 (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 473 (__v64qi)(__m512i)(b), \ 474 (p), (__mmask64)(m)); }) 475 476#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \ 477 (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 478 (__v32hi)(__m512i)(b), \ 479 (p), (__mmask32)-1); }) 480 481#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ 482 (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 483 (__v32hi)(__m512i)(b), \ 484 (p), (__mmask32)(m)); }) 485 486#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \ 487 (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 488 (__v32hi)(__m512i)(b), \ 489 (p), (__mmask32)-1); }) 490 491#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ 492 (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 493 (__v32hi)(__m512i)(b), \ 494 (p), (__mmask32)(m)); }) 495 496#endif 497