1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512BWINTRIN_H 29#define __AVX512BWINTRIN_H 30 31typedef unsigned int __mmask32; 32typedef unsigned long long __mmask64; 33typedef char __v64qi __attribute__ ((__vector_size__ (64))); 34typedef short __v32hi __attribute__ ((__vector_size__ (64))); 35 36 37/* Integer compare */ 38 39static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 40_mm512_cmpeq_epi8_mask(__m512i __a, __m512i __b) { 41 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, 42 (__mmask64)-1); 43} 44 45static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 46_mm512_mask_cmpeq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 47 return (__mmask64)__builtin_ia32_pcmpeqb512_mask((__v64qi)__a, (__v64qi)__b, 48 __u); 49} 50 51static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 52_mm512_cmpeq_epu8_mask(__m512i __a, __m512i __b) { 53 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, 54 (__mmask64)-1); 55} 56 57static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 58_mm512_mask_cmpeq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 59 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 0, 60 __u); 61} 62 63static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 64_mm512_cmpeq_epi16_mask(__m512i __a, __m512i __b) { 65 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, 66 (__mmask32)-1); 67} 68 69static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 70_mm512_mask_cmpeq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 71 return (__mmask32)__builtin_ia32_pcmpeqw512_mask((__v32hi)__a, (__v32hi)__b, 72 __u); 73} 74 75static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 76_mm512_cmpeq_epu16_mask(__m512i __a, __m512i __b) { 77 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, 78 (__mmask32)-1); 79} 80 81static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 82_mm512_mask_cmpeq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 83 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 0, 84 __u); 85} 86 87static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 88_mm512_cmpge_epi8_mask(__m512i __a, __m512i __b) { 89 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 90 (__mmask64)-1); 91} 92 93static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 94_mm512_mask_cmpge_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 95 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 96 __u); 97} 98 99static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 100_mm512_cmpge_epu8_mask(__m512i __a, __m512i __b) { 101 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 102 (__mmask64)-1); 103} 104 105static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 106_mm512_mask_cmpge_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 107 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 5, 108 __u); 109} 110 111static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 112_mm512_cmpge_epi16_mask(__m512i __a, __m512i __b) { 113 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 114 (__mmask32)-1); 115} 116 117static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 118_mm512_mask_cmpge_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 119 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 120 __u); 121} 122 123static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 124_mm512_cmpge_epu16_mask(__m512i __a, __m512i __b) { 125 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 126 (__mmask32)-1); 127} 128 129static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 130_mm512_mask_cmpge_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 131 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 5, 132 __u); 133} 134 135static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 136_mm512_cmpgt_epi8_mask(__m512i __a, __m512i __b) { 137 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, 138 (__mmask64)-1); 139} 140 141static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 142_mm512_mask_cmpgt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 143 return (__mmask64)__builtin_ia32_pcmpgtb512_mask((__v64qi)__a, (__v64qi)__b, 144 __u); 145} 146 147static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 148_mm512_cmpgt_epu8_mask(__m512i __a, __m512i __b) { 149 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, 150 (__mmask64)-1); 151} 152 153static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 154_mm512_mask_cmpgt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 155 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 6, 156 __u); 157} 158 159static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 160_mm512_cmpgt_epi16_mask(__m512i __a, __m512i __b) { 161 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, 162 (__mmask32)-1); 163} 164 165static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 166_mm512_mask_cmpgt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 167 return (__mmask32)__builtin_ia32_pcmpgtw512_mask((__v32hi)__a, (__v32hi)__b, 168 __u); 169} 170 171static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 172_mm512_cmpgt_epu16_mask(__m512i __a, __m512i __b) { 173 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, 174 (__mmask32)-1); 175} 176 177static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 178_mm512_mask_cmpgt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 179 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 6, 180 __u); 181} 182 183static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 184_mm512_cmple_epi8_mask(__m512i __a, __m512i __b) { 185 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 186 (__mmask64)-1); 187} 188 189static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 190_mm512_mask_cmple_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 191 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 192 __u); 193} 194 195static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 196_mm512_cmple_epu8_mask(__m512i __a, __m512i __b) { 197 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 198 (__mmask64)-1); 199} 200 201static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 202_mm512_mask_cmple_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 203 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 2, 204 __u); 205} 206 207static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 208_mm512_cmple_epi16_mask(__m512i __a, __m512i __b) { 209 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 210 (__mmask32)-1); 211} 212 213static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 214_mm512_mask_cmple_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 215 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 216 __u); 217} 218 219static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 220_mm512_cmple_epu16_mask(__m512i __a, __m512i __b) { 221 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 222 (__mmask32)-1); 223} 224 225static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 226_mm512_mask_cmple_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 227 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 2, 228 __u); 229} 230 231static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 232_mm512_cmplt_epi8_mask(__m512i __a, __m512i __b) { 233 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 234 (__mmask64)-1); 235} 236 237static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 238_mm512_mask_cmplt_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 239 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 240 __u); 241} 242 243static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 244_mm512_cmplt_epu8_mask(__m512i __a, __m512i __b) { 245 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 246 (__mmask64)-1); 247} 248 249static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 250_mm512_mask_cmplt_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 251 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 1, 252 __u); 253} 254 255static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 256_mm512_cmplt_epi16_mask(__m512i __a, __m512i __b) { 257 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 258 (__mmask32)-1); 259} 260 261static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 262_mm512_mask_cmplt_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 263 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 264 __u); 265} 266 267static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 268_mm512_cmplt_epu16_mask(__m512i __a, __m512i __b) { 269 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 270 (__mmask32)-1); 271} 272 273static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 274_mm512_mask_cmplt_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 275 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 1, 276 __u); 277} 278 279static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 280_mm512_cmpneq_epi8_mask(__m512i __a, __m512i __b) { 281 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 282 (__mmask64)-1); 283} 284 285static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 286_mm512_mask_cmpneq_epi8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 287 return (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 288 __u); 289} 290 291static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 292_mm512_cmpneq_epu8_mask(__m512i __a, __m512i __b) { 293 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 294 (__mmask64)-1); 295} 296 297static __inline__ __mmask64 __attribute__((__always_inline__, __nodebug__)) 298_mm512_mask_cmpneq_epu8_mask(__mmask64 __u, __m512i __a, __m512i __b) { 299 return (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)__a, (__v64qi)__b, 4, 300 __u); 301} 302 303static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 304_mm512_cmpneq_epi16_mask(__m512i __a, __m512i __b) { 305 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 306 (__mmask32)-1); 307} 308 309static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 310_mm512_mask_cmpneq_epi16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 311 return (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 312 __u); 313} 314 315static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 316_mm512_cmpneq_epu16_mask(__m512i __a, __m512i __b) { 317 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 318 (__mmask32)-1); 319} 320 321static __inline__ __mmask32 __attribute__((__always_inline__, __nodebug__)) 322_mm512_mask_cmpneq_epu16_mask(__mmask32 __u, __m512i __a, __m512i __b) { 323 return (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)__a, (__v32hi)__b, 4, 324 __u); 325} 326 327#define _mm512_cmp_epi8_mask(a, b, p) __extension__ ({ \ 328 (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 329 (__v64qi)(__m512i)(b), \ 330 (p), (__mmask64)-1); }) 331 332#define _mm512_mask_cmp_epi8_mask(m, a, b, p) __extension__ ({ \ 333 (__mmask16)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 334 (__v64qi)(__m512i)(b), \ 335 (p), (__mmask64)(m)); }) 336 337#define _mm512_cmp_epu8_mask(a, b, p) __extension__ ({ \ 338 (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 339 (__v64qi)(__m512i)(b), \ 340 (p), (__mmask64)-1); }) 341 342#define _mm512_mask_cmp_epu8_mask(m, a, b, p) __extension__ ({ \ 343 (__mmask16)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 344 (__v64qi)(__m512i)(b), \ 345 (p), (__mmask64)(m)); }) 346 347#define _mm512_cmp_epi16_mask(a, b, p) __extension__ ({ \ 348 (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 349 (__v32hi)(__m512i)(b), \ 350 (p), (__mmask32)-1); }) 351 352#define _mm512_mask_cmp_epi16_mask(m, a, b, p) __extension__ ({ \ 353 (__mmask16)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 354 (__v32hi)(__m512i)(b), \ 355 (p), (__mmask32)(m)); }) 356 357#define _mm512_cmp_epu16_mask(a, b, p) __extension__ ({ \ 358 (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 359 (__v32hi)(__m512i)(b), \ 360 (p), (__mmask32)-1); }) 361 362#define _mm512_mask_cmp_epu16_mask(m, a, b, p) __extension__ ({ \ 363 (__mmask16)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 364 (__v32hi)(__m512i)(b), \ 365 (p), (__mmask32)(m)); }) 366 367#endif 368