1/*===---- xopintrin.h - XOP intrinsics -------------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __X86INTRIN_H 25#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." 26#endif 27 28#ifndef __XOPINTRIN_H 29#define __XOPINTRIN_H 30 31#ifndef __XOP__ 32# error "XOP instruction set is not enabled" 33#else 34 35#include <fma4intrin.h> 36 37static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 38_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) 39{ 40 return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 41} 42 43static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 44_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) 45{ 46 return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 47} 48 49static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 50_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) 51{ 52 return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 53} 54 55static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 56_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) 57{ 58 return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 59} 60 61static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 62_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) 63{ 64 return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 65} 66 67static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 68_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) 69{ 70 return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C); 71} 72 73static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 74_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) 75{ 76 return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 77} 78 79static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 80_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) 81{ 82 return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C); 83} 84 85static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 86_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) 87{ 88 return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 89} 90 91static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 92_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) 93{ 94 return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C); 95} 96 97static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 98_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) 99{ 100 return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 101} 102 103static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 104_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) 105{ 106 return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 107} 108 109static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 110_mm_haddw_epi8(__m128i __A) 111{ 112 return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A); 113} 114 115static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 116_mm_haddd_epi8(__m128i __A) 117{ 118 return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A); 119} 120 121static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 122_mm_haddq_epi8(__m128i __A) 123{ 124 return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A); 125} 126 127static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 128_mm_haddd_epi16(__m128i __A) 129{ 130 return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A); 131} 132 133static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 134_mm_haddq_epi16(__m128i __A) 135{ 136 return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A); 137} 138 139static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 140_mm_haddq_epi32(__m128i __A) 141{ 142 return (__m128i)__builtin_ia32_vphadddq((__v4si)__A); 143} 144 145static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 146_mm_haddw_epu8(__m128i __A) 147{ 148 return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A); 149} 150 151static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 152_mm_haddd_epu8(__m128i __A) 153{ 154 return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A); 155} 156 157static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 158_mm_haddq_epu8(__m128i __A) 159{ 160 return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A); 161} 162 163static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 164_mm_haddd_epu16(__m128i __A) 165{ 166 return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A); 167} 168 169static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 170_mm_haddq_epu16(__m128i __A) 171{ 172 return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A); 173} 174 175static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 176_mm_haddq_epu32(__m128i __A) 177{ 178 return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A); 179} 180 181static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 182_mm_hsubw_epi8(__m128i __A) 183{ 184 return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A); 185} 186 187static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 188_mm_hsubd_epi16(__m128i __A) 189{ 190 return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A); 191} 192 193static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 194_mm_hsubq_epi32(__m128i __A) 195{ 196 return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A); 197} 198 199static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 200_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) 201{ 202 return (__m128i)__builtin_ia32_vpcmov(__A, __B, __C); 203} 204 205static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) 206_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) 207{ 208 return (__m256i)__builtin_ia32_vpcmov_256(__A, __B, __C); 209} 210 211static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 212_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) 213{ 214 return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); 215} 216 217static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 218_mm_rot_epi8(__m128i __A, __m128i __B) 219{ 220 return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); 221} 222 223static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 224_mm_rot_epi16(__m128i __A, __m128i __B) 225{ 226 return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); 227} 228 229static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 230_mm_rot_epi32(__m128i __A, __m128i __B) 231{ 232 return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); 233} 234 235static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 236_mm_rot_epi64(__m128i __A, __m128i __B) 237{ 238 return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); 239} 240 241#define _mm_roti_epi8(A, N) __extension__ ({ \ 242 __m128i __A = (A); \ 243 (__m128i)__builtin_ia32_vprotbi((__v16qi)__A, (N)); }) 244 245#define _mm_roti_epi16(A, N) __extension__ ({ \ 246 __m128i __A = (A); \ 247 (__m128i)__builtin_ia32_vprotwi((__v8hi)__A, (N)); }) 248 249#define _mm_roti_epi32(A, N) __extension__ ({ \ 250 __m128i __A = (A); \ 251 (__m128i)__builtin_ia32_vprotdi((__v4si)__A, (N)); }) 252 253#define _mm_roti_epi64(A, N) __extension__ ({ \ 254 __m128i __A = (A); \ 255 (__m128i)__builtin_ia32_vprotqi((__v2di)__A, (N)); }) 256 257static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 258_mm_shl_epi8(__m128i __A, __m128i __B) 259{ 260 return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B); 261} 262 263static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 264_mm_shl_epi16(__m128i __A, __m128i __B) 265{ 266 return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B); 267} 268 269static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 270_mm_shl_epi32(__m128i __A, __m128i __B) 271{ 272 return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B); 273} 274 275static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 276_mm_shl_epi64(__m128i __A, __m128i __B) 277{ 278 return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B); 279} 280 281static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 282_mm_sha_epi8(__m128i __A, __m128i __B) 283{ 284 return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B); 285} 286 287static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 288_mm_sha_epi16(__m128i __A, __m128i __B) 289{ 290 return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B); 291} 292 293static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 294_mm_sha_epi32(__m128i __A, __m128i __B) 295{ 296 return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B); 297} 298 299static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 300_mm_sha_epi64(__m128i __A, __m128i __B) 301{ 302 return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B); 303} 304 305#define _mm_com_epu8(A, B, N) __extension__ ({ \ 306 __m128i __A = (A); \ 307 __m128i __B = (B); \ 308 (__m128i)__builtin_ia32_vpcomub((__v16qi)__A, (__v16qi)__B, (N)); }) 309 310#define _mm_com_epu16(A, B, N) __extension__ ({ \ 311 __m128i __A = (A); \ 312 __m128i __B = (B); \ 313 (__m128i)__builtin_ia32_vpcomuw((__v8hi)__A, (__v8hi)__B, (N)); }) 314 315#define _mm_com_epu32(A, B, N) __extension__ ({ \ 316 __m128i __A = (A); \ 317 __m128i __B = (B); \ 318 (__m128i)__builtin_ia32_vpcomud((__v4si)__A, (__v4si)__B, (N)); }) 319 320#define _mm_com_epu64(A, B, N) __extension__ ({ \ 321 __m128i __A = (A); \ 322 __m128i __B = (B); \ 323 (__m128i)__builtin_ia32_vpcomuq((__v2di)__A, (__v2di)__B, (N)); }) 324 325#define _mm_com_epi8(A, B, N) __extension__ ({ \ 326 __m128i __A = (A); \ 327 __m128i __B = (B); \ 328 (__m128i)__builtin_ia32_vpcomb((__v16qi)__A, (__v16qi)__B, (N)); }) 329 330#define _mm_com_epi16(A, B, N) __extension__ ({ \ 331 __m128i __A = (A); \ 332 __m128i __B = (B); \ 333 (__m128i)__builtin_ia32_vpcomw((__v8hi)__A, (__v8hi)__B, (N)); }) 334 335#define _mm_com_epi32(A, B, N) __extension__ ({ \ 336 __m128i __A = (A); \ 337 __m128i __B = (B); \ 338 (__m128i)__builtin_ia32_vpcomd((__v4si)__A, (__v4si)__B, (N)); }) 339 340#define _mm_com_epi64(A, B, N) __extension__ ({ \ 341 __m128i __A = (A); \ 342 __m128i __B = (B); \ 343 (__m128i)__builtin_ia32_vpcomq((__v2di)__A, (__v2di)__B, (N)); }) 344 345#define _MM_PCOMCTRL_LT 0 346#define _MM_PCOMCTRL_LE 1 347#define _MM_PCOMCTRL_GT 2 348#define _MM_PCOMCTRL_GE 3 349#define _MM_PCOMCTRL_EQ 4 350#define _MM_PCOMCTRL_NEQ 5 351#define _MM_PCOMCTRL_FALSE 6 352#define _MM_PCOMCTRL_TRUE 7 353 354static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 355_mm_comlt_epu8(__m128i __A, __m128i __B) 356{ 357 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT); 358} 359 360static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 361_mm_comle_epu8(__m128i __A, __m128i __B) 362{ 363 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE); 364} 365 366static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 367_mm_comgt_epu8(__m128i __A, __m128i __B) 368{ 369 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT); 370} 371 372static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 373_mm_comge_epu8(__m128i __A, __m128i __B) 374{ 375 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE); 376} 377 378static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 379_mm_comeq_epu8(__m128i __A, __m128i __B) 380{ 381 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ); 382} 383 384static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 385_mm_comneq_epu8(__m128i __A, __m128i __B) 386{ 387 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ); 388} 389 390static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 391_mm_comfalse_epu8(__m128i __A, __m128i __B) 392{ 393 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE); 394} 395 396static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 397_mm_comtrue_epu8(__m128i __A, __m128i __B) 398{ 399 return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE); 400} 401 402static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 403_mm_comlt_epu16(__m128i __A, __m128i __B) 404{ 405 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT); 406} 407 408static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 409_mm_comle_epu16(__m128i __A, __m128i __B) 410{ 411 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE); 412} 413 414static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 415_mm_comgt_epu16(__m128i __A, __m128i __B) 416{ 417 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT); 418} 419 420static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 421_mm_comge_epu16(__m128i __A, __m128i __B) 422{ 423 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE); 424} 425 426static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 427_mm_comeq_epu16(__m128i __A, __m128i __B) 428{ 429 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ); 430} 431 432static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 433_mm_comneq_epu16(__m128i __A, __m128i __B) 434{ 435 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ); 436} 437 438static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 439_mm_comfalse_epu16(__m128i __A, __m128i __B) 440{ 441 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE); 442} 443 444static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 445_mm_comtrue_epu16(__m128i __A, __m128i __B) 446{ 447 return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE); 448} 449 450static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 451_mm_comlt_epu32(__m128i __A, __m128i __B) 452{ 453 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT); 454} 455 456static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 457_mm_comle_epu32(__m128i __A, __m128i __B) 458{ 459 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE); 460} 461 462static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 463_mm_comgt_epu32(__m128i __A, __m128i __B) 464{ 465 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT); 466} 467 468static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 469_mm_comge_epu32(__m128i __A, __m128i __B) 470{ 471 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE); 472} 473 474static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 475_mm_comeq_epu32(__m128i __A, __m128i __B) 476{ 477 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ); 478} 479 480static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 481_mm_comneq_epu32(__m128i __A, __m128i __B) 482{ 483 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ); 484} 485 486static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 487_mm_comfalse_epu32(__m128i __A, __m128i __B) 488{ 489 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE); 490} 491 492static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 493_mm_comtrue_epu32(__m128i __A, __m128i __B) 494{ 495 return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE); 496} 497 498static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 499_mm_comlt_epu64(__m128i __A, __m128i __B) 500{ 501 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT); 502} 503 504static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 505_mm_comle_epu64(__m128i __A, __m128i __B) 506{ 507 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE); 508} 509 510static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 511_mm_comgt_epu64(__m128i __A, __m128i __B) 512{ 513 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT); 514} 515 516static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 517_mm_comge_epu64(__m128i __A, __m128i __B) 518{ 519 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE); 520} 521 522static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 523_mm_comeq_epu64(__m128i __A, __m128i __B) 524{ 525 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ); 526} 527 528static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 529_mm_comneq_epu64(__m128i __A, __m128i __B) 530{ 531 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ); 532} 533 534static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 535_mm_comfalse_epu64(__m128i __A, __m128i __B) 536{ 537 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE); 538} 539 540static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 541_mm_comtrue_epu64(__m128i __A, __m128i __B) 542{ 543 return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE); 544} 545 546static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 547_mm_comlt_epi8(__m128i __A, __m128i __B) 548{ 549 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT); 550} 551 552static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 553_mm_comle_epi8(__m128i __A, __m128i __B) 554{ 555 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE); 556} 557 558static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 559_mm_comgt_epi8(__m128i __A, __m128i __B) 560{ 561 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT); 562} 563 564static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 565_mm_comge_epi8(__m128i __A, __m128i __B) 566{ 567 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE); 568} 569 570static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 571_mm_comeq_epi8(__m128i __A, __m128i __B) 572{ 573 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ); 574} 575 576static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 577_mm_comneq_epi8(__m128i __A, __m128i __B) 578{ 579 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ); 580} 581 582static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 583_mm_comfalse_epi8(__m128i __A, __m128i __B) 584{ 585 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE); 586} 587 588static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 589_mm_comtrue_epi8(__m128i __A, __m128i __B) 590{ 591 return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE); 592} 593 594static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 595_mm_comlt_epi16(__m128i __A, __m128i __B) 596{ 597 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT); 598} 599 600static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 601_mm_comle_epi16(__m128i __A, __m128i __B) 602{ 603 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE); 604} 605 606static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 607_mm_comgt_epi16(__m128i __A, __m128i __B) 608{ 609 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT); 610} 611 612static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 613_mm_comge_epi16(__m128i __A, __m128i __B) 614{ 615 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE); 616} 617 618static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 619_mm_comeq_epi16(__m128i __A, __m128i __B) 620{ 621 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ); 622} 623 624static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 625_mm_comneq_epi16(__m128i __A, __m128i __B) 626{ 627 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ); 628} 629 630static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 631_mm_comfalse_epi16(__m128i __A, __m128i __B) 632{ 633 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE); 634} 635 636static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 637_mm_comtrue_epi16(__m128i __A, __m128i __B) 638{ 639 return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE); 640} 641 642static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 643_mm_comlt_epi32(__m128i __A, __m128i __B) 644{ 645 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT); 646} 647 648static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 649_mm_comle_epi32(__m128i __A, __m128i __B) 650{ 651 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE); 652} 653 654static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 655_mm_comgt_epi32(__m128i __A, __m128i __B) 656{ 657 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT); 658} 659 660static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 661_mm_comge_epi32(__m128i __A, __m128i __B) 662{ 663 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE); 664} 665 666static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 667_mm_comeq_epi32(__m128i __A, __m128i __B) 668{ 669 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ); 670} 671 672static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 673_mm_comneq_epi32(__m128i __A, __m128i __B) 674{ 675 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ); 676} 677 678static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 679_mm_comfalse_epi32(__m128i __A, __m128i __B) 680{ 681 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE); 682} 683 684static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 685_mm_comtrue_epi32(__m128i __A, __m128i __B) 686{ 687 return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE); 688} 689 690static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 691_mm_comlt_epi64(__m128i __A, __m128i __B) 692{ 693 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT); 694} 695 696static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 697_mm_comle_epi64(__m128i __A, __m128i __B) 698{ 699 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE); 700} 701 702static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 703_mm_comgt_epi64(__m128i __A, __m128i __B) 704{ 705 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT); 706} 707 708static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 709_mm_comge_epi64(__m128i __A, __m128i __B) 710{ 711 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE); 712} 713 714static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 715_mm_comeq_epi64(__m128i __A, __m128i __B) 716{ 717 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ); 718} 719 720static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 721_mm_comneq_epi64(__m128i __A, __m128i __B) 722{ 723 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ); 724} 725 726static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 727_mm_comfalse_epi64(__m128i __A, __m128i __B) 728{ 729 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE); 730} 731 732static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 733_mm_comtrue_epi64(__m128i __A, __m128i __B) 734{ 735 return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE); 736} 737 738#define _mm_permute2_pd(X, Y, C, I) __extension__ ({ \ 739 __m128d __X = (X); \ 740 __m128d __Y = (Y); \ 741 __m128i __C = (C); \ 742 (__m128d)__builtin_ia32_vpermil2pd((__v2df)__X, (__v2df)__Y, \ 743 (__v2di)__C, (I)); }) 744 745#define _mm256_permute2_pd(X, Y, C, I) __extension__ ({ \ 746 __m256d __X = (X); \ 747 __m256d __Y = (Y); \ 748 __m256i __C = (C); \ 749 (__m256d)__builtin_ia32_vpermil2pd256((__v4df)__X, (__v4df)__Y, \ 750 (__v4di)__C, (I)); }) 751 752#define _mm_permute2_ps(X, Y, C, I) __extension__ ({ \ 753 __m128 __X = (X); \ 754 __m128 __Y = (Y); \ 755 __m128i __C = (C); \ 756 (__m128)__builtin_ia32_vpermil2ps((__v4sf)__X, (__v4sf)__Y, \ 757 (__v4si)__C, (I)); }) 758 759#define _mm256_permute2_ps(X, Y, C, I) __extension__ ({ \ 760 __m256 __X = (X); \ 761 __m256 __Y = (Y); \ 762 __m256i __C = (C); \ 763 (__m256)__builtin_ia32_vpermil2ps256((__v8sf)__X, (__v8sf)__Y, \ 764 (__v8si)__C, (I)); }) 765 766static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 767_mm_frcz_ss(__m128 __A) 768{ 769 return (__m128)__builtin_ia32_vfrczss((__v4sf)__A); 770} 771 772static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 773_mm_frcz_sd(__m128d __A) 774{ 775 return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A); 776} 777 778static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 779_mm_frcz_ps(__m128 __A) 780{ 781 return (__m128)__builtin_ia32_vfrczps((__v4sf)__A); 782} 783 784static __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 785_mm_frcz_pd(__m128d __A) 786{ 787 return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); 788} 789 790static __inline__ __m256 __attribute__((__always_inline__, __nodebug__)) 791_mm256_frcz_ps(__m256 __A) 792{ 793 return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); 794} 795 796static __inline__ __m256d __attribute__((__always_inline__, __nodebug__)) 797_mm256_frcz_pd(__m256d __A) 798{ 799 return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); 800} 801 802#endif /* __XOP__ */ 803 804#endif /* __XOPINTRIN_H */ 805