1/* Copyright (C) 2007-2013 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24#ifndef _X86INTRIN_H_INCLUDED 25# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead." 26#endif 27 28#ifndef _XOPMMINTRIN_H_INCLUDED 29#define _XOPMMINTRIN_H_INCLUDED 30 31#ifndef __XOP__ 32# error "XOP instruction set not enabled" 33#else 34 35#include <fma4intrin.h> 36 37/* Integer multiply/add intructions. */ 38extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 39_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) 40{ 41 return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C); 42} 43 44extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 45_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C) 46{ 47 return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C); 48} 49 50extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 51_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C) 52{ 53 return (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 54} 55 56extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 57_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C) 58{ 59 return (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C); 60} 61 62extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 63_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C) 64{ 65 return (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); 66} 67 68extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 69_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C) 70{ 71 return (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C); 72} 73 74extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 75_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C) 76{ 77 return (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); 78} 79 80extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 81_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C) 82{ 83 return (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C); 84} 85 86extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 87_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C) 88{ 89 return (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); 90} 91 92extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 93_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C) 94{ 95 return (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C); 96} 97 98extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 99_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C) 100{ 101 return (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); 102} 103 104extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 105_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C) 106{ 107 return (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C); 108} 109 110/* Packed Integer Horizontal Add and Subtract */ 111extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 112_mm_haddw_epi8(__m128i __A) 113{ 114 return (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A); 115} 116 117extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 118_mm_haddd_epi8(__m128i __A) 119{ 120 return (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A); 121} 122 123extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 124_mm_haddq_epi8(__m128i __A) 125{ 126 return (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A); 127} 128 129extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 130_mm_haddd_epi16(__m128i __A) 131{ 132 return (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A); 133} 134 135extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 136_mm_haddq_epi16(__m128i __A) 137{ 138 return (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A); 139} 140 141extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 142_mm_haddq_epi32(__m128i __A) 143{ 144 return (__m128i) __builtin_ia32_vphadddq ((__v4si)__A); 145} 146 147extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 148_mm_haddw_epu8(__m128i __A) 149{ 150 return (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A); 151} 152 153extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 154_mm_haddd_epu8(__m128i __A) 155{ 156 return (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A); 157} 158 159extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 160_mm_haddq_epu8(__m128i __A) 161{ 162 return (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A); 163} 164 165extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 166_mm_haddd_epu16(__m128i __A) 167{ 168 return (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A); 169} 170 171extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 172_mm_haddq_epu16(__m128i __A) 173{ 174 return (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A); 175} 176 177extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 178_mm_haddq_epu32(__m128i __A) 179{ 180 return (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A); 181} 182 183extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 184_mm_hsubw_epi8(__m128i __A) 185{ 186 return (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A); 187} 188 189extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 190_mm_hsubd_epi16(__m128i __A) 191{ 192 return (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A); 193} 194 195extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 196_mm_hsubq_epi32(__m128i __A) 197{ 198 return (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A); 199} 200 201/* Vector conditional move and permute */ 202 203extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 204_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C) 205{ 206 return (__m128i) __builtin_ia32_vpcmov (__A, __B, __C); 207} 208 209extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 210_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) 211{ 212 return (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C); 213} 214 215/* Packed Integer Rotates and Shifts 216 Rotates - Non-Immediate form */ 217 218extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 219_mm_rot_epi8(__m128i __A, __m128i __B) 220{ 221 return (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B); 222} 223 224extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 225_mm_rot_epi16(__m128i __A, __m128i __B) 226{ 227 return (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B); 228} 229 230extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 231_mm_rot_epi32(__m128i __A, __m128i __B) 232{ 233 return (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B); 234} 235 236extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 237_mm_rot_epi64(__m128i __A, __m128i __B) 238{ 239 return (__m128i) __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B); 240} 241 242/* Rotates - Immediate form */ 243 244#ifdef __OPTIMIZE__ 245extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 246_mm_roti_epi8(__m128i __A, const int __B) 247{ 248 return (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B); 249} 250 251extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 252_mm_roti_epi16(__m128i __A, const int __B) 253{ 254 return (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B); 255} 256 257extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 258_mm_roti_epi32(__m128i __A, const int __B) 259{ 260 return (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B); 261} 262 263extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 264_mm_roti_epi64(__m128i __A, const int __B) 265{ 266 return (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B); 267} 268#else 269#define _mm_roti_epi8(A, N) \ 270 ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(A), (int)(N))) 271#define _mm_roti_epi16(A, N) \ 272 ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(A), (int)(N))) 273#define _mm_roti_epi32(A, N) \ 274 ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(A), (int)(N))) 275#define _mm_roti_epi64(A, N) \ 276 ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(A), (int)(N))) 277#endif 278 279/* Shifts */ 280 281extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 282_mm_shl_epi8(__m128i __A, __m128i __B) 283{ 284 return (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B); 285} 286 287extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 288_mm_shl_epi16(__m128i __A, __m128i __B) 289{ 290 return (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B); 291} 292 293extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 294_mm_shl_epi32(__m128i __A, __m128i __B) 295{ 296 return (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B); 297} 298 299extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 300_mm_shl_epi64(__m128i __A, __m128i __B) 301{ 302 return (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B); 303} 304 305 306extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 307_mm_sha_epi8(__m128i __A, __m128i __B) 308{ 309 return (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B); 310} 311 312extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 313_mm_sha_epi16(__m128i __A, __m128i __B) 314{ 315 return (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B); 316} 317 318extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 319_mm_sha_epi32(__m128i __A, __m128i __B) 320{ 321 return (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B); 322} 323 324extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 325_mm_sha_epi64(__m128i __A, __m128i __B) 326{ 327 return (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B); 328} 329 330/* Compare and Predicate Generation 331 pcom (integer, unsinged bytes) */ 332 333extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 334_mm_comlt_epu8(__m128i __A, __m128i __B) 335{ 336 return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B); 337} 338 339extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 340_mm_comle_epu8(__m128i __A, __m128i __B) 341{ 342 return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B); 343} 344 345extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 346_mm_comgt_epu8(__m128i __A, __m128i __B) 347{ 348 return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B); 349} 350 351extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 352_mm_comge_epu8(__m128i __A, __m128i __B) 353{ 354 return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B); 355} 356 357extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 358_mm_comeq_epu8(__m128i __A, __m128i __B) 359{ 360 return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B); 361} 362 363extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 364_mm_comneq_epu8(__m128i __A, __m128i __B) 365{ 366 return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B); 367} 368 369extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 370_mm_comfalse_epu8(__m128i __A, __m128i __B) 371{ 372 return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B); 373} 374 375extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 376_mm_comtrue_epu8(__m128i __A, __m128i __B) 377{ 378 return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B); 379} 380 381/*pcom (integer, unsinged words) */ 382 383extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 384_mm_comlt_epu16(__m128i __A, __m128i __B) 385{ 386 return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B); 387} 388 389extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 390_mm_comle_epu16(__m128i __A, __m128i __B) 391{ 392 return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B); 393} 394 395extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 396_mm_comgt_epu16(__m128i __A, __m128i __B) 397{ 398 return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B); 399} 400 401extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 402_mm_comge_epu16(__m128i __A, __m128i __B) 403{ 404 return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B); 405} 406 407extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 408_mm_comeq_epu16(__m128i __A, __m128i __B) 409{ 410 return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B); 411} 412 413extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 414_mm_comneq_epu16(__m128i __A, __m128i __B) 415{ 416 return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B); 417} 418 419extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 420_mm_comfalse_epu16(__m128i __A, __m128i __B) 421{ 422 return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B); 423} 424 425extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 426_mm_comtrue_epu16(__m128i __A, __m128i __B) 427{ 428 return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B); 429} 430 431/*pcom (integer, unsinged double words) */ 432 433extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 434_mm_comlt_epu32(__m128i __A, __m128i __B) 435{ 436 return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B); 437} 438 439extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 440_mm_comle_epu32(__m128i __A, __m128i __B) 441{ 442 return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B); 443} 444 445extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 446_mm_comgt_epu32(__m128i __A, __m128i __B) 447{ 448 return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B); 449} 450 451extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 452_mm_comge_epu32(__m128i __A, __m128i __B) 453{ 454 return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B); 455} 456 457extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 458_mm_comeq_epu32(__m128i __A, __m128i __B) 459{ 460 return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B); 461} 462 463extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 464_mm_comneq_epu32(__m128i __A, __m128i __B) 465{ 466 return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B); 467} 468 469extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 470_mm_comfalse_epu32(__m128i __A, __m128i __B) 471{ 472 return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B); 473} 474 475extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 476_mm_comtrue_epu32(__m128i __A, __m128i __B) 477{ 478 return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B); 479} 480 481/*pcom (integer, unsinged quad words) */ 482 483extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 484_mm_comlt_epu64(__m128i __A, __m128i __B) 485{ 486 return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B); 487} 488 489extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 490_mm_comle_epu64(__m128i __A, __m128i __B) 491{ 492 return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B); 493} 494 495extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 496_mm_comgt_epu64(__m128i __A, __m128i __B) 497{ 498 return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B); 499} 500 501extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 502_mm_comge_epu64(__m128i __A, __m128i __B) 503{ 504 return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B); 505} 506 507extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 508_mm_comeq_epu64(__m128i __A, __m128i __B) 509{ 510 return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B); 511} 512 513extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 514_mm_comneq_epu64(__m128i __A, __m128i __B) 515{ 516 return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B); 517} 518 519extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 520_mm_comfalse_epu64(__m128i __A, __m128i __B) 521{ 522 return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B); 523} 524 525extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 526_mm_comtrue_epu64(__m128i __A, __m128i __B) 527{ 528 return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B); 529} 530 531/*pcom (integer, signed bytes) */ 532 533extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 534_mm_comlt_epi8(__m128i __A, __m128i __B) 535{ 536 return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B); 537} 538 539extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 540_mm_comle_epi8(__m128i __A, __m128i __B) 541{ 542 return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B); 543} 544 545extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 546_mm_comgt_epi8(__m128i __A, __m128i __B) 547{ 548 return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B); 549} 550 551extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 552_mm_comge_epi8(__m128i __A, __m128i __B) 553{ 554 return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B); 555} 556 557extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 558_mm_comeq_epi8(__m128i __A, __m128i __B) 559{ 560 return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B); 561} 562 563extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 564_mm_comneq_epi8(__m128i __A, __m128i __B) 565{ 566 return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B); 567} 568 569extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 570_mm_comfalse_epi8(__m128i __A, __m128i __B) 571{ 572 return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B); 573} 574 575extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 576_mm_comtrue_epi8(__m128i __A, __m128i __B) 577{ 578 return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B); 579} 580 581/*pcom (integer, signed words) */ 582 583extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 584_mm_comlt_epi16(__m128i __A, __m128i __B) 585{ 586 return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B); 587} 588 589extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 590_mm_comle_epi16(__m128i __A, __m128i __B) 591{ 592 return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B); 593} 594 595extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 596_mm_comgt_epi16(__m128i __A, __m128i __B) 597{ 598 return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B); 599} 600 601extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 602_mm_comge_epi16(__m128i __A, __m128i __B) 603{ 604 return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B); 605} 606 607extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 608_mm_comeq_epi16(__m128i __A, __m128i __B) 609{ 610 return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B); 611} 612 613extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 614_mm_comneq_epi16(__m128i __A, __m128i __B) 615{ 616 return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B); 617} 618 619extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 620_mm_comfalse_epi16(__m128i __A, __m128i __B) 621{ 622 return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B); 623} 624 625extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 626_mm_comtrue_epi16(__m128i __A, __m128i __B) 627{ 628 return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B); 629} 630 631/*pcom (integer, signed double words) */ 632 633extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 634_mm_comlt_epi32(__m128i __A, __m128i __B) 635{ 636 return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B); 637} 638 639extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 640_mm_comle_epi32(__m128i __A, __m128i __B) 641{ 642 return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B); 643} 644 645extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 646_mm_comgt_epi32(__m128i __A, __m128i __B) 647{ 648 return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B); 649} 650 651extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 652_mm_comge_epi32(__m128i __A, __m128i __B) 653{ 654 return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B); 655} 656 657extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 658_mm_comeq_epi32(__m128i __A, __m128i __B) 659{ 660 return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B); 661} 662 663extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 664_mm_comneq_epi32(__m128i __A, __m128i __B) 665{ 666 return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B); 667} 668 669extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 670_mm_comfalse_epi32(__m128i __A, __m128i __B) 671{ 672 return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B); 673} 674 675extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 676_mm_comtrue_epi32(__m128i __A, __m128i __B) 677{ 678 return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B); 679} 680 681/*pcom (integer, signed quad words) */ 682 683extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 684_mm_comlt_epi64(__m128i __A, __m128i __B) 685{ 686 return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B); 687} 688 689extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 690_mm_comle_epi64(__m128i __A, __m128i __B) 691{ 692 return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B); 693} 694 695extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 696_mm_comgt_epi64(__m128i __A, __m128i __B) 697{ 698 return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B); 699} 700 701extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 702_mm_comge_epi64(__m128i __A, __m128i __B) 703{ 704 return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B); 705} 706 707extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 708_mm_comeq_epi64(__m128i __A, __m128i __B) 709{ 710 return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B); 711} 712 713extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 714_mm_comneq_epi64(__m128i __A, __m128i __B) 715{ 716 return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B); 717} 718 719extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 720_mm_comfalse_epi64(__m128i __A, __m128i __B) 721{ 722 return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B); 723} 724 725extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 726_mm_comtrue_epi64(__m128i __A, __m128i __B) 727{ 728 return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B); 729} 730 731/* FRCZ */ 732 733extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 734_mm_frcz_ps (__m128 __A) 735{ 736 return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A); 737} 738 739extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 740_mm_frcz_pd (__m128d __A) 741{ 742 return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A); 743} 744 745extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 746_mm_frcz_ss (__m128 __A, __m128 __B) 747{ 748 return (__m128) __builtin_ia32_movss ((__v4sf)__A, 749 (__v4sf) 750 __builtin_ia32_vfrczss ((__v4sf)__B)); 751} 752 753extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 754_mm_frcz_sd (__m128d __A, __m128d __B) 755{ 756 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, 757 (__v2df) 758 __builtin_ia32_vfrczsd ((__v2df)__B)); 759} 760 761extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 762_mm256_frcz_ps (__m256 __A) 763{ 764 return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A); 765} 766 767extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 768_mm256_frcz_pd (__m256d __A) 769{ 770 return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A); 771} 772 773/* PERMIL2 */ 774 775#ifdef __OPTIMIZE__ 776extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 777_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I) 778{ 779 return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X, 780 (__v2df)__Y, 781 (__v2di)__C, 782 __I); 783} 784 785extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 786_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I) 787{ 788 return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X, 789 (__v4df)__Y, 790 (__v4di)__C, 791 __I); 792} 793 794extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 795_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I) 796{ 797 return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X, 798 (__v4sf)__Y, 799 (__v4si)__C, 800 __I); 801} 802 803extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 804_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) 805{ 806 return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X, 807 (__v8sf)__Y, 808 (__v8si)__C, 809 __I); 810} 811#else 812#define _mm_permute2_pd(X, Y, C, I) \ 813 ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \ 814 (__v2df)(__m128d)(Y), \ 815 (__v2di)(__m128d)(C), \ 816 (int)(I))) 817 818#define _mm256_permute2_pd(X, Y, C, I) \ 819 ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \ 820 (__v4df)(__m256d)(Y), \ 821 (__v4di)(__m256d)(C), \ 822 (int)(I))) 823 824#define _mm_permute2_ps(X, Y, C, I) \ 825 ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \ 826 (__v4sf)(__m128)(Y), \ 827 (__v4si)(__m128)(C), \ 828 (int)(I))) 829 830#define _mm256_permute2_ps(X, Y, C, I) \ 831 ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \ 832 (__v8sf)(__m256)(Y), \ 833 (__v8si)(__m256)(C), \ 834 (int)(I))) 835#endif /* __OPTIMIZE__ */ 836 837#endif /* __XOP__ */ 838 839#endif /* _XOPMMINTRIN_H_INCLUDED */ 840