1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __TMMINTRIN_H 25#define __TMMINTRIN_H 26 27#include <pmmintrin.h> 28 29/* Define the default attributes for the functions in this file. */ 30#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"))) 31 32/// \brief Computes the absolute value of each of the packed 8-bit signed 33/// integers in the source operand and stores the 8-bit unsigned integer 34/// results in the destination. 35/// 36/// \headerfile <x86intrin.h> 37/// 38/// This intrinsic corresponds to the \c PABSB instruction. 39/// 40/// \param __a 41/// A 64-bit vector of [8 x i8]. 42/// \returns A 64-bit integer vector containing the absolute values of the 43/// elements in the operand. 
44static __inline__ __m64 __DEFAULT_FN_ATTRS 45_mm_abs_pi8(__m64 __a) 46{ 47 return (__m64)__builtin_ia32_pabsb((__v8qi)__a); 48} 49 50/// \brief Computes the absolute value of each of the packed 8-bit signed 51/// integers in the source operand and stores the 8-bit unsigned integer 52/// results in the destination. 53/// 54/// \headerfile <x86intrin.h> 55/// 56/// This intrinsic corresponds to the \c VPABSB instruction. 57/// 58/// \param __a 59/// A 128-bit vector of [16 x i8]. 60/// \returns A 128-bit integer vector containing the absolute values of the 61/// elements in the operand. 62static __inline__ __m128i __DEFAULT_FN_ATTRS 63_mm_abs_epi8(__m128i __a) 64{ 65 return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); 66} 67 68/// \brief Computes the absolute value of each of the packed 16-bit signed 69/// integers in the source operand and stores the 16-bit unsigned integer 70/// results in the destination. 71/// 72/// \headerfile <x86intrin.h> 73/// 74/// This intrinsic corresponds to the \c PABSW instruction. 75/// 76/// \param __a 77/// A 64-bit vector of [4 x i16]. 78/// \returns A 64-bit integer vector containing the absolute values of the 79/// elements in the operand. 80static __inline__ __m64 __DEFAULT_FN_ATTRS 81_mm_abs_pi16(__m64 __a) 82{ 83 return (__m64)__builtin_ia32_pabsw((__v4hi)__a); 84} 85 86/// \brief Computes the absolute value of each of the packed 16-bit signed 87/// integers in the source operand and stores the 16-bit unsigned integer 88/// results in the destination. 89/// 90/// \headerfile <x86intrin.h> 91/// 92/// This intrinsic corresponds to the \c VPABSW instruction. 93/// 94/// \param __a 95/// A 128-bit vector of [8 x i16]. 96/// \returns A 128-bit integer vector containing the absolute values of the 97/// elements in the operand. 
98static __inline__ __m128i __DEFAULT_FN_ATTRS 99_mm_abs_epi16(__m128i __a) 100{ 101 return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); 102} 103 104/// \brief Computes the absolute value of each of the packed 32-bit signed 105/// integers in the source operand and stores the 32-bit unsigned integer 106/// results in the destination. 107/// 108/// \headerfile <x86intrin.h> 109/// 110/// This intrinsic corresponds to the \c PABSD instruction. 111/// 112/// \param __a 113/// A 64-bit vector of [2 x i32]. 114/// \returns A 64-bit integer vector containing the absolute values of the 115/// elements in the operand. 116static __inline__ __m64 __DEFAULT_FN_ATTRS 117_mm_abs_pi32(__m64 __a) 118{ 119 return (__m64)__builtin_ia32_pabsd((__v2si)__a); 120} 121 122/// \brief Computes the absolute value of each of the packed 32-bit signed 123/// integers in the source operand and stores the 32-bit unsigned integer 124/// results in the destination. 125/// 126/// \headerfile <x86intrin.h> 127/// 128/// This intrinsic corresponds to the \c VPABSD instruction. 129/// 130/// \param __a 131/// A 128-bit vector of [4 x i32]. 132/// \returns A 128-bit integer vector containing the absolute values of the 133/// elements in the operand. 134static __inline__ __m128i __DEFAULT_FN_ATTRS 135_mm_abs_epi32(__m128i __a) 136{ 137 return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); 138} 139 140/// \brief Concatenates the two 128-bit integer vector operands, and 141/// right-shifts the result by the number of bytes specified in the immediate 142/// operand. 143/// 144/// \headerfile <x86intrin.h> 145/// 146/// \code 147/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n); 148/// \endcode 149/// 150/// This intrinsic corresponds to the \c PALIGNR instruction. 151/// 152/// \param a 153/// A 128-bit vector of [16 x i8] containing one of the source operands. 154/// \param b 155/// A 128-bit vector of [16 x i8] containing one of the source operands. 
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))

/// \brief Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))

/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
///    128-bit vectors of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHADDW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
///    both operands.
203static __inline__ __m128i __DEFAULT_FN_ATTRS 204_mm_hadd_epi16(__m128i __a, __m128i __b) 205{ 206 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); 207} 208 209/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 210/// 128-bit vectors of [4 x i32]. 211/// 212/// \headerfile <x86intrin.h> 213/// 214/// This intrinsic corresponds to the \c VPHADDD instruction. 215/// 216/// \param __a 217/// A 128-bit vector of [4 x i32] containing one of the source operands. The 218/// horizontal sums of the values are stored in the lower bits of the 219/// destination. 220/// \param __b 221/// A 128-bit vector of [4 x i32] containing one of the source operands. The 222/// horizontal sums of the values are stored in the upper bits of the 223/// destination. 224/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of 225/// both operands. 226static __inline__ __m128i __DEFAULT_FN_ATTRS 227_mm_hadd_epi32(__m128i __a, __m128i __b) 228{ 229 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); 230} 231 232/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 233/// 64-bit vectors of [4 x i16]. 234/// 235/// \headerfile <x86intrin.h> 236/// 237/// This intrinsic corresponds to the \c PHADDW instruction. 238/// 239/// \param __a 240/// A 64-bit vector of [4 x i16] containing one of the source operands. The 241/// horizontal sums of the values are stored in the lower bits of the 242/// destination. 243/// \param __b 244/// A 64-bit vector of [4 x i16] containing one of the source operands. The 245/// horizontal sums of the values are stored in the upper bits of the 246/// destination. 247/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both 248/// operands. 
249static __inline__ __m64 __DEFAULT_FN_ATTRS 250_mm_hadd_pi16(__m64 __a, __m64 __b) 251{ 252 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); 253} 254 255/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 256/// 64-bit vectors of [2 x i32]. 257/// 258/// \headerfile <x86intrin.h> 259/// 260/// This intrinsic corresponds to the \c PHADDD instruction. 261/// 262/// \param __a 263/// A 64-bit vector of [2 x i32] containing one of the source operands. The 264/// horizontal sums of the values are stored in the lower bits of the 265/// destination. 266/// \param __b 267/// A 64-bit vector of [2 x i32] containing one of the source operands. The 268/// horizontal sums of the values are stored in the upper bits of the 269/// destination. 270/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both 271/// operands. 272static __inline__ __m64 __DEFAULT_FN_ATTRS 273_mm_hadd_pi32(__m64 __a, __m64 __b) 274{ 275 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); 276} 277 278/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 279/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are 280/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. 281/// 282/// \headerfile <x86intrin.h> 283/// 284/// This intrinsic corresponds to the \c VPHADDSW instruction. 285/// 286/// \param __a 287/// A 128-bit vector of [8 x i16] containing one of the source operands. The 288/// horizontal sums of the values are stored in the lower bits of the 289/// destination. 290/// \param __b 291/// A 128-bit vector of [8 x i16] containing one of the source operands. The 292/// horizontal sums of the values are stored in the upper bits of the 293/// destination. 294/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 295/// sums of both operands. 
296static __inline__ __m128i __DEFAULT_FN_ATTRS 297_mm_hadds_epi16(__m128i __a, __m128i __b) 298{ 299 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); 300} 301 302/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 303/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are 304/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. 305/// 306/// \headerfile <x86intrin.h> 307/// 308/// This intrinsic corresponds to the \c PHADDSW instruction. 309/// 310/// \param __a 311/// A 64-bit vector of [4 x i16] containing one of the source operands. The 312/// horizontal sums of the values are stored in the lower bits of the 313/// destination. 314/// \param __b 315/// A 64-bit vector of [4 x i16] containing one of the source operands. The 316/// horizontal sums of the values are stored in the upper bits of the 317/// destination. 318/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 319/// sums of both operands. 320static __inline__ __m64 __DEFAULT_FN_ATTRS 321_mm_hadds_pi16(__m64 __a, __m64 __b) 322{ 323 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); 324} 325 326/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 327/// packed 128-bit vectors of [8 x i16]. 328/// 329/// \headerfile <x86intrin.h> 330/// 331/// This intrinsic corresponds to the \c VPHSUBW instruction. 332/// 333/// \param __a 334/// A 128-bit vector of [8 x i16] containing one of the source operands. The 335/// horizontal differences between the values are stored in the lower bits of 336/// the destination. 337/// \param __b 338/// A 128-bit vector of [8 x i16] containing one of the source operands. The 339/// horizontal differences between the values are stored in the upper bits of 340/// the destination. 341/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences 342/// of both operands. 
343static __inline__ __m128i __DEFAULT_FN_ATTRS 344_mm_hsub_epi16(__m128i __a, __m128i __b) 345{ 346 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); 347} 348 349/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 350/// packed 128-bit vectors of [4 x i32]. 351/// 352/// \headerfile <x86intrin.h> 353/// 354/// This intrinsic corresponds to the \c VPHSUBD instruction. 355/// 356/// \param __a 357/// A 128-bit vector of [4 x i32] containing one of the source operands. The 358/// horizontal differences between the values are stored in the lower bits of 359/// the destination. 360/// \param __b 361/// A 128-bit vector of [4 x i32] containing one of the source operands. The 362/// horizontal differences between the values are stored in the upper bits of 363/// the destination. 364/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences 365/// of both operands. 366static __inline__ __m128i __DEFAULT_FN_ATTRS 367_mm_hsub_epi32(__m128i __a, __m128i __b) 368{ 369 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); 370} 371 372/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 373/// packed 64-bit vectors of [4 x i16]. 374/// 375/// \headerfile <x86intrin.h> 376/// 377/// This intrinsic corresponds to the \c PHSUBW instruction. 378/// 379/// \param __a 380/// A 64-bit vector of [4 x i16] containing one of the source operands. The 381/// horizontal differences between the values are stored in the lower bits of 382/// the destination. 383/// \param __b 384/// A 64-bit vector of [4 x i16] containing one of the source operands. The 385/// horizontal differences between the values are stored in the upper bits of 386/// the destination. 387/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences 388/// of both operands. 
389static __inline__ __m64 __DEFAULT_FN_ATTRS 390_mm_hsub_pi16(__m64 __a, __m64 __b) 391{ 392 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); 393} 394 395/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 396/// packed 64-bit vectors of [2 x i32]. 397/// 398/// \headerfile <x86intrin.h> 399/// 400/// This intrinsic corresponds to the \c PHSUBD instruction. 401/// 402/// \param __a 403/// A 64-bit vector of [2 x i32] containing one of the source operands. The 404/// horizontal differences between the values are stored in the lower bits of 405/// the destination. 406/// \param __b 407/// A 64-bit vector of [2 x i32] containing one of the source operands. The 408/// horizontal differences between the values are stored in the upper bits of 409/// the destination. 410/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences 411/// of both operands. 412static __inline__ __m64 __DEFAULT_FN_ATTRS 413_mm_hsub_pi32(__m64 __a, __m64 __b) 414{ 415 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); 416} 417 418/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 419/// packed 128-bit vectors of [8 x i16]. Positive differences greater than 420/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are 421/// saturated to 8000h. 422/// 423/// \headerfile <x86intrin.h> 424/// 425/// This intrinsic corresponds to the \c VPHSUBSW instruction. 426/// 427/// \param __a 428/// A 128-bit vector of [8 x i16] containing one of the source operands. The 429/// horizontal differences between the values are stored in the lower bits of 430/// the destination. 431/// \param __b 432/// A 128-bit vector of [8 x i16] containing one of the source operands. The 433/// horizontal differences between the values are stored in the upper bits of 434/// the destination. 435/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 436/// differences of both operands. 
437static __inline__ __m128i __DEFAULT_FN_ATTRS 438_mm_hsubs_epi16(__m128i __a, __m128i __b) 439{ 440 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); 441} 442 443/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 444/// packed 64-bit vectors of [4 x i16]. Positive differences greater than 445/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are 446/// saturated to 8000h. 447/// 448/// \headerfile <x86intrin.h> 449/// 450/// This intrinsic corresponds to the \c PHSUBSW instruction. 451/// 452/// \param __a 453/// A 64-bit vector of [4 x i16] containing one of the source operands. The 454/// horizontal differences between the values are stored in the lower bits of 455/// the destination. 456/// \param __b 457/// A 64-bit vector of [4 x i16] containing one of the source operands. The 458/// horizontal differences between the values are stored in the upper bits of 459/// the destination. 460/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 461/// differences of both operands. 462static __inline__ __m64 __DEFAULT_FN_ATTRS 463_mm_hsubs_pi16(__m64 __a, __m64 __b) 464{ 465 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); 466} 467 468/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer 469/// values contained in the first source operand and packed 8-bit signed 470/// integer values contained in the second source operand, adds pairs of 471/// contiguous products with signed saturation, and writes the 16-bit sums to 472/// the corresponding bits in the destination. For example, bits [7:0] of 473/// both operands are multiplied, bits [15:8] of both operands are 474/// multiplied, and the sum of both results is written to bits [15:0] of the 475/// destination. 476/// 477/// \headerfile <x86intrin.h> 478/// 479/// This intrinsic corresponds to the \c VPMADDUBSW instruction. 
480/// 481/// \param __a 482/// A 128-bit integer vector containing the first source operand. 483/// \param __b 484/// A 128-bit integer vector containing the second source operand. 485/// \returns A 128-bit integer vector containing the sums of products of both 486/// operands: 487/// R0 := (__a0 * __b0) + (__a1 * __b1) 488/// R1 := (__a2 * __b2) + (__a3 * __b3) 489/// R2 := (__a4 * __b4) + (__a5 * __b5) 490/// R3 := (__a6 * __b6) + (__a7 * __b7) 491/// R4 := (__a8 * __b8) + (__a9 * __b9) 492/// R5 := (__a10 * __b10) + (__a11 * __b11) 493/// R6 := (__a12 * __b12) + (__a13 * __b13) 494/// R7 := (__a14 * __b14) + (__a15 * __b15) 495static __inline__ __m128i __DEFAULT_FN_ATTRS 496_mm_maddubs_epi16(__m128i __a, __m128i __b) 497{ 498 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); 499} 500 501/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer 502/// values contained in the first source operand and packed 8-bit signed 503/// integer values contained in the second source operand, adds pairs of 504/// contiguous products with signed saturation, and writes the 16-bit sums to 505/// the corresponding bits in the destination. For example, bits [7:0] of 506/// both operands are multiplied, bits [15:8] of both operands are 507/// multiplied, and the sum of both results is written to bits [15:0] of the 508/// destination. 509/// 510/// \headerfile <x86intrin.h> 511/// 512/// This intrinsic corresponds to the \c PMADDUBSW instruction. 513/// 514/// \param __a 515/// A 64-bit integer vector containing the first source operand. 516/// \param __b 517/// A 64-bit integer vector containing the second source operand. 
518/// \returns A 64-bit integer vector containing the sums of products of both 519/// operands: 520/// R0 := (__a0 * __b0) + (__a1 * __b1) 521/// R1 := (__a2 * __b2) + (__a3 * __b3) 522/// R2 := (__a4 * __b4) + (__a5 * __b5) 523/// R3 := (__a6 * __b6) + (__a7 * __b7) 524static __inline__ __m64 __DEFAULT_FN_ATTRS 525_mm_maddubs_pi16(__m64 __a, __m64 __b) 526{ 527 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); 528} 529 530/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit 531/// products to the 18 most significant bits by right-shifting, rounds the 532/// truncated value by adding 1, and writes bits [16:1] to the destination. 533/// 534/// \headerfile <x86intrin.h> 535/// 536/// This intrinsic corresponds to the \c VPMULHRSW instruction. 537/// 538/// \param __a 539/// A 128-bit vector of [8 x i16] containing one of the source operands. 540/// \param __b 541/// A 128-bit vector of [8 x i16] containing one of the source operands. 542/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled 543/// products of both operands. 544static __inline__ __m128i __DEFAULT_FN_ATTRS 545_mm_mulhrs_epi16(__m128i __a, __m128i __b) 546{ 547 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); 548} 549 550/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit 551/// products to the 18 most significant bits by right-shifting, rounds the 552/// truncated value by adding 1, and writes bits [16:1] to the destination. 553/// 554/// \headerfile <x86intrin.h> 555/// 556/// This intrinsic corresponds to the \c PMULHRSW instruction. 557/// 558/// \param __a 559/// A 64-bit vector of [4 x i16] containing one of the source operands. 560/// \param __b 561/// A 64-bit vector of [4 x i16] containing one of the source operands. 562/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled 563/// products of both operands. 
564static __inline__ __m64 __DEFAULT_FN_ATTRS 565_mm_mulhrs_pi16(__m64 __a, __m64 __b) 566{ 567 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); 568} 569 570/// \brief Copies the 8-bit integers from a 128-bit integer vector to the 571/// destination or clears 8-bit values in the destination, as specified by 572/// the second source operand. 573/// 574/// \headerfile <x86intrin.h> 575/// 576/// This intrinsic corresponds to the \c VPSHUFB instruction. 577/// 578/// \param __a 579/// A 128-bit integer vector containing the values to be copied. 580/// \param __b 581/// A 128-bit integer vector containing control bytes corresponding to 582/// positions in the destination: 583/// Bit 7: 584/// 1: Clear the corresponding byte in the destination. 585/// 0: Copy the selected source byte to the corresponding byte in the 586/// destination. 587/// Bits [6:4] Reserved. 588/// Bits [3:0] select the source byte to be copied. 589/// \returns A 128-bit integer vector containing the copied or cleared values. 590static __inline__ __m128i __DEFAULT_FN_ATTRS 591_mm_shuffle_epi8(__m128i __a, __m128i __b) 592{ 593 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); 594} 595 596/// \brief Copies the 8-bit integers from a 64-bit integer vector to the 597/// destination or clears 8-bit values in the destination, as specified by 598/// the second source operand. 599/// 600/// \headerfile <x86intrin.h> 601/// 602/// This intrinsic corresponds to the \c PSHUFB instruction. 603/// 604/// \param __a 605/// A 64-bit integer vector containing the values to be copied. 606/// \param __b 607/// A 64-bit integer vector containing control bytes corresponding to 608/// positions in the destination: 609/// Bit 7: 610/// 1: Clear the corresponding byte in the destination. 611/// 0: Copy the selected source byte to the corresponding byte in the 612/// destination. 613/// Bits [3:0] select the source byte to be copied. 
614/// \returns A 64-bit integer vector containing the copied or cleared values. 615static __inline__ __m64 __DEFAULT_FN_ATTRS 616_mm_shuffle_pi8(__m64 __a, __m64 __b) 617{ 618 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); 619} 620 621/// \brief For each 8-bit integer in the first source operand, perform one of 622/// the following actions as specified by the second source operand: If the 623/// byte in the second source is negative, calculate the two's complement of 624/// the corresponding byte in the first source, and write that value to the 625/// destination. If the byte in the second source is positive, copy the 626/// corresponding byte from the first source to the destination. If the byte 627/// in the second source is zero, clear the corresponding byte in the 628/// destination. 629/// 630/// \headerfile <x86intrin.h> 631/// 632/// This intrinsic corresponds to the \c VPSIGNB instruction. 633/// 634/// \param __a 635/// A 128-bit integer vector containing the values to be copied. 636/// \param __b 637/// A 128-bit integer vector containing control bytes corresponding to 638/// positions in the destination. 639/// \returns A 128-bit integer vector containing the resultant values. 640static __inline__ __m128i __DEFAULT_FN_ATTRS 641_mm_sign_epi8(__m128i __a, __m128i __b) 642{ 643 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); 644} 645 646/// \brief For each 16-bit integer in the first source operand, perform one of 647/// the following actions as specified by the second source operand: If the 648/// word in the second source is negative, calculate the two's complement of 649/// the corresponding word in the first source, and write that value to the 650/// destination. If the word in the second source is positive, copy the 651/// corresponding word from the first source to the destination. If the word 652/// in the second source is zero, clear the corresponding word in the 653/// destination. 
654/// 655/// \headerfile <x86intrin.h> 656/// 657/// This intrinsic corresponds to the \c VPSIGNW instruction. 658/// 659/// \param __a 660/// A 128-bit integer vector containing the values to be copied. 661/// \param __b 662/// A 128-bit integer vector containing control words corresponding to 663/// positions in the destination. 664/// \returns A 128-bit integer vector containing the resultant values. 665static __inline__ __m128i __DEFAULT_FN_ATTRS 666_mm_sign_epi16(__m128i __a, __m128i __b) 667{ 668 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); 669} 670 671/// \brief For each 32-bit integer in the first source operand, perform one of 672/// the following actions as specified by the second source operand: If the 673/// doubleword in the second source is negative, calculate the two's 674/// complement of the corresponding word in the first source, and write that 675/// value to the destination. If the doubleword in the second source is 676/// positive, copy the corresponding word from the first source to the 677/// destination. If the doubleword in the second source is zero, clear the 678/// corresponding word in the destination. 679/// 680/// \headerfile <x86intrin.h> 681/// 682/// This intrinsic corresponds to the \c VPSIGND instruction. 683/// 684/// \param __a 685/// A 128-bit integer vector containing the values to be copied. 686/// \param __b 687/// A 128-bit integer vector containing control doublewords corresponding to 688/// positions in the destination. 689/// \returns A 128-bit integer vector containing the resultant values. 
690static __inline__ __m128i __DEFAULT_FN_ATTRS 691_mm_sign_epi32(__m128i __a, __m128i __b) 692{ 693 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); 694} 695 696/// \brief For each 8-bit integer in the first source operand, perform one of 697/// the following actions as specified by the second source operand: If the 698/// byte in the second source is negative, calculate the two's complement of 699/// the corresponding byte in the first source, and write that value to the 700/// destination. If the byte in the second source is positive, copy the 701/// corresponding byte from the first source to the destination. If the byte 702/// in the second source is zero, clear the corresponding byte in the 703/// destination. 704/// 705/// \headerfile <x86intrin.h> 706/// 707/// This intrinsic corresponds to the \c PSIGNB instruction. 708/// 709/// \param __a 710/// A 64-bit integer vector containing the values to be copied. 711/// \param __b 712/// A 64-bit integer vector containing control bytes corresponding to 713/// positions in the destination. 714/// \returns A 64-bit integer vector containing the resultant values. 715static __inline__ __m64 __DEFAULT_FN_ATTRS 716_mm_sign_pi8(__m64 __a, __m64 __b) 717{ 718 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); 719} 720 721/// \brief For each 16-bit integer in the first source operand, perform one of 722/// the following actions as specified by the second source operand: If the 723/// word in the second source is negative, calculate the two's complement of 724/// the corresponding word in the first source, and write that value to the 725/// destination. If the word in the second source is positive, copy the 726/// corresponding word from the first source to the destination. If the word 727/// in the second source is zero, clear the corresponding word in the 728/// destination. 729/// 730/// \headerfile <x86intrin.h> 731/// 732/// This intrinsic corresponds to the \c PSIGNW instruction. 
733/// 734/// \param __a 735/// A 64-bit integer vector containing the values to be copied. 736/// \param __b 737/// A 64-bit integer vector containing control words corresponding to 738/// positions in the destination. 739/// \returns A 64-bit integer vector containing the resultant values. 740static __inline__ __m64 __DEFAULT_FN_ATTRS 741_mm_sign_pi16(__m64 __a, __m64 __b) 742{ 743 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); 744} 745 746/// \brief For each 32-bit integer in the first source operand, perform one of 747/// the following actions as specified by the second source operand: If the 748/// doubleword in the second source is negative, calculate the two's 749/// complement of the corresponding doubleword in the first source, and 750/// write that value to the destination. If the doubleword in the second 751/// source is positive, copy the corresponding doubleword from the first 752/// source to the destination. If the doubleword in the second source is 753/// zero, clear the corresponding doubleword in the destination. 754/// 755/// \headerfile <x86intrin.h> 756/// 757/// This intrinsic corresponds to the \c PSIGND instruction. 758/// 759/// \param __a 760/// A 64-bit integer vector containing the values to be copied. 761/// \param __b 762/// A 64-bit integer vector containing two control doublewords corresponding 763/// to positions in the destination. 764/// \returns A 64-bit integer vector containing the resultant values. 765static __inline__ __m64 __DEFAULT_FN_ATTRS 766_mm_sign_pi32(__m64 __a, __m64 __b) 767{ 768 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); 769} 770 771#undef __DEFAULT_FN_ATTRS 772 773#endif /* __TMMINTRIN_H */ 774