/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __MMINTRIN_H
#define __MMINTRIN_H

/* 8-byte vector type used as the parameter and return type of the intrinsics
 * declared in this header. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* Element-typed 8-byte vector types. The intrinsics cast __m64 values to these
 * types before handing them to the compiler builtins, so that each builtin
 * sees long long, int, short or char elements as appropriate. */
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))

/// \brief Clears the MMX state by setting the state of the x87 stack registers
///    to empty.
39/// 40/// \headerfile <x86intrin.h> 41/// 42/// This intrinsic corresponds to the \c EMMS instruction. 43/// 44static __inline__ void __DEFAULT_FN_ATTRS 45_mm_empty(void) 46{ 47 __builtin_ia32_emms(); 48} 49 50/// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the 51/// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 52/// 53/// \headerfile <x86intrin.h> 54/// 55/// This intrinsic corresponds to the \c VMOVD / MOVD instruction. 56/// 57/// \param __i 58/// A 32-bit integer value. 59/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 60/// parameter. The upper 32 bits are set to 0. 61static __inline__ __m64 __DEFAULT_FN_ATTRS 62_mm_cvtsi32_si64(int __i) 63{ 64 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 65} 66 67/// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 68/// signed integer. 69/// 70/// \headerfile <x86intrin.h> 71/// 72/// This intrinsic corresponds to the \c VMOVD / MOVD instruction. 73/// 74/// \param __m 75/// A 64-bit integer vector. 76/// \returns A 32-bit signed integer value containing the lower 32 bits of the 77/// parameter. 78static __inline__ int __DEFAULT_FN_ATTRS 79_mm_cvtsi64_si32(__m64 __m) 80{ 81 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 82} 83 84/// \brief Casts a 64-bit signed integer value into a 64-bit integer vector. 85/// 86/// \headerfile <x86intrin.h> 87/// 88/// This intrinsic corresponds to the \c VMOVQ / MOVD instruction. 89/// 90/// \param __i 91/// A 64-bit signed integer. 92/// \returns A 64-bit integer vector containing the same bitwise pattern as the 93/// parameter. 94static __inline__ __m64 __DEFAULT_FN_ATTRS 95_mm_cvtsi64_m64(long long __i) 96{ 97 return (__m64)__i; 98} 99 100/// \brief Casts a 64-bit integer vector into a 64-bit signed integer value. 101/// 102/// \headerfile <x86intrin.h> 103/// 104/// This intrinsic corresponds to the \c VMOVQ / MOVD instruction. 
105/// 106/// \param __m 107/// A 64-bit integer vector. 108/// \returns A 64-bit signed integer containing the same bitwise pattern as the 109/// parameter. 110static __inline__ long long __DEFAULT_FN_ATTRS 111_mm_cvtm64_si64(__m64 __m) 112{ 113 return (long long)__m; 114} 115 116/// \brief Converts 16-bit signed integers from both 64-bit integer vector 117/// parameters of [4 x i16] into 8-bit signed integer values, and constructs 118/// a 64-bit integer vector of [8 x i8] as the result. Positive values 119/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 120/// are saturated to 0x80. 121/// 122/// \headerfile <x86intrin.h> 123/// 124/// This intrinsic corresponds to the \c PACKSSWB instruction. 125/// 126/// \param __m1 127/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 128/// 16-bit signed integer and is converted to an 8-bit signed integer with 129/// saturation. Positive values greater than 0x7F are saturated to 0x7F. 130/// Negative values less than 0x80 are saturated to 0x80. The converted 131/// [4 x i8] values are written to the lower 32 bits of the result. 132/// \param __m2 133/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 134/// 16-bit signed integer and is converted to an 8-bit signed integer with 135/// saturation. Positive values greater than 0x7F are saturated to 0x7F. 136/// Negative values less than 0x80 are saturated to 0x80. The converted 137/// [4 x i8] values are written to the upper 32 bits of the result. 138/// \returns A 64-bit integer vector of [8 x i8] containing the converted 139/// values. 
140static __inline__ __m64 __DEFAULT_FN_ATTRS 141_mm_packs_pi16(__m64 __m1, __m64 __m2) 142{ 143 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 144} 145 146/// \brief Converts 32-bit signed integers from both 64-bit integer vector 147/// parameters of [2 x i32] into 16-bit signed integer values, and constructs 148/// a 64-bit integer vector of [4 x i16] as the result. Positive values 149/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than 150/// 0x8000 are saturated to 0x8000. 151/// 152/// \headerfile <x86intrin.h> 153/// 154/// This intrinsic corresponds to the \c PACKSSDW instruction. 155/// 156/// \param __m1 157/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 158/// 32-bit signed integer and is converted to a 16-bit signed integer with 159/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 160/// Negative values less than 0x8000 are saturated to 0x8000. The converted 161/// [2 x i16] values are written to the lower 32 bits of the result. 162/// \param __m2 163/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 164/// 32-bit signed integer and is converted to a 16-bit signed integer with 165/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 166/// Negative values less than 0x8000 are saturated to 0x8000. The converted 167/// [2 x i16] values are written to the upper 32 bits of the result. 168/// \returns A 64-bit integer vector of [4 x i16] containing the converted 169/// values. 170static __inline__ __m64 __DEFAULT_FN_ATTRS 171_mm_packs_pi32(__m64 __m1, __m64 __m2) 172{ 173 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 174} 175 176/// \brief Converts 16-bit signed integers from both 64-bit integer vector 177/// parameters of [4 x i16] into 8-bit unsigned integer values, and 178/// constructs a 64-bit integer vector of [8 x i8] as the result. 
Values 179/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated 180/// to 0. 181/// 182/// \headerfile <x86intrin.h> 183/// 184/// This intrinsic corresponds to the \c PACKUSWB instruction. 185/// 186/// \param __m1 187/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 188/// 16-bit signed integer and is converted to an 8-bit unsigned integer with 189/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 190/// than 0 are saturated to 0. The converted [4 x i8] values are written to 191/// the lower 32 bits of the result. 192/// \param __m2 193/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 194/// 16-bit signed integer and is converted to an 8-bit unsigned integer with 195/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 196/// than 0 are saturated to 0. The converted [4 x i8] values are written to 197/// the upper 32 bits of the result. 198/// \returns A 64-bit integer vector of [8 x i8] containing the converted 199/// values. 200static __inline__ __m64 __DEFAULT_FN_ATTRS 201_mm_packs_pu16(__m64 __m1, __m64 __m2) 202{ 203 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 204} 205 206/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 207/// and interleaves them into a 64-bit integer vector of [8 x i8]. 208/// 209/// \headerfile <x86intrin.h> 210/// 211/// This intrinsic corresponds to the \c PUNPCKHBW instruction. 212/// 213/// \param __m1 214/// A 64-bit integer vector of [8 x i8]. 215/// Bits [39:32] are written to bits [7:0] of the result. 216/// Bits [47:40] are written to bits [23:16] of the result. 217/// Bits [55:48] are written to bits [39:32] of the result. 218/// Bits [63:56] are written to bits [55:48] of the result. 219/// \param __m2 220/// A 64-bit integer vector of [8 x i8]. 221/// Bits [39:32] are written to bits [15:8] of the result. 
222/// Bits [47:40] are written to bits [31:24] of the result. 223/// Bits [55:48] are written to bits [47:40] of the result. 224/// Bits [63:56] are written to bits [63:56] of the result. 225/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 226/// values. 227static __inline__ __m64 __DEFAULT_FN_ATTRS 228_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 229{ 230 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 231} 232 233/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of 234/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 235/// 236/// \headerfile <x86intrin.h> 237/// 238/// This intrinsic corresponds to the \c PUNPCKHWD instruction. 239/// 240/// \param __m1 241/// A 64-bit integer vector of [4 x i16]. 242/// Bits [47:32] are written to bits [15:0] of the result. 243/// Bits [63:48] are written to bits [47:32] of the result. 244/// \param __m2 245/// A 64-bit integer vector of [4 x i16]. 246/// Bits [47:32] are written to bits [31:16] of the result. 247/// Bits [63:48] are written to bits [63:48] of the result. 248/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 249/// values. 250static __inline__ __m64 __DEFAULT_FN_ATTRS 251_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 252{ 253 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 254} 255 256/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of 257/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 258/// 259/// \headerfile <x86intrin.h> 260/// 261/// This intrinsic corresponds to the \c PUNPCKHDQ instruction. 262/// 263/// \param __m1 264/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 265/// the lower 32 bits of the result. 266/// \param __m2 267/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 268/// the upper 32 bits of the result. 
269/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 270/// values. 271static __inline__ __m64 __DEFAULT_FN_ATTRS 272_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 273{ 274 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 275} 276 277/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 278/// and interleaves them into a 64-bit integer vector of [8 x i8]. 279/// 280/// \headerfile <x86intrin.h> 281/// 282/// This intrinsic corresponds to the \c PUNPCKLBW instruction. 283/// 284/// \param __m1 285/// A 64-bit integer vector of [8 x i8]. 286/// Bits [7:0] are written to bits [7:0] of the result. 287/// Bits [15:8] are written to bits [23:16] of the result. 288/// Bits [23:16] are written to bits [39:32] of the result. 289/// Bits [31:24] are written to bits [55:48] of the result. 290/// \param __m2 291/// A 64-bit integer vector of [8 x i8]. 292/// Bits [7:0] are written to bits [15:8] of the result. 293/// Bits [15:8] are written to bits [31:24] of the result. 294/// Bits [23:16] are written to bits [47:40] of the result. 295/// Bits [31:24] are written to bits [63:56] of the result. 296/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 297/// values. 298static __inline__ __m64 __DEFAULT_FN_ATTRS 299_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 300{ 301 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 302} 303 304/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of 305/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 306/// 307/// \headerfile <x86intrin.h> 308/// 309/// This intrinsic corresponds to the \c PUNPCKLWD instruction. 310/// 311/// \param __m1 312/// A 64-bit integer vector of [4 x i16]. 313/// Bits [15:0] are written to bits [15:0] of the result. 314/// Bits [31:16] are written to bits [47:32] of the result. 315/// \param __m2 316/// A 64-bit integer vector of [4 x i16]. 
317/// Bits [15:0] are written to bits [31:16] of the result. 318/// Bits [31:16] are written to bits [63:48] of the result. 319/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 320/// values. 321static __inline__ __m64 __DEFAULT_FN_ATTRS 322_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 323{ 324 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 325} 326 327/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of 328/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 329/// 330/// \headerfile <x86intrin.h> 331/// 332/// This intrinsic corresponds to the \c PUNPCKLDQ instruction. 333/// 334/// \param __m1 335/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 336/// the lower 32 bits of the result. 337/// \param __m2 338/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 339/// the upper 32 bits of the result. 340/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 341/// values. 342static __inline__ __m64 __DEFAULT_FN_ATTRS 343_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 344{ 345 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 346} 347 348/// \brief Adds each 8-bit integer element of the first 64-bit integer vector 349/// of [8 x i8] to the corresponding 8-bit integer element of the second 350/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 351/// packed into a 64-bit integer vector of [8 x i8]. 352/// 353/// \headerfile <x86intrin.h> 354/// 355/// This intrinsic corresponds to the \c PADDB instruction. 356/// 357/// \param __m1 358/// A 64-bit integer vector of [8 x i8]. 359/// \param __m2 360/// A 64-bit integer vector of [8 x i8]. 361/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 362/// parameters. 
363static __inline__ __m64 __DEFAULT_FN_ATTRS 364_mm_add_pi8(__m64 __m1, __m64 __m2) 365{ 366 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 367} 368 369/// \brief Adds each 16-bit integer element of the first 64-bit integer vector 370/// of [4 x i16] to the corresponding 16-bit integer element of the second 371/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 372/// packed into a 64-bit integer vector of [4 x i16]. 373/// 374/// \headerfile <x86intrin.h> 375/// 376/// This intrinsic corresponds to the \c PADDW instruction. 377/// 378/// \param __m1 379/// A 64-bit integer vector of [4 x i16]. 380/// \param __m2 381/// A 64-bit integer vector of [4 x i16]. 382/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 383/// parameters. 384static __inline__ __m64 __DEFAULT_FN_ATTRS 385_mm_add_pi16(__m64 __m1, __m64 __m2) 386{ 387 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 388} 389 390/// \brief Adds each 32-bit integer element of the first 64-bit integer vector 391/// of [2 x i32] to the corresponding 32-bit integer element of the second 392/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 393/// packed into a 64-bit integer vector of [2 x i32]. 394/// 395/// \headerfile <x86intrin.h> 396/// 397/// This intrinsic corresponds to the \c PADDD instruction. 398/// 399/// \param __m1 400/// A 64-bit integer vector of [2 x i32]. 401/// \param __m2 402/// A 64-bit integer vector of [2 x i32]. 403/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 404/// parameters. 405static __inline__ __m64 __DEFAULT_FN_ATTRS 406_mm_add_pi32(__m64 __m1, __m64 __m2) 407{ 408 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 409} 410 411/// \brief Adds each 8-bit signed integer element of the first 64-bit integer 412/// vector of [8 x i8] to the corresponding 8-bit signed integer element of 413/// the second 64-bit integer vector of [8 x i8]. 
Positive sums greater than 414/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to 415/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. 416/// 417/// \headerfile <x86intrin.h> 418/// 419/// This intrinsic corresponds to the \c PADDSB instruction. 420/// 421/// \param __m1 422/// A 64-bit integer vector of [8 x i8]. 423/// \param __m2 424/// A 64-bit integer vector of [8 x i8]. 425/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 426/// of both parameters. 427static __inline__ __m64 __DEFAULT_FN_ATTRS 428_mm_adds_pi8(__m64 __m1, __m64 __m2) 429{ 430 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 431} 432 433/// \brief Adds each 16-bit signed integer element of the first 64-bit integer 434/// vector of [4 x i16] to the corresponding 16-bit signed integer element of 435/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than 436/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are 437/// saturated to 0x8000. The results are packed into a 64-bit integer vector 438/// of [4 x i16]. 439/// 440/// \headerfile <x86intrin.h> 441/// 442/// This intrinsic corresponds to the \c PADDSW instruction. 443/// 444/// \param __m1 445/// A 64-bit integer vector of [4 x i16]. 446/// \param __m2 447/// A 64-bit integer vector of [4 x i16]. 448/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 449/// of both parameters. 450static __inline__ __m64 __DEFAULT_FN_ATTRS 451_mm_adds_pi16(__m64 __m1, __m64 __m2) 452{ 453 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 454} 455 456/// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer 457/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of 458/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are 459/// saturated to 0xFF. The results are packed into a 64-bit integer vector of 460/// [8 x i8]. 
461/// 462/// \headerfile <x86intrin.h> 463/// 464/// This intrinsic corresponds to the \c PADDUSB instruction. 465/// 466/// \param __m1 467/// A 64-bit integer vector of [8 x i8]. 468/// \param __m2 469/// A 64-bit integer vector of [8 x i8]. 470/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 471/// unsigned sums of both parameters. 472static __inline__ __m64 __DEFAULT_FN_ATTRS 473_mm_adds_pu8(__m64 __m1, __m64 __m2) 474{ 475 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 476} 477 478/// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer 479/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element 480/// of the second 64-bit integer vector of [4 x i16]. Sums greater than 481/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit 482/// integer vector of [4 x i16]. 483/// 484/// \headerfile <x86intrin.h> 485/// 486/// This intrinsic corresponds to the \c PADDUSW instruction. 487/// 488/// \param __m1 489/// A 64-bit integer vector of [4 x i16]. 490/// \param __m2 491/// A 64-bit integer vector of [4 x i16]. 492/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 493/// unsigned sums of both parameters. 494static __inline__ __m64 __DEFAULT_FN_ATTRS 495_mm_adds_pu16(__m64 __m1, __m64 __m2) 496{ 497 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 498} 499 500/// \brief Subtracts each 8-bit integer element of the second 64-bit integer 501/// vector of [8 x i8] from the corresponding 8-bit integer element of the 502/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 503/// are packed into a 64-bit integer vector of [8 x i8]. 504/// 505/// \headerfile <x86intrin.h> 506/// 507/// This intrinsic corresponds to the \c PSUBB instruction. 508/// 509/// \param __m1 510/// A 64-bit integer vector of [8 x i8] containing the minuends. 
511/// \param __m2 512/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 513/// \returns A 64-bit integer vector of [8 x i8] containing the differences of 514/// both parameters. 515static __inline__ __m64 __DEFAULT_FN_ATTRS 516_mm_sub_pi8(__m64 __m1, __m64 __m2) 517{ 518 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 519} 520 521/// \brief Subtracts each 16-bit integer element of the second 64-bit integer 522/// vector of [4 x i16] from the corresponding 16-bit integer element of the 523/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 524/// results are packed into a 64-bit integer vector of [4 x i16]. 525/// 526/// \headerfile <x86intrin.h> 527/// 528/// This intrinsic corresponds to the \c PSUBW instruction. 529/// 530/// \param __m1 531/// A 64-bit integer vector of [4 x i16] containing the minuends. 532/// \param __m2 533/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 534/// \returns A 64-bit integer vector of [4 x i16] containing the differences of 535/// both parameters. 536static __inline__ __m64 __DEFAULT_FN_ATTRS 537_mm_sub_pi16(__m64 __m1, __m64 __m2) 538{ 539 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 540} 541 542/// \brief Subtracts each 32-bit integer element of the second 64-bit integer 543/// vector of [2 x i32] from the corresponding 32-bit integer element of the 544/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 545/// results are packed into a 64-bit integer vector of [2 x i32]. 546/// 547/// \headerfile <x86intrin.h> 548/// 549/// This intrinsic corresponds to the \c PSUBD instruction. 550/// 551/// \param __m1 552/// A 64-bit integer vector of [2 x i32] containing the minuends. 553/// \param __m2 554/// A 64-bit integer vector of [2 x i32] containing the subtrahends. 555/// \returns A 64-bit integer vector of [2 x i32] containing the differences of 556/// both parameters. 
557static __inline__ __m64 __DEFAULT_FN_ATTRS 558_mm_sub_pi32(__m64 __m1, __m64 __m2) 559{ 560 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 561} 562 563/// \brief Subtracts each 8-bit signed integer element of the second 64-bit 564/// integer vector of [8 x i8] from the corresponding 8-bit signed integer 565/// element of the first 64-bit integer vector of [8 x i8]. Positive results 566/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 567/// are saturated to 0x80. The results are packed into a 64-bit integer 568/// vector of [8 x i8]. 569/// 570/// \headerfile <x86intrin.h> 571/// 572/// This intrinsic corresponds to the \c PSUBSB instruction. 573/// 574/// \param __m1 575/// A 64-bit integer vector of [8 x i8] containing the minuends. 576/// \param __m2 577/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 578/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 579/// differences of both parameters. 580static __inline__ __m64 __DEFAULT_FN_ATTRS 581_mm_subs_pi8(__m64 __m1, __m64 __m2) 582{ 583 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 584} 585 586/// \brief Subtracts each 16-bit signed integer element of the second 64-bit 587/// integer vector of [4 x i16] from the corresponding 16-bit signed integer 588/// element of the first 64-bit integer vector of [4 x i16]. Positive results 589/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than 590/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit 591/// integer vector of [4 x i16]. 592/// 593/// \headerfile <x86intrin.h> 594/// 595/// This intrinsic corresponds to the \c PSUBSW instruction. 596/// 597/// \param __m1 598/// A 64-bit integer vector of [4 x i16] containing the minuends. 599/// \param __m2 600/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 
601/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 602/// differences of both parameters. 603static __inline__ __m64 __DEFAULT_FN_ATTRS 604_mm_subs_pi16(__m64 __m1, __m64 __m2) 605{ 606 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 607} 608 609/// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit 610/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 611/// element of the first 64-bit integer vector of [8 x i8]. If an element of 612/// the first vector is less than the corresponding element of the second 613/// vector, the result is saturated to 0. The results are packed into a 614/// 64-bit integer vector of [8 x i8]. 615/// 616/// \headerfile <x86intrin.h> 617/// 618/// This intrinsic corresponds to the \c PSUBUSB instruction. 619/// 620/// \param __m1 621/// A 64-bit integer vector of [8 x i8] containing the minuends. 622/// \param __m2 623/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 624/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 625/// differences of both parameters. 626static __inline__ __m64 __DEFAULT_FN_ATTRS 627_mm_subs_pu8(__m64 __m1, __m64 __m2) 628{ 629 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 630} 631 632/// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit 633/// integer vector of [4 x i16] from the corresponding 16-bit unsigned 634/// integer element of the first 64-bit integer vector of [4 x i16]. If an 635/// element of the first vector is less than the corresponding element of the 636/// second vector, the result is saturated to 0. The results are packed into 637/// a 64-bit integer vector of [4 x i16]. 638/// 639/// \headerfile <x86intrin.h> 640/// 641/// This intrinsic corresponds to the \c PSUBUSW instruction. 642/// 643/// \param __m1 644/// A 64-bit integer vector of [4 x i16] containing the minuends. 
645/// \param __m2 646/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 647/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 648/// differences of both parameters. 649static __inline__ __m64 __DEFAULT_FN_ATTRS 650_mm_subs_pu16(__m64 __m1, __m64 __m2) 651{ 652 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 653} 654 655/// \brief Multiplies each 16-bit signed integer element of the first 64-bit 656/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 657/// element of the second 64-bit integer vector of [4 x i16] and get four 658/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 659/// The lower 32 bits of these two sums are packed into a 64-bit integer 660/// vector of [2 x i32]. For example, bits [15:0] of both parameters are 661/// multiplied, bits [31:16] of both parameters are multiplied, and the sum 662/// of both results is written to bits [31:0] of the result. 663/// 664/// \headerfile <x86intrin.h> 665/// 666/// This intrinsic corresponds to the \c PMADDWD instruction. 667/// 668/// \param __m1 669/// A 64-bit integer vector of [4 x i16]. 670/// \param __m2 671/// A 64-bit integer vector of [4 x i16]. 672/// \returns A 64-bit integer vector of [2 x i32] containing the sums of 673/// products of both parameters. 674static __inline__ __m64 __DEFAULT_FN_ATTRS 675_mm_madd_pi16(__m64 __m1, __m64 __m2) 676{ 677 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 678} 679 680/// \brief Multiplies each 16-bit signed integer element of the first 64-bit 681/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 682/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 683/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 684/// 685/// \headerfile <x86intrin.h> 686/// 687/// This intrinsic corresponds to the \c PMULHW instruction. 
688/// 689/// \param __m1 690/// A 64-bit integer vector of [4 x i16]. 691/// \param __m2 692/// A 64-bit integer vector of [4 x i16]. 693/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 694/// of the products of both parameters. 695static __inline__ __m64 __DEFAULT_FN_ATTRS 696_mm_mulhi_pi16(__m64 __m1, __m64 __m2) 697{ 698 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 699} 700 701/// \brief Multiplies each 16-bit signed integer element of the first 64-bit 702/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 703/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 704/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 705/// 706/// \headerfile <x86intrin.h> 707/// 708/// This intrinsic corresponds to the \c PMULLW instruction. 709/// 710/// \param __m1 711/// A 64-bit integer vector of [4 x i16]. 712/// \param __m2 713/// A 64-bit integer vector of [4 x i16]. 714/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 715/// of the products of both parameters. 716static __inline__ __m64 __DEFAULT_FN_ATTRS 717_mm_mullo_pi16(__m64 __m1, __m64 __m2) 718{ 719 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 720} 721 722/// \brief Left-shifts each 16-bit signed integer element of the first 723/// parameter, which is a 64-bit integer vector of [4 x i16], by the number 724/// of bits specified by the second parameter, which is a 64-bit integer. The 725/// lower 16 bits of the results are packed into a 64-bit integer vector of 726/// [4 x i16]. 727/// 728/// \headerfile <x86intrin.h> 729/// 730/// This intrinsic corresponds to the \c PSLLW instruction. 731/// 732/// \param __m 733/// A 64-bit integer vector of [4 x i16]. 734/// \param __count 735/// A 64-bit integer vector interpreted as a single 64-bit integer. 736/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 737/// values. 
If __count is greater or equal to 16, the result is set to all 0. 738static __inline__ __m64 __DEFAULT_FN_ATTRS 739_mm_sll_pi16(__m64 __m, __m64 __count) 740{ 741 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 742} 743 744/// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer 745/// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 746/// The lower 16 bits of the results are packed into a 64-bit integer vector 747/// of [4 x i16]. 748/// 749/// \headerfile <x86intrin.h> 750/// 751/// This intrinsic corresponds to the \c PSLLW instruction. 752/// 753/// \param __m 754/// A 64-bit integer vector of [4 x i16]. 755/// \param __count 756/// A 32-bit integer value. 757/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 758/// values. If __count is greater or equal to 16, the result is set to all 0. 759static __inline__ __m64 __DEFAULT_FN_ATTRS 760_mm_slli_pi16(__m64 __m, int __count) 761{ 762 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 763} 764 765/// \brief Left-shifts each 32-bit signed integer element of the first 766/// parameter, which is a 64-bit integer vector of [2 x i32], by the number 767/// of bits specified by the second parameter, which is a 64-bit integer. The 768/// lower 32 bits of the results are packed into a 64-bit integer vector of 769/// [2 x i32]. 770/// 771/// \headerfile <x86intrin.h> 772/// 773/// This intrinsic corresponds to the \c PSLLD instruction. 774/// 775/// \param __m 776/// A 64-bit integer vector of [2 x i32]. 777/// \param __count 778/// A 64-bit integer vector interpreted as a single 64-bit integer. 779/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 780/// values. If __count is greater or equal to 32, the result is set to all 0. 
781static __inline__ __m64 __DEFAULT_FN_ATTRS 782_mm_sll_pi32(__m64 __m, __m64 __count) 783{ 784 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 785} 786 787/// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer 788/// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 789/// The lower 32 bits of the results are packed into a 64-bit integer vector 790/// of [2 x i32]. 791/// 792/// \headerfile <x86intrin.h> 793/// 794/// This intrinsic corresponds to the \c PSLLD instruction. 795/// 796/// \param __m 797/// A 64-bit integer vector of [2 x i32]. 798/// \param __count 799/// A 32-bit integer value. 800/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 801/// values. If __count is greater or equal to 32, the result is set to all 0. 802static __inline__ __m64 __DEFAULT_FN_ATTRS 803_mm_slli_pi32(__m64 __m, int __count) 804{ 805 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 806} 807 808/// \brief Left-shifts the first 64-bit integer parameter by the number of bits 809/// specified by the second 64-bit integer parameter. The lower 64 bits of 810/// result are returned. 811/// 812/// \headerfile <x86intrin.h> 813/// 814/// This intrinsic corresponds to the \c PSLLQ instruction. 815/// 816/// \param __m 817/// A 64-bit integer vector interpreted as a single 64-bit integer. 818/// \param __count 819/// A 64-bit integer vector interpreted as a single 64-bit integer. 820/// \returns A 64-bit integer vector containing the left-shifted value. If 821/// __count is greater or equal to 64, the result is set to 0. 822static __inline__ __m64 __DEFAULT_FN_ATTRS 823_mm_sll_si64(__m64 __m, __m64 __count) 824{ 825 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 826} 827 828/// \brief Left-shifts the first parameter, which is a 64-bit integer, by the 829/// number of bits specified by the second parameter, which is a 32-bit 830/// integer. The lower 64 bits of result are returned. 
831/// 832/// \headerfile <x86intrin.h> 833/// 834/// This intrinsic corresponds to the \c PSLLQ instruction. 835/// 836/// \param __m 837/// A 64-bit integer vector interpreted as a single 64-bit integer. 838/// \param __count 839/// A 32-bit integer value. 840/// \returns A 64-bit integer vector containing the left-shifted value. If 841/// __count is greater or equal to 64, the result is set to 0. 842static __inline__ __m64 __DEFAULT_FN_ATTRS 843_mm_slli_si64(__m64 __m, int __count) 844{ 845 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 846} 847 848/// \brief Right-shifts each 16-bit integer element of the first parameter, 849/// which is a 64-bit integer vector of [4 x i16], by the number of bits 850/// specified by the second parameter, which is a 64-bit integer. High-order 851/// bits are filled with the sign bit of the initial value of each 16-bit 852/// element. The 16-bit results are packed into a 64-bit integer vector of 853/// [4 x i16]. 854/// 855/// \headerfile <x86intrin.h> 856/// 857/// This intrinsic corresponds to the \c PSRAW instruction. 858/// 859/// \param __m 860/// A 64-bit integer vector of [4 x i16]. 861/// \param __count 862/// A 64-bit integer vector interpreted as a single 64-bit integer. 863/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 864/// values. 865static __inline__ __m64 __DEFAULT_FN_ATTRS 866_mm_sra_pi16(__m64 __m, __m64 __count) 867{ 868 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 869} 870 871/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector 872/// of [4 x i16] by the number of bits specified by a 32-bit integer. 873/// High-order bits are filled with the sign bit of the initial value of each 874/// 16-bit element. The 16-bit results are packed into a 64-bit integer 875/// vector of [4 x i16]. 876/// 877/// \headerfile <x86intrin.h> 878/// 879/// This intrinsic corresponds to the \c PSRAW instruction. 
880/// 881/// \param __m 882/// A 64-bit integer vector of [4 x i16]. 883/// \param __count 884/// A 32-bit integer value. 885/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 886/// values. 887static __inline__ __m64 __DEFAULT_FN_ATTRS 888_mm_srai_pi16(__m64 __m, int __count) 889{ 890 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 891} 892 893/// \brief Right-shifts each 32-bit integer element of the first parameter, 894/// which is a 64-bit integer vector of [2 x i32], by the number of bits 895/// specified by the second parameter, which is a 64-bit integer. High-order 896/// bits are filled with the sign bit of the initial value of each 32-bit 897/// element. The 32-bit results are packed into a 64-bit integer vector of 898/// [2 x i32]. 899/// 900/// \headerfile <x86intrin.h> 901/// 902/// This intrinsic corresponds to the \c PSRAD instruction. 903/// 904/// \param __m 905/// A 64-bit integer vector of [2 x i32]. 906/// \param __count 907/// A 64-bit integer vector interpreted as a single 64-bit integer. 908/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 909/// values. 910static __inline__ __m64 __DEFAULT_FN_ATTRS 911_mm_sra_pi32(__m64 __m, __m64 __count) 912{ 913 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 914} 915 916/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector 917/// of [2 x i32] by the number of bits specified by a 32-bit integer. 918/// High-order bits are filled with the sign bit of the initial value of each 919/// 32-bit element. The 32-bit results are packed into a 64-bit integer 920/// vector of [2 x i32]. 921/// 922/// \headerfile <x86intrin.h> 923/// 924/// This intrinsic corresponds to the \c PSRAD instruction. 925/// 926/// \param __m 927/// A 64-bit integer vector of [2 x i32]. 928/// \param __count 929/// A 32-bit integer value. 930/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 931/// values. 
932static __inline__ __m64 __DEFAULT_FN_ATTRS 933_mm_srai_pi32(__m64 __m, int __count) 934{ 935 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 936} 937 938/// \brief Right-shifts each 16-bit integer element of the first parameter, 939/// which is a 64-bit integer vector of [4 x i16], by the number of bits 940/// specified by the second parameter, which is a 64-bit integer. High-order 941/// bits are cleared. The 16-bit results are packed into a 64-bit integer 942/// vector of [4 x i16]. 943/// 944/// \headerfile <x86intrin.h> 945/// 946/// This intrinsic corresponds to the \c PSRLW instruction. 947/// 948/// \param __m 949/// A 64-bit integer vector of [4 x i16]. 950/// \param __count 951/// A 64-bit integer vector interpreted as a single 64-bit integer. 952/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 953/// values. 954static __inline__ __m64 __DEFAULT_FN_ATTRS 955_mm_srl_pi16(__m64 __m, __m64 __count) 956{ 957 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 958} 959 960/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector 961/// of [4 x i16] by the number of bits specified by a 32-bit integer. 962/// High-order bits are cleared. The 16-bit results are packed into a 64-bit 963/// integer vector of [4 x i16]. 964/// 965/// \headerfile <x86intrin.h> 966/// 967/// This intrinsic corresponds to the \c PSRLW instruction. 968/// 969/// \param __m 970/// A 64-bit integer vector of [4 x i16]. 971/// \param __count 972/// A 32-bit integer value. 973/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 974/// values. 
975static __inline__ __m64 __DEFAULT_FN_ATTRS 976_mm_srli_pi16(__m64 __m, int __count) 977{ 978 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 979} 980 981/// \brief Right-shifts each 32-bit integer element of the first parameter, 982/// which is a 64-bit integer vector of [2 x i32], by the number of bits 983/// specified by the second parameter, which is a 64-bit integer. High-order 984/// bits are cleared. The 32-bit results are packed into a 64-bit integer 985/// vector of [2 x i32]. 986/// 987/// \headerfile <x86intrin.h> 988/// 989/// This intrinsic corresponds to the \c PSRLD instruction. 990/// 991/// \param __m 992/// A 64-bit integer vector of [2 x i32]. 993/// \param __count 994/// A 64-bit integer vector interpreted as a single 64-bit integer. 995/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 996/// values. 997static __inline__ __m64 __DEFAULT_FN_ATTRS 998_mm_srl_pi32(__m64 __m, __m64 __count) 999{ 1000 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 1001} 1002 1003/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector 1004/// of [2 x i32] by the number of bits specified by a 32-bit integer. 1005/// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1006/// integer vector of [2 x i32]. 1007/// 1008/// \headerfile <x86intrin.h> 1009/// 1010/// This intrinsic corresponds to the \c PSRLD instruction. 1011/// 1012/// \param __m 1013/// A 64-bit integer vector of [2 x i32]. 1014/// \param __count 1015/// A 32-bit integer value. 1016/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1017/// values. 1018static __inline__ __m64 __DEFAULT_FN_ATTRS 1019_mm_srli_pi32(__m64 __m, int __count) 1020{ 1021 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 1022} 1023 1024/// \brief Right-shifts the first 64-bit integer parameter by the number of bits 1025/// specified by the second 64-bit integer parameter. 
High-order bits are 1026/// cleared. 1027/// 1028/// \headerfile <x86intrin.h> 1029/// 1030/// This intrinsic corresponds to the \c PSRLQ instruction. 1031/// 1032/// \param __m 1033/// A 64-bit integer vector interpreted as a single 64-bit integer. 1034/// \param __count 1035/// A 64-bit integer vector interpreted as a single 64-bit integer. 1036/// \returns A 64-bit integer vector containing the right-shifted value. 1037static __inline__ __m64 __DEFAULT_FN_ATTRS 1038_mm_srl_si64(__m64 __m, __m64 __count) 1039{ 1040 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 1041} 1042 1043/// \brief Right-shifts the first parameter, which is a 64-bit integer, by the 1044/// number of bits specified by the second parameter, which is a 32-bit 1045/// integer. High-order bits are cleared. 1046/// 1047/// \headerfile <x86intrin.h> 1048/// 1049/// This intrinsic corresponds to the \c PSRLQ instruction. 1050/// 1051/// \param __m 1052/// A 64-bit integer vector interpreted as a single 64-bit integer. 1053/// \param __count 1054/// A 32-bit integer value. 1055/// \returns A 64-bit integer vector containing the right-shifted value. 1056static __inline__ __m64 __DEFAULT_FN_ATTRS 1057_mm_srli_si64(__m64 __m, int __count) 1058{ 1059 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 1060} 1061 1062/// \brief Performs a bitwise AND of two 64-bit integer vectors. 1063/// 1064/// \headerfile <x86intrin.h> 1065/// 1066/// This intrinsic corresponds to the \c PAND instruction. 1067/// 1068/// \param __m1 1069/// A 64-bit integer vector. 1070/// \param __m2 1071/// A 64-bit integer vector. 1072/// \returns A 64-bit integer vector containing the bitwise AND of both 1073/// parameters. 
1074static __inline__ __m64 __DEFAULT_FN_ATTRS 1075_mm_and_si64(__m64 __m1, __m64 __m2) 1076{ 1077 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 1078} 1079 1080/// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then 1081/// performs a bitwise AND of the intermediate result and the second 64-bit 1082/// integer vector. 1083/// 1084/// \headerfile <x86intrin.h> 1085/// 1086/// This intrinsic corresponds to the \c PANDN instruction. 1087/// 1088/// \param __m1 1089/// A 64-bit integer vector. The one's complement of this parameter is used 1090/// in the bitwise AND. 1091/// \param __m2 1092/// A 64-bit integer vector. 1093/// \returns A 64-bit integer vector containing the bitwise AND of the second 1094/// parameter and the one's complement of the first parameter. 1095static __inline__ __m64 __DEFAULT_FN_ATTRS 1096_mm_andnot_si64(__m64 __m1, __m64 __m2) 1097{ 1098 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 1099} 1100 1101/// \brief Performs a bitwise OR of two 64-bit integer vectors. 1102/// 1103/// \headerfile <x86intrin.h> 1104/// 1105/// This intrinsic corresponds to the \c POR instruction. 1106/// 1107/// \param __m1 1108/// A 64-bit integer vector. 1109/// \param __m2 1110/// A 64-bit integer vector. 1111/// \returns A 64-bit integer vector containing the bitwise OR of both 1112/// parameters. 1113static __inline__ __m64 __DEFAULT_FN_ATTRS 1114_mm_or_si64(__m64 __m1, __m64 __m2) 1115{ 1116 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 1117} 1118 1119/// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors. 1120/// 1121/// \headerfile <x86intrin.h> 1122/// 1123/// This intrinsic corresponds to the \c PXOR instruction. 1124/// 1125/// \param __m1 1126/// A 64-bit integer vector. 1127/// \param __m2 1128/// A 64-bit integer vector. 1129/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1130/// parameters. 
1131static __inline__ __m64 __DEFAULT_FN_ATTRS 1132_mm_xor_si64(__m64 __m1, __m64 __m2) 1133{ 1134 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 1135} 1136 1137/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of 1138/// [8 x i8] to determine if the element of the first vector is equal to the 1139/// corresponding element of the second vector. The comparison yields 0 for 1140/// false, 0xFF for true. 1141/// 1142/// \headerfile <x86intrin.h> 1143/// 1144/// This intrinsic corresponds to the \c PCMPEQB instruction. 1145/// 1146/// \param __m1 1147/// A 64-bit integer vector of [8 x i8]. 1148/// \param __m2 1149/// A 64-bit integer vector of [8 x i8]. 1150/// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1151/// results. 1152static __inline__ __m64 __DEFAULT_FN_ATTRS 1153_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1154{ 1155 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 1156} 1157 1158/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of 1159/// [4 x i16] to determine if the element of the first vector is equal to the 1160/// corresponding element of the second vector. The comparison yields 0 for 1161/// false, 0xFFFF for true. 1162/// 1163/// \headerfile <x86intrin.h> 1164/// 1165/// This intrinsic corresponds to the \c PCMPEQW instruction. 1166/// 1167/// \param __m1 1168/// A 64-bit integer vector of [4 x i16]. 1169/// \param __m2 1170/// A 64-bit integer vector of [4 x i16]. 1171/// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1172/// results. 1173static __inline__ __m64 __DEFAULT_FN_ATTRS 1174_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1175{ 1176 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 1177} 1178 1179/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of 1180/// [2 x i32] to determine if the element of the first vector is equal to the 1181/// corresponding element of the second vector. 
The comparison yields 0 for 1182/// false, 0xFFFFFFFF for true. 1183/// 1184/// \headerfile <x86intrin.h> 1185/// 1186/// This intrinsic corresponds to the \c PCMPEQD instruction. 1187/// 1188/// \param __m1 1189/// A 64-bit integer vector of [2 x i32]. 1190/// \param __m2 1191/// A 64-bit integer vector of [2 x i32]. 1192/// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1193/// results. 1194static __inline__ __m64 __DEFAULT_FN_ATTRS 1195_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1196{ 1197 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 1198} 1199 1200/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of 1201/// [8 x i8] to determine if the element of the first vector is greater than 1202/// the corresponding element of the second vector. The comparison yields 0 1203/// for false, 0xFF for true. 1204/// 1205/// \headerfile <x86intrin.h> 1206/// 1207/// This intrinsic corresponds to the \c PCMPGTB instruction. 1208/// 1209/// \param __m1 1210/// A 64-bit integer vector of [8 x i8]. 1211/// \param __m2 1212/// A 64-bit integer vector of [8 x i8]. 1213/// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1214/// results. 1215static __inline__ __m64 __DEFAULT_FN_ATTRS 1216_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1217{ 1218 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 1219} 1220 1221/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of 1222/// [4 x i16] to determine if the element of the first vector is greater than 1223/// the corresponding element of the second vector. The comparison yields 0 1224/// for false, 0xFFFF for true. 1225/// 1226/// \headerfile <x86intrin.h> 1227/// 1228/// This intrinsic corresponds to the \c PCMPGTW instruction. 1229/// 1230/// \param __m1 1231/// A 64-bit integer vector of [4 x i16]. 1232/// \param __m2 1233/// A 64-bit integer vector of [4 x i16]. 
1234/// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1235/// results. 1236static __inline__ __m64 __DEFAULT_FN_ATTRS 1237_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1238{ 1239 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 1240} 1241 1242/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of 1243/// [2 x i32] to determine if the element of the first vector is greater than 1244/// the corresponding element of the second vector. The comparison yields 0 1245/// for false, 0xFFFFFFFF for true. 1246/// 1247/// \headerfile <x86intrin.h> 1248/// 1249/// This intrinsic corresponds to the \c PCMPGTD instruction. 1250/// 1251/// \param __m1 1252/// A 64-bit integer vector of [2 x i32]. 1253/// \param __m2 1254/// A 64-bit integer vector of [2 x i32]. 1255/// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1256/// results. 1257static __inline__ __m64 __DEFAULT_FN_ATTRS 1258_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1259{ 1260 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 1261} 1262 1263/// \brief Constructs a 64-bit integer vector initialized to zero. 1264/// 1265/// \headerfile <x86intrin.h> 1266/// 1267/// This intrinsic corresponds to the the \c VXORPS / XORPS instruction. 1268/// 1269/// \returns An initialized 64-bit integer vector with all elements set to zero. 1270static __inline__ __m64 __DEFAULT_FN_ATTRS 1271_mm_setzero_si64(void) 1272{ 1273 return (__m64){ 0LL }; 1274} 1275 1276/// \brief Constructs a 64-bit integer vector initialized with the specified 1277/// 32-bit integer values. 1278/// 1279/// \headerfile <x86intrin.h> 1280/// 1281/// This intrinsic is a utility function and does not correspond to a specific 1282/// instruction. 1283/// 1284/// \param __i1 1285/// A 32-bit integer value used to initialize the upper 32 bits of the 1286/// result. 1287/// \param __i0 1288/// A 32-bit integer value used to initialize the lower 32 bits of the 1289/// result. 
1290/// \returns An initialized 64-bit integer vector. 1291static __inline__ __m64 __DEFAULT_FN_ATTRS 1292_mm_set_pi32(int __i1, int __i0) 1293{ 1294 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 1295} 1296 1297/// \brief Constructs a 64-bit integer vector initialized with the specified 1298/// 16-bit integer values. 1299/// 1300/// \headerfile <x86intrin.h> 1301/// 1302/// This intrinsic is a utility function and does not correspond to a specific 1303/// instruction. 1304/// 1305/// \param __s3 1306/// A 16-bit integer value used to initialize bits [63:48] of the result. 1307/// \param __s2 1308/// A 16-bit integer value used to initialize bits [47:32] of the result. 1309/// \param __s1 1310/// A 16-bit integer value used to initialize bits [31:16] of the result. 1311/// \param __s0 1312/// A 16-bit integer value used to initialize bits [15:0] of the result. 1313/// \returns An initialized 64-bit integer vector. 1314static __inline__ __m64 __DEFAULT_FN_ATTRS 1315_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 1316{ 1317 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 1318} 1319 1320/// \brief Constructs a 64-bit integer vector initialized with the specified 1321/// 8-bit integer values. 1322/// 1323/// \headerfile <x86intrin.h> 1324/// 1325/// This intrinsic is a utility function and does not correspond to a specific 1326/// instruction. 1327/// 1328/// \param __b7 1329/// An 8-bit integer value used to initialize bits [63:56] of the result. 1330/// \param __b6 1331/// An 8-bit integer value used to initialize bits [55:48] of the result. 1332/// \param __b5 1333/// An 8-bit integer value used to initialize bits [47:40] of the result. 1334/// \param __b4 1335/// An 8-bit integer value used to initialize bits [39:32] of the result. 1336/// \param __b3 1337/// An 8-bit integer value used to initialize bits [31:24] of the result. 1338/// \param __b2 1339/// An 8-bit integer value used to initialize bits [23:16] of the result. 
1340/// \param __b1 1341/// An 8-bit integer value used to initialize bits [15:8] of the result. 1342/// \param __b0 1343/// An 8-bit integer value used to initialize bits [7:0] of the result. 1344/// \returns An initialized 64-bit integer vector. 1345static __inline__ __m64 __DEFAULT_FN_ATTRS 1346_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1347 char __b1, char __b0) 1348{ 1349 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 1350 __b4, __b5, __b6, __b7); 1351} 1352 1353/// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the 1354/// 32-bit integer vector elements set to the specified 32-bit integer 1355/// value. 1356/// 1357/// \headerfile <x86intrin.h> 1358/// 1359/// This intrinsic corresponds to the \c VPSHUFD / PSHUFD instruction. 1360/// 1361/// \param __i 1362/// A 32-bit integer value used to initialize each vector element of the 1363/// result. 1364/// \returns An initialized 64-bit integer vector of [2 x i32]. 1365static __inline__ __m64 __DEFAULT_FN_ATTRS 1366_mm_set1_pi32(int __i) 1367{ 1368 return _mm_set_pi32(__i, __i); 1369} 1370 1371/// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the 1372/// 16-bit integer vector elements set to the specified 16-bit integer 1373/// value. 1374/// 1375/// \headerfile <x86intrin.h> 1376/// 1377/// This intrinsic corresponds to the \c VPSHUFLW / PSHUFLW instruction. 1378/// 1379/// \param __w 1380/// A 16-bit integer value used to initialize each vector element of the 1381/// result. 1382/// \returns An initialized 64-bit integer vector of [4 x i16]. 1383static __inline__ __m64 __DEFAULT_FN_ATTRS 1384_mm_set1_pi16(short __w) 1385{ 1386 return _mm_set_pi16(__w, __w, __w, __w); 1387} 1388 1389/// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the 1390/// 8-bit integer vector elements set to the specified 8-bit integer value. 
1391/// 1392/// \headerfile <x86intrin.h> 1393/// 1394/// This intrinsic corresponds to the \c VPUNPCKLBW + VPSHUFLW / \c PUNPCKLBW + 1395/// PSHUFLW instruction. 1396/// 1397/// \param __b 1398/// An 8-bit integer value used to initialize each vector element of the 1399/// result. 1400/// \returns An initialized 64-bit integer vector of [8 x i8]. 1401static __inline__ __m64 __DEFAULT_FN_ATTRS 1402_mm_set1_pi8(char __b) 1403{ 1404 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1405} 1406 1407/// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1408/// the specified 32-bit integer values. 1409/// 1410/// \headerfile <x86intrin.h> 1411/// 1412/// This intrinsic is a utility function and does not correspond to a specific 1413/// instruction. 1414/// 1415/// \param __i0 1416/// A 32-bit integer value used to initialize the lower 32 bits of the 1417/// result. 1418/// \param __i1 1419/// A 32-bit integer value used to initialize the upper 32 bits of the 1420/// result. 1421/// \returns An initialized 64-bit integer vector. 1422static __inline__ __m64 __DEFAULT_FN_ATTRS 1423_mm_setr_pi32(int __i0, int __i1) 1424{ 1425 return _mm_set_pi32(__i1, __i0); 1426} 1427 1428/// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1429/// the specified 16-bit integer values. 1430/// 1431/// \headerfile <x86intrin.h> 1432/// 1433/// This intrinsic is a utility function and does not correspond to a specific 1434/// instruction. 1435/// 1436/// \param __w0 1437/// A 16-bit integer value used to initialize bits [15:0] of the result. 1438/// \param __w1 1439/// A 16-bit integer value used to initialize bits [31:16] of the result. 1440/// \param __w2 1441/// A 16-bit integer value used to initialize bits [47:32] of the result. 1442/// \param __w3 1443/// A 16-bit integer value used to initialize bits [63:48] of the result. 1444/// \returns An initialized 64-bit integer vector. 
1445static __inline__ __m64 __DEFAULT_FN_ATTRS 1446_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 1447{ 1448 return _mm_set_pi16(__w3, __w2, __w1, __w0); 1449} 1450 1451/// \brief Constructs a 64-bit integer vector, initialized in reverse order with 1452/// the specified 8-bit integer values. 1453/// 1454/// \headerfile <x86intrin.h> 1455/// 1456/// This intrinsic is a utility function and does not correspond to a specific 1457/// instruction. 1458/// 1459/// \param __b0 1460/// An 8-bit integer value used to initialize bits [7:0] of the result. 1461/// \param __b1 1462/// An 8-bit integer value used to initialize bits [15:8] of the result. 1463/// \param __b2 1464/// An 8-bit integer value used to initialize bits [23:16] of the result. 1465/// \param __b3 1466/// An 8-bit integer value used to initialize bits [31:24] of the result. 1467/// \param __b4 1468/// An 8-bit integer value used to initialize bits [39:32] of the result. 1469/// \param __b5 1470/// An 8-bit integer value used to initialize bits [47:40] of the result. 1471/// \param __b6 1472/// An 8-bit integer value used to initialize bits [55:48] of the result. 1473/// \param __b7 1474/// An 8-bit integer value used to initialize bits [63:56] of the result. 1475/// \returns An initialized 64-bit integer vector. 1476static __inline__ __m64 __DEFAULT_FN_ATTRS 1477_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1478 char __b6, char __b7) 1479{ 1480 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1481} 1482 1483#undef __DEFAULT_FN_ATTRS 1484 1485/* Aliases for compatibility. 
*/ 1486#define _m_empty _mm_empty 1487#define _m_from_int _mm_cvtsi32_si64 1488#define _m_from_int64 _mm_cvtsi64_m64 1489#define _m_to_int _mm_cvtsi64_si32 1490#define _m_to_int64 _mm_cvtm64_si64 1491#define _m_packsswb _mm_packs_pi16 1492#define _m_packssdw _mm_packs_pi32 1493#define _m_packuswb _mm_packs_pu16 1494#define _m_punpckhbw _mm_unpackhi_pi8 1495#define _m_punpckhwd _mm_unpackhi_pi16 1496#define _m_punpckhdq _mm_unpackhi_pi32 1497#define _m_punpcklbw _mm_unpacklo_pi8 1498#define _m_punpcklwd _mm_unpacklo_pi16 1499#define _m_punpckldq _mm_unpacklo_pi32 1500#define _m_paddb _mm_add_pi8 1501#define _m_paddw _mm_add_pi16 1502#define _m_paddd _mm_add_pi32 1503#define _m_paddsb _mm_adds_pi8 1504#define _m_paddsw _mm_adds_pi16 1505#define _m_paddusb _mm_adds_pu8 1506#define _m_paddusw _mm_adds_pu16 1507#define _m_psubb _mm_sub_pi8 1508#define _m_psubw _mm_sub_pi16 1509#define _m_psubd _mm_sub_pi32 1510#define _m_psubsb _mm_subs_pi8 1511#define _m_psubsw _mm_subs_pi16 1512#define _m_psubusb _mm_subs_pu8 1513#define _m_psubusw _mm_subs_pu16 1514#define _m_pmaddwd _mm_madd_pi16 1515#define _m_pmulhw _mm_mulhi_pi16 1516#define _m_pmullw _mm_mullo_pi16 1517#define _m_psllw _mm_sll_pi16 1518#define _m_psllwi _mm_slli_pi16 1519#define _m_pslld _mm_sll_pi32 1520#define _m_pslldi _mm_slli_pi32 1521#define _m_psllq _mm_sll_si64 1522#define _m_psllqi _mm_slli_si64 1523#define _m_psraw _mm_sra_pi16 1524#define _m_psrawi _mm_srai_pi16 1525#define _m_psrad _mm_sra_pi32 1526#define _m_psradi _mm_srai_pi32 1527#define _m_psrlw _mm_srl_pi16 1528#define _m_psrlwi _mm_srli_pi16 1529#define _m_psrld _mm_srl_pi32 1530#define _m_psrldi _mm_srli_pi32 1531#define _m_psrlq _mm_srl_si64 1532#define _m_psrlqi _mm_srli_si64 1533#define _m_pand _mm_and_si64 1534#define _m_pandn _mm_andnot_si64 1535#define _m_por _mm_or_si64 1536#define _m_pxor _mm_xor_si64 1537#define _m_pcmpeqb _mm_cmpeq_pi8 1538#define _m_pcmpeqw _mm_cmpeq_pi16 1539#define _m_pcmpeqd _mm_cmpeq_pi32 1540#define _m_pcmpgtb 
_mm_cmpgt_pi8 1541#define _m_pcmpgtw _mm_cmpgt_pi16 1542#define _m_pcmpgtd _mm_cmpgt_pi32 1543 1544#endif /* __MMINTRIN_H */ 1545 1546