1/* Copyright (C) 2003-2014 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24/* Implemented from the specification included in the Intel C++ Compiler 25 User Guide and Reference, version 9.0. */ 26 27#ifndef _EMMINTRIN_H_INCLUDED 28#define _EMMINTRIN_H_INCLUDED 29 30/* We need definitions from the SSE header files*/ 31#include <xmmintrin.h> 32 33#ifndef __SSE2__ 34#pragma GCC push_options 35#pragma GCC target("sse2") 36#define __DISABLE_SSE2__ 37#endif /* __SSE2__ */ 38 39/* SSE2 */ 40typedef double __v2df __attribute__ ((__vector_size__ (16))); 41typedef long long __v2di __attribute__ ((__vector_size__ (16))); 42typedef int __v4si __attribute__ ((__vector_size__ (16))); 43typedef short __v8hi __attribute__ ((__vector_size__ (16))); 44typedef char __v16qi __attribute__ ((__vector_size__ (16))); 45 46/* The Intel API is flexible enough that we must allow aliasing with other 47 vector types, and their scalar components. 
*/ 48typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); 49typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); 50 51/* Create a selector for use with the SHUFPD instruction. */ 52#define _MM_SHUFFLE2(fp1,fp0) \ 53 (((fp1) << 1) | (fp0)) 54 55/* Create a vector with element 0 as F and the rest zero. */ 56extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 57_mm_set_sd (double __F) 58{ 59 return __extension__ (__m128d){ __F, 0.0 }; 60} 61 62/* Create a vector with both elements equal to F. */ 63extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 64_mm_set1_pd (double __F) 65{ 66 return __extension__ (__m128d){ __F, __F }; 67} 68 69extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 70_mm_set_pd1 (double __F) 71{ 72 return _mm_set1_pd (__F); 73} 74 75/* Create a vector with the lower value X and upper value W. */ 76extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 77_mm_set_pd (double __W, double __X) 78{ 79 return __extension__ (__m128d){ __X, __W }; 80} 81 82/* Create a vector with the lower value W and upper value X. */ 83extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 84_mm_setr_pd (double __W, double __X) 85{ 86 return __extension__ (__m128d){ __W, __X }; 87} 88 89/* Create an undefined vector. */ 90extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 91_mm_undefined_pd (void) 92{ 93 __m128d __Y = __Y; 94 return __Y; 95} 96 97/* Create a vector of zeros. */ 98extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 99_mm_setzero_pd (void) 100{ 101 return __extension__ (__m128d){ 0.0, 0.0 }; 102} 103 104/* Sets the low DPFP value of A from the low value of B. 
*/ 105extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 106_mm_move_sd (__m128d __A, __m128d __B) 107{ 108 return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); 109} 110 111/* Load two DPFP values from P. The address must be 16-byte aligned. */ 112extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 113_mm_load_pd (double const *__P) 114{ 115 return *(__m128d *)__P; 116} 117 118/* Load two DPFP values from P. The address need not be 16-byte aligned. */ 119extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 120_mm_loadu_pd (double const *__P) 121{ 122 return __builtin_ia32_loadupd (__P); 123} 124 125/* Create a vector with all two elements equal to *P. */ 126extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 127_mm_load1_pd (double const *__P) 128{ 129 return _mm_set1_pd (*__P); 130} 131 132/* Create a vector with element 0 as *P and the rest zero. */ 133extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 134_mm_load_sd (double const *__P) 135{ 136 return _mm_set_sd (*__P); 137} 138 139extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 140_mm_load_pd1 (double const *__P) 141{ 142 return _mm_load1_pd (__P); 143} 144 145/* Load two DPFP values in reverse order. The address must be aligned. */ 146extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 147_mm_loadr_pd (double const *__P) 148{ 149 __m128d __tmp = _mm_load_pd (__P); 150 return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1)); 151} 152 153/* Store two DPFP values. The address must be 16-byte aligned. */ 154extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 155_mm_store_pd (double *__P, __m128d __A) 156{ 157 *(__m128d *)__P = __A; 158} 159 160/* Store two DPFP values. 
The address need not be 16-byte aligned. */ 161extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 162_mm_storeu_pd (double *__P, __m128d __A) 163{ 164 __builtin_ia32_storeupd (__P, __A); 165} 166 167/* Stores the lower DPFP value. */ 168extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 169_mm_store_sd (double *__P, __m128d __A) 170{ 171 *__P = __builtin_ia32_vec_ext_v2df (__A, 0); 172} 173 174extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 175_mm_cvtsd_f64 (__m128d __A) 176{ 177 return __builtin_ia32_vec_ext_v2df (__A, 0); 178} 179 180extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 181_mm_storel_pd (double *__P, __m128d __A) 182{ 183 _mm_store_sd (__P, __A); 184} 185 186/* Stores the upper DPFP value. */ 187extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 188_mm_storeh_pd (double *__P, __m128d __A) 189{ 190 *__P = __builtin_ia32_vec_ext_v2df (__A, 1); 191} 192 193/* Store the lower DPFP value across two words. 194 The address must be 16-byte aligned. */ 195extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 196_mm_store1_pd (double *__P, __m128d __A) 197{ 198 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0))); 199} 200 201extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 202_mm_store_pd1 (double *__P, __m128d __A) 203{ 204 _mm_store1_pd (__P, __A); 205} 206 207/* Store two DPFP values in reverse order. The address must be aligned. 
*/ 208extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 209_mm_storer_pd (double *__P, __m128d __A) 210{ 211 _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1))); 212} 213 214extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 215_mm_cvtsi128_si32 (__m128i __A) 216{ 217 return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0); 218} 219 220#ifdef __x86_64__ 221/* Intel intrinsic. */ 222extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 223_mm_cvtsi128_si64 (__m128i __A) 224{ 225 return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); 226} 227 228/* Microsoft intrinsic. */ 229extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 230_mm_cvtsi128_si64x (__m128i __A) 231{ 232 return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0); 233} 234#endif 235 236extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 237_mm_add_pd (__m128d __A, __m128d __B) 238{ 239 return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B); 240} 241 242extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 243_mm_add_sd (__m128d __A, __m128d __B) 244{ 245 return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B); 246} 247 248extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 249_mm_sub_pd (__m128d __A, __m128d __B) 250{ 251 return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B); 252} 253 254extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 255_mm_sub_sd (__m128d __A, __m128d __B) 256{ 257 return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B); 258} 259 260extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 261_mm_mul_pd (__m128d __A, __m128d __B) 262{ 263 return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B); 
264} 265 266extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 267_mm_mul_sd (__m128d __A, __m128d __B) 268{ 269 return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B); 270} 271 272extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 273_mm_div_pd (__m128d __A, __m128d __B) 274{ 275 return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B); 276} 277 278extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 279_mm_div_sd (__m128d __A, __m128d __B) 280{ 281 return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B); 282} 283 284extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 285_mm_sqrt_pd (__m128d __A) 286{ 287 return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A); 288} 289 290/* Return pair {sqrt (A[0), B[1]}. */ 291extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 292_mm_sqrt_sd (__m128d __A, __m128d __B) 293{ 294 __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); 295 return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp); 296} 297 298extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 299_mm_min_pd (__m128d __A, __m128d __B) 300{ 301 return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B); 302} 303 304extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 305_mm_min_sd (__m128d __A, __m128d __B) 306{ 307 return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B); 308} 309 310extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 311_mm_max_pd (__m128d __A, __m128d __B) 312{ 313 return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B); 314} 315 316extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 317_mm_max_sd (__m128d __A, __m128d __B) 318{ 319 return 
(__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B); 320} 321 322extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 323_mm_and_pd (__m128d __A, __m128d __B) 324{ 325 return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B); 326} 327 328extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 329_mm_andnot_pd (__m128d __A, __m128d __B) 330{ 331 return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B); 332} 333 334extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 335_mm_or_pd (__m128d __A, __m128d __B) 336{ 337 return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B); 338} 339 340extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 341_mm_xor_pd (__m128d __A, __m128d __B) 342{ 343 return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B); 344} 345 346extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 347_mm_cmpeq_pd (__m128d __A, __m128d __B) 348{ 349 return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B); 350} 351 352extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 353_mm_cmplt_pd (__m128d __A, __m128d __B) 354{ 355 return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B); 356} 357 358extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 359_mm_cmple_pd (__m128d __A, __m128d __B) 360{ 361 return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B); 362} 363 364extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 365_mm_cmpgt_pd (__m128d __A, __m128d __B) 366{ 367 return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B); 368} 369 370extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 371_mm_cmpge_pd (__m128d __A, __m128d __B) 372{ 373 return 
(__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B); 374} 375 376extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 377_mm_cmpneq_pd (__m128d __A, __m128d __B) 378{ 379 return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B); 380} 381 382extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 383_mm_cmpnlt_pd (__m128d __A, __m128d __B) 384{ 385 return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B); 386} 387 388extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 389_mm_cmpnle_pd (__m128d __A, __m128d __B) 390{ 391 return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B); 392} 393 394extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 395_mm_cmpngt_pd (__m128d __A, __m128d __B) 396{ 397 return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B); 398} 399 400extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 401_mm_cmpnge_pd (__m128d __A, __m128d __B) 402{ 403 return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B); 404} 405 406extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 407_mm_cmpord_pd (__m128d __A, __m128d __B) 408{ 409 return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B); 410} 411 412extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 413_mm_cmpunord_pd (__m128d __A, __m128d __B) 414{ 415 return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B); 416} 417 418extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 419_mm_cmpeq_sd (__m128d __A, __m128d __B) 420{ 421 return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B); 422} 423 424extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 425_mm_cmplt_sd (__m128d __A, __m128d __B) 
426{ 427 return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B); 428} 429 430extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 431_mm_cmple_sd (__m128d __A, __m128d __B) 432{ 433 return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B); 434} 435 436extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 437_mm_cmpgt_sd (__m128d __A, __m128d __B) 438{ 439 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 440 (__v2df) 441 __builtin_ia32_cmpltsd ((__v2df) __B, 442 (__v2df) 443 __A)); 444} 445 446extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 447_mm_cmpge_sd (__m128d __A, __m128d __B) 448{ 449 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 450 (__v2df) 451 __builtin_ia32_cmplesd ((__v2df) __B, 452 (__v2df) 453 __A)); 454} 455 456extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 457_mm_cmpneq_sd (__m128d __A, __m128d __B) 458{ 459 return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B); 460} 461 462extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 463_mm_cmpnlt_sd (__m128d __A, __m128d __B) 464{ 465 return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B); 466} 467 468extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 469_mm_cmpnle_sd (__m128d __A, __m128d __B) 470{ 471 return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B); 472} 473 474extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 475_mm_cmpngt_sd (__m128d __A, __m128d __B) 476{ 477 return (__m128d) __builtin_ia32_movsd ((__v2df) __A, 478 (__v2df) 479 __builtin_ia32_cmpnltsd ((__v2df) __B, 480 (__v2df) 481 __A)); 482} 483 484extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 485_mm_cmpnge_sd (__m128d __A, __m128d __B) 486{ 487 return 
(__m128d) __builtin_ia32_movsd ((__v2df) __A, 488 (__v2df) 489 __builtin_ia32_cmpnlesd ((__v2df) __B, 490 (__v2df) 491 __A)); 492} 493 494extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 495_mm_cmpord_sd (__m128d __A, __m128d __B) 496{ 497 return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B); 498} 499 500extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 501_mm_cmpunord_sd (__m128d __A, __m128d __B) 502{ 503 return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B); 504} 505 506extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 507_mm_comieq_sd (__m128d __A, __m128d __B) 508{ 509 return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B); 510} 511 512extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 513_mm_comilt_sd (__m128d __A, __m128d __B) 514{ 515 return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B); 516} 517 518extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 519_mm_comile_sd (__m128d __A, __m128d __B) 520{ 521 return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B); 522} 523 524extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 525_mm_comigt_sd (__m128d __A, __m128d __B) 526{ 527 return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B); 528} 529 530extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 531_mm_comige_sd (__m128d __A, __m128d __B) 532{ 533 return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B); 534} 535 536extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 537_mm_comineq_sd (__m128d __A, __m128d __B) 538{ 539 return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B); 540} 541 542extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 543_mm_ucomieq_sd (__m128d __A, __m128d __B) 544{ 545 
return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B); 546} 547 548extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 549_mm_ucomilt_sd (__m128d __A, __m128d __B) 550{ 551 return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B); 552} 553 554extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 555_mm_ucomile_sd (__m128d __A, __m128d __B) 556{ 557 return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B); 558} 559 560extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 561_mm_ucomigt_sd (__m128d __A, __m128d __B) 562{ 563 return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B); 564} 565 566extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 567_mm_ucomige_sd (__m128d __A, __m128d __B) 568{ 569 return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B); 570} 571 572extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 573_mm_ucomineq_sd (__m128d __A, __m128d __B) 574{ 575 return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B); 576} 577 578/* Create a vector of Qi, where i is the element number. 
*/ 579 580extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 581_mm_set_epi64x (long long __q1, long long __q0) 582{ 583 return __extension__ (__m128i)(__v2di){ __q0, __q1 }; 584} 585 586extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 587_mm_set_epi64 (__m64 __q1, __m64 __q0) 588{ 589 return _mm_set_epi64x ((long long)__q1, (long long)__q0); 590} 591 592extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 593_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) 594{ 595 return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; 596} 597 598extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 599_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, 600 short __q3, short __q2, short __q1, short __q0) 601{ 602 return __extension__ (__m128i)(__v8hi){ 603 __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; 604} 605 606extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 607_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, 608 char __q11, char __q10, char __q09, char __q08, 609 char __q07, char __q06, char __q05, char __q04, 610 char __q03, char __q02, char __q01, char __q00) 611{ 612 return __extension__ (__m128i)(__v16qi){ 613 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, 614 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 615 }; 616} 617 618/* Set all of the elements of the vector to A. 
*/ 619 620extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 621_mm_set1_epi64x (long long __A) 622{ 623 return _mm_set_epi64x (__A, __A); 624} 625 626extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 627_mm_set1_epi64 (__m64 __A) 628{ 629 return _mm_set_epi64 (__A, __A); 630} 631 632extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 633_mm_set1_epi32 (int __A) 634{ 635 return _mm_set_epi32 (__A, __A, __A, __A); 636} 637 638extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 639_mm_set1_epi16 (short __A) 640{ 641 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A); 642} 643 644extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 645_mm_set1_epi8 (char __A) 646{ 647 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A, 648 __A, __A, __A, __A, __A, __A, __A, __A); 649} 650 651/* Create a vector of Qi, where i is the element number. 652 The parameter order is reversed from the _mm_set_epi* functions. 
*/ 653 654extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 655_mm_setr_epi64 (__m64 __q0, __m64 __q1) 656{ 657 return _mm_set_epi64 (__q1, __q0); 658} 659 660extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 661_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) 662{ 663 return _mm_set_epi32 (__q3, __q2, __q1, __q0); 664} 665 666extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 667_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, 668 short __q4, short __q5, short __q6, short __q7) 669{ 670 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); 671} 672 673extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 674_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, 675 char __q04, char __q05, char __q06, char __q07, 676 char __q08, char __q09, char __q10, char __q11, 677 char __q12, char __q13, char __q14, char __q15) 678{ 679 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, 680 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); 681} 682 683/* Create a vector with element 0 as *P and the rest zero. 
*/ 684 685extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 686_mm_load_si128 (__m128i const *__P) 687{ 688 return *__P; 689} 690 691extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 692_mm_loadu_si128 (__m128i const *__P) 693{ 694 return (__m128i) __builtin_ia32_loaddqu ((char const *)__P); 695} 696 697extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 698_mm_loadl_epi64 (__m128i const *__P) 699{ 700 return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P); 701} 702 703extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 704_mm_store_si128 (__m128i *__P, __m128i __B) 705{ 706 *__P = __B; 707} 708 709extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 710_mm_storeu_si128 (__m128i *__P, __m128i __B) 711{ 712 __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B); 713} 714 715extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 716_mm_storel_epi64 (__m128i *__P, __m128i __B) 717{ 718 *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); 719} 720 721extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 722_mm_movepi64_pi64 (__m128i __B) 723{ 724 return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0); 725} 726 727extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 728_mm_movpi64_epi64 (__m64 __A) 729{ 730 return _mm_set_epi64 ((__m64)0LL, __A); 731} 732 733extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 734_mm_move_epi64 (__m128i __A) 735{ 736 return (__m128i)__builtin_ia32_movq128 ((__v2di) __A); 737} 738 739/* Create an undefined vector. 
*/ 740extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 741_mm_undefined_si128 (void) 742{ 743 __m128i __Y = __Y; 744 return __Y; 745} 746 747/* Create a vector of zeros. */ 748extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 749_mm_setzero_si128 (void) 750{ 751 return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; 752} 753 754extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 755_mm_cvtepi32_pd (__m128i __A) 756{ 757 return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); 758} 759 760extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 761_mm_cvtepi32_ps (__m128i __A) 762{ 763 return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A); 764} 765 766extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 767_mm_cvtpd_epi32 (__m128d __A) 768{ 769 return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A); 770} 771 772extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 773_mm_cvtpd_pi32 (__m128d __A) 774{ 775 return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); 776} 777 778extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 779_mm_cvtpd_ps (__m128d __A) 780{ 781 return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A); 782} 783 784extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 785_mm_cvttpd_epi32 (__m128d __A) 786{ 787 return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A); 788} 789 790extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 791_mm_cvttpd_pi32 (__m128d __A) 792{ 793 return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A); 794} 795 796extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 797_mm_cvtpi32_pd (__m64 __A) 798{ 799 return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); 800} 
/* Convert the four SPFP values in A to four signed 32-bit integers
   (cvtps2dq; rounds per the current MXCSR rounding mode).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_epi32 (__m128 __A)
{
  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
}

/* Same conversion, but truncate toward zero (cvttps2dq).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttps_epi32 (__m128 __A)
{
  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
}

/* Convert the two lower SPFP values in A to two DPFP values (cvtps2pd).  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_pd (__m128 __A)
{
  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
}

/* Convert the lower DPFP value of A to a signed 32-bit integer (cvtsd2si).  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si ((__v2df) __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  Convert the lower DPFP value of A to a signed
   64-bit integer (64-bit cvtsd2si).  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}

/* Microsoft intrinsic.  Same operation as _mm_cvtsd_si64.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_si64x (__m128d __A)
{
  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
}
#endif

/* Convert the lower DPFP value of A to a signed 32-bit integer with
   truncation (cvttsd2si).  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si ((__v2df) __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  Truncating conversion of the lower DPFP value of A
   to a signed 64-bit integer (64-bit cvttsd2si).  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}

/* Microsoft intrinsic.  Same operation as _mm_cvttsd_si64.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttsd_si64x (__m128d __A)
{
  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
}
#endif

/* Convert the lower DPFP value of B to SPFP and merge it into the low
   element of A; upper three elements of A pass through (cvtsd2ss).  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsd_ss (__m128 __A, __m128d __B)
{
  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
}

/* Convert the 32-bit integer B to DPFP and merge it into the low
   element of A; the upper element of A passes through (cvtsi2sd).  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_sd (__m128d __A, int __B)
{
  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
}

#ifdef __x86_64__
/* Intel intrinsic.  As _mm_cvtsi32_sd but with a 64-bit integer source.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_sd (__m128d __A, long long __B)
{
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}

/* Microsoft intrinsic.  Same operation as _mm_cvtsi64_sd.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_sd (__m128d __A, long long __B)
{
  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
}
#endif

/* Convert the lower SPFP value of B to DPFP and merge it into the low
   element of A (cvtss2sd).  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtss_sd (__m128d __A, __m128 __B)
{
  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
}

/* Select one DPFP element from A (mask bit 0) and one from B (mask bit 1)
   per the immediate MASK (shufpd); build MASK with _MM_SHUFFLE2.  The
   macro form is used when not optimizing so the immediate stays literal.  */
#ifdef __OPTIMIZE__
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
{
  return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
}
#else
#define _mm_shuffle_pd(A, B, N)                                 \
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),        \
                                   (__v2df)(__m128d)(B), (int)(N)))
#endif

/* Interleave the upper DPFP elements of A and B (unpckhpd).  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
}

/* Interleave the lower DPFP elements of A and B (unpcklpd).  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
}

/* Load a double from B into the upper element of A (loadhpd).  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pd (__m128d __A, double const *__B)
{
  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
}

/* Load a double from B into the lower element of A (loadlpd).  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd (__m128d __A, double const *__B)
{
  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
}

/* Gather the sign bits of the two DPFP elements into the low two bits
   of the result (movmskpd).  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_pd (__m128d __A)
{
  return __builtin_ia32_movmskpd ((__v2df)__A);
}

/* Pack the 16-bit elements of A and B into bytes with signed
   saturation (packsswb).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
}

/* Pack the 32-bit elements of A and B into 16-bit values with signed
   saturation (packssdw).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
}

/* Pack the 16-bit elements of A and B into bytes with unsigned
   saturation (packuswb).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packus_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
}

/* Interleave the high-half elements of A and B, at byte, word,
   doubleword and quadword granularity (punpckh*).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
}

/* Interleave the low-half elements of A and B (punpckl*).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
}

/* Element-wise integer addition at byte/word/dword/qword width
   (padd*); wraps on overflow.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
}

/* Element-wise addition with signed saturation (padds*).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise addition with unsigned saturation (paddus*).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise integer subtraction (psub*); wraps on overflow.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
}

/* Element-wise subtraction with signed saturation (psubs*).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise subtraction with unsigned saturation (psubus*).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Multiply signed 16-bit elements and add adjacent 32-bit products
   (pmaddwd).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
}

/* Signed 16-bit multiply keeping the high 16 bits of each product
   (pmulhw).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
}

/* 16-bit multiply keeping the low 16 bits of each product (pmullw).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
}

/* MMX form: multiply the low unsigned 32-bit elements of A and B into a
   64-bit result (pmuludq).  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_su32 (__m64 __A, __m64 __B)
{
  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}

/* Multiply the even unsigned 32-bit elements of A and B into two 64-bit
   results (pmuludq, 128-bit form).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_epu32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
}

/* Left-shift each element by the immediate count B (psllw/pslld/psllq,
   immediate forms).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_epi64 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
}

/* Arithmetic (sign-propagating) right shift by the immediate count B
   (psraw/psrad, immediate forms).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
}

/* Shift the whole 128-bit register right/left by N *bytes*
   (psrldq/pslldq).  The builtin takes a bit count, hence the * 8.
   Macro forms are used when not optimizing so N stays a literal.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si128 (__m128i __A, const int __N)
{
  return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
}
#else
#define _mm_srli_si128(A, N) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
#define _mm_slli_si128(A, N) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
#endif

/* Logical right shift of each element by the immediate count B
   (psrlw/psrld/psrlq, immediate forms).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_epi64 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
}

/* Left shift of each element by a count held in vector B
   (psllw/pslld/psllq, register forms).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
}

/* Arithmetic right shift by a count held in vector B (psraw/psrad,
   register forms).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
}
/* Logical right shift of each element by a count held in vector B
   (psrlw/psrld/psrlq, register forms).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
}

/* Bitwise AND of A and B (pand).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
}

/* Bitwise AND of (NOT A) with B (pandn).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
}

/* Bitwise OR of A and B (por).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
}

/* Bitwise XOR of A and B (pxor).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
}

/* Element-wise equality compare; each result element is all ones on
   equality, all zeros otherwise (pcmpeq*).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
}

/* Signed less-than compares.  There is no pcmplt instruction, so these
   use pcmpgt with the operands swapped: A < B  <=>  B > A.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmplt_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
}

/* Signed greater-than compares (pcmpgt*).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
}

/* Extract the 16-bit element at index N of A, zero-extended to int
   (pextrw); insert D as the 16-bit element at index N (pinsrw).
   Macro forms keep N a literal when not optimizing.  */
#ifdef __OPTIMIZE__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
}
#else
#define _mm_extract_epi16(A, N) \
  ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_insert_epi16(A, D, N)                               \
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
                                          (int)(D), (int)(N)))
#endif

/* Element-wise maximum of signed 16-bit elements (pmaxsw).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise maximum of unsigned bytes (pmaxub).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
}

/* Element-wise minimum of signed 16-bit elements (pminsw).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Element-wise minimum of unsigned bytes (pminub).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
}

/* Gather the sign bits of the 16 bytes of A into the low 16 bits of
   the result (pmovmskb).  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_movemask_epi8 (__m128i __A)
{
  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
}

/* Unsigned 16-bit multiply keeping the high 16 bits of each product
   (pmulhuw).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Shuffle 16-bit elements within the high half (pshufhw), the low half
   (pshuflw), or 32-bit elements across the register (pshufd), driven by
   the immediate MASK.  Macro forms keep MASK a literal when not
   optimizing.  */
#ifdef __OPTIMIZE__
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflehi_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shufflelo_epi16 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi32 (__m128i __A, const int __mask)
{
  return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
}
#else
#define _mm_shufflehi_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shufflelo_epi16(A, N) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
#define _mm_shuffle_epi32(A, N) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
#endif

/* Conditionally store bytes of A to address C: only bytes whose
   corresponding byte in B has its sign bit set are written (maskmovdqu).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
{
  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
}

/* Rounded average of unsigned bytes / unsigned 16-bit elements
   (pavgb/pavgw).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avg_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
}

/* Sum of absolute byte differences of A and B (psadbw).  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sad_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
}

/* Non-temporal (cache-bypassing) store of a 32-bit integer (movnti).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
}

#ifdef __x86_64__
/* Non-temporal store of a 64-bit integer (64-bit movnti).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si64 (long long int *__A, long long int __B)
{
  __builtin_ia32_movnti64 (__A, __B);
}
#endif

/* Non-temporal store of a 128-bit integer vector (movntdq).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_si128 (__m128i *__A, __m128i __B)
{
  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
}

/* Non-temporal store of two DPFP values (movntpd).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_pd (double *__A, __m128d __B)
{
  __builtin_ia32_movntpd (__A, (__v2df)__B);
}

/* Flush the cache line containing address A (clflush).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflush (void const *__A)
{
  __builtin_ia32_clflush (__A);
}

/* Load fence (lfence).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}

/* Full memory fence (mfence).  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}

/* Move 32-bit integer A into the low element of a vector, upper
   elements zeroed.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si128 (int __A)
{
  return _mm_set_epi32 (0, 0, 0, __A);
}

#ifdef __x86_64__
/* Intel intrinsic.  Move 64-bit integer A into the low element of a
   vector, upper element zeroed.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si128 (long long __A)
{
  return _mm_set_epi64x (0, __A);
}

/* Microsoft intrinsic.  Same operation as _mm_cvtsi64_si128.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si128 (long long __A)
{
  return _mm_set_epi64x (0, __A);
}
#endif

/* Casts between various SP, DP, INT vector types.  Note that these do no
   conversion of values, they just change the type.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ps(__m128d __A)
{
  return (__m128) __A;
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_si128(__m128d __A)
{
  return (__m128i) __A;
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_pd(__m128 __A)
{
  return (__m128d) __A;
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_si128(__m128 __A)
{
  return (__m128i) __A;
}

extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ps(__m128i __A)
{
  return (__m128) __A;
}

extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_pd(__m128i __A)
{
  return (__m128d) __A;
}

#ifdef __DISABLE_SSE2__
#undef __DISABLE_SSE2__
#pragma GCC pop_options
#endif /* __DISABLE_SSE2__ */

#endif /* _EMMINTRIN_H_INCLUDED */