1/* APPLE LOCAL file mainline 2005-06-30 Radar 4131077 */ 2/* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 3 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING. If not, write to 19 the Free Software Foundation, 51 Franklin Street, Fifth Floor, 20 Boston, MA 02110-1301, USA. */ 21 22/* As a special exception, if you include this header file into source 23 files compiled by GCC, this header file does not by itself cause 24 the resulting executable to be covered by the GNU General Public 25 License. This exception does not however invalidate any other 26 reasons why the executable file might be covered by the GNU General 27 Public License. */ 28 29/* Implemented from the specification included in the Intel C++ Compiler 30 User Guide and Reference, version 9.0. */ 31 32#ifndef _XMMINTRIN_H_INCLUDED 33#define _XMMINTRIN_H_INCLUDED 34 35#ifndef __SSE__ 36# error "SSE instruction set not enabled" 37#else 38 39/* We need type definitions from the MMX header file. */ 40#include <mmintrin.h> 41 42/* Get _mm_malloc () and _mm_free (). */ 43/* APPLE LOCAL begin xmmintrin.h for kernel 4123064 */ 44#if __STDC_HOSTED__ 45#include <mm_malloc.h> 46#endif 47/* APPLE LOCAL end xmmintrin.h for kernel 4123064 */ 48 49/* The Intel API is flexible enough that we must allow aliasing with other 50 vector types, and their scalar components. */ 51typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); 52 53/* Internal data types for implementing the intrinsics. */ 54typedef float __v4sf __attribute__ ((__vector_size__ (16))); 55 56#if defined(__clang__) && defined(WITH_SYNTAX_CHECK) 57/* Workaround for "clang -fsyntax-only" happens to use this header, but may 58 * choke on something not supported in clang 59 */ 60int __builtin_ia32_cvtss2si (__v4sf); 61int __builtin_ia32_cvttss2si (__v4sf); 62__m128 __builtin_ia32_addps (__v4sf, __v4sf); 63__m128 __builtin_ia32_addss (__v4sf, __v4sf); 64__m128 __builtin_ia32_addss (__v4sf, __v4sf); 65__m128 __builtin_ia32_addss (__v4sf, __v4sf); 66__m128 __builtin_ia32_andnps (__m128, __m128); 67__m128 __builtin_ia32_andps (__m128, __m128); 68__m128 __builtin_ia32_cmpeqps (__v4sf, __v4sf); 69__m128 __builtin_ia32_cmpeqss (__v4sf, __v4sf); 70__m128 __builtin_ia32_cmpgeps (__v4sf, __v4sf); 71__m128 __builtin_ia32_cmpgtps (__v4sf, __v4sf); 72__m128 __builtin_ia32_cmpleps (__v4sf, __v4sf); 73__m128 __builtin_ia32_cmpless (__v4sf, __v4sf); 74__m128 __builtin_ia32_cmpltps (__v4sf, __v4sf); 75__m128 __builtin_ia32_cmpltss (__v4sf, __v4sf); 76__m128 __builtin_ia32_cmpneqps (__v4sf, __v4sf); 77__m128 __builtin_ia32_cmpneqss (__v4sf, __v4sf); 78__m128 __builtin_ia32_cmpngeps (__v4sf, __v4sf); 79__m128 __builtin_ia32_cmpngtps (__v4sf, __v4sf); 80__m128 __builtin_ia32_cmpnleps (__v4sf, __v4sf); 81__m128 __builtin_ia32_cmpnless (__v4sf, __v4sf); 82__m128 __builtin_ia32_cmpnltps (__v4sf, __v4sf); 83__m128 __builtin_ia32_cmpnltss (__v4sf, __v4sf); 84__m128 __builtin_ia32_cmpordps (__v4sf, __v4sf); 85__m128 __builtin_ia32_cmpordss (__v4sf, __v4sf); 86__m128 __builtin_ia32_cmpunordps (__v4sf, __v4sf); 87__m128 __builtin_ia32_cmpunordss (__v4sf, __v4sf); 88__m128 __builtin_ia32_cvtsi2ss (__v4sf, int); 89__m128 __builtin_ia32_divps (__v4sf, __v4sf); 90__m128 __builtin_ia32_divss (__v4sf, __v4sf); 91__m128 __builtin_ia32_movss (__v4sf, __v4sf); 92__m128 __builtin_ia32_mulps (__v4sf, __v4sf); 93__m128 __builtin_ia32_mulps (__v4sf, __v4sf); 94__m128 __builtin_ia32_mulss (__v4sf, __v4sf); 95__m128 __builtin_ia32_mulss (__v4sf, __v4sf); 96__m128 __builtin_ia32_orps (__m128, __m128); 97__m128 __builtin_ia32_subps (__v4sf, __v4sf); 98__m128 __builtin_ia32_subss (__v4sf, __v4sf); 99__m128 __builtin_ia32_subss (__v4sf, __v4sf); 100__m128 __builtin_ia32_xorps (__m128, __m128); 101__m128 __builtin_ia32_loadhps (__v4sf, const __v2si *); 102__m128 __builtin_ia32_loadlps (__v4sf, const __v2si *); 103__m128 __builtin_ia32_movhlps (__v4sf, __v4sf); 104__m128 __builtin_ia32_movlhps (__v4sf, __v4sf); 105__m128 __builtin_ia32_shufps (__v4sf, __v4sf, int const); 106__m128 __builtin_ia32_unpckhps (__v4sf, __v4sf); 107__m128 __builtin_ia32_unpcklps (__v4sf, __v4sf); 108__m128 __builtin_ia32_loadups (float const *); 109__m64 __builtin_ia32_vec_set_v4hi (__v4hi, int const, int const); 110float __builtin_ia32_vec_ext_v4sf (__v4sf, const int); 111int __builtin_ia32_vec_ext_v4hi (__v4hi, const int); 112long long __builtin_ia32_cvtss2si64 (__v4sf); 113long long __builtin_ia32_cvttss2si64 (__v4sf); 114__m128 __builtin_ia32_cvtsi642ss (__v4sf, long long); 115#endif 116 117/* Create a selector for use with the SHUFPS instruction. */ 118#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \ 119 (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0)) 120 121/* Constants for use with _mm_prefetch. */ 122enum _mm_hint 123{ 124 _MM_HINT_T0 = 3, 125 _MM_HINT_T1 = 2, 126 _MM_HINT_T2 = 1, 127 _MM_HINT_NTA = 0 128}; 129 130/* Bits in the MXCSR. */ 131#define _MM_EXCEPT_MASK 0x003f 132#define _MM_EXCEPT_INVALID 0x0001 133#define _MM_EXCEPT_DENORM 0x0002 134#define _MM_EXCEPT_DIV_ZERO 0x0004 135#define _MM_EXCEPT_OVERFLOW 0x0008 136#define _MM_EXCEPT_UNDERFLOW 0x0010 137#define _MM_EXCEPT_INEXACT 0x0020 138 139#define _MM_MASK_MASK 0x1f80 140#define _MM_MASK_INVALID 0x0080 141#define _MM_MASK_DENORM 0x0100 142#define _MM_MASK_DIV_ZERO 0x0200 143#define _MM_MASK_OVERFLOW 0x0400 144#define _MM_MASK_UNDERFLOW 0x0800 145#define _MM_MASK_INEXACT 0x1000 146 147#define _MM_ROUND_MASK 0x6000 148#define _MM_ROUND_NEAREST 0x0000 149#define _MM_ROUND_DOWN 0x2000 150#define _MM_ROUND_UP 0x4000 151#define _MM_ROUND_TOWARD_ZERO 0x6000 152 153#define _MM_FLUSH_ZERO_MASK 0x8000 154#define _MM_FLUSH_ZERO_ON 0x8000 155#define _MM_FLUSH_ZERO_OFF 0x0000 156 157/* APPLE LOCAL begin nodebug inline 4152603 */ 158#define __always_inline__ __always_inline__, __nodebug__ 159/* APPLE LOCAL end nodebug inline 4152603 */ 160 161/* APPLE LOCAL begin radar 5618945 */ 162#undef __STATIC_INLINE 163#ifdef __GNUC_STDC_INLINE__ 164#define __STATIC_INLINE __inline 165#else 166#define __STATIC_INLINE static __inline 167#endif 168/* APPLE LOCAL end radar 5618945 */ 169 170/* Create a vector of zeros. */ 171/* APPLE LOCAL begin radar 4152603 */ 172/* APPLE LOCAL begin radar 5618945 */ 173__STATIC_INLINE __m128 __attribute__((__always_inline__)) 174/* APPLE LOCAL end radar 5618945 */ 175_mm_setzero_ps (void) 176{ 177 return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; 178} 179 180/* Perform the respective operation on the lower SPFP (single-precision 181 floating-point) values of A and B; the upper three SPFP values are 182 passed through from A. */ 183 184/* APPLE LOCAL begin radar 5618945 */ 185__STATIC_INLINE __m128 __attribute__((__always_inline__)) 186/* APPLE LOCAL end radar 5618945 */ 187_mm_add_ss (__m128 __A, __m128 __B) 188{ 189 return (__m128) __builtin_ia32_addss ((__v4sf)__A, (__v4sf)__B); 190} 191 192/* APPLE LOCAL begin radar 5618945 */ 193__STATIC_INLINE __m128 __attribute__((__always_inline__)) 194/* APPLE LOCAL end radar 5618945 */ 195_mm_sub_ss (__m128 __A, __m128 __B) 196{ 197 return (__m128) __builtin_ia32_subss ((__v4sf)__A, (__v4sf)__B); 198} 199 200/* APPLE LOCAL begin radar 5618945 */ 201__STATIC_INLINE __m128 __attribute__((__always_inline__)) 202/* APPLE LOCAL end radar 5618945 */ 203_mm_mul_ss (__m128 __A, __m128 __B) 204{ 205 return (__m128) __builtin_ia32_mulss ((__v4sf)__A, (__v4sf)__B); 206} 207 208/* APPLE LOCAL begin radar 5618945 */ 209__STATIC_INLINE __m128 __attribute__((__always_inline__)) 210/* APPLE LOCAL end radar 5618945 */ 211_mm_div_ss (__m128 __A, __m128 __B) 212{ 213 return (__m128) __builtin_ia32_divss ((__v4sf)__A, (__v4sf)__B); 214} 215 216/* APPLE LOCAL begin radar 5618945 */ 217__STATIC_INLINE __m128 __attribute__((__always_inline__)) 218/* APPLE LOCAL end radar 5618945 */ 219_mm_sqrt_ss (__m128 __A) 220{ 221 return (__m128) __builtin_ia32_sqrtss ((__v4sf)__A); 222} 223 224/* APPLE LOCAL begin radar 5618945 */ 225__STATIC_INLINE __m128 __attribute__((__always_inline__)) 226/* APPLE LOCAL end radar 5618945 */ 227_mm_rcp_ss (__m128 __A) 228{ 229 return (__m128) __builtin_ia32_rcpss ((__v4sf)__A); 230} 231 232/* APPLE LOCAL begin radar 5618945 */ 233__STATIC_INLINE __m128 __attribute__((__always_inline__)) 234/* APPLE LOCAL end radar 5618945 */ 235_mm_rsqrt_ss (__m128 __A) 236{ 237 return (__m128) __builtin_ia32_rsqrtss ((__v4sf)__A); 238} 239 240/* APPLE LOCAL begin radar 5618945 */ 241__STATIC_INLINE __m128 __attribute__((__always_inline__)) 242/* APPLE LOCAL end radar 5618945 */ 243_mm_min_ss (__m128 __A, __m128 __B) 244{ 245 return (__m128) __builtin_ia32_minss ((__v4sf)__A, (__v4sf)__B); 246} 247 248/* APPLE LOCAL begin radar 5618945 */ 249__STATIC_INLINE __m128 __attribute__((__always_inline__)) 250/* APPLE LOCAL end radar 5618945 */ 251_mm_max_ss (__m128 __A, __m128 __B) 252{ 253 return (__m128) __builtin_ia32_maxss ((__v4sf)__A, (__v4sf)__B); 254} 255 256/* Perform the respective operation on the four SPFP values in A and B. */ 257 258/* APPLE LOCAL begin radar 5618945 */ 259__STATIC_INLINE __m128 __attribute__((__always_inline__)) 260/* APPLE LOCAL end radar 5618945 */ 261_mm_add_ps (__m128 __A, __m128 __B) 262{ 263 return (__m128) __builtin_ia32_addps ((__v4sf)__A, (__v4sf)__B); 264} 265 266/* APPLE LOCAL begin radar 5618945 */ 267__STATIC_INLINE __m128 __attribute__((__always_inline__)) 268/* APPLE LOCAL end radar 5618945 */ 269_mm_sub_ps (__m128 __A, __m128 __B) 270{ 271 return (__m128) __builtin_ia32_subps ((__v4sf)__A, (__v4sf)__B); 272} 273 274/* APPLE LOCAL begin radar 5618945 */ 275__STATIC_INLINE __m128 __attribute__((__always_inline__)) 276/* APPLE LOCAL end radar 5618945 */ 277_mm_mul_ps (__m128 __A, __m128 __B) 278{ 279 return (__m128) __builtin_ia32_mulps ((__v4sf)__A, (__v4sf)__B); 280} 281 282/* APPLE LOCAL begin radar 5618945 */ 283__STATIC_INLINE __m128 __attribute__((__always_inline__)) 284/* APPLE LOCAL end radar 5618945 */ 285_mm_div_ps (__m128 __A, __m128 __B) 286{ 287 return (__m128) __builtin_ia32_divps ((__v4sf)__A, (__v4sf)__B); 288} 289 290/* APPLE LOCAL begin radar 5618945 */ 291__STATIC_INLINE __m128 __attribute__((__always_inline__)) 292/* APPLE LOCAL end radar 5618945 */ 293_mm_sqrt_ps (__m128 __A) 294{ 295 return (__m128) __builtin_ia32_sqrtps ((__v4sf)__A); 296} 297 298/* APPLE LOCAL begin radar 5618945 */ 299__STATIC_INLINE __m128 __attribute__((__always_inline__)) 300/* APPLE LOCAL end radar 5618945 */ 301_mm_rcp_ps (__m128 __A) 302{ 303 return (__m128) __builtin_ia32_rcpps ((__v4sf)__A); 304} 305 306/* APPLE LOCAL begin radar 5618945 */ 307__STATIC_INLINE __m128 __attribute__((__always_inline__)) 308/* APPLE LOCAL end radar 5618945 */ 309_mm_rsqrt_ps (__m128 __A) 310{ 311 return (__m128) __builtin_ia32_rsqrtps ((__v4sf)__A); 312} 313 314/* APPLE LOCAL begin radar 5618945 */ 315__STATIC_INLINE __m128 __attribute__((__always_inline__)) 316/* APPLE LOCAL end radar 5618945 */ 317_mm_min_ps (__m128 __A, __m128 __B) 318{ 319 return (__m128) __builtin_ia32_minps ((__v4sf)__A, (__v4sf)__B); 320} 321 322/* APPLE LOCAL begin radar 5618945 */ 323__STATIC_INLINE __m128 __attribute__((__always_inline__)) 324/* APPLE LOCAL end radar 5618945 */ 325_mm_max_ps (__m128 __A, __m128 __B) 326{ 327 return (__m128) __builtin_ia32_maxps ((__v4sf)__A, (__v4sf)__B); 328} 329 330/* Perform logical bit-wise operations on 128-bit values. */ 331 332/* APPLE LOCAL begin radar 5618945 */ 333__STATIC_INLINE __m128 __attribute__((__always_inline__)) 334/* APPLE LOCAL end radar 5618945 */ 335_mm_and_ps (__m128 __A, __m128 __B) 336{ 337 return __builtin_ia32_andps (__A, __B); 338} 339 340/* APPLE LOCAL begin radar 5618945 */ 341__STATIC_INLINE __m128 __attribute__((__always_inline__)) 342/* APPLE LOCAL end radar 5618945 */ 343_mm_andnot_ps (__m128 __A, __m128 __B) 344{ 345 return __builtin_ia32_andnps (__A, __B); 346} 347 348/* APPLE LOCAL begin radar 5618945 */ 349__STATIC_INLINE __m128 __attribute__((__always_inline__)) 350/* APPLE LOCAL end radar 5618945 */ 351_mm_or_ps (__m128 __A, __m128 __B) 352{ 353 return __builtin_ia32_orps (__A, __B); 354} 355 356/* APPLE LOCAL begin radar 5618945 */ 357__STATIC_INLINE __m128 __attribute__((__always_inline__)) 358/* APPLE LOCAL end radar 5618945 */ 359_mm_xor_ps (__m128 __A, __m128 __B) 360{ 361 return __builtin_ia32_xorps (__A, __B); 362} 363 364/* Perform a comparison on the lower SPFP values of A and B. If the 365 comparison is true, place a mask of all ones in the result, otherwise a 366 mask of zeros. The upper three SPFP values are passed through from A. */ 367 368/* APPLE LOCAL begin radar 5618945 */ 369__STATIC_INLINE __m128 __attribute__((__always_inline__)) 370/* APPLE LOCAL end radar 5618945 */ 371_mm_cmpeq_ss (__m128 __A, __m128 __B) 372{ 373 return (__m128) __builtin_ia32_cmpeqss ((__v4sf)__A, (__v4sf)__B); 374} 375 376/* APPLE LOCAL begin radar 5618945 */ 377__STATIC_INLINE __m128 __attribute__((__always_inline__)) 378/* APPLE LOCAL end radar 5618945 */ 379_mm_cmplt_ss (__m128 __A, __m128 __B) 380{ 381 return (__m128) __builtin_ia32_cmpltss ((__v4sf)__A, (__v4sf)__B); 382} 383 384/* APPLE LOCAL begin radar 5618945 */ 385__STATIC_INLINE __m128 __attribute__((__always_inline__)) 386/* APPLE LOCAL end radar 5618945 */ 387_mm_cmple_ss (__m128 __A, __m128 __B) 388{ 389 return (__m128) __builtin_ia32_cmpless ((__v4sf)__A, (__v4sf)__B); 390} 391 392/* APPLE LOCAL begin radar 5618945 */ 393__STATIC_INLINE __m128 __attribute__((__always_inline__)) 394/* APPLE LOCAL end radar 5618945 */ 395_mm_cmpgt_ss (__m128 __A, __m128 __B) 396{ 397 return (__m128) __builtin_ia32_movss ((__v4sf) __A, 398 (__v4sf) 399 __builtin_ia32_cmpltss ((__v4sf) __B, 400 (__v4sf) 401 __A)); 402} 403 404/* APPLE LOCAL begin radar 5618945 */ 405__STATIC_INLINE __m128 __attribute__((__always_inline__)) 406/* APPLE LOCAL end radar 5618945 */ 407_mm_cmpge_ss (__m128 __A, __m128 __B) 408{ 409 return (__m128) __builtin_ia32_movss ((__v4sf) __A, 410 (__v4sf) 411 __builtin_ia32_cmpless ((__v4sf) __B, 412 (__v4sf) 413 __A)); 414} 415 416/* APPLE LOCAL begin radar 5618945 */ 417__STATIC_INLINE __m128 __attribute__((__always_inline__)) 418/* APPLE LOCAL end radar 5618945 */ 419_mm_cmpneq_ss (__m128 __A, __m128 __B) 420{ 421 return (__m128) __builtin_ia32_cmpneqss ((__v4sf)__A, (__v4sf)__B); 422} 423 424/* APPLE LOCAL begin radar 5618945 */ 425__STATIC_INLINE __m128 __attribute__((__always_inline__)) 426/* APPLE LOCAL end radar 5618945 */ 427_mm_cmpnlt_ss (__m128 __A, __m128 __B) 428{ 429 return (__m128) __builtin_ia32_cmpnltss ((__v4sf)__A, (__v4sf)__B); 430} 431 432/* APPLE LOCAL begin radar 5618945 */ 433__STATIC_INLINE __m128 __attribute__((__always_inline__)) 434/* APPLE LOCAL end radar 5618945 */ 435_mm_cmpnle_ss (__m128 __A, __m128 __B) 436{ 437 return (__m128) __builtin_ia32_cmpnless ((__v4sf)__A, (__v4sf)__B); 438} 439 440/* APPLE LOCAL begin radar 5618945 */ 441__STATIC_INLINE __m128 __attribute__((__always_inline__)) 442/* APPLE LOCAL end radar 5618945 */ 443_mm_cmpngt_ss (__m128 __A, __m128 __B) 444{ 445 return (__m128) __builtin_ia32_movss ((__v4sf) __A, 446 (__v4sf) 447 __builtin_ia32_cmpnltss ((__v4sf) __B, 448 (__v4sf) 449 __A)); 450} 451 452/* APPLE LOCAL begin radar 5618945 */ 453__STATIC_INLINE __m128 __attribute__((__always_inline__)) 454/* APPLE LOCAL end radar 5618945 */ 455_mm_cmpnge_ss (__m128 __A, __m128 __B) 456{ 457 return (__m128) __builtin_ia32_movss ((__v4sf) __A, 458 (__v4sf) 459 __builtin_ia32_cmpnless ((__v4sf) __B, 460 (__v4sf) 461 __A)); 462} 463 464/* APPLE LOCAL begin radar 5618945 */ 465__STATIC_INLINE __m128 __attribute__((__always_inline__)) 466/* APPLE LOCAL end radar 5618945 */ 467_mm_cmpord_ss (__m128 __A, __m128 __B) 468{ 469 return (__m128) __builtin_ia32_cmpordss ((__v4sf)__A, (__v4sf)__B); 470} 471 472/* APPLE LOCAL begin radar 5618945 */ 473__STATIC_INLINE __m128 __attribute__((__always_inline__)) 474/* APPLE LOCAL end radar 5618945 */ 475_mm_cmpunord_ss (__m128 __A, __m128 __B) 476{ 477 return (__m128) __builtin_ia32_cmpunordss ((__v4sf)__A, (__v4sf)__B); 478} 479 480/* Perform a comparison on the four SPFP values of A and B. For each 481 element, if the comparison is true, place a mask of all ones in the 482 result, otherwise a mask of zeros. */ 483 484/* APPLE LOCAL begin radar 5618945 */ 485__STATIC_INLINE __m128 __attribute__((__always_inline__)) 486/* APPLE LOCAL end radar 5618945 */ 487_mm_cmpeq_ps (__m128 __A, __m128 __B) 488{ 489 return (__m128) __builtin_ia32_cmpeqps ((__v4sf)__A, (__v4sf)__B); 490} 491 492/* APPLE LOCAL begin radar 5618945 */ 493__STATIC_INLINE __m128 __attribute__((__always_inline__)) 494/* APPLE LOCAL end radar 5618945 */ 495_mm_cmplt_ps (__m128 __A, __m128 __B) 496{ 497 return (__m128) __builtin_ia32_cmpltps ((__v4sf)__A, (__v4sf)__B); 498} 499 500/* APPLE LOCAL begin radar 5618945 */ 501__STATIC_INLINE __m128 __attribute__((__always_inline__)) 502/* APPLE LOCAL end radar 5618945 */ 503_mm_cmple_ps (__m128 __A, __m128 __B) 504{ 505 return (__m128) __builtin_ia32_cmpleps ((__v4sf)__A, (__v4sf)__B); 506} 507 508/* APPLE LOCAL begin radar 5618945 */ 509__STATIC_INLINE __m128 __attribute__((__always_inline__)) 510/* APPLE LOCAL end radar 5618945 */ 511_mm_cmpgt_ps (__m128 __A, __m128 __B) 512{ 513 return (__m128) __builtin_ia32_cmpgtps ((__v4sf)__A, (__v4sf)__B); 514} 515 516/* APPLE LOCAL begin radar 5618945 */ 517__STATIC_INLINE __m128 __attribute__((__always_inline__)) 518/* APPLE LOCAL end radar 5618945 */ 519_mm_cmpge_ps (__m128 __A, __m128 __B) 520{ 521 return (__m128) __builtin_ia32_cmpgeps ((__v4sf)__A, (__v4sf)__B); 522} 523 524/* APPLE LOCAL begin radar 5618945 */ 525__STATIC_INLINE __m128 __attribute__((__always_inline__)) 526/* APPLE LOCAL end radar 5618945 */ 527_mm_cmpneq_ps (__m128 __A, __m128 __B) 528{ 529 return (__m128) __builtin_ia32_cmpneqps ((__v4sf)__A, (__v4sf)__B); 530} 531 532/* APPLE LOCAL begin radar 5618945 */ 533__STATIC_INLINE __m128 __attribute__((__always_inline__)) 534/* APPLE LOCAL end radar 5618945 */ 535_mm_cmpnlt_ps (__m128 __A, __m128 __B) 536{ 537 return (__m128) __builtin_ia32_cmpnltps ((__v4sf)__A, (__v4sf)__B); 538} 539 540/* APPLE LOCAL begin radar 5618945 */ 541__STATIC_INLINE __m128 __attribute__((__always_inline__)) 542/* APPLE LOCAL end radar 5618945 */ 543_mm_cmpnle_ps (__m128 __A, __m128 __B) 544{ 545 return (__m128) __builtin_ia32_cmpnleps ((__v4sf)__A, (__v4sf)__B); 546} 547 548/* APPLE LOCAL begin radar 5618945 */ 549__STATIC_INLINE __m128 __attribute__((__always_inline__)) 550/* APPLE LOCAL end radar 5618945 */ 551_mm_cmpngt_ps (__m128 __A, __m128 __B) 552{ 553 return (__m128) __builtin_ia32_cmpngtps ((__v4sf)__A, (__v4sf)__B); 554} 555 556/* APPLE LOCAL begin radar 5618945 */ 557__STATIC_INLINE __m128 __attribute__((__always_inline__)) 558/* APPLE LOCAL end radar 5618945 */ 559_mm_cmpnge_ps (__m128 __A, __m128 __B) 560{ 561 return (__m128) __builtin_ia32_cmpngeps ((__v4sf)__A, (__v4sf)__B); 562} 563 564/* APPLE LOCAL begin radar 5618945 */ 565__STATIC_INLINE __m128 __attribute__((__always_inline__)) 566/* APPLE LOCAL end radar 5618945 */ 567_mm_cmpord_ps (__m128 __A, __m128 __B) 568{ 569 return (__m128) __builtin_ia32_cmpordps ((__v4sf)__A, (__v4sf)__B); 570} 571 572/* APPLE LOCAL begin radar 5618945 */ 573__STATIC_INLINE __m128 __attribute__((__always_inline__)) 574/* APPLE LOCAL end radar 5618945 */ 575_mm_cmpunord_ps (__m128 __A, __m128 __B) 576{ 577 return (__m128) __builtin_ia32_cmpunordps ((__v4sf)__A, (__v4sf)__B); 578} 579 580/* Compare the lower SPFP values of A and B and return 1 if true 581 and 0 if false. */ 582 583/* APPLE LOCAL begin radar 5618945 */ 584__STATIC_INLINE int __attribute__((__always_inline__)) 585/* APPLE LOCAL end radar 5618945 */ 586_mm_comieq_ss (__m128 __A, __m128 __B) 587{ 588 return __builtin_ia32_comieq ((__v4sf)__A, (__v4sf)__B); 589} 590 591/* APPLE LOCAL begin radar 5618945 */ 592__STATIC_INLINE int __attribute__((__always_inline__)) 593/* APPLE LOCAL end radar 5618945 */ 594_mm_comilt_ss (__m128 __A, __m128 __B) 595{ 596 return __builtin_ia32_comilt ((__v4sf)__A, (__v4sf)__B); 597} 598 599/* APPLE LOCAL begin radar 5618945 */ 600__STATIC_INLINE int __attribute__((__always_inline__)) 601/* APPLE LOCAL end radar 5618945 */ 602_mm_comile_ss (__m128 __A, __m128 __B) 603{ 604 return __builtin_ia32_comile ((__v4sf)__A, (__v4sf)__B); 605} 606 607/* APPLE LOCAL begin radar 5618945 */ 608__STATIC_INLINE int __attribute__((__always_inline__)) 609/* APPLE LOCAL end radar 5618945 */ 610_mm_comigt_ss (__m128 __A, __m128 __B) 611{ 612 return __builtin_ia32_comigt ((__v4sf)__A, (__v4sf)__B); 613} 614 615/* APPLE LOCAL begin radar 5618945 */ 616__STATIC_INLINE int __attribute__((__always_inline__)) 617/* APPLE LOCAL end radar 5618945 */ 618_mm_comige_ss (__m128 __A, __m128 __B) 619{ 620 return __builtin_ia32_comige ((__v4sf)__A, (__v4sf)__B); 621} 622 623/* APPLE LOCAL begin radar 5618945 */ 624__STATIC_INLINE int __attribute__((__always_inline__)) 625/* APPLE LOCAL end radar 5618945 */ 626_mm_comineq_ss (__m128 __A, __m128 __B) 627{ 628 return __builtin_ia32_comineq ((__v4sf)__A, (__v4sf)__B); 629} 630 631/* APPLE LOCAL begin radar 5618945 */ 632__STATIC_INLINE int __attribute__((__always_inline__)) 633/* APPLE LOCAL end radar 5618945 */ 634_mm_ucomieq_ss (__m128 __A, __m128 __B) 635{ 636 return __builtin_ia32_ucomieq ((__v4sf)__A, (__v4sf)__B); 637} 638 639/* APPLE LOCAL begin radar 5618945 */ 640__STATIC_INLINE int __attribute__((__always_inline__)) 641/* APPLE LOCAL end radar 5618945 */ 642_mm_ucomilt_ss (__m128 __A, __m128 __B) 643{ 644 return __builtin_ia32_ucomilt ((__v4sf)__A, (__v4sf)__B); 645} 646 647/* APPLE LOCAL begin radar 5618945 */ 648__STATIC_INLINE int __attribute__((__always_inline__)) 649/* APPLE LOCAL end radar 5618945 */ 650_mm_ucomile_ss (__m128 __A, __m128 __B) 651{ 652 return __builtin_ia32_ucomile ((__v4sf)__A, (__v4sf)__B); 653} 654 655/* APPLE LOCAL begin radar 5618945 */ 656__STATIC_INLINE int __attribute__((__always_inline__)) 657/* APPLE LOCAL end radar 5618945 */ 658_mm_ucomigt_ss (__m128 __A, __m128 __B) 659{ 660 return __builtin_ia32_ucomigt ((__v4sf)__A, (__v4sf)__B); 661} 662 663/* APPLE LOCAL begin radar 5618945 */ 664__STATIC_INLINE int __attribute__((__always_inline__)) 665/* APPLE LOCAL end radar 5618945 */ 666_mm_ucomige_ss (__m128 __A, __m128 __B) 667{ 668 return __builtin_ia32_ucomige ((__v4sf)__A, (__v4sf)__B); 669} 670 671/* APPLE LOCAL begin radar 5618945 */ 672__STATIC_INLINE int __attribute__((__always_inline__)) 673/* APPLE LOCAL end radar 5618945 */ 674_mm_ucomineq_ss (__m128 __A, __m128 __B) 675{ 676 return __builtin_ia32_ucomineq ((__v4sf)__A, (__v4sf)__B); 677} 678 679/* Convert the lower SPFP value to a 32-bit integer according to the current 680 rounding mode. */ 681/* APPLE LOCAL begin radar 5618945 */ 682__STATIC_INLINE int __attribute__((__always_inline__)) 683/* APPLE LOCAL end radar 5618945 */ 684_mm_cvtss_si32 (__m128 __A) 685{ 686 return __builtin_ia32_cvtss2si ((__v4sf) __A); 687} 688 689/* APPLE LOCAL begin radar 5618945 */ 690__STATIC_INLINE int __attribute__((__always_inline__)) 691/* APPLE LOCAL end radar 5618945 */ 692_mm_cvt_ss2si (__m128 __A) 693{ 694 return _mm_cvtss_si32 (__A); 695} 696 697#ifdef __x86_64__ 698/* Convert the lower SPFP value to a 32-bit integer according to the 699 current rounding mode. */ 700 701/* Intel intrinsic. */ 702/* APPLE LOCAL begin radar 5618945 */ 703__STATIC_INLINE long long __attribute__((__always_inline__)) 704/* APPLE LOCAL end radar 5618945 */ 705_mm_cvtss_si64 (__m128 __A) 706{ 707 return __builtin_ia32_cvtss2si64 ((__v4sf) __A); 708} 709 710/* Microsoft intrinsic. */ 711/* APPLE LOCAL begin radar 5618945 */ 712__STATIC_INLINE long long __attribute__((__always_inline__)) 713/* APPLE LOCAL end radar 5618945 */ 714_mm_cvtss_si64x (__m128 __A) 715{ 716 return __builtin_ia32_cvtss2si64 ((__v4sf) __A); 717} 718#endif 719 720/* Convert the two lower SPFP values to 32-bit integers according to the 721 current rounding mode. Return the integers in packed form. */ 722/* APPLE LOCAL begin radar 5618945 */ 723__STATIC_INLINE __m64 __attribute__((__always_inline__)) 724/* APPLE LOCAL end radar 5618945 */ 725_mm_cvtps_pi32 (__m128 __A) 726{ 727 return (__m64) __builtin_ia32_cvtps2pi ((__v4sf) __A); 728} 729 730/* APPLE LOCAL begin radar 5618945 */ 731__STATIC_INLINE __m64 __attribute__((__always_inline__)) 732/* APPLE LOCAL end radar 5618945 */ 733_mm_cvt_ps2pi (__m128 __A) 734{ 735 return _mm_cvtps_pi32 (__A); 736} 737 738/* Truncate the lower SPFP value to a 32-bit integer. */ 739/* APPLE LOCAL begin radar 5618945 */ 740__STATIC_INLINE int __attribute__((__always_inline__)) 741/* APPLE LOCAL end radar 5618945 */ 742_mm_cvttss_si32 (__m128 __A) 743{ 744 return __builtin_ia32_cvttss2si ((__v4sf) __A); 745} 746 747/* APPLE LOCAL begin radar 5618945 */ 748__STATIC_INLINE int __attribute__((__always_inline__)) 749/* APPLE LOCAL end radar 5618945 */ 750_mm_cvtt_ss2si (__m128 __A) 751{ 752 return _mm_cvttss_si32 (__A); 753} 754 755#ifdef __x86_64__ 756/* Truncate the lower SPFP value to a 32-bit integer. */ 757 758/* Intel intrinsic. */ 759/* APPLE LOCAL begin radar 5618945 */ 760__STATIC_INLINE long long __attribute__((__always_inline__)) 761/* APPLE LOCAL end radar 5618945 */ 762_mm_cvttss_si64 (__m128 __A) 763{ 764 return __builtin_ia32_cvttss2si64 ((__v4sf) __A); 765} 766 767/* Microsoft intrinsic. */ 768/* APPLE LOCAL begin radar 5618945 */ 769__STATIC_INLINE long long __attribute__((__always_inline__)) 770/* APPLE LOCAL end radar 5618945 */ 771_mm_cvttss_si64x (__m128 __A) 772{ 773 return __builtin_ia32_cvttss2si64 ((__v4sf) __A); 774} 775#endif 776 777/* Truncate the two lower SPFP values to 32-bit integers. Return the 778 integers in packed form. */ 779/* APPLE LOCAL begin radar 5618945 */ 780__STATIC_INLINE __m64 __attribute__((__always_inline__)) 781/* APPLE LOCAL end radar 5618945 */ 782_mm_cvttps_pi32 (__m128 __A) 783{ 784 return (__m64) __builtin_ia32_cvttps2pi ((__v4sf) __A); 785} 786 787/* APPLE LOCAL begin radar 5618945 */ 788__STATIC_INLINE __m64 __attribute__((__always_inline__)) 789/* APPLE LOCAL end radar 5618945 */ 790_mm_cvtt_ps2pi (__m128 __A) 791{ 792 return _mm_cvttps_pi32 (__A); 793} 794 795/* Convert B to a SPFP value and insert it as element zero in A. */ 796/* APPLE LOCAL begin radar 5618945 */ 797__STATIC_INLINE __m128 __attribute__((__always_inline__)) 798/* APPLE LOCAL end radar 5618945 */ 799_mm_cvtsi32_ss (__m128 __A, int __B) 800{ 801 return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); 802} 803 804/* APPLE LOCAL begin radar 5618945 */ 805__STATIC_INLINE __m128 __attribute__((__always_inline__)) 806/* APPLE LOCAL end radar 5618945 */ 807_mm_cvt_si2ss (__m128 __A, int __B) 808{ 809 return _mm_cvtsi32_ss (__A, __B); 810} 811 812#ifdef __x86_64__ 813/* Convert B to a SPFP value and insert it as element zero in A. */ 814 815/* Intel intrinsic. */ 816/* APPLE LOCAL begin radar 5618945 */ 817__STATIC_INLINE __m128 __attribute__((__always_inline__)) 818/* APPLE LOCAL end radar 5618945 */ 819_mm_cvtsi64_ss (__m128 __A, long long __B) 820{ 821 return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); 822} 823 824/* Microsoft intrinsic. */ 825/* APPLE LOCAL begin radar 5618945 */ 826__STATIC_INLINE __m128 __attribute__((__always_inline__)) 827/* APPLE LOCAL end radar 5618945 */ 828_mm_cvtsi64x_ss (__m128 __A, long long __B) 829{ 830 return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); 831} 832#endif 833 834/* Convert the two 32-bit values in B to SPFP form and insert them 835 as the two lower elements in A. */ 836/* APPLE LOCAL begin radar 5618945 */ 837__STATIC_INLINE __m128 __attribute__((__always_inline__)) 838/* APPLE LOCAL end radar 5618945 */ 839_mm_cvtpi32_ps (__m128 __A, __m64 __B) 840{ 841 return (__m128) __builtin_ia32_cvtpi2ps ((__v4sf) __A, (__v2si)__B); 842} 843 844/* APPLE LOCAL begin radar 5618945 */ 845__STATIC_INLINE __m128 __attribute__((__always_inline__)) 846/* APPLE LOCAL end radar 5618945 */ 847_mm_cvt_pi2ps (__m128 __A, __m64 __B) 848{ 849 return _mm_cvtpi32_ps (__A, __B); 850} 851 852/* Convert the four signed 16-bit values in A to SPFP form. */ 853/* APPLE LOCAL begin radar 5618945 */ 854__STATIC_INLINE __m128 __attribute__((__always_inline__)) 855/* APPLE LOCAL end radar 5618945 */ 856_mm_cvtpi16_ps (__m64 __A) 857{ 858 __v4hi __sign; 859 __v2si __hisi, __losi; 860 __v4sf __r; 861 862 /* This comparison against zero gives us a mask that can be used to 863 fill in the missing sign bits in the unpack operations below, so 864 that we get signed values after unpacking. */ 865 __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A); 866 867 /* Convert the four words to doublewords. */ 868 __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign); 869 __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign); 870 871 /* Convert the doublewords to floating point two at a time. */ 872 __r = (__v4sf) _mm_setzero_ps (); 873 __r = __builtin_ia32_cvtpi2ps (__r, __hisi); 874 __r = __builtin_ia32_movlhps (__r, __r); 875 __r = __builtin_ia32_cvtpi2ps (__r, __losi); 876 877 return (__m128) __r; 878} 879 880/* Convert the four unsigned 16-bit values in A to SPFP form. */ 881/* APPLE LOCAL begin radar 5618945 */ 882__STATIC_INLINE __m128 __attribute__((__always_inline__)) 883/* APPLE LOCAL end radar 5618945 */ 884_mm_cvtpu16_ps (__m64 __A) 885{ 886 __v2si __hisi, __losi; 887 __v4sf __r; 888 889 /* Convert the four words to doublewords. */ 890 __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL); 891 __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL); 892 893 /* Convert the doublewords to floating point two at a time. */ 894 __r = (__v4sf) _mm_setzero_ps (); 895 __r = __builtin_ia32_cvtpi2ps (__r, __hisi); 896 __r = __builtin_ia32_movlhps (__r, __r); 897 __r = __builtin_ia32_cvtpi2ps (__r, __losi); 898 899 return (__m128) __r; 900} 901 902/* Convert the low four signed 8-bit values in A to SPFP form. */ 903/* APPLE LOCAL begin radar 5618945 */ 904__STATIC_INLINE __m128 __attribute__((__always_inline__)) 905/* APPLE LOCAL end radar 5618945 */ 906_mm_cvtpi8_ps (__m64 __A) 907{ 908 __v8qi __sign; 909 910 /* This comparison against zero gives us a mask that can be used to 911 fill in the missing sign bits in the unpack operations below, so 912 that we get signed values after unpacking. */ 913 __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A); 914 915 /* Convert the four low bytes to words. */ 916 __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign); 917 918 return _mm_cvtpi16_ps(__A); 919} 920 921/* Convert the low four unsigned 8-bit values in A to SPFP form. */ 922/* APPLE LOCAL begin radar 5618945 */ 923__STATIC_INLINE __m128 __attribute__((__always_inline__)) 924/* APPLE LOCAL end radar 5618945 */ 925_mm_cvtpu8_ps(__m64 __A) 926{ 927 __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL); 928 return _mm_cvtpu16_ps(__A); 929} 930 931/* Convert the four signed 32-bit values in A and B to SPFP form. */ 932/* APPLE LOCAL begin radar 5618945 */ 933__STATIC_INLINE __m128 __attribute__((__always_inline__)) 934/* APPLE LOCAL end radar 5618945 */ 935_mm_cvtpi32x2_ps(__m64 __A, __m64 __B) 936{ 937 __v4sf __zero = (__v4sf) _mm_setzero_ps (); 938 __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A); 939 __v4sf __sfb = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__B); 940 return (__m128) __builtin_ia32_movlhps (__sfa, __sfb); 941} 942 943/* Convert the four SPFP values in A to four signed 16-bit integers. */ 944/* APPLE LOCAL begin radar 5618945 */ 945__STATIC_INLINE __m64 __attribute__((__always_inline__)) 946/* APPLE LOCAL end radar 5618945 */ 947_mm_cvtps_pi16(__m128 __A) 948{ 949 __v4sf __hisf = (__v4sf)__A; 950 __v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf); 951 __v2si __hisi = __builtin_ia32_cvtps2pi (__hisf); 952 __v2si __losi = __builtin_ia32_cvtps2pi (__losf); 953 return (__m64) __builtin_ia32_packssdw (__hisi, __losi); 954} 955 956/* Convert the four SPFP values in A to four signed 8-bit integers. */ 957/* APPLE LOCAL begin radar 5618945 */ 958__STATIC_INLINE __m64 __attribute__((__always_inline__)) 959/* APPLE LOCAL end radar 5618945 */ 960_mm_cvtps_pi8(__m128 __A) 961{ 962 __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A); 963 return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL); 964} 965 966/* Selects four specific SPFP values from A and B based on MASK. */ 967#if 0 968/* APPLE LOCAL begin radar 5618945 */ 969__STATIC_INLINE __m128 __attribute__((__always_inline__)) 970/* APPLE LOCAL end radar 5618945 */ 971_mm_shuffle_ps (__m128 __A, __m128 __B, int __mask) 972{ 973 return (__m128) __builtin_ia32_shufps ((__v4sf)__A, (__v4sf)__B, __mask); 974} 975#else 976#define _mm_shuffle_ps(A, B, MASK) \ 977 ((__m128) __builtin_ia32_shufps ((__v4sf)(A), (__v4sf)(B), (MASK))) 978#endif 979 980 981/* Selects and interleaves the upper two SPFP values from A and B. */ 982/* APPLE LOCAL begin radar 5618945 */ 983__STATIC_INLINE __m128 __attribute__((__always_inline__)) 984/* APPLE LOCAL end radar 5618945 */ 985_mm_unpackhi_ps (__m128 __A, __m128 __B) 986{ 987 return (__m128) __builtin_ia32_unpckhps ((__v4sf)__A, (__v4sf)__B); 988} 989 990/* Selects and interleaves the lower two SPFP values from A and B. */ 991/* APPLE LOCAL begin radar 5618945 */ 992__STATIC_INLINE __m128 __attribute__((__always_inline__)) 993/* APPLE LOCAL end radar 5618945 */ 994_mm_unpacklo_ps (__m128 __A, __m128 __B) 995{ 996 return (__m128) __builtin_ia32_unpcklps ((__v4sf)__A, (__v4sf)__B); 997} 998 999/* Sets the upper two SPFP values with 64-bits of data loaded from P; 1000 the lower two values are passed through from A. */ 1001/* APPLE LOCAL begin radar 5618945 */ 1002__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1003/* APPLE LOCAL end radar 5618945 */ 1004_mm_loadh_pi (__m128 __A, __m64 const *__P) 1005{ 1006 return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (__v2si *)__P); 1007} 1008 1009/* Stores the upper two SPFP values of A into P. */ 1010/* APPLE LOCAL begin radar 5618945 */ 1011__STATIC_INLINE void __attribute__((__always_inline__)) 1012/* APPLE LOCAL end radar 5618945 */ 1013_mm_storeh_pi (__m64 *__P, __m128 __A) 1014{ 1015 __builtin_ia32_storehps ((__v2si *)__P, (__v4sf)__A); 1016} 1017 1018/* Moves the upper two values of B into the lower two values of A. */ 1019/* APPLE LOCAL begin radar 5618945 */ 1020__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1021/* APPLE LOCAL end radar 5618945 */ 1022_mm_movehl_ps (__m128 __A, __m128 __B) 1023{ 1024 return (__m128) __builtin_ia32_movhlps ((__v4sf)__A, (__v4sf)__B); 1025} 1026 1027/* Moves the lower two values of B into the upper two values of A. */ 1028/* APPLE LOCAL begin radar 5618945 */ 1029__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1030/* APPLE LOCAL end radar 5618945 */ 1031_mm_movelh_ps (__m128 __A, __m128 __B) 1032{ 1033 return (__m128) __builtin_ia32_movlhps ((__v4sf)__A, (__v4sf)__B); 1034} 1035 1036/* Sets the lower two SPFP values with 64-bits of data loaded from P; 1037 the upper two values are passed through from A. */ 1038/* APPLE LOCAL begin radar 5618945 */ 1039__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1040/* APPLE LOCAL end radar 5618945 */ 1041_mm_loadl_pi (__m128 __A, __m64 const *__P) 1042{ 1043 return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (__v2si *)__P); 1044} 1045 1046/* Stores the lower two SPFP values of A into P. */ 1047/* APPLE LOCAL begin radar 5618945 */ 1048__STATIC_INLINE void __attribute__((__always_inline__)) 1049/* APPLE LOCAL end radar 5618945 */ 1050_mm_storel_pi (__m64 *__P, __m128 __A) 1051{ 1052 __builtin_ia32_storelps ((__v2si *)__P, (__v4sf)__A); 1053} 1054 1055/* Creates a 4-bit mask from the most significant bits of the SPFP values. */ 1056/* APPLE LOCAL begin radar 5618945 */ 1057__STATIC_INLINE int __attribute__((__always_inline__)) 1058/* APPLE LOCAL end radar 5618945 */ 1059_mm_movemask_ps (__m128 __A) 1060{ 1061 return __builtin_ia32_movmskps ((__v4sf)__A); 1062} 1063 1064/* Return the contents of the control register. */ 1065/* APPLE LOCAL begin radar 5618945 */ 1066__STATIC_INLINE unsigned int __attribute__((__always_inline__)) 1067/* APPLE LOCAL end radar 5618945 */ 1068_mm_getcsr (void) 1069{ 1070 return __builtin_ia32_stmxcsr (); 1071} 1072 1073/* Read exception bits from the control register. */ 1074/* APPLE LOCAL begin radar 5618945 */ 1075__STATIC_INLINE unsigned int __attribute__((__always_inline__)) 1076/* APPLE LOCAL end radar 5618945 */ 1077_MM_GET_EXCEPTION_STATE (void) 1078{ 1079 return _mm_getcsr() & _MM_EXCEPT_MASK; 1080} 1081 1082/* APPLE LOCAL begin radar 5618945 */ 1083__STATIC_INLINE unsigned int __attribute__((__always_inline__)) 1084/* APPLE LOCAL end radar 5618945 */ 1085_MM_GET_EXCEPTION_MASK (void) 1086{ 1087 return _mm_getcsr() & _MM_MASK_MASK; 1088} 1089 1090/* APPLE LOCAL begin radar 5618945 */ 1091__STATIC_INLINE unsigned int __attribute__((__always_inline__)) 1092/* APPLE LOCAL end radar 5618945 */ 1093_MM_GET_ROUNDING_MODE (void) 1094{ 1095 return _mm_getcsr() & _MM_ROUND_MASK; 1096} 1097 1098/* APPLE LOCAL begin radar 5618945 */ 1099__STATIC_INLINE unsigned int __attribute__((__always_inline__)) 1100/* APPLE LOCAL end radar 5618945 */ 1101_MM_GET_FLUSH_ZERO_MODE (void) 1102{ 1103 return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; 1104} 1105 1106/* Set the control register to I. */ 1107/* APPLE LOCAL begin radar 5618945 */ 1108__STATIC_INLINE void __attribute__((__always_inline__)) 1109/* APPLE LOCAL end radar 5618945 */ 1110_mm_setcsr (unsigned int __I) 1111{ 1112 __builtin_ia32_ldmxcsr (__I); 1113} 1114 1115/* Set exception bits in the control register. */ 1116/* APPLE LOCAL begin radar 5618945 */ 1117__STATIC_INLINE void __attribute__((__always_inline__)) 1118/* APPLE LOCAL end radar 5618945 */ 1119_MM_SET_EXCEPTION_STATE(unsigned int __mask) 1120{ 1121 _mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | __mask); 1122} 1123 1124/* APPLE LOCAL begin radar 5618945 */ 1125__STATIC_INLINE void __attribute__((__always_inline__)) 1126/* APPLE LOCAL end radar 5618945 */ 1127_MM_SET_EXCEPTION_MASK (unsigned int __mask) 1128{ 1129 _mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | __mask); 1130} 1131 1132/* APPLE LOCAL begin radar 5618945 */ 1133__STATIC_INLINE void __attribute__((__always_inline__)) 1134/* APPLE LOCAL end radar 5618945 */ 1135_MM_SET_ROUNDING_MODE (unsigned int __mode) 1136{ 1137 _mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | __mode); 1138} 1139 1140/* APPLE LOCAL begin radar 5618945 */ 1141__STATIC_INLINE void __attribute__((__always_inline__)) 1142/* APPLE LOCAL end radar 5618945 */ 1143_MM_SET_FLUSH_ZERO_MODE (unsigned int __mode) 1144{ 1145 _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode); 1146} 1147 1148/* Create a vector with element 0 as F and the rest zero. */ 1149/* APPLE LOCAL begin radar 5618945 */ 1150__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1151/* APPLE LOCAL end radar 5618945 */ 1152_mm_set_ss (float __F) 1153{ 1154 return __extension__ (__m128)(__v4sf){ __F, 0, 0, 0 }; 1155} 1156 1157/* Create a vector with all four elements equal to F. */ 1158/* APPLE LOCAL begin radar 5618945 */ 1159__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1160/* APPLE LOCAL end radar 5618945 */ 1161_mm_set1_ps (float __F) 1162{ 1163 return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F }; 1164} 1165 1166/* APPLE LOCAL begin radar 5618945 */ 1167__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1168/* APPLE LOCAL end radar 5618945 */ 1169_mm_set_ps1 (float __F) 1170{ 1171 return _mm_set1_ps (__F); 1172} 1173 1174/* Create a vector with element 0 as *P and the rest zero. */ 1175/* APPLE LOCAL begin radar 5618945 */ 1176__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1177/* APPLE LOCAL end radar 5618945 */ 1178_mm_load_ss (float const *__P) 1179{ 1180 return _mm_set_ss (*__P); 1181} 1182 1183/* Create a vector with all four elements equal to *P. */ 1184/* APPLE LOCAL begin radar 5618945 */ 1185__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1186/* APPLE LOCAL end radar 5618945 */ 1187_mm_load1_ps (float const *__P) 1188{ 1189 return _mm_set1_ps (*__P); 1190} 1191 1192/* APPLE LOCAL begin radar 5618945 */ 1193__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1194/* APPLE LOCAL end radar 5618945 */ 1195_mm_load_ps1 (float const *__P) 1196{ 1197 return _mm_load1_ps (__P); 1198} 1199 1200/* Load four SPFP values from P. The address must be 16-byte aligned. */ 1201/* APPLE LOCAL begin radar 5618945 */ 1202__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1203/* APPLE LOCAL end radar 5618945 */ 1204_mm_load_ps (float const *__P) 1205{ 1206 return (__m128) *(__v4sf *)__P; 1207} 1208 1209/* Load four SPFP values from P. The address need not be 16-byte aligned. */ 1210/* APPLE LOCAL begin radar 5618945 */ 1211__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1212/* APPLE LOCAL end radar 5618945 */ 1213_mm_loadu_ps (float const *__P) 1214{ 1215 return (__m128) __builtin_ia32_loadups (__P); 1216} 1217 1218/* Load four SPFP values in reverse order. The address must be aligned. */ 1219/* APPLE LOCAL begin radar 5618945 */ 1220__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1221/* APPLE LOCAL end radar 5618945 */ 1222_mm_loadr_ps (float const *__P) 1223{ 1224 __v4sf __tmp = *(__v4sf *)__P; 1225 return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3)); 1226} 1227 1228/* Create the vector [Z Y X W]. */ 1229/* APPLE LOCAL begin radar 5618945 */ 1230__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1231/* APPLE LOCAL end radar 5618945 */ 1232_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W) 1233{ 1234 return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z }; 1235} 1236 1237/* Create the vector [W X Y Z]. */ 1238/* APPLE LOCAL begin radar 5618945 */ 1239__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1240/* APPLE LOCAL end radar 5618945 */ 1241_mm_setr_ps (float __Z, float __Y, float __X, float __W) 1242{ 1243 return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W }; 1244} 1245 1246/* Stores the lower SPFP value. */ 1247/* APPLE LOCAL begin radar 5618945 */ 1248__STATIC_INLINE void __attribute__((__always_inline__)) 1249/* APPLE LOCAL end radar 5618945 */ 1250_mm_store_ss (float *__P, __m128 __A) 1251{ 1252 *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0); 1253} 1254 1255/* APPLE LOCAL begin radar 5618945 */ 1256__STATIC_INLINE float __attribute__((__always_inline__)) 1257/* APPLE LOCAL end radar 5618945 */ 1258_mm_cvtss_f32 (__m128 __A) 1259{ 1260 return __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0); 1261} 1262 1263/* Store four SPFP values. The address must be 16-byte aligned. */ 1264/* APPLE LOCAL begin radar 5618945 */ 1265__STATIC_INLINE void __attribute__((__always_inline__)) 1266/* APPLE LOCAL end radar 5618945 */ 1267_mm_store_ps (float *__P, __m128 __A) 1268{ 1269 *(__v4sf *)__P = (__v4sf)__A; 1270} 1271 1272/* Store four SPFP values. The address need not be 16-byte aligned. */ 1273/* APPLE LOCAL begin radar 5618945 */ 1274__STATIC_INLINE void __attribute__((__always_inline__)) 1275/* APPLE LOCAL end radar 5618945 */ 1276_mm_storeu_ps (float *__P, __m128 __A) 1277{ 1278 __builtin_ia32_storeups (__P, (__v4sf)__A); 1279} 1280 1281/* Store the lower SPFP value across four words. */ 1282/* APPLE LOCAL begin radar 5618945 */ 1283__STATIC_INLINE void __attribute__((__always_inline__)) 1284/* APPLE LOCAL end radar 5618945 */ 1285_mm_store1_ps (float *__P, __m128 __A) 1286{ 1287 __v4sf __va = (__v4sf)__A; 1288 __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0)); 1289 _mm_storeu_ps (__P, __tmp); 1290} 1291 1292/* APPLE LOCAL begin radar 5618945 */ 1293__STATIC_INLINE void __attribute__((__always_inline__)) 1294/* APPLE LOCAL end radar 5618945 */ 1295_mm_store_ps1 (float *__P, __m128 __A) 1296{ 1297 _mm_store1_ps (__P, __A); 1298} 1299 1300/* Store four SPFP values in reverse order. The address must be aligned. */ 1301/* APPLE LOCAL begin radar 5618945 */ 1302__STATIC_INLINE void __attribute__((__always_inline__)) 1303/* APPLE LOCAL end radar 5618945 */ 1304_mm_storer_ps (float *__P, __m128 __A) 1305{ 1306 __v4sf __va = (__v4sf)__A; 1307 __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3)); 1308 _mm_store_ps (__P, __tmp); 1309} 1310 1311/* Sets the low SPFP value of A from the low value of B. */ 1312/* APPLE LOCAL begin radar 5618945 */ 1313__STATIC_INLINE __m128 __attribute__((__always_inline__)) 1314/* APPLE LOCAL end radar 5618945 */ 1315_mm_move_ss (__m128 __A, __m128 __B) 1316{ 1317 return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B); 1318} 1319 1320/* Extracts one of the four words of A. The selector N must be immediate. */ 1321#if 0 1322/* APPLE LOCAL begin radar 5618945 */ 1323__STATIC_INLINE int __attribute__((__always_inline__)) 1324/* APPLE LOCAL end radar 5618945 */ 1325_mm_extract_pi16 (__m64 const __A, int const __N) 1326{ 1327 return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N); 1328} 1329 1330/* APPLE LOCAL begin radar 5618945 */ 1331__STATIC_INLINE int __attribute__((__always_inline__)) 1332/* APPLE LOCAL end radar 5618945 */ 1333_m_pextrw (__m64 const __A, int const __N) 1334{ 1335 return _mm_extract_pi16 (__A, __N); 1336} 1337#else 1338#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N)) 1339#define _m_pextrw(A, N) _mm_extract_pi16((A), (N)) 1340#endif 1341 1342/* Inserts word D into one of four words of A. The selector N must be 1343 immediate. */ 1344#if 0 1345/* APPLE LOCAL begin radar 5618945 */ 1346__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1347/* APPLE LOCAL end radar 5618945 */ 1348_mm_insert_pi16 (__m64 const __A, int const __D, int const __N) 1349{ 1350 return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N); 1351} 1352 1353/* APPLE LOCAL begin radar 5618945 */ 1354__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1355/* APPLE LOCAL end radar 5618945 */ 1356_m_pinsrw (__m64 const __A, int const __D, int const __N) 1357{ 1358 return _mm_insert_pi16 (__A, __D, __N); 1359} 1360#else 1361#define _mm_insert_pi16(A, D, N) \ 1362 ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N))) 1363#define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N)) 1364#endif 1365 1366/* Compute the element-wise maximum of signed 16-bit values. */ 1367/* APPLE LOCAL begin radar 5618945 */ 1368__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1369/* APPLE LOCAL end radar 5618945 */ 1370_mm_max_pi16 (__m64 __A, __m64 __B) 1371{ 1372 return (__m64) __builtin_ia32_pmaxsw ((__v4hi)__A, (__v4hi)__B); 1373} 1374 1375/* APPLE LOCAL begin radar 5618945 */ 1376__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1377/* APPLE LOCAL end radar 5618945 */ 1378_m_pmaxsw (__m64 __A, __m64 __B) 1379{ 1380 return _mm_max_pi16 (__A, __B); 1381} 1382 1383/* Compute the element-wise maximum of unsigned 8-bit values. */ 1384/* APPLE LOCAL begin radar 5618945 */ 1385__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1386/* APPLE LOCAL end radar 5618945 */ 1387_mm_max_pu8 (__m64 __A, __m64 __B) 1388{ 1389 return (__m64) __builtin_ia32_pmaxub ((__v8qi)__A, (__v8qi)__B); 1390} 1391 1392/* APPLE LOCAL begin radar 5618945 */ 1393__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1394/* APPLE LOCAL end radar 5618945 */ 1395_m_pmaxub (__m64 __A, __m64 __B) 1396{ 1397 return _mm_max_pu8 (__A, __B); 1398} 1399 1400/* Compute the element-wise minimum of signed 16-bit values. */ 1401/* APPLE LOCAL begin radar 5618945 */ 1402__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1403/* APPLE LOCAL end radar 5618945 */ 1404_mm_min_pi16 (__m64 __A, __m64 __B) 1405{ 1406 return (__m64) __builtin_ia32_pminsw ((__v4hi)__A, (__v4hi)__B); 1407} 1408 1409/* APPLE LOCAL begin radar 5618945 */ 1410__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1411/* APPLE LOCAL end radar 5618945 */ 1412_m_pminsw (__m64 __A, __m64 __B) 1413{ 1414 return _mm_min_pi16 (__A, __B); 1415} 1416 1417/* Compute the element-wise minimum of unsigned 8-bit values. */ 1418/* APPLE LOCAL begin radar 5618945 */ 1419__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1420/* APPLE LOCAL end radar 5618945 */ 1421_mm_min_pu8 (__m64 __A, __m64 __B) 1422{ 1423 return (__m64) __builtin_ia32_pminub ((__v8qi)__A, (__v8qi)__B); 1424} 1425 1426/* APPLE LOCAL begin radar 5618945 */ 1427__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1428/* APPLE LOCAL end radar 5618945 */ 1429_m_pminub (__m64 __A, __m64 __B) 1430{ 1431 return _mm_min_pu8 (__A, __B); 1432} 1433 1434/* Create an 8-bit mask of the signs of 8-bit values. */ 1435/* APPLE LOCAL begin radar 5618945 */ 1436__STATIC_INLINE int __attribute__((__always_inline__)) 1437/* APPLE LOCAL end radar 5618945 */ 1438_mm_movemask_pi8 (__m64 __A) 1439{ 1440 return __builtin_ia32_pmovmskb ((__v8qi)__A); 1441} 1442 1443/* APPLE LOCAL begin radar 5618945 */ 1444__STATIC_INLINE int __attribute__((__always_inline__)) 1445/* APPLE LOCAL end radar 5618945 */ 1446_m_pmovmskb (__m64 __A) 1447{ 1448 return _mm_movemask_pi8 (__A); 1449} 1450 1451/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values 1452 in B and produce the high 16 bits of the 32-bit results. */ 1453/* APPLE LOCAL begin radar 5618945 */ 1454__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1455/* APPLE LOCAL end radar 5618945 */ 1456_mm_mulhi_pu16 (__m64 __A, __m64 __B) 1457{ 1458 return (__m64) __builtin_ia32_pmulhuw ((__v4hi)__A, (__v4hi)__B); 1459} 1460 1461/* APPLE LOCAL begin radar 5618945 */ 1462__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1463/* APPLE LOCAL end radar 5618945 */ 1464_m_pmulhuw (__m64 __A, __m64 __B) 1465{ 1466 return _mm_mulhi_pu16 (__A, __B); 1467} 1468 1469/* Return a combination of the four 16-bit values in A. The selector 1470 must be an immediate. */ 1471#if 0 1472/* APPLE LOCAL begin radar 5618945 */ 1473__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1474/* APPLE LOCAL end radar 5618945 */ 1475_mm_shuffle_pi16 (__m64 __A, int __N) 1476{ 1477 return (__m64) __builtin_ia32_pshufw ((__v4hi)__A, __N); 1478} 1479 1480/* APPLE LOCAL begin radar 5618945 */ 1481__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1482/* APPLE LOCAL end radar 5618945 */ 1483_m_pshufw (__m64 __A, int __N) 1484{ 1485 return _mm_shuffle_pi16 (__A, __N); 1486} 1487#else 1488#define _mm_shuffle_pi16(A, N) \ 1489 ((__m64) __builtin_ia32_pshufw ((__v4hi)(A), (N))) 1490#define _m_pshufw(A, N) _mm_shuffle_pi16 ((A), (N)) 1491#endif 1492 1493/* Conditionally store byte elements of A into P. The high bit of each 1494 byte in the selector N determines whether the corresponding byte from 1495 A is stored. */ 1496/* APPLE LOCAL begin radar 5618945 */ 1497__STATIC_INLINE void __attribute__((__always_inline__)) 1498/* APPLE LOCAL end radar 5618945 */ 1499_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P) 1500{ 1501 __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P); 1502} 1503 1504/* APPLE LOCAL begin radar 5618945 */ 1505__STATIC_INLINE void __attribute__((__always_inline__)) 1506/* APPLE LOCAL end radar 5618945 */ 1507_m_maskmovq (__m64 __A, __m64 __N, char *__P) 1508{ 1509 _mm_maskmove_si64 (__A, __N, __P); 1510} 1511 1512/* Compute the rounded averages of the unsigned 8-bit values in A and B. */ 1513/* APPLE LOCAL begin radar 5618945 */ 1514__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1515/* APPLE LOCAL end radar 5618945 */ 1516_mm_avg_pu8 (__m64 __A, __m64 __B) 1517{ 1518 return (__m64) __builtin_ia32_pavgb ((__v8qi)__A, (__v8qi)__B); 1519} 1520 1521/* APPLE LOCAL begin radar 5618945 */ 1522__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1523/* APPLE LOCAL end radar 5618945 */ 1524_m_pavgb (__m64 __A, __m64 __B) 1525{ 1526 return _mm_avg_pu8 (__A, __B); 1527} 1528 1529/* Compute the rounded averages of the unsigned 16-bit values in A and B. */ 1530/* APPLE LOCAL begin radar 5618945 */ 1531__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1532/* APPLE LOCAL end radar 5618945 */ 1533_mm_avg_pu16 (__m64 __A, __m64 __B) 1534{ 1535 return (__m64) __builtin_ia32_pavgw ((__v4hi)__A, (__v4hi)__B); 1536} 1537 1538/* APPLE LOCAL begin radar 5618945 */ 1539__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1540/* APPLE LOCAL end radar 5618945 */ 1541_m_pavgw (__m64 __A, __m64 __B) 1542{ 1543 return _mm_avg_pu16 (__A, __B); 1544} 1545 1546/* Compute the sum of the absolute differences of the unsigned 8-bit 1547 values in A and B. Return the value in the lower 16-bit word; the 1548 upper words are cleared. */ 1549/* APPLE LOCAL begin radar 5618945 */ 1550__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1551/* APPLE LOCAL end radar 5618945 */ 1552_mm_sad_pu8 (__m64 __A, __m64 __B) 1553{ 1554 return (__m64) __builtin_ia32_psadbw ((__v8qi)__A, (__v8qi)__B); 1555} 1556 1557/* APPLE LOCAL begin radar 5618945 */ 1558__STATIC_INLINE __m64 __attribute__((__always_inline__)) 1559/* APPLE LOCAL end radar 5618945 */ 1560_m_psadbw (__m64 __A, __m64 __B) 1561{ 1562 return _mm_sad_pu8 (__A, __B); 1563} 1564 1565/* Loads one cache line from address P to a location "closer" to the 1566 processor. The selector I specifies the type of prefetch operation. */ 1567#if 0 1568/* APPLE LOCAL begin radar 5618945 */ 1569__STATIC_INLINE void __attribute__((__always_inline__)) 1570/* APPLE LOCAL end radar 5618945 */ 1571_mm_prefetch (void *__P, enum _mm_hint __I) 1572{ 1573 __builtin_prefetch (__P, 0, __I); 1574} 1575#else 1576#define _mm_prefetch(P, I) \ 1577 __builtin_prefetch ((P), 0, (I)) 1578#endif 1579 1580/* Stores the data in A to the address P without polluting the caches. */ 1581/* APPLE LOCAL begin radar 5618945 */ 1582__STATIC_INLINE void __attribute__((__always_inline__)) 1583/* APPLE LOCAL end radar 5618945 */ 1584_mm_stream_pi (__m64 *__P, __m64 __A) 1585{ 1586 /* APPLE LOCAL 4656532 use V1DImode for _m64 */ 1587 __builtin_ia32_movntq (__P, __A); 1588} 1589 1590/* Likewise. The address must be 16-byte aligned. */ 1591/* APPLE LOCAL begin radar 5618945 */ 1592__STATIC_INLINE void __attribute__((__always_inline__)) 1593/* APPLE LOCAL end radar 5618945 */ 1594_mm_stream_ps (float *__P, __m128 __A) 1595{ 1596 __builtin_ia32_movntps (__P, (__v4sf)__A); 1597} 1598 1599/* Guarantees that every preceding store is globally visible before 1600 any subsequent store. */ 1601/* APPLE LOCAL begin radar 5618945 */ 1602__STATIC_INLINE void __attribute__((__always_inline__)) 1603/* APPLE LOCAL end radar 5618945 */ 1604_mm_sfence (void) 1605{ 1606 __builtin_ia32_sfence (); 1607} 1608 1609/* The execution of the next instruction is delayed by an implementation 1610 specific amount of time. The instruction does not modify the 1611 architectural state. */ 1612/* APPLE LOCAL begin radar 5618945 */ 1613__STATIC_INLINE void __attribute__((__always_inline__)) 1614/* APPLE LOCAL end radar 5618945 */ 1615_mm_pause (void) 1616{ 1617 __asm__ __volatile__ ("rep; nop" : : ); 1618} 1619/* APPLE LOCAL end radar 4152603 */ 1620 1621/* Transpose the 4x4 matrix composed of row[0-3]. */ 1622#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ 1623do { \ 1624 __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \ 1625 __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); \ 1626 __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); \ 1627 __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); \ 1628 __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); \ 1629 (row0) = __builtin_ia32_movlhps (__t0, __t1); \ 1630 (row1) = __builtin_ia32_movhlps (__t1, __t0); \ 1631 (row2) = __builtin_ia32_movlhps (__t2, __t3); \ 1632 (row3) = __builtin_ia32_movhlps (__t3, __t2); \ 1633} while (0) 1634 1635/* APPLE LOCAL begin nodebug inline 4152603 */ 1636#undef __always_inline__ 1637/* APPLE LOCAL end nodebug inline 4152603 */ 1638 1639/* For backward source compatibility. */ 1640#include <emmintrin.h> 1641 1642#endif /* __SSE__ */ 1643#endif /* _XMMINTRIN_H_INCLUDED */ 1644