/* xmmintrin.h revision 6bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89 */
18d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt/*===---- xmmintrin.h - SSE intrinsics -------------------------------------=== 28d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * 3807291d85bf857320aff6a8ade38c5f622ab9df8Dmitry Shmidt * Permission is hereby granted, free of charge, to any person obtaining a copy 48d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * of this software and associated documentation files (the "Software"), to deal 58d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * in the Software without restriction, including without limitation the rights 6c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt * copies of the Software, and to permit persons to whom the Software is 88d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * furnished to do so, subject to the following conditions: 98d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * 108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * The above copyright notice and this permission notice shall be included in 118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * all copies or substantial portions of the Software. 128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * 139d9e60286e05ae45025b672636490bd12586138dDmitry Shmidt * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 149d9e60286e05ae45025b672636490bd12586138dDmitry Shmidt * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * THE SOFTWARE. 208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt * 218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt *===-----------------------------------------------------------------------=== 228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt */ 238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifndef __XMMINTRIN_H 258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define __XMMINTRIN_H 268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifndef __SSE__ 288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#error "SSE instruction set not enabled" 298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#else 306c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 316c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt#include <mmintrin.h> 328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidttypedef int __v4si __attribute__((__vector_size__(16))); 348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidttypedef float __v4sf __attribute__((__vector_size__(16))); 358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidttypedef float __m128 __attribute__((__vector_size__(16))); 368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// This header should only be included in a hosted environment as it depends on 
388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt// a standard library to provide allocation routines. 398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#if __STDC_HOSTED__ 408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#include <mm_malloc.h> 418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#endif 428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_add_ss(__m128 __a, __m128 __b) 458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __a[0] += __b[0]; 478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a; 488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_add_ps(__m128 __a, __m128 __b) 528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a + __b; 548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sub_ss(__m128 __a, __m128 __b) 588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __a[0] -= __b[0]; 608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a; 618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 
__attribute__((__always_inline__, __nodebug__)) 648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sub_ps(__m128 __a, __m128 __b) 658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a - __b; 678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_mul_ss(__m128 __a, __m128 __b) 718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __a[0] *= __b[0]; 738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a; 748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_mul_ps(__m128 __a, __m128 __b) 788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a * __b; 808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 83d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt_mm_div_ss(__m128 __a, __m128 __b) 848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __a[0] /= __b[0]; 868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a; 878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 
__attribute__((__always_inline__, __nodebug__)) 908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_div_ps(__m128 __a, __m128 __b) 918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a / __b; 938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sqrt_ss(__m128 __a) 978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m128 __c = __builtin_ia32_sqrtss(__a); 998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128) { __c[0], __a[1], __a[2], __a[3] }; 1008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sqrt_ps(__m128 __a) 1048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_sqrtps(__a); 1068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_rcp_ss(__m128 __a) 1108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m128 __c = __builtin_ia32_rcpss(__a); 1128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128) { __c[0], __a[1], __a[2], __a[3] }; 1138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 
1148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_rcp_ps(__m128 __a) 1178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_rcpps(__a); 1198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_rsqrt_ss(__m128 __a) 1238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m128 __c = __builtin_ia32_rsqrtss(__a); 1258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128) { __c[0], __a[1], __a[2], __a[3] }; 1268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_rsqrt_ps(__m128 __a) 1308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_rsqrtps(__a); 1328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_min_ss(__m128 __a, __m128 __b) 1368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_minss(__a, __b); 
1388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_min_ps(__m128 __a, __m128 __b) 1428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_minps(__a, __b); 1448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_max_ss(__m128 __a, __m128 __b) 1488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_maxss(__a, __b); 1508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_max_ps(__m128 __a, __m128 __b) 1548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1551f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_maxps(__a, __b); 1561f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 1571f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 1581f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1591f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_and_ps(__m128 __a, __m128 __b) 1601f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 1611f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return (__m128)((__v4si)__a & (__v4si)__b); 
1621f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 1631f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 1648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 165d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt_mm_andnot_ps(__m128 __a, __m128 __b) 166d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt{ 1671f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return (__m128)(~(__v4si)__a & (__v4si)__b); 1687d5c8f257a74ac0d12828962a492e8b84ef83923Dmitry Shmidt} 169fb79edc9df1f20461e90e478363d207348213d35Dmitry Shmidt 1708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_or_ps(__m128 __a, __m128 __b) 1728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)((__v4si)__a | (__v4si)__b); 1748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_xor_ps(__m128 __a, __m128 __b) 1788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)((__v4si)__a ^ (__v4si)__b); 1808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpeq_ss(__m128 __a, __m128 __b) 1848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpss(__a, __b, 0); 
1868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpeq_ps(__m128 __a, __m128 __b) 1908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpps(__a, __b, 0); 1928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 1948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmplt_ss(__m128 __a, __m128 __b) 1968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 1978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpss(__a, __b, 1); 1988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 1998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2016c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_cmplt_ps(__m128 __a, __m128 __b) 2026c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{ 2036c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt return (__m128)__builtin_ia32_cmpps(__a, __b, 1); 2046c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt} 2056c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 2066c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2076c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_cmple_ss(__m128 __a, __m128 __b) 2086c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{ 2096c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt return (__m128)__builtin_ia32_cmpss(__a, 
__b, 2); 2106c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt} 2118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmple_ps(__m128 __a, __m128 __b) 2148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 2158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpps(__a, __b, 2); 2168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpgt_ss(__m128 __a, __m128 __b) 2208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 2218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_shufflevector(__a, 222051af73b8f8014eff33330aead0f36944b3403e6Dmitry Shmidt __builtin_ia32_cmpss(__b, __a, 1), 2238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4, 1, 2, 3); 2248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpgt_ps(__m128 __a, __m128 __b) 2288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 2298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpps(__b, __a, 1); 2308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpge_ss(__m128 __a, 
__m128 __b) 2348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 2358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_shufflevector(__a, 2368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __builtin_ia32_cmpss(__b, __a, 2), 2378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4, 1, 2, 3); 238f86232838cf712377867cb42417c1613ab5dc425Dmitry Shmidt} 2398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 241f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_cmpge_ps(__m128 __a, __m128 __b) 2428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 2438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpps(__b, __a, 2); 2448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 246344abd362cfe2d03ed956666527352826b67bde5Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpneq_ss(__m128 __a, __m128 __b) 2488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 249d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt return (__m128)__builtin_ia32_cmpss(__a, __b, 4); 2501f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 2516c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 2528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpneq_ps(__m128 __a, __m128 __b) 2548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 2558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpps(__a, __b, 4); 256a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt} 25704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 
25804949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 25904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_cmpnlt_ss(__m128 __a, __m128 __b) 26004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{ 2611f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return (__m128)__builtin_ia32_cmpss(__a, __b, 5); 2628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2631f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 2648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 265051af73b8f8014eff33330aead0f36944b3403e6Dmitry Shmidt_mm_cmpnlt_ps(__m128 __a, __m128 __b) 2661f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 2676c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt return (__m128)__builtin_ia32_cmpps(__a, __b, 5); 2686c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt} 2696c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 2708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2716c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_cmpnle_ss(__m128 __a, __m128 __b) 2726c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{ 273d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt return (__m128)__builtin_ia32_cmpss(__a, __b, 6); 27404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt} 27504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 27604949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 27704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_cmpnle_ps(__m128 __a, __m128 __b) 27804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{ 27904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt return (__m128)__builtin_ia32_cmpps(__a, __b, 6); 28004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt} 
28104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 28204949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpngt_ss(__m128 __a, __m128 __b) 2848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 2858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_shufflevector(__a, 2868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __builtin_ia32_cmpss(__b, __a, 5), 2878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4, 1, 2, 3); 2888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpngt_ps(__m128 __a, __m128 __b) 2928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 2938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpps(__b, __a, 5); 2948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 2958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 2961846323989242844f0e857458a8939fa5836429cDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 2971f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cmpnge_ss(__m128 __a, __m128 __b) 2981f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 2998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_shufflevector(__a, 3006c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt __builtin_ia32_cmpss(__b, __a, 6), 3018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4, 1, 2, 3); 3028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 3048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, 
__nodebug__)) 30504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_cmpnge_ps(__m128 __a, __m128 __b) 3068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 3078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpps(__b, __a, 6); 3088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 3108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 3111f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cmpord_ss(__m128 __a, __m128 __b) 3128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 3131846323989242844f0e857458a8939fa5836429cDmitry Shmidt return (__m128)__builtin_ia32_cmpss(__a, __b, 7); 3141846323989242844f0e857458a8939fa5836429cDmitry Shmidt} 3151846323989242844f0e857458a8939fa5836429cDmitry Shmidt 3161846323989242844f0e857458a8939fa5836429cDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 3171846323989242844f0e857458a8939fa5836429cDmitry Shmidt_mm_cmpord_ps(__m128 __a, __m128 __b) 3181846323989242844f0e857458a8939fa5836429cDmitry Shmidt{ 3191846323989242844f0e857458a8939fa5836429cDmitry Shmidt return (__m128)__builtin_ia32_cmpps(__a, __b, 7); 3201846323989242844f0e857458a8939fa5836429cDmitry Shmidt} 3211846323989242844f0e857458a8939fa5836429cDmitry Shmidt 3221846323989242844f0e857458a8939fa5836429cDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 3231846323989242844f0e857458a8939fa5836429cDmitry Shmidt_mm_cmpunord_ss(__m128 __a, __m128 __b) 3241846323989242844f0e857458a8939fa5836429cDmitry Shmidt{ 3251846323989242844f0e857458a8939fa5836429cDmitry Shmidt return (__m128)__builtin_ia32_cmpss(__a, __b, 3); 3268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 3288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 
__attribute__((__always_inline__, __nodebug__)) 3298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cmpunord_ps(__m128 __a, __m128 __b) 3308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 3318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128)__builtin_ia32_cmpps(__a, __b, 3); 3328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 3348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_comieq_ss(__m128 __a, __m128 __b) 3368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 3378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_comieq(__a, __b); 3388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 3408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_comilt_ss(__m128 __a, __m128 __b) 3428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 3438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_comilt(__a, __b); 3448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3456c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 3466c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3476c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_comile_ss(__m128 __a, __m128 __b) 3486c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{ 3496c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt return __builtin_ia32_comile(__a, __b); 3506c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt} 3516c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 3526c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ int 
__attribute__((__always_inline__, __nodebug__)) 3536c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_comigt_ss(__m128 __a, __m128 __b) 3546c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{ 3556c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt return __builtin_ia32_comigt(__a, __b); 3566c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt} 3576c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 3588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_comige_ss(__m128 __a, __m128 __b) 3608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 3618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_comige(__a, __b); 3628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3631f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 3641f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3651f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_comineq_ss(__m128 __a, __m128 __b) 3661f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 3671f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_comineq(__a, __b); 3681f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 3691f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 3701f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3711f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomieq_ss(__m128 __a, __m128 __b) 3721f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 3731f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_ucomieq(__a, __b); 3741f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 3751f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 3761f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, 
__nodebug__)) 3771f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomilt_ss(__m128 __a, __m128 __b) 3781f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 3791f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_ucomilt(__a, __b); 3808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3811f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 3828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_ucomile_ss(__m128 __a, __m128 __b) 3848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 3858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_ucomile(__a, __b); 3868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 3878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 3881f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3891f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomigt_ss(__m128 __a, __m128 __b) 3901f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 3911f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_ucomigt(__a, __b); 3921f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 3931f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 3941f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 3951f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomige_ss(__m128 __a, __m128 __b) 3961f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 3971f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_ucomige(__a, __b); 3981f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 3991f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 4001f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 
4011f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_ucomineq_ss(__m128 __a, __m128 __b) 4021f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 4031f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_ucomineq(__a, __b); 4041f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 4051f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 4068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 4078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtss_si32(__m128 __a) 4081f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 4091f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_cvtss2si(__a); 4101f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 4111f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 4121f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 4131f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvt_ss2si(__m128 __a) 4141f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 4151f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return _mm_cvtss_si32(__a); 4161f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 4171f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 4181f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt#ifdef __x86_64__ 4191f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 4201f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 4211f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvtss_si64(__m128 __a) 4221f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 4231f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return __builtin_ia32_cvtss2si64(__a); 4241f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 4251f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 4261f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt#endif 
4271f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 4281f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 4291f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvtps_pi32(__m128 __a) 4301f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 4311f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return (__m64)__builtin_ia32_cvtps2pi(__a); 4321f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 4331f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 4341f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 4351f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvt_ps2pi(__m128 __a) 4361f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 4371f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return _mm_cvtps_pi32(__a); 4388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 4418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvttss_si32(__m128 __a) 4428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 4438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a[0]; 4448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 4478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtt_ss2si(__m128 __a) 4488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 4498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return _mm_cvttss_si32(__a); 4508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic 
__inline__ long long __attribute__((__always_inline__, __nodebug__)) 4538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvttss_si64(__m128 __a) 4548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 4558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a[0]; 4568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 4598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvttps_pi32(__m128 __a) 4608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 4618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m64)__builtin_ia32_cvttps2pi(__a); 4628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 4658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtt_ps2pi(__m128 __a) 4668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 4678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return _mm_cvttps_pi32(__a); 4688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 4718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtsi32_ss(__m128 __a, int __b) 4728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 4738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __a[0] = __b; 4748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a; 4758347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt} 4768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 
__attribute__((__always_inline__, __nodebug__)) 4788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvt_si2ss(__m128 __a, int __b) 4798347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt{ 4808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return _mm_cvtsi32_ss(__a, __b); 4818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#ifdef __x86_64__ 4848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4858347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 4868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtsi64_ss(__m128 __a, long long __b) 4878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 4888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __a[0] = __b; 4898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a; 4908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4928347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt#endif 4938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 4948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 4958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtpi32_ps(__m128 __a, __m64 __b) 4968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 4978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_cvtpi2ps(__a, (__v2si)__b); 4988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 4998347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt 5008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 5018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvt_pi2ps(__m128 __a, __m64 __b) 5028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 
5038347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt return _mm_cvtpi32_ps(__a, __b); 5048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 5068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ float __attribute__((__always_inline__, __nodebug__)) 5078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtss_f32(__m128 __a) 5088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 5098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __a[0]; 5108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5118347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt 5128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 5138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_loadh_pi(__m128 __a, const __m64 *__p) 5148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 5158d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); 5168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt struct __mm_loadh_pi_struct { 5178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __mm_loadh_pi_v2f32 __u; 5188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } __attribute__((__packed__, __may_alias__)); 5198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __mm_loadh_pi_v2f32 __b = ((struct __mm_loadh_pi_struct*)__p)->__u; 5208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); 5218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5); 5228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5238347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt 5248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 5258d520ff1dc2da35cdca849e982051b86468016d8Dmitry 
Shmidt_mm_loadl_pi(__m128 __a, const __m64 *__p) 5268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 5278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); 5288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt struct __mm_loadl_pi_struct { 5298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __mm_loadl_pi_v2f32 __u; 5308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } __attribute__((__packed__, __may_alias__)); 5318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __mm_loadl_pi_v2f32 __b = ((struct __mm_loadl_pi_struct*)__p)->__u; 5328347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1); 5338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3); 5348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 5368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 5378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_load_ss(const float *__p) 5388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 5398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt struct __mm_load_ss_struct { 5408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt float __u; 5418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } __attribute__((__packed__, __may_alias__)); 5428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt float __u = ((struct __mm_load_ss_struct*)__p)->__u; 5438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128){ __u, 0, 0, 0 }; 5448d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 5468347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 
5478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_load1_ps(const float *__p) 5488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 5498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt struct __mm_load1_ps_struct { 5508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt float __u; 5518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } __attribute__((__packed__, __may_alias__)); 5528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt float __u = ((struct __mm_load1_ps_struct*)__p)->__u; 5538347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt return (__m128){ __u, __u, __u, __u }; 5548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 5568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _mm_load_ps1(p) _mm_load1_ps(p) 5578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 5588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 5598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_load_ps(const float *__p) 5608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 5618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return *(__m128*)__p; 5628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 5648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 56504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_loadu_ps(const float *__p) 5668347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt{ 56704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt struct __loadu_ps { 56804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt __m128 __v; 56904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt } __attribute__((__packed__, __may_alias__)); 57004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt return ((struct __loadu_ps*)__p)->__v; 
57104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt} 5728347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt 57304949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 57404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_loadr_ps(const float *__p) 57504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{ 5768347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt __m128 __a = _mm_load_ps(__p); 5778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_shufflevector(__a, __a, 3, 2, 1, 0); 5788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 5798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 5808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 5818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_set_ss(float __w) 5828347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt{ 5838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m128){ __w, 0, 0, 0 }; 5841f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 5851f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 5861f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 5871f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_set1_ps(float __w) 5881f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 5891f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt return (__m128){ __w, __w, __w, __w }; 5901f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt} 5911f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 5921f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt// Microsoft specific. 
5931f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 5948347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt_mm_set_ps1(float __w) 5951f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 596a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt return _mm_set1_ps(__w); 597a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt} 598a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt 599a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 600a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt_mm_set_ps(float __z, float __y, float __x, float __w) 601a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt{ 602a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt return (__m128){ __w, __x, __y, __z }; 603a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt} 6048347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt 605a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 60644c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt_mm_setr_ps(float __z, float __y, float __x, float __w) 60744c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt{ 60844c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt return (__m128){ __z, __y, __x, __w }; 60944c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt} 61044c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt 61144c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__)) 6128347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt_mm_setzero_ps(void) 61344c957860ca714a86357591f39aff0bfa904c743Dmitry Shmidt{ 6146c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt return (__m128){ 0, 0, 0, 0 }; 6156c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt} 6168347444e0bfb85e4550817fc99903f38ce8f5bccDmitry Shmidt 6176c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic 
__inline__ void __attribute__((__always_inline__)) 6188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_storeh_pi(__m64 *__p, __m128 __a) 6198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 6208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __builtin_ia32_storehps((__v2si *)__p, __a); 6218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 6228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 6238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__)) 624c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt_mm_storel_pi(__m64 *__p, __m128 __a) 625c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt{ 626c5ec7f57ead87efa365800228aa0b09a12d9e6c4Dmitry Shmidt __builtin_ia32_storelps((__v2si *)__p, __a); 627f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 6288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 6298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__)) 630f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_store_ss(float *__p, __m128 __a) 631f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 632f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt struct __mm_store_ss_struct { 6338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt float __u; 6348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt } __attribute__((__packed__, __may_alias__)); 6358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt ((struct __mm_store_ss_struct*)__p)->__u = __a[0]; 6368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 6378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 6388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 6398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_storeu_ps(float *__p, __m128 __a) 640f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 6418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 
__builtin_ia32_storeups(__p, __a); 642f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 643f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 644f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 645f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_store1_ps(float *__p, __m128 __a) 646f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 647f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt __a = __builtin_shufflevector(__a, __a, 0, 0, 0, 0); 6488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt _mm_storeu_ps(__p, __a); 6498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 65004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 6515460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 6525460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt_mm_store_ps1(float *__p, __m128 __a) 6535460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt{ 6545460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt return _mm_store1_ps(__p, __a); 6555460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt} 6565460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt 6575460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 6585460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt_mm_store_ps(float *__p, __m128 __a) 6595460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt{ 6605460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt *(__m128 *)__p = __a; 6615460547a121207cf7a99eac45e05fcdd83be3161Dmitry Shmidt} 66204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 66304949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 6647a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt_mm_storer_ps(float *__p, __m128 __a) 6657a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt{ 
6667a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt __a = __builtin_shufflevector(__a, __a, 3, 2, 1, 0); 6677a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt _mm_store_ps(__p, __a); 6687a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt} 6697a5e50a0554bee77a9da492ea3d86f46147f1671Dmitry Shmidt 67004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _MM_HINT_T0 3 67104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _MM_HINT_T1 2 67204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _MM_HINT_T2 1 67304949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _MM_HINT_NTA 0 67404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 67504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#ifndef _MSC_VER 67604949598a23f501be6eec21697465fd46a28840aDmitry Shmidt/* FIXME: We have to #define this because "sel" must be a constant integer, and 67704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt Sema doesn't do any form of constant propagation yet. */ 67804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 6796c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel))) 6806c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt#endif 6816c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 6826c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 6836c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_stream_pi(__m64 *__p, __m64 __a) 6846c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt{ 6856c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt __builtin_ia32_movntq(__p, __a); 6866c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt} 6876c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 6888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 6898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_stream_ps(float *__p, 
__m128 __a) 6908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 6918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __builtin_ia32_movntps(__p, __a); 6928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 6938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 6948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 6958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_sfence(void) 6967832adbbd72a1b784b7fb74a71a5d4085b0cb0d3Dmitry Shmidt{ 6978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __builtin_ia32_sfence(); 698f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 6998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 7008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 7018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_extract_pi16(__m64 __a, int __n) 7028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 7038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __v4hi __b = (__v4hi)__a; 7048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (unsigned short)__b[__n & 3]; 7058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 7068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 7078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 7088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_insert_pi16(__m64 __a, int __d, int __n) 7098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 7108d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __v4hi __b = (__v4hi)__a; 7118d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __b[__n & 3] = __d; 7128d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m64)__b; 7138d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 7148d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 7158d520ff1dc2da35cdca849e982051b86468016d8Dmitry 
Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 716f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_max_pi16(__m64 __a, __m64 __b) 7178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 7188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); 7198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 720f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 7218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 722f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_max_pu8(__m64 __a, __m64 __b) 7238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 724f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); 725f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 7268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 727f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 7288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_min_pi16(__m64 __a, __m64 __b) 729f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 730f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); 731f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 732f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 733f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 734f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_min_pu8(__m64 __a, __m64 __b) 735f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 736f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); 737f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 738f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 
739f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 740f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_movemask_pi8(__m64 __a) 741f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 742f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return __builtin_ia32_pmovmskb((__v8qi)__a); 743f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 744f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 745f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 746f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_mulhi_pu16(__m64 __a, __m64 __b) 747f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 748f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); 749f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 750f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 751f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt#define _mm_shuffle_pi16(a, n) __extension__ ({ \ 752f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt __m64 __a = (a); \ 753f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); }) 754f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 755f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 756f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) 757f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 758f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); 759f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 760f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 761f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 
762f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_avg_pu8(__m64 __a, __m64 __b) 763f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 764f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); 765f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 766f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 7678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 7688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_avg_pu16(__m64 __a, __m64 __b) 7698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 770f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); 7718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 772f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 773f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 774f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_sad_pu8(__m64 __a, __m64 __b) 775f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 776f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); 777f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 778f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 779f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 780f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_getcsr(void) 781f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 782f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt return __builtin_ia32_stmxcsr(); 783f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 784f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 785f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidtstatic __inline__ void __attribute__((__always_inline__, 
__nodebug__)) 786f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt_mm_setcsr(unsigned int __i) 787f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt{ 788f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt __builtin_ia32_ldmxcsr(__i); 789f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt} 790f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt 791f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ 792f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt __m128 __a = (a); \ 793f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt __m128 __b = (b); \ 794f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \ 795f940fbdc849eba19de7b63a74ced85e550bf4572Dmitry Shmidt (mask) & 0x3, ((mask) & 0xc) >> 2, \ 7968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt (((mask) & 0x30) >> 4) + 4, \ 797d5e4923d04122f81300fa68fb07d64ede28fd44dDmitry Shmidt (((mask) & 0xc0) >> 6) + 4); }) 7988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 7998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 80004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_unpackhi_ps(__m128 __a, __m128 __b) 80104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{ 80204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt return __builtin_shufflevector(__a, __b, 2, 6, 3, 7); 80368d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt} 8046c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 80504949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 80604949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_unpacklo_ps(__m128 __a, __m128 __b) 80768d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt{ 80804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt return __builtin_shufflevector(__a, __b, 0, 4, 1, 5); 
80904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt} 81004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 81104949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 81204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_move_ss(__m128 __a, __m128 __b) 81304949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{ 81404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt return __builtin_shufflevector(__a, __b, 4, 1, 2, 3); 81504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt} 81668d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt 81768d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 8186c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt_mm_movehl_ps(__m128 __a, __m128 __b) 81904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{ 82004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt return __builtin_shufflevector(__a, __b, 6, 7, 2, 3); 82168d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt} 82268d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt 8236c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 82468d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt_mm_movelh_ps(__m128 __a, __m128 __b) 82568d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt{ 82668d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt return __builtin_shufflevector(__a, __b, 0, 1, 4, 5); 8276c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt} 82868d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt 82904949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 83004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt_mm_cvtpi16_ps(__m64 __a) 83104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt{ 83204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt __m64 __b, __c; 
833a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt __m128 __r; 834a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt 835a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt __b = _mm_setzero_si64(); 836a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt __b = _mm_cmpgt_pi16(__b, __a); 837a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt __c = _mm_unpackhi_pi16(__a, __b); 838a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt __r = _mm_setzero_ps(); 839a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt __r = _mm_cvtpi32_ps(__r, __c); 840a38abf9af7bec7e89dbfb39ac7bb77223fe47c72Dmitry Shmidt __r = _mm_movelh_ps(__r, __r); 84168d0e3ed07847339aedfac8e02f50db68c702e52Dmitry Shmidt __c = _mm_unpacklo_pi16(__a, __b); 8426c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt __r = _mm_cvtpi32_ps(__r, __c); 84304949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 84404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt return __r; 84504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt} 84604949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 84704949598a23f501be6eec21697465fd46a28840aDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 848bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt_mm_cvtpu16_ps(__m64 __a) 849bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt{ 850bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt __m64 __b, __c; 851bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt __m128 __r; 852bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt 853bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt __b = _mm_setzero_si64(); 854bd14a57187b024f49f5b9ace55ef457d8d04650aDmitry Shmidt __c = _mm_unpackhi_pi16(__a, __b); 855a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt __r = _mm_setzero_ps(); 856a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt __r = _mm_cvtpi32_ps(__r, __c); 857a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt __r = _mm_movelh_ps(__r, __r); 
858a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt __c = _mm_unpacklo_pi16(__a, __b); 859a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt __r = _mm_cvtpi32_ps(__r, __c); 860a54fa5fb807eaeff45464139b5a7759f060cec68Dmitry Shmidt 8618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __r; 8628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 8638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 8642e67f06149ff649fb6f8782bad041d3d9124685eDmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 8658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtpi8_ps(__m64 __a) 8668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 8678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m64 __b; 86861d9df3e62aaa0e87ad05452fcb95142159a17b6Dmitry Shmidt 86961d9df3e62aaa0e87ad05452fcb95142159a17b6Dmitry Shmidt __b = _mm_setzero_si64(); 87004949598a23f501be6eec21697465fd46a28840aDmitry Shmidt __b = _mm_cmpgt_pi8(__b, __a); 871f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt __b = _mm_unpacklo_pi8(__a, __b); 8728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 8738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return _mm_cvtpi16_ps(__b); 8748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 8758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 8768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 8778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtpu8_ps(__m64 __a) 8788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 8798d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m64 __b; 8808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 8818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __b = _mm_setzero_si64(); 8828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __b = _mm_unpacklo_pi8(__a, __b); 8838d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 
8848d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return _mm_cvtpi16_ps(__b); 8858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 8868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 8878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 8888d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtpi32x2_ps(__m64 __a, __m64 __b) 8898d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 8908d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m128 __c; 8918d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 8928d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __c = _mm_setzero_ps(); 8938d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __c = _mm_cvtpi32_ps(__c, __b); 8948d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __c = _mm_movelh_ps(__c, __c); 8958d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 8968d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return _mm_cvtpi32_ps(__c, __a); 8978d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 8988d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 8998d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 9008d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_cvtps_pi16(__m128 __a) 9018d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 9028d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m64 __b, __c; 9038d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9048d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __b = _mm_cvtps_pi32(__a); 9058d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __a = _mm_movehl_ps(__a, __a); 9068d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __c = _mm_cvtps_pi32(__a); 9078d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9088d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return _mm_packs_pi32(__b, __c); 9098d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 
9106c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt 9116c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidtstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 9121f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt_mm_cvtps_pi8(__m128 __a) 9131f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt{ 9141f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt __m64 __b, __c; 9151f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt 9168d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __b = _mm_cvtps_pi16(__a); 9178d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __c = _mm_setzero_si64(); 9188d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9198d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return _mm_packs_pi16(__b, __c); 9208d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 9218d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9228d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 9238d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt_mm_movemask_ps(__m128 __a) 9248d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt{ 9258d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt return __builtin_ia32_movmskps(__a); 9268d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} 9278d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9288d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) 9298d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9308d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_EXCEPT_INVALID (0x0001) 9318d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_EXCEPT_DENORM (0x0002) 9328d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_EXCEPT_DIV_ZERO (0x0004) 9338d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_EXCEPT_OVERFLOW (0x0008) 9348d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define 
_MM_EXCEPT_UNDERFLOW (0x0010) 9358d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_EXCEPT_INEXACT (0x0020) 9368d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_EXCEPT_MASK (0x003f) 9378d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9388d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_MASK_INVALID (0x0080) 9398d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_MASK_DENORM (0x0100) 9408d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_MASK_DIV_ZERO (0x0200) 9418d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_MASK_OVERFLOW (0x0400) 9428d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_MASK_UNDERFLOW (0x0800) 9438d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_MASK_INEXACT (0x1000) 9441f69aa52ea2e0a73ac502565df8c666ee49cab6aDmitry Shmidt#define _MM_MASK_MASK (0x1f80) 9458d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9468d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_ROUND_NEAREST (0x0000) 9478d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_ROUND_DOWN (0x2000) 9488d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_ROUND_UP (0x4000) 9498d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_ROUND_TOWARD_ZERO (0x6000) 9508d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_ROUND_MASK (0x6000) 9518d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9528d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_FLUSH_ZERO_MASK (0x8000) 9538d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_FLUSH_ZERO_ON (0x8000) 9548d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_FLUSH_ZERO_OFF (0x0000) 9558d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9568d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK) 9578d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define 
_MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK) 9588d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK) 9598d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK) 9608d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9618d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x))) 9628d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x))) 9638d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x))) 9648d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x))) 9658d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9668d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ 9678d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidtdo { \ 9688d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt __m128 tmp3, tmp2, tmp1, tmp0; \ 9698d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt tmp0 = _mm_unpacklo_ps((row0), (row1)); \ 9708d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt tmp2 = _mm_unpacklo_ps((row2), (row3)); \ 9718d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt tmp1 = _mm_unpackhi_ps((row0), (row1)); \ 9728d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt tmp3 = _mm_unpackhi_ps((row2), (row3)); \ 9738d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt (row0) = _mm_movelh_ps(tmp0, tmp2); \ 9748d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt (row1) = _mm_movehl_ps(tmp2, tmp0); \ 9758d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt (row2) = _mm_movelh_ps(tmp1, tmp3); \ 9768d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 
(row3) = _mm_movehl_ps(tmp3, tmp1); \ 9778d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt} while (0) 9788d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt 9796c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt/* Aliases for compatibility. */ 9808d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _m_pextrw _mm_extract_pi16 9818d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _m_pinsrw _mm_insert_pi16 9828d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _m_pmaxsw _mm_max_pi16 9836c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt#define _m_pmaxub _mm_max_pu8 9846c0da2bb83f6915d8260912362692d1a742e057bDmitry Shmidt#define _m_pminsw _mm_min_pi16 9858d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _m_pminub _mm_min_pu8 9868d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _m_pmovmskb _mm_movemask_pi8 9878d520ff1dc2da35cdca849e982051b86468016d8Dmitry Shmidt#define _m_pmulhuw _mm_mulhi_pu16 98804949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _m_pshufw _mm_shuffle_pi16 98904949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _m_maskmovq _mm_maskmove_si64 990f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#define _m_pavgb _mm_avg_pu8 99104949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _m_pavgw _mm_avg_pu16 99204949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _m_psadbw _mm_sad_pu8 99304949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _m_ _mm_ 99404949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#define _m_ _mm_ 99504949598a23f501be6eec21697465fd46a28840aDmitry Shmidt 99604949598a23f501be6eec21697465fd46a28840aDmitry Shmidt/* Ugly hack for backwards-compatibility (compatible with gcc) */ 99704949598a23f501be6eec21697465fd46a28840aDmitry Shmidt#ifdef __SSE2__ 998f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#include <emmintrin.h> 999f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#endif 1000f21452aea786ac056eb01f1cbba4f553bd502747Dmitry 
Shmidt 1001f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#endif /* __SSE__ */ 1002f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt 1003f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt#endif /* __XMMINTRIN_H */ 1004f21452aea786ac056eb01f1cbba4f553bd502747Dmitry Shmidt