16bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== 26bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * 36bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * Permission is hereby granted, free of charge, to any person obtaining a copy 46bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * of this software and associated documentation files (the "Software"), to deal 56bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * in the Software without restriction, including without limitation the rights 66bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 76bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * copies of the Software, and to permit persons to whom the Software is 86bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * furnished to do so, subject to the following conditions: 96bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * 106bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * The above copyright notice and this permission notice shall be included in 116bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * all copies or substantial portions of the Software. 126bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * 136bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 146bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 156bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 166bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 176bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 186bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 196bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * THE SOFTWARE. 206bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * 216bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker *===-----------------------------------------------------------------------=== 226bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker */ 236bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 246bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#ifndef _SMMINTRIN_H 256bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SMMINTRIN_H 266bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 276bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#include <tmmintrin.h> 286bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 296bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* Define the default attributes for the functions in this file. */ 306bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) 316bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 326bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Rounding macros. */ 336bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_TO_NEAREST_INT 0x00 346bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_TO_NEG_INF 0x01 356bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_TO_POS_INF 0x02 366bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_TO_ZERO 0x03 376bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_CUR_DIRECTION 0x04 386bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 396bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_RAISE_EXC 0x00 406bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_NO_EXC 0x08 416bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 426bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) 436bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) 446bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) 456bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) 466bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) 476bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) 486bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 496bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) 506bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) 516bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) 526bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) 536bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 546bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) 556bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) 566bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) 576bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) 586bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 596bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_round_ps(X, M) __extension__ ({ \ 606bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); }) 616bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 626bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_round_ss(X, Y, M) __extension__ ({ \ 636bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ 646bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v4sf)(__m128)(Y), (M)); }) 656bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 666bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_round_pd(X, M) __extension__ ({ \ 676bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); }) 686bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 696bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_round_sd(X, Y, M) __extension__ ({ \ 706bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \ 716bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v2df)(__m128d)(Y), (M)); }) 726bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 736bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Packed Blending Intrinsics. */ 746bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_blend_pd(V1, V2, M) __extension__ ({ \ 756bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \ 766bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v2df)(__m128d)(V2), \ 776bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x01) ? 2 : 0), \ 786bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x02) ? 3 : 1)); }) 796bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 806bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_blend_ps(V1, V2, M) __extension__ ({ \ 816bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \ 826bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x01) ? 4 : 0), \ 836bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x02) ? 5 : 1), \ 846bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x04) ? 6 : 2), \ 856bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x08) ? 7 : 3)); }) 866bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 876bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128d __DEFAULT_FN_ATTRS 886bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) 896bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 906bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, 916bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v2df)__M); 926bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 936bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 946bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128 __DEFAULT_FN_ATTRS 956bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) 966bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 976bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, 986bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v4sf)__M); 996bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1006bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1016bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1026bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) 1036bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1046bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, 1056bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)__M); 1066bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1076bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1086bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ 1096bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \ 1106bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v8hi)(__m128i)(V2), \ 1116bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x01) ? 8 : 0), \ 1126bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x02) ? 9 : 1), \ 1136bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x04) ? 10 : 2), \ 1146bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x08) ? 11 : 3), \ 1156bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x10) ? 12 : 4), \ 1166bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x20) ? 13 : 5), \ 1176bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x40) ? 14 : 6), \ 1186bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (((M) & 0x80) ? 15 : 7)); }) 1196bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1206bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Dword Multiply Instructions. */ 1216bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1226bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_mullo_epi32 (__m128i __V1, __m128i __V2) 1236bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1246bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) ((__v4su)__V1 * (__v4su)__V2); 1256bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1266bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1276bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1286bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_mul_epi32 (__m128i __V1, __m128i __V2) 1296bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1306bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); 1316bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1326bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1336bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Floating Point Dot Product Instructions. */ 1346bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_dp_ps(X, Y, M) __extension__ ({ \ 1356bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \ 1366bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v4sf)(__m128)(Y), (M)); }) 1376bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1386bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_dp_pd(X, Y, M) __extension__ ({\ 1396bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \ 1406bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v2df)(__m128d)(Y), (M)); }) 1416bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1426bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Streaming Load Hint Instruction. */ 1436bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1446bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_stream_load_si128 (__m128i const *__V) 1456bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1466bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_movntdqa ((const __v2di *) __V); 1476bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1486bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1496bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Packed Integer Min/Max Instructions. */ 1506bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1516bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_min_epi8 (__m128i __V1, __m128i __V2) 1526bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1536bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); 1546bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1556bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1566bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1576bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_max_epi8 (__m128i __V1, __m128i __V2) 1586bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1596bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); 1606bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1616bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1626bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1636bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_min_epu16 (__m128i __V1, __m128i __V2) 1646bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1656bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); 1666bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1676bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1686bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1696bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_max_epu16 (__m128i __V1, __m128i __V2) 1706bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1716bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); 1726bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1736bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1746bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1756bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_min_epi32 (__m128i __V1, __m128i __V2) 1766bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1776bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); 1786bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1796bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1806bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1816bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_max_epi32 (__m128i __V1, __m128i __V2) 1826bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1836bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); 1846bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1856bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1866bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1876bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_min_epu32 (__m128i __V1, __m128i __V2) 1886bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1896bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); 1906bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1916bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1926bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1936bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_max_epu32 (__m128i __V1, __m128i __V2) 1946bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 1956bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); 1966bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 1976bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 1986bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Insertion and Extraction from XMM Register Instructions. */ 1996bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) 2006bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_extract_ps(X, N) (__extension__ \ 2016bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ({ union { int __i; float __f; } __t; \ 2026bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker __v4sf __a = (__v4sf)(__m128)(X); \ 2036bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker __t.__f = __a[(N) & 3]; \ 2046bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker __t.__i;})) 2056bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2066bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* Miscellaneous insert and extract macros. */ 2076bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* Extract a single-precision float from X at index N into D. */ 2086bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \ 2096bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (D) = __a[N]; })) 2106bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2116bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create 2126bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker an index suitable for _mm_insert_ps. */ 2136bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) 2146bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2156bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* Extract a float from X at index N into the first index of the return. */ 2166bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \ 2176bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) 2186bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2196bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* Insert int into packed integer array at index. */ 2206bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_insert_epi8(X, I, N) (__extension__ \ 2216bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ({ __v16qi __a = (__v16qi)(__m128i)(X); \ 2226bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker __a[(N) & 15] = (I); \ 2236bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128i)__a;})) 2246bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_insert_epi32(X, I, N) (__extension__ \ 2256bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ({ __v4si __a = (__v4si)(__m128i)(X); \ 2266bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker __a[(N) & 3] = (I); \ 2276bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128i)__a;})) 2286bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#ifdef __x86_64__ 2296bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_insert_epi64(X, I, N) (__extension__ \ 2306bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ({ __v2di __a = (__v2di)(__m128i)(X); \ 2316bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker __a[(N) & 1] = (I); \ 2326bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128i)__a;})) 2336bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#endif /* __x86_64__ */ 2346bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2356bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* Extract int from packed integer array at index. This returns the element 2366bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker * as a zero extended value, so it is unsigned. 2376bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker */ 2386bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_extract_epi8(X, N) (__extension__ \ 2396bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ({ __v16qi __a = (__v16qi)(__m128i)(X); \ 2406bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)(unsigned char) __a[(N) & 15];})) 2416bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_extract_epi32(X, N) (__extension__ \ 2426bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ({ __v4si __a = (__v4si)(__m128i)(X); \ 2436bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__a[(N) & 3];})) 2446bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#ifdef __x86_64__ 2456bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_extract_epi64(X, N) (__extension__ \ 2466bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker ({ __v2di __a = (__v2di)(__m128i)(X); \ 2476bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (long long)__a[(N) & 1];})) 2486bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#endif /* __x86_64 */ 2496bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2506bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 128-bit Packed Integer Comparisons. */ 2516bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ int __DEFAULT_FN_ATTRS 2526bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_testz_si128(__m128i __M, __m128i __V) 2536bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 2546bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); 2556bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2566bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2576bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ int __DEFAULT_FN_ATTRS 2586bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_testc_si128(__m128i __M, __m128i __V) 2596bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 2606bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); 2616bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2626bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2636bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ int __DEFAULT_FN_ATTRS 2646bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_testnzc_si128(__m128i __M, __m128i __V) 2656bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 2666bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); 2676bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2686bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2696bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) 2706bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) 2716bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) 2726bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2736bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 64-bit Packed Integer Comparisons. */ 2746bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2756bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) 2766bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 2776bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)((__v2di)__V1 == (__v2di)__V2); 2786bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2796bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2806bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Packed Integer Sign-Extension. */ 2816bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2826bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepi8_epi16(__m128i __V) 2836bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 2846bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker /* This function always performs a signed extension, but __v16qi is a char 2856bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker which may be signed or unsigned, so use __v16qs. */ 2866bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); 2876bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2886bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2896bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2906bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepi8_epi32(__m128i __V) 2916bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 2926bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker /* This function always performs a signed extension, but __v16qi is a char 2936bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker which may be signed or unsigned, so use __v16qs. */ 2946bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si); 2956bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 2966bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 2976bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2986bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepi8_epi64(__m128i __V) 2996bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3006bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker /* This function always performs a signed extension, but __v16qi is a char 3016bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker which may be signed or unsigned, so use __v16qs. */ 3026bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di); 3036bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3046bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3056bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3066bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepi16_epi32(__m128i __V) 3076bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3086bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); 3096bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3106bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3116bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3126bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepi16_epi64(__m128i __V) 3136bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3146bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); 3156bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3166bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3176bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3186bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepi32_epi64(__m128i __V) 3196bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3206bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di); 3216bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3226bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3236bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Packed Integer Zero-Extension. */ 3246bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3256bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepu8_epi16(__m128i __V) 3266bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3276bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); 3286bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3296bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3306bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3316bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepu8_epi32(__m128i __V) 3326bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3336bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); 3346bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3356bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3366bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3376bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepu8_epi64(__m128i __V) 3386bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3396bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); 3406bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3416bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3426bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3436bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepu16_epi32(__m128i __V) 3446bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3456bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); 3466bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3476bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3486bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3496bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepu16_epi64(__m128i __V) 3506bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3516bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); 3526bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3536bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3546bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3556bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cvtepu32_epi64(__m128i __V) 3566bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3576bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); 3586bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3596bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3606bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Pack with Unsigned Saturation. */ 3616bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3626bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_packus_epi32(__m128i __V1, __m128i __V2) 3636bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3646bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); 3656bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3666bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3676bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4 Multiple Packed Sums of Absolute Difference. */ 3686bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \ 3696bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \ 3706bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(Y), (M)); }) 3716bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3726bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3736bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_minpos_epu16(__m128i __V) 3746bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 3756bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V); 3766bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 3776bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3786bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* Handle the sse4.2 definitions here. */ 3796bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3806bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* These definitions are normally in nmmintrin.h, but gcc puts them in here 3816bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker so we'll do the same. */ 3826bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3836bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#undef __DEFAULT_FN_ATTRS 3846bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) 3856bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3866bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* These specify the type of data that we're comparing. */ 3876bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_UBYTE_OPS 0x00 3886bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_UWORD_OPS 0x01 3896bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_SBYTE_OPS 0x02 3906bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_SWORD_OPS 0x03 3916bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3926bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* These specify the type of comparison operation. */ 3936bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_CMP_EQUAL_ANY 0x00 3946bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_CMP_RANGES 0x04 3956bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_CMP_EQUAL_EACH 0x08 3966bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_CMP_EQUAL_ORDERED 0x0c 3976bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 3986bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* These macros specify the polarity of the operation. */ 3996bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_POSITIVE_POLARITY 0x00 4006bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_NEGATIVE_POLARITY 0x10 4016bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 4026bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 4036bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4046bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* These macros are used in _mm_cmpXstri() to specify the return. */ 4056bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_LEAST_SIGNIFICANT 0x00 4066bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_MOST_SIGNIFICANT 0x40 4076bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4086bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* These macros are used in _mm_cmpXstri() to specify the return. */ 4096bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_BIT_MASK 0x00 4106bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _SIDD_UNIT_MASK 0x40 4116bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4126bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4.2 Packed Comparison Intrinsics. */ 4136bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpistrm(A, B, M) \ 4146bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \ 4156bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4166bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpistri(A, B, M) \ 4176bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \ 4186bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4196bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4206bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpestrm(A, LA, B, LB, M) \ 4216bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \ 4226bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4236bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)(M)) 4246bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpestri(A, LA, B, LB, M) \ 4256bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \ 4266bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4276bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)(M)) 4286bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4296bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ 4306bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpistra(A, B, M) \ 4316bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \ 4326bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4336bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpistrc(A, B, M) \ 4346bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \ 4356bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4366bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpistro(A, B, M) \ 4376bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \ 4386bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4396bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpistrs(A, B, M) \ 4406bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \ 4416bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4426bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpistrz(A, B, M) \ 4436bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \ 4446bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4456bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4466bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpestra(A, LA, B, LB, M) \ 4476bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \ 4486bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4496bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)(M)) 4506bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpestrc(A, LA, B, LB, M) \ 4516bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \ 4526bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4536bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)(M)) 4546bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpestro(A, LA, B, LB, M) \ 4556bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \ 4566bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4576bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)(M)) 4586bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpestrs(A, LA, B, LB, M) \ 4596bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \ 4606bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4616bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)(M)) 4626bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#define _mm_cmpestrz(A, LA, B, LB, M) \ 4636bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \ 4646bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4656bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker (int)(M)) 4666bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4676bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4.2 Compare Packed Data -- Greater Than. */ 4686bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 4696bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) 4706bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 4716bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return (__m128i)((__v2di)__V1 > (__v2di)__V2); 4726bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 4736bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4746bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker/* SSE4.2 Accumulate CRC32. */ 4756bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 4766bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_crc32_u8(unsigned int __C, unsigned char __D) 4776bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 4786bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __builtin_ia32_crc32qi(__C, __D); 4796bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 4806bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4816bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 4826bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_crc32_u16(unsigned int __C, unsigned short __D) 4836bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 4846bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __builtin_ia32_crc32hi(__C, __D); 4856bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 4866bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4876bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 4886bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_crc32_u32(unsigned int __C, unsigned int __D) 4896bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 4906bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __builtin_ia32_crc32si(__C, __D); 4916bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 4926bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 4936bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#ifdef __x86_64__ 4946bdbd720989797e8a53237ef3ef213c4114f869gitbuildkickerstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 4956bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker_mm_crc32_u64(unsigned long long __C, unsigned long long __D) 4966bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker{ 4976bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker return __builtin_ia32_crc32di(__C, __D); 4986bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker} 4996bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#endif /* __x86_64__ */ 5006bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 5016bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#undef __DEFAULT_FN_ATTRS 5026bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 5036bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#ifdef __POPCNT__ 5046bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#include <popcntintrin.h> 5056bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#endif 5066bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker 5076bdbd720989797e8a53237ef3ef213c4114f869gitbuildkicker#endif /* _SMMINTRIN_H */ 508