11188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== 21188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * 31188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * Permission is hereby granted, free of charge, to any person obtaining a copy 41188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * of this software and associated documentation files (the "Software"), to deal 51188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * in the Software without restriction, including without limitation the rights 61188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 71188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * copies of the Software, and to permit persons to whom the Software is 81188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * furnished to do so, subject to the following conditions: 91188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * 101188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * The above copyright notice and this permission notice shall be included in 111188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * all copies or substantial portions of the Software. 121188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * 131188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 141188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 151188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 161188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 171188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 181188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 191188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * THE SOFTWARE. 201188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * 211188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker *===-----------------------------------------------------------------------=== 221188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker */ 231188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 241188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#ifndef _SMMINTRIN_H 251188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SMMINTRIN_H 261188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 271188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#include <tmmintrin.h> 281188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 291188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* Define the default attributes for the functions in this file. */ 301188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) 311188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 321188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Rounding macros. */ 331188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_TO_NEAREST_INT 0x00 341188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_TO_NEG_INF 0x01 351188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_TO_POS_INF 0x02 361188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_TO_ZERO 0x03 371188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_CUR_DIRECTION 0x04 381188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 391188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_RAISE_EXC 0x00 401188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_NO_EXC 0x08 411188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 421188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) 431188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) 441188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) 451188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) 461188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) 471188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) 481188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 491188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) 501188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) 511188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_ceil_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_CEIL) 521188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_ceil_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_CEIL) 531188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 541188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_floor_ps(X) _mm_round_ps((X), _MM_FROUND_FLOOR) 551188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_floor_pd(X) _mm_round_pd((X), _MM_FROUND_FLOOR) 561188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_floor_ss(X, Y) _mm_round_ss((X), (Y), _MM_FROUND_FLOOR) 571188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_floor_sd(X, Y) _mm_round_sd((X), (Y), _MM_FROUND_FLOOR) 581188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 591188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_round_ps(X, M) __extension__ ({ \ 601188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); }) 611188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 621188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_round_ss(X, Y, M) __extension__ ({ \ 631188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \ 641188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v4sf)(__m128)(Y), (M)); }) 651188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 661188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_round_pd(X, M) __extension__ ({ \ 671188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); }) 681188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 691188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_round_sd(X, Y, M) __extension__ ({ \ 701188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \ 711188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v2df)(__m128d)(Y), (M)); }) 721188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 731188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Packed Blending Intrinsics. */ 741188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_blend_pd(V1, V2, M) __extension__ ({ \ 751188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \ 761188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v2df)(__m128d)(V2), \ 771188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x01) ? 2 : 0), \ 781188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x02) ? 3 : 1)); }) 791188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 801188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_blend_ps(V1, V2, M) __extension__ ({ \ 811188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \ 821188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x01) ? 4 : 0), \ 831188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x02) ? 5 : 1), \ 841188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x04) ? 6 : 2), \ 851188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x08) ? 7 : 3)); }) 861188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 871188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128d __DEFAULT_FN_ATTRS 881188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) 891188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 901188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, 911188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v2df)__M); 921188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 931188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 941188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128 __DEFAULT_FN_ATTRS 951188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) 961188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 971188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, 981188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v4sf)__M); 991188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1001188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1011188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1021188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) 1031188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1041188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, 1051188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)__M); 1061188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1071188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1081188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ 1091188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \ 1101188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v8hi)(__m128i)(V2), \ 1111188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x01) ? 8 : 0), \ 1121188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x02) ? 9 : 1), \ 1131188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x04) ? 10 : 2), \ 1141188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x08) ? 11 : 3), \ 1151188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x10) ? 12 : 4), \ 1161188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x20) ? 13 : 5), \ 1171188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x40) ? 14 : 6), \ 1181188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (((M) & 0x80) ? 15 : 7)); }) 1191188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1201188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Dword Multiply Instructions. */ 1211188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1221188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_mullo_epi32 (__m128i __V1, __m128i __V2) 1231188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1241188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) ((__v4si)__V1 * (__v4si)__V2); 1251188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1261188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1271188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1281188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_mul_epi32 (__m128i __V1, __m128i __V2) 1291188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1301188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); 1311188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1321188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1331188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Floating Point Dot Product Instructions. */ 1341188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_dp_ps(X, Y, M) __extension__ ({ \ 1351188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \ 1361188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v4sf)(__m128)(Y), (M)); }) 1371188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1381188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_dp_pd(X, Y, M) __extension__ ({\ 1391188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \ 1401188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v2df)(__m128d)(Y), (M)); }) 1411188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1421188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Streaming Load Hint Instruction. */ 1431188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1441188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_stream_load_si128 (__m128i const *__V) 1451188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1461188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_movntdqa ((const __v2di *) __V); 1471188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1481188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1491188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Packed Integer Min/Max Instructions. */ 1501188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1511188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_min_epi8 (__m128i __V1, __m128i __V2) 1521188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1531188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); 1541188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1551188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1561188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1571188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_max_epi8 (__m128i __V1, __m128i __V2) 1581188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1591188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); 1601188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1611188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1621188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1631188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_min_epu16 (__m128i __V1, __m128i __V2) 1641188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1651188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); 1661188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1671188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1681188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1691188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_max_epu16 (__m128i __V1, __m128i __V2) 1701188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1711188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); 1721188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1731188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1741188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1751188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_min_epi32 (__m128i __V1, __m128i __V2) 1761188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1771188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); 1781188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1791188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1801188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1811188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_max_epi32 (__m128i __V1, __m128i __V2) 1821188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1831188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); 1841188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1851188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1861188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1871188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_min_epu32 (__m128i __V1, __m128i __V2) 1881188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1891188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); 1901188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1911188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1921188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1931188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_max_epu32 (__m128i __V1, __m128i __V2) 1941188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 1951188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); 1961188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 1971188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 1981188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Insertion and Extraction from XMM Register Instructions. */ 1991188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) 2001188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_extract_ps(X, N) (__extension__ \ 2011188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker ({ union { int __i; float __f; } __t; \ 2021188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker __v4sf __a = (__v4sf)(__m128)(X); \ 2031188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker __t.__f = __a[(N) & 3]; \ 2041188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker __t.__i;})) 2051188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2061188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* Miscellaneous insert and extract macros. */ 2071188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* Extract a single-precision float from X at index N into D. */ 2081188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \ 2091188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (D) = __a[N]; })) 2101188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2111188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create 2121188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker an index suitable for _mm_insert_ps. */ 2131188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) 2141188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2151188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* Extract a float from X at index N into the first index of the return. */ 2161188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \ 2171188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) 2181188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2191188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* Insert int into packed integer array at index. */ 2201188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_insert_epi8(X, I, N) (__extension__ \ 2211188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker ({ __v16qi __a = (__v16qi)(__m128i)(X); \ 2221188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker __a[(N) & 15] = (I); \ 2231188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128i)__a;})) 2241188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_insert_epi32(X, I, N) (__extension__ \ 2251188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker ({ __v4si __a = (__v4si)(__m128i)(X); \ 2261188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker __a[(N) & 3] = (I); \ 2271188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128i)__a;})) 2281188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#ifdef __x86_64__ 2291188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_insert_epi64(X, I, N) (__extension__ \ 2301188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker ({ __v2di __a = (__v2di)(__m128i)(X); \ 2311188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker __a[(N) & 1] = (I); \ 2321188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128i)__a;})) 2331188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#endif /* __x86_64__ */ 2341188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2351188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* Extract int from packed integer array at index. This returns the element 2361188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker * as a zero extended value, so it is unsigned. 2371188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker */ 2381188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_extract_epi8(X, N) (__extension__ \ 2391188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker ({ __v16qi __a = (__v16qi)(__m128i)(X); \ 2401188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)(unsigned char) __a[(N) & 15];})) 2411188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_extract_epi32(X, N) (__extension__ \ 2421188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker ({ __v4si __a = (__v4si)(__m128i)(X); \ 2431188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__a[(N) & 3];})) 2441188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#ifdef __x86_64__ 2451188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_extract_epi64(X, N) (__extension__ \ 2461188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker ({ __v2di __a = (__v2di)(__m128i)(X); \ 2471188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (long long)__a[(N) & 1];})) 2481188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#endif /* __x86_64 */ 2491188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2501188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 128-bit Packed Integer Comparisons. */ 2511188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ int __DEFAULT_FN_ATTRS 2521188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_testz_si128(__m128i __M, __m128i __V) 2531188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 2541188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); 2551188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 2561188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2571188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ int __DEFAULT_FN_ATTRS 2581188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_testc_si128(__m128i __M, __m128i __V) 2591188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 2601188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); 2611188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 2621188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2631188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ int __DEFAULT_FN_ATTRS 2641188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_testnzc_si128(__m128i __M, __m128i __V) 2651188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 2661188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); 2671188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 2681188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2691188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) 2701188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) 2711188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) 2721188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2731188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 64-bit Packed Integer Comparisons. */ 2741188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2751188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) 2761188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 2771188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)((__v2di)__V1 == (__v2di)__V2); 2781188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 2791188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2801188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Packed Integer Sign-Extension. */ 2811188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2821188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepi8_epi16(__m128i __V) 2831188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 2841188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker /* This function always performs a signed extension, but __v16qi is a char 2851188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker which may be signed or unsigned, so use __v16qs. */ 2861188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); 2871188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 2881188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2891188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2901188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepi8_epi32(__m128i __V) 2911188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 2921188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker /* This function always performs a signed extension, but __v16qi is a char 2931188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker which may be signed or unsigned, so use __v16qs. */ 2941188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si); 2951188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 2961188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 2971188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2981188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepi8_epi64(__m128i __V) 2991188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3001188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker /* This function always performs a signed extension, but __v16qi is a char 3011188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker which may be signed or unsigned, so use __v16qs. */ 3021188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di); 3031188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3041188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3051188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3061188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepi16_epi32(__m128i __V) 3071188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3081188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); 3091188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3101188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3111188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3121188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepi16_epi64(__m128i __V) 3131188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3141188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); 3151188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3161188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3171188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3181188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepi32_epi64(__m128i __V) 3191188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3201188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di); 3211188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3221188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3231188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Packed Integer Zero-Extension. */ 3241188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3251188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepu8_epi16(__m128i __V) 3261188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3271188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); 3281188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3291188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3301188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3311188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepu8_epi32(__m128i __V) 3321188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3331188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); 3341188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3351188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3361188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3371188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepu8_epi64(__m128i __V) 3381188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3391188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); 3401188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3411188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3421188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3431188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepu16_epi32(__m128i __V) 3441188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3451188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); 3461188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3471188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3481188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3491188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepu16_epi64(__m128i __V) 3501188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3511188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); 3521188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3531188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3541188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3551188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cvtepu32_epi64(__m128i __V) 3561188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3571188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); 3581188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3591188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3601188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Pack with Unsigned Saturation. */ 3611188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3621188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_packus_epi32(__m128i __V1, __m128i __V2) 3631188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3641188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); 3651188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3661188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3671188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4 Multiple Packed Sums of Absolute Difference. */ 3681188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_mpsadbw_epu8(X, Y, M) __extension__ ({ \ 3691188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \ 3701188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(Y), (M)); }) 3711188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3721188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 3731188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_minpos_epu16(__m128i __V) 3741188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 3751188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V); 3761188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 3771188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3781188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* Handle the sse4.2 definitions here. */ 3791188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3801188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* These definitions are normally in nmmintrin.h, but gcc puts them in here 3811188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker so we'll do the same. */ 3821188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3831188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#undef __DEFAULT_FN_ATTRS 3841188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) 3851188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3861188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* These specify the type of data that we're comparing. */ 3871188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_UBYTE_OPS 0x00 3881188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_UWORD_OPS 0x01 3891188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_SBYTE_OPS 0x02 3901188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_SWORD_OPS 0x03 3911188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3921188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* These specify the type of comparison operation. */ 3931188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_CMP_EQUAL_ANY 0x00 3941188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_CMP_RANGES 0x04 3951188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_CMP_EQUAL_EACH 0x08 3961188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_CMP_EQUAL_ORDERED 0x0c 3971188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 3981188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* These macros specify the polarity of the operation. */ 3991188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_POSITIVE_POLARITY 0x00 4001188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_NEGATIVE_POLARITY 0x10 4011188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 4021188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 4031188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4041188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* These macros are used in _mm_cmpXstri() to specify the return. */ 4051188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_LEAST_SIGNIFICANT 0x00 4061188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_MOST_SIGNIFICANT 0x40 4071188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4081188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* These macros are used in _mm_cmpXstri() to specify the return. */ 4091188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_BIT_MASK 0x00 4101188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _SIDD_UNIT_MASK 0x40 4111188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4121188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4.2 Packed Comparison Intrinsics. */ 4131188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpistrm(A, B, M) \ 4141188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \ 4151188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4161188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpistri(A, B, M) \ 4171188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \ 4181188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4191188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4201188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpestrm(A, LA, B, LB, M) \ 4211188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \ 4221188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4231188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)(M)) 4241188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpestri(A, LA, B, LB, M) \ 4251188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \ 4261188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4271188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)(M)) 4281188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4291188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ 4301188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpistra(A, B, M) \ 4311188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \ 4321188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4331188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpistrc(A, B, M) \ 4341188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \ 4351188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4361188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpistro(A, B, M) \ 4371188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \ 4381188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4391188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpistrs(A, B, M) \ 4401188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \ 4411188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4421188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpistrz(A, B, M) \ 4431188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \ 4441188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(M)) 4451188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4461188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpestra(A, LA, B, LB, M) \ 4471188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \ 4481188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4491188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)(M)) 4501188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpestrc(A, LA, B, LB, M) \ 4511188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \ 4521188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4531188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)(M)) 4541188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpestro(A, LA, B, LB, M) \ 4551188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \ 4561188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4571188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)(M)) 4581188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpestrs(A, LA, B, LB, M) \ 4591188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \ 4601188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4611188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)(M)) 4621188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#define _mm_cmpestrz(A, LA, B, LB, M) \ 4631188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \ 4641188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (__v16qi)(__m128i)(B), (int)(LB), \ 4651188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker (int)(M)) 4661188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4671188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4.2 Compare Packed Data -- Greater Than. */ 4681188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ __m128i __DEFAULT_FN_ATTRS 4691188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) 4701188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 4711188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return (__m128i)((__v2di)__V1 > (__v2di)__V2); 4721188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 4731188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4741188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker/* SSE4.2 Accumulate CRC32. */ 4751188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 4761188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_crc32_u8(unsigned int __C, unsigned char __D) 4771188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 4781188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return __builtin_ia32_crc32qi(__C, __D); 4791188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 4801188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4811188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 4821188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_crc32_u16(unsigned int __C, unsigned short __D) 4831188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 4841188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return __builtin_ia32_crc32hi(__C, __D); 4851188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 4861188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4871188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 4881188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_crc32_u32(unsigned int __C, unsigned int __D) 4891188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 4901188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return __builtin_ia32_crc32si(__C, __D); 4911188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 4921188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 4931188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#ifdef __x86_64__ 4941188dcf30923cb444143ffa4b83dc951037e76agitbuildkickerstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 4951188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker_mm_crc32_u64(unsigned long long __C, unsigned long long __D) 4961188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker{ 4971188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker return __builtin_ia32_crc32di(__C, __D); 4981188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker} 4991188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#endif /* __x86_64__ */ 5001188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 5011188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#undef __DEFAULT_FN_ATTRS 5021188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 5031188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#ifdef __POPCNT__ 5041188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#include <popcntintrin.h> 5051188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#endif 5061188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker 5071188dcf30923cb444143ffa4b83dc951037e76agitbuildkicker#endif /* _SMMINTRIN_H */ 508